diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/adapter_model.safetensors deleted file mode 100644 index 3bcc7f06285e218df89ffac97820a08f2875becb..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3998cb16f83a5deb3ff6fe0e36e9c165afe76105855779789564d6663ece011e -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/optimizer.pt deleted file mode 100644 index 2cd10892e6c093d1230092de64847c9536892b45..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:167a8807a4f09ae154cc13e8241ad219ccc02ef35bfecd8cf5da4005845ac15d -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/rng_state.pth deleted file mode 100644 index 29eecf71f027a23b5a0d826cd3148e39a8d85840..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d569a9c96ff4e89d32dfe8ca0b97952cc2edcd76f89c14caa7e57238c4b46c8a -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/scheduler.pt deleted file mode 100644 index 1d5ede001a61435db4d22e4aaeaf208a169dbd59..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4812f164ea7757b51fbfb4d00b1fa6325746cb792d7c7ec0dd7e074068a93cfb -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/trainer_state.json deleted file mode 100644 index 7f474bde61ec011f50860db9717ce8145b013e9e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/trainer_state.json +++ /dev/null @@ -1,1615 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.7489768076398363, - "eval_steps": 366, - "global_step": 1098, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 1.1443907853139968e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1098/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/adapter_config.json deleted file mode 100644 index dcdb6b17c195950cd12709b48b32f4a0c173c74e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "down_proj", - "v_proj", - "gate_proj", - "up_proj", - "o_proj", - "q_proj", - "k_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/adapter_model.safetensors deleted file mode 100644 index 296c17fb139eaba1b283672f37a1a68c5d266aa2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c7d76ec612c68701e4ab38fd7332e83b533d3cb5d77e47f5f177c8f5e3f02371 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/optimizer.pt deleted file mode 100644 index 9429552599969c908a1d13f6e5ef889f98f41b7c..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:573920222e0747f8b4f52f63492bebb39a8010748f870f74d36be47092625986 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/rng_state.pth deleted file mode 100644 index 97a0dbefb70722fe69a095f738aee6ae07e2ac12..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:55618d72fdc98ce531c1d13612b817a14a50cd3ee258c44dec6a79dee2ab54ab -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/scheduler.pt deleted file mode 100644 index 346578444c977b739b419bbb5b1b24afa713bd1d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:092b2ba9e97dcb53fb28563a3b62dc47503e6a9c063fdb13a4250a47b2fa6abf -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/trainer_state.json deleted file mode 100644 index 9aa4c624772f0258ec195c6cfdf2f3538f1d17e0..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/trainer_state.json +++ /dev/null @@ -1,16577 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.9996589940323957, - "eval_steps": 1466, - "global_step": 11728, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0008525149190110827, - "grad_norm": 1.6653118133544922, - "learning_rate": 6.382978723404255e-07, - "loss": 0.6878558158874511, - "step": 5 - }, - { - "epoch": 0.0017050298380221654, - "grad_norm": 1.6072094440460205, - "learning_rate": 1.4361702127659573e-06, - "loss": 0.7222867965698242, - "step": 10 - }, - { - "epoch": 0.0025575447570332483, - "grad_norm": 1.8327608108520508, - "learning_rate": 2.2340425531914894e-06, - "loss": 0.6977188110351562, - "step": 15 - }, - { - "epoch": 0.0034100596760443308, - "grad_norm": 1.7263766527175903, - "learning_rate": 3.0319148936170214e-06, - "loss": 0.6928215026855469, - "step": 20 - }, - { - "epoch": 0.004262574595055414, - "grad_norm": 1.8482989072799683, - "learning_rate": 3.829787234042553e-06, - "loss": 0.6887872695922852, - "step": 25 - }, - { - "epoch": 0.005115089514066497, - "grad_norm": 2.0272440910339355, - "learning_rate": 4.627659574468085e-06, - "loss": 0.6714544296264648, - "step": 30 - }, - { - "epoch": 0.005967604433077579, - "grad_norm": 2.4286959171295166, - "learning_rate": 5.425531914893616e-06, - "loss": 0.6025971412658692, - "step": 35 - }, - { - "epoch": 0.0068201193520886615, - "grad_norm": 2.26863694190979, - "learning_rate": 6.223404255319148e-06, - "loss": 0.5200423240661621, - "step": 40 - }, - { - "epoch": 0.0076726342710997444, - "grad_norm": 2.0572476387023926, - "learning_rate": 7.02127659574468e-06, - "loss": 0.3741515874862671, - "step": 45 - }, - { - "epoch": 0.008525149190110827, - "grad_norm": 0.9048103094100952, - "learning_rate": 7.819148936170211e-06, - "loss": 0.25923154354095457, - "step": 50 - }, - { - "epoch": 0.00937766410912191, - "grad_norm": 0.5942133069038391, - "learning_rate": 8.617021276595744e-06, - "loss": 0.18589640855789186, - "step": 55 - }, - { - "epoch": 0.010230179028132993, - "grad_norm": 0.40469691157341003, - "learning_rate": 9.414893617021275e-06, - "loss": 0.14508966207504273, - "step": 60 - }, - { - "epoch": 0.011082693947144074, - "grad_norm": 0.319180428981781, - "learning_rate": 1.0212765957446808e-05, - "loss": 0.1287898302078247, - "step": 65 - }, - { - "epoch": 0.011935208866155157, - "grad_norm": 0.2620982229709625, - "learning_rate": 1.101063829787234e-05, - "loss": 0.10825083255767823, - "step": 70 - }, - { - "epoch": 0.01278772378516624, - "grad_norm": 0.2702063024044037, - "learning_rate": 1.1808510638297872e-05, - "loss": 0.09140915870666504, - "step": 75 - }, - { - "epoch": 0.013640238704177323, - "grad_norm": 0.21019567549228668, - "learning_rate": 1.2606382978723403e-05, - "loss": 0.07847741842269898, - "step": 80 - }, - { - "epoch": 0.014492753623188406, - "grad_norm": 0.27865487337112427, - "learning_rate": 1.3404255319148936e-05, - "loss": 0.0681579053401947, - "step": 85 - }, - { - "epoch": 0.015345268542199489, - "grad_norm": 0.2561706006526947, - "learning_rate": 1.4202127659574466e-05, - "loss": 0.06208043098449707, - "step": 90 - }, - { - "epoch": 0.01619778346121057, - "grad_norm": 0.33415308594703674, - "learning_rate": 1.4999999999999999e-05, - "loss": 0.05436077117919922, - "step": 95 - }, - { - "epoch": 0.017050298380221655, - "grad_norm": 0.20705723762512207, - "learning_rate": 1.579787234042553e-05, - "loss": 0.061427068710327146, - "step": 100 - }, - { - "epoch": 0.017902813299232736, - "grad_norm": 0.26269420981407166, - "learning_rate": 1.659574468085106e-05, - "loss": 0.04944279193878174, - "step": 105 - }, - { - "epoch": 0.01875532821824382, - "grad_norm": 0.256533682346344, - "learning_rate": 1.7393617021276596e-05, - "loss": 0.0463131994009018, - "step": 110 - }, - { - "epoch": 0.0196078431372549, - "grad_norm": 0.25077348947525024, - "learning_rate": 1.8191489361702127e-05, - "loss": 0.036723154783248904, - "step": 115 - }, - { - "epoch": 0.020460358056265986, - "grad_norm": 0.2801378071308136, - "learning_rate": 1.8989361702127655e-05, - "loss": 0.04308137893676758, - "step": 120 - }, - { - "epoch": 0.021312872975277068, - "grad_norm": 0.23140908777713776, - "learning_rate": 1.978723404255319e-05, - "loss": 0.0391487717628479, - "step": 125 - }, - { - "epoch": 0.02216538789428815, - "grad_norm": 0.4250975251197815, - "learning_rate": 2.0585106382978724e-05, - "loss": 0.03745899498462677, - "step": 130 - }, - { - "epoch": 0.023017902813299233, - "grad_norm": 0.24533668160438538, - "learning_rate": 2.1382978723404255e-05, - "loss": 0.03649275898933411, - "step": 135 - }, - { - "epoch": 0.023870417732310314, - "grad_norm": 0.2309182733297348, - "learning_rate": 2.2180851063829783e-05, - "loss": 0.036090463399887085, - "step": 140 - }, - { - "epoch": 0.0247229326513214, - "grad_norm": 0.28087928891181946, - "learning_rate": 2.2978723404255317e-05, - "loss": 0.03179733753204346, - "step": 145 - }, - { - "epoch": 0.02557544757033248, - "grad_norm": 0.2785134017467499, - "learning_rate": 2.377659574468085e-05, - "loss": 0.02794267237186432, - "step": 150 - }, - { - "epoch": 0.026427962489343565, - "grad_norm": 0.20468176901340485, - "learning_rate": 2.457446808510638e-05, - "loss": 0.025854837894439698, - "step": 155 - }, - { - "epoch": 0.027280477408354646, - "grad_norm": 0.4069538116455078, - "learning_rate": 2.537234042553191e-05, - "loss": 0.03182780146598816, - "step": 160 - }, - { - "epoch": 0.028132992327365727, - "grad_norm": 0.3025060296058655, - "learning_rate": 2.6170212765957446e-05, - "loss": 0.027975103259086607, - "step": 165 - }, - { - "epoch": 0.028985507246376812, - "grad_norm": 0.22387781739234924, - "learning_rate": 2.6968085106382977e-05, - "loss": 0.025766396522521974, - "step": 170 - }, - { - "epoch": 0.029838022165387893, - "grad_norm": 0.15060213208198547, - "learning_rate": 2.7765957446808508e-05, - "loss": 0.025661858916282653, - "step": 175 - }, - { - "epoch": 0.030690537084398978, - "grad_norm": 0.2912445366382599, - "learning_rate": 2.856382978723404e-05, - "loss": 0.02397846579551697, - "step": 180 - }, - { - "epoch": 0.03154305200341006, - "grad_norm": 0.19390830397605896, - "learning_rate": 2.9361702127659574e-05, - "loss": 0.023744897544384004, - "step": 185 - }, - { - "epoch": 0.03239556692242114, - "grad_norm": 0.24036464095115662, - "learning_rate": 3.0159574468085105e-05, - "loss": 0.0238590270280838, - "step": 190 - }, - { - "epoch": 0.03324808184143223, - "grad_norm": 0.2793026566505432, - "learning_rate": 3.0957446808510636e-05, - "loss": 0.025464403629302978, - "step": 195 - }, - { - "epoch": 0.03410059676044331, - "grad_norm": 0.21164429187774658, - "learning_rate": 3.175531914893617e-05, - "loss": 0.020692554116249085, - "step": 200 - }, - { - "epoch": 0.03495311167945439, - "grad_norm": 0.3742266297340393, - "learning_rate": 3.25531914893617e-05, - "loss": 0.02315486818552017, - "step": 205 - }, - { - "epoch": 0.03580562659846547, - "grad_norm": 0.2690248191356659, - "learning_rate": 3.3351063829787226e-05, - "loss": 0.0174571692943573, - "step": 210 - }, - { - "epoch": 0.03665814151747655, - "grad_norm": 0.3727855980396271, - "learning_rate": 3.414893617021276e-05, - "loss": 0.021705827116966246, - "step": 215 - }, - { - "epoch": 0.03751065643648764, - "grad_norm": 0.27461397647857666, - "learning_rate": 3.4946808510638296e-05, - "loss": 0.028134092688560486, - "step": 220 - }, - { - "epoch": 0.03836317135549872, - "grad_norm": 0.1723722219467163, - "learning_rate": 3.574468085106383e-05, - "loss": 0.02093944847583771, - "step": 225 - }, - { - "epoch": 0.0392156862745098, - "grad_norm": 0.17161321640014648, - "learning_rate": 3.654255319148936e-05, - "loss": 0.021984823048114777, - "step": 230 - }, - { - "epoch": 0.040068201193520885, - "grad_norm": 0.203886479139328, - "learning_rate": 3.734042553191489e-05, - "loss": 0.02234097272157669, - "step": 235 - }, - { - "epoch": 0.04092071611253197, - "grad_norm": 0.2021923065185547, - "learning_rate": 3.813829787234042e-05, - "loss": 0.02671273946762085, - "step": 240 - }, - { - "epoch": 0.041773231031543054, - "grad_norm": 0.24510027468204498, - "learning_rate": 3.8936170212765955e-05, - "loss": 0.016925157606601716, - "step": 245 - }, - { - "epoch": 0.042625745950554135, - "grad_norm": 0.191350057721138, - "learning_rate": 3.973404255319149e-05, - "loss": 0.02067229151725769, - "step": 250 - }, - { - "epoch": 0.043478260869565216, - "grad_norm": 0.20248189568519592, - "learning_rate": 4.053191489361702e-05, - "loss": 0.01805101931095123, - "step": 255 - }, - { - "epoch": 0.0443307757885763, - "grad_norm": 0.22623269259929657, - "learning_rate": 4.1329787234042545e-05, - "loss": 0.019811727106571198, - "step": 260 - }, - { - "epoch": 0.045183290707587385, - "grad_norm": 0.2050849199295044, - "learning_rate": 4.2127659574468086e-05, - "loss": 0.017767781019210817, - "step": 265 - }, - { - "epoch": 0.04603580562659847, - "grad_norm": 0.24867363274097443, - "learning_rate": 4.2925531914893614e-05, - "loss": 0.02127009928226471, - "step": 270 - }, - { - "epoch": 0.04688832054560955, - "grad_norm": 0.20721694827079773, - "learning_rate": 4.372340425531914e-05, - "loss": 0.022770658135414124, - "step": 275 - }, - { - "epoch": 0.04774083546462063, - "grad_norm": 0.19511370360851288, - "learning_rate": 4.452127659574468e-05, - "loss": 0.02165296971797943, - "step": 280 - }, - { - "epoch": 0.04859335038363171, - "grad_norm": 0.19066740572452545, - "learning_rate": 4.5319148936170204e-05, - "loss": 0.020857495069503785, - "step": 285 - }, - { - "epoch": 0.0494458653026428, - "grad_norm": 0.14217901229858398, - "learning_rate": 4.6117021276595746e-05, - "loss": 0.016413891315460206, - "step": 290 - }, - { - "epoch": 0.05029838022165388, - "grad_norm": 0.24177870154380798, - "learning_rate": 4.6914893617021274e-05, - "loss": 0.01902117133140564, - "step": 295 - }, - { - "epoch": 0.05115089514066496, - "grad_norm": 0.2742858827114105, - "learning_rate": 4.77127659574468e-05, - "loss": 0.016554126143455507, - "step": 300 - }, - { - "epoch": 0.05200341005967604, - "grad_norm": 0.24519385397434235, - "learning_rate": 4.851063829787234e-05, - "loss": 0.022036357223987578, - "step": 305 - }, - { - "epoch": 0.05285592497868713, - "grad_norm": 0.296572208404541, - "learning_rate": 4.930851063829787e-05, - "loss": 0.0196581095457077, - "step": 310 - }, - { - "epoch": 0.05370843989769821, - "grad_norm": 0.17092008888721466, - "learning_rate": 5.01063829787234e-05, - "loss": 0.015453743934631347, - "step": 315 - }, - { - "epoch": 0.05456095481670929, - "grad_norm": 0.17074067890644073, - "learning_rate": 5.090425531914893e-05, - "loss": 0.013983796536922454, - "step": 320 - }, - { - "epoch": 0.05541346973572037, - "grad_norm": 0.27302470803260803, - "learning_rate": 5.170212765957446e-05, - "loss": 0.015387402474880218, - "step": 325 - }, - { - "epoch": 0.056265984654731455, - "grad_norm": 0.2603592872619629, - "learning_rate": 5.2499999999999995e-05, - "loss": 0.013557825982570649, - "step": 330 - }, - { - "epoch": 0.05711849957374254, - "grad_norm": 0.23079948127269745, - "learning_rate": 5.329787234042553e-05, - "loss": 0.014809322357177735, - "step": 335 - }, - { - "epoch": 0.057971014492753624, - "grad_norm": 0.15029627084732056, - "learning_rate": 5.409574468085106e-05, - "loss": 0.018204084038734435, - "step": 340 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 0.2930934429168701, - "learning_rate": 5.4893617021276586e-05, - "loss": 0.02035558819770813, - "step": 345 - }, - { - "epoch": 0.059676044330775786, - "grad_norm": 0.19710905849933624, - "learning_rate": 5.569148936170213e-05, - "loss": 0.015025021135807037, - "step": 350 - }, - { - "epoch": 0.060528559249786874, - "grad_norm": 0.13316969573497772, - "learning_rate": 5.6489361702127655e-05, - "loss": 0.01258893460035324, - "step": 355 - }, - { - "epoch": 0.061381074168797956, - "grad_norm": 0.30591729283332825, - "learning_rate": 5.728723404255319e-05, - "loss": 0.0187319278717041, - "step": 360 - }, - { - "epoch": 0.06223358908780904, - "grad_norm": 0.22696663439273834, - "learning_rate": 5.808510638297872e-05, - "loss": 0.015805599093437196, - "step": 365 - }, - { - "epoch": 0.06308610400682012, - "grad_norm": 0.2800130546092987, - "learning_rate": 5.8882978723404245e-05, - "loss": 0.01719983071088791, - "step": 370 - }, - { - "epoch": 0.0639386189258312, - "grad_norm": 0.20671235024929047, - "learning_rate": 5.9680851063829786e-05, - "loss": 0.014449423551559449, - "step": 375 - }, - { - "epoch": 0.06479113384484228, - "grad_norm": 0.1630883365869522, - "learning_rate": 6.0478723404255314e-05, - "loss": 0.017153808474540712, - "step": 380 - }, - { - "epoch": 0.06564364876385337, - "grad_norm": 0.18699344992637634, - "learning_rate": 6.127659574468084e-05, - "loss": 0.015558701753616334, - "step": 385 - }, - { - "epoch": 0.06649616368286446, - "grad_norm": 0.15257929265499115, - "learning_rate": 6.207446808510638e-05, - "loss": 0.018240168690681458, - "step": 390 - }, - { - "epoch": 0.06734867860187553, - "grad_norm": 0.19658304750919342, - "learning_rate": 6.287234042553191e-05, - "loss": 0.015907022356987, - "step": 395 - }, - { - "epoch": 0.06820119352088662, - "grad_norm": 0.3743384778499603, - "learning_rate": 6.367021276595743e-05, - "loss": 0.014921861886978149, - "step": 400 - }, - { - "epoch": 0.06905370843989769, - "grad_norm": 0.254724383354187, - "learning_rate": 6.446808510638298e-05, - "loss": 0.018739913403987885, - "step": 405 - }, - { - "epoch": 0.06990622335890878, - "grad_norm": 0.19365151226520538, - "learning_rate": 6.52659574468085e-05, - "loss": 0.018984711170196532, - "step": 410 - }, - { - "epoch": 0.07075873827791987, - "grad_norm": 0.20728597044944763, - "learning_rate": 6.606382978723404e-05, - "loss": 0.01524859368801117, - "step": 415 - }, - { - "epoch": 0.07161125319693094, - "grad_norm": 0.1415344476699829, - "learning_rate": 6.686170212765957e-05, - "loss": 0.014004582166671753, - "step": 420 - }, - { - "epoch": 0.07246376811594203, - "grad_norm": 0.14169375598430634, - "learning_rate": 6.765957446808509e-05, - "loss": 0.013503608107566834, - "step": 425 - }, - { - "epoch": 0.0733162830349531, - "grad_norm": 0.22518369555473328, - "learning_rate": 6.845744680851064e-05, - "loss": 0.017587104439735414, - "step": 430 - }, - { - "epoch": 0.0741687979539642, - "grad_norm": 0.1091267541050911, - "learning_rate": 6.925531914893616e-05, - "loss": 0.013890406489372254, - "step": 435 - }, - { - "epoch": 0.07502131287297528, - "grad_norm": 0.24982722103595734, - "learning_rate": 7.00531914893617e-05, - "loss": 0.01599075198173523, - "step": 440 - }, - { - "epoch": 0.07587382779198636, - "grad_norm": 0.22311007976531982, - "learning_rate": 7.085106382978723e-05, - "loss": 0.014870230853557587, - "step": 445 - }, - { - "epoch": 0.07672634271099744, - "grad_norm": 0.27064985036849976, - "learning_rate": 7.164893617021276e-05, - "loss": 0.019213163852691652, - "step": 450 - }, - { - "epoch": 0.07757885763000852, - "grad_norm": 0.16876882314682007, - "learning_rate": 7.244680851063829e-05, - "loss": 0.014935044944286347, - "step": 455 - }, - { - "epoch": 0.0784313725490196, - "grad_norm": 0.18644963204860687, - "learning_rate": 7.324468085106382e-05, - "loss": 0.013823372125625611, - "step": 460 - }, - { - "epoch": 0.0792838874680307, - "grad_norm": 0.13067972660064697, - "learning_rate": 7.404255319148935e-05, - "loss": 0.013839980959892273, - "step": 465 - }, - { - "epoch": 0.08013640238704177, - "grad_norm": 0.0976850613951683, - "learning_rate": 7.484042553191489e-05, - "loss": 0.012782379984855652, - "step": 470 - }, - { - "epoch": 0.08098891730605286, - "grad_norm": 0.2523496150970459, - "learning_rate": 7.499999439800074e-05, - "loss": 0.013075053691864014, - "step": 475 - }, - { - "epoch": 0.08184143222506395, - "grad_norm": 0.18349343538284302, - "learning_rate": 7.499997163988164e-05, - "loss": 0.01753528565168381, - "step": 480 - }, - { - "epoch": 0.08269394714407502, - "grad_norm": 0.16528834402561188, - "learning_rate": 7.499993137552834e-05, - "loss": 0.012987489998340606, - "step": 485 - }, - { - "epoch": 0.08354646206308611, - "grad_norm": 0.1918938159942627, - "learning_rate": 7.499987360495964e-05, - "loss": 0.018496687710285186, - "step": 490 - }, - { - "epoch": 0.08439897698209718, - "grad_norm": 0.11452502012252808, - "learning_rate": 7.499979832820255e-05, - "loss": 0.015578755736351013, - "step": 495 - }, - { - "epoch": 0.08525149190110827, - "grad_norm": 0.2084985226392746, - "learning_rate": 7.499970554529216e-05, - "loss": 0.014264023303985596, - "step": 500 - }, - { - "epoch": 0.08610400682011936, - "grad_norm": 0.13777600228786469, - "learning_rate": 7.49995952562718e-05, - "loss": 0.014527246356010437, - "step": 505 - }, - { - "epoch": 0.08695652173913043, - "grad_norm": 0.2515336275100708, - "learning_rate": 7.499946746119296e-05, - "loss": 0.019042417407035828, - "step": 510 - }, - { - "epoch": 0.08780903665814152, - "grad_norm": 0.12859591841697693, - "learning_rate": 7.499932216011531e-05, - "loss": 0.01340993344783783, - "step": 515 - }, - { - "epoch": 0.0886615515771526, - "grad_norm": 0.15603038668632507, - "learning_rate": 7.499915935310667e-05, - "loss": 0.01645509898662567, - "step": 520 - }, - { - "epoch": 0.08951406649616368, - "grad_norm": 0.1493784785270691, - "learning_rate": 7.499897904024303e-05, - "loss": 0.016503919661045075, - "step": 525 - }, - { - "epoch": 0.09036658141517477, - "grad_norm": 0.14551150798797607, - "learning_rate": 7.499878122160858e-05, - "loss": 0.013891169428825378, - "step": 530 - }, - { - "epoch": 0.09121909633418585, - "grad_norm": 0.12197990715503693, - "learning_rate": 7.499856589729569e-05, - "loss": 0.01531532108783722, - "step": 535 - }, - { - "epoch": 0.09207161125319693, - "grad_norm": 0.24787496030330658, - "learning_rate": 7.499833306740485e-05, - "loss": 0.016374409198760986, - "step": 540 - }, - { - "epoch": 0.09292412617220801, - "grad_norm": 0.10902808606624603, - "learning_rate": 7.499808273204476e-05, - "loss": 0.013505718111991883, - "step": 545 - }, - { - "epoch": 0.0937766410912191, - "grad_norm": 0.16540755331516266, - "learning_rate": 7.499781489133228e-05, - "loss": 0.016257329285144805, - "step": 550 - }, - { - "epoch": 0.09462915601023018, - "grad_norm": 0.1228717565536499, - "learning_rate": 7.499752954539245e-05, - "loss": 0.011345921456813813, - "step": 555 - }, - { - "epoch": 0.09548167092924126, - "grad_norm": 0.12704797089099884, - "learning_rate": 7.49972266943585e-05, - "loss": 0.014449910819530487, - "step": 560 - }, - { - "epoch": 0.09633418584825235, - "grad_norm": 0.2014201581478119, - "learning_rate": 7.499690633837178e-05, - "loss": 0.01456935852766037, - "step": 565 - }, - { - "epoch": 0.09718670076726342, - "grad_norm": 0.17480657994747162, - "learning_rate": 7.499656847758187e-05, - "loss": 0.014413098990917205, - "step": 570 - }, - { - "epoch": 0.09803921568627451, - "grad_norm": 0.18946893513202667, - "learning_rate": 7.499621311214646e-05, - "loss": 0.0139508917927742, - "step": 575 - }, - { - "epoch": 0.0988917306052856, - "grad_norm": 0.15367820858955383, - "learning_rate": 7.499584024223149e-05, - "loss": 0.014091435074806213, - "step": 580 - }, - { - "epoch": 0.09974424552429667, - "grad_norm": 0.15018688142299652, - "learning_rate": 7.499544986801099e-05, - "loss": 0.016737687587738036, - "step": 585 - }, - { - "epoch": 0.10059676044330776, - "grad_norm": 0.17522579431533813, - "learning_rate": 7.499504198966722e-05, - "loss": 0.015822765231132508, - "step": 590 - }, - { - "epoch": 0.10144927536231885, - "grad_norm": 0.12634818255901337, - "learning_rate": 7.499461660739059e-05, - "loss": 0.015363042056560517, - "step": 595 - }, - { - "epoch": 0.10230179028132992, - "grad_norm": 0.12466495484113693, - "learning_rate": 7.499417372137968e-05, - "loss": 0.013208043575286866, - "step": 600 - }, - { - "epoch": 0.10315430520034101, - "grad_norm": 0.18877951800823212, - "learning_rate": 7.499371333184125e-05, - "loss": 0.016261917352676392, - "step": 605 - }, - { - "epoch": 0.10400682011935208, - "grad_norm": 0.21521399915218353, - "learning_rate": 7.49932354389902e-05, - "loss": 0.013861049711704255, - "step": 610 - }, - { - "epoch": 0.10485933503836317, - "grad_norm": 0.1375613659620285, - "learning_rate": 7.499274004304964e-05, - "loss": 0.015597744286060334, - "step": 615 - }, - { - "epoch": 0.10571184995737426, - "grad_norm": 0.16078200936317444, - "learning_rate": 7.499222714425087e-05, - "loss": 0.018308353424072266, - "step": 620 - }, - { - "epoch": 0.10656436487638533, - "grad_norm": 0.15485632419586182, - "learning_rate": 7.499169674283328e-05, - "loss": 0.015836401283740996, - "step": 625 - }, - { - "epoch": 0.10741687979539642, - "grad_norm": 0.14019452035427094, - "learning_rate": 7.499114883904451e-05, - "loss": 0.014591602981090546, - "step": 630 - }, - { - "epoch": 0.1082693947144075, - "grad_norm": 0.2042212039232254, - "learning_rate": 7.499058343314031e-05, - "loss": 0.01718664914369583, - "step": 635 - }, - { - "epoch": 0.10912190963341858, - "grad_norm": 0.13801656663417816, - "learning_rate": 7.499000052538467e-05, - "loss": 0.015579727292060853, - "step": 640 - }, - { - "epoch": 0.10997442455242967, - "grad_norm": 0.12787523865699768, - "learning_rate": 7.498940011604968e-05, - "loss": 0.012525486946105956, - "step": 645 - }, - { - "epoch": 0.11082693947144075, - "grad_norm": 0.1752539724111557, - "learning_rate": 7.498878220541564e-05, - "loss": 0.014286568760871888, - "step": 650 - }, - { - "epoch": 0.11167945439045183, - "grad_norm": 0.12485212087631226, - "learning_rate": 7.498814679377101e-05, - "loss": 0.01482405811548233, - "step": 655 - }, - { - "epoch": 0.11253196930946291, - "grad_norm": 0.10980220139026642, - "learning_rate": 7.498749388141243e-05, - "loss": 0.01597980558872223, - "step": 660 - }, - { - "epoch": 0.113384484228474, - "grad_norm": 0.09144977480173111, - "learning_rate": 7.498682346864469e-05, - "loss": 0.011583130061626434, - "step": 665 - }, - { - "epoch": 0.11423699914748509, - "grad_norm": 0.11955336481332779, - "learning_rate": 7.498613555578076e-05, - "loss": 0.013009116053581238, - "step": 670 - }, - { - "epoch": 0.11508951406649616, - "grad_norm": 0.14505070447921753, - "learning_rate": 7.49854301431418e-05, - "loss": 0.013878281414508819, - "step": 675 - }, - { - "epoch": 0.11594202898550725, - "grad_norm": 0.15674598515033722, - "learning_rate": 7.49847072310571e-05, - "loss": 0.014717698097229004, - "step": 680 - }, - { - "epoch": 0.11679454390451834, - "grad_norm": 0.16684003174304962, - "learning_rate": 7.498396681986413e-05, - "loss": 0.015567027032375336, - "step": 685 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 0.13314439356327057, - "learning_rate": 7.498320890990857e-05, - "loss": 0.017679129540920258, - "step": 690 - }, - { - "epoch": 0.1184995737425405, - "grad_norm": 0.1099700927734375, - "learning_rate": 7.498243350154423e-05, - "loss": 0.015660777688026428, - "step": 695 - }, - { - "epoch": 0.11935208866155157, - "grad_norm": 0.09305288642644882, - "learning_rate": 7.498164059513307e-05, - "loss": 0.011864218115806579, - "step": 700 - }, - { - "epoch": 0.12020460358056266, - "grad_norm": 0.1284978687763214, - "learning_rate": 7.498083019104527e-05, - "loss": 0.013128276169300079, - "step": 705 - }, - { - "epoch": 0.12105711849957375, - "grad_norm": 0.19205476343631744, - "learning_rate": 7.498000228965913e-05, - "loss": 0.014518238604068756, - "step": 710 - }, - { - "epoch": 0.12190963341858482, - "grad_norm": 0.25064221024513245, - "learning_rate": 7.497915689136119e-05, - "loss": 0.014235696196556092, - "step": 715 - }, - { - "epoch": 0.12276214833759591, - "grad_norm": 0.11866044998168945, - "learning_rate": 7.497829399654607e-05, - "loss": 0.013622967898845673, - "step": 720 - }, - { - "epoch": 0.12361466325660699, - "grad_norm": 0.1366252452135086, - "learning_rate": 7.49774136056166e-05, - "loss": 0.013592693209648132, - "step": 725 - }, - { - "epoch": 0.12446717817561807, - "grad_norm": 0.12722773849964142, - "learning_rate": 7.497651571898379e-05, - "loss": 0.02055167257785797, - "step": 730 - }, - { - "epoch": 0.12531969309462915, - "grad_norm": 0.07723517715930939, - "learning_rate": 7.49756003370668e-05, - "loss": 0.012377069890499115, - "step": 735 - }, - { - "epoch": 0.12617220801364024, - "grad_norm": 0.11081967502832413, - "learning_rate": 7.497466746029293e-05, - "loss": 0.013797640800476074, - "step": 740 - }, - { - "epoch": 0.12702472293265132, - "grad_norm": 0.1117677390575409, - "learning_rate": 7.497371708909771e-05, - "loss": 0.01338990479707718, - "step": 745 - }, - { - "epoch": 0.1278772378516624, - "grad_norm": 0.24659112095832825, - "learning_rate": 7.497274922392483e-05, - "loss": 0.012844511866569519, - "step": 750 - }, - { - "epoch": 0.1287297527706735, - "grad_norm": 0.24900244176387787, - "learning_rate": 7.497176386522606e-05, - "loss": 0.015656352043151855, - "step": 755 - }, - { - "epoch": 0.12958226768968456, - "grad_norm": 0.1331390142440796, - "learning_rate": 7.497076101346144e-05, - "loss": 0.013722085952758789, - "step": 760 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 0.1224697008728981, - "learning_rate": 7.496974066909913e-05, - "loss": 0.011186304688453674, - "step": 765 - }, - { - "epoch": 0.13128729752770674, - "grad_norm": 0.11053820699453354, - "learning_rate": 7.496870283261546e-05, - "loss": 0.012894454598426818, - "step": 770 - }, - { - "epoch": 0.13213981244671782, - "grad_norm": 0.09663277864456177, - "learning_rate": 7.49676475044949e-05, - "loss": 0.015331995487213135, - "step": 775 - }, - { - "epoch": 0.1329923273657289, - "grad_norm": 0.1483219712972641, - "learning_rate": 7.496657468523014e-05, - "loss": 0.015070399641990662, - "step": 780 - }, - { - "epoch": 0.13384484228473997, - "grad_norm": 0.12375539541244507, - "learning_rate": 7.496548437532202e-05, - "loss": 0.013722005486488342, - "step": 785 - }, - { - "epoch": 0.13469735720375106, - "grad_norm": 0.14662130177021027, - "learning_rate": 7.496437657527949e-05, - "loss": 0.012852996587753296, - "step": 790 - }, - { - "epoch": 0.13554987212276215, - "grad_norm": 0.1740300953388214, - "learning_rate": 7.496325128561975e-05, - "loss": 0.014796209335327149, - "step": 795 - }, - { - "epoch": 0.13640238704177324, - "grad_norm": 0.10670243203639984, - "learning_rate": 7.496210850686809e-05, - "loss": 0.013983005285263061, - "step": 800 - }, - { - "epoch": 0.13725490196078433, - "grad_norm": 0.1362515240907669, - "learning_rate": 7.496094823955801e-05, - "loss": 0.014472618699073792, - "step": 805 - }, - { - "epoch": 0.13810741687979539, - "grad_norm": 0.11403689533472061, - "learning_rate": 7.495977048423117e-05, - "loss": 0.01294848918914795, - "step": 810 - }, - { - "epoch": 0.13895993179880647, - "grad_norm": 0.11532565206289291, - "learning_rate": 7.495857524143738e-05, - "loss": 0.011346425861120224, - "step": 815 - }, - { - "epoch": 0.13981244671781756, - "grad_norm": 0.1506761759519577, - "learning_rate": 7.495736251173463e-05, - "loss": 0.016532811522483825, - "step": 820 - }, - { - "epoch": 0.14066496163682865, - "grad_norm": 0.10915899276733398, - "learning_rate": 7.495613229568903e-05, - "loss": 0.01278679072856903, - "step": 825 - }, - { - "epoch": 0.14151747655583974, - "grad_norm": 0.10394130647182465, - "learning_rate": 7.49548845938749e-05, - "loss": 0.011449626088142395, - "step": 830 - }, - { - "epoch": 0.1423699914748508, - "grad_norm": 0.08730677515268326, - "learning_rate": 7.495361940687475e-05, - "loss": 0.011916645616292954, - "step": 835 - }, - { - "epoch": 0.1432225063938619, - "grad_norm": 0.08257348835468292, - "learning_rate": 7.495233673527914e-05, - "loss": 0.013689276576042176, - "step": 840 - }, - { - "epoch": 0.14407502131287298, - "grad_norm": 0.182078555226326, - "learning_rate": 7.495103657968692e-05, - "loss": 0.019141729176044463, - "step": 845 - }, - { - "epoch": 0.14492753623188406, - "grad_norm": 0.105568528175354, - "learning_rate": 7.494971894070501e-05, - "loss": 0.015522226691246033, - "step": 850 - }, - { - "epoch": 0.14578005115089515, - "grad_norm": 0.09030122309923172, - "learning_rate": 7.494838381894856e-05, - "loss": 0.012900182604789734, - "step": 855 - }, - { - "epoch": 0.1466325660699062, - "grad_norm": 0.11199921369552612, - "learning_rate": 7.494703121504082e-05, - "loss": 0.013011230528354645, - "step": 860 - }, - { - "epoch": 0.1474850809889173, - "grad_norm": 0.11342430859804153, - "learning_rate": 7.494566112961325e-05, - "loss": 0.015477502346038818, - "step": 865 - }, - { - "epoch": 0.1483375959079284, - "grad_norm": 0.07956613600254059, - "learning_rate": 7.494427356330544e-05, - "loss": 0.011270551383495331, - "step": 870 - }, - { - "epoch": 0.14919011082693948, - "grad_norm": 0.13356897234916687, - "learning_rate": 7.494286851676515e-05, - "loss": 0.012481572479009629, - "step": 875 - }, - { - "epoch": 0.15004262574595056, - "grad_norm": 0.14493827521800995, - "learning_rate": 7.494144599064833e-05, - "loss": 0.015318951010704041, - "step": 880 - }, - { - "epoch": 0.15089514066496162, - "grad_norm": 0.13254614174365997, - "learning_rate": 7.494000598561902e-05, - "loss": 0.012697519361972808, - "step": 885 - }, - { - "epoch": 0.1517476555839727, - "grad_norm": 0.12164720892906189, - "learning_rate": 7.49385485023495e-05, - "loss": 0.011361779272556305, - "step": 890 - }, - { - "epoch": 0.1526001705029838, - "grad_norm": 0.10743863880634308, - "learning_rate": 7.493707354152015e-05, - "loss": 0.012824115157127381, - "step": 895 - }, - { - "epoch": 0.1534526854219949, - "grad_norm": 0.0933203473687172, - "learning_rate": 7.493558110381954e-05, - "loss": 0.012234293669462205, - "step": 900 - }, - { - "epoch": 0.15430520034100598, - "grad_norm": 0.10252948850393295, - "learning_rate": 7.493407118994437e-05, - "loss": 0.01874733567237854, - "step": 905 - }, - { - "epoch": 0.15515771526001704, - "grad_norm": 0.1184248998761177, - "learning_rate": 7.493254380059954e-05, - "loss": 0.012014241516590118, - "step": 910 - }, - { - "epoch": 0.15601023017902813, - "grad_norm": 0.10140799731016159, - "learning_rate": 7.49309989364981e-05, - "loss": 0.013288506865501403, - "step": 915 - }, - { - "epoch": 0.1568627450980392, - "grad_norm": 0.15704941749572754, - "learning_rate": 7.492943659836121e-05, - "loss": 0.012907981872558594, - "step": 920 - }, - { - "epoch": 0.1577152600170503, - "grad_norm": 0.10368761420249939, - "learning_rate": 7.492785678691822e-05, - "loss": 0.01384827345609665, - "step": 925 - }, - { - "epoch": 0.1585677749360614, - "grad_norm": 0.13517284393310547, - "learning_rate": 7.492625950290668e-05, - "loss": 0.014644764363765717, - "step": 930 - }, - { - "epoch": 0.15942028985507245, - "grad_norm": 0.08189263194799423, - "learning_rate": 7.492464474707222e-05, - "loss": 0.011974713951349258, - "step": 935 - }, - { - "epoch": 0.16027280477408354, - "grad_norm": 0.11737149208784103, - "learning_rate": 7.492301252016867e-05, - "loss": 0.014023615419864655, - "step": 940 - }, - { - "epoch": 0.16112531969309463, - "grad_norm": 0.15778031945228577, - "learning_rate": 7.492136282295801e-05, - "loss": 0.01533726304769516, - "step": 945 - }, - { - "epoch": 0.16197783461210571, - "grad_norm": 0.14194118976593018, - "learning_rate": 7.491969565621036e-05, - "loss": 0.01569782793521881, - "step": 950 - }, - { - "epoch": 0.1628303495311168, - "grad_norm": 0.12579558789730072, - "learning_rate": 7.491801102070403e-05, - "loss": 0.012844063341617584, - "step": 955 - }, - { - "epoch": 0.1636828644501279, - "grad_norm": 0.10578597337007523, - "learning_rate": 7.491630891722547e-05, - "loss": 0.011186198890209198, - "step": 960 - }, - { - "epoch": 0.16453537936913895, - "grad_norm": 0.12398207187652588, - "learning_rate": 7.491458934656925e-05, - "loss": 0.014328254759311676, - "step": 965 - }, - { - "epoch": 0.16538789428815004, - "grad_norm": 0.09390626102685928, - "learning_rate": 7.491285230953814e-05, - "loss": 0.013881708681583404, - "step": 970 - }, - { - "epoch": 0.16624040920716113, - "grad_norm": 0.17196106910705566, - "learning_rate": 7.491109780694303e-05, - "loss": 0.014424234628677368, - "step": 975 - }, - { - "epoch": 0.16709292412617222, - "grad_norm": 0.06994624435901642, - "learning_rate": 7.490932583960302e-05, - "loss": 0.010434426367282867, - "step": 980 - }, - { - "epoch": 0.1679454390451833, - "grad_norm": 0.09414499998092651, - "learning_rate": 7.490753640834527e-05, - "loss": 0.010483803600072861, - "step": 985 - }, - { - "epoch": 0.16879795396419436, - "grad_norm": 0.11002875864505768, - "learning_rate": 7.490572951400518e-05, - "loss": 0.009266073256731034, - "step": 990 - }, - { - "epoch": 0.16965046888320545, - "grad_norm": 0.14956022799015045, - "learning_rate": 7.490390515742626e-05, - "loss": 0.015529079735279084, - "step": 995 - }, - { - "epoch": 0.17050298380221654, - "grad_norm": 0.13828666508197784, - "learning_rate": 7.490206333946019e-05, - "loss": 0.011511271446943283, - "step": 1000 - }, - { - "epoch": 0.17135549872122763, - "grad_norm": 0.1265997439622879, - "learning_rate": 7.490020406096678e-05, - "loss": 0.010465707629919052, - "step": 1005 - }, - { - "epoch": 0.17220801364023872, - "grad_norm": 0.07772056013345718, - "learning_rate": 7.489832732281401e-05, - "loss": 0.013828210532665253, - "step": 1010 - }, - { - "epoch": 0.17306052855924978, - "grad_norm": 0.08055376261472702, - "learning_rate": 7.489643312587799e-05, - "loss": 0.013010218739509583, - "step": 1015 - }, - { - "epoch": 0.17391304347826086, - "grad_norm": 0.09059367328882217, - "learning_rate": 7.489452147104301e-05, - "loss": 0.013864235579967498, - "step": 1020 - }, - { - "epoch": 0.17476555839727195, - "grad_norm": 0.15273553133010864, - "learning_rate": 7.489259235920149e-05, - "loss": 0.013432112336158753, - "step": 1025 - }, - { - "epoch": 0.17561807331628304, - "grad_norm": 0.11343793570995331, - "learning_rate": 7.489064579125399e-05, - "loss": 0.01213686615228653, - "step": 1030 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 0.11880069226026535, - "learning_rate": 7.488868176810926e-05, - "loss": 0.012188264727592468, - "step": 1035 - }, - { - "epoch": 0.1773231031543052, - "grad_norm": 0.16270127892494202, - "learning_rate": 7.488670029068415e-05, - "loss": 0.015294317901134492, - "step": 1040 - }, - { - "epoch": 0.17817561807331628, - "grad_norm": 0.11542797088623047, - "learning_rate": 7.488470135990369e-05, - "loss": 0.013500772416591644, - "step": 1045 - }, - { - "epoch": 0.17902813299232737, - "grad_norm": 0.09579882025718689, - "learning_rate": 7.488268497670103e-05, - "loss": 0.013453315198421478, - "step": 1050 - }, - { - "epoch": 0.17988064791133845, - "grad_norm": 0.08847666531801224, - "learning_rate": 7.488065114201752e-05, - "loss": 0.0153861403465271, - "step": 1055 - }, - { - "epoch": 0.18073316283034954, - "grad_norm": 0.11167823523283005, - "learning_rate": 7.487859985680257e-05, - "loss": 0.011502107977867127, - "step": 1060 - }, - { - "epoch": 0.1815856777493606, - "grad_norm": 0.11255413293838501, - "learning_rate": 7.487653112201385e-05, - "loss": 0.012194579839706421, - "step": 1065 - }, - { - "epoch": 0.1824381926683717, - "grad_norm": 0.10929680615663528, - "learning_rate": 7.487444493861705e-05, - "loss": 0.015874122083187104, - "step": 1070 - }, - { - "epoch": 0.18329070758738278, - "grad_norm": 0.10753592848777771, - "learning_rate": 7.487234130758613e-05, - "loss": 0.009445396810770034, - "step": 1075 - }, - { - "epoch": 0.18414322250639387, - "grad_norm": 0.11368737369775772, - "learning_rate": 7.487022022990309e-05, - "loss": 0.011674505472183228, - "step": 1080 - }, - { - "epoch": 0.18499573742540495, - "grad_norm": 0.12944836914539337, - "learning_rate": 7.486808170655813e-05, - "loss": 0.011856313049793243, - "step": 1085 - }, - { - "epoch": 0.18584825234441602, - "grad_norm": 0.0833214819431305, - "learning_rate": 7.48659257385496e-05, - "loss": 0.014119544625282287, - "step": 1090 - }, - { - "epoch": 0.1867007672634271, - "grad_norm": 0.11955790221691132, - "learning_rate": 7.486375232688397e-05, - "loss": 0.012977911531925202, - "step": 1095 - }, - { - "epoch": 0.1875532821824382, - "grad_norm": 0.1452455222606659, - "learning_rate": 7.486156147257584e-05, - "loss": 0.015410827100276947, - "step": 1100 - }, - { - "epoch": 0.18840579710144928, - "grad_norm": 0.09630092978477478, - "learning_rate": 7.485935317664801e-05, - "loss": 0.013698874413967133, - "step": 1105 - }, - { - "epoch": 0.18925831202046037, - "grad_norm": 0.11687257140874863, - "learning_rate": 7.485712744013137e-05, - "loss": 0.013196484744548797, - "step": 1110 - }, - { - "epoch": 0.19011082693947143, - "grad_norm": 0.08894632011651993, - "learning_rate": 7.485488426406495e-05, - "loss": 0.01540881097316742, - "step": 1115 - }, - { - "epoch": 0.19096334185848252, - "grad_norm": 0.09196458756923676, - "learning_rate": 7.485262364949597e-05, - "loss": 0.012018527090549468, - "step": 1120 - }, - { - "epoch": 0.1918158567774936, - "grad_norm": 0.12328553944826126, - "learning_rate": 7.485034559747974e-05, - "loss": 0.014925773441791534, - "step": 1125 - }, - { - "epoch": 0.1926683716965047, - "grad_norm": 0.12566202878952026, - "learning_rate": 7.484805010907975e-05, - "loss": 0.01104198843240738, - "step": 1130 - }, - { - "epoch": 0.19352088661551578, - "grad_norm": 0.1022765263915062, - "learning_rate": 7.484573718536758e-05, - "loss": 0.0118367500603199, - "step": 1135 - }, - { - "epoch": 0.19437340153452684, - "grad_norm": 0.09682224690914154, - "learning_rate": 7.4843406827423e-05, - "loss": 0.011423001438379288, - "step": 1140 - }, - { - "epoch": 0.19522591645353793, - "grad_norm": 0.15166254341602325, - "learning_rate": 7.484105903633388e-05, - "loss": 0.014048118889331818, - "step": 1145 - }, - { - "epoch": 0.19607843137254902, - "grad_norm": 0.09285032004117966, - "learning_rate": 7.483869381319627e-05, - "loss": 0.012882034480571746, - "step": 1150 - }, - { - "epoch": 0.1969309462915601, - "grad_norm": 0.09011214971542358, - "learning_rate": 7.483631115911434e-05, - "loss": 0.01419239491224289, - "step": 1155 - }, - { - "epoch": 0.1977834612105712, - "grad_norm": 0.14540383219718933, - "learning_rate": 7.483391107520037e-05, - "loss": 0.014623096585273743, - "step": 1160 - }, - { - "epoch": 0.19863597612958228, - "grad_norm": 0.12326448410749435, - "learning_rate": 7.483149356257479e-05, - "loss": 0.013109591603279114, - "step": 1165 - }, - { - "epoch": 0.19948849104859334, - "grad_norm": 0.07067535817623138, - "learning_rate": 7.482905862236622e-05, - "loss": 0.013740380108356477, - "step": 1170 - }, - { - "epoch": 0.20034100596760443, - "grad_norm": 0.10170529782772064, - "learning_rate": 7.482660625571134e-05, - "loss": 0.011151721328496933, - "step": 1175 - }, - { - "epoch": 0.20119352088661552, - "grad_norm": 0.10074683278799057, - "learning_rate": 7.482413646375498e-05, - "loss": 0.01180294007062912, - "step": 1180 - }, - { - "epoch": 0.2020460358056266, - "grad_norm": 0.047992780804634094, - "learning_rate": 7.482164924765016e-05, - "loss": 0.01065710186958313, - "step": 1185 - }, - { - "epoch": 0.2028985507246377, - "grad_norm": 0.1279197484254837, - "learning_rate": 7.481914460855796e-05, - "loss": 0.012219739705324173, - "step": 1190 - }, - { - "epoch": 0.20375106564364875, - "grad_norm": 0.11339649558067322, - "learning_rate": 7.481662254764765e-05, - "loss": 0.012664712965488434, - "step": 1195 - }, - { - "epoch": 0.20460358056265984, - "grad_norm": 0.10576959699392319, - "learning_rate": 7.481408306609662e-05, - "loss": 0.010009048134088516, - "step": 1200 - }, - { - "epoch": 0.20545609548167093, - "grad_norm": 0.08073283731937408, - "learning_rate": 7.481152616509037e-05, - "loss": 0.011126396805047989, - "step": 1205 - }, - { - "epoch": 0.20630861040068202, - "grad_norm": 0.09888558834791183, - "learning_rate": 7.480895184582253e-05, - "loss": 0.013096305727958679, - "step": 1210 - }, - { - "epoch": 0.2071611253196931, - "grad_norm": 0.09703118354082108, - "learning_rate": 7.48063601094949e-05, - "loss": 0.010653502494096755, - "step": 1215 - }, - { - "epoch": 0.20801364023870417, - "grad_norm": 0.10693266987800598, - "learning_rate": 7.48037509573174e-05, - "loss": 0.012283077836036682, - "step": 1220 - }, - { - "epoch": 0.20886615515771526, - "grad_norm": 0.1024889275431633, - "learning_rate": 7.480112439050804e-05, - "loss": 0.012971282005310059, - "step": 1225 - }, - { - "epoch": 0.20971867007672634, - "grad_norm": 0.10043203830718994, - "learning_rate": 7.4798480410293e-05, - "loss": 0.011451539397239686, - "step": 1230 - }, - { - "epoch": 0.21057118499573743, - "grad_norm": 0.11248672753572464, - "learning_rate": 7.47958190179066e-05, - "loss": 0.012805460393428803, - "step": 1235 - }, - { - "epoch": 0.21142369991474852, - "grad_norm": 0.08651525527238846, - "learning_rate": 7.479314021459123e-05, - "loss": 0.013016811013221741, - "step": 1240 - }, - { - "epoch": 0.21227621483375958, - "grad_norm": 0.06062095984816551, - "learning_rate": 7.479044400159746e-05, - "loss": 0.01299230456352234, - "step": 1245 - }, - { - "epoch": 0.21312872975277067, - "grad_norm": 0.1589215248823166, - "learning_rate": 7.478773038018397e-05, - "loss": 0.012607543170452118, - "step": 1250 - }, - { - "epoch": 0.21398124467178176, - "grad_norm": 0.12105076014995575, - "learning_rate": 7.478499935161758e-05, - "loss": 0.012772174179553985, - "step": 1255 - }, - { - "epoch": 0.21483375959079284, - "grad_norm": 0.0846070721745491, - "learning_rate": 7.478225091717323e-05, - "loss": 0.009387130290269852, - "step": 1260 - }, - { - "epoch": 0.21568627450980393, - "grad_norm": 0.054560381919145584, - "learning_rate": 7.477948507813396e-05, - "loss": 0.013299009203910828, - "step": 1265 - }, - { - "epoch": 0.216538789428815, - "grad_norm": 0.10125566273927689, - "learning_rate": 7.477670183579094e-05, - "loss": 0.013010343909263611, - "step": 1270 - }, - { - "epoch": 0.21739130434782608, - "grad_norm": 0.10493458807468414, - "learning_rate": 7.477390119144353e-05, - "loss": 0.013531042635440827, - "step": 1275 - }, - { - "epoch": 0.21824381926683717, - "grad_norm": 0.07453935593366623, - "learning_rate": 7.477108314639913e-05, - "loss": 0.01330820918083191, - "step": 1280 - }, - { - "epoch": 0.21909633418584826, - "grad_norm": 0.14430643618106842, - "learning_rate": 7.47682477019733e-05, - "loss": 0.015061555802822113, - "step": 1285 - }, - { - "epoch": 0.21994884910485935, - "grad_norm": 0.1073407456278801, - "learning_rate": 7.476539485948973e-05, - "loss": 0.011137319356203079, - "step": 1290 - }, - { - "epoch": 0.2208013640238704, - "grad_norm": 0.16794899106025696, - "learning_rate": 7.476252462028021e-05, - "loss": 0.013223762810230254, - "step": 1295 - }, - { - "epoch": 0.2216538789428815, - "grad_norm": 0.11509175598621368, - "learning_rate": 7.475963698568468e-05, - "loss": 0.012790821492671967, - "step": 1300 - }, - { - "epoch": 0.22250639386189258, - "grad_norm": 0.08615118265151978, - "learning_rate": 7.475673195705116e-05, - "loss": 0.011050455272197723, - "step": 1305 - }, - { - "epoch": 0.22335890878090367, - "grad_norm": 0.1186874732375145, - "learning_rate": 7.475380953573583e-05, - "loss": 0.011253353953361512, - "step": 1310 - }, - { - "epoch": 0.22421142369991476, - "grad_norm": 0.08680208027362823, - "learning_rate": 7.475086972310297e-05, - "loss": 0.010736887156963349, - "step": 1315 - }, - { - "epoch": 0.22506393861892582, - "grad_norm": 0.08204677700996399, - "learning_rate": 7.474791252052498e-05, - "loss": 0.011695361882448196, - "step": 1320 - }, - { - "epoch": 0.2259164535379369, - "grad_norm": 0.08945680409669876, - "learning_rate": 7.47449379293824e-05, - "loss": 0.01684057414531708, - "step": 1325 - }, - { - "epoch": 0.226768968456948, - "grad_norm": 0.12350911647081375, - "learning_rate": 7.474194595106384e-05, - "loss": 0.012560060620307923, - "step": 1330 - }, - { - "epoch": 0.22762148337595908, - "grad_norm": 0.09201455116271973, - "learning_rate": 7.473893658696605e-05, - "loss": 0.01128845140337944, - "step": 1335 - }, - { - "epoch": 0.22847399829497017, - "grad_norm": 0.12938448786735535, - "learning_rate": 7.473590983849396e-05, - "loss": 0.011510471999645232, - "step": 1340 - }, - { - "epoch": 0.22932651321398123, - "grad_norm": 0.0837291032075882, - "learning_rate": 7.473286570706047e-05, - "loss": 0.011677465587854385, - "step": 1345 - }, - { - "epoch": 0.23017902813299232, - "grad_norm": 0.12514546513557434, - "learning_rate": 7.472980419408675e-05, - "loss": 0.01308433711528778, - "step": 1350 - }, - { - "epoch": 0.2310315430520034, - "grad_norm": 0.05483829975128174, - "learning_rate": 7.472672530100199e-05, - "loss": 0.007203156501054764, - "step": 1355 - }, - { - "epoch": 0.2318840579710145, - "grad_norm": 0.13903425633907318, - "learning_rate": 7.472362902924352e-05, - "loss": 0.013231572508811951, - "step": 1360 - }, - { - "epoch": 0.23273657289002558, - "grad_norm": 0.09622041881084442, - "learning_rate": 7.472051538025678e-05, - "loss": 0.013325902819633483, - "step": 1365 - }, - { - "epoch": 0.23358908780903667, - "grad_norm": 0.0792275071144104, - "learning_rate": 7.471738435549533e-05, - "loss": 0.011098403483629227, - "step": 1370 - }, - { - "epoch": 0.23444160272804773, - "grad_norm": 0.11833506077528, - "learning_rate": 7.471423595642084e-05, - "loss": 0.014845246076583862, - "step": 1375 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 0.07531571388244629, - "learning_rate": 7.471107018450309e-05, - "loss": 0.011498528718948364, - "step": 1380 - }, - { - "epoch": 0.2361466325660699, - "grad_norm": 0.08739249408245087, - "learning_rate": 7.470788704121995e-05, - "loss": 0.013241058588027954, - "step": 1385 - }, - { - "epoch": 0.236999147485081, - "grad_norm": 0.07113732397556305, - "learning_rate": 7.470468652805743e-05, - "loss": 0.009632241725921632, - "step": 1390 - }, - { - "epoch": 0.23785166240409208, - "grad_norm": 0.07838430255651474, - "learning_rate": 7.470146864650965e-05, - "loss": 0.009344339370727539, - "step": 1395 - }, - { - "epoch": 0.23870417732310314, - "grad_norm": 0.1086372509598732, - "learning_rate": 7.46982333980788e-05, - "loss": 0.014708395302295684, - "step": 1400 - }, - { - "epoch": 0.23955669224211423, - "grad_norm": 0.06628046184778214, - "learning_rate": 7.469498078427519e-05, - "loss": 0.011472882330417633, - "step": 1405 - }, - { - "epoch": 0.24040920716112532, - "grad_norm": 0.13003386557102203, - "learning_rate": 7.46917108066173e-05, - "loss": 0.011933800578117371, - "step": 1410 - }, - { - "epoch": 0.2412617220801364, - "grad_norm": 0.07365831732749939, - "learning_rate": 7.468842346663162e-05, - "loss": 0.0102902352809906, - "step": 1415 - }, - { - "epoch": 0.2421142369991475, - "grad_norm": 0.11540158838033676, - "learning_rate": 7.468511876585279e-05, - "loss": 0.012016136944293977, - "step": 1420 - }, - { - "epoch": 0.24296675191815856, - "grad_norm": 0.11687944084405899, - "learning_rate": 7.468179670582359e-05, - "loss": 0.01773642897605896, - "step": 1425 - }, - { - "epoch": 0.24381926683716965, - "grad_norm": 0.10464506596326828, - "learning_rate": 7.467845728809483e-05, - "loss": 0.009476778656244278, - "step": 1430 - }, - { - "epoch": 0.24467178175618073, - "grad_norm": 0.11479093879461288, - "learning_rate": 7.46751005142255e-05, - "loss": 0.014100676774978638, - "step": 1435 - }, - { - "epoch": 0.24552429667519182, - "grad_norm": 0.1388610154390335, - "learning_rate": 7.46717263857826e-05, - "loss": 0.012735754251480103, - "step": 1440 - }, - { - "epoch": 0.2463768115942029, - "grad_norm": 0.08022485673427582, - "learning_rate": 7.466833490434132e-05, - "loss": 0.014391189813613892, - "step": 1445 - }, - { - "epoch": 0.24722932651321397, - "grad_norm": 0.12174725532531738, - "learning_rate": 7.466492607148492e-05, - "loss": 0.013387931883335114, - "step": 1450 - }, - { - "epoch": 0.24808184143222506, - "grad_norm": 0.1102161779999733, - "learning_rate": 7.466149988880474e-05, - "loss": 0.01143224760890007, - "step": 1455 - }, - { - "epoch": 0.24893435635123615, - "grad_norm": 0.14878468215465546, - "learning_rate": 7.465805635790024e-05, - "loss": 0.010428830236196517, - "step": 1460 - }, - { - "epoch": 0.24978687127024723, - "grad_norm": 0.3046579658985138, - "learning_rate": 7.4654595480379e-05, - "loss": 0.012648795545101166, - "step": 1465 - }, - { - "epoch": 0.24995737425404946, - "eval_loss": 0.036103956401348114, - "eval_runtime": 3.6524, - "eval_samples_per_second": 68.995, - "eval_steps_per_second": 1.095, - "step": 1466 - }, - { - "eval_cer_subset": 0.01393977885257381, - "eval_cer_subset_edit_distance": 856, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 1466 - }, - { - "epoch": 0.2506393861892583, - "grad_norm": 0.10660448670387268, - "learning_rate": 7.465111725785664e-05, - "loss": 0.013486798107624053, - "step": 1470 - }, - { - "epoch": 0.2514919011082694, - "grad_norm": 0.13888458907604218, - "learning_rate": 7.464762169195693e-05, - "loss": 0.015365575253963471, - "step": 1475 - }, - { - "epoch": 0.25234441602728047, - "grad_norm": 0.14945067465305328, - "learning_rate": 7.464410878431169e-05, - "loss": 0.01226709708571434, - "step": 1480 - }, - { - "epoch": 0.2531969309462916, - "grad_norm": 0.09638198465108871, - "learning_rate": 7.464057853656089e-05, - "loss": 0.012688608467578888, - "step": 1485 - }, - { - "epoch": 0.25404944586530265, - "grad_norm": 0.05725576728582382, - "learning_rate": 7.463703095035256e-05, - "loss": 0.011445847153663636, - "step": 1490 - }, - { - "epoch": 0.2549019607843137, - "grad_norm": 0.08474720269441605, - "learning_rate": 7.463346602734283e-05, - "loss": 0.01112249493598938, - "step": 1495 - }, - { - "epoch": 0.2557544757033248, - "grad_norm": 0.08283067494630814, - "learning_rate": 7.462988376919592e-05, - "loss": 0.01144670695066452, - "step": 1500 - }, - { - "epoch": 0.2566069906223359, - "grad_norm": 0.13687758147716522, - "learning_rate": 7.462628417758415e-05, - "loss": 0.012893360853195191, - "step": 1505 - }, - { - "epoch": 0.257459505541347, - "grad_norm": 0.16319195926189423, - "learning_rate": 7.462266725418793e-05, - "loss": 0.014364737272262573, - "step": 1510 - }, - { - "epoch": 0.25831202046035806, - "grad_norm": 0.0693240761756897, - "learning_rate": 7.461903300069576e-05, - "loss": 0.011550360918045044, - "step": 1515 - }, - { - "epoch": 0.2591645353793691, - "grad_norm": 0.0994478389620781, - "learning_rate": 7.461538141880423e-05, - "loss": 0.011711706221103669, - "step": 1520 - }, - { - "epoch": 0.26001705029838024, - "grad_norm": 0.20310325920581818, - "learning_rate": 7.461171251021802e-05, - "loss": 0.013178233802318574, - "step": 1525 - }, - { - "epoch": 0.2608695652173913, - "grad_norm": 0.07798318564891815, - "learning_rate": 7.460802627664991e-05, - "loss": 0.011273499578237534, - "step": 1530 - }, - { - "epoch": 0.2617220801364024, - "grad_norm": 0.1308072805404663, - "learning_rate": 7.460432271982073e-05, - "loss": 0.008084958046674728, - "step": 1535 - }, - { - "epoch": 0.2625745950554135, - "grad_norm": 0.08926808834075928, - "learning_rate": 7.460060184145944e-05, - "loss": 0.011974562704563142, - "step": 1540 - }, - { - "epoch": 0.26342710997442453, - "grad_norm": 0.07462260872125626, - "learning_rate": 7.459686364330307e-05, - "loss": 0.007739155739545822, - "step": 1545 - }, - { - "epoch": 0.26427962489343565, - "grad_norm": 0.10904734581708908, - "learning_rate": 7.459310812709675e-05, - "loss": 0.012024204432964324, - "step": 1550 - }, - { - "epoch": 0.2651321398124467, - "grad_norm": 0.11935116350650787, - "learning_rate": 7.458933529459364e-05, - "loss": 0.012462839484214783, - "step": 1555 - }, - { - "epoch": 0.2659846547314578, - "grad_norm": 0.08920887112617493, - "learning_rate": 7.458554514755506e-05, - "loss": 0.01472131609916687, - "step": 1560 - }, - { - "epoch": 0.2668371696504689, - "grad_norm": 0.12231490015983582, - "learning_rate": 7.458173768775036e-05, - "loss": 0.014967297017574311, - "step": 1565 - }, - { - "epoch": 0.26768968456947995, - "grad_norm": 0.10691904276609421, - "learning_rate": 7.4577912916957e-05, - "loss": 0.013200350105762482, - "step": 1570 - }, - { - "epoch": 0.26854219948849106, - "grad_norm": 0.06267247349023819, - "learning_rate": 7.457407083696049e-05, - "loss": 0.011946959048509597, - "step": 1575 - }, - { - "epoch": 0.2693947144075021, - "grad_norm": 0.10732340067625046, - "learning_rate": 7.457021144955448e-05, - "loss": 0.012722471356391906, - "step": 1580 - }, - { - "epoch": 0.27024722932651324, - "grad_norm": 0.08628841489553452, - "learning_rate": 7.456633475654061e-05, - "loss": 0.010444843024015427, - "step": 1585 - }, - { - "epoch": 0.2710997442455243, - "grad_norm": 0.1017296314239502, - "learning_rate": 7.456244075972866e-05, - "loss": 0.017299896478652953, - "step": 1590 - }, - { - "epoch": 0.27195225916453536, - "grad_norm": 0.07065381854772568, - "learning_rate": 7.455852946093652e-05, - "loss": 0.01379164457321167, - "step": 1595 - }, - { - "epoch": 0.2728047740835465, - "grad_norm": 0.08550920337438583, - "learning_rate": 7.455460086199008e-05, - "loss": 0.011976235359907151, - "step": 1600 - }, - { - "epoch": 0.27365728900255754, - "grad_norm": 0.08075132966041565, - "learning_rate": 7.455065496472335e-05, - "loss": 0.012481977045536042, - "step": 1605 - }, - { - "epoch": 0.27450980392156865, - "grad_norm": 0.08838896453380585, - "learning_rate": 7.454669177097839e-05, - "loss": 0.011825743317604064, - "step": 1610 - }, - { - "epoch": 0.2753623188405797, - "grad_norm": 0.06823412328958511, - "learning_rate": 7.454271128260537e-05, - "loss": 0.014278222620487214, - "step": 1615 - }, - { - "epoch": 0.27621483375959077, - "grad_norm": 0.09612765908241272, - "learning_rate": 7.45387135014625e-05, - "loss": 0.009220580756664275, - "step": 1620 - }, - { - "epoch": 0.2770673486786019, - "grad_norm": 0.08564051240682602, - "learning_rate": 7.45346984294161e-05, - "loss": 0.015146958827972411, - "step": 1625 - }, - { - "epoch": 0.27791986359761295, - "grad_norm": 0.0729006826877594, - "learning_rate": 7.453066606834052e-05, - "loss": 0.012136349081993103, - "step": 1630 - }, - { - "epoch": 0.27877237851662406, - "grad_norm": 0.10457300394773483, - "learning_rate": 7.452661642011818e-05, - "loss": 0.014803081750869751, - "step": 1635 - }, - { - "epoch": 0.2796248934356351, - "grad_norm": 0.09881619364023209, - "learning_rate": 7.452254948663964e-05, - "loss": 0.012653107941150665, - "step": 1640 - }, - { - "epoch": 0.2804774083546462, - "grad_norm": 0.12094103544950485, - "learning_rate": 7.451846526980343e-05, - "loss": 0.011742380261421204, - "step": 1645 - }, - { - "epoch": 0.2813299232736573, - "grad_norm": 0.06668030470609665, - "learning_rate": 7.451436377151624e-05, - "loss": 0.01095641851425171, - "step": 1650 - }, - { - "epoch": 0.28218243819266836, - "grad_norm": 0.06907116621732712, - "learning_rate": 7.451024499369278e-05, - "loss": 0.01093050017952919, - "step": 1655 - }, - { - "epoch": 0.2830349531116795, - "grad_norm": 0.13372033834457397, - "learning_rate": 7.45061089382558e-05, - "loss": 0.012350015342235565, - "step": 1660 - }, - { - "epoch": 0.28388746803069054, - "grad_norm": 0.06432037055492401, - "learning_rate": 7.450195560713617e-05, - "loss": 0.010150979459285735, - "step": 1665 - }, - { - "epoch": 0.2847399829497016, - "grad_norm": 0.10098759829998016, - "learning_rate": 7.449778500227281e-05, - "loss": 0.01070861890912056, - "step": 1670 - }, - { - "epoch": 0.2855924978687127, - "grad_norm": 0.1708894968032837, - "learning_rate": 7.449359712561269e-05, - "loss": 0.01218695342540741, - "step": 1675 - }, - { - "epoch": 0.2864450127877238, - "grad_norm": 0.15045367181301117, - "learning_rate": 7.448939197911084e-05, - "loss": 0.012416082620620727, - "step": 1680 - }, - { - "epoch": 0.2872975277067349, - "grad_norm": 0.08867572993040085, - "learning_rate": 7.44851695647304e-05, - "loss": 0.011927373707294464, - "step": 1685 - }, - { - "epoch": 0.28815004262574595, - "grad_norm": 0.1402040272951126, - "learning_rate": 7.448092988444247e-05, - "loss": 0.011733450740575791, - "step": 1690 - }, - { - "epoch": 0.289002557544757, - "grad_norm": 0.10436082631349564, - "learning_rate": 7.447667294022631e-05, - "loss": 0.013171072304248809, - "step": 1695 - }, - { - "epoch": 0.2898550724637681, - "grad_norm": 0.10628762096166611, - "learning_rate": 7.447239873406923e-05, - "loss": 0.012366896122694015, - "step": 1700 - }, - { - "epoch": 0.2907075873827792, - "grad_norm": 0.09782184660434723, - "learning_rate": 7.446810726796653e-05, - "loss": 0.011275313794612885, - "step": 1705 - }, - { - "epoch": 0.2915601023017903, - "grad_norm": 0.08403825014829636, - "learning_rate": 7.446379854392162e-05, - "loss": 0.010051032900810242, - "step": 1710 - }, - { - "epoch": 0.29241261722080136, - "grad_norm": 0.07938918471336365, - "learning_rate": 7.445947256394596e-05, - "loss": 0.00972949042916298, - "step": 1715 - }, - { - "epoch": 0.2932651321398124, - "grad_norm": 0.09250234067440033, - "learning_rate": 7.445512933005906e-05, - "loss": 0.009316288679838181, - "step": 1720 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 0.08939237147569656, - "learning_rate": 7.445076884428848e-05, - "loss": 0.007942373305559159, - "step": 1725 - }, - { - "epoch": 0.2949701619778346, - "grad_norm": 0.06440749019384384, - "learning_rate": 7.444639110866985e-05, - "loss": 0.008772502094507218, - "step": 1730 - }, - { - "epoch": 0.2958226768968457, - "grad_norm": 0.0980759784579277, - "learning_rate": 7.444199612524684e-05, - "loss": 0.0127939835190773, - "step": 1735 - }, - { - "epoch": 0.2966751918158568, - "grad_norm": 0.133849635720253, - "learning_rate": 7.443758389607117e-05, - "loss": 0.011026865988969802, - "step": 1740 - }, - { - "epoch": 0.29752770673486784, - "grad_norm": 0.08664857596158981, - "learning_rate": 7.443315442320263e-05, - "loss": 0.010273561626672745, - "step": 1745 - }, - { - "epoch": 0.29838022165387895, - "grad_norm": 0.11462656408548355, - "learning_rate": 7.442870770870902e-05, - "loss": 0.012825533747673035, - "step": 1750 - }, - { - "epoch": 0.29923273657289, - "grad_norm": 0.12586012482643127, - "learning_rate": 7.442424375466624e-05, - "loss": 0.01315489411354065, - "step": 1755 - }, - { - "epoch": 0.30008525149190113, - "grad_norm": 0.07139981538057327, - "learning_rate": 7.441976256315819e-05, - "loss": 0.010728174448013305, - "step": 1760 - }, - { - "epoch": 0.3009377664109122, - "grad_norm": 0.06837856024503708, - "learning_rate": 7.441526413627685e-05, - "loss": 0.012408022582530976, - "step": 1765 - }, - { - "epoch": 0.30179028132992325, - "grad_norm": 0.05851417034864426, - "learning_rate": 7.441074847612224e-05, - "loss": 0.009401807188987732, - "step": 1770 - }, - { - "epoch": 0.30264279624893436, - "grad_norm": 0.09595180302858353, - "learning_rate": 7.44062155848024e-05, - "loss": 0.010888323932886124, - "step": 1775 - }, - { - "epoch": 0.3034953111679454, - "grad_norm": 0.0811101421713829, - "learning_rate": 7.440166546443347e-05, - "loss": 0.00998341292142868, - "step": 1780 - }, - { - "epoch": 0.30434782608695654, - "grad_norm": 0.13257169723510742, - "learning_rate": 7.439709811713958e-05, - "loss": 0.014603719115257263, - "step": 1785 - }, - { - "epoch": 0.3052003410059676, - "grad_norm": 0.1428811252117157, - "learning_rate": 7.439251354505289e-05, - "loss": 0.01388871967792511, - "step": 1790 - }, - { - "epoch": 0.30605285592497866, - "grad_norm": 0.08253402262926102, - "learning_rate": 7.438791175031367e-05, - "loss": 0.010171836614608765, - "step": 1795 - }, - { - "epoch": 0.3069053708439898, - "grad_norm": 0.05991052836179733, - "learning_rate": 7.438329273507019e-05, - "loss": 0.01470649391412735, - "step": 1800 - }, - { - "epoch": 0.30775788576300084, - "grad_norm": 0.10785503685474396, - "learning_rate": 7.437865650147873e-05, - "loss": 0.012740308046340942, - "step": 1805 - }, - { - "epoch": 0.30861040068201195, - "grad_norm": 0.093068428337574, - "learning_rate": 7.437400305170367e-05, - "loss": 0.01183861643075943, - "step": 1810 - }, - { - "epoch": 0.309462915601023, - "grad_norm": 0.08662707358598709, - "learning_rate": 7.436933238791737e-05, - "loss": 0.011762722581624984, - "step": 1815 - }, - { - "epoch": 0.3103154305200341, - "grad_norm": 0.07826617360115051, - "learning_rate": 7.436464451230027e-05, - "loss": 0.009368828684091567, - "step": 1820 - }, - { - "epoch": 0.3111679454390452, - "grad_norm": 0.1295643448829651, - "learning_rate": 7.435993942704082e-05, - "loss": 0.010699732601642609, - "step": 1825 - }, - { - "epoch": 0.31202046035805625, - "grad_norm": 0.1412370651960373, - "learning_rate": 7.43552171343355e-05, - "loss": 0.0124404676258564, - "step": 1830 - }, - { - "epoch": 0.31287297527706737, - "grad_norm": 0.07793306559324265, - "learning_rate": 7.435047763638885e-05, - "loss": 0.010793016105890275, - "step": 1835 - }, - { - "epoch": 0.3137254901960784, - "grad_norm": 0.1273961067199707, - "learning_rate": 7.434572093541341e-05, - "loss": 0.012959575653076172, - "step": 1840 - }, - { - "epoch": 0.3145780051150895, - "grad_norm": 0.10340052098035812, - "learning_rate": 7.434094703362978e-05, - "loss": 0.011804693937301635, - "step": 1845 - }, - { - "epoch": 0.3154305200341006, - "grad_norm": 0.07878883183002472, - "learning_rate": 7.433615593326657e-05, - "loss": 0.011087532341480254, - "step": 1850 - }, - { - "epoch": 0.31628303495311166, - "grad_norm": 0.08166638761758804, - "learning_rate": 7.433134763656042e-05, - "loss": 0.010111966729164123, - "step": 1855 - }, - { - "epoch": 0.3171355498721228, - "grad_norm": 0.12048157304525375, - "learning_rate": 7.432652214575603e-05, - "loss": 0.013003784418106078, - "step": 1860 - }, - { - "epoch": 0.31798806479113384, - "grad_norm": 0.08009333908557892, - "learning_rate": 7.432167946310605e-05, - "loss": 0.01212536245584488, - "step": 1865 - }, - { - "epoch": 0.3188405797101449, - "grad_norm": 0.07344945520162582, - "learning_rate": 7.431681959087126e-05, - "loss": 0.011613032221794129, - "step": 1870 - }, - { - "epoch": 0.319693094629156, - "grad_norm": 0.09358638525009155, - "learning_rate": 7.431194253132037e-05, - "loss": 0.011946377158164979, - "step": 1875 - }, - { - "epoch": 0.3205456095481671, - "grad_norm": 0.14091502130031586, - "learning_rate": 7.430704828673016e-05, - "loss": 0.012845572829246522, - "step": 1880 - }, - { - "epoch": 0.3213981244671782, - "grad_norm": 0.0754130631685257, - "learning_rate": 7.430213685938543e-05, - "loss": 0.011171463876962662, - "step": 1885 - }, - { - "epoch": 0.32225063938618925, - "grad_norm": 0.10210556536912918, - "learning_rate": 7.429720825157901e-05, - "loss": 0.010276605188846589, - "step": 1890 - }, - { - "epoch": 0.32310315430520037, - "grad_norm": 0.10094697028398514, - "learning_rate": 7.429226246561173e-05, - "loss": 0.01233583763241768, - "step": 1895 - }, - { - "epoch": 0.32395566922421143, - "grad_norm": 0.0673881471157074, - "learning_rate": 7.428729950379244e-05, - "loss": 0.008631937205791473, - "step": 1900 - }, - { - "epoch": 0.3248081841432225, - "grad_norm": 0.11807650327682495, - "learning_rate": 7.428231936843803e-05, - "loss": 0.012879209220409393, - "step": 1905 - }, - { - "epoch": 0.3256606990622336, - "grad_norm": 0.0627446100115776, - "learning_rate": 7.427732206187338e-05, - "loss": 0.011548225581645966, - "step": 1910 - }, - { - "epoch": 0.32651321398124467, - "grad_norm": 0.09312627464532852, - "learning_rate": 7.427230758643139e-05, - "loss": 0.012763653695583344, - "step": 1915 - }, - { - "epoch": 0.3273657289002558, - "grad_norm": 0.12694048881530762, - "learning_rate": 7.426727594445302e-05, - "loss": 0.014219759404659272, - "step": 1920 - }, - { - "epoch": 0.32821824381926684, - "grad_norm": 0.09415233880281448, - "learning_rate": 7.426222713828717e-05, - "loss": 0.01088135689496994, - "step": 1925 - }, - { - "epoch": 0.3290707587382779, - "grad_norm": 0.1079363226890564, - "learning_rate": 7.425716117029082e-05, - "loss": 0.013090427219867706, - "step": 1930 - }, - { - "epoch": 0.329923273657289, - "grad_norm": 0.10847736895084381, - "learning_rate": 7.42520780428289e-05, - "loss": 0.011184506118297577, - "step": 1935 - }, - { - "epoch": 0.3307757885763001, - "grad_norm": 0.12416253983974457, - "learning_rate": 7.424697775827442e-05, - "loss": 0.012871085107326508, - "step": 1940 - }, - { - "epoch": 0.3316283034953112, - "grad_norm": 0.08419755846261978, - "learning_rate": 7.424186031900833e-05, - "loss": 0.01026538610458374, - "step": 1945 - }, - { - "epoch": 0.33248081841432225, - "grad_norm": 0.06923236697912216, - "learning_rate": 7.423672572741965e-05, - "loss": 0.012079264223575591, - "step": 1950 - }, - { - "epoch": 0.3333333333333333, - "grad_norm": 0.08516070991754532, - "learning_rate": 7.423157398590534e-05, - "loss": 0.011150284111499787, - "step": 1955 - }, - { - "epoch": 0.33418584825234443, - "grad_norm": 0.054969049990177155, - "learning_rate": 7.422640509687045e-05, - "loss": 0.008261225372552871, - "step": 1960 - }, - { - "epoch": 0.3350383631713555, - "grad_norm": 0.09037495404481888, - "learning_rate": 7.422121906272795e-05, - "loss": 0.015576986968517304, - "step": 1965 - }, - { - "epoch": 0.3358908780903666, - "grad_norm": 0.08676491677761078, - "learning_rate": 7.421601588589889e-05, - "loss": 0.01942193806171417, - "step": 1970 - }, - { - "epoch": 0.33674339300937767, - "grad_norm": 0.09090764820575714, - "learning_rate": 7.421079556881224e-05, - "loss": 0.012568703293800354, - "step": 1975 - }, - { - "epoch": 0.3375959079283887, - "grad_norm": 0.07859542965888977, - "learning_rate": 7.420555811390505e-05, - "loss": 0.011662108451128006, - "step": 1980 - }, - { - "epoch": 0.33844842284739984, - "grad_norm": 0.06368016451597214, - "learning_rate": 7.420030352362235e-05, - "loss": 0.010762494802474976, - "step": 1985 - }, - { - "epoch": 0.3393009377664109, - "grad_norm": 0.10950745642185211, - "learning_rate": 7.419503180041712e-05, - "loss": 0.012577894330024719, - "step": 1990 - }, - { - "epoch": 0.340153452685422, - "grad_norm": 0.07888182997703552, - "learning_rate": 7.41897429467504e-05, - "loss": 0.009134671837091445, - "step": 1995 - }, - { - "epoch": 0.3410059676044331, - "grad_norm": 0.08978903293609619, - "learning_rate": 7.41844369650912e-05, - "loss": 0.011774566024541855, - "step": 2000 - }, - { - "epoch": 0.34185848252344414, - "grad_norm": 0.07103633135557175, - "learning_rate": 7.417911385791653e-05, - "loss": 0.011116493493318558, - "step": 2005 - }, - { - "epoch": 0.34271099744245526, - "grad_norm": 0.07445147633552551, - "learning_rate": 7.417377362771138e-05, - "loss": 0.012135914713144302, - "step": 2010 - }, - { - "epoch": 0.3435635123614663, - "grad_norm": 0.09372841566801071, - "learning_rate": 7.416841627696876e-05, - "loss": 0.014101208746433258, - "step": 2015 - }, - { - "epoch": 0.34441602728047743, - "grad_norm": 0.10181085020303726, - "learning_rate": 7.416304180818966e-05, - "loss": 0.010917666554450988, - "step": 2020 - }, - { - "epoch": 0.3452685421994885, - "grad_norm": 0.08702226728200912, - "learning_rate": 7.415765022388305e-05, - "loss": 0.012508213520050049, - "step": 2025 - }, - { - "epoch": 0.34612105711849955, - "grad_norm": 0.11725348234176636, - "learning_rate": 7.415224152656591e-05, - "loss": 0.012123394012451171, - "step": 2030 - }, - { - "epoch": 0.34697357203751067, - "grad_norm": 0.10797812044620514, - "learning_rate": 7.414681571876321e-05, - "loss": 0.011308898031711579, - "step": 2035 - }, - { - "epoch": 0.34782608695652173, - "grad_norm": 0.07944193482398987, - "learning_rate": 7.414137280300787e-05, - "loss": 0.008828282356262207, - "step": 2040 - }, - { - "epoch": 0.34867860187553285, - "grad_norm": 0.09413408488035202, - "learning_rate": 7.413591278184086e-05, - "loss": 0.010974615067243575, - "step": 2045 - }, - { - "epoch": 0.3495311167945439, - "grad_norm": 0.13984905183315277, - "learning_rate": 7.413043565781107e-05, - "loss": 0.013567428290843963, - "step": 2050 - }, - { - "epoch": 0.35038363171355497, - "grad_norm": 0.09445049613714218, - "learning_rate": 7.41249414334754e-05, - "loss": 0.011122822761535645, - "step": 2055 - }, - { - "epoch": 0.3512361466325661, - "grad_norm": 0.07995510846376419, - "learning_rate": 7.411943011139877e-05, - "loss": 0.009908045828342437, - "step": 2060 - }, - { - "epoch": 0.35208866155157714, - "grad_norm": 0.1185273677110672, - "learning_rate": 7.411390169415402e-05, - "loss": 0.012709785997867585, - "step": 2065 - }, - { - "epoch": 0.35294117647058826, - "grad_norm": 0.11713512986898422, - "learning_rate": 7.4108356184322e-05, - "loss": 0.009765231609344482, - "step": 2070 - }, - { - "epoch": 0.3537936913895993, - "grad_norm": 0.06523539125919342, - "learning_rate": 7.410279358449155e-05, - "loss": 0.0113253653049469, - "step": 2075 - }, - { - "epoch": 0.3546462063086104, - "grad_norm": 0.07587762176990509, - "learning_rate": 7.409721389725948e-05, - "loss": 0.009385265409946442, - "step": 2080 - }, - { - "epoch": 0.3554987212276215, - "grad_norm": 0.05211614444851875, - "learning_rate": 7.409161712523056e-05, - "loss": 0.012498895078897477, - "step": 2085 - }, - { - "epoch": 0.35635123614663256, - "grad_norm": 0.12545894086360931, - "learning_rate": 7.408600327101755e-05, - "loss": 0.012212803959846497, - "step": 2090 - }, - { - "epoch": 0.35720375106564367, - "grad_norm": 0.10047369450330734, - "learning_rate": 7.40803723372412e-05, - "loss": 0.012341489642858505, - "step": 2095 - }, - { - "epoch": 0.35805626598465473, - "grad_norm": 0.13728737831115723, - "learning_rate": 7.40747243265302e-05, - "loss": 0.011351624131202697, - "step": 2100 - }, - { - "epoch": 0.3589087809036658, - "grad_norm": 0.1251213699579239, - "learning_rate": 7.406905924152125e-05, - "loss": 0.013545188307762145, - "step": 2105 - }, - { - "epoch": 0.3597612958226769, - "grad_norm": 0.07805601507425308, - "learning_rate": 7.406337708485897e-05, - "loss": 0.010711775720119476, - "step": 2110 - }, - { - "epoch": 0.36061381074168797, - "grad_norm": 0.08311845362186432, - "learning_rate": 7.405767785919598e-05, - "loss": 0.01128876730799675, - "step": 2115 - }, - { - "epoch": 0.3614663256606991, - "grad_norm": 0.09670841693878174, - "learning_rate": 7.405196156719291e-05, - "loss": 0.013085599243640899, - "step": 2120 - }, - { - "epoch": 0.36231884057971014, - "grad_norm": 0.10827390104532242, - "learning_rate": 7.404622821151829e-05, - "loss": 0.011315967142581939, - "step": 2125 - }, - { - "epoch": 0.3631713554987212, - "grad_norm": 0.08578862994909286, - "learning_rate": 7.404047779484862e-05, - "loss": 0.01172153502702713, - "step": 2130 - }, - { - "epoch": 0.3640238704177323, - "grad_norm": 0.08786064386367798, - "learning_rate": 7.403471031986841e-05, - "loss": 0.010834509134292602, - "step": 2135 - }, - { - "epoch": 0.3648763853367434, - "grad_norm": 0.07956185191869736, - "learning_rate": 7.402892578927012e-05, - "loss": 0.01222250759601593, - "step": 2140 - }, - { - "epoch": 0.3657289002557545, - "grad_norm": 0.10179547220468521, - "learning_rate": 7.402312420575414e-05, - "loss": 0.010667824000120164, - "step": 2145 - }, - { - "epoch": 0.36658141517476556, - "grad_norm": 0.10311263799667358, - "learning_rate": 7.401730557202884e-05, - "loss": 0.014463961124420166, - "step": 2150 - }, - { - "epoch": 0.3674339300937766, - "grad_norm": 0.0935206189751625, - "learning_rate": 7.401146989081058e-05, - "loss": 0.010451390594244003, - "step": 2155 - }, - { - "epoch": 0.36828644501278773, - "grad_norm": 0.08164738863706589, - "learning_rate": 7.400561716482362e-05, - "loss": 0.013009518384933472, - "step": 2160 - }, - { - "epoch": 0.3691389599317988, - "grad_norm": 0.0638088807463646, - "learning_rate": 7.399974739680022e-05, - "loss": 0.0109320767223835, - "step": 2165 - }, - { - "epoch": 0.3699914748508099, - "grad_norm": 0.09591665863990784, - "learning_rate": 7.399386058948057e-05, - "loss": 0.01293652206659317, - "step": 2170 - }, - { - "epoch": 0.37084398976982097, - "grad_norm": 0.08929681777954102, - "learning_rate": 7.398795674561285e-05, - "loss": 0.011034403741359711, - "step": 2175 - }, - { - "epoch": 0.37169650468883203, - "grad_norm": 0.07356081902980804, - "learning_rate": 7.398203586795315e-05, - "loss": 0.010477699339389801, - "step": 2180 - }, - { - "epoch": 0.37254901960784315, - "grad_norm": 0.1117938682436943, - "learning_rate": 7.397609795926555e-05, - "loss": 0.008920109272003174, - "step": 2185 - }, - { - "epoch": 0.3734015345268542, - "grad_norm": 0.10849595069885254, - "learning_rate": 7.397014302232204e-05, - "loss": 0.01170756369829178, - "step": 2190 - }, - { - "epoch": 0.3742540494458653, - "grad_norm": 0.08509895205497742, - "learning_rate": 7.396417105990261e-05, - "loss": 0.010042114555835724, - "step": 2195 - }, - { - "epoch": 0.3751065643648764, - "grad_norm": 0.10500915348529816, - "learning_rate": 7.395818207479515e-05, - "loss": 0.011792914569377899, - "step": 2200 - }, - { - "epoch": 0.37595907928388744, - "grad_norm": 0.06618086993694305, - "learning_rate": 7.395217606979553e-05, - "loss": 0.011084456741809846, - "step": 2205 - }, - { - "epoch": 0.37681159420289856, - "grad_norm": 0.08622384816408157, - "learning_rate": 7.394615304770756e-05, - "loss": 0.010955430567264557, - "step": 2210 - }, - { - "epoch": 0.3776641091219096, - "grad_norm": 0.10002898424863815, - "learning_rate": 7.394011301134296e-05, - "loss": 0.011026810109615325, - "step": 2215 - }, - { - "epoch": 0.37851662404092073, - "grad_norm": 0.16406390070915222, - "learning_rate": 7.393405596352144e-05, - "loss": 0.010511884093284607, - "step": 2220 - }, - { - "epoch": 0.3793691389599318, - "grad_norm": 0.077234648168087, - "learning_rate": 7.392798190707062e-05, - "loss": 0.011723069101572036, - "step": 2225 - }, - { - "epoch": 0.38022165387894286, - "grad_norm": 0.09080372750759125, - "learning_rate": 7.392189084482609e-05, - "loss": 0.010011065006256103, - "step": 2230 - }, - { - "epoch": 0.38107416879795397, - "grad_norm": 0.08161097019910812, - "learning_rate": 7.391578277963134e-05, - "loss": 0.012426529079675674, - "step": 2235 - }, - { - "epoch": 0.38192668371696503, - "grad_norm": 0.09220891445875168, - "learning_rate": 7.390965771433783e-05, - "loss": 0.011983324587345124, - "step": 2240 - }, - { - "epoch": 0.38277919863597615, - "grad_norm": 0.10752015560865402, - "learning_rate": 7.390351565180495e-05, - "loss": 0.014156198501586914, - "step": 2245 - }, - { - "epoch": 0.3836317135549872, - "grad_norm": 0.05059373378753662, - "learning_rate": 7.38973565949e-05, - "loss": 0.00998034030199051, - "step": 2250 - }, - { - "epoch": 0.38448422847399827, - "grad_norm": 0.11214456707239151, - "learning_rate": 7.389118054649824e-05, - "loss": 0.01075390875339508, - "step": 2255 - }, - { - "epoch": 0.3853367433930094, - "grad_norm": 0.07631754130125046, - "learning_rate": 7.388498750948286e-05, - "loss": 0.014638753235340118, - "step": 2260 - }, - { - "epoch": 0.38618925831202044, - "grad_norm": 0.07249671965837479, - "learning_rate": 7.387877748674499e-05, - "loss": 0.011368723213672638, - "step": 2265 - }, - { - "epoch": 0.38704177323103156, - "grad_norm": 0.11984748393297195, - "learning_rate": 7.387255048118364e-05, - "loss": 0.011021500825881958, - "step": 2270 - }, - { - "epoch": 0.3878942881500426, - "grad_norm": 0.08478229492902756, - "learning_rate": 7.386630649570581e-05, - "loss": 0.009952519088983536, - "step": 2275 - }, - { - "epoch": 0.3887468030690537, - "grad_norm": 0.11780049651861191, - "learning_rate": 7.386004553322639e-05, - "loss": 0.009453963488340378, - "step": 2280 - }, - { - "epoch": 0.3895993179880648, - "grad_norm": 0.06949981302022934, - "learning_rate": 7.38537675966682e-05, - "loss": 0.009042493999004364, - "step": 2285 - }, - { - "epoch": 0.39045183290707586, - "grad_norm": 0.11411654949188232, - "learning_rate": 7.3847472688962e-05, - "loss": 0.013985235989093781, - "step": 2290 - }, - { - "epoch": 0.391304347826087, - "grad_norm": 0.11030828952789307, - "learning_rate": 7.384116081304647e-05, - "loss": 0.01135389506816864, - "step": 2295 - }, - { - "epoch": 0.39215686274509803, - "grad_norm": 0.0808996856212616, - "learning_rate": 7.38348319718682e-05, - "loss": 0.011089587211608886, - "step": 2300 - }, - { - "epoch": 0.39300937766410915, - "grad_norm": 0.11319196224212646, - "learning_rate": 7.382848616838167e-05, - "loss": 0.01407056450843811, - "step": 2305 - }, - { - "epoch": 0.3938618925831202, - "grad_norm": 0.09301812201738358, - "learning_rate": 7.382212340554937e-05, - "loss": 0.012283433228731155, - "step": 2310 - }, - { - "epoch": 0.39471440750213127, - "grad_norm": 0.08611076325178146, - "learning_rate": 7.381574368634159e-05, - "loss": 0.012206315249204635, - "step": 2315 - }, - { - "epoch": 0.3955669224211424, - "grad_norm": 0.08104816824197769, - "learning_rate": 7.380934701373665e-05, - "loss": 0.01059889942407608, - "step": 2320 - }, - { - "epoch": 0.39641943734015345, - "grad_norm": 0.09607693552970886, - "learning_rate": 7.380293339072067e-05, - "loss": 0.010189100354909896, - "step": 2325 - }, - { - "epoch": 0.39727195225916456, - "grad_norm": 0.08985438197851181, - "learning_rate": 7.37965028202878e-05, - "loss": 0.01145355924963951, - "step": 2330 - }, - { - "epoch": 0.3981244671781756, - "grad_norm": 0.0767461284995079, - "learning_rate": 7.379005530544e-05, - "loss": 0.012533161044120788, - "step": 2335 - }, - { - "epoch": 0.3989769820971867, - "grad_norm": 0.17541736364364624, - "learning_rate": 7.378359084918724e-05, - "loss": 0.011619434505701066, - "step": 2340 - }, - { - "epoch": 0.3998294970161978, - "grad_norm": 0.07870234549045563, - "learning_rate": 7.377710945454728e-05, - "loss": 0.013362208008766174, - "step": 2345 - }, - { - "epoch": 0.40068201193520886, - "grad_norm": 0.08661636710166931, - "learning_rate": 7.377061112454589e-05, - "loss": 0.011086350679397583, - "step": 2350 - }, - { - "epoch": 0.40153452685422, - "grad_norm": 0.08467904478311539, - "learning_rate": 7.376409586221668e-05, - "loss": 0.008972878754138946, - "step": 2355 - }, - { - "epoch": 0.40238704177323104, - "grad_norm": 0.09345834702253342, - "learning_rate": 7.375756367060121e-05, - "loss": 0.01281469464302063, - "step": 2360 - }, - { - "epoch": 0.4032395566922421, - "grad_norm": 0.10789518058300018, - "learning_rate": 7.375101455274893e-05, - "loss": 0.012343473732471466, - "step": 2365 - }, - { - "epoch": 0.4040920716112532, - "grad_norm": 0.06546701490879059, - "learning_rate": 7.374444851171716e-05, - "loss": 0.012971158325672149, - "step": 2370 - }, - { - "epoch": 0.40494458653026427, - "grad_norm": 0.08968871831893921, - "learning_rate": 7.373786555057117e-05, - "loss": 0.012170027941465378, - "step": 2375 - }, - { - "epoch": 0.4057971014492754, - "grad_norm": 0.058557040989398956, - "learning_rate": 7.373126567238412e-05, - "loss": 0.009915658086538316, - "step": 2380 - }, - { - "epoch": 0.40664961636828645, - "grad_norm": 0.08734243363142014, - "learning_rate": 7.3724648880237e-05, - "loss": 0.009043127298355103, - "step": 2385 - }, - { - "epoch": 0.4075021312872975, - "grad_norm": 0.09249505400657654, - "learning_rate": 7.371801517721879e-05, - "loss": 0.008064758032560349, - "step": 2390 - }, - { - "epoch": 0.4083546462063086, - "grad_norm": 0.09015105664730072, - "learning_rate": 7.371136456642631e-05, - "loss": 0.007721304893493652, - "step": 2395 - }, - { - "epoch": 0.4092071611253197, - "grad_norm": 0.08557724207639694, - "learning_rate": 7.37046970509643e-05, - "loss": 0.010766822844743729, - "step": 2400 - }, - { - "epoch": 0.4100596760443308, - "grad_norm": 0.08001160621643066, - "learning_rate": 7.369801263394536e-05, - "loss": 0.00953015759587288, - "step": 2405 - }, - { - "epoch": 0.41091219096334186, - "grad_norm": 0.08470463752746582, - "learning_rate": 7.369131131849e-05, - "loss": 0.010154610127210617, - "step": 2410 - }, - { - "epoch": 0.4117647058823529, - "grad_norm": 0.07110592722892761, - "learning_rate": 7.368459310772664e-05, - "loss": 0.010146965831518173, - "step": 2415 - }, - { - "epoch": 0.41261722080136404, - "grad_norm": 0.06808072328567505, - "learning_rate": 7.367785800479152e-05, - "loss": 0.01043560653924942, - "step": 2420 - }, - { - "epoch": 0.4134697357203751, - "grad_norm": 0.09226541966199875, - "learning_rate": 7.367110601282884e-05, - "loss": 0.011138775944709777, - "step": 2425 - }, - { - "epoch": 0.4143222506393862, - "grad_norm": 0.08650510013103485, - "learning_rate": 7.366433713499067e-05, - "loss": 0.011451859772205353, - "step": 2430 - }, - { - "epoch": 0.4151747655583973, - "grad_norm": 0.11477349698543549, - "learning_rate": 7.365755137443691e-05, - "loss": 0.013105396926403046, - "step": 2435 - }, - { - "epoch": 0.41602728047740833, - "grad_norm": 0.1117088794708252, - "learning_rate": 7.365074873433541e-05, - "loss": 0.01190647780895233, - "step": 2440 - }, - { - "epoch": 0.41687979539641945, - "grad_norm": 0.058514054864645004, - "learning_rate": 7.364392921786185e-05, - "loss": 0.011006749421358108, - "step": 2445 - }, - { - "epoch": 0.4177323103154305, - "grad_norm": 0.0925084576010704, - "learning_rate": 7.363709282819981e-05, - "loss": 0.011449025571346283, - "step": 2450 - }, - { - "epoch": 0.4185848252344416, - "grad_norm": 0.10087555646896362, - "learning_rate": 7.363023956854074e-05, - "loss": 0.011715477705001831, - "step": 2455 - }, - { - "epoch": 0.4194373401534527, - "grad_norm": 0.08760760724544525, - "learning_rate": 7.362336944208399e-05, - "loss": 0.011089532822370528, - "step": 2460 - }, - { - "epoch": 0.42028985507246375, - "grad_norm": 0.09802501648664474, - "learning_rate": 7.361648245203674e-05, - "loss": 0.012181267142295837, - "step": 2465 - }, - { - "epoch": 0.42114236999147486, - "grad_norm": 0.06908553838729858, - "learning_rate": 7.36095786016141e-05, - "loss": 0.010319410264492035, - "step": 2470 - }, - { - "epoch": 0.4219948849104859, - "grad_norm": 0.07190519571304321, - "learning_rate": 7.360265789403896e-05, - "loss": 0.013445201516151428, - "step": 2475 - }, - { - "epoch": 0.42284739982949704, - "grad_norm": 0.06683836877346039, - "learning_rate": 7.359572033254219e-05, - "loss": 0.008111725002527237, - "step": 2480 - }, - { - "epoch": 0.4236999147485081, - "grad_norm": 0.07094739377498627, - "learning_rate": 7.358876592036245e-05, - "loss": 0.012130254507064819, - "step": 2485 - }, - { - "epoch": 0.42455242966751916, - "grad_norm": 0.11974254250526428, - "learning_rate": 7.358179466074629e-05, - "loss": 0.011426160484552384, - "step": 2490 - }, - { - "epoch": 0.4254049445865303, - "grad_norm": 0.07710634917020798, - "learning_rate": 7.357480655694814e-05, - "loss": 0.010044369101524352, - "step": 2495 - }, - { - "epoch": 0.42625745950554134, - "grad_norm": 0.08417962491512299, - "learning_rate": 7.356780161223026e-05, - "loss": 0.010821688175201415, - "step": 2500 - }, - { - "epoch": 0.42710997442455245, - "grad_norm": 0.11058598011732101, - "learning_rate": 7.35607798298628e-05, - "loss": 0.012949730455875396, - "step": 2505 - }, - { - "epoch": 0.4279624893435635, - "grad_norm": 0.08686384558677673, - "learning_rate": 7.355374121312377e-05, - "loss": 0.009096769988536835, - "step": 2510 - }, - { - "epoch": 0.4288150042625746, - "grad_norm": 0.11153281480073929, - "learning_rate": 7.354668576529903e-05, - "loss": 0.010433172434568405, - "step": 2515 - }, - { - "epoch": 0.4296675191815857, - "grad_norm": 0.08490245044231415, - "learning_rate": 7.353961348968229e-05, - "loss": 0.008478586375713349, - "step": 2520 - }, - { - "epoch": 0.43052003410059675, - "grad_norm": 0.06651579588651657, - "learning_rate": 7.353252438957511e-05, - "loss": 0.012342555820941925, - "step": 2525 - }, - { - "epoch": 0.43137254901960786, - "grad_norm": 0.08961665630340576, - "learning_rate": 7.352541846828694e-05, - "loss": 0.010387994349002838, - "step": 2530 - }, - { - "epoch": 0.4322250639386189, - "grad_norm": 0.08726584166288376, - "learning_rate": 7.351829572913505e-05, - "loss": 0.009760166704654693, - "step": 2535 - }, - { - "epoch": 0.43307757885763, - "grad_norm": 0.06280151754617691, - "learning_rate": 7.351115617544459e-05, - "loss": 0.01087048500776291, - "step": 2540 - }, - { - "epoch": 0.4339300937766411, - "grad_norm": 0.09519831836223602, - "learning_rate": 7.350399981054851e-05, - "loss": 0.011516393721103668, - "step": 2545 - }, - { - "epoch": 0.43478260869565216, - "grad_norm": 0.09179427474737167, - "learning_rate": 7.349682663778766e-05, - "loss": 0.013757939636707305, - "step": 2550 - }, - { - "epoch": 0.4356351236146633, - "grad_norm": 0.10378465801477432, - "learning_rate": 7.34896366605107e-05, - "loss": 0.011337973177433014, - "step": 2555 - }, - { - "epoch": 0.43648763853367434, - "grad_norm": 0.14043129980564117, - "learning_rate": 7.348242988207418e-05, - "loss": 0.01203509122133255, - "step": 2560 - }, - { - "epoch": 0.4373401534526854, - "grad_norm": 0.06442756950855255, - "learning_rate": 7.347520630584243e-05, - "loss": 0.007210708409547806, - "step": 2565 - }, - { - "epoch": 0.4381926683716965, - "grad_norm": 0.05981998145580292, - "learning_rate": 7.346796593518768e-05, - "loss": 0.009825873374938964, - "step": 2570 - }, - { - "epoch": 0.4390451832907076, - "grad_norm": 0.10198855400085449, - "learning_rate": 7.346070877348996e-05, - "loss": 0.013066151738166809, - "step": 2575 - }, - { - "epoch": 0.4398976982097187, - "grad_norm": 0.12545716762542725, - "learning_rate": 7.345343482413716e-05, - "loss": 0.008229418843984603, - "step": 2580 - }, - { - "epoch": 0.44075021312872975, - "grad_norm": 0.1352240890264511, - "learning_rate": 7.344614409052501e-05, - "loss": 0.013183671236038207, - "step": 2585 - }, - { - "epoch": 0.4416027280477408, - "grad_norm": 0.07198570668697357, - "learning_rate": 7.343883657605704e-05, - "loss": 0.010311058908700942, - "step": 2590 - }, - { - "epoch": 0.4424552429667519, - "grad_norm": 0.08454001694917679, - "learning_rate": 7.343151228414469e-05, - "loss": 0.009928110986948013, - "step": 2595 - }, - { - "epoch": 0.443307757885763, - "grad_norm": 0.07289708405733109, - "learning_rate": 7.342417121820714e-05, - "loss": 0.011071844398975373, - "step": 2600 - }, - { - "epoch": 0.4441602728047741, - "grad_norm": 0.12291301786899567, - "learning_rate": 7.341681338167145e-05, - "loss": 0.011248499900102616, - "step": 2605 - }, - { - "epoch": 0.44501278772378516, - "grad_norm": 0.14277565479278564, - "learning_rate": 7.340943877797252e-05, - "loss": 0.010025183856487273, - "step": 2610 - }, - { - "epoch": 0.4458653026427962, - "grad_norm": 0.07569251209497452, - "learning_rate": 7.340204741055304e-05, - "loss": 0.009996208548545837, - "step": 2615 - }, - { - "epoch": 0.44671781756180734, - "grad_norm": 0.10494589060544968, - "learning_rate": 7.339463928286357e-05, - "loss": 0.01392391324043274, - "step": 2620 - }, - { - "epoch": 0.4475703324808184, - "grad_norm": 0.14377856254577637, - "learning_rate": 7.338721439836245e-05, - "loss": 0.012823046743869781, - "step": 2625 - }, - { - "epoch": 0.4484228473998295, - "grad_norm": 0.06943785399198532, - "learning_rate": 7.337977276051586e-05, - "loss": 0.009452010691165923, - "step": 2630 - }, - { - "epoch": 0.4492753623188406, - "grad_norm": 0.09933419525623322, - "learning_rate": 7.337231437279783e-05, - "loss": 0.008945996314287186, - "step": 2635 - }, - { - "epoch": 0.45012787723785164, - "grad_norm": 0.09861225634813309, - "learning_rate": 7.336483923869016e-05, - "loss": 0.010671885311603546, - "step": 2640 - }, - { - "epoch": 0.45098039215686275, - "grad_norm": 0.08303772658109665, - "learning_rate": 7.335734736168249e-05, - "loss": 0.009589634835720062, - "step": 2645 - }, - { - "epoch": 0.4518329070758738, - "grad_norm": 0.08657588064670563, - "learning_rate": 7.334983874527231e-05, - "loss": 0.008064036071300507, - "step": 2650 - }, - { - "epoch": 0.45268542199488493, - "grad_norm": 0.10513710975646973, - "learning_rate": 7.334231339296485e-05, - "loss": 0.01647743284702301, - "step": 2655 - }, - { - "epoch": 0.453537936913896, - "grad_norm": 0.10341943055391312, - "learning_rate": 7.333477130827322e-05, - "loss": 0.009101226180791854, - "step": 2660 - }, - { - "epoch": 0.45439045183290705, - "grad_norm": 0.09740681946277618, - "learning_rate": 7.33272124947183e-05, - "loss": 0.011460770666599274, - "step": 2665 - }, - { - "epoch": 0.45524296675191817, - "grad_norm": 0.06477998197078705, - "learning_rate": 7.331963695582881e-05, - "loss": 0.011711791157722473, - "step": 2670 - }, - { - "epoch": 0.4560954816709292, - "grad_norm": 0.0881948322057724, - "learning_rate": 7.331204469514127e-05, - "loss": 0.009621420502662658, - "step": 2675 - }, - { - "epoch": 0.45694799658994034, - "grad_norm": 0.09553391486406326, - "learning_rate": 7.330443571619998e-05, - "loss": 0.011725078523159026, - "step": 2680 - }, - { - "epoch": 0.4578005115089514, - "grad_norm": 0.10480209439992905, - "learning_rate": 7.329681002255706e-05, - "loss": 0.012353558838367463, - "step": 2685 - }, - { - "epoch": 0.45865302642796246, - "grad_norm": 0.08409439772367477, - "learning_rate": 7.328916761777247e-05, - "loss": 0.01114615797996521, - "step": 2690 - }, - { - "epoch": 0.4595055413469736, - "grad_norm": 0.07166923582553864, - "learning_rate": 7.32815085054139e-05, - "loss": 0.008672221004962921, - "step": 2695 - }, - { - "epoch": 0.46035805626598464, - "grad_norm": 0.07308658212423325, - "learning_rate": 7.327383268905691e-05, - "loss": 0.012448658794164657, - "step": 2700 - }, - { - "epoch": 0.46121057118499575, - "grad_norm": 0.14019793272018433, - "learning_rate": 7.32661401722848e-05, - "loss": 0.013477186858654022, - "step": 2705 - }, - { - "epoch": 0.4620630861040068, - "grad_norm": 0.0753963515162468, - "learning_rate": 7.325843095868872e-05, - "loss": 0.011373884975910187, - "step": 2710 - }, - { - "epoch": 0.4629156010230179, - "grad_norm": 0.07312130182981491, - "learning_rate": 7.325070505186756e-05, - "loss": 0.012329152971506118, - "step": 2715 - }, - { - "epoch": 0.463768115942029, - "grad_norm": 0.06200556829571724, - "learning_rate": 7.324296245542806e-05, - "loss": 0.008847354352474213, - "step": 2720 - }, - { - "epoch": 0.46462063086104005, - "grad_norm": 0.11015846580266953, - "learning_rate": 7.32352031729847e-05, - "loss": 0.013304698467254638, - "step": 2725 - }, - { - "epoch": 0.46547314578005117, - "grad_norm": 0.05926821380853653, - "learning_rate": 7.322742720815978e-05, - "loss": 0.011919337511062621, - "step": 2730 - }, - { - "epoch": 0.4663256606990622, - "grad_norm": 0.102846160531044, - "learning_rate": 7.321963456458337e-05, - "loss": 0.010952814668416976, - "step": 2735 - }, - { - "epoch": 0.46717817561807334, - "grad_norm": 0.10767021775245667, - "learning_rate": 7.321182524589334e-05, - "loss": 0.012438956648111343, - "step": 2740 - }, - { - "epoch": 0.4680306905370844, - "grad_norm": 0.08611919730901718, - "learning_rate": 7.320399925573534e-05, - "loss": 0.008686845004558564, - "step": 2745 - }, - { - "epoch": 0.46888320545609546, - "grad_norm": 0.07483147829771042, - "learning_rate": 7.31961565977628e-05, - "loss": 0.011065713316202163, - "step": 2750 - }, - { - "epoch": 0.4697357203751066, - "grad_norm": 0.08029857277870178, - "learning_rate": 7.318829727563696e-05, - "loss": 0.012208929657936097, - "step": 2755 - }, - { - "epoch": 0.47058823529411764, - "grad_norm": 0.09076030552387238, - "learning_rate": 7.318042129302676e-05, - "loss": 0.010283030569553375, - "step": 2760 - }, - { - "epoch": 0.47144075021312876, - "grad_norm": 0.07009804993867874, - "learning_rate": 7.317252865360902e-05, - "loss": 0.010625988245010376, - "step": 2765 - }, - { - "epoch": 0.4722932651321398, - "grad_norm": 0.07213665544986725, - "learning_rate": 7.316461936106826e-05, - "loss": 0.010299822688102723, - "step": 2770 - }, - { - "epoch": 0.4731457800511509, - "grad_norm": 0.08464398980140686, - "learning_rate": 7.315669341909679e-05, - "loss": 0.010440715402364732, - "step": 2775 - }, - { - "epoch": 0.473998294970162, - "grad_norm": 0.08878160268068314, - "learning_rate": 7.314875083139475e-05, - "loss": 0.01015128344297409, - "step": 2780 - }, - { - "epoch": 0.47485080988917305, - "grad_norm": 0.05885029211640358, - "learning_rate": 7.314079160166996e-05, - "loss": 0.00943310335278511, - "step": 2785 - }, - { - "epoch": 0.47570332480818417, - "grad_norm": 0.07288813591003418, - "learning_rate": 7.313281573363809e-05, - "loss": 0.009116576611995697, - "step": 2790 - }, - { - "epoch": 0.47655583972719523, - "grad_norm": 0.09088344126939774, - "learning_rate": 7.31248232310225e-05, - "loss": 0.010344403237104416, - "step": 2795 - }, - { - "epoch": 0.4774083546462063, - "grad_norm": 0.08182916790246964, - "learning_rate": 7.311681409755437e-05, - "loss": 0.010874876379966735, - "step": 2800 - }, - { - "epoch": 0.4782608695652174, - "grad_norm": 0.08280645310878754, - "learning_rate": 7.310878833697264e-05, - "loss": 0.007568147033452988, - "step": 2805 - }, - { - "epoch": 0.47911338448422847, - "grad_norm": 0.10462478548288345, - "learning_rate": 7.3100745953024e-05, - "loss": 0.011740683764219283, - "step": 2810 - }, - { - "epoch": 0.4799658994032396, - "grad_norm": 0.07685881853103638, - "learning_rate": 7.30926869494629e-05, - "loss": 0.009284010529518128, - "step": 2815 - }, - { - "epoch": 0.48081841432225064, - "grad_norm": 0.05211766064167023, - "learning_rate": 7.308461133005156e-05, - "loss": 0.009633362293243408, - "step": 2820 - }, - { - "epoch": 0.4816709292412617, - "grad_norm": 0.07862114161252975, - "learning_rate": 7.307651909855993e-05, - "loss": 0.012355846166610718, - "step": 2825 - }, - { - "epoch": 0.4825234441602728, - "grad_norm": 0.09950421750545502, - "learning_rate": 7.306841025876573e-05, - "loss": 0.010842062532901764, - "step": 2830 - }, - { - "epoch": 0.4833759590792839, - "grad_norm": 0.08446205407381058, - "learning_rate": 7.306028481445446e-05, - "loss": 0.008424797654151916, - "step": 2835 - }, - { - "epoch": 0.484228473998295, - "grad_norm": 0.1424778699874878, - "learning_rate": 7.305214276941934e-05, - "loss": 0.01177324503660202, - "step": 2840 - }, - { - "epoch": 0.48508098891730606, - "grad_norm": 0.07312945276498795, - "learning_rate": 7.304398412746134e-05, - "loss": 0.010038022696971894, - "step": 2845 - }, - { - "epoch": 0.4859335038363171, - "grad_norm": 0.07043888419866562, - "learning_rate": 7.303580889238917e-05, - "loss": 0.008848214149475097, - "step": 2850 - }, - { - "epoch": 0.48678601875532823, - "grad_norm": 0.09851706773042679, - "learning_rate": 7.302761706801934e-05, - "loss": 0.011452250182628632, - "step": 2855 - }, - { - "epoch": 0.4876385336743393, - "grad_norm": 0.07379815727472305, - "learning_rate": 7.301940865817604e-05, - "loss": 0.010087071359157563, - "step": 2860 - }, - { - "epoch": 0.4884910485933504, - "grad_norm": 0.12832187116146088, - "learning_rate": 7.301118366669123e-05, - "loss": 0.013372799754142762, - "step": 2865 - }, - { - "epoch": 0.48934356351236147, - "grad_norm": 0.06776788830757141, - "learning_rate": 7.300294209740462e-05, - "loss": 0.010031795501708985, - "step": 2870 - }, - { - "epoch": 0.49019607843137253, - "grad_norm": 0.06495808809995651, - "learning_rate": 7.299468395416364e-05, - "loss": 0.011152566224336625, - "step": 2875 - }, - { - "epoch": 0.49104859335038364, - "grad_norm": 0.06433792412281036, - "learning_rate": 7.298640924082346e-05, - "loss": 0.012774203717708588, - "step": 2880 - }, - { - "epoch": 0.4919011082693947, - "grad_norm": 0.066926009953022, - "learning_rate": 7.2978117961247e-05, - "loss": 0.011111211776733399, - "step": 2885 - }, - { - "epoch": 0.4927536231884058, - "grad_norm": 0.08211687207221985, - "learning_rate": 7.296981011930493e-05, - "loss": 0.009508269280195237, - "step": 2890 - }, - { - "epoch": 0.4936061381074169, - "grad_norm": 0.09815993160009384, - "learning_rate": 7.296148571887558e-05, - "loss": 0.0117066890001297, - "step": 2895 - }, - { - "epoch": 0.49445865302642794, - "grad_norm": 0.07543535530567169, - "learning_rate": 7.295314476384508e-05, - "loss": 0.008867967873811722, - "step": 2900 - }, - { - "epoch": 0.49531116794543906, - "grad_norm": 0.07558202743530273, - "learning_rate": 7.294478725810728e-05, - "loss": 0.01093400940299034, - "step": 2905 - }, - { - "epoch": 0.4961636828644501, - "grad_norm": 0.06642191112041473, - "learning_rate": 7.293641320556371e-05, - "loss": 0.008366364240646362, - "step": 2910 - }, - { - "epoch": 0.49701619778346123, - "grad_norm": 0.07226760685443878, - "learning_rate": 7.292802261012368e-05, - "loss": 0.012197307497262954, - "step": 2915 - }, - { - "epoch": 0.4978687127024723, - "grad_norm": 0.08546584844589233, - "learning_rate": 7.29196154757042e-05, - "loss": 0.010272269695997238, - "step": 2920 - }, - { - "epoch": 0.49872122762148335, - "grad_norm": 0.0559270940721035, - "learning_rate": 7.291119180622998e-05, - "loss": 0.009690707921981812, - "step": 2925 - }, - { - "epoch": 0.49957374254049447, - "grad_norm": 0.11211635917425156, - "learning_rate": 7.290275160563349e-05, - "loss": 0.01505405604839325, - "step": 2930 - }, - { - "epoch": 0.4999147485080989, - "eval_loss": 0.035044603049755096, - "eval_runtime": 3.5861, - "eval_samples_per_second": 70.272, - "eval_steps_per_second": 1.115, - "step": 2932 - }, - { - "eval_cer_subset": 0.01374436139202371, - "eval_cer_subset_edit_distance": 844, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 2932 - }, - { - "epoch": 0.5004262574595055, - "grad_norm": 0.08485773205757141, - "learning_rate": 7.289429487785488e-05, - "loss": 0.01260426789522171, - "step": 2935 - }, - { - "epoch": 0.5012787723785166, - "grad_norm": 0.08039058744907379, - "learning_rate": 7.288582162684203e-05, - "loss": 0.012322144955396653, - "step": 2940 - }, - { - "epoch": 0.5021312872975278, - "grad_norm": 0.16017615795135498, - "learning_rate": 7.287733185655057e-05, - "loss": 0.009620364010334014, - "step": 2945 - }, - { - "epoch": 0.5029838022165388, - "grad_norm": 0.06721053272485733, - "learning_rate": 7.286882557094376e-05, - "loss": 0.009893904626369476, - "step": 2950 - }, - { - "epoch": 0.5038363171355499, - "grad_norm": 0.08132930099964142, - "learning_rate": 7.286030277399264e-05, - "loss": 0.012833705544471741, - "step": 2955 - }, - { - "epoch": 0.5046888320545609, - "grad_norm": 0.09076893329620361, - "learning_rate": 7.285176346967595e-05, - "loss": 0.011492121219635009, - "step": 2960 - }, - { - "epoch": 0.505541346973572, - "grad_norm": 0.1023377999663353, - "learning_rate": 7.284320766198008e-05, - "loss": 0.01216188371181488, - "step": 2965 - }, - { - "epoch": 0.5063938618925832, - "grad_norm": 0.07568195462226868, - "learning_rate": 7.283463535489921e-05, - "loss": 0.014794780313968659, - "step": 2970 - }, - { - "epoch": 0.5072463768115942, - "grad_norm": 0.11283870786428452, - "learning_rate": 7.282604655243515e-05, - "loss": 0.012774300575256348, - "step": 2975 - }, - { - "epoch": 0.5080988917306053, - "grad_norm": 0.07101167738437653, - "learning_rate": 7.281744125859746e-05, - "loss": 0.010759322345256806, - "step": 2980 - }, - { - "epoch": 0.5089514066496164, - "grad_norm": 0.07677409052848816, - "learning_rate": 7.280881947740336e-05, - "loss": 0.010482230037450791, - "step": 2985 - }, - { - "epoch": 0.5098039215686274, - "grad_norm": 0.08568017184734344, - "learning_rate": 7.280018121287777e-05, - "loss": 0.012674462795257569, - "step": 2990 - }, - { - "epoch": 0.5106564364876386, - "grad_norm": 0.07830876111984253, - "learning_rate": 7.279152646905336e-05, - "loss": 0.009349775314331055, - "step": 2995 - }, - { - "epoch": 0.5115089514066496, - "grad_norm": 0.07408280670642853, - "learning_rate": 7.278285524997044e-05, - "loss": 0.010303238779306412, - "step": 3000 - }, - { - "epoch": 0.5123614663256607, - "grad_norm": 0.09053376317024231, - "learning_rate": 7.277416755967698e-05, - "loss": 0.012187518179416656, - "step": 3005 - }, - { - "epoch": 0.5132139812446718, - "grad_norm": 0.07432437688112259, - "learning_rate": 7.276546340222875e-05, - "loss": 0.009504207968711853, - "step": 3010 - }, - { - "epoch": 0.5140664961636828, - "grad_norm": 0.09075863659381866, - "learning_rate": 7.275674278168908e-05, - "loss": 0.010764679312705994, - "step": 3015 - }, - { - "epoch": 0.514919011082694, - "grad_norm": 0.08363319933414459, - "learning_rate": 7.274800570212909e-05, - "loss": 0.011034657061100007, - "step": 3020 - }, - { - "epoch": 0.5157715260017051, - "grad_norm": 0.08179081231355667, - "learning_rate": 7.273925216762753e-05, - "loss": 0.012276624888181686, - "step": 3025 - }, - { - "epoch": 0.5166240409207161, - "grad_norm": 0.10797501355409622, - "learning_rate": 7.273048218227083e-05, - "loss": 0.008887678384780884, - "step": 3030 - }, - { - "epoch": 0.5174765558397272, - "grad_norm": 0.08237873017787933, - "learning_rate": 7.27216957501531e-05, - "loss": 0.010879174619913102, - "step": 3035 - }, - { - "epoch": 0.5183290707587382, - "grad_norm": 0.10010047256946564, - "learning_rate": 7.271289287537616e-05, - "loss": 0.0103249654173851, - "step": 3040 - }, - { - "epoch": 0.5191815856777494, - "grad_norm": 0.06411991268396378, - "learning_rate": 7.270407356204948e-05, - "loss": 0.006414853036403656, - "step": 3045 - }, - { - "epoch": 0.5200341005967605, - "grad_norm": 0.09925824403762817, - "learning_rate": 7.26952378142902e-05, - "loss": 0.010811964422464371, - "step": 3050 - }, - { - "epoch": 0.5208866155157715, - "grad_norm": 0.07986702769994736, - "learning_rate": 7.268638563622317e-05, - "loss": 0.011965467780828475, - "step": 3055 - }, - { - "epoch": 0.5217391304347826, - "grad_norm": 0.07426656037569046, - "learning_rate": 7.267751703198082e-05, - "loss": 0.0093523807823658, - "step": 3060 - }, - { - "epoch": 0.5225916453537937, - "grad_norm": 0.11460934579372406, - "learning_rate": 7.266863200570338e-05, - "loss": 0.01224176660180092, - "step": 3065 - }, - { - "epoch": 0.5234441602728048, - "grad_norm": 0.10174648463726044, - "learning_rate": 7.265973056153864e-05, - "loss": 0.011203842610120774, - "step": 3070 - }, - { - "epoch": 0.5242966751918159, - "grad_norm": 0.06445316970348358, - "learning_rate": 7.265081270364209e-05, - "loss": 0.010346656292676925, - "step": 3075 - }, - { - "epoch": 0.525149190110827, - "grad_norm": 0.08397547155618668, - "learning_rate": 7.264187843617688e-05, - "loss": 0.011372068524360656, - "step": 3080 - }, - { - "epoch": 0.526001705029838, - "grad_norm": 0.07325135916471481, - "learning_rate": 7.263292776331384e-05, - "loss": 0.01116851419210434, - "step": 3085 - }, - { - "epoch": 0.5268542199488491, - "grad_norm": 0.1034390926361084, - "learning_rate": 7.262396068923144e-05, - "loss": 0.011953853815793992, - "step": 3090 - }, - { - "epoch": 0.5277067348678602, - "grad_norm": 0.08395690470933914, - "learning_rate": 7.26149772181158e-05, - "loss": 0.011437299847602844, - "step": 3095 - }, - { - "epoch": 0.5285592497868713, - "grad_norm": 0.09495387226343155, - "learning_rate": 7.260597735416068e-05, - "loss": 0.009634804725646973, - "step": 3100 - }, - { - "epoch": 0.5294117647058824, - "grad_norm": 0.07444775849580765, - "learning_rate": 7.259696110156756e-05, - "loss": 0.009771790355443954, - "step": 3105 - }, - { - "epoch": 0.5302642796248934, - "grad_norm": 0.061964571475982666, - "learning_rate": 7.258792846454551e-05, - "loss": 0.007979755848646164, - "step": 3110 - }, - { - "epoch": 0.5311167945439045, - "grad_norm": 0.11025935411453247, - "learning_rate": 7.257887944731125e-05, - "loss": 0.012162110209465027, - "step": 3115 - }, - { - "epoch": 0.5319693094629157, - "grad_norm": 0.07793140411376953, - "learning_rate": 7.256981405408918e-05, - "loss": 0.00897146388888359, - "step": 3120 - }, - { - "epoch": 0.5328218243819267, - "grad_norm": 0.0773436427116394, - "learning_rate": 7.256073228911132e-05, - "loss": 0.009621264040470123, - "step": 3125 - }, - { - "epoch": 0.5336743393009378, - "grad_norm": 0.07340693473815918, - "learning_rate": 7.255163415661735e-05, - "loss": 0.01072111278772354, - "step": 3130 - }, - { - "epoch": 0.5345268542199488, - "grad_norm": 0.0971943810582161, - "learning_rate": 7.254251966085455e-05, - "loss": 0.009457825869321822, - "step": 3135 - }, - { - "epoch": 0.5353793691389599, - "grad_norm": 0.08840794116258621, - "learning_rate": 7.25333888060779e-05, - "loss": 0.015866565704345702, - "step": 3140 - }, - { - "epoch": 0.5362318840579711, - "grad_norm": 0.07126007229089737, - "learning_rate": 7.252424159654999e-05, - "loss": 0.012925322353839874, - "step": 3145 - }, - { - "epoch": 0.5370843989769821, - "grad_norm": 0.05989958345890045, - "learning_rate": 7.251507803654103e-05, - "loss": 0.007374878972768784, - "step": 3150 - }, - { - "epoch": 0.5379369138959932, - "grad_norm": 0.0661931037902832, - "learning_rate": 7.250589813032885e-05, - "loss": 0.009713394194841385, - "step": 3155 - }, - { - "epoch": 0.5387894288150042, - "grad_norm": 0.0813523456454277, - "learning_rate": 7.2496701882199e-05, - "loss": 0.007980254292488099, - "step": 3160 - }, - { - "epoch": 0.5396419437340153, - "grad_norm": 0.0565156452357769, - "learning_rate": 7.248748929644453e-05, - "loss": 0.010806798934936523, - "step": 3165 - }, - { - "epoch": 0.5404944586530265, - "grad_norm": 0.045107364654541016, - "learning_rate": 7.247826037736621e-05, - "loss": 0.013011330366134643, - "step": 3170 - }, - { - "epoch": 0.5413469735720375, - "grad_norm": 0.0623495988547802, - "learning_rate": 7.246901512927241e-05, - "loss": 0.012109772115945817, - "step": 3175 - }, - { - "epoch": 0.5421994884910486, - "grad_norm": 0.09943851083517075, - "learning_rate": 7.24597535564791e-05, - "loss": 0.011384092271327972, - "step": 3180 - }, - { - "epoch": 0.5430520034100597, - "grad_norm": 0.12090208381414413, - "learning_rate": 7.245047566330991e-05, - "loss": 0.011156149953603745, - "step": 3185 - }, - { - "epoch": 0.5439045183290707, - "grad_norm": 0.10226333141326904, - "learning_rate": 7.244118145409607e-05, - "loss": 0.01164291426539421, - "step": 3190 - }, - { - "epoch": 0.5447570332480819, - "grad_norm": 0.09011051058769226, - "learning_rate": 7.24318709331764e-05, - "loss": 0.009608177840709687, - "step": 3195 - }, - { - "epoch": 0.545609548167093, - "grad_norm": 0.08180241286754608, - "learning_rate": 7.24225441048974e-05, - "loss": 0.010098953545093537, - "step": 3200 - }, - { - "epoch": 0.546462063086104, - "grad_norm": 0.08325407654047012, - "learning_rate": 7.241320097361312e-05, - "loss": 0.012687146663665771, - "step": 3205 - }, - { - "epoch": 0.5473145780051151, - "grad_norm": 0.11662351340055466, - "learning_rate": 7.240384154368523e-05, - "loss": 0.012003959715366363, - "step": 3210 - }, - { - "epoch": 0.5481670929241261, - "grad_norm": 0.05904731899499893, - "learning_rate": 7.239446581948306e-05, - "loss": 0.012311330437660218, - "step": 3215 - }, - { - "epoch": 0.5490196078431373, - "grad_norm": 0.12498651444911957, - "learning_rate": 7.238507380538347e-05, - "loss": 0.011272794008255005, - "step": 3220 - }, - { - "epoch": 0.5498721227621484, - "grad_norm": 0.06047634035348892, - "learning_rate": 7.2375665505771e-05, - "loss": 0.010353527963161469, - "step": 3225 - }, - { - "epoch": 0.5507246376811594, - "grad_norm": 0.07596508413553238, - "learning_rate": 7.236624092503774e-05, - "loss": 0.011058451980352402, - "step": 3230 - }, - { - "epoch": 0.5515771526001705, - "grad_norm": 0.10000273585319519, - "learning_rate": 7.235680006758339e-05, - "loss": 0.012288159132003785, - "step": 3235 - }, - { - "epoch": 0.5524296675191815, - "grad_norm": 0.08154033869504929, - "learning_rate": 7.234734293781527e-05, - "loss": 0.015510989725589753, - "step": 3240 - }, - { - "epoch": 0.5532821824381927, - "grad_norm": 0.10024677217006683, - "learning_rate": 7.233786954014828e-05, - "loss": 0.010542219877243042, - "step": 3245 - }, - { - "epoch": 0.5541346973572038, - "grad_norm": 0.08001844584941864, - "learning_rate": 7.232837987900492e-05, - "loss": 0.009433221817016602, - "step": 3250 - }, - { - "epoch": 0.5549872122762148, - "grad_norm": 0.05274324119091034, - "learning_rate": 7.231887395881528e-05, - "loss": 0.010475738346576691, - "step": 3255 - }, - { - "epoch": 0.5558397271952259, - "grad_norm": 0.08753672242164612, - "learning_rate": 7.230935178401703e-05, - "loss": 0.007628431916236878, - "step": 3260 - }, - { - "epoch": 0.556692242114237, - "grad_norm": 0.10221699625253677, - "learning_rate": 7.229981335905545e-05, - "loss": 0.011822684109210968, - "step": 3265 - }, - { - "epoch": 0.5575447570332481, - "grad_norm": 0.07665866613388062, - "learning_rate": 7.229025868838336e-05, - "loss": 0.010916930437088013, - "step": 3270 - }, - { - "epoch": 0.5583972719522592, - "grad_norm": 0.08861260861158371, - "learning_rate": 7.228068777646125e-05, - "loss": 0.008925830572843551, - "step": 3275 - }, - { - "epoch": 0.5592497868712702, - "grad_norm": 0.08963657170534134, - "learning_rate": 7.227110062775712e-05, - "loss": 0.014812195301055908, - "step": 3280 - }, - { - "epoch": 0.5601023017902813, - "grad_norm": 0.28550851345062256, - "learning_rate": 7.226149724674655e-05, - "loss": 0.009522277861833572, - "step": 3285 - }, - { - "epoch": 0.5609548167092924, - "grad_norm": 0.057680875062942505, - "learning_rate": 7.225187763791273e-05, - "loss": 0.012893497943878174, - "step": 3290 - }, - { - "epoch": 0.5618073316283035, - "grad_norm": 0.08956284821033478, - "learning_rate": 7.224224180574642e-05, - "loss": 0.012499228864908219, - "step": 3295 - }, - { - "epoch": 0.5626598465473146, - "grad_norm": 0.11929965764284134, - "learning_rate": 7.223258975474596e-05, - "loss": 0.010640453547239304, - "step": 3300 - }, - { - "epoch": 0.5635123614663257, - "grad_norm": 0.09788426756858826, - "learning_rate": 7.222292148941722e-05, - "loss": 0.014677588641643525, - "step": 3305 - }, - { - "epoch": 0.5643648763853367, - "grad_norm": 0.08845673501491547, - "learning_rate": 7.221323701427368e-05, - "loss": 0.009266233444213868, - "step": 3310 - }, - { - "epoch": 0.5652173913043478, - "grad_norm": 0.07864493131637573, - "learning_rate": 7.220353633383636e-05, - "loss": 0.01019999384880066, - "step": 3315 - }, - { - "epoch": 0.566069906223359, - "grad_norm": 0.07658441364765167, - "learning_rate": 7.21938194526339e-05, - "loss": 0.010098284482955933, - "step": 3320 - }, - { - "epoch": 0.56692242114237, - "grad_norm": 0.058863960206508636, - "learning_rate": 7.218408637520243e-05, - "loss": 0.01043831706047058, - "step": 3325 - }, - { - "epoch": 0.5677749360613811, - "grad_norm": 0.05992535129189491, - "learning_rate": 7.217433710608567e-05, - "loss": 0.010804108530282974, - "step": 3330 - }, - { - "epoch": 0.5686274509803921, - "grad_norm": 0.10607994347810745, - "learning_rate": 7.216457164983494e-05, - "loss": 0.01115414798259735, - "step": 3335 - }, - { - "epoch": 0.5694799658994032, - "grad_norm": 0.07557345181703568, - "learning_rate": 7.215479001100904e-05, - "loss": 0.01279982328414917, - "step": 3340 - }, - { - "epoch": 0.5703324808184144, - "grad_norm": 0.064768947660923, - "learning_rate": 7.214499219417439e-05, - "loss": 0.01112583726644516, - "step": 3345 - }, - { - "epoch": 0.5711849957374254, - "grad_norm": 0.08013112843036652, - "learning_rate": 7.213517820390492e-05, - "loss": 0.01265912652015686, - "step": 3350 - }, - { - "epoch": 0.5720375106564365, - "grad_norm": 0.06619428843259811, - "learning_rate": 7.212534804478214e-05, - "loss": 0.01231289878487587, - "step": 3355 - }, - { - "epoch": 0.5728900255754475, - "grad_norm": 0.06123036891222, - "learning_rate": 7.211550172139507e-05, - "loss": 0.012096628546714783, - "step": 3360 - }, - { - "epoch": 0.5737425404944586, - "grad_norm": 0.10050475597381592, - "learning_rate": 7.210563923834034e-05, - "loss": 0.014050082862377166, - "step": 3365 - }, - { - "epoch": 0.5745950554134698, - "grad_norm": 0.05243556201457977, - "learning_rate": 7.209576060022207e-05, - "loss": 0.009351913630962373, - "step": 3370 - }, - { - "epoch": 0.5754475703324808, - "grad_norm": 0.12591946125030518, - "learning_rate": 7.208586581165192e-05, - "loss": 0.012423963844776153, - "step": 3375 - }, - { - "epoch": 0.5763000852514919, - "grad_norm": 0.11871001869440079, - "learning_rate": 7.207595487724912e-05, - "loss": 0.014398403465747833, - "step": 3380 - }, - { - "epoch": 0.577152600170503, - "grad_norm": 0.09194283187389374, - "learning_rate": 7.206602780164044e-05, - "loss": 0.009020231664180756, - "step": 3385 - }, - { - "epoch": 0.578005115089514, - "grad_norm": 0.1465149074792862, - "learning_rate": 7.205608458946013e-05, - "loss": 0.009870749711990357, - "step": 3390 - }, - { - "epoch": 0.5788576300085252, - "grad_norm": 0.07948209345340729, - "learning_rate": 7.204612524535006e-05, - "loss": 0.013135011494159698, - "step": 3395 - }, - { - "epoch": 0.5797101449275363, - "grad_norm": 0.07187635451555252, - "learning_rate": 7.203614977395952e-05, - "loss": 0.010598786920309067, - "step": 3400 - }, - { - "epoch": 0.5805626598465473, - "grad_norm": 0.05511854961514473, - "learning_rate": 7.202615817994545e-05, - "loss": 0.009227041155099869, - "step": 3405 - }, - { - "epoch": 0.5814151747655584, - "grad_norm": 0.05830230563879013, - "learning_rate": 7.201615046797224e-05, - "loss": 0.008167321979999542, - "step": 3410 - }, - { - "epoch": 0.5822676896845694, - "grad_norm": 0.08624587953090668, - "learning_rate": 7.200612664271184e-05, - "loss": 0.012134125083684921, - "step": 3415 - }, - { - "epoch": 0.5831202046035806, - "grad_norm": 0.0744808092713356, - "learning_rate": 7.199608670884366e-05, - "loss": 0.012493259459733962, - "step": 3420 - }, - { - "epoch": 0.5839727195225917, - "grad_norm": 0.07272766530513763, - "learning_rate": 7.19860306710547e-05, - "loss": 0.00806736946105957, - "step": 3425 - }, - { - "epoch": 0.5848252344416027, - "grad_norm": 0.0804983377456665, - "learning_rate": 7.197595853403946e-05, - "loss": 0.01102890819311142, - "step": 3430 - }, - { - "epoch": 0.5856777493606138, - "grad_norm": 0.05326579511165619, - "learning_rate": 7.196587030249994e-05, - "loss": 0.009381016343832016, - "step": 3435 - }, - { - "epoch": 0.5865302642796248, - "grad_norm": 0.07588013261556625, - "learning_rate": 7.195576598114567e-05, - "loss": 0.010961712896823883, - "step": 3440 - }, - { - "epoch": 0.587382779198636, - "grad_norm": 0.09725244343280792, - "learning_rate": 7.194564557469368e-05, - "loss": 0.012034715712070465, - "step": 3445 - }, - { - "epoch": 0.5882352941176471, - "grad_norm": 0.0938539057970047, - "learning_rate": 7.193550908786851e-05, - "loss": 0.012069541215896606, - "step": 3450 - }, - { - "epoch": 0.5890878090366581, - "grad_norm": 0.052410729229450226, - "learning_rate": 7.19253565254022e-05, - "loss": 0.011174223572015762, - "step": 3455 - }, - { - "epoch": 0.5899403239556692, - "grad_norm": 0.08317258954048157, - "learning_rate": 7.191518789203432e-05, - "loss": 0.014452503621578216, - "step": 3460 - }, - { - "epoch": 0.5907928388746803, - "grad_norm": 0.062619149684906, - "learning_rate": 7.190500319251193e-05, - "loss": 0.012830793857574463, - "step": 3465 - }, - { - "epoch": 0.5916453537936914, - "grad_norm": 0.06287284195423126, - "learning_rate": 7.189480243158956e-05, - "loss": 0.013282649219036102, - "step": 3470 - }, - { - "epoch": 0.5924978687127025, - "grad_norm": 0.07136182487010956, - "learning_rate": 7.188458561402928e-05, - "loss": 0.009024892747402192, - "step": 3475 - }, - { - "epoch": 0.5933503836317136, - "grad_norm": 0.09081269055604935, - "learning_rate": 7.187435274460064e-05, - "loss": 0.012044035643339158, - "step": 3480 - }, - { - "epoch": 0.5942028985507246, - "grad_norm": 0.08475978672504425, - "learning_rate": 7.18641038280807e-05, - "loss": 0.010785829275846481, - "step": 3485 - }, - { - "epoch": 0.5950554134697357, - "grad_norm": 0.06322979927062988, - "learning_rate": 7.185383886925397e-05, - "loss": 0.011609486490488052, - "step": 3490 - }, - { - "epoch": 0.5959079283887468, - "grad_norm": 0.07065978646278381, - "learning_rate": 7.18435578729125e-05, - "loss": 0.01128239706158638, - "step": 3495 - }, - { - "epoch": 0.5967604433077579, - "grad_norm": 0.057962607592344284, - "learning_rate": 7.183326084385577e-05, - "loss": 0.009382489323616027, - "step": 3500 - }, - { - "epoch": 0.597612958226769, - "grad_norm": 0.05717672407627106, - "learning_rate": 7.182294778689079e-05, - "loss": 0.010072773694992066, - "step": 3505 - }, - { - "epoch": 0.59846547314578, - "grad_norm": 0.07161569595336914, - "learning_rate": 7.181261870683205e-05, - "loss": 0.011324245482683182, - "step": 3510 - }, - { - "epoch": 0.5993179880647911, - "grad_norm": 0.07468906790018082, - "learning_rate": 7.180227360850148e-05, - "loss": 0.00984283909201622, - "step": 3515 - }, - { - "epoch": 0.6001705029838023, - "grad_norm": 0.071560800075531, - "learning_rate": 7.179191249672855e-05, - "loss": 0.011276674270629884, - "step": 3520 - }, - { - "epoch": 0.6010230179028133, - "grad_norm": 0.05588390305638313, - "learning_rate": 7.178153537635014e-05, - "loss": 0.008921106159687043, - "step": 3525 - }, - { - "epoch": 0.6018755328218244, - "grad_norm": 0.11065732687711716, - "learning_rate": 7.177114225221066e-05, - "loss": 0.0122377447783947, - "step": 3530 - }, - { - "epoch": 0.6027280477408354, - "grad_norm": 0.10121116787195206, - "learning_rate": 7.176073312916194e-05, - "loss": 0.007999545335769654, - "step": 3535 - }, - { - "epoch": 0.6035805626598465, - "grad_norm": 0.06102030724287033, - "learning_rate": 7.175030801206335e-05, - "loss": 0.008767658472061157, - "step": 3540 - }, - { - "epoch": 0.6044330775788577, - "grad_norm": 0.08233699947595596, - "learning_rate": 7.173986690578164e-05, - "loss": 0.010089017450809479, - "step": 3545 - }, - { - "epoch": 0.6052855924978687, - "grad_norm": 0.1655152440071106, - "learning_rate": 7.172940981519108e-05, - "loss": 0.012077460438013077, - "step": 3550 - }, - { - "epoch": 0.6061381074168798, - "grad_norm": 0.11178915202617645, - "learning_rate": 7.171893674517337e-05, - "loss": 0.009319285303354264, - "step": 3555 - }, - { - "epoch": 0.6069906223358909, - "grad_norm": 0.0778600424528122, - "learning_rate": 7.170844770061772e-05, - "loss": 0.012114962190389633, - "step": 3560 - }, - { - "epoch": 0.6078431372549019, - "grad_norm": 0.08708171546459198, - "learning_rate": 7.169794268642075e-05, - "loss": 0.011569589376449585, - "step": 3565 - }, - { - "epoch": 0.6086956521739131, - "grad_norm": 0.06438080966472626, - "learning_rate": 7.168742170748654e-05, - "loss": 0.010296766459941865, - "step": 3570 - }, - { - "epoch": 0.6095481670929241, - "grad_norm": 0.10569975525140762, - "learning_rate": 7.167688476872664e-05, - "loss": 0.008922196924686432, - "step": 3575 - }, - { - "epoch": 0.6104006820119352, - "grad_norm": 0.07466918975114822, - "learning_rate": 7.166633187506004e-05, - "loss": 0.009365256130695342, - "step": 3580 - }, - { - "epoch": 0.6112531969309463, - "grad_norm": 0.1070641577243805, - "learning_rate": 7.16557630314132e-05, - "loss": 0.011525402963161468, - "step": 3585 - }, - { - "epoch": 0.6121057118499573, - "grad_norm": 0.09534542262554169, - "learning_rate": 7.164517824271999e-05, - "loss": 0.010068083554506302, - "step": 3590 - }, - { - "epoch": 0.6129582267689685, - "grad_norm": 0.0643506869673729, - "learning_rate": 7.163457751392175e-05, - "loss": 0.010679592937231063, - "step": 3595 - }, - { - "epoch": 0.6138107416879796, - "grad_norm": 0.11610018461942673, - "learning_rate": 7.162396084996723e-05, - "loss": 0.010074391961097717, - "step": 3600 - }, - { - "epoch": 0.6146632566069906, - "grad_norm": 0.07560709863901138, - "learning_rate": 7.161332825581269e-05, - "loss": 0.013245916366577149, - "step": 3605 - }, - { - "epoch": 0.6155157715260017, - "grad_norm": 0.06540799885988235, - "learning_rate": 7.160267973642173e-05, - "loss": 0.01055695340037346, - "step": 3610 - }, - { - "epoch": 0.6163682864450127, - "grad_norm": 0.05610837787389755, - "learning_rate": 7.159201529676546e-05, - "loss": 0.010231484472751618, - "step": 3615 - }, - { - "epoch": 0.6172208013640239, - "grad_norm": 0.11630856245756149, - "learning_rate": 7.158133494182237e-05, - "loss": 0.01117742881178856, - "step": 3620 - }, - { - "epoch": 0.618073316283035, - "grad_norm": 0.08508500456809998, - "learning_rate": 7.157063867657844e-05, - "loss": 0.010253986716270447, - "step": 3625 - }, - { - "epoch": 0.618925831202046, - "grad_norm": 0.067935511469841, - "learning_rate": 7.155992650602702e-05, - "loss": 0.009731527417898178, - "step": 3630 - }, - { - "epoch": 0.6197783461210571, - "grad_norm": 0.0784364566206932, - "learning_rate": 7.154919843516892e-05, - "loss": 0.009552852809429168, - "step": 3635 - }, - { - "epoch": 0.6206308610400681, - "grad_norm": 0.10788855701684952, - "learning_rate": 7.153845446901234e-05, - "loss": 0.011269643902778625, - "step": 3640 - }, - { - "epoch": 0.6214833759590793, - "grad_norm": 0.08664087951183319, - "learning_rate": 7.152769461257294e-05, - "loss": 0.010251335799694061, - "step": 3645 - }, - { - "epoch": 0.6223358908780904, - "grad_norm": 0.06885403394699097, - "learning_rate": 7.151691887087377e-05, - "loss": 0.008078257739543914, - "step": 3650 - }, - { - "epoch": 0.6231884057971014, - "grad_norm": 0.09345501661300659, - "learning_rate": 7.150612724894531e-05, - "loss": 0.012022207677364349, - "step": 3655 - }, - { - "epoch": 0.6240409207161125, - "grad_norm": 0.08502865582704544, - "learning_rate": 7.149531975182543e-05, - "loss": 0.00932946428656578, - "step": 3660 - }, - { - "epoch": 0.6248934356351236, - "grad_norm": 0.06249995157122612, - "learning_rate": 7.148449638455947e-05, - "loss": 0.011525212973356246, - "step": 3665 - }, - { - "epoch": 0.6257459505541347, - "grad_norm": 0.04836896434426308, - "learning_rate": 7.14736571522001e-05, - "loss": 0.010347714275121688, - "step": 3670 - }, - { - "epoch": 0.6265984654731458, - "grad_norm": 0.06358285248279572, - "learning_rate": 7.146280205980745e-05, - "loss": 0.009779715538024902, - "step": 3675 - }, - { - "epoch": 0.6274509803921569, - "grad_norm": 0.07596850395202637, - "learning_rate": 7.145193111244903e-05, - "loss": 0.010838811099529267, - "step": 3680 - }, - { - "epoch": 0.6283034953111679, - "grad_norm": 0.05986448749899864, - "learning_rate": 7.144104431519977e-05, - "loss": 0.009671849757432937, - "step": 3685 - }, - { - "epoch": 0.629156010230179, - "grad_norm": 0.047881439328193665, - "learning_rate": 7.143014167314197e-05, - "loss": 0.007660867273807525, - "step": 3690 - }, - { - "epoch": 0.6300085251491901, - "grad_norm": 0.06409293413162231, - "learning_rate": 7.141922319136537e-05, - "loss": 0.013374905288219451, - "step": 3695 - }, - { - "epoch": 0.6308610400682012, - "grad_norm": 0.0767306461930275, - "learning_rate": 7.140828887496707e-05, - "loss": 0.006885652989149093, - "step": 3700 - }, - { - "epoch": 0.6317135549872123, - "grad_norm": 0.08192065358161926, - "learning_rate": 7.139733872905158e-05, - "loss": 0.013760556280612946, - "step": 3705 - }, - { - "epoch": 0.6325660699062233, - "grad_norm": 0.09693574160337448, - "learning_rate": 7.138637275873078e-05, - "loss": 0.009739194065332413, - "step": 3710 - }, - { - "epoch": 0.6334185848252344, - "grad_norm": 0.08232755959033966, - "learning_rate": 7.137539096912395e-05, - "loss": 0.010294197499752045, - "step": 3715 - }, - { - "epoch": 0.6342710997442456, - "grad_norm": 0.06582340598106384, - "learning_rate": 7.136439336535776e-05, - "loss": 0.010686574131250381, - "step": 3720 - }, - { - "epoch": 0.6351236146632566, - "grad_norm": 0.07385887205600739, - "learning_rate": 7.135337995256626e-05, - "loss": 0.011403677612543106, - "step": 3725 - }, - { - "epoch": 0.6359761295822677, - "grad_norm": 0.11854248493909836, - "learning_rate": 7.134235073589087e-05, - "loss": 0.01180308759212494, - "step": 3730 - }, - { - "epoch": 0.6368286445012787, - "grad_norm": 0.076481893658638, - "learning_rate": 7.133130572048041e-05, - "loss": 0.011076596379280091, - "step": 3735 - }, - { - "epoch": 0.6376811594202898, - "grad_norm": 0.09552651643753052, - "learning_rate": 7.132024491149103e-05, - "loss": 0.014420199394226074, - "step": 3740 - }, - { - "epoch": 0.638533674339301, - "grad_norm": 0.04855124279856682, - "learning_rate": 7.130916831408633e-05, - "loss": 0.008350960910320282, - "step": 3745 - }, - { - "epoch": 0.639386189258312, - "grad_norm": 0.0796368345618248, - "learning_rate": 7.12980759334372e-05, - "loss": 0.010764746367931366, - "step": 3750 - }, - { - "epoch": 0.6402387041773231, - "grad_norm": 0.07030697911977768, - "learning_rate": 7.128696777472193e-05, - "loss": 0.010386807471513748, - "step": 3755 - }, - { - "epoch": 0.6410912190963342, - "grad_norm": 0.05930609628558159, - "learning_rate": 7.127584384312619e-05, - "loss": 0.008884093910455703, - "step": 3760 - }, - { - "epoch": 0.6419437340153452, - "grad_norm": 0.07495228201150894, - "learning_rate": 7.126470414384299e-05, - "loss": 0.010249865800142288, - "step": 3765 - }, - { - "epoch": 0.6427962489343564, - "grad_norm": 0.12954963743686676, - "learning_rate": 7.125354868207275e-05, - "loss": 0.013017497956752777, - "step": 3770 - }, - { - "epoch": 0.6436487638533674, - "grad_norm": 0.08893310278654099, - "learning_rate": 7.124237746302317e-05, - "loss": 0.010649867355823517, - "step": 3775 - }, - { - "epoch": 0.6445012787723785, - "grad_norm": 0.08650866150856018, - "learning_rate": 7.123119049190935e-05, - "loss": 0.012544044852256775, - "step": 3780 - }, - { - "epoch": 0.6453537936913896, - "grad_norm": 0.06374052166938782, - "learning_rate": 7.121998777395375e-05, - "loss": 0.007669864594936371, - "step": 3785 - }, - { - "epoch": 0.6462063086104007, - "grad_norm": 0.08226713538169861, - "learning_rate": 7.120876931438618e-05, - "loss": 0.007969621568918228, - "step": 3790 - }, - { - "epoch": 0.6470588235294118, - "grad_norm": 0.10450884699821472, - "learning_rate": 7.119753511844377e-05, - "loss": 0.013088032603263855, - "step": 3795 - }, - { - "epoch": 0.6479113384484229, - "grad_norm": 0.08459076285362244, - "learning_rate": 7.118628519137104e-05, - "loss": 0.01125529408454895, - "step": 3800 - }, - { - "epoch": 0.6487638533674339, - "grad_norm": 0.09018636494874954, - "learning_rate": 7.11750195384198e-05, - "loss": 0.008683501929044723, - "step": 3805 - }, - { - "epoch": 0.649616368286445, - "grad_norm": 0.07949680835008621, - "learning_rate": 7.116373816484927e-05, - "loss": 0.008904790878295899, - "step": 3810 - }, - { - "epoch": 0.6504688832054561, - "grad_norm": 0.14297716319561005, - "learning_rate": 7.115244107592593e-05, - "loss": 0.01503775417804718, - "step": 3815 - }, - { - "epoch": 0.6513213981244672, - "grad_norm": 0.051478032022714615, - "learning_rate": 7.114112827692367e-05, - "loss": 0.011145923286676407, - "step": 3820 - }, - { - "epoch": 0.6521739130434783, - "grad_norm": 0.0686139240860939, - "learning_rate": 7.112979977312365e-05, - "loss": 0.009445450454950332, - "step": 3825 - }, - { - "epoch": 0.6530264279624893, - "grad_norm": 0.08674909919500351, - "learning_rate": 7.111845556981444e-05, - "loss": 0.009345399588346482, - "step": 3830 - }, - { - "epoch": 0.6538789428815004, - "grad_norm": 0.07799270749092102, - "learning_rate": 7.110709567229182e-05, - "loss": 0.009722919762134552, - "step": 3835 - }, - { - "epoch": 0.6547314578005116, - "grad_norm": 0.07891912013292313, - "learning_rate": 7.109572008585905e-05, - "loss": 0.009985177218914032, - "step": 3840 - }, - { - "epoch": 0.6555839727195226, - "grad_norm": 0.07315738499164581, - "learning_rate": 7.108432881582656e-05, - "loss": 0.011729113757610321, - "step": 3845 - }, - { - "epoch": 0.6564364876385337, - "grad_norm": 0.04961124807596207, - "learning_rate": 7.107292186751222e-05, - "loss": 0.008087723702192306, - "step": 3850 - }, - { - "epoch": 0.6572890025575447, - "grad_norm": 0.0745200589299202, - "learning_rate": 7.106149924624115e-05, - "loss": 0.010474404692649842, - "step": 3855 - }, - { - "epoch": 0.6581415174765558, - "grad_norm": 0.06290512531995773, - "learning_rate": 7.105006095734581e-05, - "loss": 0.009356130659580231, - "step": 3860 - }, - { - "epoch": 0.658994032395567, - "grad_norm": 0.058479905128479004, - "learning_rate": 7.1038607006166e-05, - "loss": 0.008637580275535583, - "step": 3865 - }, - { - "epoch": 0.659846547314578, - "grad_norm": 0.07301484048366547, - "learning_rate": 7.102713739804879e-05, - "loss": 0.015610474348068237, - "step": 3870 - }, - { - "epoch": 0.6606990622335891, - "grad_norm": 0.07421465218067169, - "learning_rate": 7.101565213834855e-05, - "loss": 0.011201824992895126, - "step": 3875 - }, - { - "epoch": 0.6615515771526002, - "grad_norm": 0.06928746402263641, - "learning_rate": 7.100415123242701e-05, - "loss": 0.007224821299314499, - "step": 3880 - }, - { - "epoch": 0.6624040920716112, - "grad_norm": 0.0669165551662445, - "learning_rate": 7.099263468565317e-05, - "loss": 0.007274401932954788, - "step": 3885 - }, - { - "epoch": 0.6632566069906224, - "grad_norm": 0.09326919168233871, - "learning_rate": 7.098110250340334e-05, - "loss": 0.008258016407489776, - "step": 3890 - }, - { - "epoch": 0.6641091219096334, - "grad_norm": 0.07563190162181854, - "learning_rate": 7.096955469106111e-05, - "loss": 0.01005811095237732, - "step": 3895 - }, - { - "epoch": 0.6649616368286445, - "grad_norm": 0.10135438293218613, - "learning_rate": 7.09579912540174e-05, - "loss": 0.009129725396633148, - "step": 3900 - }, - { - "epoch": 0.6658141517476556, - "grad_norm": 0.07946127653121948, - "learning_rate": 7.094641219767041e-05, - "loss": 0.013300496339797973, - "step": 3905 - }, - { - "epoch": 0.6666666666666666, - "grad_norm": 0.0727713331580162, - "learning_rate": 7.093481752742561e-05, - "loss": 0.01028701215982437, - "step": 3910 - }, - { - "epoch": 0.6675191815856778, - "grad_norm": 0.0718616396188736, - "learning_rate": 7.092320724869578e-05, - "loss": 0.009694813191890717, - "step": 3915 - }, - { - "epoch": 0.6683716965046889, - "grad_norm": 0.07789818942546844, - "learning_rate": 7.091158136690102e-05, - "loss": 0.009028838574886322, - "step": 3920 - }, - { - "epoch": 0.6692242114236999, - "grad_norm": 0.07319378852844238, - "learning_rate": 7.089993988746862e-05, - "loss": 0.008582034707069397, - "step": 3925 - }, - { - "epoch": 0.670076726342711, - "grad_norm": 0.086976557970047, - "learning_rate": 7.088828281583326e-05, - "loss": 0.013991822302341462, - "step": 3930 - }, - { - "epoch": 0.670929241261722, - "grad_norm": 0.07413294911384583, - "learning_rate": 7.087661015743681e-05, - "loss": 0.010896880924701691, - "step": 3935 - }, - { - "epoch": 0.6717817561807332, - "grad_norm": 0.12066303938627243, - "learning_rate": 7.08649219177285e-05, - "loss": 0.011574408411979676, - "step": 3940 - }, - { - "epoch": 0.6726342710997443, - "grad_norm": 0.11789914965629578, - "learning_rate": 7.085321810216474e-05, - "loss": 0.011523760855197906, - "step": 3945 - }, - { - "epoch": 0.6734867860187553, - "grad_norm": 0.07654725015163422, - "learning_rate": 7.084149871620929e-05, - "loss": 0.010388451814651489, - "step": 3950 - }, - { - "epoch": 0.6743393009377664, - "grad_norm": 0.05072671175003052, - "learning_rate": 7.082976376533315e-05, - "loss": 0.009674163907766343, - "step": 3955 - }, - { - "epoch": 0.6751918158567775, - "grad_norm": 0.08331634104251862, - "learning_rate": 7.081801325501458e-05, - "loss": 0.01096268892288208, - "step": 3960 - }, - { - "epoch": 0.6760443307757886, - "grad_norm": 0.03134739026427269, - "learning_rate": 7.08062471907391e-05, - "loss": 0.009522407501935958, - "step": 3965 - }, - { - "epoch": 0.6768968456947997, - "grad_norm": 0.09123755246400833, - "learning_rate": 7.079446557799951e-05, - "loss": 0.011530914902687072, - "step": 3970 - }, - { - "epoch": 0.6777493606138107, - "grad_norm": 0.04438139498233795, - "learning_rate": 7.078266842229585e-05, - "loss": 0.007757561653852463, - "step": 3975 - }, - { - "epoch": 0.6786018755328218, - "grad_norm": 0.06562457978725433, - "learning_rate": 7.077085572913543e-05, - "loss": 0.010243573784828186, - "step": 3980 - }, - { - "epoch": 0.6794543904518329, - "grad_norm": 0.08872365206480026, - "learning_rate": 7.075902750403283e-05, - "loss": 0.009365381300449371, - "step": 3985 - }, - { - "epoch": 0.680306905370844, - "grad_norm": 0.06977558881044388, - "learning_rate": 7.074718375250982e-05, - "loss": 0.010138784348964692, - "step": 3990 - }, - { - "epoch": 0.6811594202898551, - "grad_norm": 0.08196771889925003, - "learning_rate": 7.073532448009547e-05, - "loss": 0.011172623187303544, - "step": 3995 - }, - { - "epoch": 0.6820119352088662, - "grad_norm": 0.09635947644710541, - "learning_rate": 7.072344969232611e-05, - "loss": 0.011570926010608672, - "step": 4000 - }, - { - "epoch": 0.6828644501278772, - "grad_norm": 0.10703961551189423, - "learning_rate": 7.071155939474525e-05, - "loss": 0.010987398028373719, - "step": 4005 - }, - { - "epoch": 0.6837169650468883, - "grad_norm": 0.08124027401208878, - "learning_rate": 7.06996535929037e-05, - "loss": 0.009500280767679215, - "step": 4010 - }, - { - "epoch": 0.6845694799658995, - "grad_norm": 0.1272915005683899, - "learning_rate": 7.068773229235946e-05, - "loss": 0.011316341906785965, - "step": 4015 - }, - { - "epoch": 0.6854219948849105, - "grad_norm": 0.05040539428591728, - "learning_rate": 7.067579549867782e-05, - "loss": 0.009714095294475556, - "step": 4020 - }, - { - "epoch": 0.6862745098039216, - "grad_norm": 0.0954902172088623, - "learning_rate": 7.066384321743125e-05, - "loss": 0.01280902624130249, - "step": 4025 - }, - { - "epoch": 0.6871270247229326, - "grad_norm": 0.06131720915436745, - "learning_rate": 7.065187545419947e-05, - "loss": 0.00962383598089218, - "step": 4030 - }, - { - "epoch": 0.6879795396419437, - "grad_norm": 0.09720136225223541, - "learning_rate": 7.063989221456946e-05, - "loss": 0.00951121300458908, - "step": 4035 - }, - { - "epoch": 0.6888320545609549, - "grad_norm": 0.09116765856742859, - "learning_rate": 7.062789350413536e-05, - "loss": 0.012013505399227142, - "step": 4040 - }, - { - "epoch": 0.6896845694799659, - "grad_norm": 0.15665945410728455, - "learning_rate": 7.061587932849858e-05, - "loss": 0.012792985141277313, - "step": 4045 - }, - { - "epoch": 0.690537084398977, - "grad_norm": 0.05531725287437439, - "learning_rate": 7.060384969326775e-05, - "loss": 0.009135130047798156, - "step": 4050 - }, - { - "epoch": 0.691389599317988, - "grad_norm": 0.05050938203930855, - "learning_rate": 7.059180460405869e-05, - "loss": 0.01005997508764267, - "step": 4055 - }, - { - "epoch": 0.6922421142369991, - "grad_norm": 0.08903607726097107, - "learning_rate": 7.057974406649444e-05, - "loss": 0.008456438779830933, - "step": 4060 - }, - { - "epoch": 0.6930946291560103, - "grad_norm": 0.1395196169614792, - "learning_rate": 7.056766808620529e-05, - "loss": 0.012946255505084991, - "step": 4065 - }, - { - "epoch": 0.6939471440750213, - "grad_norm": 0.13844923675060272, - "learning_rate": 7.055557666882866e-05, - "loss": 0.008691005408763885, - "step": 4070 - }, - { - "epoch": 0.6947996589940324, - "grad_norm": 0.0776091143488884, - "learning_rate": 7.054346982000928e-05, - "loss": 0.011200450360774994, - "step": 4075 - }, - { - "epoch": 0.6956521739130435, - "grad_norm": 0.06444083154201508, - "learning_rate": 7.0531347545399e-05, - "loss": 0.010937537997961044, - "step": 4080 - }, - { - "epoch": 0.6965046888320545, - "grad_norm": 0.07561453431844711, - "learning_rate": 7.05192098506569e-05, - "loss": 0.00827426165342331, - "step": 4085 - }, - { - "epoch": 0.6973572037510657, - "grad_norm": 0.06595294177532196, - "learning_rate": 7.050705674144927e-05, - "loss": 0.007974696159362794, - "step": 4090 - }, - { - "epoch": 0.6982097186700768, - "grad_norm": 0.08887284994125366, - "learning_rate": 7.049488822344959e-05, - "loss": 0.009547770768404008, - "step": 4095 - }, - { - "epoch": 0.6990622335890878, - "grad_norm": 0.06858290731906891, - "learning_rate": 7.04827043023385e-05, - "loss": 0.012419018894433975, - "step": 4100 - }, - { - "epoch": 0.6999147485080989, - "grad_norm": 0.09107037633657455, - "learning_rate": 7.047050498380391e-05, - "loss": 0.008159243315458298, - "step": 4105 - }, - { - "epoch": 0.7007672634271099, - "grad_norm": 0.062287479639053345, - "learning_rate": 7.045829027354082e-05, - "loss": 0.00995248556137085, - "step": 4110 - }, - { - "epoch": 0.7016197783461211, - "grad_norm": 0.11668206751346588, - "learning_rate": 7.044606017725148e-05, - "loss": 0.012902414798736573, - "step": 4115 - }, - { - "epoch": 0.7024722932651322, - "grad_norm": 0.08674585819244385, - "learning_rate": 7.043381470064532e-05, - "loss": 0.010076310485601425, - "step": 4120 - }, - { - "epoch": 0.7033248081841432, - "grad_norm": 0.09690031409263611, - "learning_rate": 7.042155384943892e-05, - "loss": 0.011086124181747436, - "step": 4125 - }, - { - "epoch": 0.7041773231031543, - "grad_norm": 0.09527027606964111, - "learning_rate": 7.040927762935605e-05, - "loss": 0.010631310194730759, - "step": 4130 - }, - { - "epoch": 0.7050298380221653, - "grad_norm": 0.07089316844940186, - "learning_rate": 7.039698604612765e-05, - "loss": 0.010472215712070465, - "step": 4135 - }, - { - "epoch": 0.7058823529411765, - "grad_norm": 0.07314343005418777, - "learning_rate": 7.038467910549188e-05, - "loss": 0.011205179244279861, - "step": 4140 - }, - { - "epoch": 0.7067348678601876, - "grad_norm": 0.10004976391792297, - "learning_rate": 7.037235681319399e-05, - "loss": 0.011671188473701476, - "step": 4145 - }, - { - "epoch": 0.7075873827791986, - "grad_norm": 0.06655722856521606, - "learning_rate": 7.036001917498645e-05, - "loss": 0.008725546300411224, - "step": 4150 - }, - { - "epoch": 0.7084398976982097, - "grad_norm": 0.0563860684633255, - "learning_rate": 7.034766619662888e-05, - "loss": 0.009952855855226516, - "step": 4155 - }, - { - "epoch": 0.7092924126172208, - "grad_norm": 0.09032288193702698, - "learning_rate": 7.033529788388806e-05, - "loss": 0.010940121859312058, - "step": 4160 - }, - { - "epoch": 0.7101449275362319, - "grad_norm": 0.10090665519237518, - "learning_rate": 7.032291424253793e-05, - "loss": 0.0093452550470829, - "step": 4165 - }, - { - "epoch": 0.710997442455243, - "grad_norm": 0.08737532049417496, - "learning_rate": 7.03105152783596e-05, - "loss": 0.011567962169647217, - "step": 4170 - }, - { - "epoch": 0.711849957374254, - "grad_norm": 0.08184633404016495, - "learning_rate": 7.029810099714128e-05, - "loss": 0.011243235319852829, - "step": 4175 - }, - { - "epoch": 0.7127024722932651, - "grad_norm": 0.10227608680725098, - "learning_rate": 7.028567140467842e-05, - "loss": 0.01062348037958145, - "step": 4180 - }, - { - "epoch": 0.7135549872122762, - "grad_norm": 0.08998764306306839, - "learning_rate": 7.027322650677353e-05, - "loss": 0.01058843582868576, - "step": 4185 - }, - { - "epoch": 0.7144075021312873, - "grad_norm": 0.06969588994979858, - "learning_rate": 7.02607663092363e-05, - "loss": 0.009745591133832932, - "step": 4190 - }, - { - "epoch": 0.7152600170502984, - "grad_norm": 0.08256277441978455, - "learning_rate": 7.024829081788359e-05, - "loss": 0.009450466185808182, - "step": 4195 - }, - { - "epoch": 0.7161125319693095, - "grad_norm": 0.06720574200153351, - "learning_rate": 7.023580003853937e-05, - "loss": 0.006700781732797622, - "step": 4200 - }, - { - "epoch": 0.7169650468883205, - "grad_norm": 0.1428842693567276, - "learning_rate": 7.022329397703474e-05, - "loss": 0.009295140206813813, - "step": 4205 - }, - { - "epoch": 0.7178175618073316, - "grad_norm": 0.11677515506744385, - "learning_rate": 7.021077263920794e-05, - "loss": 0.011417123675346374, - "step": 4210 - }, - { - "epoch": 0.7186700767263428, - "grad_norm": 0.06874742358922958, - "learning_rate": 7.019823603090437e-05, - "loss": 0.013518881797790528, - "step": 4215 - }, - { - "epoch": 0.7195225916453538, - "grad_norm": 0.06695922464132309, - "learning_rate": 7.018568415797651e-05, - "loss": 0.008886832743883133, - "step": 4220 - }, - { - "epoch": 0.7203751065643649, - "grad_norm": 0.09428033232688904, - "learning_rate": 7.017311702628402e-05, - "loss": 0.009926854819059371, - "step": 4225 - }, - { - "epoch": 0.7212276214833759, - "grad_norm": 0.08420582115650177, - "learning_rate": 7.016053464169362e-05, - "loss": 0.011952979117631912, - "step": 4230 - }, - { - "epoch": 0.722080136402387, - "grad_norm": 0.07804932445287704, - "learning_rate": 7.014793701007922e-05, - "loss": 0.009345601499080657, - "step": 4235 - }, - { - "epoch": 0.7229326513213982, - "grad_norm": 0.10204415768384933, - "learning_rate": 7.013532413732179e-05, - "loss": 0.009596188366413117, - "step": 4240 - }, - { - "epoch": 0.7237851662404092, - "grad_norm": 0.06207561865448952, - "learning_rate": 7.012269602930946e-05, - "loss": 0.010864783823490144, - "step": 4245 - }, - { - "epoch": 0.7246376811594203, - "grad_norm": 0.07258995622396469, - "learning_rate": 7.011005269193743e-05, - "loss": 0.010417935252189637, - "step": 4250 - }, - { - "epoch": 0.7254901960784313, - "grad_norm": 0.09797866642475128, - "learning_rate": 7.009739413110803e-05, - "loss": 0.009975450485944748, - "step": 4255 - }, - { - "epoch": 0.7263427109974424, - "grad_norm": 0.14229723811149597, - "learning_rate": 7.008472035273071e-05, - "loss": 0.013093425333499909, - "step": 4260 - }, - { - "epoch": 0.7271952259164536, - "grad_norm": 0.10052912682294846, - "learning_rate": 7.007203136272202e-05, - "loss": 0.008044174313545227, - "step": 4265 - }, - { - "epoch": 0.7280477408354646, - "grad_norm": 0.06391840428113937, - "learning_rate": 7.005932716700558e-05, - "loss": 0.009895801544189453, - "step": 4270 - }, - { - "epoch": 0.7289002557544757, - "grad_norm": 0.08301703631877899, - "learning_rate": 7.004660777151213e-05, - "loss": 0.008399789780378341, - "step": 4275 - }, - { - "epoch": 0.7297527706734868, - "grad_norm": 0.09191301465034485, - "learning_rate": 7.003387318217954e-05, - "loss": 0.010163726657629013, - "step": 4280 - }, - { - "epoch": 0.7306052855924978, - "grad_norm": 0.06292443722486496, - "learning_rate": 7.00211234049527e-05, - "loss": 0.010847686976194381, - "step": 4285 - }, - { - "epoch": 0.731457800511509, - "grad_norm": 0.08020442724227905, - "learning_rate": 7.000835844578365e-05, - "loss": 0.010198664665222169, - "step": 4290 - }, - { - "epoch": 0.73231031543052, - "grad_norm": 0.08008337765932083, - "learning_rate": 6.999557831063152e-05, - "loss": 0.010532062500715256, - "step": 4295 - }, - { - "epoch": 0.7331628303495311, - "grad_norm": 0.09209048002958298, - "learning_rate": 6.998278300546245e-05, - "loss": 0.012655872106552123, - "step": 4300 - }, - { - "epoch": 0.7340153452685422, - "grad_norm": 0.1040966734290123, - "learning_rate": 6.996997253624974e-05, - "loss": 0.009482499212026596, - "step": 4305 - }, - { - "epoch": 0.7348678601875532, - "grad_norm": 0.06724270433187485, - "learning_rate": 6.995714690897376e-05, - "loss": 0.008470554649829865, - "step": 4310 - }, - { - "epoch": 0.7357203751065644, - "grad_norm": 0.050487734377384186, - "learning_rate": 6.994430612962192e-05, - "loss": 0.009740649163722992, - "step": 4315 - }, - { - "epoch": 0.7365728900255755, - "grad_norm": 0.07633031159639359, - "learning_rate": 6.993145020418873e-05, - "loss": 0.009455478191375733, - "step": 4320 - }, - { - "epoch": 0.7374254049445865, - "grad_norm": 0.11053632944822311, - "learning_rate": 6.991857913867575e-05, - "loss": 0.0111383855342865, - "step": 4325 - }, - { - "epoch": 0.7382779198635976, - "grad_norm": 0.07932359725236893, - "learning_rate": 6.990569293909165e-05, - "loss": 0.010170862078666687, - "step": 4330 - }, - { - "epoch": 0.7391304347826086, - "grad_norm": 0.06205928325653076, - "learning_rate": 6.98927916114521e-05, - "loss": 0.009742221236228943, - "step": 4335 - }, - { - "epoch": 0.7399829497016198, - "grad_norm": 0.07431669533252716, - "learning_rate": 6.987987516177989e-05, - "loss": 0.009026934206485749, - "step": 4340 - }, - { - "epoch": 0.7408354646206309, - "grad_norm": 0.06495700776576996, - "learning_rate": 6.986694359610486e-05, - "loss": 0.010289526730775832, - "step": 4345 - }, - { - "epoch": 0.7416879795396419, - "grad_norm": 0.07561559230089188, - "learning_rate": 6.985399692046387e-05, - "loss": 0.012499828636646271, - "step": 4350 - }, - { - "epoch": 0.742540494458653, - "grad_norm": 0.07708913832902908, - "learning_rate": 6.984103514090087e-05, - "loss": 0.01143759787082672, - "step": 4355 - }, - { - "epoch": 0.7433930093776641, - "grad_norm": 0.1315995305776596, - "learning_rate": 6.982805826346687e-05, - "loss": 0.010377982258796692, - "step": 4360 - }, - { - "epoch": 0.7442455242966752, - "grad_norm": 0.08346904814243317, - "learning_rate": 6.981506629421986e-05, - "loss": 0.008995984494686127, - "step": 4365 - }, - { - "epoch": 0.7450980392156863, - "grad_norm": 0.0814853310585022, - "learning_rate": 6.980205923922497e-05, - "loss": 0.009719532728195191, - "step": 4370 - }, - { - "epoch": 0.7459505541346974, - "grad_norm": 0.06594623625278473, - "learning_rate": 6.978903710455431e-05, - "loss": 0.008998245745897294, - "step": 4375 - }, - { - "epoch": 0.7468030690537084, - "grad_norm": 0.09526190906763077, - "learning_rate": 6.977599989628704e-05, - "loss": 0.010040522366762162, - "step": 4380 - }, - { - "epoch": 0.7476555839727195, - "grad_norm": 0.0938214361667633, - "learning_rate": 6.976294762050935e-05, - "loss": 0.010504753142595292, - "step": 4385 - }, - { - "epoch": 0.7485080988917306, - "grad_norm": 0.09816118329763412, - "learning_rate": 6.97498802833145e-05, - "loss": 0.011645899713039398, - "step": 4390 - }, - { - "epoch": 0.7493606138107417, - "grad_norm": 0.0780767872929573, - "learning_rate": 6.973679789080276e-05, - "loss": 0.011689887195825577, - "step": 4395 - }, - { - "epoch": 0.7498721227621483, - "eval_loss": 0.03396161273121834, - "eval_runtime": 3.6324, - "eval_samples_per_second": 69.376, - "eval_steps_per_second": 1.101, - "step": 4398 - }, - { - "eval_cer_subset": 0.01302783070334001, - "eval_cer_subset_edit_distance": 800, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 4398 - }, - { - "epoch": 0.7502131287297528, - "grad_norm": 0.061287231743335724, - "learning_rate": 6.972370044908141e-05, - "loss": 0.011720015108585358, - "step": 4400 - }, - { - "epoch": 0.7510656436487638, - "grad_norm": 0.0668778270483017, - "learning_rate": 6.971058796426478e-05, - "loss": 0.012064819037914277, - "step": 4405 - }, - { - "epoch": 0.7519181585677749, - "grad_norm": 0.07897942513227463, - "learning_rate": 6.969746044247421e-05, - "loss": 0.010592888295650481, - "step": 4410 - }, - { - "epoch": 0.7527706734867861, - "grad_norm": 0.09090534597635269, - "learning_rate": 6.968431788983806e-05, - "loss": 0.011600010097026825, - "step": 4415 - }, - { - "epoch": 0.7536231884057971, - "grad_norm": 0.080506332218647, - "learning_rate": 6.967116031249172e-05, - "loss": 0.013006125390529633, - "step": 4420 - }, - { - "epoch": 0.7544757033248082, - "grad_norm": 0.04851040989160538, - "learning_rate": 6.965798771657758e-05, - "loss": 0.010165790468454361, - "step": 4425 - }, - { - "epoch": 0.7553282182438192, - "grad_norm": 0.06298603117465973, - "learning_rate": 6.964480010824505e-05, - "loss": 0.007800602912902832, - "step": 4430 - }, - { - "epoch": 0.7561807331628303, - "grad_norm": 0.09919694811105728, - "learning_rate": 6.963159749365053e-05, - "loss": 0.010877586901187897, - "step": 4435 - }, - { - "epoch": 0.7570332480818415, - "grad_norm": 0.083896704018116, - "learning_rate": 6.961837987895747e-05, - "loss": 0.011114003509283066, - "step": 4440 - }, - { - "epoch": 0.7578857630008525, - "grad_norm": 0.0452699176967144, - "learning_rate": 6.960514727033626e-05, - "loss": 0.008609171956777573, - "step": 4445 - }, - { - "epoch": 0.7587382779198636, - "grad_norm": 0.08951374143362045, - "learning_rate": 6.959189967396435e-05, - "loss": 0.01193360835313797, - "step": 4450 - }, - { - "epoch": 0.7595907928388747, - "grad_norm": 0.08749551326036453, - "learning_rate": 6.957863709602611e-05, - "loss": 0.009163837879896164, - "step": 4455 - }, - { - "epoch": 0.7604433077578857, - "grad_norm": 0.09304409474134445, - "learning_rate": 6.956535954271301e-05, - "loss": 0.01038273349404335, - "step": 4460 - }, - { - "epoch": 0.7612958226768969, - "grad_norm": 0.06662629544734955, - "learning_rate": 6.955206702022342e-05, - "loss": 0.010570932179689407, - "step": 4465 - }, - { - "epoch": 0.7621483375959079, - "grad_norm": 0.07736595720052719, - "learning_rate": 6.953875953476276e-05, - "loss": 0.009856238961219788, - "step": 4470 - }, - { - "epoch": 0.763000852514919, - "grad_norm": 0.04692552238702774, - "learning_rate": 6.952543709254338e-05, - "loss": 0.006967573612928391, - "step": 4475 - }, - { - "epoch": 0.7638533674339301, - "grad_norm": 0.06901179254055023, - "learning_rate": 6.951209969978464e-05, - "loss": 0.008588603138923645, - "step": 4480 - }, - { - "epoch": 0.7647058823529411, - "grad_norm": 0.07733304053544998, - "learning_rate": 6.949874736271289e-05, - "loss": 0.012579981982707978, - "step": 4485 - }, - { - "epoch": 0.7655583972719523, - "grad_norm": 0.0693301409482956, - "learning_rate": 6.948538008756144e-05, - "loss": 0.009747470915317535, - "step": 4490 - }, - { - "epoch": 0.7664109121909634, - "grad_norm": 0.07054253667593002, - "learning_rate": 6.947199788057059e-05, - "loss": 0.008387601375579834, - "step": 4495 - }, - { - "epoch": 0.7672634271099744, - "grad_norm": 0.06526053696870804, - "learning_rate": 6.945860074798757e-05, - "loss": 0.008389735966920853, - "step": 4500 - }, - { - "epoch": 0.7681159420289855, - "grad_norm": 0.06862380355596542, - "learning_rate": 6.944518869606662e-05, - "loss": 0.008736115694046021, - "step": 4505 - }, - { - "epoch": 0.7689684569479965, - "grad_norm": 0.06233246996998787, - "learning_rate": 6.943176173106897e-05, - "loss": 0.008519527316093446, - "step": 4510 - }, - { - "epoch": 0.7698209718670077, - "grad_norm": 0.07696249336004257, - "learning_rate": 6.941831985926273e-05, - "loss": 0.011381441354751587, - "step": 4515 - }, - { - "epoch": 0.7706734867860188, - "grad_norm": 0.11450641602277756, - "learning_rate": 6.940486308692302e-05, - "loss": 0.012895810604095458, - "step": 4520 - }, - { - "epoch": 0.7715260017050298, - "grad_norm": 0.09141158312559128, - "learning_rate": 6.939139142033191e-05, - "loss": 0.009508632868528367, - "step": 4525 - }, - { - "epoch": 0.7723785166240409, - "grad_norm": 0.09469986706972122, - "learning_rate": 6.937790486577844e-05, - "loss": 0.014812557399272919, - "step": 4530 - }, - { - "epoch": 0.773231031543052, - "grad_norm": 0.08494299650192261, - "learning_rate": 6.936440342955855e-05, - "loss": 0.01355334222316742, - "step": 4535 - }, - { - "epoch": 0.7740835464620631, - "grad_norm": 0.09328251332044601, - "learning_rate": 6.93508871179752e-05, - "loss": 0.011529977619647979, - "step": 4540 - }, - { - "epoch": 0.7749360613810742, - "grad_norm": 0.06696850806474686, - "learning_rate": 6.933735593733821e-05, - "loss": 0.010230815410614014, - "step": 4545 - }, - { - "epoch": 0.7757885763000852, - "grad_norm": 0.07968153059482574, - "learning_rate": 6.932380989396442e-05, - "loss": 0.012129776924848557, - "step": 4550 - }, - { - "epoch": 0.7766410912190963, - "grad_norm": 0.07916650176048279, - "learning_rate": 6.931024899417756e-05, - "loss": 0.009455519914627075, - "step": 4555 - }, - { - "epoch": 0.7774936061381074, - "grad_norm": 0.05229945108294487, - "learning_rate": 6.92966732443083e-05, - "loss": 0.008516684174537659, - "step": 4560 - }, - { - "epoch": 0.7783461210571185, - "grad_norm": 0.08314234018325806, - "learning_rate": 6.928308265069428e-05, - "loss": 0.010914114117622376, - "step": 4565 - }, - { - "epoch": 0.7791986359761296, - "grad_norm": 0.05075672268867493, - "learning_rate": 6.926947721968001e-05, - "loss": 0.008188208192586898, - "step": 4570 - }, - { - "epoch": 0.7800511508951407, - "grad_norm": 0.09465362876653671, - "learning_rate": 6.925585695761697e-05, - "loss": 0.009074468165636063, - "step": 4575 - }, - { - "epoch": 0.7809036658141517, - "grad_norm": 0.09024044126272202, - "learning_rate": 6.924222187086356e-05, - "loss": 0.006571034342050553, - "step": 4580 - }, - { - "epoch": 0.7817561807331628, - "grad_norm": 0.050575681030750275, - "learning_rate": 6.922857196578507e-05, - "loss": 0.008829852938652039, - "step": 4585 - }, - { - "epoch": 0.782608695652174, - "grad_norm": 0.09888230264186859, - "learning_rate": 6.921490724875376e-05, - "loss": 0.01053793728351593, - "step": 4590 - }, - { - "epoch": 0.783461210571185, - "grad_norm": 0.042567264288663864, - "learning_rate": 6.920122772614875e-05, - "loss": 0.009682109951972962, - "step": 4595 - }, - { - "epoch": 0.7843137254901961, - "grad_norm": 0.10021623969078064, - "learning_rate": 6.91875334043561e-05, - "loss": 0.012160807102918624, - "step": 4600 - }, - { - "epoch": 0.7851662404092071, - "grad_norm": 0.07016255706548691, - "learning_rate": 6.917382428976878e-05, - "loss": 0.008590599894523621, - "step": 4605 - }, - { - "epoch": 0.7860187553282183, - "grad_norm": 0.06567320227622986, - "learning_rate": 6.916010038878667e-05, - "loss": 0.00809207409620285, - "step": 4610 - }, - { - "epoch": 0.7868712702472294, - "grad_norm": 0.05032164603471756, - "learning_rate": 6.914636170781652e-05, - "loss": 0.009291460365056991, - "step": 4615 - }, - { - "epoch": 0.7877237851662404, - "grad_norm": 0.06303273886442184, - "learning_rate": 6.913260825327204e-05, - "loss": 0.00837131291627884, - "step": 4620 - }, - { - "epoch": 0.7885763000852515, - "grad_norm": 0.05837355926632881, - "learning_rate": 6.911884003157376e-05, - "loss": 0.007800968736410141, - "step": 4625 - }, - { - "epoch": 0.7894288150042625, - "grad_norm": 0.07052712142467499, - "learning_rate": 6.910505704914916e-05, - "loss": 0.010577390342950821, - "step": 4630 - }, - { - "epoch": 0.7902813299232737, - "grad_norm": 0.08550997078418732, - "learning_rate": 6.909125931243259e-05, - "loss": 0.012821859121322632, - "step": 4635 - }, - { - "epoch": 0.7911338448422848, - "grad_norm": 0.060406558215618134, - "learning_rate": 6.90774468278653e-05, - "loss": 0.0065113060176372525, - "step": 4640 - }, - { - "epoch": 0.7919863597612958, - "grad_norm": 0.13999445736408234, - "learning_rate": 6.906361960189542e-05, - "loss": 0.012699820101261139, - "step": 4645 - }, - { - "epoch": 0.7928388746803069, - "grad_norm": 0.07585978507995605, - "learning_rate": 6.904977764097797e-05, - "loss": 0.008435635268688202, - "step": 4650 - }, - { - "epoch": 0.793691389599318, - "grad_norm": 0.07488108426332474, - "learning_rate": 6.90359209515748e-05, - "loss": 0.011925875395536422, - "step": 4655 - }, - { - "epoch": 0.7945439045183291, - "grad_norm": 0.12476535886526108, - "learning_rate": 6.902204954015471e-05, - "loss": 0.009086847305297852, - "step": 4660 - }, - { - "epoch": 0.7953964194373402, - "grad_norm": 0.08779732137918472, - "learning_rate": 6.900816341319331e-05, - "loss": 0.00962812826037407, - "step": 4665 - }, - { - "epoch": 0.7962489343563512, - "grad_norm": 0.15791405737400055, - "learning_rate": 6.899426257717312e-05, - "loss": 0.011767397075891495, - "step": 4670 - }, - { - "epoch": 0.7971014492753623, - "grad_norm": 0.11228909343481064, - "learning_rate": 6.898034703858352e-05, - "loss": 0.008271434903144836, - "step": 4675 - }, - { - "epoch": 0.7979539641943734, - "grad_norm": 0.07288003712892532, - "learning_rate": 6.896641680392073e-05, - "loss": 0.009384474158287049, - "step": 4680 - }, - { - "epoch": 0.7988064791133845, - "grad_norm": 0.08285173773765564, - "learning_rate": 6.895247187968784e-05, - "loss": 0.012600034475326538, - "step": 4685 - }, - { - "epoch": 0.7996589940323956, - "grad_norm": 0.07812397927045822, - "learning_rate": 6.893851227239484e-05, - "loss": 0.008935874700546265, - "step": 4690 - }, - { - "epoch": 0.8005115089514067, - "grad_norm": 0.07500546425580978, - "learning_rate": 6.892453798855852e-05, - "loss": 0.010619471222162247, - "step": 4695 - }, - { - "epoch": 0.8013640238704177, - "grad_norm": 0.05211177095770836, - "learning_rate": 6.891054903470251e-05, - "loss": 0.008601508289575576, - "step": 4700 - }, - { - "epoch": 0.8022165387894288, - "grad_norm": 0.029616642743349075, - "learning_rate": 6.889654541735738e-05, - "loss": 0.007921247184276581, - "step": 4705 - }, - { - "epoch": 0.80306905370844, - "grad_norm": 0.06894131749868393, - "learning_rate": 6.888252714306044e-05, - "loss": 0.010935742408037186, - "step": 4710 - }, - { - "epoch": 0.803921568627451, - "grad_norm": 0.0766182467341423, - "learning_rate": 6.886849421835587e-05, - "loss": 0.010556706041097642, - "step": 4715 - }, - { - "epoch": 0.8047740835464621, - "grad_norm": 0.09164462238550186, - "learning_rate": 6.885444664979477e-05, - "loss": 0.010812586545944214, - "step": 4720 - }, - { - "epoch": 0.8056265984654731, - "grad_norm": 0.06463408470153809, - "learning_rate": 6.884038444393496e-05, - "loss": 0.009179002791643142, - "step": 4725 - }, - { - "epoch": 0.8064791133844842, - "grad_norm": 0.06639672070741653, - "learning_rate": 6.882630760734118e-05, - "loss": 0.012755092978477479, - "step": 4730 - }, - { - "epoch": 0.8073316283034954, - "grad_norm": 0.062024496495723724, - "learning_rate": 6.881221614658493e-05, - "loss": 0.009655499458312988, - "step": 4735 - }, - { - "epoch": 0.8081841432225064, - "grad_norm": 0.06751494854688644, - "learning_rate": 6.879811006824459e-05, - "loss": 0.010860173404216767, - "step": 4740 - }, - { - "epoch": 0.8090366581415175, - "grad_norm": 0.05535218119621277, - "learning_rate": 6.878398937890535e-05, - "loss": 0.011440058052539826, - "step": 4745 - }, - { - "epoch": 0.8098891730605285, - "grad_norm": 0.08365204185247421, - "learning_rate": 6.876985408515922e-05, - "loss": 0.011058357357978821, - "step": 4750 - }, - { - "epoch": 0.8107416879795396, - "grad_norm": 0.06450537592172623, - "learning_rate": 6.875570419360501e-05, - "loss": 0.01046149879693985, - "step": 4755 - }, - { - "epoch": 0.8115942028985508, - "grad_norm": 0.08542726188898087, - "learning_rate": 6.874153971084837e-05, - "loss": 0.009869573265314102, - "step": 4760 - }, - { - "epoch": 0.8124467178175618, - "grad_norm": 0.08184531331062317, - "learning_rate": 6.872736064350176e-05, - "loss": 0.01054040789604187, - "step": 4765 - }, - { - "epoch": 0.8132992327365729, - "grad_norm": 0.07068512588739395, - "learning_rate": 6.871316699818442e-05, - "loss": 0.009573462605476379, - "step": 4770 - }, - { - "epoch": 0.814151747655584, - "grad_norm": 0.08866564929485321, - "learning_rate": 6.869895878152244e-05, - "loss": 0.008078956604003906, - "step": 4775 - }, - { - "epoch": 0.815004262574595, - "grad_norm": 0.08215270191431046, - "learning_rate": 6.868473600014867e-05, - "loss": 0.010586659610271453, - "step": 4780 - }, - { - "epoch": 0.8158567774936062, - "grad_norm": 0.0449003241956234, - "learning_rate": 6.867049866070278e-05, - "loss": 0.008572281152009965, - "step": 4785 - }, - { - "epoch": 0.8167092924126172, - "grad_norm": 0.0766722783446312, - "learning_rate": 6.865624676983124e-05, - "loss": 0.009015947580337524, - "step": 4790 - }, - { - "epoch": 0.8175618073316283, - "grad_norm": 0.07404733449220657, - "learning_rate": 6.864198033418732e-05, - "loss": 0.014639028906822204, - "step": 4795 - }, - { - "epoch": 0.8184143222506394, - "grad_norm": 0.10437514632940292, - "learning_rate": 6.862769936043102e-05, - "loss": 0.009333716332912445, - "step": 4800 - }, - { - "epoch": 0.8192668371696504, - "grad_norm": 0.06732609868049622, - "learning_rate": 6.861340385522921e-05, - "loss": 0.007169592380523682, - "step": 4805 - }, - { - "epoch": 0.8201193520886616, - "grad_norm": 0.06016068905591965, - "learning_rate": 6.859909382525552e-05, - "loss": 0.009211564064025879, - "step": 4810 - }, - { - "epoch": 0.8209718670076727, - "grad_norm": 0.07302942126989365, - "learning_rate": 6.858476927719031e-05, - "loss": 0.009643231332302094, - "step": 4815 - }, - { - "epoch": 0.8218243819266837, - "grad_norm": 0.07511111348867416, - "learning_rate": 6.857043021772079e-05, - "loss": 0.010751830041408538, - "step": 4820 - }, - { - "epoch": 0.8226768968456948, - "grad_norm": 0.04791528359055519, - "learning_rate": 6.855607665354088e-05, - "loss": 0.008413314074277877, - "step": 4825 - }, - { - "epoch": 0.8235294117647058, - "grad_norm": 0.08279003202915192, - "learning_rate": 6.854170859135132e-05, - "loss": 0.009260118752717972, - "step": 4830 - }, - { - "epoch": 0.824381926683717, - "grad_norm": 0.06907783448696136, - "learning_rate": 6.85273260378596e-05, - "loss": 0.009681220352649688, - "step": 4835 - }, - { - "epoch": 0.8252344416027281, - "grad_norm": 0.09847953170537949, - "learning_rate": 6.851292899977997e-05, - "loss": 0.009847448766231537, - "step": 4840 - }, - { - "epoch": 0.8260869565217391, - "grad_norm": 0.0683966800570488, - "learning_rate": 6.849851748383343e-05, - "loss": 0.007601346075534821, - "step": 4845 - }, - { - "epoch": 0.8269394714407502, - "grad_norm": 0.0523662269115448, - "learning_rate": 6.848409149674779e-05, - "loss": 0.00900915488600731, - "step": 4850 - }, - { - "epoch": 0.8277919863597613, - "grad_norm": 0.07112257927656174, - "learning_rate": 6.846965104525757e-05, - "loss": 0.011230588704347611, - "step": 4855 - }, - { - "epoch": 0.8286445012787724, - "grad_norm": 0.09305348247289658, - "learning_rate": 6.845519613610402e-05, - "loss": 0.01087992861866951, - "step": 4860 - }, - { - "epoch": 0.8294970161977835, - "grad_norm": 0.062347084283828735, - "learning_rate": 6.84407267760352e-05, - "loss": 0.00806276947259903, - "step": 4865 - }, - { - "epoch": 0.8303495311167945, - "grad_norm": 0.09091926366090775, - "learning_rate": 6.84262429718059e-05, - "loss": 0.010562103241682053, - "step": 4870 - }, - { - "epoch": 0.8312020460358056, - "grad_norm": 0.05807443708181381, - "learning_rate": 6.841174473017762e-05, - "loss": 0.010788433253765106, - "step": 4875 - }, - { - "epoch": 0.8320545609548167, - "grad_norm": 0.06664252281188965, - "learning_rate": 6.839723205791863e-05, - "loss": 0.01030244082212448, - "step": 4880 - }, - { - "epoch": 0.8329070758738278, - "grad_norm": 0.07469561696052551, - "learning_rate": 6.838270496180392e-05, - "loss": 0.01250479370355606, - "step": 4885 - }, - { - "epoch": 0.8337595907928389, - "grad_norm": 0.0469096302986145, - "learning_rate": 6.836816344861523e-05, - "loss": 0.010546717792749405, - "step": 4890 - }, - { - "epoch": 0.83461210571185, - "grad_norm": 0.0646355077624321, - "learning_rate": 6.835360752514104e-05, - "loss": 0.008491561561822892, - "step": 4895 - }, - { - "epoch": 0.835464620630861, - "grad_norm": 0.06006006523966789, - "learning_rate": 6.83390371981765e-05, - "loss": 0.010175065696239471, - "step": 4900 - }, - { - "epoch": 0.8363171355498721, - "grad_norm": 0.0595518983900547, - "learning_rate": 6.832445247452355e-05, - "loss": 0.009471315890550613, - "step": 4905 - }, - { - "epoch": 0.8371696504688833, - "grad_norm": 0.0722845196723938, - "learning_rate": 6.830985336099081e-05, - "loss": 0.011656039208173753, - "step": 4910 - }, - { - "epoch": 0.8380221653878943, - "grad_norm": 0.09830670058727264, - "learning_rate": 6.829523986439366e-05, - "loss": 0.0106172576546669, - "step": 4915 - }, - { - "epoch": 0.8388746803069054, - "grad_norm": 0.0725899264216423, - "learning_rate": 6.828061199155413e-05, - "loss": 0.00935768336057663, - "step": 4920 - }, - { - "epoch": 0.8397271952259164, - "grad_norm": 0.06721889227628708, - "learning_rate": 6.826596974930101e-05, - "loss": 0.010951700061559677, - "step": 4925 - }, - { - "epoch": 0.8405797101449275, - "grad_norm": 0.09289079904556274, - "learning_rate": 6.82513131444698e-05, - "loss": 0.010057362169027329, - "step": 4930 - }, - { - "epoch": 0.8414322250639387, - "grad_norm": 0.07667957991361618, - "learning_rate": 6.823664218390267e-05, - "loss": 0.012943412363529205, - "step": 4935 - }, - { - "epoch": 0.8422847399829497, - "grad_norm": 0.041785743087530136, - "learning_rate": 6.822195687444853e-05, - "loss": 0.009845246374607087, - "step": 4940 - }, - { - "epoch": 0.8431372549019608, - "grad_norm": 0.062134817242622375, - "learning_rate": 6.820725722296295e-05, - "loss": 0.010506168007850647, - "step": 4945 - }, - { - "epoch": 0.8439897698209718, - "grad_norm": 0.0870882049202919, - "learning_rate": 6.819254323630825e-05, - "loss": 0.008953387290239334, - "step": 4950 - }, - { - "epoch": 0.8448422847399829, - "grad_norm": 0.06943989545106888, - "learning_rate": 6.817781492135337e-05, - "loss": 0.009975537657737732, - "step": 4955 - }, - { - "epoch": 0.8456947996589941, - "grad_norm": 0.0961625948548317, - "learning_rate": 6.8163072284974e-05, - "loss": 0.010299818217754364, - "step": 4960 - }, - { - "epoch": 0.8465473145780051, - "grad_norm": 0.0685473084449768, - "learning_rate": 6.814831533405249e-05, - "loss": 0.007892660051584243, - "step": 4965 - }, - { - "epoch": 0.8473998294970162, - "grad_norm": 0.06370922178030014, - "learning_rate": 6.813354407547787e-05, - "loss": 0.011673354357481004, - "step": 4970 - }, - { - "epoch": 0.8482523444160273, - "grad_norm": 0.1212867870926857, - "learning_rate": 6.811875851614586e-05, - "loss": 0.01049395203590393, - "step": 4975 - }, - { - "epoch": 0.8491048593350383, - "grad_norm": 0.07590476423501968, - "learning_rate": 6.810395866295885e-05, - "loss": 0.011261900514364242, - "step": 4980 - }, - { - "epoch": 0.8499573742540495, - "grad_norm": 0.06342966109514236, - "learning_rate": 6.808914452282592e-05, - "loss": 0.011414043605327606, - "step": 4985 - }, - { - "epoch": 0.8508098891730606, - "grad_norm": 0.08939556777477264, - "learning_rate": 6.807431610266278e-05, - "loss": 0.008719882369041443, - "step": 4990 - }, - { - "epoch": 0.8516624040920716, - "grad_norm": 0.09418119490146637, - "learning_rate": 6.805947340939183e-05, - "loss": 0.011406099796295166, - "step": 4995 - }, - { - "epoch": 0.8525149190110827, - "grad_norm": 0.09113836288452148, - "learning_rate": 6.804461644994213e-05, - "loss": 0.011070792376995087, - "step": 5000 - }, - { - "epoch": 0.8533674339300937, - "grad_norm": 0.06545080989599228, - "learning_rate": 6.802974523124941e-05, - "loss": 0.009602059423923493, - "step": 5005 - }, - { - "epoch": 0.8542199488491049, - "grad_norm": 0.07779792696237564, - "learning_rate": 6.801485976025607e-05, - "loss": 0.008207190036773681, - "step": 5010 - }, - { - "epoch": 0.855072463768116, - "grad_norm": 0.038606271147727966, - "learning_rate": 6.799996004391113e-05, - "loss": 0.008772189915180206, - "step": 5015 - }, - { - "epoch": 0.855924978687127, - "grad_norm": 0.07329479604959488, - "learning_rate": 6.798504608917025e-05, - "loss": 0.011226999759674072, - "step": 5020 - }, - { - "epoch": 0.8567774936061381, - "grad_norm": 0.08631903678178787, - "learning_rate": 6.797011790299579e-05, - "loss": 0.012361649423837662, - "step": 5025 - }, - { - "epoch": 0.8576300085251491, - "grad_norm": 0.09041957557201385, - "learning_rate": 6.79551754923567e-05, - "loss": 0.010576151311397552, - "step": 5030 - }, - { - "epoch": 0.8584825234441603, - "grad_norm": 0.07003892213106155, - "learning_rate": 6.794021886422861e-05, - "loss": 0.008748160302639007, - "step": 5035 - }, - { - "epoch": 0.8593350383631714, - "grad_norm": 0.06976106762886047, - "learning_rate": 6.792524802559378e-05, - "loss": 0.010539846867322922, - "step": 5040 - }, - { - "epoch": 0.8601875532821824, - "grad_norm": 0.05501266196370125, - "learning_rate": 6.791026298344107e-05, - "loss": 0.01045292615890503, - "step": 5045 - }, - { - "epoch": 0.8610400682011935, - "grad_norm": 0.051503781229257584, - "learning_rate": 6.789526374476602e-05, - "loss": 0.009410140663385391, - "step": 5050 - }, - { - "epoch": 0.8618925831202046, - "grad_norm": 0.05674072727560997, - "learning_rate": 6.788025031657076e-05, - "loss": 0.00899135023355484, - "step": 5055 - }, - { - "epoch": 0.8627450980392157, - "grad_norm": 0.09688259661197662, - "learning_rate": 6.786522270586406e-05, - "loss": 0.010228607058525085, - "step": 5060 - }, - { - "epoch": 0.8635976129582268, - "grad_norm": 0.08542542159557343, - "learning_rate": 6.785018091966131e-05, - "loss": 0.010819461196660995, - "step": 5065 - }, - { - "epoch": 0.8644501278772379, - "grad_norm": 0.05703757330775261, - "learning_rate": 6.783512496498452e-05, - "loss": 0.011345957219600678, - "step": 5070 - }, - { - "epoch": 0.8653026427962489, - "grad_norm": 0.08096349984407425, - "learning_rate": 6.782005484886231e-05, - "loss": 0.011220332235097885, - "step": 5075 - }, - { - "epoch": 0.86615515771526, - "grad_norm": 0.07224266976118088, - "learning_rate": 6.780497057832988e-05, - "loss": 0.012557309865951539, - "step": 5080 - }, - { - "epoch": 0.8670076726342711, - "grad_norm": 0.08718731999397278, - "learning_rate": 6.778987216042912e-05, - "loss": 0.009770408272743225, - "step": 5085 - }, - { - "epoch": 0.8678601875532822, - "grad_norm": 0.05731170251965523, - "learning_rate": 6.777475960220846e-05, - "loss": 0.008567098528146744, - "step": 5090 - }, - { - "epoch": 0.8687127024722933, - "grad_norm": 0.07938708364963531, - "learning_rate": 6.775963291072292e-05, - "loss": 0.010664292424917222, - "step": 5095 - }, - { - "epoch": 0.8695652173913043, - "grad_norm": 0.028421485796570778, - "learning_rate": 6.774449209303416e-05, - "loss": 0.010194088518619537, - "step": 5100 - }, - { - "epoch": 0.8704177323103154, - "grad_norm": 0.06094631180167198, - "learning_rate": 6.772933715621042e-05, - "loss": 0.009481045603752136, - "step": 5105 - }, - { - "epoch": 0.8712702472293266, - "grad_norm": 0.08887558430433273, - "learning_rate": 6.771416810732653e-05, - "loss": 0.012845474481582641, - "step": 5110 - }, - { - "epoch": 0.8721227621483376, - "grad_norm": 0.07431238144636154, - "learning_rate": 6.76989849534639e-05, - "loss": 0.00815560668706894, - "step": 5115 - }, - { - "epoch": 0.8729752770673487, - "grad_norm": 0.09380137175321579, - "learning_rate": 6.768378770171052e-05, - "loss": 0.009580246359109878, - "step": 5120 - }, - { - "epoch": 0.8738277919863597, - "grad_norm": 0.08001488447189331, - "learning_rate": 6.766857635916099e-05, - "loss": 0.00857289507985115, - "step": 5125 - }, - { - "epoch": 0.8746803069053708, - "grad_norm": 0.049355555325746536, - "learning_rate": 6.765335093291647e-05, - "loss": 0.009263276308774947, - "step": 5130 - }, - { - "epoch": 0.875532821824382, - "grad_norm": 0.05873994901776314, - "learning_rate": 6.763811143008469e-05, - "loss": 0.008233514428138734, - "step": 5135 - }, - { - "epoch": 0.876385336743393, - "grad_norm": 0.10190756618976593, - "learning_rate": 6.762285785777995e-05, - "loss": 0.01529676467180252, - "step": 5140 - }, - { - "epoch": 0.8772378516624041, - "grad_norm": 0.08395158499479294, - "learning_rate": 6.760759022312313e-05, - "loss": 0.00961325541138649, - "step": 5145 - }, - { - "epoch": 0.8780903665814151, - "grad_norm": 0.07476748526096344, - "learning_rate": 6.759230853324169e-05, - "loss": 0.010477164387702942, - "step": 5150 - }, - { - "epoch": 0.8789428815004262, - "grad_norm": 0.07773051410913467, - "learning_rate": 6.757701279526961e-05, - "loss": 0.010389962792396545, - "step": 5155 - }, - { - "epoch": 0.8797953964194374, - "grad_norm": 0.07345708459615707, - "learning_rate": 6.756170301634745e-05, - "loss": 0.009174319356679917, - "step": 5160 - }, - { - "epoch": 0.8806479113384484, - "grad_norm": 0.07917368412017822, - "learning_rate": 6.754637920362233e-05, - "loss": 0.012756256759166718, - "step": 5165 - }, - { - "epoch": 0.8815004262574595, - "grad_norm": 0.06016271933913231, - "learning_rate": 6.75310413642479e-05, - "loss": 0.011058077961206437, - "step": 5170 - }, - { - "epoch": 0.8823529411764706, - "grad_norm": 0.06637005507946014, - "learning_rate": 6.751568950538441e-05, - "loss": 0.013590328395366669, - "step": 5175 - }, - { - "epoch": 0.8832054560954816, - "grad_norm": 0.06486016511917114, - "learning_rate": 6.750032363419857e-05, - "loss": 0.010195261240005494, - "step": 5180 - }, - { - "epoch": 0.8840579710144928, - "grad_norm": 0.09800687432289124, - "learning_rate": 6.748494375786372e-05, - "loss": 0.011106249690055848, - "step": 5185 - }, - { - "epoch": 0.8849104859335039, - "grad_norm": 0.04665162041783333, - "learning_rate": 6.746954988355967e-05, - "loss": 0.009880972653627395, - "step": 5190 - }, - { - "epoch": 0.8857630008525149, - "grad_norm": 0.05554487928748131, - "learning_rate": 6.745414201847282e-05, - "loss": 0.009480565786361694, - "step": 5195 - }, - { - "epoch": 0.886615515771526, - "grad_norm": 0.059967316687107086, - "learning_rate": 6.743872016979605e-05, - "loss": 0.010072766989469528, - "step": 5200 - }, - { - "epoch": 0.887468030690537, - "grad_norm": 0.08275031298398972, - "learning_rate": 6.74232843447288e-05, - "loss": 0.010208947211503982, - "step": 5205 - }, - { - "epoch": 0.8883205456095482, - "grad_norm": 0.07091715931892395, - "learning_rate": 6.740783455047704e-05, - "loss": 0.011881709098815918, - "step": 5210 - }, - { - "epoch": 0.8891730605285593, - "grad_norm": 0.1465480923652649, - "learning_rate": 6.739237079425322e-05, - "loss": 0.010970161855220794, - "step": 5215 - }, - { - "epoch": 0.8900255754475703, - "grad_norm": 0.07571437954902649, - "learning_rate": 6.737689308327636e-05, - "loss": 0.010722124576568603, - "step": 5220 - }, - { - "epoch": 0.8908780903665814, - "grad_norm": 0.06671100109815598, - "learning_rate": 6.736140142477194e-05, - "loss": 0.010463282465934753, - "step": 5225 - }, - { - "epoch": 0.8917306052855924, - "grad_norm": 0.05676295980811119, - "learning_rate": 6.734589582597204e-05, - "loss": 0.00933043509721756, - "step": 5230 - }, - { - "epoch": 0.8925831202046036, - "grad_norm": 0.09708777070045471, - "learning_rate": 6.733037629411514e-05, - "loss": 0.011712780594825745, - "step": 5235 - }, - { - "epoch": 0.8934356351236147, - "grad_norm": 0.07073090970516205, - "learning_rate": 6.731484283644626e-05, - "loss": 0.008112293481826783, - "step": 5240 - }, - { - "epoch": 0.8942881500426257, - "grad_norm": 0.08821752667427063, - "learning_rate": 6.7299295460217e-05, - "loss": 0.01026250645518303, - "step": 5245 - }, - { - "epoch": 0.8951406649616368, - "grad_norm": 0.05165687948465347, - "learning_rate": 6.728373417268533e-05, - "loss": 0.008761890232563019, - "step": 5250 - }, - { - "epoch": 0.8959931798806479, - "grad_norm": 0.06872246414422989, - "learning_rate": 6.726815898111581e-05, - "loss": 0.012551462650299073, - "step": 5255 - }, - { - "epoch": 0.896845694799659, - "grad_norm": 0.07550673931837082, - "learning_rate": 6.725256989277944e-05, - "loss": 0.010165071487426758, - "step": 5260 - }, - { - "epoch": 0.8976982097186701, - "grad_norm": 0.05931933969259262, - "learning_rate": 6.723696691495373e-05, - "loss": 0.009457996487617493, - "step": 5265 - }, - { - "epoch": 0.8985507246376812, - "grad_norm": 0.10838861763477325, - "learning_rate": 6.722135005492268e-05, - "loss": 0.012053199112415314, - "step": 5270 - }, - { - "epoch": 0.8994032395566922, - "grad_norm": 0.062102265655994415, - "learning_rate": 6.720571931997676e-05, - "loss": 0.007582514733076096, - "step": 5275 - }, - { - "epoch": 0.9002557544757033, - "grad_norm": 0.05997858941555023, - "learning_rate": 6.719007471741289e-05, - "loss": 0.009348342567682267, - "step": 5280 - }, - { - "epoch": 0.9011082693947144, - "grad_norm": 0.05405129864811897, - "learning_rate": 6.717441625453451e-05, - "loss": 0.009910254925489425, - "step": 5285 - }, - { - "epoch": 0.9019607843137255, - "grad_norm": 0.08071329444646835, - "learning_rate": 6.715874393865152e-05, - "loss": 0.010672248899936676, - "step": 5290 - }, - { - "epoch": 0.9028132992327366, - "grad_norm": 0.06863300502300262, - "learning_rate": 6.714305777708027e-05, - "loss": 0.008570954948663712, - "step": 5295 - }, - { - "epoch": 0.9036658141517476, - "grad_norm": 0.07818135619163513, - "learning_rate": 6.712735777714357e-05, - "loss": 0.008647527545690536, - "step": 5300 - }, - { - "epoch": 0.9045183290707587, - "grad_norm": 0.14757376909255981, - "learning_rate": 6.711164394617072e-05, - "loss": 0.010825049877166749, - "step": 5305 - }, - { - "epoch": 0.9053708439897699, - "grad_norm": 0.07376445829868317, - "learning_rate": 6.709591629149746e-05, - "loss": 0.01299697458744049, - "step": 5310 - }, - { - "epoch": 0.9062233589087809, - "grad_norm": 0.05860469490289688, - "learning_rate": 6.708017482046597e-05, - "loss": 0.009549598395824432, - "step": 5315 - }, - { - "epoch": 0.907075873827792, - "grad_norm": 0.0799872875213623, - "learning_rate": 6.706441954042488e-05, - "loss": 0.009733843803405761, - "step": 5320 - }, - { - "epoch": 0.907928388746803, - "grad_norm": 0.05245954543352127, - "learning_rate": 6.704865045872932e-05, - "loss": 0.009799794852733612, - "step": 5325 - }, - { - "epoch": 0.9087809036658141, - "grad_norm": 0.05515241622924805, - "learning_rate": 6.703286758274079e-05, - "loss": 0.007391643524169922, - "step": 5330 - }, - { - "epoch": 0.9096334185848253, - "grad_norm": 0.05900256708264351, - "learning_rate": 6.701707091982726e-05, - "loss": 0.009107303619384766, - "step": 5335 - }, - { - "epoch": 0.9104859335038363, - "grad_norm": 0.09559495002031326, - "learning_rate": 6.700126047736317e-05, - "loss": 0.009052158147096635, - "step": 5340 - }, - { - "epoch": 0.9113384484228474, - "grad_norm": 0.11189334839582443, - "learning_rate": 6.698543626272932e-05, - "loss": 0.011292549222707749, - "step": 5345 - }, - { - "epoch": 0.9121909633418585, - "grad_norm": 0.07031659781932831, - "learning_rate": 6.6969598283313e-05, - "loss": 0.008589480072259903, - "step": 5350 - }, - { - "epoch": 0.9130434782608695, - "grad_norm": 0.1652907431125641, - "learning_rate": 6.69537465465079e-05, - "loss": 0.010865563899278641, - "step": 5355 - }, - { - "epoch": 0.9138959931798807, - "grad_norm": 0.06157436594367027, - "learning_rate": 6.693788105971413e-05, - "loss": 0.012611952424049378, - "step": 5360 - }, - { - "epoch": 0.9147485080988917, - "grad_norm": 0.03928734362125397, - "learning_rate": 6.692200183033826e-05, - "loss": 0.009115418046712875, - "step": 5365 - }, - { - "epoch": 0.9156010230179028, - "grad_norm": 0.06604880094528198, - "learning_rate": 6.690610886579321e-05, - "loss": 0.010015038400888443, - "step": 5370 - }, - { - "epoch": 0.9164535379369139, - "grad_norm": 0.07625336199998856, - "learning_rate": 6.689020217349835e-05, - "loss": 0.010416677594184876, - "step": 5375 - }, - { - "epoch": 0.9173060528559249, - "grad_norm": 0.07674526423215866, - "learning_rate": 6.687428176087946e-05, - "loss": 0.01016802191734314, - "step": 5380 - }, - { - "epoch": 0.9181585677749361, - "grad_norm": 0.08422617614269257, - "learning_rate": 6.685834763536872e-05, - "loss": 0.011127004027366638, - "step": 5385 - }, - { - "epoch": 0.9190110826939472, - "grad_norm": 0.057719554752111435, - "learning_rate": 6.684239980440472e-05, - "loss": 0.008915853500366212, - "step": 5390 - }, - { - "epoch": 0.9198635976129582, - "grad_norm": 0.056555263698101044, - "learning_rate": 6.682643827543241e-05, - "loss": 0.0095272496342659, - "step": 5395 - }, - { - "epoch": 0.9207161125319693, - "grad_norm": 0.07605638355016708, - "learning_rate": 6.681046305590317e-05, - "loss": 0.010731159895658492, - "step": 5400 - }, - { - "epoch": 0.9215686274509803, - "grad_norm": 0.07499220222234726, - "learning_rate": 6.679447415327479e-05, - "loss": 0.010919998586177825, - "step": 5405 - }, - { - "epoch": 0.9224211423699915, - "grad_norm": 0.052863143384456635, - "learning_rate": 6.677847157501137e-05, - "loss": 0.011300939321517944, - "step": 5410 - }, - { - "epoch": 0.9232736572890026, - "grad_norm": 0.09210597723722458, - "learning_rate": 6.676245532858351e-05, - "loss": 0.013997772336006164, - "step": 5415 - }, - { - "epoch": 0.9241261722080136, - "grad_norm": 0.0746840238571167, - "learning_rate": 6.674642542146807e-05, - "loss": 0.012542533874511718, - "step": 5420 - }, - { - "epoch": 0.9249786871270247, - "grad_norm": 0.06137506663799286, - "learning_rate": 6.67303818611484e-05, - "loss": 0.008029398322105408, - "step": 5425 - }, - { - "epoch": 0.9258312020460358, - "grad_norm": 0.03867131471633911, - "learning_rate": 6.671432465511411e-05, - "loss": 0.009305672347545623, - "step": 5430 - }, - { - "epoch": 0.9266837169650469, - "grad_norm": 0.08291540294885635, - "learning_rate": 6.669825381086128e-05, - "loss": 0.011059926450252533, - "step": 5435 - }, - { - "epoch": 0.927536231884058, - "grad_norm": 0.0689411610364914, - "learning_rate": 6.668216933589228e-05, - "loss": 0.008350597321987152, - "step": 5440 - }, - { - "epoch": 0.928388746803069, - "grad_norm": 0.051505669951438904, - "learning_rate": 6.666607123771591e-05, - "loss": 0.011102759093046189, - "step": 5445 - }, - { - "epoch": 0.9292412617220801, - "grad_norm": 0.08774327486753464, - "learning_rate": 6.664995952384729e-05, - "loss": 0.009498609602451325, - "step": 5450 - }, - { - "epoch": 0.9300937766410913, - "grad_norm": 0.060566093772649765, - "learning_rate": 6.663383420180789e-05, - "loss": 0.007811173051595688, - "step": 5455 - }, - { - "epoch": 0.9309462915601023, - "grad_norm": 0.09952156245708466, - "learning_rate": 6.661769527912555e-05, - "loss": 0.010514630377292633, - "step": 5460 - }, - { - "epoch": 0.9317988064791134, - "grad_norm": 0.04871741309762001, - "learning_rate": 6.660154276333446e-05, - "loss": 0.006630983203649521, - "step": 5465 - }, - { - "epoch": 0.9326513213981245, - "grad_norm": 0.06254981458187103, - "learning_rate": 6.658537666197517e-05, - "loss": 0.009895097464323044, - "step": 5470 - }, - { - "epoch": 0.9335038363171355, - "grad_norm": 0.08351470530033112, - "learning_rate": 6.656919698259452e-05, - "loss": 0.010659988969564438, - "step": 5475 - }, - { - "epoch": 0.9343563512361467, - "grad_norm": 0.07085305452346802, - "learning_rate": 6.655300373274575e-05, - "loss": 0.008971457183361054, - "step": 5480 - }, - { - "epoch": 0.9352088661551577, - "grad_norm": 0.06461923569440842, - "learning_rate": 6.653679691998839e-05, - "loss": 0.009138958156108856, - "step": 5485 - }, - { - "epoch": 0.9360613810741688, - "grad_norm": 0.11675399541854858, - "learning_rate": 6.652057655188832e-05, - "loss": 0.008388948440551759, - "step": 5490 - }, - { - "epoch": 0.9369138959931799, - "grad_norm": 0.09698229283094406, - "learning_rate": 6.650434263601777e-05, - "loss": 0.011885351687669753, - "step": 5495 - }, - { - "epoch": 0.9377664109121909, - "grad_norm": 0.06786464154720306, - "learning_rate": 6.648809517995524e-05, - "loss": 0.012351768463850022, - "step": 5500 - }, - { - "epoch": 0.9386189258312021, - "grad_norm": 0.09192351251840591, - "learning_rate": 6.647183419128561e-05, - "loss": 0.010940471291542053, - "step": 5505 - }, - { - "epoch": 0.9394714407502132, - "grad_norm": 0.05660499259829521, - "learning_rate": 6.645555967760003e-05, - "loss": 0.01160380095243454, - "step": 5510 - }, - { - "epoch": 0.9403239556692242, - "grad_norm": 0.0829106792807579, - "learning_rate": 6.6439271646496e-05, - "loss": 0.009475469589233398, - "step": 5515 - }, - { - "epoch": 0.9411764705882353, - "grad_norm": 0.08007021248340607, - "learning_rate": 6.642297010557733e-05, - "loss": 0.010524801164865493, - "step": 5520 - }, - { - "epoch": 0.9420289855072463, - "grad_norm": 0.07578855752944946, - "learning_rate": 6.640665506245406e-05, - "loss": 0.008864742517471314, - "step": 5525 - }, - { - "epoch": 0.9428815004262575, - "grad_norm": 0.064674511551857, - "learning_rate": 6.639032652474265e-05, - "loss": 0.010619612783193589, - "step": 5530 - }, - { - "epoch": 0.9437340153452686, - "grad_norm": 0.07176528871059418, - "learning_rate": 6.637398450006579e-05, - "loss": 0.011696039140224457, - "step": 5535 - }, - { - "epoch": 0.9445865302642796, - "grad_norm": 0.16578713059425354, - "learning_rate": 6.635762899605248e-05, - "loss": 0.009867334365844726, - "step": 5540 - }, - { - "epoch": 0.9454390451832907, - "grad_norm": 0.069394052028656, - "learning_rate": 6.634126002033802e-05, - "loss": 0.012951886653900147, - "step": 5545 - }, - { - "epoch": 0.9462915601023018, - "grad_norm": 0.07247213274240494, - "learning_rate": 6.632487758056397e-05, - "loss": 0.009127721190452576, - "step": 5550 - }, - { - "epoch": 0.9471440750213129, - "grad_norm": 0.11084317415952682, - "learning_rate": 6.630848168437822e-05, - "loss": 0.009197863936424255, - "step": 5555 - }, - { - "epoch": 0.947996589940324, - "grad_norm": 0.08719248324632645, - "learning_rate": 6.629207233943492e-05, - "loss": 0.010768509656190871, - "step": 5560 - }, - { - "epoch": 0.948849104859335, - "grad_norm": 0.0857851505279541, - "learning_rate": 6.62756495533945e-05, - "loss": 0.009163270145654679, - "step": 5565 - }, - { - "epoch": 0.9497016197783461, - "grad_norm": 0.09808778762817383, - "learning_rate": 6.625921333392362e-05, - "loss": 0.01005362868309021, - "step": 5570 - }, - { - "epoch": 0.9505541346973572, - "grad_norm": 0.08482059836387634, - "learning_rate": 6.624276368869532e-05, - "loss": 0.008303509652614593, - "step": 5575 - }, - { - "epoch": 0.9514066496163683, - "grad_norm": 0.07460886240005493, - "learning_rate": 6.62263006253888e-05, - "loss": 0.00857923850417137, - "step": 5580 - }, - { - "epoch": 0.9522591645353794, - "grad_norm": 0.09494256228208542, - "learning_rate": 6.620982415168956e-05, - "loss": 0.009573552012443542, - "step": 5585 - }, - { - "epoch": 0.9531116794543905, - "grad_norm": 0.08753519505262375, - "learning_rate": 6.61933342752894e-05, - "loss": 0.010430536419153213, - "step": 5590 - }, - { - "epoch": 0.9539641943734015, - "grad_norm": 0.05132949724793434, - "learning_rate": 6.617683100388632e-05, - "loss": 0.009080658107995987, - "step": 5595 - }, - { - "epoch": 0.9548167092924126, - "grad_norm": 0.07516856491565704, - "learning_rate": 6.61603143451846e-05, - "loss": 0.009718524664640427, - "step": 5600 - }, - { - "epoch": 0.9556692242114238, - "grad_norm": 0.12723733484745026, - "learning_rate": 6.614378430689477e-05, - "loss": 0.01136334240436554, - "step": 5605 - }, - { - "epoch": 0.9565217391304348, - "grad_norm": 0.0905863493680954, - "learning_rate": 6.612724089673359e-05, - "loss": 0.013209307193756103, - "step": 5610 - }, - { - "epoch": 0.9573742540494459, - "grad_norm": 0.06084009259939194, - "learning_rate": 6.611068412242409e-05, - "loss": 0.01001257449388504, - "step": 5615 - }, - { - "epoch": 0.9582267689684569, - "grad_norm": 0.11799532175064087, - "learning_rate": 6.60941139916955e-05, - "loss": 0.008610795438289642, - "step": 5620 - }, - { - "epoch": 0.959079283887468, - "grad_norm": 0.08627504110336304, - "learning_rate": 6.607753051228333e-05, - "loss": 0.009049218893051148, - "step": 5625 - }, - { - "epoch": 0.9599317988064792, - "grad_norm": 0.0910186693072319, - "learning_rate": 6.60609336919293e-05, - "loss": 0.01068672090768814, - "step": 5630 - }, - { - "epoch": 0.9607843137254902, - "grad_norm": 0.06503022462129593, - "learning_rate": 6.604432353838134e-05, - "loss": 0.010604655742645264, - "step": 5635 - }, - { - "epoch": 0.9616368286445013, - "grad_norm": 0.07681523263454437, - "learning_rate": 6.602770005939363e-05, - "loss": 0.010527564585208893, - "step": 5640 - }, - { - "epoch": 0.9624893435635123, - "grad_norm": 0.0680806040763855, - "learning_rate": 6.601106326272659e-05, - "loss": 0.009375665336847305, - "step": 5645 - }, - { - "epoch": 0.9633418584825234, - "grad_norm": 0.06601905822753906, - "learning_rate": 6.599441315614678e-05, - "loss": 0.009470004588365555, - "step": 5650 - }, - { - "epoch": 0.9641943734015346, - "grad_norm": 0.06291890889406204, - "learning_rate": 6.597774974742706e-05, - "loss": 0.012320800870656966, - "step": 5655 - }, - { - "epoch": 0.9650468883205456, - "grad_norm": 0.0956176221370697, - "learning_rate": 6.596107304434645e-05, - "loss": 0.01018187329173088, - "step": 5660 - }, - { - "epoch": 0.9658994032395567, - "grad_norm": 0.06642715632915497, - "learning_rate": 6.59443830546902e-05, - "loss": 0.010271859169006348, - "step": 5665 - }, - { - "epoch": 0.9667519181585678, - "grad_norm": 0.06783592700958252, - "learning_rate": 6.592767978624973e-05, - "loss": 0.00986798033118248, - "step": 5670 - }, - { - "epoch": 0.9676044330775788, - "grad_norm": 0.05877846106886864, - "learning_rate": 6.591096324682272e-05, - "loss": 0.009708859026432037, - "step": 5675 - }, - { - "epoch": 0.96845694799659, - "grad_norm": 0.049258604645729065, - "learning_rate": 6.589423344421297e-05, - "loss": 0.008615868538618088, - "step": 5680 - }, - { - "epoch": 0.969309462915601, - "grad_norm": 0.05622515454888344, - "learning_rate": 6.587749038623052e-05, - "loss": 0.010138686001300811, - "step": 5685 - }, - { - "epoch": 0.9701619778346121, - "grad_norm": 0.05719893425703049, - "learning_rate": 6.586073408069159e-05, - "loss": 0.00869678258895874, - "step": 5690 - }, - { - "epoch": 0.9710144927536232, - "grad_norm": 0.07675095647573471, - "learning_rate": 6.584396453541856e-05, - "loss": 0.010557885468006133, - "step": 5695 - }, - { - "epoch": 0.9718670076726342, - "grad_norm": 0.0999779924750328, - "learning_rate": 6.582718175824006e-05, - "loss": 0.010667790472507478, - "step": 5700 - }, - { - "epoch": 0.9727195225916454, - "grad_norm": 0.04620293527841568, - "learning_rate": 6.58103857569908e-05, - "loss": 0.0078192800283432, - "step": 5705 - }, - { - "epoch": 0.9735720375106565, - "grad_norm": 0.06725125759840012, - "learning_rate": 6.579357653951174e-05, - "loss": 0.010441574454307555, - "step": 5710 - }, - { - "epoch": 0.9744245524296675, - "grad_norm": 0.09062530100345612, - "learning_rate": 6.577675411364997e-05, - "loss": 0.011757946014404297, - "step": 5715 - }, - { - "epoch": 0.9752770673486786, - "grad_norm": 0.050651032477617264, - "learning_rate": 6.575991848725876e-05, - "loss": 0.009817829728126526, - "step": 5720 - }, - { - "epoch": 0.9761295822676896, - "grad_norm": 0.06951560825109482, - "learning_rate": 6.574306966819755e-05, - "loss": 0.008903174102306366, - "step": 5725 - }, - { - "epoch": 0.9769820971867008, - "grad_norm": 0.0733589306473732, - "learning_rate": 6.57262076643319e-05, - "loss": 0.009786784648895264, - "step": 5730 - }, - { - "epoch": 0.9778346121057119, - "grad_norm": 0.0736282467842102, - "learning_rate": 6.570933248353359e-05, - "loss": 0.012176553905010223, - "step": 5735 - }, - { - "epoch": 0.9786871270247229, - "grad_norm": 0.069704569876194, - "learning_rate": 6.56924441336805e-05, - "loss": 0.008654942363500595, - "step": 5740 - }, - { - "epoch": 0.979539641943734, - "grad_norm": 0.07497496902942657, - "learning_rate": 6.567554262265668e-05, - "loss": 0.010902392119169236, - "step": 5745 - }, - { - "epoch": 0.9803921568627451, - "grad_norm": 0.07559038698673248, - "learning_rate": 6.56586279583523e-05, - "loss": 0.00921270027756691, - "step": 5750 - }, - { - "epoch": 0.9812446717817562, - "grad_norm": 0.05894545465707779, - "learning_rate": 6.56417001486637e-05, - "loss": 0.009073206037282944, - "step": 5755 - }, - { - "epoch": 0.9820971867007673, - "grad_norm": 0.06555377691984177, - "learning_rate": 6.562475920149335e-05, - "loss": 0.010872729122638702, - "step": 5760 - }, - { - "epoch": 0.9829497016197783, - "grad_norm": 0.15036429464817047, - "learning_rate": 6.560780512474984e-05, - "loss": 0.009879975020885468, - "step": 5765 - }, - { - "epoch": 0.9838022165387894, - "grad_norm": 0.06842299550771713, - "learning_rate": 6.559083792634791e-05, - "loss": 0.00965554341673851, - "step": 5770 - }, - { - "epoch": 0.9846547314578005, - "grad_norm": 0.0486510805785656, - "learning_rate": 6.557385761420839e-05, - "loss": 0.00872802734375, - "step": 5775 - }, - { - "epoch": 0.9855072463768116, - "grad_norm": 0.059796739369630814, - "learning_rate": 6.555686419625826e-05, - "loss": 0.009720289707183838, - "step": 5780 - }, - { - "epoch": 0.9863597612958227, - "grad_norm": 0.07971934229135513, - "learning_rate": 6.553985768043062e-05, - "loss": 0.008043316006660462, - "step": 5785 - }, - { - "epoch": 0.9872122762148338, - "grad_norm": 0.09556971490383148, - "learning_rate": 6.552283807466468e-05, - "loss": 0.009030704945325851, - "step": 5790 - }, - { - "epoch": 0.9880647911338448, - "grad_norm": 0.04631726071238518, - "learning_rate": 6.550580538690577e-05, - "loss": 0.007388583570718765, - "step": 5795 - }, - { - "epoch": 0.9889173060528559, - "grad_norm": 0.08172665536403656, - "learning_rate": 6.548875962510528e-05, - "loss": 0.007863265275955201, - "step": 5800 - }, - { - "epoch": 0.989769820971867, - "grad_norm": 0.11129096895456314, - "learning_rate": 6.547170079722076e-05, - "loss": 0.012218999862670898, - "step": 5805 - }, - { - "epoch": 0.9906223358908781, - "grad_norm": 0.06619804352521896, - "learning_rate": 6.545462891121584e-05, - "loss": 0.007535400986671448, - "step": 5810 - }, - { - "epoch": 0.9914748508098892, - "grad_norm": 0.11495351046323776, - "learning_rate": 6.543754397506025e-05, - "loss": 0.0121284119784832, - "step": 5815 - }, - { - "epoch": 0.9923273657289002, - "grad_norm": 0.06017669290304184, - "learning_rate": 6.542044599672978e-05, - "loss": 0.008776353299617767, - "step": 5820 - }, - { - "epoch": 0.9931798806479113, - "grad_norm": 0.08049561828374863, - "learning_rate": 6.540333498420637e-05, - "loss": 0.010460492223501205, - "step": 5825 - }, - { - "epoch": 0.9940323955669225, - "grad_norm": 0.07041274011135101, - "learning_rate": 6.538621094547798e-05, - "loss": 0.008290639519691468, - "step": 5830 - }, - { - "epoch": 0.9948849104859335, - "grad_norm": 0.061981480568647385, - "learning_rate": 6.53690738885387e-05, - "loss": 0.007011125236749649, - "step": 5835 - }, - { - "epoch": 0.9957374254049446, - "grad_norm": 0.09022640436887741, - "learning_rate": 6.535192382138867e-05, - "loss": 0.012455084919929504, - "step": 5840 - }, - { - "epoch": 0.9965899403239556, - "grad_norm": 0.05652628839015961, - "learning_rate": 6.53347607520341e-05, - "loss": 0.011704784631729127, - "step": 5845 - }, - { - "epoch": 0.9974424552429667, - "grad_norm": 0.0717577114701271, - "learning_rate": 6.531758468848732e-05, - "loss": 0.007738448679447174, - "step": 5850 - }, - { - "epoch": 0.9982949701619779, - "grad_norm": 0.04797588661313057, - "learning_rate": 6.530039563876665e-05, - "loss": 0.00894927978515625, - "step": 5855 - }, - { - "epoch": 0.9991474850809889, - "grad_norm": 0.06541015207767487, - "learning_rate": 6.528319361089651e-05, - "loss": 0.00731588676571846, - "step": 5860 - }, - { - "epoch": 0.9998294970161978, - "eval_loss": 0.03369956836104393, - "eval_runtime": 3.5892, - "eval_samples_per_second": 70.21, - "eval_steps_per_second": 1.114, - "step": 5864 - }, - { - "eval_cer_subset": 0.014444607292328236, - "eval_cer_subset_edit_distance": 887, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 5864 - }, - { - "epoch": 1.0, - "grad_norm": 0.06960473209619522, - "learning_rate": 6.52659786129074e-05, - "loss": 0.009423434734344482, - "step": 5865 - }, - { - "epoch": 1.000852514919011, - "grad_norm": 0.08438396453857422, - "learning_rate": 6.524875065283587e-05, - "loss": 0.009560108184814453, - "step": 5870 - }, - { - "epoch": 1.0017050298380221, - "grad_norm": 0.06281089037656784, - "learning_rate": 6.523150973872446e-05, - "loss": 0.007503298670053482, - "step": 5875 - }, - { - "epoch": 1.0025575447570332, - "grad_norm": 0.11766793578863144, - "learning_rate": 6.52142558786218e-05, - "loss": 0.008890827000141144, - "step": 5880 - }, - { - "epoch": 1.0034100596760442, - "grad_norm": 0.058115314692258835, - "learning_rate": 6.519698908058262e-05, - "loss": 0.006190531700849533, - "step": 5885 - }, - { - "epoch": 1.0042625745950555, - "grad_norm": 0.06857501715421677, - "learning_rate": 6.51797093526676e-05, - "loss": 0.007162582129240036, - "step": 5890 - }, - { - "epoch": 1.0051150895140666, - "grad_norm": 0.03517467528581619, - "learning_rate": 6.51624167029435e-05, - "loss": 0.0060476396232843396, - "step": 5895 - }, - { - "epoch": 1.0059676044330776, - "grad_norm": 0.10047292709350586, - "learning_rate": 6.514511113948307e-05, - "loss": 0.006416718661785126, - "step": 5900 - }, - { - "epoch": 1.0068201193520887, - "grad_norm": 0.07266796380281448, - "learning_rate": 6.512779267036518e-05, - "loss": 0.005519292503595352, - "step": 5905 - }, - { - "epoch": 1.0076726342710998, - "grad_norm": 0.05385264754295349, - "learning_rate": 6.511046130367464e-05, - "loss": 0.006731215119361878, - "step": 5910 - }, - { - "epoch": 1.0085251491901108, - "grad_norm": 0.0927869975566864, - "learning_rate": 6.50931170475023e-05, - "loss": 0.0073065564036369325, - "step": 5915 - }, - { - "epoch": 1.0093776641091219, - "grad_norm": 0.08416371792554855, - "learning_rate": 6.507575990994504e-05, - "loss": 0.005843915045261383, - "step": 5920 - }, - { - "epoch": 1.010230179028133, - "grad_norm": 0.06585095822811127, - "learning_rate": 6.505838989910576e-05, - "loss": 0.006345044076442719, - "step": 5925 - }, - { - "epoch": 1.011082693947144, - "grad_norm": 0.06341785192489624, - "learning_rate": 6.504100702309336e-05, - "loss": 0.005391617119312286, - "step": 5930 - }, - { - "epoch": 1.011935208866155, - "grad_norm": 0.08260001242160797, - "learning_rate": 6.502361129002273e-05, - "loss": 0.008031262457370758, - "step": 5935 - }, - { - "epoch": 1.0127877237851663, - "grad_norm": 0.08805666118860245, - "learning_rate": 6.500620270801478e-05, - "loss": 0.006408621370792389, - "step": 5940 - }, - { - "epoch": 1.0136402387041774, - "grad_norm": 0.0704861581325531, - "learning_rate": 6.498878128519642e-05, - "loss": 0.006208440661430359, - "step": 5945 - }, - { - "epoch": 1.0144927536231885, - "grad_norm": 0.07539117336273193, - "learning_rate": 6.497134702970055e-05, - "loss": 0.005263582617044449, - "step": 5950 - }, - { - "epoch": 1.0153452685421995, - "grad_norm": 0.022507963702082634, - "learning_rate": 6.495389994966606e-05, - "loss": 0.005692056566476822, - "step": 5955 - }, - { - "epoch": 1.0161977834612106, - "grad_norm": 0.05641510710120201, - "learning_rate": 6.493644005323783e-05, - "loss": 0.007954449951648712, - "step": 5960 - }, - { - "epoch": 1.0170502983802217, - "grad_norm": 0.04853788763284683, - "learning_rate": 6.49189673485667e-05, - "loss": 0.006910678744316101, - "step": 5965 - }, - { - "epoch": 1.0179028132992327, - "grad_norm": 0.07868898659944534, - "learning_rate": 6.490148184380956e-05, - "loss": 0.007678037136793136, - "step": 5970 - }, - { - "epoch": 1.0187553282182438, - "grad_norm": 0.08481275290250778, - "learning_rate": 6.488398354712917e-05, - "loss": 0.0060794509947299956, - "step": 5975 - }, - { - "epoch": 1.0196078431372548, - "grad_norm": 0.05573422089219093, - "learning_rate": 6.486647246669435e-05, - "loss": 0.0050107244402170185, - "step": 5980 - }, - { - "epoch": 1.020460358056266, - "grad_norm": 0.10777781158685684, - "learning_rate": 6.484894861067983e-05, - "loss": 0.006611569225788117, - "step": 5985 - }, - { - "epoch": 1.0213128729752772, - "grad_norm": 0.041842151433229446, - "learning_rate": 6.483141198726635e-05, - "loss": 0.0060344856232404705, - "step": 5990 - }, - { - "epoch": 1.0221653878942882, - "grad_norm": 0.05765567347407341, - "learning_rate": 6.48138626046406e-05, - "loss": 0.005772604793310166, - "step": 5995 - }, - { - "epoch": 1.0230179028132993, - "grad_norm": 0.05987582355737686, - "learning_rate": 6.479630047099517e-05, - "loss": 0.006899695098400116, - "step": 6000 - }, - { - "epoch": 1.0238704177323104, - "grad_norm": 0.046085257083177567, - "learning_rate": 6.477872559452867e-05, - "loss": 0.006151453405618667, - "step": 6005 - }, - { - "epoch": 1.0247229326513214, - "grad_norm": 0.05994739755988121, - "learning_rate": 6.476113798344566e-05, - "loss": 0.007787984609603882, - "step": 6010 - }, - { - "epoch": 1.0255754475703325, - "grad_norm": 0.08866287767887115, - "learning_rate": 6.47435376459566e-05, - "loss": 0.007754974067211151, - "step": 6015 - }, - { - "epoch": 1.0264279624893435, - "grad_norm": 0.07492240518331528, - "learning_rate": 6.472592459027793e-05, - "loss": 0.005562775582075119, - "step": 6020 - }, - { - "epoch": 1.0272804774083546, - "grad_norm": 0.058771468698978424, - "learning_rate": 6.470829882463198e-05, - "loss": 0.008101420104503631, - "step": 6025 - }, - { - "epoch": 1.0281329923273657, - "grad_norm": 0.08099868148565292, - "learning_rate": 6.469066035724708e-05, - "loss": 0.007585109025239944, - "step": 6030 - }, - { - "epoch": 1.0289855072463767, - "grad_norm": 0.09368649870157242, - "learning_rate": 6.467300919635743e-05, - "loss": 0.007342393696308136, - "step": 6035 - }, - { - "epoch": 1.029838022165388, - "grad_norm": 0.07358572632074356, - "learning_rate": 6.465534535020317e-05, - "loss": 0.007179292291402817, - "step": 6040 - }, - { - "epoch": 1.030690537084399, - "grad_norm": 0.0542459636926651, - "learning_rate": 6.46376688270304e-05, - "loss": 0.007178785651922226, - "step": 6045 - }, - { - "epoch": 1.0315430520034101, - "grad_norm": 0.04534808546304703, - "learning_rate": 6.461997963509109e-05, - "loss": 0.005939013883471489, - "step": 6050 - }, - { - "epoch": 1.0323955669224212, - "grad_norm": 0.04498334974050522, - "learning_rate": 6.460227778264314e-05, - "loss": 0.007932021468877792, - "step": 6055 - }, - { - "epoch": 1.0332480818414322, - "grad_norm": 0.09503943473100662, - "learning_rate": 6.458456327795038e-05, - "loss": 0.006005316227674484, - "step": 6060 - }, - { - "epoch": 1.0341005967604433, - "grad_norm": 0.06634567677974701, - "learning_rate": 6.456683612928252e-05, - "loss": 0.00472346730530262, - "step": 6065 - }, - { - "epoch": 1.0349531116794544, - "grad_norm": 0.06090138852596283, - "learning_rate": 6.454909634491518e-05, - "loss": 0.0071956045925617215, - "step": 6070 - }, - { - "epoch": 1.0358056265984654, - "grad_norm": 0.09833965450525284, - "learning_rate": 6.453134393312988e-05, - "loss": 0.00738539919257164, - "step": 6075 - }, - { - "epoch": 1.0366581415174765, - "grad_norm": 0.07924133539199829, - "learning_rate": 6.451357890221406e-05, - "loss": 0.008464773744344711, - "step": 6080 - }, - { - "epoch": 1.0375106564364875, - "grad_norm": 0.04132373258471489, - "learning_rate": 6.4495801260461e-05, - "loss": 0.005705388635396958, - "step": 6085 - }, - { - "epoch": 1.0383631713554988, - "grad_norm": 0.08653424680233002, - "learning_rate": 6.44780110161699e-05, - "loss": 0.00777137503027916, - "step": 6090 - }, - { - "epoch": 1.0392156862745099, - "grad_norm": 0.08147025108337402, - "learning_rate": 6.446020817764583e-05, - "loss": 0.005003783106803894, - "step": 6095 - }, - { - "epoch": 1.040068201193521, - "grad_norm": 0.07091398537158966, - "learning_rate": 6.444239275319977e-05, - "loss": 0.005957254022359848, - "step": 6100 - }, - { - "epoch": 1.040920716112532, - "grad_norm": 0.06259306520223618, - "learning_rate": 6.442456475114855e-05, - "loss": 0.005096634104847908, - "step": 6105 - }, - { - "epoch": 1.041773231031543, - "grad_norm": 0.07044103741645813, - "learning_rate": 6.440672417981485e-05, - "loss": 0.00557241328060627, - "step": 6110 - }, - { - "epoch": 1.0426257459505541, - "grad_norm": 0.05029159039258957, - "learning_rate": 6.438887104752726e-05, - "loss": 0.0056043524295091626, - "step": 6115 - }, - { - "epoch": 1.0434782608695652, - "grad_norm": 0.04778699576854706, - "learning_rate": 6.437100536262022e-05, - "loss": 0.00855453684926033, - "step": 6120 - }, - { - "epoch": 1.0443307757885762, - "grad_norm": 0.07467184215784073, - "learning_rate": 6.435312713343401e-05, - "loss": 0.006690071523189544, - "step": 6125 - }, - { - "epoch": 1.0451832907075873, - "grad_norm": 0.07189153879880905, - "learning_rate": 6.433523636831481e-05, - "loss": 0.007009527087211609, - "step": 6130 - }, - { - "epoch": 1.0460358056265984, - "grad_norm": 0.08000020682811737, - "learning_rate": 6.431733307561459e-05, - "loss": 0.007411211729049683, - "step": 6135 - }, - { - "epoch": 1.0468883205456097, - "grad_norm": 0.06737730652093887, - "learning_rate": 6.429941726369124e-05, - "loss": 0.006843548268079758, - "step": 6140 - }, - { - "epoch": 1.0477408354646207, - "grad_norm": 0.09834714978933334, - "learning_rate": 6.428148894090841e-05, - "loss": 0.007167841494083405, - "step": 6145 - }, - { - "epoch": 1.0485933503836318, - "grad_norm": 0.06415695697069168, - "learning_rate": 6.426354811563567e-05, - "loss": 0.005131457373499871, - "step": 6150 - }, - { - "epoch": 1.0494458653026428, - "grad_norm": 0.07823871076107025, - "learning_rate": 6.424559479624839e-05, - "loss": 0.004797356575727463, - "step": 6155 - }, - { - "epoch": 1.050298380221654, - "grad_norm": 0.07165013998746872, - "learning_rate": 6.422762899112777e-05, - "loss": 0.006430945545434952, - "step": 6160 - }, - { - "epoch": 1.051150895140665, - "grad_norm": 0.10924427956342697, - "learning_rate": 6.420965070866086e-05, - "loss": 0.008151047676801682, - "step": 6165 - }, - { - "epoch": 1.052003410059676, - "grad_norm": 0.10381831228733063, - "learning_rate": 6.41916599572405e-05, - "loss": 0.009056917577981948, - "step": 6170 - }, - { - "epoch": 1.052855924978687, - "grad_norm": 0.05251248553395271, - "learning_rate": 6.417365674526539e-05, - "loss": 0.004240944981575012, - "step": 6175 - }, - { - "epoch": 1.0537084398976981, - "grad_norm": 0.0812104344367981, - "learning_rate": 6.415564108114001e-05, - "loss": 0.008805926889181137, - "step": 6180 - }, - { - "epoch": 1.0545609548167092, - "grad_norm": 0.05640942230820656, - "learning_rate": 6.413761297327469e-05, - "loss": 0.005727213248610497, - "step": 6185 - }, - { - "epoch": 1.0554134697357205, - "grad_norm": 0.10114334523677826, - "learning_rate": 6.411957243008552e-05, - "loss": 0.008660107105970382, - "step": 6190 - }, - { - "epoch": 1.0562659846547315, - "grad_norm": 0.06809760630130768, - "learning_rate": 6.410151945999447e-05, - "loss": 0.006786180287599563, - "step": 6195 - }, - { - "epoch": 1.0571184995737426, - "grad_norm": 0.08121974021196365, - "learning_rate": 6.408345407142924e-05, - "loss": 0.004730105027556419, - "step": 6200 - }, - { - "epoch": 1.0579710144927537, - "grad_norm": 0.0630379393696785, - "learning_rate": 6.406537627282336e-05, - "loss": 0.006532897800207138, - "step": 6205 - }, - { - "epoch": 1.0588235294117647, - "grad_norm": 0.09354323893785477, - "learning_rate": 6.404728607261612e-05, - "loss": 0.008165966719388962, - "step": 6210 - }, - { - "epoch": 1.0596760443307758, - "grad_norm": 0.0509798526763916, - "learning_rate": 6.402918347925267e-05, - "loss": 0.006781977415084839, - "step": 6215 - }, - { - "epoch": 1.0605285592497868, - "grad_norm": 0.09830603748559952, - "learning_rate": 6.401106850118389e-05, - "loss": 0.00675075501203537, - "step": 6220 - }, - { - "epoch": 1.061381074168798, - "grad_norm": 0.08417326211929321, - "learning_rate": 6.399294114686645e-05, - "loss": 0.005759935826063156, - "step": 6225 - }, - { - "epoch": 1.062233589087809, - "grad_norm": 0.04999511316418648, - "learning_rate": 6.39748014247628e-05, - "loss": 0.0059943776577711105, - "step": 6230 - }, - { - "epoch": 1.06308610400682, - "grad_norm": 0.0355304591357708, - "learning_rate": 6.395664934334116e-05, - "loss": 0.003978967294096946, - "step": 6235 - }, - { - "epoch": 1.0639386189258313, - "grad_norm": 0.09096778929233551, - "learning_rate": 6.393848491107554e-05, - "loss": 0.006428928673267364, - "step": 6240 - }, - { - "epoch": 1.0647911338448424, - "grad_norm": 0.09047707170248032, - "learning_rate": 6.392030813644569e-05, - "loss": 0.005584535002708435, - "step": 6245 - }, - { - "epoch": 1.0656436487638534, - "grad_norm": 0.07133036106824875, - "learning_rate": 6.390211902793714e-05, - "loss": 0.00610351674258709, - "step": 6250 - }, - { - "epoch": 1.0664961636828645, - "grad_norm": 0.1025620549917221, - "learning_rate": 6.388391759404117e-05, - "loss": 0.006316560506820679, - "step": 6255 - }, - { - "epoch": 1.0673486786018755, - "grad_norm": 0.0922650694847107, - "learning_rate": 6.386570384325482e-05, - "loss": 0.008717238903045654, - "step": 6260 - }, - { - "epoch": 1.0682011935208866, - "grad_norm": 0.094338558614254, - "learning_rate": 6.384747778408085e-05, - "loss": 0.0067199327051639555, - "step": 6265 - }, - { - "epoch": 1.0690537084398977, - "grad_norm": 0.07260075211524963, - "learning_rate": 6.382923942502782e-05, - "loss": 0.007249505072832107, - "step": 6270 - }, - { - "epoch": 1.0699062233589087, - "grad_norm": 0.06572386622428894, - "learning_rate": 6.381098877460999e-05, - "loss": 0.007879015803337098, - "step": 6275 - }, - { - "epoch": 1.0707587382779198, - "grad_norm": 0.11646077036857605, - "learning_rate": 6.379272584134737e-05, - "loss": 0.006477512419223785, - "step": 6280 - }, - { - "epoch": 1.0716112531969308, - "grad_norm": 0.14154180884361267, - "learning_rate": 6.37744506337657e-05, - "loss": 0.0069471016526222226, - "step": 6285 - }, - { - "epoch": 1.0724637681159421, - "grad_norm": 0.113606296479702, - "learning_rate": 6.375616316039647e-05, - "loss": 0.010210946947336198, - "step": 6290 - }, - { - "epoch": 1.0733162830349532, - "grad_norm": 0.07193166017532349, - "learning_rate": 6.373786342977687e-05, - "loss": 0.00820360854268074, - "step": 6295 - }, - { - "epoch": 1.0741687979539642, - "grad_norm": 0.06180251017212868, - "learning_rate": 6.371955145044983e-05, - "loss": 0.006048502773046494, - "step": 6300 - }, - { - "epoch": 1.0750213128729753, - "grad_norm": 0.06956778466701508, - "learning_rate": 6.370122723096398e-05, - "loss": 0.005345676839351654, - "step": 6305 - }, - { - "epoch": 1.0758738277919864, - "grad_norm": 0.09170625358819962, - "learning_rate": 6.368289077987368e-05, - "loss": 0.0068355493247509004, - "step": 6310 - }, - { - "epoch": 1.0767263427109974, - "grad_norm": 0.07023731619119644, - "learning_rate": 6.366454210573901e-05, - "loss": 0.004600600153207779, - "step": 6315 - }, - { - "epoch": 1.0775788576300085, - "grad_norm": 0.07429320365190506, - "learning_rate": 6.36461812171257e-05, - "loss": 0.006272794306278228, - "step": 6320 - }, - { - "epoch": 1.0784313725490196, - "grad_norm": 0.11356805264949799, - "learning_rate": 6.362780812260528e-05, - "loss": 0.0048342026770114895, - "step": 6325 - }, - { - "epoch": 1.0792838874680306, - "grad_norm": 0.11231013387441635, - "learning_rate": 6.360942283075489e-05, - "loss": 0.00653451681137085, - "step": 6330 - }, - { - "epoch": 1.0801364023870417, - "grad_norm": 0.09655431658029556, - "learning_rate": 6.359102535015739e-05, - "loss": 0.008280844241380692, - "step": 6335 - }, - { - "epoch": 1.080988917306053, - "grad_norm": 0.10172779113054276, - "learning_rate": 6.357261568940135e-05, - "loss": 0.007757744938135147, - "step": 6340 - }, - { - "epoch": 1.081841432225064, - "grad_norm": 0.06417235732078552, - "learning_rate": 6.3554193857081e-05, - "loss": 0.007309675216674805, - "step": 6345 - }, - { - "epoch": 1.082693947144075, - "grad_norm": 0.053178418427705765, - "learning_rate": 6.35357598617963e-05, - "loss": 0.007162143290042877, - "step": 6350 - }, - { - "epoch": 1.0835464620630861, - "grad_norm": 0.07408315688371658, - "learning_rate": 6.351731371215278e-05, - "loss": 0.008392173796892166, - "step": 6355 - }, - { - "epoch": 1.0843989769820972, - "grad_norm": 0.03643275052309036, - "learning_rate": 6.349885541676179e-05, - "loss": 0.00610513798892498, - "step": 6360 - }, - { - "epoch": 1.0852514919011083, - "grad_norm": 0.04701307415962219, - "learning_rate": 6.348038498424023e-05, - "loss": 0.00612705871462822, - "step": 6365 - }, - { - "epoch": 1.0861040068201193, - "grad_norm": 0.05050053820014, - "learning_rate": 6.346190242321075e-05, - "loss": 0.005640604719519615, - "step": 6370 - }, - { - "epoch": 1.0869565217391304, - "grad_norm": 0.05203640088438988, - "learning_rate": 6.344340774230159e-05, - "loss": 0.005340654775500298, - "step": 6375 - }, - { - "epoch": 1.0878090366581414, - "grad_norm": 0.07451866567134857, - "learning_rate": 6.342490095014669e-05, - "loss": 0.006459225714206695, - "step": 6380 - }, - { - "epoch": 1.0886615515771525, - "grad_norm": 0.09951499849557877, - "learning_rate": 6.340638205538566e-05, - "loss": 0.008529558777809143, - "step": 6385 - }, - { - "epoch": 1.0895140664961638, - "grad_norm": 0.06064416840672493, - "learning_rate": 6.33878510666637e-05, - "loss": 0.007885071635246276, - "step": 6390 - }, - { - "epoch": 1.0903665814151748, - "grad_norm": 0.09382321685552597, - "learning_rate": 6.33693079926317e-05, - "loss": 0.007992906123399734, - "step": 6395 - }, - { - "epoch": 1.091219096334186, - "grad_norm": 0.054066915065050125, - "learning_rate": 6.335075284194621e-05, - "loss": 0.007473263889551163, - "step": 6400 - }, - { - "epoch": 1.092071611253197, - "grad_norm": 0.06763065606355667, - "learning_rate": 6.333218562326937e-05, - "loss": 0.006374929845333099, - "step": 6405 - }, - { - "epoch": 1.092924126172208, - "grad_norm": 0.0656818076968193, - "learning_rate": 6.331360634526899e-05, - "loss": 0.006085469573736191, - "step": 6410 - }, - { - "epoch": 1.093776641091219, - "grad_norm": 0.060463279485702515, - "learning_rate": 6.329501501661848e-05, - "loss": 0.005605050176382065, - "step": 6415 - }, - { - "epoch": 1.0946291560102301, - "grad_norm": 0.05734890326857567, - "learning_rate": 6.32764116459969e-05, - "loss": 0.00563613623380661, - "step": 6420 - }, - { - "epoch": 1.0954816709292412, - "grad_norm": 0.0502542182803154, - "learning_rate": 6.32577962420889e-05, - "loss": 0.004675766825675965, - "step": 6425 - }, - { - "epoch": 1.0963341858482523, - "grad_norm": 0.06550677120685577, - "learning_rate": 6.32391688135848e-05, - "loss": 0.006265480071306229, - "step": 6430 - }, - { - "epoch": 1.0971867007672633, - "grad_norm": 0.1298699826002121, - "learning_rate": 6.322052936918048e-05, - "loss": 0.008352620899677277, - "step": 6435 - }, - { - "epoch": 1.0980392156862746, - "grad_norm": 0.08422241359949112, - "learning_rate": 6.320187791757748e-05, - "loss": 0.005868597701191902, - "step": 6440 - }, - { - "epoch": 1.0988917306052857, - "grad_norm": 0.07807652652263641, - "learning_rate": 6.318321446748291e-05, - "loss": 0.010353461652994157, - "step": 6445 - }, - { - "epoch": 1.0997442455242967, - "grad_norm": 0.0663999617099762, - "learning_rate": 6.316453902760946e-05, - "loss": 0.00667201578617096, - "step": 6450 - }, - { - "epoch": 1.1005967604433078, - "grad_norm": 0.06362646073102951, - "learning_rate": 6.314585160667547e-05, - "loss": 0.006539353728294372, - "step": 6455 - }, - { - "epoch": 1.1014492753623188, - "grad_norm": 0.04160219058394432, - "learning_rate": 6.312715221340485e-05, - "loss": 0.004082740843296051, - "step": 6460 - }, - { - "epoch": 1.10230179028133, - "grad_norm": 0.05449013039469719, - "learning_rate": 6.31084408565271e-05, - "loss": 0.007537595182657242, - "step": 6465 - }, - { - "epoch": 1.103154305200341, - "grad_norm": 0.06819169223308563, - "learning_rate": 6.308971754477729e-05, - "loss": 0.006866573542356491, - "step": 6470 - }, - { - "epoch": 1.104006820119352, - "grad_norm": 0.06622573733329773, - "learning_rate": 6.307098228689611e-05, - "loss": 0.009055091440677643, - "step": 6475 - }, - { - "epoch": 1.104859335038363, - "grad_norm": 0.05732693895697594, - "learning_rate": 6.305223509162978e-05, - "loss": 0.006077280640602112, - "step": 6480 - }, - { - "epoch": 1.1057118499573741, - "grad_norm": 0.06630431115627289, - "learning_rate": 6.303347596773012e-05, - "loss": 0.0064442440867424015, - "step": 6485 - }, - { - "epoch": 1.1065643648763854, - "grad_norm": 0.06782745569944382, - "learning_rate": 6.301470492395451e-05, - "loss": 0.005072608217597007, - "step": 6490 - }, - { - "epoch": 1.1074168797953965, - "grad_norm": 0.05796601250767708, - "learning_rate": 6.299592196906591e-05, - "loss": 0.0074319586157798765, - "step": 6495 - }, - { - "epoch": 1.1082693947144076, - "grad_norm": 0.04627149552106857, - "learning_rate": 6.297712711183282e-05, - "loss": 0.00512685589492321, - "step": 6500 - }, - { - "epoch": 1.1091219096334186, - "grad_norm": 0.08210720866918564, - "learning_rate": 6.295832036102929e-05, - "loss": 0.006917678564786911, - "step": 6505 - }, - { - "epoch": 1.1099744245524297, - "grad_norm": 0.08367052674293518, - "learning_rate": 6.293950172543496e-05, - "loss": 0.0054212499409914015, - "step": 6510 - }, - { - "epoch": 1.1108269394714407, - "grad_norm": 0.08192700892686844, - "learning_rate": 6.292067121383499e-05, - "loss": 0.00791442021727562, - "step": 6515 - }, - { - "epoch": 1.1116794543904518, - "grad_norm": 0.05766206234693527, - "learning_rate": 6.290182883502008e-05, - "loss": 0.006540960818529129, - "step": 6520 - }, - { - "epoch": 1.1125319693094629, - "grad_norm": 0.07752664387226105, - "learning_rate": 6.28829745977865e-05, - "loss": 0.009196925908327103, - "step": 6525 - }, - { - "epoch": 1.113384484228474, - "grad_norm": 0.07419038563966751, - "learning_rate": 6.2864108510936e-05, - "loss": 0.007524489611387253, - "step": 6530 - }, - { - "epoch": 1.1142369991474852, - "grad_norm": 0.04851066321134567, - "learning_rate": 6.284523058327593e-05, - "loss": 0.004060015082359314, - "step": 6535 - }, - { - "epoch": 1.1150895140664963, - "grad_norm": 0.0764140635728836, - "learning_rate": 6.282634082361911e-05, - "loss": 0.006797254830598831, - "step": 6540 - }, - { - "epoch": 1.1159420289855073, - "grad_norm": 0.06873292475938797, - "learning_rate": 6.280743924078392e-05, - "loss": 0.007637844234704971, - "step": 6545 - }, - { - "epoch": 1.1167945439045184, - "grad_norm": 0.047832686454057693, - "learning_rate": 6.278852584359425e-05, - "loss": 0.00542646199464798, - "step": 6550 - }, - { - "epoch": 1.1176470588235294, - "grad_norm": 0.10061443597078323, - "learning_rate": 6.27696006408795e-05, - "loss": 0.008591605722904206, - "step": 6555 - }, - { - "epoch": 1.1184995737425405, - "grad_norm": 0.09729041159152985, - "learning_rate": 6.27506636414746e-05, - "loss": 0.0064585842192173, - "step": 6560 - }, - { - "epoch": 1.1193520886615516, - "grad_norm": 0.04427873343229294, - "learning_rate": 6.273171485421992e-05, - "loss": 0.004846593365073204, - "step": 6565 - }, - { - "epoch": 1.1202046035805626, - "grad_norm": 0.07813888788223267, - "learning_rate": 6.271275428796146e-05, - "loss": 0.007345958054065705, - "step": 6570 - }, - { - "epoch": 1.1210571184995737, - "grad_norm": 0.12500733137130737, - "learning_rate": 6.269378195155058e-05, - "loss": 0.006376177072525024, - "step": 6575 - }, - { - "epoch": 1.1219096334185847, - "grad_norm": 0.09636004269123077, - "learning_rate": 6.267479785384422e-05, - "loss": 0.0069282323122024534, - "step": 6580 - }, - { - "epoch": 1.1227621483375958, - "grad_norm": 0.06236017122864723, - "learning_rate": 6.265580200370478e-05, - "loss": 0.0050656192004680635, - "step": 6585 - }, - { - "epoch": 1.123614663256607, - "grad_norm": 0.0596463568508625, - "learning_rate": 6.263679441000019e-05, - "loss": 0.006198804825544357, - "step": 6590 - }, - { - "epoch": 1.1244671781756181, - "grad_norm": 0.0846855491399765, - "learning_rate": 6.261777508160378e-05, - "loss": 0.0073812372982501985, - "step": 6595 - }, - { - "epoch": 1.1253196930946292, - "grad_norm": 0.05228402093052864, - "learning_rate": 6.259874402739442e-05, - "loss": 0.006196213513612747, - "step": 6600 - }, - { - "epoch": 1.1261722080136403, - "grad_norm": 0.08395595103502274, - "learning_rate": 6.257970125625647e-05, - "loss": 0.0060448311269283295, - "step": 6605 - }, - { - "epoch": 1.1270247229326513, - "grad_norm": 0.055274344980716705, - "learning_rate": 6.25606467770797e-05, - "loss": 0.006578336656093598, - "step": 6610 - }, - { - "epoch": 1.1278772378516624, - "grad_norm": 0.054609380662441254, - "learning_rate": 6.254158059875936e-05, - "loss": 0.008093905448913575, - "step": 6615 - }, - { - "epoch": 1.1287297527706734, - "grad_norm": 0.05168715491890907, - "learning_rate": 6.25225027301962e-05, - "loss": 0.006086795404553413, - "step": 6620 - }, - { - "epoch": 1.1295822676896845, - "grad_norm": 0.06260590255260468, - "learning_rate": 6.250341318029641e-05, - "loss": 0.007288631051778793, - "step": 6625 - }, - { - "epoch": 1.1304347826086956, - "grad_norm": 0.06585957854986191, - "learning_rate": 6.24843119579716e-05, - "loss": 0.005779954791069031, - "step": 6630 - }, - { - "epoch": 1.1312872975277068, - "grad_norm": 0.05828391760587692, - "learning_rate": 6.246519907213888e-05, - "loss": 0.006320308148860932, - "step": 6635 - }, - { - "epoch": 1.132139812446718, - "grad_norm": 0.08400154113769531, - "learning_rate": 6.244607453172078e-05, - "loss": 0.00452205128967762, - "step": 6640 - }, - { - "epoch": 1.132992327365729, - "grad_norm": 0.059920940548181534, - "learning_rate": 6.242693834564525e-05, - "loss": 0.00620727390050888, - "step": 6645 - }, - { - "epoch": 1.13384484228474, - "grad_norm": 0.1100456491112709, - "learning_rate": 6.240779052284571e-05, - "loss": 0.006768511235713958, - "step": 6650 - }, - { - "epoch": 1.134697357203751, - "grad_norm": 0.07722730189561844, - "learning_rate": 6.238863107226102e-05, - "loss": 0.008207496255636215, - "step": 6655 - }, - { - "epoch": 1.1355498721227621, - "grad_norm": 0.05468403548002243, - "learning_rate": 6.236946000283542e-05, - "loss": 0.005272969231009483, - "step": 6660 - }, - { - "epoch": 1.1364023870417732, - "grad_norm": 0.0685155913233757, - "learning_rate": 6.235027732351863e-05, - "loss": 0.008074409514665603, - "step": 6665 - }, - { - "epoch": 1.1372549019607843, - "grad_norm": 0.07667854428291321, - "learning_rate": 6.233108304326571e-05, - "loss": 0.00863628089427948, - "step": 6670 - }, - { - "epoch": 1.1381074168797953, - "grad_norm": 0.0727204978466034, - "learning_rate": 6.231187717103727e-05, - "loss": 0.004446333646774292, - "step": 6675 - }, - { - "epoch": 1.1389599317988064, - "grad_norm": 0.06465403735637665, - "learning_rate": 6.229265971579918e-05, - "loss": 0.007380707561969757, - "step": 6680 - }, - { - "epoch": 1.1398124467178175, - "grad_norm": 0.04102586954832077, - "learning_rate": 6.227343068652281e-05, - "loss": 0.006103607639670372, - "step": 6685 - }, - { - "epoch": 1.1406649616368287, - "grad_norm": 0.06988929212093353, - "learning_rate": 6.225419009218493e-05, - "loss": 0.007358456403017044, - "step": 6690 - }, - { - "epoch": 1.1415174765558398, - "grad_norm": 0.07802724838256836, - "learning_rate": 6.223493794176767e-05, - "loss": 0.007887010276317597, - "step": 6695 - }, - { - "epoch": 1.1423699914748509, - "grad_norm": 0.10777991265058517, - "learning_rate": 6.221567424425857e-05, - "loss": 0.007653985172510147, - "step": 6700 - }, - { - "epoch": 1.143222506393862, - "grad_norm": 0.0922352522611618, - "learning_rate": 6.219639900865058e-05, - "loss": 0.007459624856710434, - "step": 6705 - }, - { - "epoch": 1.144075021312873, - "grad_norm": 0.06321967393159866, - "learning_rate": 6.217711224394202e-05, - "loss": 0.00540911853313446, - "step": 6710 - }, - { - "epoch": 1.144927536231884, - "grad_norm": 0.09344825148582458, - "learning_rate": 6.215781395913656e-05, - "loss": 0.0053936421871185304, - "step": 6715 - }, - { - "epoch": 1.145780051150895, - "grad_norm": 0.03457584232091904, - "learning_rate": 6.213850416324333e-05, - "loss": 0.006388355046510696, - "step": 6720 - }, - { - "epoch": 1.1466325660699062, - "grad_norm": 0.06931985914707184, - "learning_rate": 6.211918286527676e-05, - "loss": 0.005831810832023621, - "step": 6725 - }, - { - "epoch": 1.1474850809889172, - "grad_norm": 0.05588890612125397, - "learning_rate": 6.209985007425668e-05, - "loss": 0.0041655078530311584, - "step": 6730 - }, - { - "epoch": 1.1483375959079285, - "grad_norm": 0.07582694292068481, - "learning_rate": 6.208050579920826e-05, - "loss": 0.006521198153495789, - "step": 6735 - }, - { - "epoch": 1.1491901108269396, - "grad_norm": 0.07055433094501495, - "learning_rate": 6.206115004916209e-05, - "loss": 0.0066129244863986966, - "step": 6740 - }, - { - "epoch": 1.1500426257459506, - "grad_norm": 0.07039172202348709, - "learning_rate": 6.204178283315405e-05, - "loss": 0.005633739382028579, - "step": 6745 - }, - { - "epoch": 1.1508951406649617, - "grad_norm": 0.07833350449800491, - "learning_rate": 6.202240416022541e-05, - "loss": 0.006761975586414337, - "step": 6750 - }, - { - "epoch": 1.1517476555839727, - "grad_norm": 0.05553733557462692, - "learning_rate": 6.200301403942278e-05, - "loss": 0.006545065343379975, - "step": 6755 - }, - { - "epoch": 1.1526001705029838, - "grad_norm": 0.07306832075119019, - "learning_rate": 6.198361247979809e-05, - "loss": 0.008323725312948227, - "step": 6760 - }, - { - "epoch": 1.1534526854219949, - "grad_norm": 0.04787914454936981, - "learning_rate": 6.196419949040867e-05, - "loss": 0.004425797611474991, - "step": 6765 - }, - { - "epoch": 1.154305200341006, - "grad_norm": 0.08021930605173111, - "learning_rate": 6.194477508031712e-05, - "loss": 0.005103312805294991, - "step": 6770 - }, - { - "epoch": 1.155157715260017, - "grad_norm": 0.0821428894996643, - "learning_rate": 6.192533925859144e-05, - "loss": 0.009274877607822418, - "step": 6775 - }, - { - "epoch": 1.156010230179028, - "grad_norm": 0.09880609810352325, - "learning_rate": 6.190589203430486e-05, - "loss": 0.007863005250692367, - "step": 6780 - }, - { - "epoch": 1.156862745098039, - "grad_norm": 0.08075276762247086, - "learning_rate": 6.188643341653604e-05, - "loss": 0.004675677418708802, - "step": 6785 - }, - { - "epoch": 1.1577152600170504, - "grad_norm": 0.0633573830127716, - "learning_rate": 6.186696341436889e-05, - "loss": 0.007359890639781952, - "step": 6790 - }, - { - "epoch": 1.1585677749360614, - "grad_norm": 0.03828895092010498, - "learning_rate": 6.184748203689265e-05, - "loss": 0.004494070634245872, - "step": 6795 - }, - { - "epoch": 1.1594202898550725, - "grad_norm": 0.07907325774431229, - "learning_rate": 6.18279892932019e-05, - "loss": 0.006256269663572312, - "step": 6800 - }, - { - "epoch": 1.1602728047740836, - "grad_norm": 0.055188342928886414, - "learning_rate": 6.180848519239647e-05, - "loss": 0.009548474848270417, - "step": 6805 - }, - { - "epoch": 1.1611253196930946, - "grad_norm": 0.05850991606712341, - "learning_rate": 6.178896974358154e-05, - "loss": 0.0056076571345329285, - "step": 6810 - }, - { - "epoch": 1.1619778346121057, - "grad_norm": 0.0626932755112648, - "learning_rate": 6.176944295586757e-05, - "loss": 0.005667714029550552, - "step": 6815 - }, - { - "epoch": 1.1628303495311167, - "grad_norm": 0.06506946682929993, - "learning_rate": 6.174990483837031e-05, - "loss": 0.006154880672693252, - "step": 6820 - }, - { - "epoch": 1.1636828644501278, - "grad_norm": 0.0535859651863575, - "learning_rate": 6.17303554002108e-05, - "loss": 0.0042555928230285645, - "step": 6825 - }, - { - "epoch": 1.1645353793691389, - "grad_norm": 0.05577898398041725, - "learning_rate": 6.171079465051538e-05, - "loss": 0.006060104072093964, - "step": 6830 - }, - { - "epoch": 1.1653878942881502, - "grad_norm": 0.05574663355946541, - "learning_rate": 6.169122259841566e-05, - "loss": 0.008667515218257904, - "step": 6835 - }, - { - "epoch": 1.1662404092071612, - "grad_norm": 0.09157130867242813, - "learning_rate": 6.16716392530485e-05, - "loss": 0.007259176671504974, - "step": 6840 - }, - { - "epoch": 1.1670929241261723, - "grad_norm": 0.06404415518045425, - "learning_rate": 6.165204462355608e-05, - "loss": 0.007140593230724334, - "step": 6845 - }, - { - "epoch": 1.1679454390451833, - "grad_norm": 0.0713329091668129, - "learning_rate": 6.163243871908581e-05, - "loss": 0.006118748337030411, - "step": 6850 - }, - { - "epoch": 1.1687979539641944, - "grad_norm": 0.04038231074810028, - "learning_rate": 6.16128215487904e-05, - "loss": 0.006028918176889419, - "step": 6855 - }, - { - "epoch": 1.1696504688832055, - "grad_norm": 0.07793593406677246, - "learning_rate": 6.159319312182777e-05, - "loss": 0.006851163506507873, - "step": 6860 - }, - { - "epoch": 1.1705029838022165, - "grad_norm": 0.07543511688709259, - "learning_rate": 6.157355344736114e-05, - "loss": 0.006878272444009781, - "step": 6865 - }, - { - "epoch": 1.1713554987212276, - "grad_norm": 0.06332696974277496, - "learning_rate": 6.155390253455897e-05, - "loss": 0.006324999034404755, - "step": 6870 - }, - { - "epoch": 1.1722080136402386, - "grad_norm": 0.06920734792947769, - "learning_rate": 6.153424039259495e-05, - "loss": 0.00536310225725174, - "step": 6875 - }, - { - "epoch": 1.1730605285592497, - "grad_norm": 0.09313163161277771, - "learning_rate": 6.151456703064802e-05, - "loss": 0.00795424059033394, - "step": 6880 - }, - { - "epoch": 1.1739130434782608, - "grad_norm": 0.08588451147079468, - "learning_rate": 6.149488245790234e-05, - "loss": 0.006889034807682037, - "step": 6885 - }, - { - "epoch": 1.174765558397272, - "grad_norm": 0.09814538061618805, - "learning_rate": 6.147518668354737e-05, - "loss": 0.007332245260477066, - "step": 6890 - }, - { - "epoch": 1.175618073316283, - "grad_norm": 0.05141104385256767, - "learning_rate": 6.145547971677772e-05, - "loss": 0.00333656407892704, - "step": 6895 - }, - { - "epoch": 1.1764705882352942, - "grad_norm": 0.05575519800186157, - "learning_rate": 6.143576156679327e-05, - "loss": 0.005542768910527229, - "step": 6900 - }, - { - "epoch": 1.1773231031543052, - "grad_norm": 0.04917008429765701, - "learning_rate": 6.14160322427991e-05, - "loss": 0.007007633149623871, - "step": 6905 - }, - { - "epoch": 1.1781756180733163, - "grad_norm": 0.06385336071252823, - "learning_rate": 6.139629175400552e-05, - "loss": 0.007495941221714019, - "step": 6910 - }, - { - "epoch": 1.1790281329923273, - "grad_norm": 0.08664151281118393, - "learning_rate": 6.137654010962805e-05, - "loss": 0.0075534448027610775, - "step": 6915 - }, - { - "epoch": 1.1798806479113384, - "grad_norm": 0.08881189674139023, - "learning_rate": 6.13567773188874e-05, - "loss": 0.0059935558587312695, - "step": 6920 - }, - { - "epoch": 1.1807331628303495, - "grad_norm": 0.07297934591770172, - "learning_rate": 6.133700339100952e-05, - "loss": 0.006142234057188034, - "step": 6925 - }, - { - "epoch": 1.1815856777493605, - "grad_norm": 0.053351663053035736, - "learning_rate": 6.131721833522552e-05, - "loss": 0.006038139387965202, - "step": 6930 - }, - { - "epoch": 1.1824381926683718, - "grad_norm": 0.12923622131347656, - "learning_rate": 6.129742216077172e-05, - "loss": 0.008645947277545928, - "step": 6935 - }, - { - "epoch": 1.1832907075873829, - "grad_norm": 0.095455601811409, - "learning_rate": 6.127761487688964e-05, - "loss": 0.004740688577294349, - "step": 6940 - }, - { - "epoch": 1.184143222506394, - "grad_norm": 0.11498606204986572, - "learning_rate": 6.125779649282599e-05, - "loss": 0.00805070549249649, - "step": 6945 - }, - { - "epoch": 1.184995737425405, - "grad_norm": 0.07489286363124847, - "learning_rate": 6.123796701783264e-05, - "loss": 0.0060746859759092334, - "step": 6950 - }, - { - "epoch": 1.185848252344416, - "grad_norm": 0.07027438282966614, - "learning_rate": 6.121812646116663e-05, - "loss": 0.006816025823354721, - "step": 6955 - }, - { - "epoch": 1.186700767263427, - "grad_norm": 0.08542973548173904, - "learning_rate": 6.119827483209024e-05, - "loss": 0.005315831303596497, - "step": 6960 - }, - { - "epoch": 1.1875532821824382, - "grad_norm": 0.08409032970666885, - "learning_rate": 6.117841213987082e-05, - "loss": 0.0061601437628269196, - "step": 6965 - }, - { - "epoch": 1.1884057971014492, - "grad_norm": 0.10387974232435226, - "learning_rate": 6.115853839378096e-05, - "loss": 0.0071022816002368925, - "step": 6970 - }, - { - "epoch": 1.1892583120204603, - "grad_norm": 0.056441329419612885, - "learning_rate": 6.113865360309838e-05, - "loss": 0.004539218544960022, - "step": 6975 - }, - { - "epoch": 1.1901108269394713, - "grad_norm": 0.10132234543561935, - "learning_rate": 6.111875777710598e-05, - "loss": 0.0060778014361858364, - "step": 6980 - }, - { - "epoch": 1.1909633418584824, - "grad_norm": 0.07129226624965668, - "learning_rate": 6.109885092509177e-05, - "loss": 0.007794113457202911, - "step": 6985 - }, - { - "epoch": 1.1918158567774937, - "grad_norm": 0.09267892688512802, - "learning_rate": 6.107893305634893e-05, - "loss": 0.006867295503616333, - "step": 6990 - }, - { - "epoch": 1.1926683716965047, - "grad_norm": 0.0739888921380043, - "learning_rate": 6.10590041801758e-05, - "loss": 0.006263263523578644, - "step": 6995 - }, - { - "epoch": 1.1935208866155158, - "grad_norm": 0.10201311856508255, - "learning_rate": 6.103906430587583e-05, - "loss": 0.006245525926351548, - "step": 7000 - }, - { - "epoch": 1.1943734015345269, - "grad_norm": 0.08561360090970993, - "learning_rate": 6.1019113442757636e-05, - "loss": 0.005739351361989975, - "step": 7005 - }, - { - "epoch": 1.195225916453538, - "grad_norm": 0.06410367786884308, - "learning_rate": 6.099915160013491e-05, - "loss": 0.00598936527967453, - "step": 7010 - }, - { - "epoch": 1.196078431372549, - "grad_norm": 0.11765716224908829, - "learning_rate": 6.0979178787326544e-05, - "loss": 0.010477253794670105, - "step": 7015 - }, - { - "epoch": 1.19693094629156, - "grad_norm": 0.06659694015979767, - "learning_rate": 6.095919501365648e-05, - "loss": 0.0072447523474693295, - "step": 7020 - }, - { - "epoch": 1.197783461210571, - "grad_norm": 0.05651358142495155, - "learning_rate": 6.093920028845381e-05, - "loss": 0.004644358158111572, - "step": 7025 - }, - { - "epoch": 1.1986359761295824, - "grad_norm": 0.07198809087276459, - "learning_rate": 6.0919194621052764e-05, - "loss": 0.00517328754067421, - "step": 7030 - }, - { - "epoch": 1.1994884910485935, - "grad_norm": 0.06188420578837395, - "learning_rate": 6.0899178020792614e-05, - "loss": 0.005182894691824913, - "step": 7035 - }, - { - "epoch": 1.2003410059676045, - "grad_norm": 0.07731341570615768, - "learning_rate": 6.087915049701783e-05, - "loss": 0.006863371282815933, - "step": 7040 - }, - { - "epoch": 1.2011935208866156, - "grad_norm": 0.07693833857774734, - "learning_rate": 6.0859112059077866e-05, - "loss": 0.008634812384843826, - "step": 7045 - }, - { - "epoch": 1.2020460358056266, - "grad_norm": 0.08118413388729095, - "learning_rate": 6.083906271632736e-05, - "loss": 0.008003174513578414, - "step": 7050 - }, - { - "epoch": 1.2028985507246377, - "grad_norm": 0.07794218510389328, - "learning_rate": 6.0819002478126016e-05, - "loss": 0.005899757146835327, - "step": 7055 - }, - { - "epoch": 1.2037510656436488, - "grad_norm": 0.08107218146324158, - "learning_rate": 6.079893135383861e-05, - "loss": 0.007581057399511338, - "step": 7060 - }, - { - "epoch": 1.2046035805626598, - "grad_norm": 0.06910198926925659, - "learning_rate": 6.077884935283502e-05, - "loss": 0.00794234573841095, - "step": 7065 - }, - { - "epoch": 1.2054560954816709, - "grad_norm": 0.08426421135663986, - "learning_rate": 6.0758756484490186e-05, - "loss": 0.0057635679841041565, - "step": 7070 - }, - { - "epoch": 1.206308610400682, - "grad_norm": 0.08670961856842041, - "learning_rate": 6.073865275818413e-05, - "loss": 0.006369538605213165, - "step": 7075 - }, - { - "epoch": 1.207161125319693, - "grad_norm": 0.04245399683713913, - "learning_rate": 6.071853818330193e-05, - "loss": 0.0067548036575317385, - "step": 7080 - }, - { - "epoch": 1.208013640238704, - "grad_norm": 0.10600235313177109, - "learning_rate": 6.069841276923376e-05, - "loss": 0.006923867762088776, - "step": 7085 - }, - { - "epoch": 1.2088661551577153, - "grad_norm": 0.07093790173530579, - "learning_rate": 6.0678276525374814e-05, - "loss": 0.005083417519927025, - "step": 7090 - }, - { - "epoch": 1.2097186700767264, - "grad_norm": 0.04997126758098602, - "learning_rate": 6.065812946112537e-05, - "loss": 0.006207586824893951, - "step": 7095 - }, - { - "epoch": 1.2105711849957375, - "grad_norm": 0.04425262287259102, - "learning_rate": 6.063797158589074e-05, - "loss": 0.0046977601945400235, - "step": 7100 - }, - { - "epoch": 1.2114236999147485, - "grad_norm": 0.07300136238336563, - "learning_rate": 6.0617802909081295e-05, - "loss": 0.005589437484741211, - "step": 7105 - }, - { - "epoch": 1.2122762148337596, - "grad_norm": 0.0878889262676239, - "learning_rate": 6.0597623440112445e-05, - "loss": 0.005844222381711006, - "step": 7110 - }, - { - "epoch": 1.2131287297527706, - "grad_norm": 0.09079992771148682, - "learning_rate": 6.0577433188404635e-05, - "loss": 0.007329034805297852, - "step": 7115 - }, - { - "epoch": 1.2139812446717817, - "grad_norm": 0.07165387272834778, - "learning_rate": 6.055723216338336e-05, - "loss": 0.006642927974462509, - "step": 7120 - }, - { - "epoch": 1.2148337595907928, - "grad_norm": 0.07113955169916153, - "learning_rate": 6.053702037447911e-05, - "loss": 0.006992670893669129, - "step": 7125 - }, - { - "epoch": 1.215686274509804, - "grad_norm": 0.08737215399742126, - "learning_rate": 6.0516797831127425e-05, - "loss": 0.006702055037021637, - "step": 7130 - }, - { - "epoch": 1.216538789428815, - "grad_norm": 0.07276564091444016, - "learning_rate": 6.049656454276887e-05, - "loss": 0.005692619457840919, - "step": 7135 - }, - { - "epoch": 1.2173913043478262, - "grad_norm": 0.09203831106424332, - "learning_rate": 6.0476320518849006e-05, - "loss": 0.006464710086584091, - "step": 7140 - }, - { - "epoch": 1.2182438192668372, - "grad_norm": 0.07749375700950623, - "learning_rate": 6.045606576881843e-05, - "loss": 0.008598372340202332, - "step": 7145 - }, - { - "epoch": 1.2190963341858483, - "grad_norm": 0.04338040575385094, - "learning_rate": 6.043580030213272e-05, - "loss": 0.006892016530036927, - "step": 7150 - }, - { - "epoch": 1.2199488491048593, - "grad_norm": 0.06691034138202667, - "learning_rate": 6.0415524128252474e-05, - "loss": 0.007622670382261276, - "step": 7155 - }, - { - "epoch": 1.2208013640238704, - "grad_norm": 0.07532396167516708, - "learning_rate": 6.039523725664329e-05, - "loss": 0.00698106437921524, - "step": 7160 - }, - { - "epoch": 1.2216538789428815, - "grad_norm": 0.0408058688044548, - "learning_rate": 6.037493969677575e-05, - "loss": 0.005919446796178817, - "step": 7165 - }, - { - "epoch": 1.2225063938618925, - "grad_norm": 0.07306578010320663, - "learning_rate": 6.0354631458125425e-05, - "loss": 0.008953345566987991, - "step": 7170 - }, - { - "epoch": 1.2233589087809036, - "grad_norm": 0.08269601315259933, - "learning_rate": 6.033431255017289e-05, - "loss": 0.007224951684474945, - "step": 7175 - }, - { - "epoch": 1.2242114236999146, - "grad_norm": 0.045140884816646576, - "learning_rate": 6.0313982982403676e-05, - "loss": 0.006175454705953598, - "step": 7180 - }, - { - "epoch": 1.2250639386189257, - "grad_norm": 0.0721440315246582, - "learning_rate": 6.0293642764308304e-05, - "loss": 0.007267911732196808, - "step": 7185 - }, - { - "epoch": 1.225916453537937, - "grad_norm": 0.081813283264637, - "learning_rate": 6.027329190538227e-05, - "loss": 0.006872846186161042, - "step": 7190 - }, - { - "epoch": 1.226768968456948, - "grad_norm": 0.05662613734602928, - "learning_rate": 6.025293041512602e-05, - "loss": 0.004837202653288841, - "step": 7195 - }, - { - "epoch": 1.227621483375959, - "grad_norm": 0.10023492574691772, - "learning_rate": 6.023255830304498e-05, - "loss": 0.0060194481164217, - "step": 7200 - }, - { - "epoch": 1.2284739982949702, - "grad_norm": 0.06398235261440277, - "learning_rate": 6.021217557864954e-05, - "loss": 0.007653398066759109, - "step": 7205 - }, - { - "epoch": 1.2293265132139812, - "grad_norm": 0.09494475275278091, - "learning_rate": 6.019178225145503e-05, - "loss": 0.007547302544116974, - "step": 7210 - }, - { - "epoch": 1.2301790281329923, - "grad_norm": 0.05356467142701149, - "learning_rate": 6.017137833098171e-05, - "loss": 0.007133310288190841, - "step": 7215 - }, - { - "epoch": 1.2310315430520034, - "grad_norm": 0.09225092083215714, - "learning_rate": 6.0150963826754836e-05, - "loss": 0.006320309638977051, - "step": 7220 - }, - { - "epoch": 1.2318840579710144, - "grad_norm": 0.07072161883115768, - "learning_rate": 6.013053874830458e-05, - "loss": 0.007313568145036697, - "step": 7225 - }, - { - "epoch": 1.2327365728900257, - "grad_norm": 0.07206818461418152, - "learning_rate": 6.0110103105166026e-05, - "loss": 0.0054031949490308765, - "step": 7230 - }, - { - "epoch": 1.2335890878090368, - "grad_norm": 0.08611681312322617, - "learning_rate": 6.008965690687922e-05, - "loss": 0.00670153945684433, - "step": 7235 - }, - { - "epoch": 1.2344416027280478, - "grad_norm": 0.07864221930503845, - "learning_rate": 6.0069200162989154e-05, - "loss": 0.0069690033793449405, - "step": 7240 - }, - { - "epoch": 1.2352941176470589, - "grad_norm": 0.06847227364778519, - "learning_rate": 6.0048732883045665e-05, - "loss": 0.006755173206329346, - "step": 7245 - }, - { - "epoch": 1.23614663256607, - "grad_norm": 0.06264699995517731, - "learning_rate": 6.0028255076603606e-05, - "loss": 0.00519348569214344, - "step": 7250 - }, - { - "epoch": 1.236999147485081, - "grad_norm": 0.04114431515336037, - "learning_rate": 6.0007766753222665e-05, - "loss": 0.006071234866976738, - "step": 7255 - }, - { - "epoch": 1.237851662404092, - "grad_norm": 0.03660140559077263, - "learning_rate": 5.998726792246751e-05, - "loss": 0.007517064362764359, - "step": 7260 - }, - { - "epoch": 1.2387041773231031, - "grad_norm": 0.10343052446842194, - "learning_rate": 5.9966758593907636e-05, - "loss": 0.0070131182670593265, - "step": 7265 - }, - { - "epoch": 1.2395566922421142, - "grad_norm": 0.08169959485530853, - "learning_rate": 5.994623877711751e-05, - "loss": 0.006279528886079788, - "step": 7270 - }, - { - "epoch": 1.2404092071611252, - "grad_norm": 0.06312677264213562, - "learning_rate": 5.992570848167645e-05, - "loss": 0.0041657909750938416, - "step": 7275 - }, - { - "epoch": 1.2412617220801363, - "grad_norm": 0.08725713193416595, - "learning_rate": 5.990516771716869e-05, - "loss": 0.007908149063587189, - "step": 7280 - }, - { - "epoch": 1.2421142369991476, - "grad_norm": 0.05857875198125839, - "learning_rate": 5.988461649318333e-05, - "loss": 0.005137740075588227, - "step": 7285 - }, - { - "epoch": 1.2429667519181586, - "grad_norm": 0.04836168512701988, - "learning_rate": 5.986405481931438e-05, - "loss": 0.005033157765865326, - "step": 7290 - }, - { - "epoch": 1.2438192668371697, - "grad_norm": 0.09514568001031876, - "learning_rate": 5.98434827051607e-05, - "loss": 0.007460397481918335, - "step": 7295 - }, - { - "epoch": 1.2446717817561808, - "grad_norm": 0.049415748566389084, - "learning_rate": 5.982290016032604e-05, - "loss": 0.0049881644546985624, - "step": 7300 - }, - { - "epoch": 1.2455242966751918, - "grad_norm": 0.1069302037358284, - "learning_rate": 5.980230719441903e-05, - "loss": 0.005356961116194725, - "step": 7305 - }, - { - "epoch": 1.2463768115942029, - "grad_norm": 0.1031380444765091, - "learning_rate": 5.9781703817053136e-05, - "loss": 0.0067513369023799895, - "step": 7310 - }, - { - "epoch": 1.247229326513214, - "grad_norm": 0.05909884348511696, - "learning_rate": 5.976109003784671e-05, - "loss": 0.005357486009597778, - "step": 7315 - }, - { - "epoch": 1.248081841432225, - "grad_norm": 0.09306607395410538, - "learning_rate": 5.974046586642295e-05, - "loss": 0.005747456848621368, - "step": 7320 - }, - { - "epoch": 1.248934356351236, - "grad_norm": 0.0688752606511116, - "learning_rate": 5.971983131240988e-05, - "loss": 0.0073902375996112825, - "step": 7325 - }, - { - "epoch": 1.2497868712702473, - "grad_norm": 0.06592141836881638, - "learning_rate": 5.969918638544044e-05, - "loss": 0.008268805593252182, - "step": 7330 - }, - { - "epoch": 1.2497868712702473, - "eval_loss": 0.037298671901226044, - "eval_runtime": 3.6917, - "eval_samples_per_second": 68.261, - "eval_steps_per_second": 1.084, - "step": 7330 - }, - { - "eval_cer_subset": 0.01283241324278991, - "eval_cer_subset_edit_distance": 788, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 7330 - }, - { - "epoch": 1.2506393861892584, - "grad_norm": 0.06094380095601082, - "learning_rate": 5.9678531095152326e-05, - "loss": 0.005528298765420913, - "step": 7335 - }, - { - "epoch": 1.2514919011082695, - "grad_norm": 0.1417030543088913, - "learning_rate": 5.965786545118815e-05, - "loss": 0.00984017476439476, - "step": 7340 - }, - { - "epoch": 1.2523444160272805, - "grad_norm": 0.08209668844938278, - "learning_rate": 5.963718946319529e-05, - "loss": 0.007516486942768097, - "step": 7345 - }, - { - "epoch": 1.2531969309462916, - "grad_norm": 0.06825494766235352, - "learning_rate": 5.9616503140826006e-05, - "loss": 0.005924524366855621, - "step": 7350 - }, - { - "epoch": 1.2540494458653026, - "grad_norm": 0.11229037493467331, - "learning_rate": 5.959580649373736e-05, - "loss": 0.006495627760887146, - "step": 7355 - }, - { - "epoch": 1.2549019607843137, - "grad_norm": 0.13235078752040863, - "learning_rate": 5.957509953159123e-05, - "loss": 0.00942063182592392, - "step": 7360 - }, - { - "epoch": 1.2557544757033248, - "grad_norm": 0.04514055699110031, - "learning_rate": 5.955438226405432e-05, - "loss": 0.006601292639970779, - "step": 7365 - }, - { - "epoch": 1.2566069906223358, - "grad_norm": 0.08192043751478195, - "learning_rate": 5.9533654700798126e-05, - "loss": 0.007403627783060074, - "step": 7370 - }, - { - "epoch": 1.257459505541347, - "grad_norm": 0.07101254910230637, - "learning_rate": 5.951291685149898e-05, - "loss": 0.006301522254943848, - "step": 7375 - }, - { - "epoch": 1.258312020460358, - "grad_norm": 0.05598035827279091, - "learning_rate": 5.949216872583799e-05, - "loss": 0.006812388449907303, - "step": 7380 - }, - { - "epoch": 1.259164535379369, - "grad_norm": 0.06444506347179413, - "learning_rate": 5.9471410333501085e-05, - "loss": 0.005891536176204681, - "step": 7385 - }, - { - "epoch": 1.2600170502983803, - "grad_norm": 0.04921717569231987, - "learning_rate": 5.945064168417895e-05, - "loss": 0.004649973660707474, - "step": 7390 - }, - { - "epoch": 1.2608695652173914, - "grad_norm": 0.09095602482557297, - "learning_rate": 5.94298627875671e-05, - "loss": 0.007515725493431091, - "step": 7395 - }, - { - "epoch": 1.2617220801364024, - "grad_norm": 0.09932803362607956, - "learning_rate": 5.9409073653365816e-05, - "loss": 0.006223166733980179, - "step": 7400 - }, - { - "epoch": 1.2625745950554135, - "grad_norm": 0.08616010844707489, - "learning_rate": 5.938827429128014e-05, - "loss": 0.006999516487121582, - "step": 7405 - }, - { - "epoch": 1.2634271099744245, - "grad_norm": 0.11979297548532486, - "learning_rate": 5.936746471101993e-05, - "loss": 0.00812242105603218, - "step": 7410 - }, - { - "epoch": 1.2642796248934356, - "grad_norm": 0.12872007489204407, - "learning_rate": 5.934664492229976e-05, - "loss": 0.006246988475322723, - "step": 7415 - }, - { - "epoch": 1.2651321398124467, - "grad_norm": 0.0831044539809227, - "learning_rate": 5.932581493483903e-05, - "loss": 0.00590248554944992, - "step": 7420 - }, - { - "epoch": 1.265984654731458, - "grad_norm": 0.09913221001625061, - "learning_rate": 5.9304974758361857e-05, - "loss": 0.007224322855472564, - "step": 7425 - }, - { - "epoch": 1.266837169650469, - "grad_norm": 0.08654595911502838, - "learning_rate": 5.928412440259713e-05, - "loss": 0.007056090980768204, - "step": 7430 - }, - { - "epoch": 1.26768968456948, - "grad_norm": 0.07882801443338394, - "learning_rate": 5.926326387727849e-05, - "loss": 0.00572751946747303, - "step": 7435 - }, - { - "epoch": 1.2685421994884911, - "grad_norm": 0.12886428833007812, - "learning_rate": 5.924239319214432e-05, - "loss": 0.0106881283223629, - "step": 7440 - }, - { - "epoch": 1.2693947144075022, - "grad_norm": 0.05597686767578125, - "learning_rate": 5.922151235693775e-05, - "loss": 0.005041084438562393, - "step": 7445 - }, - { - "epoch": 1.2702472293265132, - "grad_norm": 0.10719682276248932, - "learning_rate": 5.920062138140665e-05, - "loss": 0.007724158465862274, - "step": 7450 - }, - { - "epoch": 1.2710997442455243, - "grad_norm": 0.045485325157642365, - "learning_rate": 5.917972027530363e-05, - "loss": 0.003246675431728363, - "step": 7455 - }, - { - "epoch": 1.2719522591645354, - "grad_norm": 0.09602563083171844, - "learning_rate": 5.9158809048386017e-05, - "loss": 0.006592199206352234, - "step": 7460 - }, - { - "epoch": 1.2728047740835464, - "grad_norm": 0.0555407889187336, - "learning_rate": 5.913788771041586e-05, - "loss": 0.00537751168012619, - "step": 7465 - }, - { - "epoch": 1.2736572890025575, - "grad_norm": 0.15820109844207764, - "learning_rate": 5.911695627115994e-05, - "loss": 0.005968114733695984, - "step": 7470 - }, - { - "epoch": 1.2745098039215685, - "grad_norm": 0.05781199410557747, - "learning_rate": 5.9096014740389754e-05, - "loss": 0.00887204110622406, - "step": 7475 - }, - { - "epoch": 1.2753623188405796, - "grad_norm": 0.07927337288856506, - "learning_rate": 5.90750631278815e-05, - "loss": 0.006439142674207687, - "step": 7480 - }, - { - "epoch": 1.2762148337595907, - "grad_norm": 0.03843824937939644, - "learning_rate": 5.905410144341609e-05, - "loss": 0.007792883366346359, - "step": 7485 - }, - { - "epoch": 1.277067348678602, - "grad_norm": 0.0692640095949173, - "learning_rate": 5.903312969677914e-05, - "loss": 0.006274447590112686, - "step": 7490 - }, - { - "epoch": 1.277919863597613, - "grad_norm": 0.07501527667045593, - "learning_rate": 5.901214789776094e-05, - "loss": 0.007496471703052521, - "step": 7495 - }, - { - "epoch": 1.278772378516624, - "grad_norm": 0.10271260142326355, - "learning_rate": 5.8991156056156514e-05, - "loss": 0.008766942471265794, - "step": 7500 - }, - { - "epoch": 1.2796248934356351, - "grad_norm": 0.03995242714881897, - "learning_rate": 5.897015418176555e-05, - "loss": 0.0055749226361513134, - "step": 7505 - }, - { - "epoch": 1.2804774083546462, - "grad_norm": 0.09215585142374039, - "learning_rate": 5.8949142284392406e-05, - "loss": 0.005763960257172585, - "step": 7510 - }, - { - "epoch": 1.2813299232736572, - "grad_norm": 0.07763402909040451, - "learning_rate": 5.892812037384615e-05, - "loss": 0.006439389288425445, - "step": 7515 - }, - { - "epoch": 1.2821824381926683, - "grad_norm": 0.04945438355207443, - "learning_rate": 5.890708845994049e-05, - "loss": 0.006960665434598922, - "step": 7520 - }, - { - "epoch": 1.2830349531116796, - "grad_norm": 0.05348283797502518, - "learning_rate": 5.888604655249384e-05, - "loss": 0.0061422914266586305, - "step": 7525 - }, - { - "epoch": 1.2838874680306906, - "grad_norm": 0.10389877110719681, - "learning_rate": 5.886499466132926e-05, - "loss": 0.009247081726789475, - "step": 7530 - }, - { - "epoch": 1.2847399829497017, - "grad_norm": 0.07753872126340866, - "learning_rate": 5.884393279627448e-05, - "loss": 0.004902977123856544, - "step": 7535 - }, - { - "epoch": 1.2855924978687128, - "grad_norm": 0.10553103685379028, - "learning_rate": 5.8822860967161856e-05, - "loss": 0.004547145590186119, - "step": 7540 - }, - { - "epoch": 1.2864450127877238, - "grad_norm": 0.08235067129135132, - "learning_rate": 5.880177918382844e-05, - "loss": 0.005282455682754516, - "step": 7545 - }, - { - "epoch": 1.287297527706735, - "grad_norm": 0.08135014772415161, - "learning_rate": 5.878068745611591e-05, - "loss": 0.006127358600497246, - "step": 7550 - }, - { - "epoch": 1.288150042625746, - "grad_norm": 0.04027952626347542, - "learning_rate": 5.875958579387056e-05, - "loss": 0.008251778036355972, - "step": 7555 - }, - { - "epoch": 1.289002557544757, - "grad_norm": 0.1060953438282013, - "learning_rate": 5.8738474206943385e-05, - "loss": 0.008290941268205643, - "step": 7560 - }, - { - "epoch": 1.289855072463768, - "grad_norm": 0.06716421991586685, - "learning_rate": 5.871735270518995e-05, - "loss": 0.004932524263858795, - "step": 7565 - }, - { - "epoch": 1.2907075873827791, - "grad_norm": 0.07644582539796829, - "learning_rate": 5.869622129847048e-05, - "loss": 0.006172410026192665, - "step": 7570 - }, - { - "epoch": 1.2915601023017902, - "grad_norm": 0.06018557399511337, - "learning_rate": 5.867507999664983e-05, - "loss": 0.005532362312078476, - "step": 7575 - }, - { - "epoch": 1.2924126172208013, - "grad_norm": 0.06454342603683472, - "learning_rate": 5.865392880959745e-05, - "loss": 0.005053167790174484, - "step": 7580 - }, - { - "epoch": 1.2932651321398123, - "grad_norm": 0.07618142664432526, - "learning_rate": 5.863276774718742e-05, - "loss": 0.005658206716179848, - "step": 7585 - }, - { - "epoch": 1.2941176470588236, - "grad_norm": 0.05649973824620247, - "learning_rate": 5.8611596819298434e-05, - "loss": 0.007477214187383651, - "step": 7590 - }, - { - "epoch": 1.2949701619778347, - "grad_norm": 0.09222351759672165, - "learning_rate": 5.859041603581377e-05, - "loss": 0.006974493712186813, - "step": 7595 - }, - { - "epoch": 1.2958226768968457, - "grad_norm": 0.07462326437234879, - "learning_rate": 5.856922540662134e-05, - "loss": 0.008175718039274216, - "step": 7600 - }, - { - "epoch": 1.2966751918158568, - "grad_norm": 0.10593193024396896, - "learning_rate": 5.854802494161364e-05, - "loss": 0.006635700166225433, - "step": 7605 - }, - { - "epoch": 1.2975277067348678, - "grad_norm": 0.08673358708620071, - "learning_rate": 5.8526814650687724e-05, - "loss": 0.007347754389047623, - "step": 7610 - }, - { - "epoch": 1.298380221653879, - "grad_norm": 0.10450063645839691, - "learning_rate": 5.850559454374528e-05, - "loss": 0.008085139095783234, - "step": 7615 - }, - { - "epoch": 1.29923273657289, - "grad_norm": 0.04219435900449753, - "learning_rate": 5.848436463069257e-05, - "loss": 0.006296204030513763, - "step": 7620 - }, - { - "epoch": 1.3000852514919012, - "grad_norm": 0.08187524974346161, - "learning_rate": 5.84631249214404e-05, - "loss": 0.007680010050535202, - "step": 7625 - }, - { - "epoch": 1.3009377664109123, - "grad_norm": 0.21044164896011353, - "learning_rate": 5.844187542590418e-05, - "loss": 0.008709554374217988, - "step": 7630 - }, - { - "epoch": 1.3017902813299234, - "grad_norm": 0.09822215139865875, - "learning_rate": 5.842061615400389e-05, - "loss": 0.007412384450435639, - "step": 7635 - }, - { - "epoch": 1.3026427962489344, - "grad_norm": 0.05957398563623428, - "learning_rate": 5.8399347115664053e-05, - "loss": 0.0062717020511627196, - "step": 7640 - }, - { - "epoch": 1.3034953111679455, - "grad_norm": 0.07013436406850815, - "learning_rate": 5.837806832081378e-05, - "loss": 0.005471421033143997, - "step": 7645 - }, - { - "epoch": 1.3043478260869565, - "grad_norm": 0.09616916626691818, - "learning_rate": 5.835677977938671e-05, - "loss": 0.008985907584428788, - "step": 7650 - }, - { - "epoch": 1.3052003410059676, - "grad_norm": 0.07946161180734634, - "learning_rate": 5.833548150132105e-05, - "loss": 0.00563003197312355, - "step": 7655 - }, - { - "epoch": 1.3060528559249787, - "grad_norm": 0.0630686804652214, - "learning_rate": 5.831417349655953e-05, - "loss": 0.007591667026281357, - "step": 7660 - }, - { - "epoch": 1.3069053708439897, - "grad_norm": 0.08530164510011673, - "learning_rate": 5.829285577504944e-05, - "loss": 0.006751708686351776, - "step": 7665 - }, - { - "epoch": 1.3077578857630008, - "grad_norm": 0.045148320496082306, - "learning_rate": 5.8271528346742616e-05, - "loss": 0.0052963607013225555, - "step": 7670 - }, - { - "epoch": 1.3086104006820118, - "grad_norm": 0.07147885859012604, - "learning_rate": 5.82501912215954e-05, - "loss": 0.005282463133335113, - "step": 7675 - }, - { - "epoch": 1.309462915601023, - "grad_norm": 0.0933302789926529, - "learning_rate": 5.8228844409568654e-05, - "loss": 0.0073209434747695925, - "step": 7680 - }, - { - "epoch": 1.310315430520034, - "grad_norm": 0.07449645549058914, - "learning_rate": 5.820748792062781e-05, - "loss": 0.007801111787557602, - "step": 7685 - }, - { - "epoch": 1.3111679454390452, - "grad_norm": 0.04569214582443237, - "learning_rate": 5.8186121764742774e-05, - "loss": 0.006659354269504547, - "step": 7690 - }, - { - "epoch": 1.3120204603580563, - "grad_norm": 0.07046396285295486, - "learning_rate": 5.8164745951887995e-05, - "loss": 0.006448440253734589, - "step": 7695 - }, - { - "epoch": 1.3128729752770674, - "grad_norm": 0.09704319387674332, - "learning_rate": 5.814336049204239e-05, - "loss": 0.008210816234350205, - "step": 7700 - }, - { - "epoch": 1.3137254901960784, - "grad_norm": 0.06477776169776917, - "learning_rate": 5.81219653951894e-05, - "loss": 0.005369330942630768, - "step": 7705 - }, - { - "epoch": 1.3145780051150895, - "grad_norm": 0.11657397449016571, - "learning_rate": 5.810056067131698e-05, - "loss": 0.010190412402153015, - "step": 7710 - }, - { - "epoch": 1.3154305200341005, - "grad_norm": 0.06578268110752106, - "learning_rate": 5.8079146330417575e-05, - "loss": 0.006289052963256836, - "step": 7715 - }, - { - "epoch": 1.3162830349531116, - "grad_norm": 0.06296945363283157, - "learning_rate": 5.80577223824881e-05, - "loss": 0.008120459318161011, - "step": 7720 - }, - { - "epoch": 1.317135549872123, - "grad_norm": 0.08275634050369263, - "learning_rate": 5.803628883752996e-05, - "loss": 0.006926379352807999, - "step": 7725 - }, - { - "epoch": 1.317988064791134, - "grad_norm": 0.0693436712026596, - "learning_rate": 5.8014845705549086e-05, - "loss": 0.006521113961935043, - "step": 7730 - }, - { - "epoch": 1.318840579710145, - "grad_norm": 0.05845775827765465, - "learning_rate": 5.799339299655579e-05, - "loss": 0.00762510895729065, - "step": 7735 - }, - { - "epoch": 1.319693094629156, - "grad_norm": 0.08802217245101929, - "learning_rate": 5.7971930720564947e-05, - "loss": 0.008071760833263397, - "step": 7740 - }, - { - "epoch": 1.3205456095481671, - "grad_norm": 0.08866037428379059, - "learning_rate": 5.795045888759585e-05, - "loss": 0.006111105903983116, - "step": 7745 - }, - { - "epoch": 1.3213981244671782, - "grad_norm": 0.0844360888004303, - "learning_rate": 5.792897750767225e-05, - "loss": 0.005196729302406311, - "step": 7750 - }, - { - "epoch": 1.3222506393861893, - "grad_norm": 0.06763950735330582, - "learning_rate": 5.79074865908224e-05, - "loss": 0.006462454050779343, - "step": 7755 - }, - { - "epoch": 1.3231031543052003, - "grad_norm": 0.06333937495946884, - "learning_rate": 5.7885986147078946e-05, - "loss": 0.0068017512559890745, - "step": 7760 - }, - { - "epoch": 1.3239556692242114, - "grad_norm": 0.05730217695236206, - "learning_rate": 5.786447618647904e-05, - "loss": 0.0065845087170600895, - "step": 7765 - }, - { - "epoch": 1.3248081841432224, - "grad_norm": 0.06838720291852951, - "learning_rate": 5.784295671906422e-05, - "loss": 0.0059626404196023945, - "step": 7770 - }, - { - "epoch": 1.3256606990622335, - "grad_norm": 0.06693503260612488, - "learning_rate": 5.782142775488051e-05, - "loss": 0.008056168258190156, - "step": 7775 - }, - { - "epoch": 1.3265132139812446, - "grad_norm": 0.07886708527803421, - "learning_rate": 5.7799889303978324e-05, - "loss": 0.006670922040939331, - "step": 7780 - }, - { - "epoch": 1.3273657289002558, - "grad_norm": 0.06932322680950165, - "learning_rate": 5.777834137641255e-05, - "loss": 0.006734507530927658, - "step": 7785 - }, - { - "epoch": 1.328218243819267, - "grad_norm": 0.08057818561792374, - "learning_rate": 5.775678398224247e-05, - "loss": 0.005952415242791176, - "step": 7790 - }, - { - "epoch": 1.329070758738278, - "grad_norm": 0.06614059209823608, - "learning_rate": 5.7735217131531785e-05, - "loss": 0.007471600174903869, - "step": 7795 - }, - { - "epoch": 1.329923273657289, - "grad_norm": 0.06335467845201492, - "learning_rate": 5.771364083434862e-05, - "loss": 0.007279399782419205, - "step": 7800 - }, - { - "epoch": 1.3307757885763, - "grad_norm": 0.11745526641607285, - "learning_rate": 5.769205510076552e-05, - "loss": 0.006242561340332031, - "step": 7805 - }, - { - "epoch": 1.3316283034953111, - "grad_norm": 0.0590963289141655, - "learning_rate": 5.7670459940859414e-05, - "loss": 0.006263129413127899, - "step": 7810 - }, - { - "epoch": 1.3324808184143222, - "grad_norm": 0.05416800454258919, - "learning_rate": 5.764885536471164e-05, - "loss": 0.00531160868704319, - "step": 7815 - }, - { - "epoch": 1.3333333333333333, - "grad_norm": 0.05527244135737419, - "learning_rate": 5.7627241382407933e-05, - "loss": 0.005747637152671814, - "step": 7820 - }, - { - "epoch": 1.3341858482523445, - "grad_norm": 0.057753629982471466, - "learning_rate": 5.760561800403844e-05, - "loss": 0.004979781061410904, - "step": 7825 - }, - { - "epoch": 1.3350383631713556, - "grad_norm": 0.10882547497749329, - "learning_rate": 5.758398523969763e-05, - "loss": 0.00589316263794899, - "step": 7830 - }, - { - "epoch": 1.3358908780903667, - "grad_norm": 0.08053787797689438, - "learning_rate": 5.756234309948443e-05, - "loss": 0.004465704411268234, - "step": 7835 - }, - { - "epoch": 1.3367433930093777, - "grad_norm": 0.09168808907270432, - "learning_rate": 5.75406915935021e-05, - "loss": 0.004735191911458969, - "step": 7840 - }, - { - "epoch": 1.3375959079283888, - "grad_norm": 0.0956537052989006, - "learning_rate": 5.751903073185829e-05, - "loss": 0.005519610643386841, - "step": 7845 - }, - { - "epoch": 1.3384484228473998, - "grad_norm": 0.05775619298219681, - "learning_rate": 5.749736052466501e-05, - "loss": 0.005525605380535125, - "step": 7850 - }, - { - "epoch": 1.339300937766411, - "grad_norm": 0.08594895154237747, - "learning_rate": 5.7475680982038616e-05, - "loss": 0.005280618742108345, - "step": 7855 - }, - { - "epoch": 1.340153452685422, - "grad_norm": 0.10326153039932251, - "learning_rate": 5.745399211409987e-05, - "loss": 0.005818159133195877, - "step": 7860 - }, - { - "epoch": 1.341005967604433, - "grad_norm": 0.053448133170604706, - "learning_rate": 5.743229393097384e-05, - "loss": 0.008255011588335037, - "step": 7865 - }, - { - "epoch": 1.341858482523444, - "grad_norm": 0.05307561904191971, - "learning_rate": 5.741058644278995e-05, - "loss": 0.006851959228515625, - "step": 7870 - }, - { - "epoch": 1.3427109974424551, - "grad_norm": 0.050789013504981995, - "learning_rate": 5.738886965968199e-05, - "loss": 0.005396667867898941, - "step": 7875 - }, - { - "epoch": 1.3435635123614662, - "grad_norm": 0.06762190908193588, - "learning_rate": 5.736714359178808e-05, - "loss": 0.005661940947175026, - "step": 7880 - }, - { - "epoch": 1.3444160272804775, - "grad_norm": 0.06955094635486603, - "learning_rate": 5.734540824925066e-05, - "loss": 0.0065834902226924895, - "step": 7885 - }, - { - "epoch": 1.3452685421994885, - "grad_norm": 0.09844111651182175, - "learning_rate": 5.7323663642216525e-05, - "loss": 0.006687184423208236, - "step": 7890 - }, - { - "epoch": 1.3461210571184996, - "grad_norm": 0.05249316990375519, - "learning_rate": 5.7301909780836766e-05, - "loss": 0.00670531764626503, - "step": 7895 - }, - { - "epoch": 1.3469735720375107, - "grad_norm": 0.06578750163316727, - "learning_rate": 5.7280146675266815e-05, - "loss": 0.0063153237104415895, - "step": 7900 - }, - { - "epoch": 1.3478260869565217, - "grad_norm": 0.10460260510444641, - "learning_rate": 5.725837433566643e-05, - "loss": 0.008820119500160217, - "step": 7905 - }, - { - "epoch": 1.3486786018755328, - "grad_norm": 0.06620552390813828, - "learning_rate": 5.7236592772199624e-05, - "loss": 0.006502580642700195, - "step": 7910 - }, - { - "epoch": 1.3495311167945439, - "grad_norm": 0.1033373698592186, - "learning_rate": 5.72148019950348e-05, - "loss": 0.008503454178571701, - "step": 7915 - }, - { - "epoch": 1.350383631713555, - "grad_norm": 0.05790281295776367, - "learning_rate": 5.719300201434458e-05, - "loss": 0.006304294615983963, - "step": 7920 - }, - { - "epoch": 1.3512361466325662, - "grad_norm": 0.06094033271074295, - "learning_rate": 5.717119284030595e-05, - "loss": 0.006775079667568207, - "step": 7925 - }, - { - "epoch": 1.3520886615515773, - "grad_norm": 0.08011666685342789, - "learning_rate": 5.714937448310015e-05, - "loss": 0.0064566083252429966, - "step": 7930 - }, - { - "epoch": 1.3529411764705883, - "grad_norm": 0.06395548582077026, - "learning_rate": 5.7127546952912686e-05, - "loss": 0.009279583394527436, - "step": 7935 - }, - { - "epoch": 1.3537936913895994, - "grad_norm": 0.06697574257850647, - "learning_rate": 5.710571025993342e-05, - "loss": 0.005718713253736496, - "step": 7940 - }, - { - "epoch": 1.3546462063086104, - "grad_norm": 0.08821829408407211, - "learning_rate": 5.7083864414356414e-05, - "loss": 0.008157726377248764, - "step": 7945 - }, - { - "epoch": 1.3554987212276215, - "grad_norm": 0.07097669690847397, - "learning_rate": 5.706200942638006e-05, - "loss": 0.004782359302043915, - "step": 7950 - }, - { - "epoch": 1.3563512361466326, - "grad_norm": 0.05015713721513748, - "learning_rate": 5.7040145306206963e-05, - "loss": 0.004204710572957992, - "step": 7955 - }, - { - "epoch": 1.3572037510656436, - "grad_norm": 0.054049719125032425, - "learning_rate": 5.701827206404406e-05, - "loss": 0.00606432780623436, - "step": 7960 - }, - { - "epoch": 1.3580562659846547, - "grad_norm": 0.0878557488322258, - "learning_rate": 5.6996389710102474e-05, - "loss": 0.008037066459655762, - "step": 7965 - }, - { - "epoch": 1.3589087809036657, - "grad_norm": 0.10873926430940628, - "learning_rate": 5.697449825459762e-05, - "loss": 0.007864821702241898, - "step": 7970 - }, - { - "epoch": 1.3597612958226768, - "grad_norm": 0.05823246389627457, - "learning_rate": 5.695259770774919e-05, - "loss": 0.00715988278388977, - "step": 7975 - }, - { - "epoch": 1.3606138107416879, - "grad_norm": 0.06690117716789246, - "learning_rate": 5.693068807978106e-05, - "loss": 0.006888707727193832, - "step": 7980 - }, - { - "epoch": 1.3614663256606991, - "grad_norm": 0.07290884852409363, - "learning_rate": 5.6908769380921363e-05, - "loss": 0.005684115365147591, - "step": 7985 - }, - { - "epoch": 1.3623188405797102, - "grad_norm": 0.07930465042591095, - "learning_rate": 5.6886841621402504e-05, - "loss": 0.0077220767736434935, - "step": 7990 - }, - { - "epoch": 1.3631713554987213, - "grad_norm": 0.08893048763275146, - "learning_rate": 5.686490481146107e-05, - "loss": 0.007824088633060455, - "step": 7995 - }, - { - "epoch": 1.3640238704177323, - "grad_norm": 0.09335844218730927, - "learning_rate": 5.6842958961337905e-05, - "loss": 0.006522499769926071, - "step": 8000 - }, - { - "epoch": 1.3648763853367434, - "grad_norm": 0.07194571942090988, - "learning_rate": 5.682100408127806e-05, - "loss": 0.008011893928050995, - "step": 8005 - }, - { - "epoch": 1.3657289002557544, - "grad_norm": 0.053429413586854935, - "learning_rate": 5.6799040181530794e-05, - "loss": 0.006260050833225251, - "step": 8010 - }, - { - "epoch": 1.3665814151747655, - "grad_norm": 0.11974478513002396, - "learning_rate": 5.677706727234959e-05, - "loss": 0.006692723929882049, - "step": 8015 - }, - { - "epoch": 1.3674339300937766, - "grad_norm": 0.07810027152299881, - "learning_rate": 5.6755085363992155e-05, - "loss": 0.007429388910531997, - "step": 8020 - }, - { - "epoch": 1.3682864450127878, - "grad_norm": 0.10204190760850906, - "learning_rate": 5.673309446672034e-05, - "loss": 0.005550343170762062, - "step": 8025 - }, - { - "epoch": 1.369138959931799, - "grad_norm": 0.07640541344881058, - "learning_rate": 5.671109459080026e-05, - "loss": 0.006840181350708008, - "step": 8030 - }, - { - "epoch": 1.36999147485081, - "grad_norm": 0.06644181162118912, - "learning_rate": 5.668908574650216e-05, - "loss": 0.005395495146512985, - "step": 8035 - }, - { - "epoch": 1.370843989769821, - "grad_norm": 0.09630967676639557, - "learning_rate": 5.6667067944100526e-05, - "loss": 0.005423872545361519, - "step": 8040 - }, - { - "epoch": 1.371696504688832, - "grad_norm": 0.07114128023386002, - "learning_rate": 5.664504119387398e-05, - "loss": 0.007013414800167084, - "step": 8045 - }, - { - "epoch": 1.3725490196078431, - "grad_norm": 0.07324981689453125, - "learning_rate": 5.662300550610535e-05, - "loss": 0.008274464309215546, - "step": 8050 - }, - { - "epoch": 1.3734015345268542, - "grad_norm": 0.06012870743870735, - "learning_rate": 5.660096089108163e-05, - "loss": 0.00520169697701931, - "step": 8055 - }, - { - "epoch": 1.3742540494458653, - "grad_norm": 0.07458557933568954, - "learning_rate": 5.657890735909397e-05, - "loss": 0.006112886965274811, - "step": 8060 - }, - { - "epoch": 1.3751065643648763, - "grad_norm": 0.0470297709107399, - "learning_rate": 5.655684492043771e-05, - "loss": 0.004435106366872788, - "step": 8065 - }, - { - "epoch": 1.3759590792838874, - "grad_norm": 0.05244847387075424, - "learning_rate": 5.653477358541231e-05, - "loss": 0.006484140455722809, - "step": 8070 - }, - { - "epoch": 1.3768115942028984, - "grad_norm": 0.10809201747179031, - "learning_rate": 5.651269336432142e-05, - "loss": 0.006385499238967895, - "step": 8075 - }, - { - "epoch": 1.3776641091219095, - "grad_norm": 0.11761374026536942, - "learning_rate": 5.649060426747281e-05, - "loss": 0.0056259695440530775, - "step": 8080 - }, - { - "epoch": 1.3785166240409208, - "grad_norm": 0.06250949203968048, - "learning_rate": 5.646850630517842e-05, - "loss": 0.005127568915486336, - "step": 8085 - }, - { - "epoch": 1.3793691389599319, - "grad_norm": 0.07686682790517807, - "learning_rate": 5.6446399487754307e-05, - "loss": 0.006484859436750412, - "step": 8090 - }, - { - "epoch": 1.380221653878943, - "grad_norm": 0.10453952848911285, - "learning_rate": 5.6424283825520656e-05, - "loss": 0.007125881314277649, - "step": 8095 - }, - { - "epoch": 1.381074168797954, - "grad_norm": 0.08170976489782333, - "learning_rate": 5.640215932880181e-05, - "loss": 0.007152590900659561, - "step": 8100 - }, - { - "epoch": 1.381926683716965, - "grad_norm": 0.08639637380838394, - "learning_rate": 5.638002600792621e-05, - "loss": 0.006862475723028183, - "step": 8105 - }, - { - "epoch": 1.382779198635976, - "grad_norm": 0.061349738389253616, - "learning_rate": 5.635788387322642e-05, - "loss": 0.006520121544599533, - "step": 8110 - }, - { - "epoch": 1.3836317135549872, - "grad_norm": 0.09568873792886734, - "learning_rate": 5.633573293503915e-05, - "loss": 0.00690893828868866, - "step": 8115 - }, - { - "epoch": 1.3844842284739982, - "grad_norm": 0.05280910059809685, - "learning_rate": 5.631357320370518e-05, - "loss": 0.0068241022527217865, - "step": 8120 - }, - { - "epoch": 1.3853367433930095, - "grad_norm": 0.08307540416717529, - "learning_rate": 5.6291404689569406e-05, - "loss": 0.009796305000782013, - "step": 8125 - }, - { - "epoch": 1.3861892583120206, - "grad_norm": 0.06511564552783966, - "learning_rate": 5.6269227402980824e-05, - "loss": 0.00675605982542038, - "step": 8130 - }, - { - "epoch": 1.3870417732310316, - "grad_norm": 0.09521665424108505, - "learning_rate": 5.624704135429255e-05, - "loss": 0.00661565363407135, - "step": 8135 - }, - { - "epoch": 1.3878942881500427, - "grad_norm": 0.06467590481042862, - "learning_rate": 5.622484655386175e-05, - "loss": 0.007056808471679688, - "step": 8140 - }, - { - "epoch": 1.3887468030690537, - "grad_norm": 0.04240449517965317, - "learning_rate": 5.62026430120497e-05, - "loss": 0.005277678743004799, - "step": 8145 - }, - { - "epoch": 1.3895993179880648, - "grad_norm": 0.08462672680616379, - "learning_rate": 5.618043073922176e-05, - "loss": 0.005951377004384995, - "step": 8150 - }, - { - "epoch": 1.3904518329070759, - "grad_norm": 0.08304573595523834, - "learning_rate": 5.615820974574735e-05, - "loss": 0.006729351729154587, - "step": 8155 - }, - { - "epoch": 1.391304347826087, - "grad_norm": 0.04584382846951485, - "learning_rate": 5.6135980041999964e-05, - "loss": 0.00490913912653923, - "step": 8160 - }, - { - "epoch": 1.392156862745098, - "grad_norm": 0.06771710515022278, - "learning_rate": 5.6113741638357175e-05, - "loss": 0.007046511024236679, - "step": 8165 - }, - { - "epoch": 1.393009377664109, - "grad_norm": 0.06334209442138672, - "learning_rate": 5.609149454520062e-05, - "loss": 0.006314977258443833, - "step": 8170 - }, - { - "epoch": 1.39386189258312, - "grad_norm": 0.06783269345760345, - "learning_rate": 5.606923877291595e-05, - "loss": 0.006176649779081345, - "step": 8175 - }, - { - "epoch": 1.3947144075021312, - "grad_norm": 0.10245220363140106, - "learning_rate": 5.604697433189293e-05, - "loss": 0.006309907138347626, - "step": 8180 - }, - { - "epoch": 1.3955669224211424, - "grad_norm": 0.07151709496974945, - "learning_rate": 5.6024701232525325e-05, - "loss": 0.005038458108901978, - "step": 8185 - }, - { - "epoch": 1.3964194373401535, - "grad_norm": 0.08811933547258377, - "learning_rate": 5.600241948521099e-05, - "loss": 0.006065644696354866, - "step": 8190 - }, - { - "epoch": 1.3972719522591646, - "grad_norm": 0.07598903775215149, - "learning_rate": 5.5980129100351736e-05, - "loss": 0.006201237812638283, - "step": 8195 - }, - { - "epoch": 1.3981244671781756, - "grad_norm": 0.058092061430215836, - "learning_rate": 5.5957830088353475e-05, - "loss": 0.006383272260427475, - "step": 8200 - }, - { - "epoch": 1.3989769820971867, - "grad_norm": 0.18196560442447662, - "learning_rate": 5.593552245962616e-05, - "loss": 0.004768157005310058, - "step": 8205 - }, - { - "epoch": 1.3998294970161977, - "grad_norm": 0.09071574360132217, - "learning_rate": 5.591320622458369e-05, - "loss": 0.007671289891004562, - "step": 8210 - }, - { - "epoch": 1.4006820119352088, - "grad_norm": 0.09470858424901962, - "learning_rate": 5.589088139364405e-05, - "loss": 0.006691985577344894, - "step": 8215 - }, - { - "epoch": 1.40153452685422, - "grad_norm": 0.05345157906413078, - "learning_rate": 5.58685479772292e-05, - "loss": 0.005832263827323913, - "step": 8220 - }, - { - "epoch": 1.4023870417732311, - "grad_norm": 0.08154480904340744, - "learning_rate": 5.584620598576514e-05, - "loss": 0.00907905399799347, - "step": 8225 - }, - { - "epoch": 1.4032395566922422, - "grad_norm": 0.06621691584587097, - "learning_rate": 5.582385542968185e-05, - "loss": 0.005938088893890381, - "step": 8230 - }, - { - "epoch": 1.4040920716112533, - "grad_norm": 0.0557720884680748, - "learning_rate": 5.580149631941329e-05, - "loss": 0.005079039558768273, - "step": 8235 - }, - { - "epoch": 1.4049445865302643, - "grad_norm": 0.07839027792215347, - "learning_rate": 5.577912866539746e-05, - "loss": 0.006307472288608551, - "step": 8240 - }, - { - "epoch": 1.4057971014492754, - "grad_norm": 0.05926419049501419, - "learning_rate": 5.575675247807632e-05, - "loss": 0.0072102643549442295, - "step": 8245 - }, - { - "epoch": 1.4066496163682864, - "grad_norm": 0.0570182129740715, - "learning_rate": 5.5734367767895814e-05, - "loss": 0.0066485337913036345, - "step": 8250 - }, - { - "epoch": 1.4075021312872975, - "grad_norm": 0.0930657833814621, - "learning_rate": 5.571197454530588e-05, - "loss": 0.005854785442352295, - "step": 8255 - }, - { - "epoch": 1.4083546462063086, - "grad_norm": 0.06018427759408951, - "learning_rate": 5.568957282076041e-05, - "loss": 0.0049718767404556274, - "step": 8260 - }, - { - "epoch": 1.4092071611253196, - "grad_norm": 0.0889105498790741, - "learning_rate": 5.566716260471726e-05, - "loss": 0.005993577092885971, - "step": 8265 - }, - { - "epoch": 1.4100596760443307, - "grad_norm": 0.04429823160171509, - "learning_rate": 5.5644743907638294e-05, - "loss": 0.005357314646244049, - "step": 8270 - }, - { - "epoch": 1.4109121909633418, - "grad_norm": 0.054813142865896225, - "learning_rate": 5.5622316739989296e-05, - "loss": 0.005666692927479744, - "step": 8275 - }, - { - "epoch": 1.4117647058823528, - "grad_norm": 0.06909197568893433, - "learning_rate": 5.559988111224001e-05, - "loss": 0.005058525875210762, - "step": 8280 - }, - { - "epoch": 1.412617220801364, - "grad_norm": 0.10060004889965057, - "learning_rate": 5.557743703486413e-05, - "loss": 0.0070966087281703946, - "step": 8285 - }, - { - "epoch": 1.4134697357203752, - "grad_norm": 0.052008096128702164, - "learning_rate": 5.555498451833929e-05, - "loss": 0.006671085208654404, - "step": 8290 - }, - { - "epoch": 1.4143222506393862, - "grad_norm": 0.06272758543491364, - "learning_rate": 5.5532523573147094e-05, - "loss": 0.006071484088897705, - "step": 8295 - }, - { - "epoch": 1.4151747655583973, - "grad_norm": 0.08005380630493164, - "learning_rate": 5.551005420977304e-05, - "loss": 0.006429645419120789, - "step": 8300 - }, - { - "epoch": 1.4160272804774083, - "grad_norm": 0.08246695250272751, - "learning_rate": 5.548757643870659e-05, - "loss": 0.004599097743630409, - "step": 8305 - }, - { - "epoch": 1.4168797953964194, - "grad_norm": 0.1190599724650383, - "learning_rate": 5.54650902704411e-05, - "loss": 0.00652359127998352, - "step": 8310 - }, - { - "epoch": 1.4177323103154305, - "grad_norm": 0.042393747717142105, - "learning_rate": 5.5442595715473867e-05, - "loss": 0.004551848769187928, - "step": 8315 - }, - { - "epoch": 1.4185848252344417, - "grad_norm": 0.0809137374162674, - "learning_rate": 5.54200927843061e-05, - "loss": 0.0062880381941795346, - "step": 8320 - }, - { - "epoch": 1.4194373401534528, - "grad_norm": 0.09030820429325104, - "learning_rate": 5.5397581487442905e-05, - "loss": 0.007365265488624572, - "step": 8325 - }, - { - "epoch": 1.4202898550724639, - "grad_norm": 0.060766976326704025, - "learning_rate": 5.537506183539333e-05, - "loss": 0.0047208376228809355, - "step": 8330 - }, - { - "epoch": 1.421142369991475, - "grad_norm": 0.0763862356543541, - "learning_rate": 5.535253383867027e-05, - "loss": 0.006549081206321717, - "step": 8335 - }, - { - "epoch": 1.421994884910486, - "grad_norm": 0.13493886590003967, - "learning_rate": 5.532999750779056e-05, - "loss": 0.0075773999094963075, - "step": 8340 - }, - { - "epoch": 1.422847399829497, - "grad_norm": 0.07888541370630264, - "learning_rate": 5.53074528532749e-05, - "loss": 0.007893601059913635, - "step": 8345 - }, - { - "epoch": 1.423699914748508, - "grad_norm": 0.04488302394747734, - "learning_rate": 5.528489988564789e-05, - "loss": 0.006052879989147187, - "step": 8350 - }, - { - "epoch": 1.4245524296675192, - "grad_norm": 0.09534436464309692, - "learning_rate": 5.5262338615438e-05, - "loss": 0.006183170899748802, - "step": 8355 - }, - { - "epoch": 1.4254049445865302, - "grad_norm": 0.0796227753162384, - "learning_rate": 5.523976905317758e-05, - "loss": 0.006368820369243622, - "step": 8360 - }, - { - "epoch": 1.4262574595055413, - "grad_norm": 0.039230771362781525, - "learning_rate": 5.521719120940287e-05, - "loss": 0.005282421037554741, - "step": 8365 - }, - { - "epoch": 1.4271099744245523, - "grad_norm": 0.12020807713270187, - "learning_rate": 5.5194605094653935e-05, - "loss": 0.00718868374824524, - "step": 8370 - }, - { - "epoch": 1.4279624893435634, - "grad_norm": 0.07434894144535065, - "learning_rate": 5.5172010719474746e-05, - "loss": 0.007591472566127777, - "step": 8375 - }, - { - "epoch": 1.4288150042625745, - "grad_norm": 0.0722346156835556, - "learning_rate": 5.514940809441309e-05, - "loss": 0.005854631587862968, - "step": 8380 - }, - { - "epoch": 1.4296675191815857, - "grad_norm": 0.07834078371524811, - "learning_rate": 5.5126797230020634e-05, - "loss": 0.007415357977151871, - "step": 8385 - }, - { - "epoch": 1.4305200341005968, - "grad_norm": 0.08718696236610413, - "learning_rate": 5.5104178136852884e-05, - "loss": 0.007089633494615555, - "step": 8390 - }, - { - "epoch": 1.4313725490196079, - "grad_norm": 0.08823035657405853, - "learning_rate": 5.508155082546918e-05, - "loss": 0.007153714448213578, - "step": 8395 - }, - { - "epoch": 1.432225063938619, - "grad_norm": 0.07249119132757187, - "learning_rate": 5.505891530643269e-05, - "loss": 0.007651906460523605, - "step": 8400 - }, - { - "epoch": 1.43307757885763, - "grad_norm": 0.06284502893686295, - "learning_rate": 5.503627159031046e-05, - "loss": 0.007741397619247437, - "step": 8405 - }, - { - "epoch": 1.433930093776641, - "grad_norm": 0.06557357311248779, - "learning_rate": 5.501361968767331e-05, - "loss": 0.007656902819871902, - "step": 8410 - }, - { - "epoch": 1.434782608695652, - "grad_norm": 0.0775318294763565, - "learning_rate": 5.49909596090959e-05, - "loss": 0.006714560091495514, - "step": 8415 - }, - { - "epoch": 1.4356351236146634, - "grad_norm": 0.05347822234034538, - "learning_rate": 5.496829136515672e-05, - "loss": 0.0048537302762269975, - "step": 8420 - }, - { - "epoch": 1.4364876385336744, - "grad_norm": 0.07040467113256454, - "learning_rate": 5.4945614966438046e-05, - "loss": 0.005411979556083679, - "step": 8425 - }, - { - "epoch": 1.4373401534526855, - "grad_norm": 0.09473410993814468, - "learning_rate": 5.492293042352598e-05, - "loss": 0.008005911856889725, - "step": 8430 - }, - { - "epoch": 1.4381926683716966, - "grad_norm": 0.037446580827236176, - "learning_rate": 5.4900237747010426e-05, - "loss": 0.006237779557704925, - "step": 8435 - }, - { - "epoch": 1.4390451832907076, - "grad_norm": 0.11029476672410965, - "learning_rate": 5.4877536947485074e-05, - "loss": 0.008190502226352692, - "step": 8440 - }, - { - "epoch": 1.4398976982097187, - "grad_norm": 0.0514204315841198, - "learning_rate": 5.4854828035547424e-05, - "loss": 0.006500741839408875, - "step": 8445 - }, - { - "epoch": 1.4407502131287298, - "grad_norm": 0.08411483466625214, - "learning_rate": 5.483211102179873e-05, - "loss": 0.0053235463798046116, - "step": 8450 - }, - { - "epoch": 1.4416027280477408, - "grad_norm": 0.09279052913188934, - "learning_rate": 5.480938591684407e-05, - "loss": 0.006842000037431717, - "step": 8455 - }, - { - "epoch": 1.4424552429667519, - "grad_norm": 0.09881046414375305, - "learning_rate": 5.478665273129228e-05, - "loss": 0.007064050436019898, - "step": 8460 - }, - { - "epoch": 1.443307757885763, - "grad_norm": 0.09018172323703766, - "learning_rate": 5.476391147575595e-05, - "loss": 0.005222787708044052, - "step": 8465 - }, - { - "epoch": 1.444160272804774, - "grad_norm": 0.11489493399858475, - "learning_rate": 5.4741162160851455e-05, - "loss": 0.004823528230190277, - "step": 8470 - }, - { - "epoch": 1.445012787723785, - "grad_norm": 0.11010619252920151, - "learning_rate": 5.4718404797198955e-05, - "loss": 0.007554465532302856, - "step": 8475 - }, - { - "epoch": 1.4458653026427961, - "grad_norm": 0.10469060391187668, - "learning_rate": 5.469563939542233e-05, - "loss": 0.005817038565874099, - "step": 8480 - }, - { - "epoch": 1.4467178175618074, - "grad_norm": 0.06776002794504166, - "learning_rate": 5.467286596614922e-05, - "loss": 0.00899386927485466, - "step": 8485 - }, - { - "epoch": 1.4475703324808185, - "grad_norm": 0.08414942771196365, - "learning_rate": 5.4650084520011026e-05, - "loss": 0.00611347034573555, - "step": 8490 - }, - { - "epoch": 1.4484228473998295, - "grad_norm": 0.09625279158353806, - "learning_rate": 5.462729506764289e-05, - "loss": 0.005645812302827835, - "step": 8495 - }, - { - "epoch": 1.4492753623188406, - "grad_norm": 0.10020645707845688, - "learning_rate": 5.4604497619683674e-05, - "loss": 0.0058198563754558565, - "step": 8500 - }, - { - "epoch": 1.4501278772378516, - "grad_norm": 0.08466780185699463, - "learning_rate": 5.4581692186776e-05, - "loss": 0.005098164081573486, - "step": 8505 - }, - { - "epoch": 1.4509803921568627, - "grad_norm": 0.058955296874046326, - "learning_rate": 5.4558878779566194e-05, - "loss": 0.004072735831141472, - "step": 8510 - }, - { - "epoch": 1.4518329070758738, - "grad_norm": 0.14849397540092468, - "learning_rate": 5.4536057408704304e-05, - "loss": 0.011097650229930877, - "step": 8515 - }, - { - "epoch": 1.452685421994885, - "grad_norm": 0.08641809970140457, - "learning_rate": 5.451322808484413e-05, - "loss": 0.006210784614086151, - "step": 8520 - }, - { - "epoch": 1.453537936913896, - "grad_norm": 0.07506752014160156, - "learning_rate": 5.4490390818643136e-05, - "loss": 0.006071462482213974, - "step": 8525 - }, - { - "epoch": 1.4543904518329072, - "grad_norm": 0.10383405536413193, - "learning_rate": 5.4467545620762545e-05, - "loss": 0.008749781548976899, - "step": 8530 - }, - { - "epoch": 1.4552429667519182, - "grad_norm": 0.08180487155914307, - "learning_rate": 5.444469250186721e-05, - "loss": 0.00613279715180397, - "step": 8535 - }, - { - "epoch": 1.4560954816709293, - "grad_norm": 0.07797367125749588, - "learning_rate": 5.442183147262577e-05, - "loss": 0.005885690450668335, - "step": 8540 - }, - { - "epoch": 1.4569479965899403, - "grad_norm": 0.0780278891324997, - "learning_rate": 5.439896254371049e-05, - "loss": 0.007259850949048996, - "step": 8545 - }, - { - "epoch": 1.4578005115089514, - "grad_norm": 0.10005395114421844, - "learning_rate": 5.437608572579737e-05, - "loss": 0.0061523888260126116, - "step": 8550 - }, - { - "epoch": 1.4586530264279625, - "grad_norm": 0.10141763836145401, - "learning_rate": 5.435320102956604e-05, - "loss": 0.006501191109418869, - "step": 8555 - }, - { - "epoch": 1.4595055413469735, - "grad_norm": 0.0938732773065567, - "learning_rate": 5.4330308465699865e-05, - "loss": 0.008337517827749252, - "step": 8560 - }, - { - "epoch": 1.4603580562659846, - "grad_norm": 0.1085699051618576, - "learning_rate": 5.430740804488582e-05, - "loss": 0.005681714415550232, - "step": 8565 - }, - { - "epoch": 1.4612105711849956, - "grad_norm": 0.07967904955148697, - "learning_rate": 5.428449977781463e-05, - "loss": 0.006299185007810593, - "step": 8570 - }, - { - "epoch": 1.4620630861040067, - "grad_norm": 0.090158611536026, - "learning_rate": 5.426158367518061e-05, - "loss": 0.007821831852197647, - "step": 8575 - }, - { - "epoch": 1.4629156010230178, - "grad_norm": 0.12222256511449814, - "learning_rate": 5.4238659747681736e-05, - "loss": 0.0065193742513656614, - "step": 8580 - }, - { - "epoch": 1.463768115942029, - "grad_norm": 0.07724417746067047, - "learning_rate": 5.421572800601971e-05, - "loss": 0.00850745365023613, - "step": 8585 - }, - { - "epoch": 1.46462063086104, - "grad_norm": 0.07322543114423752, - "learning_rate": 5.4192788460899786e-05, - "loss": 0.006478501856327057, - "step": 8590 - }, - { - "epoch": 1.4654731457800512, - "grad_norm": 0.07086360454559326, - "learning_rate": 5.416984112303095e-05, - "loss": 0.007459370046854019, - "step": 8595 - }, - { - "epoch": 1.4663256606990622, - "grad_norm": 0.08460366725921631, - "learning_rate": 5.414688600312575e-05, - "loss": 0.006461035460233688, - "step": 8600 - }, - { - "epoch": 1.4671781756180733, - "grad_norm": 0.06856394559144974, - "learning_rate": 5.412392311190041e-05, - "loss": 0.007420676201581955, - "step": 8605 - }, - { - "epoch": 1.4680306905370843, - "grad_norm": 0.06801126897335052, - "learning_rate": 5.4100952460074766e-05, - "loss": 0.006456401199102402, - "step": 8610 - }, - { - "epoch": 1.4688832054560954, - "grad_norm": 0.06273184716701508, - "learning_rate": 5.4077974058372295e-05, - "loss": 0.00508052185177803, - "step": 8615 - }, - { - "epoch": 1.4697357203751067, - "grad_norm": 0.07751575112342834, - "learning_rate": 5.405498791752007e-05, - "loss": 0.006596812605857849, - "step": 8620 - }, - { - "epoch": 1.4705882352941178, - "grad_norm": 0.10850238054990768, - "learning_rate": 5.4031994048248776e-05, - "loss": 0.006385332345962525, - "step": 8625 - }, - { - "epoch": 1.4714407502131288, - "grad_norm": 0.07195930927991867, - "learning_rate": 5.4008992461292736e-05, - "loss": 0.007354143261909485, - "step": 8630 - }, - { - "epoch": 1.4722932651321399, - "grad_norm": 0.061606891453266144, - "learning_rate": 5.3985983167389846e-05, - "loss": 0.007285259664058685, - "step": 8635 - }, - { - "epoch": 1.473145780051151, - "grad_norm": 0.059549275785684586, - "learning_rate": 5.3962966177281616e-05, - "loss": 0.005211231112480163, - "step": 8640 - }, - { - "epoch": 1.473998294970162, - "grad_norm": 0.04548822343349457, - "learning_rate": 5.3939941501713146e-05, - "loss": 0.00805831179022789, - "step": 8645 - }, - { - "epoch": 1.474850809889173, - "grad_norm": 0.046682652086019516, - "learning_rate": 5.3916909151433096e-05, - "loss": 0.005787956342101097, - "step": 8650 - }, - { - "epoch": 1.4757033248081841, - "grad_norm": 0.06405246257781982, - "learning_rate": 5.3893869137193755e-05, - "loss": 0.005377359688282013, - "step": 8655 - }, - { - "epoch": 1.4765558397271952, - "grad_norm": 0.09410709887742996, - "learning_rate": 5.3870821469750964e-05, - "loss": 0.006961540877819061, - "step": 8660 - }, - { - "epoch": 1.4774083546462062, - "grad_norm": 0.0637243241071701, - "learning_rate": 5.384776615986414e-05, - "loss": 0.0060172989964485165, - "step": 8665 - }, - { - "epoch": 1.4782608695652173, - "grad_norm": 0.07082457840442657, - "learning_rate": 5.382470321829627e-05, - "loss": 0.005960140377283096, - "step": 8670 - }, - { - "epoch": 1.4791133844842284, - "grad_norm": 0.06502280384302139, - "learning_rate": 5.380163265581391e-05, - "loss": 0.005596417188644409, - "step": 8675 - }, - { - "epoch": 1.4799658994032396, - "grad_norm": 0.07504235208034515, - "learning_rate": 5.3778554483187134e-05, - "loss": 0.007427094876766205, - "step": 8680 - }, - { - "epoch": 1.4808184143222507, - "grad_norm": 0.08005198836326599, - "learning_rate": 5.375546871118964e-05, - "loss": 0.006888572126626968, - "step": 8685 - }, - { - "epoch": 1.4816709292412618, - "grad_norm": 0.1083201915025711, - "learning_rate": 5.373237535059861e-05, - "loss": 0.007253114879131317, - "step": 8690 - }, - { - "epoch": 1.4825234441602728, - "grad_norm": 0.060000013560056686, - "learning_rate": 5.37092744121948e-05, - "loss": 0.005570416525006294, - "step": 8695 - }, - { - "epoch": 1.4833759590792839, - "grad_norm": 0.04832584038376808, - "learning_rate": 5.3686165906762504e-05, - "loss": 0.005356843769550324, - "step": 8700 - }, - { - "epoch": 1.484228473998295, - "grad_norm": 0.061586812138557434, - "learning_rate": 5.3663049845089534e-05, - "loss": 0.005226074159145356, - "step": 8705 - }, - { - "epoch": 1.485080988917306, - "grad_norm": 0.08486256003379822, - "learning_rate": 5.363992623796724e-05, - "loss": 0.007083939760923386, - "step": 8710 - }, - { - "epoch": 1.485933503836317, - "grad_norm": 0.09085836261510849, - "learning_rate": 5.361679509619048e-05, - "loss": 0.005988218262791634, - "step": 8715 - }, - { - "epoch": 1.4867860187553283, - "grad_norm": 0.06301745027303696, - "learning_rate": 5.359365643055765e-05, - "loss": 0.00595020055770874, - "step": 8720 - }, - { - "epoch": 1.4876385336743394, - "grad_norm": 0.07939866930246353, - "learning_rate": 5.3570510251870646e-05, - "loss": 0.006101110950112343, - "step": 8725 - }, - { - "epoch": 1.4884910485933505, - "grad_norm": 0.10560661554336548, - "learning_rate": 5.354735657093487e-05, - "loss": 0.006781180202960968, - "step": 8730 - }, - { - "epoch": 1.4893435635123615, - "grad_norm": 0.10549639165401459, - "learning_rate": 5.352419539855925e-05, - "loss": 0.006455187499523163, - "step": 8735 - }, - { - "epoch": 1.4901960784313726, - "grad_norm": 0.06474289298057556, - "learning_rate": 5.3501026745556157e-05, - "loss": 0.0078111283481121065, - "step": 8740 - }, - { - "epoch": 1.4910485933503836, - "grad_norm": 0.11109986901283264, - "learning_rate": 5.3477850622741525e-05, - "loss": 0.00798504576086998, - "step": 8745 - }, - { - "epoch": 1.4919011082693947, - "grad_norm": 0.0787222608923912, - "learning_rate": 5.3454667040934715e-05, - "loss": 0.007222773879766465, - "step": 8750 - }, - { - "epoch": 1.4927536231884058, - "grad_norm": 0.06622221320867538, - "learning_rate": 5.3431476010958613e-05, - "loss": 0.0064462460577487946, - "step": 8755 - }, - { - "epoch": 1.4936061381074168, - "grad_norm": 0.07526405900716782, - "learning_rate": 5.340827754363955e-05, - "loss": 0.005344667285680771, - "step": 8760 - }, - { - "epoch": 1.4944586530264279, - "grad_norm": 0.08911366015672684, - "learning_rate": 5.338507164980734e-05, - "loss": 0.006722994893789291, - "step": 8765 - }, - { - "epoch": 1.495311167945439, - "grad_norm": 0.03749583289027214, - "learning_rate": 5.336185834029527e-05, - "loss": 0.006120331957936287, - "step": 8770 - }, - { - "epoch": 1.49616368286445, - "grad_norm": 0.08167645335197449, - "learning_rate": 5.333863762594008e-05, - "loss": 0.007496768981218338, - "step": 8775 - }, - { - "epoch": 1.4970161977834613, - "grad_norm": 0.09045904129743576, - "learning_rate": 5.3315409517581996e-05, - "loss": 0.007222528755664826, - "step": 8780 - }, - { - "epoch": 1.4978687127024723, - "grad_norm": 0.06064090132713318, - "learning_rate": 5.329217402606464e-05, - "loss": 0.0044986031949520115, - "step": 8785 - }, - { - "epoch": 1.4987212276214834, - "grad_norm": 0.07282263785600662, - "learning_rate": 5.3268931162235126e-05, - "loss": 0.005251912400126457, - "step": 8790 - }, - { - "epoch": 1.4995737425404945, - "grad_norm": 0.0674249604344368, - "learning_rate": 5.324568093694401e-05, - "loss": 0.006289477646350861, - "step": 8795 - }, - { - "epoch": 1.4997442455242966, - "eval_loss": 0.03760311380028725, - "eval_runtime": 3.668, - "eval_samples_per_second": 68.702, - "eval_steps_per_second": 1.091, - "step": 8796 - }, - { - "eval_cer_subset": 0.014184050678261437, - "eval_cer_subset_edit_distance": 871, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 8796 - }, - { - "epoch": 1.5004262574595055, - "grad_norm": 0.06114037334918976, - "learning_rate": 5.322242336104525e-05, - "loss": 0.005809751898050308, - "step": 8800 - }, - { - "epoch": 1.5012787723785166, - "grad_norm": 0.08830825984477997, - "learning_rate": 5.319915844539626e-05, - "loss": 0.006921032071113586, - "step": 8805 - }, - { - "epoch": 1.5021312872975279, - "grad_norm": 0.10813544690608978, - "learning_rate": 5.3175886200857873e-05, - "loss": 0.007966426759958267, - "step": 8810 - }, - { - "epoch": 1.502983802216539, - "grad_norm": 0.08357173204421997, - "learning_rate": 5.3152606638294355e-05, - "loss": 0.006943506002426147, - "step": 8815 - }, - { - "epoch": 1.50383631713555, - "grad_norm": 0.08059901744127274, - "learning_rate": 5.312931976857339e-05, - "loss": 0.0047626100480556485, - "step": 8820 - }, - { - "epoch": 1.504688832054561, - "grad_norm": 0.07412680238485336, - "learning_rate": 5.310602560256604e-05, - "loss": 0.00709492564201355, - "step": 8825 - }, - { - "epoch": 1.5055413469735721, - "grad_norm": 0.046478480100631714, - "learning_rate": 5.3082724151146814e-05, - "loss": 0.006465598940849304, - "step": 8830 - }, - { - "epoch": 1.5063938618925832, - "grad_norm": 0.11122216284275055, - "learning_rate": 5.30594154251936e-05, - "loss": 0.00888531506061554, - "step": 8835 - }, - { - "epoch": 1.5072463768115942, - "grad_norm": 0.06441432982683182, - "learning_rate": 5.3036099435587685e-05, - "loss": 0.005882937833666802, - "step": 8840 - }, - { - "epoch": 1.5080988917306053, - "grad_norm": 0.05722307041287422, - "learning_rate": 5.301277619321374e-05, - "loss": 0.0059202808886766435, - "step": 8845 - }, - { - "epoch": 1.5089514066496164, - "grad_norm": 0.06677310913801193, - "learning_rate": 5.2989445708959856e-05, - "loss": 0.0064939349889755246, - "step": 8850 - }, - { - "epoch": 1.5098039215686274, - "grad_norm": 0.08854222297668457, - "learning_rate": 5.296610799371745e-05, - "loss": 0.007034827768802643, - "step": 8855 - }, - { - "epoch": 1.5106564364876385, - "grad_norm": 0.059711627662181854, - "learning_rate": 5.2942763058381356e-05, - "loss": 0.007557753473520279, - "step": 8860 - }, - { - "epoch": 1.5115089514066495, - "grad_norm": 0.06355257332324982, - "learning_rate": 5.291941091384977e-05, - "loss": 0.006534597277641297, - "step": 8865 - }, - { - "epoch": 1.5123614663256606, - "grad_norm": 0.05741631239652634, - "learning_rate": 5.2896051571024255e-05, - "loss": 0.006453331559896469, - "step": 8870 - }, - { - "epoch": 1.5132139812446717, - "grad_norm": 0.05809224396944046, - "learning_rate": 5.287268504080972e-05, - "loss": 0.006065556779503822, - "step": 8875 - }, - { - "epoch": 1.5140664961636827, - "grad_norm": 0.04522582143545151, - "learning_rate": 5.284931133411443e-05, - "loss": 0.004097414761781692, - "step": 8880 - }, - { - "epoch": 1.514919011082694, - "grad_norm": 0.09349111467599869, - "learning_rate": 5.2825930461850014e-05, - "loss": 0.005707831308245659, - "step": 8885 - }, - { - "epoch": 1.515771526001705, - "grad_norm": 0.08951391279697418, - "learning_rate": 5.280254243493145e-05, - "loss": 0.00725678950548172, - "step": 8890 - }, - { - "epoch": 1.5166240409207161, - "grad_norm": 0.07826244086027145, - "learning_rate": 5.277914726427705e-05, - "loss": 0.008086606860160828, - "step": 8895 - }, - { - "epoch": 1.5174765558397272, - "grad_norm": 0.0619954876601696, - "learning_rate": 5.2755744960808446e-05, - "loss": 0.005462165176868439, - "step": 8900 - }, - { - "epoch": 1.5183290707587382, - "grad_norm": 0.04414132609963417, - "learning_rate": 5.273233553545062e-05, - "loss": 0.005678927898406983, - "step": 8905 - }, - { - "epoch": 1.5191815856777495, - "grad_norm": 0.07183931767940521, - "learning_rate": 5.2708918999131864e-05, - "loss": 0.007184042781591416, - "step": 8910 - }, - { - "epoch": 1.5200341005967606, - "grad_norm": 0.10447251796722412, - "learning_rate": 5.26854953627838e-05, - "loss": 0.009831231832504273, - "step": 8915 - }, - { - "epoch": 1.5208866155157716, - "grad_norm": 0.04392845183610916, - "learning_rate": 5.266206463734135e-05, - "loss": 0.006517301499843598, - "step": 8920 - }, - { - "epoch": 1.5217391304347827, - "grad_norm": 0.06292697787284851, - "learning_rate": 5.2638626833742776e-05, - "loss": 0.005328541249036789, - "step": 8925 - }, - { - "epoch": 1.5225916453537938, - "grad_norm": 0.06425110250711441, - "learning_rate": 5.2615181962929605e-05, - "loss": 0.006298693269491196, - "step": 8930 - }, - { - "epoch": 1.5234441602728048, - "grad_norm": 0.08059051632881165, - "learning_rate": 5.259173003584669e-05, - "loss": 0.008097793161869048, - "step": 8935 - }, - { - "epoch": 1.5242966751918159, - "grad_norm": 0.0625302791595459, - "learning_rate": 5.256827106344218e-05, - "loss": 0.006664089858531952, - "step": 8940 - }, - { - "epoch": 1.525149190110827, - "grad_norm": 0.06092630326747894, - "learning_rate": 5.254480505666749e-05, - "loss": 0.006084204837679863, - "step": 8945 - }, - { - "epoch": 1.526001705029838, - "grad_norm": 0.07297338545322418, - "learning_rate": 5.2521332026477344e-05, - "loss": 0.006405481696128845, - "step": 8950 - }, - { - "epoch": 1.526854219948849, - "grad_norm": 0.05876631662249565, - "learning_rate": 5.249785198382973e-05, - "loss": 0.006670171767473221, - "step": 8955 - }, - { - "epoch": 1.5277067348678601, - "grad_norm": 0.0633542388677597, - "learning_rate": 5.247436493968589e-05, - "loss": 0.004565924406051636, - "step": 8960 - }, - { - "epoch": 1.5285592497868712, - "grad_norm": 0.09164717048406601, - "learning_rate": 5.2450870905010395e-05, - "loss": 0.005662925541400909, - "step": 8965 - }, - { - "epoch": 1.5294117647058822, - "grad_norm": 0.06646572798490524, - "learning_rate": 5.2427369890771026e-05, - "loss": 0.006319984793663025, - "step": 8970 - }, - { - "epoch": 1.5302642796248933, - "grad_norm": 0.08518269658088684, - "learning_rate": 5.2403861907938826e-05, - "loss": 0.0066184550523757935, - "step": 8975 - }, - { - "epoch": 1.5311167945439044, - "grad_norm": 0.08369076251983643, - "learning_rate": 5.238034696748811e-05, - "loss": 0.005069610476493835, - "step": 8980 - }, - { - "epoch": 1.5319693094629157, - "grad_norm": 0.05607258528470993, - "learning_rate": 5.235682508039646e-05, - "loss": 0.007457223534584045, - "step": 8985 - }, - { - "epoch": 1.5328218243819267, - "grad_norm": 0.0828152522444725, - "learning_rate": 5.2333296257644646e-05, - "loss": 0.007727481424808502, - "step": 8990 - }, - { - "epoch": 1.5336743393009378, - "grad_norm": 0.09770844876766205, - "learning_rate": 5.230976051021671e-05, - "loss": 0.007591258734464645, - "step": 8995 - }, - { - "epoch": 1.5345268542199488, - "grad_norm": 0.05906900763511658, - "learning_rate": 5.2286217849099925e-05, - "loss": 0.008510296791791916, - "step": 9000 - }, - { - "epoch": 1.53537936913896, - "grad_norm": 0.07594765722751617, - "learning_rate": 5.2262668285284785e-05, - "loss": 0.005943647772073746, - "step": 9005 - }, - { - "epoch": 1.5362318840579712, - "grad_norm": 0.056658126413822174, - "learning_rate": 5.223911182976502e-05, - "loss": 0.004702667891979218, - "step": 9010 - }, - { - "epoch": 1.5370843989769822, - "grad_norm": 0.060573313385248184, - "learning_rate": 5.2215548493537556e-05, - "loss": 0.006530648469924927, - "step": 9015 - }, - { - "epoch": 1.5379369138959933, - "grad_norm": 0.06876473873853683, - "learning_rate": 5.219197828760254e-05, - "loss": 0.0070976391434669495, - "step": 9020 - }, - { - "epoch": 1.5387894288150044, - "grad_norm": 0.05402369797229767, - "learning_rate": 5.2168401222963354e-05, - "loss": 0.005997032299637795, - "step": 9025 - }, - { - "epoch": 1.5396419437340154, - "grad_norm": 0.0907805860042572, - "learning_rate": 5.214481731062652e-05, - "loss": 0.007357357442378998, - "step": 9030 - }, - { - "epoch": 1.5404944586530265, - "grad_norm": 0.07572564482688904, - "learning_rate": 5.212122656160182e-05, - "loss": 0.004879472404718399, - "step": 9035 - }, - { - "epoch": 1.5413469735720375, - "grad_norm": 0.05684768036007881, - "learning_rate": 5.209762898690218e-05, - "loss": 0.006248699128627777, - "step": 9040 - }, - { - "epoch": 1.5421994884910486, - "grad_norm": 0.070293128490448, - "learning_rate": 5.2074024597543745e-05, - "loss": 0.005055962502956391, - "step": 9045 - }, - { - "epoch": 1.5430520034100597, - "grad_norm": 0.06611300259828568, - "learning_rate": 5.2050413404545823e-05, - "loss": 0.0048581909388303755, - "step": 9050 - }, - { - "epoch": 1.5439045183290707, - "grad_norm": 0.06960003823041916, - "learning_rate": 5.202679541893092e-05, - "loss": 0.006258350610733032, - "step": 9055 - }, - { - "epoch": 1.5447570332480818, - "grad_norm": 0.059757016599178314, - "learning_rate": 5.2003170651724675e-05, - "loss": 0.006347355991601944, - "step": 9060 - }, - { - "epoch": 1.5456095481670928, - "grad_norm": 0.06531284749507904, - "learning_rate": 5.1979539113955936e-05, - "loss": 0.00543224960565567, - "step": 9065 - }, - { - "epoch": 1.546462063086104, - "grad_norm": 0.08068390935659409, - "learning_rate": 5.195590081665667e-05, - "loss": 0.004933612793684006, - "step": 9070 - }, - { - "epoch": 1.547314578005115, - "grad_norm": 0.06198716536164284, - "learning_rate": 5.193225577086203e-05, - "loss": 0.00523824393749237, - "step": 9075 - }, - { - "epoch": 1.548167092924126, - "grad_norm": 0.07734926789999008, - "learning_rate": 5.190860398761032e-05, - "loss": 0.005699950456619263, - "step": 9080 - }, - { - "epoch": 1.5490196078431373, - "grad_norm": 0.058083925396203995, - "learning_rate": 5.188494547794297e-05, - "loss": 0.006147466972470284, - "step": 9085 - }, - { - "epoch": 1.5498721227621484, - "grad_norm": 0.0675162672996521, - "learning_rate": 5.1861280252904546e-05, - "loss": 0.0059716224670410155, - "step": 9090 - }, - { - "epoch": 1.5507246376811594, - "grad_norm": 0.05415274575352669, - "learning_rate": 5.183760832354278e-05, - "loss": 0.0058246061205863954, - "step": 9095 - }, - { - "epoch": 1.5515771526001705, - "grad_norm": 0.05826190859079361, - "learning_rate": 5.1813929700908523e-05, - "loss": 0.005409573763608932, - "step": 9100 - }, - { - "epoch": 1.5524296675191815, - "grad_norm": 0.07188098877668381, - "learning_rate": 5.179024439605573e-05, - "loss": 0.00541839525103569, - "step": 9105 - }, - { - "epoch": 1.5532821824381928, - "grad_norm": 0.07955330610275269, - "learning_rate": 5.176655242004149e-05, - "loss": 0.007760365307331085, - "step": 9110 - }, - { - "epoch": 1.5541346973572039, - "grad_norm": 0.07923565059900284, - "learning_rate": 5.1742853783926e-05, - "loss": 0.00563618317246437, - "step": 9115 - }, - { - "epoch": 1.554987212276215, - "grad_norm": 0.08301008492708206, - "learning_rate": 5.171914849877258e-05, - "loss": 0.006948529183864594, - "step": 9120 - }, - { - "epoch": 1.555839727195226, - "grad_norm": 0.10905841737985611, - "learning_rate": 5.1695436575647655e-05, - "loss": 0.005861887335777282, - "step": 9125 - }, - { - "epoch": 1.556692242114237, - "grad_norm": 0.06157204881310463, - "learning_rate": 5.167171802562072e-05, - "loss": 0.005052468553185463, - "step": 9130 - }, - { - "epoch": 1.5575447570332481, - "grad_norm": 0.08309191465377808, - "learning_rate": 5.164799285976438e-05, - "loss": 0.006937308609485627, - "step": 9135 - }, - { - "epoch": 1.5583972719522592, - "grad_norm": 0.07454490661621094, - "learning_rate": 5.162426108915437e-05, - "loss": 0.00504121258854866, - "step": 9140 - }, - { - "epoch": 1.5592497868712702, - "grad_norm": 0.07217807322740555, - "learning_rate": 5.160052272486943e-05, - "loss": 0.004582167789340019, - "step": 9145 - }, - { - "epoch": 1.5601023017902813, - "grad_norm": 0.07113789767026901, - "learning_rate": 5.157677777799145e-05, - "loss": 0.0055323362350463865, - "step": 9150 - }, - { - "epoch": 1.5609548167092924, - "grad_norm": 0.10281748324632645, - "learning_rate": 5.1553026259605316e-05, - "loss": 0.006342601776123047, - "step": 9155 - }, - { - "epoch": 1.5618073316283034, - "grad_norm": 0.09731876850128174, - "learning_rate": 5.152926818079906e-05, - "loss": 0.0054936733096838, - "step": 9160 - }, - { - "epoch": 1.5626598465473145, - "grad_norm": 0.09631586819887161, - "learning_rate": 5.1505503552663734e-05, - "loss": 0.0064162641763687136, - "step": 9165 - }, - { - "epoch": 1.5635123614663256, - "grad_norm": 0.07588718831539154, - "learning_rate": 5.148173238629348e-05, - "loss": 0.0069232374429702755, - "step": 9170 - }, - { - "epoch": 1.5643648763853366, - "grad_norm": 0.10357257723808289, - "learning_rate": 5.145795469278544e-05, - "loss": 0.007076382637023926, - "step": 9175 - }, - { - "epoch": 1.5652173913043477, - "grad_norm": 0.07249122112989426, - "learning_rate": 5.1434170483239826e-05, - "loss": 0.005868781358003616, - "step": 9180 - }, - { - "epoch": 1.566069906223359, - "grad_norm": 0.06878417730331421, - "learning_rate": 5.1410379768759934e-05, - "loss": 0.006841042637825012, - "step": 9185 - }, - { - "epoch": 1.56692242114237, - "grad_norm": 0.1096004843711853, - "learning_rate": 5.138658256045203e-05, - "loss": 0.00807877779006958, - "step": 9190 - }, - { - "epoch": 1.567774936061381, - "grad_norm": 0.07194329053163528, - "learning_rate": 5.136277886942547e-05, - "loss": 0.005923056975007057, - "step": 9195 - }, - { - "epoch": 1.5686274509803921, - "grad_norm": 0.08904275298118591, - "learning_rate": 5.133896870679257e-05, - "loss": 0.006372517347335816, - "step": 9200 - }, - { - "epoch": 1.5694799658994032, - "grad_norm": 0.05133598670363426, - "learning_rate": 5.131515208366873e-05, - "loss": 0.00692460760474205, - "step": 9205 - }, - { - "epoch": 1.5703324808184145, - "grad_norm": 0.047151900827884674, - "learning_rate": 5.1291329011172345e-05, - "loss": 0.006545543670654297, - "step": 9210 - }, - { - "epoch": 1.5711849957374255, - "grad_norm": 0.07102219760417938, - "learning_rate": 5.126749950042482e-05, - "loss": 0.006531259417533875, - "step": 9215 - }, - { - "epoch": 1.5720375106564366, - "grad_norm": 0.09585709124803543, - "learning_rate": 5.124366356255056e-05, - "loss": 0.005086017400026321, - "step": 9220 - }, - { - "epoch": 1.5728900255754477, - "grad_norm": 0.06898393481969833, - "learning_rate": 5.121982120867695e-05, - "loss": 0.004247477650642395, - "step": 9225 - }, - { - "epoch": 1.5737425404944587, - "grad_norm": 0.10513560473918915, - "learning_rate": 5.119597244993443e-05, - "loss": 0.006501986831426621, - "step": 9230 - }, - { - "epoch": 1.5745950554134698, - "grad_norm": 0.06671630591154099, - "learning_rate": 5.1172117297456366e-05, - "loss": 0.007658005505800247, - "step": 9235 - }, - { - "epoch": 1.5754475703324808, - "grad_norm": 0.09480880945920944, - "learning_rate": 5.1148255762379156e-05, - "loss": 0.006366011500358581, - "step": 9240 - }, - { - "epoch": 1.576300085251492, - "grad_norm": 0.06769633293151855, - "learning_rate": 5.112438785584215e-05, - "loss": 0.00625738725066185, - "step": 9245 - }, - { - "epoch": 1.577152600170503, - "grad_norm": 0.03695152327418327, - "learning_rate": 5.1100513588987665e-05, - "loss": 0.006924654543399811, - "step": 9250 - }, - { - "epoch": 1.578005115089514, - "grad_norm": 0.05657009407877922, - "learning_rate": 5.107663297296104e-05, - "loss": 0.005848415940999985, - "step": 9255 - }, - { - "epoch": 1.578857630008525, - "grad_norm": 0.11228469014167786, - "learning_rate": 5.105274601891051e-05, - "loss": 0.005637861788272858, - "step": 9260 - }, - { - "epoch": 1.5797101449275361, - "grad_norm": 0.06454899162054062, - "learning_rate": 5.102885273798732e-05, - "loss": 0.0066472023725509645, - "step": 9265 - }, - { - "epoch": 1.5805626598465472, - "grad_norm": 0.05328953638672829, - "learning_rate": 5.1004953141345637e-05, - "loss": 0.008773463219404221, - "step": 9270 - }, - { - "epoch": 1.5814151747655583, - "grad_norm": 0.05827401205897331, - "learning_rate": 5.0981047240142576e-05, - "loss": 0.0075307883322238926, - "step": 9275 - }, - { - "epoch": 1.5822676896845693, - "grad_norm": 0.0719359889626503, - "learning_rate": 5.095713504553822e-05, - "loss": 0.007532978057861328, - "step": 9280 - }, - { - "epoch": 1.5831202046035806, - "grad_norm": 0.08982953429222107, - "learning_rate": 5.0933216568695596e-05, - "loss": 0.007915425300598144, - "step": 9285 - }, - { - "epoch": 1.5839727195225917, - "grad_norm": 0.0919221043586731, - "learning_rate": 5.090929182078061e-05, - "loss": 0.005685590207576752, - "step": 9290 - }, - { - "epoch": 1.5848252344416027, - "grad_norm": 0.0840388685464859, - "learning_rate": 5.088536081296215e-05, - "loss": 0.0070190995931625364, - "step": 9295 - }, - { - "epoch": 1.5856777493606138, - "grad_norm": 0.08340579271316528, - "learning_rate": 5.086142355641199e-05, - "loss": 0.005871276929974556, - "step": 9300 - }, - { - "epoch": 1.5865302642796248, - "grad_norm": 0.0840516984462738, - "learning_rate": 5.0837480062304865e-05, - "loss": 0.007803326845169068, - "step": 9305 - }, - { - "epoch": 1.5873827791986361, - "grad_norm": 0.08378542214632034, - "learning_rate": 5.0813530341818377e-05, - "loss": 0.005085055530071258, - "step": 9310 - }, - { - "epoch": 1.5882352941176472, - "grad_norm": 0.10764650255441666, - "learning_rate": 5.078957440613305e-05, - "loss": 0.007959616929292678, - "step": 9315 - }, - { - "epoch": 1.5890878090366582, - "grad_norm": 0.07483979314565659, - "learning_rate": 5.076561226643231e-05, - "loss": 0.004332176968455314, - "step": 9320 - }, - { - "epoch": 1.5899403239556693, - "grad_norm": 0.06658382713794708, - "learning_rate": 5.074164393390249e-05, - "loss": 0.006168607249855995, - "step": 9325 - }, - { - "epoch": 1.5907928388746804, - "grad_norm": 0.09388890862464905, - "learning_rate": 5.071766941973282e-05, - "loss": 0.006460639089345932, - "step": 9330 - }, - { - "epoch": 1.5916453537936914, - "grad_norm": 0.051856543868780136, - "learning_rate": 5.0693688735115364e-05, - "loss": 0.005657953023910522, - "step": 9335 - }, - { - "epoch": 1.5924978687127025, - "grad_norm": 0.0785013884305954, - "learning_rate": 5.066970189124513e-05, - "loss": 0.008378601819276809, - "step": 9340 - }, - { - "epoch": 1.5933503836317136, - "grad_norm": 0.0653534010052681, - "learning_rate": 5.0645708899319956e-05, - "loss": 0.006928309798240662, - "step": 9345 - }, - { - "epoch": 1.5942028985507246, - "grad_norm": 0.047050826251506805, - "learning_rate": 5.062170977054058e-05, - "loss": 0.005722399801015854, - "step": 9350 - }, - { - "epoch": 1.5950554134697357, - "grad_norm": 0.10868531465530396, - "learning_rate": 5.059770451611061e-05, - "loss": 0.009898315370082855, - "step": 9355 - }, - { - "epoch": 1.5959079283887467, - "grad_norm": 0.0615832693874836, - "learning_rate": 5.0573693147236465e-05, - "loss": 0.007755370438098907, - "step": 9360 - }, - { - "epoch": 1.5967604433077578, - "grad_norm": 0.10720556974411011, - "learning_rate": 5.054967567512747e-05, - "loss": 0.006318587809801102, - "step": 9365 - }, - { - "epoch": 1.5976129582267689, - "grad_norm": 0.06587128341197968, - "learning_rate": 5.052565211099578e-05, - "loss": 0.004849371314048767, - "step": 9370 - }, - { - "epoch": 1.59846547314578, - "grad_norm": 0.07305008918046951, - "learning_rate": 5.050162246605638e-05, - "loss": 0.005983927100896835, - "step": 9375 - }, - { - "epoch": 1.599317988064791, - "grad_norm": 0.06641892343759537, - "learning_rate": 5.0477586751527124e-05, - "loss": 0.007008136063814163, - "step": 9380 - }, - { - "epoch": 1.6001705029838023, - "grad_norm": 0.06871581077575684, - "learning_rate": 5.045354497862868e-05, - "loss": 0.0066993959248065945, - "step": 9385 - }, - { - "epoch": 1.6010230179028133, - "grad_norm": 0.07417753338813782, - "learning_rate": 5.042949715858453e-05, - "loss": 0.006360804289579391, - "step": 9390 - }, - { - "epoch": 1.6018755328218244, - "grad_norm": 0.09202401340007782, - "learning_rate": 5.040544330262102e-05, - "loss": 0.006207296252250671, - "step": 9395 - }, - { - "epoch": 1.6027280477408354, - "grad_norm": 0.06747353821992874, - "learning_rate": 5.0381383421967276e-05, - "loss": 0.006196716427803039, - "step": 9400 - }, - { - "epoch": 1.6035805626598465, - "grad_norm": 0.06609310954809189, - "learning_rate": 5.0357317527855266e-05, - "loss": 0.005642791092395782, - "step": 9405 - }, - { - "epoch": 1.6044330775788578, - "grad_norm": 0.039614174515008926, - "learning_rate": 5.0333245631519716e-05, - "loss": 0.005146804824471473, - "step": 9410 - }, - { - "epoch": 1.6052855924978688, - "grad_norm": 0.0902944952249527, - "learning_rate": 5.0309167744198234e-05, - "loss": 0.005218298360705376, - "step": 9415 - }, - { - "epoch": 1.60613810741688, - "grad_norm": 0.06527641415596008, - "learning_rate": 5.028508387713114e-05, - "loss": 0.006157718971371651, - "step": 9420 - }, - { - "epoch": 1.606990622335891, - "grad_norm": 0.10824134200811386, - "learning_rate": 5.026099404156161e-05, - "loss": 0.00577687993645668, - "step": 9425 - }, - { - "epoch": 1.607843137254902, - "grad_norm": 0.091335728764534, - "learning_rate": 5.023689824873556e-05, - "loss": 0.005114461481571198, - "step": 9430 - }, - { - "epoch": 1.608695652173913, - "grad_norm": 0.047340504825115204, - "learning_rate": 5.021279650990173e-05, - "loss": 0.005150845646858216, - "step": 9435 - }, - { - "epoch": 1.6095481670929241, - "grad_norm": 0.05847655236721039, - "learning_rate": 5.01886888363116e-05, - "loss": 0.006019642949104309, - "step": 9440 - }, - { - "epoch": 1.6104006820119352, - "grad_norm": 0.10413257032632828, - "learning_rate": 5.016457523921943e-05, - "loss": 0.0097243569791317, - "step": 9445 - }, - { - "epoch": 1.6112531969309463, - "grad_norm": 0.06559625267982483, - "learning_rate": 5.014045572988226e-05, - "loss": 0.006743426620960236, - "step": 9450 - }, - { - "epoch": 1.6121057118499573, - "grad_norm": 0.07541610300540924, - "learning_rate": 5.0116330319559865e-05, - "loss": 0.004393500834703445, - "step": 9455 - }, - { - "epoch": 1.6129582267689684, - "grad_norm": 0.04757530242204666, - "learning_rate": 5.00921990195148e-05, - "loss": 0.004641738906502724, - "step": 9460 - }, - { - "epoch": 1.6138107416879794, - "grad_norm": 0.10010012239217758, - "learning_rate": 5.0068061841012355e-05, - "loss": 0.005677872523665428, - "step": 9465 - }, - { - "epoch": 1.6146632566069905, - "grad_norm": 0.08248613774776459, - "learning_rate": 5.0043918795320576e-05, - "loss": 0.006557486951351166, - "step": 9470 - }, - { - "epoch": 1.6155157715260016, - "grad_norm": 0.06300318241119385, - "learning_rate": 5.001976989371023e-05, - "loss": 0.0052742622792720795, - "step": 9475 - }, - { - "epoch": 1.6163682864450126, - "grad_norm": 0.06455430388450623, - "learning_rate": 4.999561514745482e-05, - "loss": 0.0061374582350254055, - "step": 9480 - }, - { - "epoch": 1.617220801364024, - "grad_norm": 0.04623732715845108, - "learning_rate": 4.997145456783062e-05, - "loss": 0.007861848175525665, - "step": 9485 - }, - { - "epoch": 1.618073316283035, - "grad_norm": 0.05294455960392952, - "learning_rate": 4.994728816611655e-05, - "loss": 0.005468960478901863, - "step": 9490 - }, - { - "epoch": 1.618925831202046, - "grad_norm": 0.04539628326892853, - "learning_rate": 4.992311595359431e-05, - "loss": 0.005490221083164215, - "step": 9495 - }, - { - "epoch": 1.619778346121057, - "grad_norm": 0.04033574461936951, - "learning_rate": 4.98989379415483e-05, - "loss": 0.005296828970313072, - "step": 9500 - }, - { - "epoch": 1.6206308610400681, - "grad_norm": 0.10801003128290176, - "learning_rate": 4.98747541412656e-05, - "loss": 0.007847490906715392, - "step": 9505 - }, - { - "epoch": 1.6214833759590794, - "grad_norm": 0.05979831889271736, - "learning_rate": 4.985056456403603e-05, - "loss": 0.005352787673473358, - "step": 9510 - }, - { - "epoch": 1.6223358908780905, - "grad_norm": 0.07628990709781647, - "learning_rate": 4.9826369221152086e-05, - "loss": 0.005436672642827034, - "step": 9515 - }, - { - "epoch": 1.6231884057971016, - "grad_norm": 0.0654626339673996, - "learning_rate": 4.9802168123908955e-05, - "loss": 0.004777481406927108, - "step": 9520 - }, - { - "epoch": 1.6240409207161126, - "grad_norm": 0.08487557619810104, - "learning_rate": 4.97779612836045e-05, - "loss": 0.006834116578102112, - "step": 9525 - }, - { - "epoch": 1.6248934356351237, - "grad_norm": 0.09151525050401688, - "learning_rate": 4.9753748711539316e-05, - "loss": 0.006389729678630829, - "step": 9530 - }, - { - "epoch": 1.6257459505541347, - "grad_norm": 0.10458851605653763, - "learning_rate": 4.972953041901661e-05, - "loss": 0.005984527617692947, - "step": 9535 - }, - { - "epoch": 1.6265984654731458, - "grad_norm": 0.08780983090400696, - "learning_rate": 4.970530641734229e-05, - "loss": 0.0068392202258110045, - "step": 9540 - }, - { - "epoch": 1.6274509803921569, - "grad_norm": 0.04871044307947159, - "learning_rate": 4.968107671782493e-05, - "loss": 0.005444938316941261, - "step": 9545 - }, - { - "epoch": 1.628303495311168, - "grad_norm": 0.05514970421791077, - "learning_rate": 4.9656841331775745e-05, - "loss": 0.005353255197405815, - "step": 9550 - }, - { - "epoch": 1.629156010230179, - "grad_norm": 0.057791441679000854, - "learning_rate": 4.9632600270508655e-05, - "loss": 0.005117457732558251, - "step": 9555 - }, - { - "epoch": 1.63000852514919, - "grad_norm": 0.0816815048456192, - "learning_rate": 4.960835354534015e-05, - "loss": 0.005405401438474655, - "step": 9560 - }, - { - "epoch": 1.630861040068201, - "grad_norm": 0.087788425385952, - "learning_rate": 4.958410116758945e-05, - "loss": 0.006124432012438774, - "step": 9565 - }, - { - "epoch": 1.6317135549872122, - "grad_norm": 0.08500470966100693, - "learning_rate": 4.955984314857832e-05, - "loss": 0.00581449456512928, - "step": 9570 - }, - { - "epoch": 1.6325660699062232, - "grad_norm": 0.042804375290870667, - "learning_rate": 4.9535579499631264e-05, - "loss": 0.007793295383453369, - "step": 9575 - }, - { - "epoch": 1.6334185848252343, - "grad_norm": 0.08767658472061157, - "learning_rate": 4.951131023207533e-05, - "loss": 0.006432226300239563, - "step": 9580 - }, - { - "epoch": 1.6342710997442456, - "grad_norm": 0.0693424716591835, - "learning_rate": 4.948703535724023e-05, - "loss": 0.006517377495765686, - "step": 9585 - }, - { - "epoch": 1.6351236146632566, - "grad_norm": 0.08574991673231125, - "learning_rate": 4.9462754886458276e-05, - "loss": 0.009532185643911362, - "step": 9590 - }, - { - "epoch": 1.6359761295822677, - "grad_norm": 0.04135733097791672, - "learning_rate": 4.94384688310644e-05, - "loss": 0.005358002707362175, - "step": 9595 - }, - { - "epoch": 1.6368286445012787, - "grad_norm": 0.09947369992733002, - "learning_rate": 4.941417720239616e-05, - "loss": 0.005965238064527511, - "step": 9600 - }, - { - "epoch": 1.6376811594202898, - "grad_norm": 0.038376711308956146, - "learning_rate": 4.9389880011793665e-05, - "loss": 0.00521450936794281, - "step": 9605 - }, - { - "epoch": 1.638533674339301, - "grad_norm": 0.05022123083472252, - "learning_rate": 4.9365577270599675e-05, - "loss": 0.006678921729326248, - "step": 9610 - }, - { - "epoch": 1.6393861892583121, - "grad_norm": 0.06687050312757492, - "learning_rate": 4.93412689901595e-05, - "loss": 0.006315051764249802, - "step": 9615 - }, - { - "epoch": 1.6402387041773232, - "grad_norm": 0.08563709259033203, - "learning_rate": 4.931695518182107e-05, - "loss": 0.005977614223957062, - "step": 9620 - }, - { - "epoch": 1.6410912190963343, - "grad_norm": 0.07901418209075928, - "learning_rate": 4.929263585693486e-05, - "loss": 0.004367914795875549, - "step": 9625 - }, - { - "epoch": 1.6419437340153453, - "grad_norm": 0.05929172784090042, - "learning_rate": 4.9268311026853974e-05, - "loss": 0.00466451421380043, - "step": 9630 - }, - { - "epoch": 1.6427962489343564, - "grad_norm": 0.09167131781578064, - "learning_rate": 4.924398070293403e-05, - "loss": 0.0063233010470867155, - "step": 9635 - }, - { - "epoch": 1.6436487638533674, - "grad_norm": 0.053217221051454544, - "learning_rate": 4.921964489653321e-05, - "loss": 0.005829869210720063, - "step": 9640 - }, - { - "epoch": 1.6445012787723785, - "grad_norm": 0.05341719463467598, - "learning_rate": 4.919530361901232e-05, - "loss": 0.005165425688028335, - "step": 9645 - }, - { - "epoch": 1.6453537936913896, - "grad_norm": 0.0763968899846077, - "learning_rate": 4.917095688173466e-05, - "loss": 0.008034119009971618, - "step": 9650 - }, - { - "epoch": 1.6462063086104006, - "grad_norm": 0.07722017168998718, - "learning_rate": 4.9146604696066095e-05, - "loss": 0.008911440521478653, - "step": 9655 - }, - { - "epoch": 1.6470588235294117, - "grad_norm": 0.0639941543340683, - "learning_rate": 4.912224707337504e-05, - "loss": 0.0066375695168972015, - "step": 9660 - }, - { - "epoch": 1.6479113384484227, - "grad_norm": 0.05451088026165962, - "learning_rate": 4.9097884025032425e-05, - "loss": 0.004018183052539826, - "step": 9665 - }, - { - "epoch": 1.6487638533674338, - "grad_norm": 0.06928657740354538, - "learning_rate": 4.907351556241176e-05, - "loss": 0.0061560459434986115, - "step": 9670 - }, - { - "epoch": 1.6496163682864449, - "grad_norm": 0.0672740638256073, - "learning_rate": 4.904914169688903e-05, - "loss": 0.005010559782385826, - "step": 9675 - }, - { - "epoch": 1.6504688832054561, - "grad_norm": 0.05115605145692825, - "learning_rate": 4.902476243984279e-05, - "loss": 0.005690005421638489, - "step": 9680 - }, - { - "epoch": 1.6513213981244672, - "grad_norm": 0.08852645754814148, - "learning_rate": 4.9000377802654055e-05, - "loss": 0.0067652732133865355, - "step": 9685 - }, - { - "epoch": 1.6521739130434783, - "grad_norm": 0.08289605379104614, - "learning_rate": 4.897598779670643e-05, - "loss": 0.005946322903037071, - "step": 9690 - }, - { - "epoch": 1.6530264279624893, - "grad_norm": 0.08343428373336792, - "learning_rate": 4.895159243338594e-05, - "loss": 0.006231371313333511, - "step": 9695 - }, - { - "epoch": 1.6538789428815004, - "grad_norm": 0.08138900995254517, - "learning_rate": 4.892719172408117e-05, - "loss": 0.006785771995782852, - "step": 9700 - }, - { - "epoch": 1.6547314578005117, - "grad_norm": 0.07599585503339767, - "learning_rate": 4.890278568018318e-05, - "loss": 0.00609181635081768, - "step": 9705 - }, - { - "epoch": 1.6555839727195227, - "grad_norm": 0.07918383926153183, - "learning_rate": 4.887837431308552e-05, - "loss": 0.006991502642631531, - "step": 9710 - }, - { - "epoch": 1.6564364876385338, - "grad_norm": 0.048750922083854675, - "learning_rate": 4.8853957634184246e-05, - "loss": 0.00639684796333313, - "step": 9715 - }, - { - "epoch": 1.6572890025575449, - "grad_norm": 0.07931654155254364, - "learning_rate": 4.882953565487785e-05, - "loss": 0.004780232906341553, - "step": 9720 - }, - { - "epoch": 1.658141517476556, - "grad_norm": 0.07394375652074814, - "learning_rate": 4.8805108386567345e-05, - "loss": 0.005560039728879929, - "step": 9725 - }, - { - "epoch": 1.658994032395567, - "grad_norm": 0.07906223088502884, - "learning_rate": 4.8780675840656175e-05, - "loss": 0.006233107298612595, - "step": 9730 - }, - { - "epoch": 1.659846547314578, - "grad_norm": 0.05145291984081268, - "learning_rate": 4.875623802855027e-05, - "loss": 0.0049663417041301726, - "step": 9735 - }, - { - "epoch": 1.660699062233589, - "grad_norm": 0.06227492541074753, - "learning_rate": 4.873179496165802e-05, - "loss": 0.006139815598726272, - "step": 9740 - }, - { - "epoch": 1.6615515771526002, - "grad_norm": 0.08176816254854202, - "learning_rate": 4.870734665139028e-05, - "loss": 0.007625886052846908, - "step": 9745 - }, - { - "epoch": 1.6624040920716112, - "grad_norm": 0.06774444133043289, - "learning_rate": 4.868289310916029e-05, - "loss": 0.006510105729103088, - "step": 9750 - }, - { - "epoch": 1.6632566069906223, - "grad_norm": 0.07336006313562393, - "learning_rate": 4.8658434346383805e-05, - "loss": 0.0068834669888019565, - "step": 9755 - }, - { - "epoch": 1.6641091219096333, - "grad_norm": 0.07233051210641861, - "learning_rate": 4.863397037447899e-05, - "loss": 0.005505643784999847, - "step": 9760 - }, - { - "epoch": 1.6649616368286444, - "grad_norm": 0.037355873733758926, - "learning_rate": 4.860950120486643e-05, - "loss": 0.005151794478297234, - "step": 9765 - }, - { - "epoch": 1.6658141517476555, - "grad_norm": 0.10907282680273056, - "learning_rate": 4.8585026848969164e-05, - "loss": 0.007589263468980789, - "step": 9770 - }, - { - "epoch": 1.6666666666666665, - "grad_norm": 0.10357582569122314, - "learning_rate": 4.856054731821261e-05, - "loss": 0.006011854484677314, - "step": 9775 - }, - { - "epoch": 1.6675191815856778, - "grad_norm": 0.0713953971862793, - "learning_rate": 4.853606262402465e-05, - "loss": 0.006342334300279617, - "step": 9780 - }, - { - "epoch": 1.6683716965046889, - "grad_norm": 0.07772944122552872, - "learning_rate": 4.851157277783555e-05, - "loss": 0.005903373658657074, - "step": 9785 - }, - { - "epoch": 1.6692242114237, - "grad_norm": 0.1249493658542633, - "learning_rate": 4.848707779107797e-05, - "loss": 0.006542833894491196, - "step": 9790 - }, - { - "epoch": 1.670076726342711, - "grad_norm": 0.05137734115123749, - "learning_rate": 4.8462577675187e-05, - "loss": 0.004380676150321961, - "step": 9795 - }, - { - "epoch": 1.670929241261722, - "grad_norm": 0.09491576999425888, - "learning_rate": 4.8438072441600095e-05, - "loss": 0.005311820283532142, - "step": 9800 - }, - { - "epoch": 1.6717817561807333, - "grad_norm": 0.09257746487855911, - "learning_rate": 4.8413562101757134e-05, - "loss": 0.006033014133572578, - "step": 9805 - }, - { - "epoch": 1.6726342710997444, - "grad_norm": 0.045860812067985535, - "learning_rate": 4.838904666710034e-05, - "loss": 0.008368080109357834, - "step": 9810 - }, - { - "epoch": 1.6734867860187554, - "grad_norm": 0.033777810633182526, - "learning_rate": 4.836452614907435e-05, - "loss": 0.0045743979513645176, - "step": 9815 - }, - { - "epoch": 1.6743393009377665, - "grad_norm": 0.12888991832733154, - "learning_rate": 4.834000055912614e-05, - "loss": 0.005997149646282196, - "step": 9820 - }, - { - "epoch": 1.6751918158567776, - "grad_norm": 0.08622048050165176, - "learning_rate": 4.8315469908705074e-05, - "loss": 0.007002732157707215, - "step": 9825 - }, - { - "epoch": 1.6760443307757886, - "grad_norm": 0.04722774773836136, - "learning_rate": 4.82909342092629e-05, - "loss": 0.005374876409769058, - "step": 9830 - }, - { - "epoch": 1.6768968456947997, - "grad_norm": 0.08596520870923996, - "learning_rate": 4.826639347225366e-05, - "loss": 0.0066084228456020355, - "step": 9835 - }, - { - "epoch": 1.6777493606138107, - "grad_norm": 0.09831524640321732, - "learning_rate": 4.824184770913381e-05, - "loss": 0.004402932524681091, - "step": 9840 - }, - { - "epoch": 1.6786018755328218, - "grad_norm": 0.10586824268102646, - "learning_rate": 4.821729693136214e-05, - "loss": 0.006442143023014069, - "step": 9845 - }, - { - "epoch": 1.6794543904518329, - "grad_norm": 0.11845403164625168, - "learning_rate": 4.8192741150399735e-05, - "loss": 0.006300021708011627, - "step": 9850 - }, - { - "epoch": 1.680306905370844, - "grad_norm": 0.08749356120824814, - "learning_rate": 4.816818037771007e-05, - "loss": 0.0060168147087097164, - "step": 9855 - }, - { - "epoch": 1.681159420289855, - "grad_norm": 0.06483060121536255, - "learning_rate": 4.814361462475895e-05, - "loss": 0.00717247799038887, - "step": 9860 - }, - { - "epoch": 1.682011935208866, - "grad_norm": 0.09276239573955536, - "learning_rate": 4.811904390301444e-05, - "loss": 0.006788758933544159, - "step": 9865 - }, - { - "epoch": 1.682864450127877, - "grad_norm": 0.05662832781672478, - "learning_rate": 4.809446822394701e-05, - "loss": 0.0068000413477420805, - "step": 9870 - }, - { - "epoch": 1.6837169650468882, - "grad_norm": 0.07508451491594315, - "learning_rate": 4.80698875990294e-05, - "loss": 0.006339512765407562, - "step": 9875 - }, - { - "epoch": 1.6845694799658995, - "grad_norm": 0.06525320559740067, - "learning_rate": 4.804530203973664e-05, - "loss": 0.010082229971885681, - "step": 9880 - }, - { - "epoch": 1.6854219948849105, - "grad_norm": 0.07791458070278168, - "learning_rate": 4.8020711557546104e-05, - "loss": 0.006830710172653198, - "step": 9885 - }, - { - "epoch": 1.6862745098039216, - "grad_norm": 0.05997749790549278, - "learning_rate": 4.799611616393745e-05, - "loss": 0.00666801705956459, - "step": 9890 - }, - { - "epoch": 1.6871270247229326, - "grad_norm": 0.07050258666276932, - "learning_rate": 4.797151587039261e-05, - "loss": 0.0059244450181722644, - "step": 9895 - }, - { - "epoch": 1.6879795396419437, - "grad_norm": 0.06760186702013016, - "learning_rate": 4.794691068839585e-05, - "loss": 0.006415641307830811, - "step": 9900 - }, - { - "epoch": 1.688832054560955, - "grad_norm": 0.07285474240779877, - "learning_rate": 4.792230062943364e-05, - "loss": 0.004972729086875916, - "step": 9905 - }, - { - "epoch": 1.689684569479966, - "grad_norm": 0.02914854884147644, - "learning_rate": 4.789768570499481e-05, - "loss": 0.004819701239466667, - "step": 9910 - }, - { - "epoch": 1.690537084398977, - "grad_norm": 0.058768294751644135, - "learning_rate": 4.787306592657042e-05, - "loss": 0.00581958070397377, - "step": 9915 - }, - { - "epoch": 1.6913895993179882, - "grad_norm": 0.08694405853748322, - "learning_rate": 4.7848441305653804e-05, - "loss": 0.004998849332332611, - "step": 9920 - }, - { - "epoch": 1.6922421142369992, - "grad_norm": 0.10194200277328491, - "learning_rate": 4.782381185374054e-05, - "loss": 0.00809016153216362, - "step": 9925 - }, - { - "epoch": 1.6930946291560103, - "grad_norm": 0.04976386949419975, - "learning_rate": 4.779917758232849e-05, - "loss": 0.00392133817076683, - "step": 9930 - }, - { - "epoch": 1.6939471440750213, - "grad_norm": 0.04324428364634514, - "learning_rate": 4.777453850291774e-05, - "loss": 0.005488916113972664, - "step": 9935 - }, - { - "epoch": 1.6947996589940324, - "grad_norm": 0.128068745136261, - "learning_rate": 4.774989462701063e-05, - "loss": 0.008696570992469788, - "step": 9940 - }, - { - "epoch": 1.6956521739130435, - "grad_norm": 0.06357335299253464, - "learning_rate": 4.7725245966111764e-05, - "loss": 0.00657767504453659, - "step": 9945 - }, - { - "epoch": 1.6965046888320545, - "grad_norm": 0.09200388938188553, - "learning_rate": 4.770059253172793e-05, - "loss": 0.00511985532939434, - "step": 9950 - }, - { - "epoch": 1.6973572037510656, - "grad_norm": 0.0898200049996376, - "learning_rate": 4.767593433536819e-05, - "loss": 0.005805553123354912, - "step": 9955 - }, - { - "epoch": 1.6982097186700766, - "grad_norm": 0.06495708227157593, - "learning_rate": 4.765127138854379e-05, - "loss": 0.005122709274291992, - "step": 9960 - }, - { - "epoch": 1.6990622335890877, - "grad_norm": 0.06079862266778946, - "learning_rate": 4.762660370276824e-05, - "loss": 0.005829216912388802, - "step": 9965 - }, - { - "epoch": 1.6999147485080988, - "grad_norm": 0.07300638407468796, - "learning_rate": 4.760193128955721e-05, - "loss": 0.0057421475648880005, - "step": 9970 - }, - { - "epoch": 1.7007672634271098, - "grad_norm": 0.09826004505157471, - "learning_rate": 4.757725416042863e-05, - "loss": 0.007709302753210068, - "step": 9975 - }, - { - "epoch": 1.701619778346121, - "grad_norm": 0.08353756368160248, - "learning_rate": 4.755257232690258e-05, - "loss": 0.007458946853876114, - "step": 9980 - }, - { - "epoch": 1.7024722932651322, - "grad_norm": 0.057993657886981964, - "learning_rate": 4.752788580050137e-05, - "loss": 0.0048107530921697615, - "step": 9985 - }, - { - "epoch": 1.7033248081841432, - "grad_norm": 0.08480621874332428, - "learning_rate": 4.750319459274951e-05, - "loss": 0.007556724548339844, - "step": 9990 - }, - { - "epoch": 1.7041773231031543, - "grad_norm": 0.06563637405633926, - "learning_rate": 4.747849871517364e-05, - "loss": 0.00476250983774662, - "step": 9995 - }, - { - "epoch": 1.7050298380221653, - "grad_norm": 0.06217886507511139, - "learning_rate": 4.7453798179302656e-05, - "loss": 0.008565887063741683, - "step": 10000 - }, - { - "epoch": 1.7058823529411766, - "grad_norm": 0.07285669445991516, - "learning_rate": 4.742909299666756e-05, - "loss": 0.0062899492681026455, - "step": 10005 - }, - { - "epoch": 1.7067348678601877, - "grad_norm": 0.043275732547044754, - "learning_rate": 4.7404383178801564e-05, - "loss": 0.005467301979660988, - "step": 10010 - }, - { - "epoch": 1.7075873827791987, - "grad_norm": 0.09345486015081406, - "learning_rate": 4.7379668737240044e-05, - "loss": 0.007198603451251983, - "step": 10015 - }, - { - "epoch": 1.7084398976982098, - "grad_norm": 0.09792933613061905, - "learning_rate": 4.735494968352049e-05, - "loss": 0.009155672788619996, - "step": 10020 - }, - { - "epoch": 1.7092924126172209, - "grad_norm": 0.03888144716620445, - "learning_rate": 4.733022602918263e-05, - "loss": 0.00484597384929657, - "step": 10025 - }, - { - "epoch": 1.710144927536232, - "grad_norm": 0.050344232469797134, - "learning_rate": 4.7305497785768235e-05, - "loss": 0.00478862039744854, - "step": 10030 - }, - { - "epoch": 1.710997442455243, - "grad_norm": 0.0724092647433281, - "learning_rate": 4.728076496482131e-05, - "loss": 0.005028426647186279, - "step": 10035 - }, - { - "epoch": 1.711849957374254, - "grad_norm": 0.10781413316726685, - "learning_rate": 4.725602757788794e-05, - "loss": 0.00789962187409401, - "step": 10040 - }, - { - "epoch": 1.712702472293265, - "grad_norm": 0.0828569084405899, - "learning_rate": 4.723128563651637e-05, - "loss": 0.006212035566568375, - "step": 10045 - }, - { - "epoch": 1.7135549872122762, - "grad_norm": 0.06634854525327682, - "learning_rate": 4.720653915225695e-05, - "loss": 0.00550018809735775, - "step": 10050 - }, - { - "epoch": 1.7144075021312872, - "grad_norm": 0.07699137926101685, - "learning_rate": 4.718178813666217e-05, - "loss": 0.007427608966827393, - "step": 10055 - }, - { - "epoch": 1.7152600170502983, - "grad_norm": 0.08237455785274506, - "learning_rate": 4.715703260128663e-05, - "loss": 0.0049440376460552216, - "step": 10060 - }, - { - "epoch": 1.7161125319693094, - "grad_norm": 0.0423310324549675, - "learning_rate": 4.7132272557687034e-05, - "loss": 0.005643930658698082, - "step": 10065 - }, - { - "epoch": 1.7169650468883204, - "grad_norm": 0.08052363246679306, - "learning_rate": 4.71075080174222e-05, - "loss": 0.005594046413898468, - "step": 10070 - }, - { - "epoch": 1.7178175618073315, - "grad_norm": 0.05388827249407768, - "learning_rate": 4.7082738992053004e-05, - "loss": 0.005239073187112808, - "step": 10075 - }, - { - "epoch": 1.7186700767263428, - "grad_norm": 0.0699780210852623, - "learning_rate": 4.70579654931425e-05, - "loss": 0.004442551359534264, - "step": 10080 - }, - { - "epoch": 1.7195225916453538, - "grad_norm": 0.07259970158338547, - "learning_rate": 4.7033187532255765e-05, - "loss": 0.004775180667638779, - "step": 10085 - }, - { - "epoch": 1.7203751065643649, - "grad_norm": 0.10291304439306259, - "learning_rate": 4.700840512095995e-05, - "loss": 0.009148158878087998, - "step": 10090 - }, - { - "epoch": 1.721227621483376, - "grad_norm": 0.09639768302440643, - "learning_rate": 4.698361827082435e-05, - "loss": 0.008357913047075272, - "step": 10095 - }, - { - "epoch": 1.722080136402387, - "grad_norm": 0.08128193765878677, - "learning_rate": 4.695882699342026e-05, - "loss": 0.006467945128679276, - "step": 10100 - }, - { - "epoch": 1.7229326513213983, - "grad_norm": 0.0678371787071228, - "learning_rate": 4.6934031300321094e-05, - "loss": 0.005760467797517777, - "step": 10105 - }, - { - "epoch": 1.7237851662404093, - "grad_norm": 0.0766267478466034, - "learning_rate": 4.6909231203102285e-05, - "loss": 0.0068340465426445, - "step": 10110 - }, - { - "epoch": 1.7246376811594204, - "grad_norm": 0.04263419657945633, - "learning_rate": 4.6884426713341366e-05, - "loss": 0.005921339616179466, - "step": 10115 - }, - { - "epoch": 1.7254901960784315, - "grad_norm": 0.10168195515871048, - "learning_rate": 4.6859617842617874e-05, - "loss": 0.006926319003105164, - "step": 10120 - }, - { - "epoch": 1.7263427109974425, - "grad_norm": 0.07910803705453873, - "learning_rate": 4.683480460251343e-05, - "loss": 0.006997878849506378, - "step": 10125 - }, - { - "epoch": 1.7271952259164536, - "grad_norm": 0.045049965381622314, - "learning_rate": 4.680998700461169e-05, - "loss": 0.005594813078641891, - "step": 10130 - }, - { - "epoch": 1.7280477408354646, - "grad_norm": 0.07185275852680206, - "learning_rate": 4.678516506049832e-05, - "loss": 0.006092778965830803, - "step": 10135 - }, - { - "epoch": 1.7289002557544757, - "grad_norm": 0.07003147900104523, - "learning_rate": 4.676033878176102e-05, - "loss": 0.007595886290073395, - "step": 10140 - }, - { - "epoch": 1.7297527706734868, - "grad_norm": 0.06360077112913132, - "learning_rate": 4.6735508179989536e-05, - "loss": 0.00546439029276371, - "step": 10145 - }, - { - "epoch": 1.7306052855924978, - "grad_norm": 0.07347442954778671, - "learning_rate": 4.671067326677563e-05, - "loss": 0.004961185902357101, - "step": 10150 - }, - { - "epoch": 1.7314578005115089, - "grad_norm": 0.056153345853090286, - "learning_rate": 4.6685834053713035e-05, - "loss": 0.006820976734161377, - "step": 10155 - }, - { - "epoch": 1.73231031543052, - "grad_norm": 0.09868444502353668, - "learning_rate": 4.666099055239755e-05, - "loss": 0.004829689115285874, - "step": 10160 - }, - { - "epoch": 1.733162830349531, - "grad_norm": 0.07029838860034943, - "learning_rate": 4.663614277442694e-05, - "loss": 0.006708820164203644, - "step": 10165 - }, - { - "epoch": 1.734015345268542, - "grad_norm": 0.0785607323050499, - "learning_rate": 4.661129073140096e-05, - "loss": 0.0093411885201931, - "step": 10170 - }, - { - "epoch": 1.7348678601875531, - "grad_norm": 0.05867304652929306, - "learning_rate": 4.658643443492139e-05, - "loss": 0.004420546442270279, - "step": 10175 - }, - { - "epoch": 1.7357203751065644, - "grad_norm": 0.08736653625965118, - "learning_rate": 4.656157389659196e-05, - "loss": 0.0049125440418720245, - "step": 10180 - }, - { - "epoch": 1.7365728900255755, - "grad_norm": 0.10769468545913696, - "learning_rate": 4.653670912801842e-05, - "loss": 0.006663528829813003, - "step": 10185 - }, - { - "epoch": 1.7374254049445865, - "grad_norm": 0.054130490869283676, - "learning_rate": 4.651184014080843e-05, - "loss": 0.005649637803435326, - "step": 10190 - }, - { - "epoch": 1.7382779198635976, - "grad_norm": 0.0760764479637146, - "learning_rate": 4.648696694657171e-05, - "loss": 0.00803508386015892, - "step": 10195 - }, - { - "epoch": 1.7391304347826086, - "grad_norm": 0.08103618025779724, - "learning_rate": 4.646208955691987e-05, - "loss": 0.005645860359072686, - "step": 10200 - }, - { - "epoch": 1.73998294970162, - "grad_norm": 0.060226406902074814, - "learning_rate": 4.643720798346649e-05, - "loss": 0.005114502459764481, - "step": 10205 - }, - { - "epoch": 1.740835464620631, - "grad_norm": 0.08842508494853973, - "learning_rate": 4.641232223782713e-05, - "loss": 0.004128537327051163, - "step": 10210 - }, - { - "epoch": 1.741687979539642, - "grad_norm": 0.03715536370873451, - "learning_rate": 4.6387432331619284e-05, - "loss": 0.005640536174178123, - "step": 10215 - }, - { - "epoch": 1.742540494458653, - "grad_norm": 0.09130766242742538, - "learning_rate": 4.636253827646239e-05, - "loss": 0.0074319176375865935, - "step": 10220 - }, - { - "epoch": 1.7433930093776642, - "grad_norm": 0.08204436302185059, - "learning_rate": 4.6337640083977826e-05, - "loss": 0.006443107873201371, - "step": 10225 - }, - { - "epoch": 1.7442455242966752, - "grad_norm": 0.09834989905357361, - "learning_rate": 4.6312737765788883e-05, - "loss": 0.00825996845960617, - "step": 10230 - }, - { - "epoch": 1.7450980392156863, - "grad_norm": 0.07453756034374237, - "learning_rate": 4.628783133352078e-05, - "loss": 0.005153121426701546, - "step": 10235 - }, - { - "epoch": 1.7459505541346974, - "grad_norm": 0.0658891350030899, - "learning_rate": 4.626292079880071e-05, - "loss": 0.005568725615739822, - "step": 10240 - }, - { - "epoch": 1.7468030690537084, - "grad_norm": 0.08673261851072311, - "learning_rate": 4.623800617325772e-05, - "loss": 0.00687919333577156, - "step": 10245 - }, - { - "epoch": 1.7476555839727195, - "grad_norm": 0.08707419037818909, - "learning_rate": 4.621308746852276e-05, - "loss": 0.009814801812171935, - "step": 10250 - }, - { - "epoch": 1.7485080988917305, - "grad_norm": 0.07168986648321152, - "learning_rate": 4.618816469622874e-05, - "loss": 0.004722443222999573, - "step": 10255 - }, - { - "epoch": 1.7493606138107416, - "grad_norm": 0.07987508177757263, - "learning_rate": 4.616323786801042e-05, - "loss": 0.006749927252531052, - "step": 10260 - }, - { - "epoch": 1.7497016197783462, - "eval_loss": 0.03619376942515373, - "eval_runtime": 3.6854, - "eval_samples_per_second": 68.379, - "eval_steps_per_second": 1.085, - "step": 10262 - }, - { - "eval_cer_subset": 0.014314328985294836, - "eval_cer_subset_edit_distance": 879, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 10262 - }, - { - "epoch": 1.7502131287297527, - "grad_norm": 0.10899413377046585, - "learning_rate": 4.6138306995504495e-05, - "loss": 0.006938809901475907, - "step": 10265 - }, - { - "epoch": 1.7510656436487637, - "grad_norm": 0.10073213279247284, - "learning_rate": 4.6113372090349516e-05, - "loss": 0.00795048326253891, - "step": 10270 - }, - { - "epoch": 1.7519181585677748, - "grad_norm": 0.04800979420542717, - "learning_rate": 4.608843316418592e-05, - "loss": 0.007616385817527771, - "step": 10275 - }, - { - "epoch": 1.752770673486786, - "grad_norm": 0.09020161628723145, - "learning_rate": 4.6063490228656025e-05, - "loss": 0.005228221416473389, - "step": 10280 - }, - { - "epoch": 1.7536231884057971, - "grad_norm": 0.083438441157341, - "learning_rate": 4.603854329540403e-05, - "loss": 0.00726160854101181, - "step": 10285 - }, - { - "epoch": 1.7544757033248082, - "grad_norm": 0.07851024717092514, - "learning_rate": 4.6013592376076e-05, - "loss": 0.006890790909528733, - "step": 10290 - }, - { - "epoch": 1.7553282182438192, - "grad_norm": 0.09015098959207535, - "learning_rate": 4.598863748231985e-05, - "loss": 0.007083073258399963, - "step": 10295 - }, - { - "epoch": 1.7561807331628303, - "grad_norm": 0.04751535877585411, - "learning_rate": 4.596367862578534e-05, - "loss": 0.005376371741294861, - "step": 10300 - }, - { - "epoch": 1.7570332480818416, - "grad_norm": 0.07547739148139954, - "learning_rate": 4.5938715818124094e-05, - "loss": 0.008766484260559083, - "step": 10305 - }, - { - "epoch": 1.7578857630008526, - "grad_norm": 0.052052512764930725, - "learning_rate": 4.5913749070989616e-05, - "loss": 0.005375667661428452, - "step": 10310 - }, - { - "epoch": 1.7587382779198637, - "grad_norm": 0.11575129628181458, - "learning_rate": 4.5888778396037187e-05, - "loss": 0.006675881892442703, - "step": 10315 - }, - { - "epoch": 1.7595907928388748, - "grad_norm": 0.05995294824242592, - "learning_rate": 4.586380380492394e-05, - "loss": 0.007097356766462326, - "step": 10320 - }, - { - "epoch": 1.7604433077578858, - "grad_norm": 0.049236129969358444, - "learning_rate": 4.583882530930887e-05, - "loss": 0.004433324560523033, - "step": 10325 - }, - { - "epoch": 1.7612958226768969, - "grad_norm": 0.048296503722667694, - "learning_rate": 4.581384292085274e-05, - "loss": 0.0051886774599552155, - "step": 10330 - }, - { - "epoch": 1.762148337595908, - "grad_norm": 0.09939385205507278, - "learning_rate": 4.57888566512182e-05, - "loss": 0.006426715105772018, - "step": 10335 - }, - { - "epoch": 1.763000852514919, - "grad_norm": 0.08810277283191681, - "learning_rate": 4.5763866512069626e-05, - "loss": 0.00727301687002182, - "step": 10340 - }, - { - "epoch": 1.76385336743393, - "grad_norm": 0.05262129753828049, - "learning_rate": 4.573887251507328e-05, - "loss": 0.004860313236713409, - "step": 10345 - }, - { - "epoch": 1.7647058823529411, - "grad_norm": 0.09755868464708328, - "learning_rate": 4.571387467189718e-05, - "loss": 0.00684543177485466, - "step": 10350 - }, - { - "epoch": 1.7655583972719522, - "grad_norm": 0.08306272327899933, - "learning_rate": 4.568887299421115e-05, - "loss": 0.005363506823778152, - "step": 10355 - }, - { - "epoch": 1.7664109121909632, - "grad_norm": 0.06304962188005447, - "learning_rate": 4.566386749368681e-05, - "loss": 0.006262359023094177, - "step": 10360 - }, - { - "epoch": 1.7672634271099743, - "grad_norm": 0.099216029047966, - "learning_rate": 4.5638858181997544e-05, - "loss": 0.005263365060091019, - "step": 10365 - }, - { - "epoch": 1.7681159420289854, - "grad_norm": 0.06316341459751129, - "learning_rate": 4.5613845070818544e-05, - "loss": 0.0053974583745002745, - "step": 10370 - }, - { - "epoch": 1.7689684569479964, - "grad_norm": 0.08523725718259811, - "learning_rate": 4.5588828171826755e-05, - "loss": 0.006064000725746155, - "step": 10375 - }, - { - "epoch": 1.7698209718670077, - "grad_norm": 0.0663699060678482, - "learning_rate": 4.5563807496700925e-05, - "loss": 0.00665600374341011, - "step": 10380 - }, - { - "epoch": 1.7706734867860188, - "grad_norm": 0.10673311352729797, - "learning_rate": 4.55387830571215e-05, - "loss": 0.006540966033935547, - "step": 10385 - }, - { - "epoch": 1.7715260017050298, - "grad_norm": 0.08779574930667877, - "learning_rate": 4.551375486477074e-05, - "loss": 0.00547558106482029, - "step": 10390 - }, - { - "epoch": 1.772378516624041, - "grad_norm": 0.07451514899730682, - "learning_rate": 4.5488722931332625e-05, - "loss": 0.008499838411808014, - "step": 10395 - }, - { - "epoch": 1.773231031543052, - "grad_norm": 0.06014202535152435, - "learning_rate": 4.5463687268492904e-05, - "loss": 0.006278771907091141, - "step": 10400 - }, - { - "epoch": 1.7740835464620632, - "grad_norm": 0.039256151765584946, - "learning_rate": 4.543864788793907e-05, - "loss": 0.0037193533033132555, - "step": 10405 - }, - { - "epoch": 1.7749360613810743, - "grad_norm": 0.09449942409992218, - "learning_rate": 4.541360480136031e-05, - "loss": 0.006574592739343643, - "step": 10410 - }, - { - "epoch": 1.7757885763000854, - "grad_norm": 0.07616980373859406, - "learning_rate": 4.53885580204476e-05, - "loss": 0.006042734161019326, - "step": 10415 - }, - { - "epoch": 1.7766410912190964, - "grad_norm": 0.07019155472517014, - "learning_rate": 4.5363507556893574e-05, - "loss": 0.006044945493340492, - "step": 10420 - }, - { - "epoch": 1.7774936061381075, - "grad_norm": 0.0616939477622509, - "learning_rate": 4.533845342239266e-05, - "loss": 0.004315405339002609, - "step": 10425 - }, - { - "epoch": 1.7783461210571185, - "grad_norm": 0.09354502707719803, - "learning_rate": 4.5313395628640943e-05, - "loss": 0.005719271302223205, - "step": 10430 - }, - { - "epoch": 1.7791986359761296, - "grad_norm": 0.08747732639312744, - "learning_rate": 4.528833418733623e-05, - "loss": 0.00472431555390358, - "step": 10435 - }, - { - "epoch": 1.7800511508951407, - "grad_norm": 0.09513017535209656, - "learning_rate": 4.5263269110178034e-05, - "loss": 0.006968998908996582, - "step": 10440 - }, - { - "epoch": 1.7809036658141517, - "grad_norm": 0.09208676964044571, - "learning_rate": 4.523820040886759e-05, - "loss": 0.006609047204256058, - "step": 10445 - }, - { - "epoch": 1.7817561807331628, - "grad_norm": 0.09964144974946976, - "learning_rate": 4.521312809510778e-05, - "loss": 0.0056272163987159726, - "step": 10450 - }, - { - "epoch": 1.7826086956521738, - "grad_norm": 0.06850367784500122, - "learning_rate": 4.51880521806032e-05, - "loss": 0.005562498047947883, - "step": 10455 - }, - { - "epoch": 1.783461210571185, - "grad_norm": 0.0654430240392685, - "learning_rate": 4.5162972677060124e-05, - "loss": 0.0059367924928665165, - "step": 10460 - }, - { - "epoch": 1.784313725490196, - "grad_norm": 0.0449560284614563, - "learning_rate": 4.513788959618649e-05, - "loss": 0.005458919331431389, - "step": 10465 - }, - { - "epoch": 1.785166240409207, - "grad_norm": 0.14256814122200012, - "learning_rate": 4.511280294969192e-05, - "loss": 0.0066184431314468386, - "step": 10470 - }, - { - "epoch": 1.7860187553282183, - "grad_norm": 0.08284557610750198, - "learning_rate": 4.508771274928771e-05, - "loss": 0.007388219982385635, - "step": 10475 - }, - { - "epoch": 1.7868712702472294, - "grad_norm": 0.05675457417964935, - "learning_rate": 4.506261900668676e-05, - "loss": 0.005572458356618881, - "step": 10480 - }, - { - "epoch": 1.7877237851662404, - "grad_norm": 0.05767322704195976, - "learning_rate": 4.50375217336037e-05, - "loss": 0.0058133058249950405, - "step": 10485 - }, - { - "epoch": 1.7885763000852515, - "grad_norm": 0.03421638533473015, - "learning_rate": 4.501242094175476e-05, - "loss": 0.005268872529268265, - "step": 10490 - }, - { - "epoch": 1.7894288150042625, - "grad_norm": 0.07319685071706772, - "learning_rate": 4.4987316642857836e-05, - "loss": 0.008701664954423904, - "step": 10495 - }, - { - "epoch": 1.7902813299232738, - "grad_norm": 0.04271615296602249, - "learning_rate": 4.4962208848632426e-05, - "loss": 0.005680259317159653, - "step": 10500 - }, - { - "epoch": 1.7911338448422849, - "grad_norm": 0.05916997417807579, - "learning_rate": 4.493709757079971e-05, - "loss": 0.004779224097728729, - "step": 10505 - }, - { - "epoch": 1.791986359761296, - "grad_norm": 0.04994066804647446, - "learning_rate": 4.491198282108244e-05, - "loss": 0.00443916954100132, - "step": 10510 - }, - { - "epoch": 1.792838874680307, - "grad_norm": 0.09032617509365082, - "learning_rate": 4.488686461120504e-05, - "loss": 0.007850547134876252, - "step": 10515 - }, - { - "epoch": 1.793691389599318, - "grad_norm": 0.05055975914001465, - "learning_rate": 4.4861742952893525e-05, - "loss": 0.005925046652555466, - "step": 10520 - }, - { - "epoch": 1.7945439045183291, - "grad_norm": 0.07521310448646545, - "learning_rate": 4.48366178578755e-05, - "loss": 0.006785632669925689, - "step": 10525 - }, - { - "epoch": 1.7953964194373402, - "grad_norm": 0.06577371805906296, - "learning_rate": 4.4811489337880216e-05, - "loss": 0.005300462618470192, - "step": 10530 - }, - { - "epoch": 1.7962489343563512, - "grad_norm": 0.0451020710170269, - "learning_rate": 4.4786357404638485e-05, - "loss": 0.00612550750374794, - "step": 10535 - }, - { - "epoch": 1.7971014492753623, - "grad_norm": 0.08968023955821991, - "learning_rate": 4.4761222069882754e-05, - "loss": 0.00558510459959507, - "step": 10540 - }, - { - "epoch": 1.7979539641943734, - "grad_norm": 0.0945729911327362, - "learning_rate": 4.4736083345347015e-05, - "loss": 0.007513274252414703, - "step": 10545 - }, - { - "epoch": 1.7988064791133844, - "grad_norm": 0.10392102599143982, - "learning_rate": 4.4710941242766844e-05, - "loss": 0.006224355846643448, - "step": 10550 - }, - { - "epoch": 1.7996589940323955, - "grad_norm": 0.10485874116420746, - "learning_rate": 4.4685795773879446e-05, - "loss": 0.005821261927485466, - "step": 10555 - }, - { - "epoch": 1.8005115089514065, - "grad_norm": 0.0689731314778328, - "learning_rate": 4.466064695042355e-05, - "loss": 0.0062000565230846405, - "step": 10560 - }, - { - "epoch": 1.8013640238704176, - "grad_norm": 0.07008705288171768, - "learning_rate": 4.4635494784139463e-05, - "loss": 0.006286797672510147, - "step": 10565 - }, - { - "epoch": 1.8022165387894287, - "grad_norm": 0.07595150172710419, - "learning_rate": 4.461033928676904e-05, - "loss": 0.006704485416412354, - "step": 10570 - }, - { - "epoch": 1.80306905370844, - "grad_norm": 0.07564863562583923, - "learning_rate": 4.458518047005572e-05, - "loss": 0.005777762830257415, - "step": 10575 - }, - { - "epoch": 1.803921568627451, - "grad_norm": 0.07202555984258652, - "learning_rate": 4.4560018345744466e-05, - "loss": 0.00602865107357502, - "step": 10580 - }, - { - "epoch": 1.804774083546462, - "grad_norm": 0.10462740063667297, - "learning_rate": 4.453485292558179e-05, - "loss": 0.007622111588716507, - "step": 10585 - }, - { - "epoch": 1.8056265984654731, - "grad_norm": 0.05587150529026985, - "learning_rate": 4.450968422131578e-05, - "loss": 0.00641121193766594, - "step": 10590 - }, - { - "epoch": 1.8064791133844842, - "grad_norm": 0.0603446289896965, - "learning_rate": 4.448451224469598e-05, - "loss": 0.0073586970567703245, - "step": 10595 - }, - { - "epoch": 1.8073316283034955, - "grad_norm": 0.04228143393993378, - "learning_rate": 4.445933700747353e-05, - "loss": 0.005406339466571808, - "step": 10600 - }, - { - "epoch": 1.8081841432225065, - "grad_norm": 0.04840795323252678, - "learning_rate": 4.4434158521401065e-05, - "loss": 0.0041844088584184645, - "step": 10605 - }, - { - "epoch": 1.8090366581415176, - "grad_norm": 0.08334027975797653, - "learning_rate": 4.440897679823275e-05, - "loss": 0.008376862108707427, - "step": 10610 - }, - { - "epoch": 1.8098891730605287, - "grad_norm": 0.07879523187875748, - "learning_rate": 4.438379184972423e-05, - "loss": 0.0053595036268234255, - "step": 10615 - }, - { - "epoch": 1.8107416879795397, - "grad_norm": 0.0689932182431221, - "learning_rate": 4.435860368763269e-05, - "loss": 0.005961846932768822, - "step": 10620 - }, - { - "epoch": 1.8115942028985508, - "grad_norm": 0.07035796344280243, - "learning_rate": 4.43334123237168e-05, - "loss": 0.005833951756358147, - "step": 10625 - }, - { - "epoch": 1.8124467178175618, - "grad_norm": 0.06488184630870819, - "learning_rate": 4.4308217769736715e-05, - "loss": 0.006380685418844223, - "step": 10630 - }, - { - "epoch": 1.813299232736573, - "grad_norm": 0.1095893532037735, - "learning_rate": 4.428302003745412e-05, - "loss": 0.006500106304883957, - "step": 10635 - }, - { - "epoch": 1.814151747655584, - "grad_norm": 0.07402926683425903, - "learning_rate": 4.425781913863212e-05, - "loss": 0.010839180648326873, - "step": 10640 - }, - { - "epoch": 1.815004262574595, - "grad_norm": 0.07752810418605804, - "learning_rate": 4.4232615085035354e-05, - "loss": 0.0053322531282901766, - "step": 10645 - }, - { - "epoch": 1.815856777493606, - "grad_norm": 0.06572280824184418, - "learning_rate": 4.420740788842991e-05, - "loss": 0.0072415158152580265, - "step": 10650 - }, - { - "epoch": 1.8167092924126171, - "grad_norm": 0.07175682485103607, - "learning_rate": 4.418219756058335e-05, - "loss": 0.007061149924993515, - "step": 10655 - }, - { - "epoch": 1.8175618073316282, - "grad_norm": 0.0702451840043068, - "learning_rate": 4.4156984113264684e-05, - "loss": 0.0050024140626192095, - "step": 10660 - }, - { - "epoch": 1.8184143222506393, - "grad_norm": 0.05054900422692299, - "learning_rate": 4.4131767558244375e-05, - "loss": 0.004906433075666428, - "step": 10665 - }, - { - "epoch": 1.8192668371696503, - "grad_norm": 0.07256589829921722, - "learning_rate": 4.410654790729438e-05, - "loss": 0.006986310333013534, - "step": 10670 - }, - { - "epoch": 1.8201193520886616, - "grad_norm": 0.06617925316095352, - "learning_rate": 4.408132517218805e-05, - "loss": 0.007973263412714005, - "step": 10675 - }, - { - "epoch": 1.8209718670076727, - "grad_norm": 0.09039802104234695, - "learning_rate": 4.405609936470022e-05, - "loss": 0.007263268530368805, - "step": 10680 - }, - { - "epoch": 1.8218243819266837, - "grad_norm": 0.03763730078935623, - "learning_rate": 4.40308704966071e-05, - "loss": 0.005709199234843254, - "step": 10685 - }, - { - "epoch": 1.8226768968456948, - "grad_norm": 0.09264735877513885, - "learning_rate": 4.400563857968639e-05, - "loss": 0.006996266543865204, - "step": 10690 - }, - { - "epoch": 1.8235294117647058, - "grad_norm": 0.0882507711648941, - "learning_rate": 4.398040362571719e-05, - "loss": 0.007461686432361603, - "step": 10695 - }, - { - "epoch": 1.8243819266837171, - "grad_norm": 0.07662846893072128, - "learning_rate": 4.395516564648e-05, - "loss": 0.006977429986000061, - "step": 10700 - }, - { - "epoch": 1.8252344416027282, - "grad_norm": 0.07431378960609436, - "learning_rate": 4.392992465375676e-05, - "loss": 0.004957346618175507, - "step": 10705 - }, - { - "epoch": 1.8260869565217392, - "grad_norm": 0.06182624027132988, - "learning_rate": 4.39046806593308e-05, - "loss": 0.006677946448326111, - "step": 10710 - }, - { - "epoch": 1.8269394714407503, - "grad_norm": 0.06389910727739334, - "learning_rate": 4.3879433674986856e-05, - "loss": 0.006449097394943237, - "step": 10715 - }, - { - "epoch": 1.8277919863597614, - "grad_norm": 0.06772691756486893, - "learning_rate": 4.385418371251107e-05, - "loss": 0.004998251050710678, - "step": 10720 - }, - { - "epoch": 1.8286445012787724, - "grad_norm": 0.07048022747039795, - "learning_rate": 4.3828930783690955e-05, - "loss": 0.006418389827013015, - "step": 10725 - }, - { - "epoch": 1.8294970161977835, - "grad_norm": 0.09442687779664993, - "learning_rate": 4.3803674900315424e-05, - "loss": 0.006921603530645371, - "step": 10730 - }, - { - "epoch": 1.8303495311167945, - "grad_norm": 0.0578981414437294, - "learning_rate": 4.377841607417475e-05, - "loss": 0.007038000971078873, - "step": 10735 - }, - { - "epoch": 1.8312020460358056, - "grad_norm": 0.06990659236907959, - "learning_rate": 4.37531543170606e-05, - "loss": 0.005136258527636528, - "step": 10740 - }, - { - "epoch": 1.8320545609548167, - "grad_norm": 0.05566668137907982, - "learning_rate": 4.372788964076601e-05, - "loss": 0.005333118140697479, - "step": 10745 - }, - { - "epoch": 1.8329070758738277, - "grad_norm": 0.09198274463415146, - "learning_rate": 4.3702622057085376e-05, - "loss": 0.005783502757549286, - "step": 10750 - }, - { - "epoch": 1.8337595907928388, - "grad_norm": 0.12995415925979614, - "learning_rate": 4.3677351577814423e-05, - "loss": 0.005794361606240273, - "step": 10755 - }, - { - "epoch": 1.8346121057118498, - "grad_norm": 0.0827256515622139, - "learning_rate": 4.3652078214750264e-05, - "loss": 0.00593951866030693, - "step": 10760 - }, - { - "epoch": 1.835464620630861, - "grad_norm": 0.09131235629320145, - "learning_rate": 4.362680197969136e-05, - "loss": 0.006387272477149963, - "step": 10765 - }, - { - "epoch": 1.836317135549872, - "grad_norm": 0.06061462685465813, - "learning_rate": 4.360152288443748e-05, - "loss": 0.006085103005170822, - "step": 10770 - }, - { - "epoch": 1.8371696504688833, - "grad_norm": 0.05650132894515991, - "learning_rate": 4.357624094078976e-05, - "loss": 0.004817041009664536, - "step": 10775 - }, - { - "epoch": 1.8380221653878943, - "grad_norm": 0.09250559657812119, - "learning_rate": 4.355095616055063e-05, - "loss": 0.006116693839430809, - "step": 10780 - }, - { - "epoch": 1.8388746803069054, - "grad_norm": 0.06575264036655426, - "learning_rate": 4.352566855552389e-05, - "loss": 0.006027846410870552, - "step": 10785 - }, - { - "epoch": 1.8397271952259164, - "grad_norm": 0.07538174092769623, - "learning_rate": 4.350037813751462e-05, - "loss": 0.006624206900596619, - "step": 10790 - }, - { - "epoch": 1.8405797101449275, - "grad_norm": 0.06000296771526337, - "learning_rate": 4.347508491832924e-05, - "loss": 0.006386204063892365, - "step": 10795 - }, - { - "epoch": 1.8414322250639388, - "grad_norm": 0.058621276170015335, - "learning_rate": 4.3449788909775455e-05, - "loss": 0.006246517226099968, - "step": 10800 - }, - { - "epoch": 1.8422847399829498, - "grad_norm": 0.10082551836967468, - "learning_rate": 4.34244901236623e-05, - "loss": 0.006916524469852447, - "step": 10805 - }, - { - "epoch": 1.843137254901961, - "grad_norm": 0.07926804572343826, - "learning_rate": 4.3399188571800064e-05, - "loss": 0.006270130723714828, - "step": 10810 - }, - { - "epoch": 1.843989769820972, - "grad_norm": 0.14256511628627777, - "learning_rate": 4.3373884266000375e-05, - "loss": 0.008555002510547638, - "step": 10815 - }, - { - "epoch": 1.844842284739983, - "grad_norm": 0.0711030438542366, - "learning_rate": 4.334857721807612e-05, - "loss": 0.004097539931535721, - "step": 10820 - }, - { - "epoch": 1.845694799658994, - "grad_norm": 0.05918106436729431, - "learning_rate": 4.3323267439841464e-05, - "loss": 0.006263938546180725, - "step": 10825 - }, - { - "epoch": 1.8465473145780051, - "grad_norm": 0.06577462702989578, - "learning_rate": 4.329795494311186e-05, - "loss": 0.004532983154058456, - "step": 10830 - }, - { - "epoch": 1.8473998294970162, - "grad_norm": 0.07599867880344391, - "learning_rate": 4.327263973970401e-05, - "loss": 0.006951173394918441, - "step": 10835 - }, - { - "epoch": 1.8482523444160273, - "grad_norm": 0.055239275097846985, - "learning_rate": 4.324732184143592e-05, - "loss": 0.00514591783285141, - "step": 10840 - }, - { - "epoch": 1.8491048593350383, - "grad_norm": 0.10522980988025665, - "learning_rate": 4.322200126012681e-05, - "loss": 0.00747048556804657, - "step": 10845 - }, - { - "epoch": 1.8499573742540494, - "grad_norm": 0.08132579177618027, - "learning_rate": 4.319667800759716e-05, - "loss": 0.005432958528399467, - "step": 10850 - }, - { - "epoch": 1.8508098891730604, - "grad_norm": 0.04027591645717621, - "learning_rate": 4.3171352095668726e-05, - "loss": 0.004450181499123573, - "step": 10855 - }, - { - "epoch": 1.8516624040920715, - "grad_norm": 0.0873839259147644, - "learning_rate": 4.314602353616446e-05, - "loss": 0.006079509109258652, - "step": 10860 - }, - { - "epoch": 1.8525149190110826, - "grad_norm": 0.04989013075828552, - "learning_rate": 4.312069234090862e-05, - "loss": 0.003988634794950485, - "step": 10865 - }, - { - "epoch": 1.8533674339300936, - "grad_norm": 0.061433590948581696, - "learning_rate": 4.309535852172661e-05, - "loss": 0.0056050091981887816, - "step": 10870 - }, - { - "epoch": 1.854219948849105, - "grad_norm": 0.07007768750190735, - "learning_rate": 4.3070022090445114e-05, - "loss": 0.006938119232654571, - "step": 10875 - }, - { - "epoch": 1.855072463768116, - "grad_norm": 0.03557104617357254, - "learning_rate": 4.3044683058892024e-05, - "loss": 0.0061099715530872345, - "step": 10880 - }, - { - "epoch": 1.855924978687127, - "grad_norm": 0.07706935703754425, - "learning_rate": 4.3019341438896446e-05, - "loss": 0.0050103053450584415, - "step": 10885 - }, - { - "epoch": 1.856777493606138, - "grad_norm": 0.06719083338975906, - "learning_rate": 4.2993997242288686e-05, - "loss": 0.005047342553734779, - "step": 10890 - }, - { - "epoch": 1.8576300085251491, - "grad_norm": 0.05179615691304207, - "learning_rate": 4.296865048090024e-05, - "loss": 0.004692831635475158, - "step": 10895 - }, - { - "epoch": 1.8584825234441604, - "grad_norm": 0.08594074845314026, - "learning_rate": 4.294330116656385e-05, - "loss": 0.006039778143167496, - "step": 10900 - }, - { - "epoch": 1.8593350383631715, - "grad_norm": 0.11285590380430222, - "learning_rate": 4.291794931111339e-05, - "loss": 0.005857323482632637, - "step": 10905 - }, - { - "epoch": 1.8601875532821825, - "grad_norm": 0.056068334728479385, - "learning_rate": 4.289259492638399e-05, - "loss": 0.006339801102876663, - "step": 10910 - }, - { - "epoch": 1.8610400682011936, - "grad_norm": 0.1027015820145607, - "learning_rate": 4.2867238024211873e-05, - "loss": 0.007628202438354492, - "step": 10915 - }, - { - "epoch": 1.8618925831202047, - "grad_norm": 0.06938920170068741, - "learning_rate": 4.2841878616434516e-05, - "loss": 0.005421775206923485, - "step": 10920 - }, - { - "epoch": 1.8627450980392157, - "grad_norm": 0.06613876670598984, - "learning_rate": 4.2816516714890525e-05, - "loss": 0.00747835859656334, - "step": 10925 - }, - { - "epoch": 1.8635976129582268, - "grad_norm": 0.07735379040241241, - "learning_rate": 4.279115233141967e-05, - "loss": 0.006907149404287338, - "step": 10930 - }, - { - "epoch": 1.8644501278772379, - "grad_norm": 0.06285069137811661, - "learning_rate": 4.276578547786291e-05, - "loss": 0.005340246856212616, - "step": 10935 - }, - { - "epoch": 1.865302642796249, - "grad_norm": 0.0670136883854866, - "learning_rate": 4.274041616606232e-05, - "loss": 0.0067828245460987095, - "step": 10940 - }, - { - "epoch": 1.86615515771526, - "grad_norm": 0.07944425195455551, - "learning_rate": 4.2715044407861144e-05, - "loss": 0.006403806060552597, - "step": 10945 - }, - { - "epoch": 1.867007672634271, - "grad_norm": 0.07202634960412979, - "learning_rate": 4.268967021510375e-05, - "loss": 0.004467373341321945, - "step": 10950 - }, - { - "epoch": 1.867860187553282, - "grad_norm": 0.08753371983766556, - "learning_rate": 4.266429359963568e-05, - "loss": 0.006740668416023254, - "step": 10955 - }, - { - "epoch": 1.8687127024722932, - "grad_norm": 0.0920538380742073, - "learning_rate": 4.263891457330357e-05, - "loss": 0.007489701360464096, - "step": 10960 - }, - { - "epoch": 1.8695652173913042, - "grad_norm": 0.11196473985910416, - "learning_rate": 4.261353314795519e-05, - "loss": 0.007533909380435943, - "step": 10965 - }, - { - "epoch": 1.8704177323103153, - "grad_norm": 0.08394299447536469, - "learning_rate": 4.258814933543943e-05, - "loss": 0.005159291997551918, - "step": 10970 - }, - { - "epoch": 1.8712702472293266, - "grad_norm": 0.08024156838655472, - "learning_rate": 4.25627631476063e-05, - "loss": 0.00543709248304367, - "step": 10975 - }, - { - "epoch": 1.8721227621483376, - "grad_norm": 0.052640948444604874, - "learning_rate": 4.253737459630694e-05, - "loss": 0.004067758470773697, - "step": 10980 - }, - { - "epoch": 1.8729752770673487, - "grad_norm": 0.08472926914691925, - "learning_rate": 4.251198369339353e-05, - "loss": 0.0077335178852081295, - "step": 10985 - }, - { - "epoch": 1.8738277919863597, - "grad_norm": 0.09794485569000244, - "learning_rate": 4.248659045071942e-05, - "loss": 0.0055429480969905855, - "step": 10990 - }, - { - "epoch": 1.8746803069053708, - "grad_norm": 0.07767575234174728, - "learning_rate": 4.2461194880139016e-05, - "loss": 0.008025288581848145, - "step": 10995 - }, - { - "epoch": 1.875532821824382, - "grad_norm": 0.07427361607551575, - "learning_rate": 4.2435796993507794e-05, - "loss": 0.006437119096517563, - "step": 11000 - }, - { - "epoch": 1.8763853367433931, - "grad_norm": 0.07420040667057037, - "learning_rate": 4.241039680268237e-05, - "loss": 0.0051200386136770245, - "step": 11005 - }, - { - "epoch": 1.8772378516624042, - "grad_norm": 0.09004204720258713, - "learning_rate": 4.2384994319520355e-05, - "loss": 0.007488063722848892, - "step": 11010 - }, - { - "epoch": 1.8780903665814153, - "grad_norm": 0.060929473489522934, - "learning_rate": 4.235958955588049e-05, - "loss": 0.00483398288488388, - "step": 11015 - }, - { - "epoch": 1.8789428815004263, - "grad_norm": 0.08116185665130615, - "learning_rate": 4.2334182523622584e-05, - "loss": 0.007078002393245697, - "step": 11020 - }, - { - "epoch": 1.8797953964194374, - "grad_norm": 0.0908491238951683, - "learning_rate": 4.230877323460746e-05, - "loss": 0.007228228449821472, - "step": 11025 - }, - { - "epoch": 1.8806479113384484, - "grad_norm": 0.08618480712175369, - "learning_rate": 4.228336170069703e-05, - "loss": 0.005402455478906632, - "step": 11030 - }, - { - "epoch": 1.8815004262574595, - "grad_norm": 0.06869816035032272, - "learning_rate": 4.2257947933754236e-05, - "loss": 0.006180650368332863, - "step": 11035 - }, - { - "epoch": 1.8823529411764706, - "grad_norm": 0.0904744416475296, - "learning_rate": 4.223253194564309e-05, - "loss": 0.00636049136519432, - "step": 11040 - }, - { - "epoch": 1.8832054560954816, - "grad_norm": 0.04902644082903862, - "learning_rate": 4.220711374822859e-05, - "loss": 0.0062784947454929355, - "step": 11045 - }, - { - "epoch": 1.8840579710144927, - "grad_norm": 0.060081589967012405, - "learning_rate": 4.2181693353376817e-05, - "loss": 0.005494052171707153, - "step": 11050 - }, - { - "epoch": 1.8849104859335037, - "grad_norm": 0.058530837297439575, - "learning_rate": 4.215627077295485e-05, - "loss": 0.005457080900669098, - "step": 11055 - }, - { - "epoch": 1.8857630008525148, - "grad_norm": 0.15006953477859497, - "learning_rate": 4.2130846018830795e-05, - "loss": 0.0062985971570014955, - "step": 11060 - }, - { - "epoch": 1.8866155157715259, - "grad_norm": 0.04498155787587166, - "learning_rate": 4.210541910287377e-05, - "loss": 0.004242038726806641, - "step": 11065 - }, - { - "epoch": 1.887468030690537, - "grad_norm": 0.09093966335058212, - "learning_rate": 4.207999003695392e-05, - "loss": 0.00554364025592804, - "step": 11070 - }, - { - "epoch": 1.8883205456095482, - "grad_norm": 0.06531018018722534, - "learning_rate": 4.2054558832942365e-05, - "loss": 0.0063869751989841465, - "step": 11075 - }, - { - "epoch": 1.8891730605285593, - "grad_norm": 0.059213872998952866, - "learning_rate": 4.202912550271124e-05, - "loss": 0.004836349189281464, - "step": 11080 - }, - { - "epoch": 1.8900255754475703, - "grad_norm": 0.11074823886156082, - "learning_rate": 4.200369005813367e-05, - "loss": 0.00584055446088314, - "step": 11085 - }, - { - "epoch": 1.8908780903665814, - "grad_norm": 0.09352346509695053, - "learning_rate": 4.197825251108376e-05, - "loss": 0.006423837691545487, - "step": 11090 - }, - { - "epoch": 1.8917306052855924, - "grad_norm": 0.10930176079273224, - "learning_rate": 4.195281287343662e-05, - "loss": 0.007819923013448716, - "step": 11095 - }, - { - "epoch": 1.8925831202046037, - "grad_norm": 0.10935486853122711, - "learning_rate": 4.19273711570683e-05, - "loss": 0.008524692058563233, - "step": 11100 - }, - { - "epoch": 1.8934356351236148, - "grad_norm": 0.07407546788454056, - "learning_rate": 4.190192737385586e-05, - "loss": 0.006353407353162766, - "step": 11105 - }, - { - "epoch": 1.8942881500426259, - "grad_norm": 0.11030165106058121, - "learning_rate": 4.187648153567729e-05, - "loss": 0.007683426141738892, - "step": 11110 - }, - { - "epoch": 1.895140664961637, - "grad_norm": 0.09419413655996323, - "learning_rate": 4.185103365441155e-05, - "loss": 0.005654521286487579, - "step": 11115 - }, - { - "epoch": 1.895993179880648, - "grad_norm": 0.06284896284341812, - "learning_rate": 4.1825583741938576e-05, - "loss": 0.0048633765429258345, - "step": 11120 - }, - { - "epoch": 1.896845694799659, - "grad_norm": 0.06429705023765564, - "learning_rate": 4.180013181013921e-05, - "loss": 0.006907754391431808, - "step": 11125 - }, - { - "epoch": 1.89769820971867, - "grad_norm": 0.1234050914645195, - "learning_rate": 4.177467787089527e-05, - "loss": 0.008531783521175385, - "step": 11130 - }, - { - "epoch": 1.8985507246376812, - "grad_norm": 0.04056263715028763, - "learning_rate": 4.174922193608951e-05, - "loss": 0.006784418225288391, - "step": 11135 - }, - { - "epoch": 1.8994032395566922, - "grad_norm": 0.048422425985336304, - "learning_rate": 4.172376401760561e-05, - "loss": 0.006587067246437072, - "step": 11140 - }, - { - "epoch": 1.9002557544757033, - "grad_norm": 0.10680951178073883, - "learning_rate": 4.169830412732815e-05, - "loss": 0.005700337141752243, - "step": 11145 - }, - { - "epoch": 1.9011082693947143, - "grad_norm": 0.09418217837810516, - "learning_rate": 4.167284227714267e-05, - "loss": 0.0059782925993204115, - "step": 11150 - }, - { - "epoch": 1.9019607843137254, - "grad_norm": 0.12511073052883148, - "learning_rate": 4.1647378478935614e-05, - "loss": 0.006256800889968872, - "step": 11155 - }, - { - "epoch": 1.9028132992327365, - "grad_norm": 0.06957859545946121, - "learning_rate": 4.1621912744594316e-05, - "loss": 0.008690094202756881, - "step": 11160 - }, - { - "epoch": 1.9036658141517475, - "grad_norm": 0.10859719663858414, - "learning_rate": 4.159644508600704e-05, - "loss": 0.008262380957603455, - "step": 11165 - }, - { - "epoch": 1.9045183290707586, - "grad_norm": 0.08408714830875397, - "learning_rate": 4.157097551506292e-05, - "loss": 0.005347007513046264, - "step": 11170 - }, - { - "epoch": 1.9053708439897699, - "grad_norm": 0.05623621866106987, - "learning_rate": 4.1545504043652014e-05, - "loss": 0.005091758817434311, - "step": 11175 - }, - { - "epoch": 1.906223358908781, - "grad_norm": 0.06791777908802032, - "learning_rate": 4.1520030683665246e-05, - "loss": 0.006755101680755615, - "step": 11180 - }, - { - "epoch": 1.907075873827792, - "grad_norm": 0.039112675935029984, - "learning_rate": 4.149455544699444e-05, - "loss": 0.0063312210142612456, - "step": 11185 - }, - { - "epoch": 1.907928388746803, - "grad_norm": 0.05682097375392914, - "learning_rate": 4.146907834553227e-05, - "loss": 0.005028403550386429, - "step": 11190 - }, - { - "epoch": 1.908780903665814, - "grad_norm": 0.07670710980892181, - "learning_rate": 4.144359939117229e-05, - "loss": 0.006438900530338287, - "step": 11195 - }, - { - "epoch": 1.9096334185848254, - "grad_norm": 0.06266012787818909, - "learning_rate": 4.141811859580894e-05, - "loss": 0.006153284758329392, - "step": 11200 - }, - { - "epoch": 1.9104859335038364, - "grad_norm": 0.06892232596874237, - "learning_rate": 4.139263597133749e-05, - "loss": 0.0042446799576282505, - "step": 11205 - }, - { - "epoch": 1.9113384484228475, - "grad_norm": 0.08733050525188446, - "learning_rate": 4.136715152965409e-05, - "loss": 0.0048094093799591064, - "step": 11210 - }, - { - "epoch": 1.9121909633418586, - "grad_norm": 0.06578327715396881, - "learning_rate": 4.13416652826557e-05, - "loss": 0.0047289058566093445, - "step": 11215 - }, - { - "epoch": 1.9130434782608696, - "grad_norm": 0.06382749229669571, - "learning_rate": 4.1316177242240174e-05, - "loss": 0.004200926423072815, - "step": 11220 - }, - { - "epoch": 1.9138959931798807, - "grad_norm": 0.07368794828653336, - "learning_rate": 4.129068742030617e-05, - "loss": 0.0063028551638126375, - "step": 11225 - }, - { - "epoch": 1.9147485080988917, - "grad_norm": 0.09302657842636108, - "learning_rate": 4.1265195828753176e-05, - "loss": 0.008124063909053802, - "step": 11230 - }, - { - "epoch": 1.9156010230179028, - "grad_norm": 0.08030751347541809, - "learning_rate": 4.123970247948153e-05, - "loss": 0.009628574550151824, - "step": 11235 - }, - { - "epoch": 1.9164535379369139, - "grad_norm": 0.08395590633153915, - "learning_rate": 4.1214207384392356e-05, - "loss": 0.007773591578006745, - "step": 11240 - }, - { - "epoch": 1.917306052855925, - "grad_norm": 0.09472183138132095, - "learning_rate": 4.118871055538762e-05, - "loss": 0.005461954325437546, - "step": 11245 - }, - { - "epoch": 1.918158567774936, - "grad_norm": 0.095457524061203, - "learning_rate": 4.11632120043701e-05, - "loss": 0.005725187063217163, - "step": 11250 - }, - { - "epoch": 1.919011082693947, - "grad_norm": 0.10508730262517929, - "learning_rate": 4.113771174324336e-05, - "loss": 0.006902433931827545, - "step": 11255 - }, - { - "epoch": 1.919863597612958, - "grad_norm": 0.08675665408372879, - "learning_rate": 4.111220978391176e-05, - "loss": 0.007470531016588211, - "step": 11260 - }, - { - "epoch": 1.9207161125319692, - "grad_norm": 0.08215013146400452, - "learning_rate": 4.108670613828049e-05, - "loss": 0.005732448399066925, - "step": 11265 - }, - { - "epoch": 1.9215686274509802, - "grad_norm": 0.054156310856342316, - "learning_rate": 4.1061200818255476e-05, - "loss": 0.005808809399604797, - "step": 11270 - }, - { - "epoch": 1.9224211423699915, - "grad_norm": 0.09332830458879471, - "learning_rate": 4.103569383574346e-05, - "loss": 0.005646481737494468, - "step": 11275 - }, - { - "epoch": 1.9232736572890026, - "grad_norm": 0.05589313432574272, - "learning_rate": 4.101018520265195e-05, - "loss": 0.005581434443593025, - "step": 11280 - }, - { - "epoch": 1.9241261722080136, - "grad_norm": 0.0465618334710598, - "learning_rate": 4.098467493088922e-05, - "loss": 0.005028170347213745, - "step": 11285 - }, - { - "epoch": 1.9249786871270247, - "grad_norm": 0.07304909080266953, - "learning_rate": 4.095916303236431e-05, - "loss": 0.007494028657674789, - "step": 11290 - }, - { - "epoch": 1.9258312020460358, - "grad_norm": 0.09532103687524796, - "learning_rate": 4.0933649518987025e-05, - "loss": 0.006374432146549225, - "step": 11295 - }, - { - "epoch": 1.926683716965047, - "grad_norm": 0.07364784181118011, - "learning_rate": 4.090813440266794e-05, - "loss": 0.0053088821470737456, - "step": 11300 - }, - { - "epoch": 1.927536231884058, - "grad_norm": 0.0804903507232666, - "learning_rate": 4.088261769531834e-05, - "loss": 0.0069495439529418945, - "step": 11305 - }, - { - "epoch": 1.9283887468030692, - "grad_norm": 0.07125549763441086, - "learning_rate": 4.0857099408850264e-05, - "loss": 0.005846098065376282, - "step": 11310 - }, - { - "epoch": 1.9292412617220802, - "grad_norm": 0.017375558614730835, - "learning_rate": 4.083157955517653e-05, - "loss": 0.004308582097291946, - "step": 11315 - }, - { - "epoch": 1.9300937766410913, - "grad_norm": 0.07655836641788483, - "learning_rate": 4.080605814621063e-05, - "loss": 0.006030111759901047, - "step": 11320 - }, - { - "epoch": 1.9309462915601023, - "grad_norm": 0.05411117896437645, - "learning_rate": 4.078053519386681e-05, - "loss": 0.0069768443703651425, - "step": 11325 - }, - { - "epoch": 1.9317988064791134, - "grad_norm": 0.08431188017129898, - "learning_rate": 4.0755010710060035e-05, - "loss": 0.006973695755004883, - "step": 11330 - }, - { - "epoch": 1.9326513213981245, - "grad_norm": 0.08480583131313324, - "learning_rate": 4.072948470670598e-05, - "loss": 0.006525547057390213, - "step": 11335 - }, - { - "epoch": 1.9335038363171355, - "grad_norm": 0.073171466588974, - "learning_rate": 4.070395719572104e-05, - "loss": 0.0054599311202764515, - "step": 11340 - }, - { - "epoch": 1.9343563512361466, - "grad_norm": 0.06951522827148438, - "learning_rate": 4.0678428189022304e-05, - "loss": 0.008897364884614945, - "step": 11345 - }, - { - "epoch": 1.9352088661551576, - "grad_norm": 0.08654197305440903, - "learning_rate": 4.0652897698527557e-05, - "loss": 0.005458325147628784, - "step": 11350 - }, - { - "epoch": 1.9360613810741687, - "grad_norm": 0.07929553836584091, - "learning_rate": 4.0627365736155285e-05, - "loss": 0.00710543841123581, - "step": 11355 - }, - { - "epoch": 1.9369138959931798, - "grad_norm": 0.12434503436088562, - "learning_rate": 4.060183231382466e-05, - "loss": 0.0071723200380802155, - "step": 11360 - }, - { - "epoch": 1.9377664109121908, - "grad_norm": 0.06440022587776184, - "learning_rate": 4.057629744345551e-05, - "loss": 0.006010268628597259, - "step": 11365 - }, - { - "epoch": 1.938618925831202, - "grad_norm": 0.09477414190769196, - "learning_rate": 4.0550761136968404e-05, - "loss": 0.007152469456195831, - "step": 11370 - }, - { - "epoch": 1.9394714407502132, - "grad_norm": 0.06758873164653778, - "learning_rate": 4.0525223406284516e-05, - "loss": 0.004493400454521179, - "step": 11375 - }, - { - "epoch": 1.9403239556692242, - "grad_norm": 0.06823158264160156, - "learning_rate": 4.0499684263325695e-05, - "loss": 0.0058505676686763765, - "step": 11380 - }, - { - "epoch": 1.9411764705882353, - "grad_norm": 0.10731697082519531, - "learning_rate": 4.0474143720014485e-05, - "loss": 0.00592585802078247, - "step": 11385 - }, - { - "epoch": 1.9420289855072463, - "grad_norm": 0.09786538779735565, - "learning_rate": 4.044860178827405e-05, - "loss": 0.008860854804515839, - "step": 11390 - }, - { - "epoch": 1.9428815004262576, - "grad_norm": 0.08662491291761398, - "learning_rate": 4.042305848002822e-05, - "loss": 0.00579673945903778, - "step": 11395 - }, - { - "epoch": 1.9437340153452687, - "grad_norm": 0.08446741849184036, - "learning_rate": 4.039751380720145e-05, - "loss": 0.0067916139960289, - "step": 11400 - }, - { - "epoch": 1.9445865302642797, - "grad_norm": 0.08059567958116531, - "learning_rate": 4.037196778171885e-05, - "loss": 0.007273902744054794, - "step": 11405 - }, - { - "epoch": 1.9454390451832908, - "grad_norm": 0.067914679646492, - "learning_rate": 4.0346420415506156e-05, - "loss": 0.00854090303182602, - "step": 11410 - }, - { - "epoch": 1.9462915601023019, - "grad_norm": 0.06519316136837006, - "learning_rate": 4.032087172048973e-05, - "loss": 0.006127477809786797, - "step": 11415 - }, - { - "epoch": 1.947144075021313, - "grad_norm": 0.10216967016458511, - "learning_rate": 4.029532170859655e-05, - "loss": 0.007330343872308731, - "step": 11420 - }, - { - "epoch": 1.947996589940324, - "grad_norm": 0.07684756815433502, - "learning_rate": 4.02697703917542e-05, - "loss": 0.006121716648340225, - "step": 11425 - }, - { - "epoch": 1.948849104859335, - "grad_norm": 0.08026126027107239, - "learning_rate": 4.0244217781890906e-05, - "loss": 0.006386417150497437, - "step": 11430 - }, - { - "epoch": 1.949701619778346, - "grad_norm": 0.09047527611255646, - "learning_rate": 4.021866389093546e-05, - "loss": 0.004208286106586456, - "step": 11435 - }, - { - "epoch": 1.9505541346973572, - "grad_norm": 0.047482747584581375, - "learning_rate": 4.0193108730817284e-05, - "loss": 0.005754061415791512, - "step": 11440 - }, - { - "epoch": 1.9514066496163682, - "grad_norm": 0.054364416748285294, - "learning_rate": 4.0167552313466355e-05, - "loss": 0.004412830248475075, - "step": 11445 - }, - { - "epoch": 1.9522591645353793, - "grad_norm": 0.07640549540519714, - "learning_rate": 4.014199465081327e-05, - "loss": 0.005214530602097511, - "step": 11450 - }, - { - "epoch": 1.9531116794543903, - "grad_norm": 0.07241252809762955, - "learning_rate": 4.0116435754789206e-05, - "loss": 0.005129393562674523, - "step": 11455 - }, - { - "epoch": 1.9539641943734014, - "grad_norm": 0.048170432448387146, - "learning_rate": 4.009087563732589e-05, - "loss": 0.005180074647068977, - "step": 11460 - }, - { - "epoch": 1.9548167092924125, - "grad_norm": 0.07336216419935226, - "learning_rate": 4.006531431035566e-05, - "loss": 0.009098170697689057, - "step": 11465 - }, - { - "epoch": 1.9556692242114238, - "grad_norm": 0.04934614151716232, - "learning_rate": 4.0039751785811346e-05, - "loss": 0.005307629331946373, - "step": 11470 - }, - { - "epoch": 1.9565217391304348, - "grad_norm": 0.08941303193569183, - "learning_rate": 4.001418807562643e-05, - "loss": 0.0069742932915687565, - "step": 11475 - }, - { - "epoch": 1.9573742540494459, - "grad_norm": 0.05791569501161575, - "learning_rate": 3.998862319173488e-05, - "loss": 0.0050424404442310335, - "step": 11480 - }, - { - "epoch": 1.958226768968457, - "grad_norm": 0.04596787318587303, - "learning_rate": 3.996305714607125e-05, - "loss": 0.004805172979831696, - "step": 11485 - }, - { - "epoch": 1.959079283887468, - "grad_norm": 0.07698309421539307, - "learning_rate": 3.993748995057061e-05, - "loss": 0.006605527549982071, - "step": 11490 - }, - { - "epoch": 1.9599317988064793, - "grad_norm": 0.08400565385818481, - "learning_rate": 3.9911921617168565e-05, - "loss": 0.0085490882396698, - "step": 11495 - }, - { - "epoch": 1.9607843137254903, - "grad_norm": 0.1446380764245987, - "learning_rate": 3.9886352157801296e-05, - "loss": 0.005958027392625809, - "step": 11500 - }, - { - "epoch": 1.9616368286445014, - "grad_norm": 0.06108809635043144, - "learning_rate": 3.986078158440544e-05, - "loss": 0.0054461218416690825, - "step": 11505 - }, - { - "epoch": 1.9624893435635125, - "grad_norm": 0.1163720190525055, - "learning_rate": 3.983520990891823e-05, - "loss": 0.0065662160515785216, - "step": 11510 - }, - { - "epoch": 1.9633418584825235, - "grad_norm": 0.08339548110961914, - "learning_rate": 3.980963714327734e-05, - "loss": 0.007503192871809006, - "step": 11515 - }, - { - "epoch": 1.9641943734015346, - "grad_norm": 0.07774331420660019, - "learning_rate": 3.9784063299421e-05, - "loss": 0.005831217020750045, - "step": 11520 - }, - { - "epoch": 1.9650468883205456, - "grad_norm": 0.08897018432617188, - "learning_rate": 3.9758488389287936e-05, - "loss": 0.006972354650497436, - "step": 11525 - }, - { - "epoch": 1.9658994032395567, - "grad_norm": 0.07708834111690521, - "learning_rate": 3.9732912424817374e-05, - "loss": 0.0059847764670848845, - "step": 11530 - }, - { - "epoch": 1.9667519181585678, - "grad_norm": 0.133201003074646, - "learning_rate": 3.9707335417949015e-05, - "loss": 0.005828146636486053, - "step": 11535 - }, - { - "epoch": 1.9676044330775788, - "grad_norm": 0.05620214343070984, - "learning_rate": 3.968175738062303e-05, - "loss": 0.004607116058468819, - "step": 11540 - }, - { - "epoch": 1.9684569479965899, - "grad_norm": 0.05371567979454994, - "learning_rate": 3.965617832478015e-05, - "loss": 0.004455961659550667, - "step": 11545 - }, - { - "epoch": 1.969309462915601, - "grad_norm": 0.10317978262901306, - "learning_rate": 3.96305982623615e-05, - "loss": 0.004697806015610695, - "step": 11550 - }, - { - "epoch": 1.970161977834612, - "grad_norm": 0.08786958456039429, - "learning_rate": 3.96050172053087e-05, - "loss": 0.005183818191289902, - "step": 11555 - }, - { - "epoch": 1.971014492753623, - "grad_norm": 0.07750507444143295, - "learning_rate": 3.957943516556385e-05, - "loss": 0.005475999787449837, - "step": 11560 - }, - { - "epoch": 1.9718670076726341, - "grad_norm": 0.07066313922405243, - "learning_rate": 3.955385215506949e-05, - "loss": 0.005772550404071808, - "step": 11565 - }, - { - "epoch": 1.9727195225916454, - "grad_norm": 0.08183038979768753, - "learning_rate": 3.952826818576863e-05, - "loss": 0.005305550992488861, - "step": 11570 - }, - { - "epoch": 1.9735720375106565, - "grad_norm": 0.075381800532341, - "learning_rate": 3.95026832696047e-05, - "loss": 0.00803310126066208, - "step": 11575 - }, - { - "epoch": 1.9744245524296675, - "grad_norm": 0.09064166992902756, - "learning_rate": 3.9477097418521616e-05, - "loss": 0.006380292773246765, - "step": 11580 - }, - { - "epoch": 1.9752770673486786, - "grad_norm": 0.09140465408563614, - "learning_rate": 3.945151064446367e-05, - "loss": 0.00863645225763321, - "step": 11585 - }, - { - "epoch": 1.9761295822676896, - "grad_norm": 0.09985008090734482, - "learning_rate": 3.942592295937565e-05, - "loss": 0.005205995962023735, - "step": 11590 - }, - { - "epoch": 1.976982097186701, - "grad_norm": 0.07968702167272568, - "learning_rate": 3.940033437520273e-05, - "loss": 0.006467466801404953, - "step": 11595 - }, - { - "epoch": 1.977834612105712, - "grad_norm": 0.0925409123301506, - "learning_rate": 3.937474490389051e-05, - "loss": 0.006804432719945908, - "step": 11600 - }, - { - "epoch": 1.978687127024723, - "grad_norm": 0.053421750664711, - "learning_rate": 3.9349154557385e-05, - "loss": 0.0067677564918994905, - "step": 11605 - }, - { - "epoch": 1.979539641943734, - "grad_norm": 0.07791347801685333, - "learning_rate": 3.9323563347632624e-05, - "loss": 0.006826826930046081, - "step": 11610 - }, - { - "epoch": 1.9803921568627452, - "grad_norm": 0.08627293258905411, - "learning_rate": 3.929797128658024e-05, - "loss": 0.00804663747549057, - "step": 11615 - }, - { - "epoch": 1.9812446717817562, - "grad_norm": 0.06506595015525818, - "learning_rate": 3.927237838617503e-05, - "loss": 0.005456966534256935, - "step": 11620 - }, - { - "epoch": 1.9820971867007673, - "grad_norm": 0.09555826336145401, - "learning_rate": 3.924678465836465e-05, - "loss": 0.005365721881389618, - "step": 11625 - }, - { - "epoch": 1.9829497016197783, - "grad_norm": 0.09176401793956757, - "learning_rate": 3.922119011509706e-05, - "loss": 0.006210924685001373, - "step": 11630 - }, - { - "epoch": 1.9838022165387894, - "grad_norm": 0.05260130763053894, - "learning_rate": 3.919559476832069e-05, - "loss": 0.004408955946564675, - "step": 11635 - }, - { - "epoch": 1.9846547314578005, - "grad_norm": 0.0875319391489029, - "learning_rate": 3.916999862998427e-05, - "loss": 0.005069036781787872, - "step": 11640 - }, - { - "epoch": 1.9855072463768115, - "grad_norm": 0.10335614532232285, - "learning_rate": 3.9144401712036936e-05, - "loss": 0.007199827581644058, - "step": 11645 - }, - { - "epoch": 1.9863597612958226, - "grad_norm": 0.09518889337778091, - "learning_rate": 3.9118804026428194e-05, - "loss": 0.00541754923760891, - "step": 11650 - }, - { - "epoch": 1.9872122762148337, - "grad_norm": 0.06707368791103363, - "learning_rate": 3.9093205585107863e-05, - "loss": 0.00641927570104599, - "step": 11655 - }, - { - "epoch": 1.9880647911338447, - "grad_norm": 0.10102292895317078, - "learning_rate": 3.906760640002618e-05, - "loss": 0.007096148282289505, - "step": 11660 - }, - { - "epoch": 1.9889173060528558, - "grad_norm": 0.0690481886267662, - "learning_rate": 3.904200648313368e-05, - "loss": 0.0063364550471305845, - "step": 11665 - }, - { - "epoch": 1.989769820971867, - "grad_norm": 0.1051480695605278, - "learning_rate": 3.901640584638126e-05, - "loss": 0.009133437275886535, - "step": 11670 - }, - { - "epoch": 1.9906223358908781, - "grad_norm": 0.0857042595744133, - "learning_rate": 3.899080450172015e-05, - "loss": 0.007245839387178421, - "step": 11675 - }, - { - "epoch": 1.9914748508098892, - "grad_norm": 0.04038793221116066, - "learning_rate": 3.8965202461101904e-05, - "loss": 0.005575920641422272, - "step": 11680 - }, - { - "epoch": 1.9923273657289002, - "grad_norm": 0.06331093609333038, - "learning_rate": 3.893959973647842e-05, - "loss": 0.004866635054349899, - "step": 11685 - }, - { - "epoch": 1.9931798806479113, - "grad_norm": 0.08694019168615341, - "learning_rate": 3.891399633980188e-05, - "loss": 0.004249059408903122, - "step": 11690 - }, - { - "epoch": 1.9940323955669226, - "grad_norm": 0.06739087402820587, - "learning_rate": 3.888839228302482e-05, - "loss": 0.006520142406225204, - "step": 11695 - }, - { - "epoch": 1.9948849104859336, - "grad_norm": 0.09432726353406906, - "learning_rate": 3.886278757810005e-05, - "loss": 0.006377060711383819, - "step": 11700 - }, - { - "epoch": 1.9957374254049447, - "grad_norm": 0.040565814822912216, - "learning_rate": 3.883718223698071e-05, - "loss": 0.0062430910766124725, - "step": 11705 - }, - { - "epoch": 1.9965899403239558, - "grad_norm": 0.09249477833509445, - "learning_rate": 3.881157627162022e-05, - "loss": 0.005447167158126831, - "step": 11710 - }, - { - "epoch": 1.9974424552429668, - "grad_norm": 0.08561582118272781, - "learning_rate": 3.87859696939723e-05, - "loss": 0.0067646786570549015, - "step": 11715 - }, - { - "epoch": 1.9982949701619779, - "grad_norm": 0.0771077573299408, - "learning_rate": 3.876036251599094e-05, - "loss": 0.006473222374916076, - "step": 11720 - }, - { - "epoch": 1.999147485080989, - "grad_norm": 0.047942496836185455, - "learning_rate": 3.873475474963044e-05, - "loss": 0.004876254498958588, - "step": 11725 - }, - { - "epoch": 1.9996589940323957, - "eval_loss": 0.03507082909345627, - "eval_runtime": 3.6311, - "eval_samples_per_second": 69.4, - "eval_steps_per_second": 1.102, - "step": 11728 - }, - { - "eval_cer_subset": 0.01172504763300601, - "eval_cer_subset_edit_distance": 720, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 11728 - } - ], - "logging_steps": 5, - "max_steps": 23460, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 2932, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 3.9535860004714414e+18, - "train_batch_size": 32, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/training_args.bin deleted file mode 100644 index f049a0b30d4abb921310cf007655d399147294f8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-11728/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6adec376d54028ef57ef3dc856a5cba12bab9c0d580369637fa983b6072064f7 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/adapter_model.safetensors deleted file mode 100644 index 86fca76499f1480550825e224e368253dd5c1273..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:251f115f1399311d664d30a65f3f69edd39ca1066ff9de11cf1af58abc53789c -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/optimizer.pt deleted file mode 100644 index efd89fc63cbc81c79c7bb2805c83028693fd359b..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5571e8cb2c75967acf8b8fe019749ea08c8733a3dc2fd2a8e0f667a32a4c9f57 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/rng_state.pth deleted file mode 100644 index 704d640093351a00f5a279e5605a8e437b70eab3..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4edcb0c400e489258895e3e8e12e12a56c067fbaee316ef0aecc58f56df05ed8 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/scheduler.pt deleted file mode 100644 index 19ffc2635487ad1fb5974f6508338607e9b73aab..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0b7686410b47d9ea46c29370951c2c9ee4eb0cc9deb04bed685304ede7411208 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/trainer_state.json deleted file mode 100644 index 7cd20056562279174a15d56c7db4bbf6e8da2e76..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/trainer_state.json +++ /dev/null @@ -1,2142 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9986357435197817, - "eval_steps": 366, - "global_step": 1464, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 1.5267574879617024e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1464/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/adapter_config.json deleted file mode 100644 index dcdb6b17c195950cd12709b48b32f4a0c173c74e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "down_proj", - "v_proj", - "gate_proj", - "up_proj", - "o_proj", - "q_proj", - "k_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/adapter_model.safetensors deleted file mode 100644 index 7121d636a44500e824038d1c8899622241078993..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eeae5e07a8d9b92052b6418ada0aeec6c226839a6e77f2fffb796f75c14516a9 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/optimizer.pt deleted file mode 100644 index aafecd72bd33fb6cdca912cf6fba134a4d9a96fd..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b584a4e97475ef1d99387db902af3f22c6d714a86d5800599a9c5b4a696a072 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/rng_state.pth deleted file mode 100644 index 590182fe3185b746a8114bd40bdcdd2d117502bd..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ccb8eeb935749fc43744e0a5eeacdf6f0f10253be15266a497cbca0ffaa2573 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/scheduler.pt deleted file mode 100644 index 76d4307807562f715cf1b7792d74cc05e8907cf0..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:adf5bf1180c4073f3c778c1ab49bbe225838d3c7885b46f176a1b30b405c7f03 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/trainer_state.json deleted file mode 100644 index 56db4e72ad5a7489e0fec64ec312f55cb29f26df..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/trainer_state.json +++ /dev/null @@ -1,2101 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.24995737425404946, - "eval_steps": 1466, - "global_step": 1466, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0008525149190110827, - "grad_norm": 1.6653118133544922, - "learning_rate": 6.382978723404255e-07, - "loss": 0.6878558158874511, - "step": 5 - }, - { - "epoch": 0.0017050298380221654, - "grad_norm": 1.6072094440460205, - "learning_rate": 1.4361702127659573e-06, - "loss": 0.7222867965698242, - "step": 10 - }, - { - "epoch": 0.0025575447570332483, - "grad_norm": 1.8327608108520508, - "learning_rate": 2.2340425531914894e-06, - "loss": 0.6977188110351562, - "step": 15 - }, - { - "epoch": 0.0034100596760443308, - "grad_norm": 1.7263766527175903, - "learning_rate": 3.0319148936170214e-06, - "loss": 0.6928215026855469, - "step": 20 - }, - { - "epoch": 0.004262574595055414, - "grad_norm": 1.8482989072799683, - "learning_rate": 3.829787234042553e-06, - "loss": 0.6887872695922852, - "step": 25 - }, - { - "epoch": 0.005115089514066497, - "grad_norm": 2.0272440910339355, - "learning_rate": 4.627659574468085e-06, - "loss": 0.6714544296264648, - "step": 30 - }, - { - "epoch": 0.005967604433077579, - "grad_norm": 2.4286959171295166, - "learning_rate": 5.425531914893616e-06, - "loss": 0.6025971412658692, - "step": 35 - }, - { - "epoch": 0.0068201193520886615, - "grad_norm": 2.26863694190979, - "learning_rate": 6.223404255319148e-06, - "loss": 0.5200423240661621, - "step": 40 - }, - { - "epoch": 0.0076726342710997444, - "grad_norm": 2.0572476387023926, - "learning_rate": 7.02127659574468e-06, - "loss": 0.3741515874862671, - "step": 45 - }, - { - "epoch": 0.008525149190110827, - "grad_norm": 0.9048103094100952, - "learning_rate": 7.819148936170211e-06, - "loss": 0.25923154354095457, - "step": 50 - }, - { - "epoch": 0.00937766410912191, - "grad_norm": 0.5942133069038391, - "learning_rate": 8.617021276595744e-06, - "loss": 0.18589640855789186, - "step": 55 - }, - { - "epoch": 0.010230179028132993, - "grad_norm": 0.40469691157341003, - "learning_rate": 9.414893617021275e-06, - "loss": 0.14508966207504273, - "step": 60 - }, - { - "epoch": 0.011082693947144074, - "grad_norm": 0.319180428981781, - "learning_rate": 1.0212765957446808e-05, - "loss": 0.1287898302078247, - "step": 65 - }, - { - "epoch": 0.011935208866155157, - "grad_norm": 0.2620982229709625, - "learning_rate": 1.101063829787234e-05, - "loss": 0.10825083255767823, - "step": 70 - }, - { - "epoch": 0.01278772378516624, - "grad_norm": 0.2702063024044037, - "learning_rate": 1.1808510638297872e-05, - "loss": 0.09140915870666504, - "step": 75 - }, - { - "epoch": 0.013640238704177323, - "grad_norm": 0.21019567549228668, - "learning_rate": 1.2606382978723403e-05, - "loss": 0.07847741842269898, - "step": 80 - }, - { - "epoch": 0.014492753623188406, - "grad_norm": 0.27865487337112427, - "learning_rate": 1.3404255319148936e-05, - "loss": 0.0681579053401947, - "step": 85 - }, - { - "epoch": 0.015345268542199489, - "grad_norm": 0.2561706006526947, - "learning_rate": 1.4202127659574466e-05, - "loss": 0.06208043098449707, - "step": 90 - }, - { - "epoch": 0.01619778346121057, - "grad_norm": 0.33415308594703674, - "learning_rate": 1.4999999999999999e-05, - "loss": 0.05436077117919922, - "step": 95 - }, - { - "epoch": 0.017050298380221655, - "grad_norm": 0.20705723762512207, - "learning_rate": 1.579787234042553e-05, - "loss": 0.061427068710327146, - "step": 100 - }, - { - "epoch": 0.017902813299232736, - "grad_norm": 0.26269420981407166, - "learning_rate": 1.659574468085106e-05, - "loss": 0.04944279193878174, - "step": 105 - }, - { - "epoch": 0.01875532821824382, - "grad_norm": 0.256533682346344, - "learning_rate": 1.7393617021276596e-05, - "loss": 0.0463131994009018, - "step": 110 - }, - { - "epoch": 0.0196078431372549, - "grad_norm": 0.25077348947525024, - "learning_rate": 1.8191489361702127e-05, - "loss": 0.036723154783248904, - "step": 115 - }, - { - "epoch": 0.020460358056265986, - "grad_norm": 0.2801378071308136, - "learning_rate": 1.8989361702127655e-05, - "loss": 0.04308137893676758, - "step": 120 - }, - { - "epoch": 0.021312872975277068, - "grad_norm": 0.23140908777713776, - "learning_rate": 1.978723404255319e-05, - "loss": 0.0391487717628479, - "step": 125 - }, - { - "epoch": 0.02216538789428815, - "grad_norm": 0.4250975251197815, - "learning_rate": 2.0585106382978724e-05, - "loss": 0.03745899498462677, - "step": 130 - }, - { - "epoch": 0.023017902813299233, - "grad_norm": 0.24533668160438538, - "learning_rate": 2.1382978723404255e-05, - "loss": 0.03649275898933411, - "step": 135 - }, - { - "epoch": 0.023870417732310314, - "grad_norm": 0.2309182733297348, - "learning_rate": 2.2180851063829783e-05, - "loss": 0.036090463399887085, - "step": 140 - }, - { - "epoch": 0.0247229326513214, - "grad_norm": 0.28087928891181946, - "learning_rate": 2.2978723404255317e-05, - "loss": 0.03179733753204346, - "step": 145 - }, - { - "epoch": 0.02557544757033248, - "grad_norm": 0.2785134017467499, - "learning_rate": 2.377659574468085e-05, - "loss": 0.02794267237186432, - "step": 150 - }, - { - "epoch": 0.026427962489343565, - "grad_norm": 0.20468176901340485, - "learning_rate": 2.457446808510638e-05, - "loss": 0.025854837894439698, - "step": 155 - }, - { - "epoch": 0.027280477408354646, - "grad_norm": 0.4069538116455078, - "learning_rate": 2.537234042553191e-05, - "loss": 0.03182780146598816, - "step": 160 - }, - { - "epoch": 0.028132992327365727, - "grad_norm": 0.3025060296058655, - "learning_rate": 2.6170212765957446e-05, - "loss": 0.027975103259086607, - "step": 165 - }, - { - "epoch": 0.028985507246376812, - "grad_norm": 0.22387781739234924, - "learning_rate": 2.6968085106382977e-05, - "loss": 0.025766396522521974, - "step": 170 - }, - { - "epoch": 0.029838022165387893, - "grad_norm": 0.15060213208198547, - "learning_rate": 2.7765957446808508e-05, - "loss": 0.025661858916282653, - "step": 175 - }, - { - "epoch": 0.030690537084398978, - "grad_norm": 0.2912445366382599, - "learning_rate": 2.856382978723404e-05, - "loss": 0.02397846579551697, - "step": 180 - }, - { - "epoch": 0.03154305200341006, - "grad_norm": 0.19390830397605896, - "learning_rate": 2.9361702127659574e-05, - "loss": 0.023744897544384004, - "step": 185 - }, - { - "epoch": 0.03239556692242114, - "grad_norm": 0.24036464095115662, - "learning_rate": 3.0159574468085105e-05, - "loss": 0.0238590270280838, - "step": 190 - }, - { - "epoch": 0.03324808184143223, - "grad_norm": 0.2793026566505432, - "learning_rate": 3.0957446808510636e-05, - "loss": 0.025464403629302978, - "step": 195 - }, - { - "epoch": 0.03410059676044331, - "grad_norm": 0.21164429187774658, - "learning_rate": 3.175531914893617e-05, - "loss": 0.020692554116249085, - "step": 200 - }, - { - "epoch": 0.03495311167945439, - "grad_norm": 0.3742266297340393, - "learning_rate": 3.25531914893617e-05, - "loss": 0.02315486818552017, - "step": 205 - }, - { - "epoch": 0.03580562659846547, - "grad_norm": 0.2690248191356659, - "learning_rate": 3.3351063829787226e-05, - "loss": 0.0174571692943573, - "step": 210 - }, - { - "epoch": 0.03665814151747655, - "grad_norm": 0.3727855980396271, - "learning_rate": 3.414893617021276e-05, - "loss": 0.021705827116966246, - "step": 215 - }, - { - "epoch": 0.03751065643648764, - "grad_norm": 0.27461397647857666, - "learning_rate": 3.4946808510638296e-05, - "loss": 0.028134092688560486, - "step": 220 - }, - { - "epoch": 0.03836317135549872, - "grad_norm": 0.1723722219467163, - "learning_rate": 3.574468085106383e-05, - "loss": 0.02093944847583771, - "step": 225 - }, - { - "epoch": 0.0392156862745098, - "grad_norm": 0.17161321640014648, - "learning_rate": 3.654255319148936e-05, - "loss": 0.021984823048114777, - "step": 230 - }, - { - "epoch": 0.040068201193520885, - "grad_norm": 0.203886479139328, - "learning_rate": 3.734042553191489e-05, - "loss": 0.02234097272157669, - "step": 235 - }, - { - "epoch": 0.04092071611253197, - "grad_norm": 0.2021923065185547, - "learning_rate": 3.813829787234042e-05, - "loss": 0.02671273946762085, - "step": 240 - }, - { - "epoch": 0.041773231031543054, - "grad_norm": 0.24510027468204498, - "learning_rate": 3.8936170212765955e-05, - "loss": 0.016925157606601716, - "step": 245 - }, - { - "epoch": 0.042625745950554135, - "grad_norm": 0.191350057721138, - "learning_rate": 3.973404255319149e-05, - "loss": 0.02067229151725769, - "step": 250 - }, - { - "epoch": 0.043478260869565216, - "grad_norm": 0.20248189568519592, - "learning_rate": 4.053191489361702e-05, - "loss": 0.01805101931095123, - "step": 255 - }, - { - "epoch": 0.0443307757885763, - "grad_norm": 0.22623269259929657, - "learning_rate": 4.1329787234042545e-05, - "loss": 0.019811727106571198, - "step": 260 - }, - { - "epoch": 0.045183290707587385, - "grad_norm": 0.2050849199295044, - "learning_rate": 4.2127659574468086e-05, - "loss": 0.017767781019210817, - "step": 265 - }, - { - "epoch": 0.04603580562659847, - "grad_norm": 0.24867363274097443, - "learning_rate": 4.2925531914893614e-05, - "loss": 0.02127009928226471, - "step": 270 - }, - { - "epoch": 0.04688832054560955, - "grad_norm": 0.20721694827079773, - "learning_rate": 4.372340425531914e-05, - "loss": 0.022770658135414124, - "step": 275 - }, - { - "epoch": 0.04774083546462063, - "grad_norm": 0.19511370360851288, - "learning_rate": 4.452127659574468e-05, - "loss": 0.02165296971797943, - "step": 280 - }, - { - "epoch": 0.04859335038363171, - "grad_norm": 0.19066740572452545, - "learning_rate": 4.5319148936170204e-05, - "loss": 0.020857495069503785, - "step": 285 - }, - { - "epoch": 0.0494458653026428, - "grad_norm": 0.14217901229858398, - "learning_rate": 4.6117021276595746e-05, - "loss": 0.016413891315460206, - "step": 290 - }, - { - "epoch": 0.05029838022165388, - "grad_norm": 0.24177870154380798, - "learning_rate": 4.6914893617021274e-05, - "loss": 0.01902117133140564, - "step": 295 - }, - { - "epoch": 0.05115089514066496, - "grad_norm": 0.2742858827114105, - "learning_rate": 4.77127659574468e-05, - "loss": 0.016554126143455507, - "step": 300 - }, - { - "epoch": 0.05200341005967604, - "grad_norm": 0.24519385397434235, - "learning_rate": 4.851063829787234e-05, - "loss": 0.022036357223987578, - "step": 305 - }, - { - "epoch": 0.05285592497868713, - "grad_norm": 0.296572208404541, - "learning_rate": 4.930851063829787e-05, - "loss": 0.0196581095457077, - "step": 310 - }, - { - "epoch": 0.05370843989769821, - "grad_norm": 0.17092008888721466, - "learning_rate": 5.01063829787234e-05, - "loss": 0.015453743934631347, - "step": 315 - }, - { - "epoch": 0.05456095481670929, - "grad_norm": 0.17074067890644073, - "learning_rate": 5.090425531914893e-05, - "loss": 0.013983796536922454, - "step": 320 - }, - { - "epoch": 0.05541346973572037, - "grad_norm": 0.27302470803260803, - "learning_rate": 5.170212765957446e-05, - "loss": 0.015387402474880218, - "step": 325 - }, - { - "epoch": 0.056265984654731455, - "grad_norm": 0.2603592872619629, - "learning_rate": 5.2499999999999995e-05, - "loss": 0.013557825982570649, - "step": 330 - }, - { - "epoch": 0.05711849957374254, - "grad_norm": 0.23079948127269745, - "learning_rate": 5.329787234042553e-05, - "loss": 0.014809322357177735, - "step": 335 - }, - { - "epoch": 0.057971014492753624, - "grad_norm": 0.15029627084732056, - "learning_rate": 5.409574468085106e-05, - "loss": 0.018204084038734435, - "step": 340 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 0.2930934429168701, - "learning_rate": 5.4893617021276586e-05, - "loss": 0.02035558819770813, - "step": 345 - }, - { - "epoch": 0.059676044330775786, - "grad_norm": 0.19710905849933624, - "learning_rate": 5.569148936170213e-05, - "loss": 0.015025021135807037, - "step": 350 - }, - { - "epoch": 0.060528559249786874, - "grad_norm": 0.13316969573497772, - "learning_rate": 5.6489361702127655e-05, - "loss": 0.01258893460035324, - "step": 355 - }, - { - "epoch": 0.061381074168797956, - "grad_norm": 0.30591729283332825, - "learning_rate": 5.728723404255319e-05, - "loss": 0.0187319278717041, - "step": 360 - }, - { - "epoch": 0.06223358908780904, - "grad_norm": 0.22696663439273834, - "learning_rate": 5.808510638297872e-05, - "loss": 0.015805599093437196, - "step": 365 - }, - { - "epoch": 0.06308610400682012, - "grad_norm": 0.2800130546092987, - "learning_rate": 5.8882978723404245e-05, - "loss": 0.01719983071088791, - "step": 370 - }, - { - "epoch": 0.0639386189258312, - "grad_norm": 0.20671235024929047, - "learning_rate": 5.9680851063829786e-05, - "loss": 0.014449423551559449, - "step": 375 - }, - { - "epoch": 0.06479113384484228, - "grad_norm": 0.1630883365869522, - "learning_rate": 6.0478723404255314e-05, - "loss": 0.017153808474540712, - "step": 380 - }, - { - "epoch": 0.06564364876385337, - "grad_norm": 0.18699344992637634, - "learning_rate": 6.127659574468084e-05, - "loss": 0.015558701753616334, - "step": 385 - }, - { - "epoch": 0.06649616368286446, - "grad_norm": 0.15257929265499115, - "learning_rate": 6.207446808510638e-05, - "loss": 0.018240168690681458, - "step": 390 - }, - { - "epoch": 0.06734867860187553, - "grad_norm": 0.19658304750919342, - "learning_rate": 6.287234042553191e-05, - "loss": 0.015907022356987, - "step": 395 - }, - { - "epoch": 0.06820119352088662, - "grad_norm": 0.3743384778499603, - "learning_rate": 6.367021276595743e-05, - "loss": 0.014921861886978149, - "step": 400 - }, - { - "epoch": 0.06905370843989769, - "grad_norm": 0.254724383354187, - "learning_rate": 6.446808510638298e-05, - "loss": 0.018739913403987885, - "step": 405 - }, - { - "epoch": 0.06990622335890878, - "grad_norm": 0.19365151226520538, - "learning_rate": 6.52659574468085e-05, - "loss": 0.018984711170196532, - "step": 410 - }, - { - "epoch": 0.07075873827791987, - "grad_norm": 0.20728597044944763, - "learning_rate": 6.606382978723404e-05, - "loss": 0.01524859368801117, - "step": 415 - }, - { - "epoch": 0.07161125319693094, - "grad_norm": 0.1415344476699829, - "learning_rate": 6.686170212765957e-05, - "loss": 0.014004582166671753, - "step": 420 - }, - { - "epoch": 0.07246376811594203, - "grad_norm": 0.14169375598430634, - "learning_rate": 6.765957446808509e-05, - "loss": 0.013503608107566834, - "step": 425 - }, - { - "epoch": 0.0733162830349531, - "grad_norm": 0.22518369555473328, - "learning_rate": 6.845744680851064e-05, - "loss": 0.017587104439735414, - "step": 430 - }, - { - "epoch": 0.0741687979539642, - "grad_norm": 0.1091267541050911, - "learning_rate": 6.925531914893616e-05, - "loss": 0.013890406489372254, - "step": 435 - }, - { - "epoch": 0.07502131287297528, - "grad_norm": 0.24982722103595734, - "learning_rate": 7.00531914893617e-05, - "loss": 0.01599075198173523, - "step": 440 - }, - { - "epoch": 0.07587382779198636, - "grad_norm": 0.22311007976531982, - "learning_rate": 7.085106382978723e-05, - "loss": 0.014870230853557587, - "step": 445 - }, - { - "epoch": 0.07672634271099744, - "grad_norm": 0.27064985036849976, - "learning_rate": 7.164893617021276e-05, - "loss": 0.019213163852691652, - "step": 450 - }, - { - "epoch": 0.07757885763000852, - "grad_norm": 0.16876882314682007, - "learning_rate": 7.244680851063829e-05, - "loss": 0.014935044944286347, - "step": 455 - }, - { - "epoch": 0.0784313725490196, - "grad_norm": 0.18644963204860687, - "learning_rate": 7.324468085106382e-05, - "loss": 0.013823372125625611, - "step": 460 - }, - { - "epoch": 0.0792838874680307, - "grad_norm": 0.13067972660064697, - "learning_rate": 7.404255319148935e-05, - "loss": 0.013839980959892273, - "step": 465 - }, - { - "epoch": 0.08013640238704177, - "grad_norm": 0.0976850613951683, - "learning_rate": 7.484042553191489e-05, - "loss": 0.012782379984855652, - "step": 470 - }, - { - "epoch": 0.08098891730605286, - "grad_norm": 0.2523496150970459, - "learning_rate": 7.499999439800074e-05, - "loss": 0.013075053691864014, - "step": 475 - }, - { - "epoch": 0.08184143222506395, - "grad_norm": 0.18349343538284302, - "learning_rate": 7.499997163988164e-05, - "loss": 0.01753528565168381, - "step": 480 - }, - { - "epoch": 0.08269394714407502, - "grad_norm": 0.16528834402561188, - "learning_rate": 7.499993137552834e-05, - "loss": 0.012987489998340606, - "step": 485 - }, - { - "epoch": 0.08354646206308611, - "grad_norm": 0.1918938159942627, - "learning_rate": 7.499987360495964e-05, - "loss": 0.018496687710285186, - "step": 490 - }, - { - "epoch": 0.08439897698209718, - "grad_norm": 0.11452502012252808, - "learning_rate": 7.499979832820255e-05, - "loss": 0.015578755736351013, - "step": 495 - }, - { - "epoch": 0.08525149190110827, - "grad_norm": 0.2084985226392746, - "learning_rate": 7.499970554529216e-05, - "loss": 0.014264023303985596, - "step": 500 - }, - { - "epoch": 0.08610400682011936, - "grad_norm": 0.13777600228786469, - "learning_rate": 7.49995952562718e-05, - "loss": 0.014527246356010437, - "step": 505 - }, - { - "epoch": 0.08695652173913043, - "grad_norm": 0.2515336275100708, - "learning_rate": 7.499946746119296e-05, - "loss": 0.019042417407035828, - "step": 510 - }, - { - "epoch": 0.08780903665814152, - "grad_norm": 0.12859591841697693, - "learning_rate": 7.499932216011531e-05, - "loss": 0.01340993344783783, - "step": 515 - }, - { - "epoch": 0.0886615515771526, - "grad_norm": 0.15603038668632507, - "learning_rate": 7.499915935310667e-05, - "loss": 0.01645509898662567, - "step": 520 - }, - { - "epoch": 0.08951406649616368, - "grad_norm": 0.1493784785270691, - "learning_rate": 7.499897904024303e-05, - "loss": 0.016503919661045075, - "step": 525 - }, - { - "epoch": 0.09036658141517477, - "grad_norm": 0.14551150798797607, - "learning_rate": 7.499878122160858e-05, - "loss": 0.013891169428825378, - "step": 530 - }, - { - "epoch": 0.09121909633418585, - "grad_norm": 0.12197990715503693, - "learning_rate": 7.499856589729569e-05, - "loss": 0.01531532108783722, - "step": 535 - }, - { - "epoch": 0.09207161125319693, - "grad_norm": 0.24787496030330658, - "learning_rate": 7.499833306740485e-05, - "loss": 0.016374409198760986, - "step": 540 - }, - { - "epoch": 0.09292412617220801, - "grad_norm": 0.10902808606624603, - "learning_rate": 7.499808273204476e-05, - "loss": 0.013505718111991883, - "step": 545 - }, - { - "epoch": 0.0937766410912191, - "grad_norm": 0.16540755331516266, - "learning_rate": 7.499781489133228e-05, - "loss": 0.016257329285144805, - "step": 550 - }, - { - "epoch": 0.09462915601023018, - "grad_norm": 0.1228717565536499, - "learning_rate": 7.499752954539245e-05, - "loss": 0.011345921456813813, - "step": 555 - }, - { - "epoch": 0.09548167092924126, - "grad_norm": 0.12704797089099884, - "learning_rate": 7.49972266943585e-05, - "loss": 0.014449910819530487, - "step": 560 - }, - { - "epoch": 0.09633418584825235, - "grad_norm": 0.2014201581478119, - "learning_rate": 7.499690633837178e-05, - "loss": 0.01456935852766037, - "step": 565 - }, - { - "epoch": 0.09718670076726342, - "grad_norm": 0.17480657994747162, - "learning_rate": 7.499656847758187e-05, - "loss": 0.014413098990917205, - "step": 570 - }, - { - "epoch": 0.09803921568627451, - "grad_norm": 0.18946893513202667, - "learning_rate": 7.499621311214646e-05, - "loss": 0.0139508917927742, - "step": 575 - }, - { - "epoch": 0.0988917306052856, - "grad_norm": 0.15367820858955383, - "learning_rate": 7.499584024223149e-05, - "loss": 0.014091435074806213, - "step": 580 - }, - { - "epoch": 0.09974424552429667, - "grad_norm": 0.15018688142299652, - "learning_rate": 7.499544986801099e-05, - "loss": 0.016737687587738036, - "step": 585 - }, - { - "epoch": 0.10059676044330776, - "grad_norm": 0.17522579431533813, - "learning_rate": 7.499504198966722e-05, - "loss": 0.015822765231132508, - "step": 590 - }, - { - "epoch": 0.10144927536231885, - "grad_norm": 0.12634818255901337, - "learning_rate": 7.499461660739059e-05, - "loss": 0.015363042056560517, - "step": 595 - }, - { - "epoch": 0.10230179028132992, - "grad_norm": 0.12466495484113693, - "learning_rate": 7.499417372137968e-05, - "loss": 0.013208043575286866, - "step": 600 - }, - { - "epoch": 0.10315430520034101, - "grad_norm": 0.18877951800823212, - "learning_rate": 7.499371333184125e-05, - "loss": 0.016261917352676392, - "step": 605 - }, - { - "epoch": 0.10400682011935208, - "grad_norm": 0.21521399915218353, - "learning_rate": 7.49932354389902e-05, - "loss": 0.013861049711704255, - "step": 610 - }, - { - "epoch": 0.10485933503836317, - "grad_norm": 0.1375613659620285, - "learning_rate": 7.499274004304964e-05, - "loss": 0.015597744286060334, - "step": 615 - }, - { - "epoch": 0.10571184995737426, - "grad_norm": 0.16078200936317444, - "learning_rate": 7.499222714425087e-05, - "loss": 0.018308353424072266, - "step": 620 - }, - { - "epoch": 0.10656436487638533, - "grad_norm": 0.15485632419586182, - "learning_rate": 7.499169674283328e-05, - "loss": 0.015836401283740996, - "step": 625 - }, - { - "epoch": 0.10741687979539642, - "grad_norm": 0.14019452035427094, - "learning_rate": 7.499114883904451e-05, - "loss": 0.014591602981090546, - "step": 630 - }, - { - "epoch": 0.1082693947144075, - "grad_norm": 0.2042212039232254, - "learning_rate": 7.499058343314031e-05, - "loss": 0.01718664914369583, - "step": 635 - }, - { - "epoch": 0.10912190963341858, - "grad_norm": 0.13801656663417816, - "learning_rate": 7.499000052538467e-05, - "loss": 0.015579727292060853, - "step": 640 - }, - { - "epoch": 0.10997442455242967, - "grad_norm": 0.12787523865699768, - "learning_rate": 7.498940011604968e-05, - "loss": 0.012525486946105956, - "step": 645 - }, - { - "epoch": 0.11082693947144075, - "grad_norm": 0.1752539724111557, - "learning_rate": 7.498878220541564e-05, - "loss": 0.014286568760871888, - "step": 650 - }, - { - "epoch": 0.11167945439045183, - "grad_norm": 0.12485212087631226, - "learning_rate": 7.498814679377101e-05, - "loss": 0.01482405811548233, - "step": 655 - }, - { - "epoch": 0.11253196930946291, - "grad_norm": 0.10980220139026642, - "learning_rate": 7.498749388141243e-05, - "loss": 0.01597980558872223, - "step": 660 - }, - { - "epoch": 0.113384484228474, - "grad_norm": 0.09144977480173111, - "learning_rate": 7.498682346864469e-05, - "loss": 0.011583130061626434, - "step": 665 - }, - { - "epoch": 0.11423699914748509, - "grad_norm": 0.11955336481332779, - "learning_rate": 7.498613555578076e-05, - "loss": 0.013009116053581238, - "step": 670 - }, - { - "epoch": 0.11508951406649616, - "grad_norm": 0.14505070447921753, - "learning_rate": 7.49854301431418e-05, - "loss": 0.013878281414508819, - "step": 675 - }, - { - "epoch": 0.11594202898550725, - "grad_norm": 0.15674598515033722, - "learning_rate": 7.49847072310571e-05, - "loss": 0.014717698097229004, - "step": 680 - }, - { - "epoch": 0.11679454390451834, - "grad_norm": 0.16684003174304962, - "learning_rate": 7.498396681986413e-05, - "loss": 0.015567027032375336, - "step": 685 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 0.13314439356327057, - "learning_rate": 7.498320890990857e-05, - "loss": 0.017679129540920258, - "step": 690 - }, - { - "epoch": 0.1184995737425405, - "grad_norm": 0.1099700927734375, - "learning_rate": 7.498243350154423e-05, - "loss": 0.015660777688026428, - "step": 695 - }, - { - "epoch": 0.11935208866155157, - "grad_norm": 0.09305288642644882, - "learning_rate": 7.498164059513307e-05, - "loss": 0.011864218115806579, - "step": 700 - }, - { - "epoch": 0.12020460358056266, - "grad_norm": 0.1284978687763214, - "learning_rate": 7.498083019104527e-05, - "loss": 0.013128276169300079, - "step": 705 - }, - { - "epoch": 0.12105711849957375, - "grad_norm": 0.19205476343631744, - "learning_rate": 7.498000228965913e-05, - "loss": 0.014518238604068756, - "step": 710 - }, - { - "epoch": 0.12190963341858482, - "grad_norm": 0.25064221024513245, - "learning_rate": 7.497915689136119e-05, - "loss": 0.014235696196556092, - "step": 715 - }, - { - "epoch": 0.12276214833759591, - "grad_norm": 0.11866044998168945, - "learning_rate": 7.497829399654607e-05, - "loss": 0.013622967898845673, - "step": 720 - }, - { - "epoch": 0.12361466325660699, - "grad_norm": 0.1366252452135086, - "learning_rate": 7.49774136056166e-05, - "loss": 0.013592693209648132, - "step": 725 - }, - { - "epoch": 0.12446717817561807, - "grad_norm": 0.12722773849964142, - "learning_rate": 7.497651571898379e-05, - "loss": 0.02055167257785797, - "step": 730 - }, - { - "epoch": 0.12531969309462915, - "grad_norm": 0.07723517715930939, - "learning_rate": 7.49756003370668e-05, - "loss": 0.012377069890499115, - "step": 735 - }, - { - "epoch": 0.12617220801364024, - "grad_norm": 0.11081967502832413, - "learning_rate": 7.497466746029293e-05, - "loss": 0.013797640800476074, - "step": 740 - }, - { - "epoch": 0.12702472293265132, - "grad_norm": 0.1117677390575409, - "learning_rate": 7.497371708909771e-05, - "loss": 0.01338990479707718, - "step": 745 - }, - { - "epoch": 0.1278772378516624, - "grad_norm": 0.24659112095832825, - "learning_rate": 7.497274922392483e-05, - "loss": 0.012844511866569519, - "step": 750 - }, - { - "epoch": 0.1287297527706735, - "grad_norm": 0.24900244176387787, - "learning_rate": 7.497176386522606e-05, - "loss": 0.015656352043151855, - "step": 755 - }, - { - "epoch": 0.12958226768968456, - "grad_norm": 0.1331390142440796, - "learning_rate": 7.497076101346144e-05, - "loss": 0.013722085952758789, - "step": 760 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 0.1224697008728981, - "learning_rate": 7.496974066909913e-05, - "loss": 0.011186304688453674, - "step": 765 - }, - { - "epoch": 0.13128729752770674, - "grad_norm": 0.11053820699453354, - "learning_rate": 7.496870283261546e-05, - "loss": 0.012894454598426818, - "step": 770 - }, - { - "epoch": 0.13213981244671782, - "grad_norm": 0.09663277864456177, - "learning_rate": 7.49676475044949e-05, - "loss": 0.015331995487213135, - "step": 775 - }, - { - "epoch": 0.1329923273657289, - "grad_norm": 0.1483219712972641, - "learning_rate": 7.496657468523014e-05, - "loss": 0.015070399641990662, - "step": 780 - }, - { - "epoch": 0.13384484228473997, - "grad_norm": 0.12375539541244507, - "learning_rate": 7.496548437532202e-05, - "loss": 0.013722005486488342, - "step": 785 - }, - { - "epoch": 0.13469735720375106, - "grad_norm": 0.14662130177021027, - "learning_rate": 7.496437657527949e-05, - "loss": 0.012852996587753296, - "step": 790 - }, - { - "epoch": 0.13554987212276215, - "grad_norm": 0.1740300953388214, - "learning_rate": 7.496325128561975e-05, - "loss": 0.014796209335327149, - "step": 795 - }, - { - "epoch": 0.13640238704177324, - "grad_norm": 0.10670243203639984, - "learning_rate": 7.496210850686809e-05, - "loss": 0.013983005285263061, - "step": 800 - }, - { - "epoch": 0.13725490196078433, - "grad_norm": 0.1362515240907669, - "learning_rate": 7.496094823955801e-05, - "loss": 0.014472618699073792, - "step": 805 - }, - { - "epoch": 0.13810741687979539, - "grad_norm": 0.11403689533472061, - "learning_rate": 7.495977048423117e-05, - "loss": 0.01294848918914795, - "step": 810 - }, - { - "epoch": 0.13895993179880647, - "grad_norm": 0.11532565206289291, - "learning_rate": 7.495857524143738e-05, - "loss": 0.011346425861120224, - "step": 815 - }, - { - "epoch": 0.13981244671781756, - "grad_norm": 0.1506761759519577, - "learning_rate": 7.495736251173463e-05, - "loss": 0.016532811522483825, - "step": 820 - }, - { - "epoch": 0.14066496163682865, - "grad_norm": 0.10915899276733398, - "learning_rate": 7.495613229568903e-05, - "loss": 0.01278679072856903, - "step": 825 - }, - { - "epoch": 0.14151747655583974, - "grad_norm": 0.10394130647182465, - "learning_rate": 7.49548845938749e-05, - "loss": 0.011449626088142395, - "step": 830 - }, - { - "epoch": 0.1423699914748508, - "grad_norm": 0.08730677515268326, - "learning_rate": 7.495361940687475e-05, - "loss": 0.011916645616292954, - "step": 835 - }, - { - "epoch": 0.1432225063938619, - "grad_norm": 0.08257348835468292, - "learning_rate": 7.495233673527914e-05, - "loss": 0.013689276576042176, - "step": 840 - }, - { - "epoch": 0.14407502131287298, - "grad_norm": 0.182078555226326, - "learning_rate": 7.495103657968692e-05, - "loss": 0.019141729176044463, - "step": 845 - }, - { - "epoch": 0.14492753623188406, - "grad_norm": 0.105568528175354, - "learning_rate": 7.494971894070501e-05, - "loss": 0.015522226691246033, - "step": 850 - }, - { - "epoch": 0.14578005115089515, - "grad_norm": 0.09030122309923172, - "learning_rate": 7.494838381894856e-05, - "loss": 0.012900182604789734, - "step": 855 - }, - { - "epoch": 0.1466325660699062, - "grad_norm": 0.11199921369552612, - "learning_rate": 7.494703121504082e-05, - "loss": 0.013011230528354645, - "step": 860 - }, - { - "epoch": 0.1474850809889173, - "grad_norm": 0.11342430859804153, - "learning_rate": 7.494566112961325e-05, - "loss": 0.015477502346038818, - "step": 865 - }, - { - "epoch": 0.1483375959079284, - "grad_norm": 0.07956613600254059, - "learning_rate": 7.494427356330544e-05, - "loss": 0.011270551383495331, - "step": 870 - }, - { - "epoch": 0.14919011082693948, - "grad_norm": 0.13356897234916687, - "learning_rate": 7.494286851676515e-05, - "loss": 0.012481572479009629, - "step": 875 - }, - { - "epoch": 0.15004262574595056, - "grad_norm": 0.14493827521800995, - "learning_rate": 7.494144599064833e-05, - "loss": 0.015318951010704041, - "step": 880 - }, - { - "epoch": 0.15089514066496162, - "grad_norm": 0.13254614174365997, - "learning_rate": 7.494000598561902e-05, - "loss": 0.012697519361972808, - "step": 885 - }, - { - "epoch": 0.1517476555839727, - "grad_norm": 0.12164720892906189, - "learning_rate": 7.49385485023495e-05, - "loss": 0.011361779272556305, - "step": 890 - }, - { - "epoch": 0.1526001705029838, - "grad_norm": 0.10743863880634308, - "learning_rate": 7.493707354152015e-05, - "loss": 0.012824115157127381, - "step": 895 - }, - { - "epoch": 0.1534526854219949, - "grad_norm": 0.0933203473687172, - "learning_rate": 7.493558110381954e-05, - "loss": 0.012234293669462205, - "step": 900 - }, - { - "epoch": 0.15430520034100598, - "grad_norm": 0.10252948850393295, - "learning_rate": 7.493407118994437e-05, - "loss": 0.01874733567237854, - "step": 905 - }, - { - "epoch": 0.15515771526001704, - "grad_norm": 0.1184248998761177, - "learning_rate": 7.493254380059954e-05, - "loss": 0.012014241516590118, - "step": 910 - }, - { - "epoch": 0.15601023017902813, - "grad_norm": 0.10140799731016159, - "learning_rate": 7.49309989364981e-05, - "loss": 0.013288506865501403, - "step": 915 - }, - { - "epoch": 0.1568627450980392, - "grad_norm": 0.15704941749572754, - "learning_rate": 7.492943659836121e-05, - "loss": 0.012907981872558594, - "step": 920 - }, - { - "epoch": 0.1577152600170503, - "grad_norm": 0.10368761420249939, - "learning_rate": 7.492785678691822e-05, - "loss": 0.01384827345609665, - "step": 925 - }, - { - "epoch": 0.1585677749360614, - "grad_norm": 0.13517284393310547, - "learning_rate": 7.492625950290668e-05, - "loss": 0.014644764363765717, - "step": 930 - }, - { - "epoch": 0.15942028985507245, - "grad_norm": 0.08189263194799423, - "learning_rate": 7.492464474707222e-05, - "loss": 0.011974713951349258, - "step": 935 - }, - { - "epoch": 0.16027280477408354, - "grad_norm": 0.11737149208784103, - "learning_rate": 7.492301252016867e-05, - "loss": 0.014023615419864655, - "step": 940 - }, - { - "epoch": 0.16112531969309463, - "grad_norm": 0.15778031945228577, - "learning_rate": 7.492136282295801e-05, - "loss": 0.01533726304769516, - "step": 945 - }, - { - "epoch": 0.16197783461210571, - "grad_norm": 0.14194118976593018, - "learning_rate": 7.491969565621036e-05, - "loss": 0.01569782793521881, - "step": 950 - }, - { - "epoch": 0.1628303495311168, - "grad_norm": 0.12579558789730072, - "learning_rate": 7.491801102070403e-05, - "loss": 0.012844063341617584, - "step": 955 - }, - { - "epoch": 0.1636828644501279, - "grad_norm": 0.10578597337007523, - "learning_rate": 7.491630891722547e-05, - "loss": 0.011186198890209198, - "step": 960 - }, - { - "epoch": 0.16453537936913895, - "grad_norm": 0.12398207187652588, - "learning_rate": 7.491458934656925e-05, - "loss": 0.014328254759311676, - "step": 965 - }, - { - "epoch": 0.16538789428815004, - "grad_norm": 0.09390626102685928, - "learning_rate": 7.491285230953814e-05, - "loss": 0.013881708681583404, - "step": 970 - }, - { - "epoch": 0.16624040920716113, - "grad_norm": 0.17196106910705566, - "learning_rate": 7.491109780694303e-05, - "loss": 0.014424234628677368, - "step": 975 - }, - { - "epoch": 0.16709292412617222, - "grad_norm": 0.06994624435901642, - "learning_rate": 7.490932583960302e-05, - "loss": 0.010434426367282867, - "step": 980 - }, - { - "epoch": 0.1679454390451833, - "grad_norm": 0.09414499998092651, - "learning_rate": 7.490753640834527e-05, - "loss": 0.010483803600072861, - "step": 985 - }, - { - "epoch": 0.16879795396419436, - "grad_norm": 0.11002875864505768, - "learning_rate": 7.490572951400518e-05, - "loss": 0.009266073256731034, - "step": 990 - }, - { - "epoch": 0.16965046888320545, - "grad_norm": 0.14956022799015045, - "learning_rate": 7.490390515742626e-05, - "loss": 0.015529079735279084, - "step": 995 - }, - { - "epoch": 0.17050298380221654, - "grad_norm": 0.13828666508197784, - "learning_rate": 7.490206333946019e-05, - "loss": 0.011511271446943283, - "step": 1000 - }, - { - "epoch": 0.17135549872122763, - "grad_norm": 0.1265997439622879, - "learning_rate": 7.490020406096678e-05, - "loss": 0.010465707629919052, - "step": 1005 - }, - { - "epoch": 0.17220801364023872, - "grad_norm": 0.07772056013345718, - "learning_rate": 7.489832732281401e-05, - "loss": 0.013828210532665253, - "step": 1010 - }, - { - "epoch": 0.17306052855924978, - "grad_norm": 0.08055376261472702, - "learning_rate": 7.489643312587799e-05, - "loss": 0.013010218739509583, - "step": 1015 - }, - { - "epoch": 0.17391304347826086, - "grad_norm": 0.09059367328882217, - "learning_rate": 7.489452147104301e-05, - "loss": 0.013864235579967498, - "step": 1020 - }, - { - "epoch": 0.17476555839727195, - "grad_norm": 0.15273553133010864, - "learning_rate": 7.489259235920149e-05, - "loss": 0.013432112336158753, - "step": 1025 - }, - { - "epoch": 0.17561807331628304, - "grad_norm": 0.11343793570995331, - "learning_rate": 7.489064579125399e-05, - "loss": 0.01213686615228653, - "step": 1030 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 0.11880069226026535, - "learning_rate": 7.488868176810926e-05, - "loss": 0.012188264727592468, - "step": 1035 - }, - { - "epoch": 0.1773231031543052, - "grad_norm": 0.16270127892494202, - "learning_rate": 7.488670029068415e-05, - "loss": 0.015294317901134492, - "step": 1040 - }, - { - "epoch": 0.17817561807331628, - "grad_norm": 0.11542797088623047, - "learning_rate": 7.488470135990369e-05, - "loss": 0.013500772416591644, - "step": 1045 - }, - { - "epoch": 0.17902813299232737, - "grad_norm": 0.09579882025718689, - "learning_rate": 7.488268497670103e-05, - "loss": 0.013453315198421478, - "step": 1050 - }, - { - "epoch": 0.17988064791133845, - "grad_norm": 0.08847666531801224, - "learning_rate": 7.488065114201752e-05, - "loss": 0.0153861403465271, - "step": 1055 - }, - { - "epoch": 0.18073316283034954, - "grad_norm": 0.11167823523283005, - "learning_rate": 7.487859985680257e-05, - "loss": 0.011502107977867127, - "step": 1060 - }, - { - "epoch": 0.1815856777493606, - "grad_norm": 0.11255413293838501, - "learning_rate": 7.487653112201385e-05, - "loss": 0.012194579839706421, - "step": 1065 - }, - { - "epoch": 0.1824381926683717, - "grad_norm": 0.10929680615663528, - "learning_rate": 7.487444493861705e-05, - "loss": 0.015874122083187104, - "step": 1070 - }, - { - "epoch": 0.18329070758738278, - "grad_norm": 0.10753592848777771, - "learning_rate": 7.487234130758613e-05, - "loss": 0.009445396810770034, - "step": 1075 - }, - { - "epoch": 0.18414322250639387, - "grad_norm": 0.11368737369775772, - "learning_rate": 7.487022022990309e-05, - "loss": 0.011674505472183228, - "step": 1080 - }, - { - "epoch": 0.18499573742540495, - "grad_norm": 0.12944836914539337, - "learning_rate": 7.486808170655813e-05, - "loss": 0.011856313049793243, - "step": 1085 - }, - { - "epoch": 0.18584825234441602, - "grad_norm": 0.0833214819431305, - "learning_rate": 7.48659257385496e-05, - "loss": 0.014119544625282287, - "step": 1090 - }, - { - "epoch": 0.1867007672634271, - "grad_norm": 0.11955790221691132, - "learning_rate": 7.486375232688397e-05, - "loss": 0.012977911531925202, - "step": 1095 - }, - { - "epoch": 0.1875532821824382, - "grad_norm": 0.1452455222606659, - "learning_rate": 7.486156147257584e-05, - "loss": 0.015410827100276947, - "step": 1100 - }, - { - "epoch": 0.18840579710144928, - "grad_norm": 0.09630092978477478, - "learning_rate": 7.485935317664801e-05, - "loss": 0.013698874413967133, - "step": 1105 - }, - { - "epoch": 0.18925831202046037, - "grad_norm": 0.11687257140874863, - "learning_rate": 7.485712744013137e-05, - "loss": 0.013196484744548797, - "step": 1110 - }, - { - "epoch": 0.19011082693947143, - "grad_norm": 0.08894632011651993, - "learning_rate": 7.485488426406495e-05, - "loss": 0.01540881097316742, - "step": 1115 - }, - { - "epoch": 0.19096334185848252, - "grad_norm": 0.09196458756923676, - "learning_rate": 7.485262364949597e-05, - "loss": 0.012018527090549468, - "step": 1120 - }, - { - "epoch": 0.1918158567774936, - "grad_norm": 0.12328553944826126, - "learning_rate": 7.485034559747974e-05, - "loss": 0.014925773441791534, - "step": 1125 - }, - { - "epoch": 0.1926683716965047, - "grad_norm": 0.12566202878952026, - "learning_rate": 7.484805010907975e-05, - "loss": 0.01104198843240738, - "step": 1130 - }, - { - "epoch": 0.19352088661551578, - "grad_norm": 0.1022765263915062, - "learning_rate": 7.484573718536758e-05, - "loss": 0.0118367500603199, - "step": 1135 - }, - { - "epoch": 0.19437340153452684, - "grad_norm": 0.09682224690914154, - "learning_rate": 7.4843406827423e-05, - "loss": 0.011423001438379288, - "step": 1140 - }, - { - "epoch": 0.19522591645353793, - "grad_norm": 0.15166254341602325, - "learning_rate": 7.484105903633388e-05, - "loss": 0.014048118889331818, - "step": 1145 - }, - { - "epoch": 0.19607843137254902, - "grad_norm": 0.09285032004117966, - "learning_rate": 7.483869381319627e-05, - "loss": 0.012882034480571746, - "step": 1150 - }, - { - "epoch": 0.1969309462915601, - "grad_norm": 0.09011214971542358, - "learning_rate": 7.483631115911434e-05, - "loss": 0.01419239491224289, - "step": 1155 - }, - { - "epoch": 0.1977834612105712, - "grad_norm": 0.14540383219718933, - "learning_rate": 7.483391107520037e-05, - "loss": 0.014623096585273743, - "step": 1160 - }, - { - "epoch": 0.19863597612958228, - "grad_norm": 0.12326448410749435, - "learning_rate": 7.483149356257479e-05, - "loss": 0.013109591603279114, - "step": 1165 - }, - { - "epoch": 0.19948849104859334, - "grad_norm": 0.07067535817623138, - "learning_rate": 7.482905862236622e-05, - "loss": 0.013740380108356477, - "step": 1170 - }, - { - "epoch": 0.20034100596760443, - "grad_norm": 0.10170529782772064, - "learning_rate": 7.482660625571134e-05, - "loss": 0.011151721328496933, - "step": 1175 - }, - { - "epoch": 0.20119352088661552, - "grad_norm": 0.10074683278799057, - "learning_rate": 7.482413646375498e-05, - "loss": 0.01180294007062912, - "step": 1180 - }, - { - "epoch": 0.2020460358056266, - "grad_norm": 0.047992780804634094, - "learning_rate": 7.482164924765016e-05, - "loss": 0.01065710186958313, - "step": 1185 - }, - { - "epoch": 0.2028985507246377, - "grad_norm": 0.1279197484254837, - "learning_rate": 7.481914460855796e-05, - "loss": 0.012219739705324173, - "step": 1190 - }, - { - "epoch": 0.20375106564364875, - "grad_norm": 0.11339649558067322, - "learning_rate": 7.481662254764765e-05, - "loss": 0.012664712965488434, - "step": 1195 - }, - { - "epoch": 0.20460358056265984, - "grad_norm": 0.10576959699392319, - "learning_rate": 7.481408306609662e-05, - "loss": 0.010009048134088516, - "step": 1200 - }, - { - "epoch": 0.20545609548167093, - "grad_norm": 0.08073283731937408, - "learning_rate": 7.481152616509037e-05, - "loss": 0.011126396805047989, - "step": 1205 - }, - { - "epoch": 0.20630861040068202, - "grad_norm": 0.09888558834791183, - "learning_rate": 7.480895184582253e-05, - "loss": 0.013096305727958679, - "step": 1210 - }, - { - "epoch": 0.2071611253196931, - "grad_norm": 0.09703118354082108, - "learning_rate": 7.48063601094949e-05, - "loss": 0.010653502494096755, - "step": 1215 - }, - { - "epoch": 0.20801364023870417, - "grad_norm": 0.10693266987800598, - "learning_rate": 7.48037509573174e-05, - "loss": 0.012283077836036682, - "step": 1220 - }, - { - "epoch": 0.20886615515771526, - "grad_norm": 0.1024889275431633, - "learning_rate": 7.480112439050804e-05, - "loss": 0.012971282005310059, - "step": 1225 - }, - { - "epoch": 0.20971867007672634, - "grad_norm": 0.10043203830718994, - "learning_rate": 7.4798480410293e-05, - "loss": 0.011451539397239686, - "step": 1230 - }, - { - "epoch": 0.21057118499573743, - "grad_norm": 0.11248672753572464, - "learning_rate": 7.47958190179066e-05, - "loss": 0.012805460393428803, - "step": 1235 - }, - { - "epoch": 0.21142369991474852, - "grad_norm": 0.08651525527238846, - "learning_rate": 7.479314021459123e-05, - "loss": 0.013016811013221741, - "step": 1240 - }, - { - "epoch": 0.21227621483375958, - "grad_norm": 0.06062095984816551, - "learning_rate": 7.479044400159746e-05, - "loss": 0.01299230456352234, - "step": 1245 - }, - { - "epoch": 0.21312872975277067, - "grad_norm": 0.1589215248823166, - "learning_rate": 7.478773038018397e-05, - "loss": 0.012607543170452118, - "step": 1250 - }, - { - "epoch": 0.21398124467178176, - "grad_norm": 0.12105076014995575, - "learning_rate": 7.478499935161758e-05, - "loss": 0.012772174179553985, - "step": 1255 - }, - { - "epoch": 0.21483375959079284, - "grad_norm": 0.0846070721745491, - "learning_rate": 7.478225091717323e-05, - "loss": 0.009387130290269852, - "step": 1260 - }, - { - "epoch": 0.21568627450980393, - "grad_norm": 0.054560381919145584, - "learning_rate": 7.477948507813396e-05, - "loss": 0.013299009203910828, - "step": 1265 - }, - { - "epoch": 0.216538789428815, - "grad_norm": 0.10125566273927689, - "learning_rate": 7.477670183579094e-05, - "loss": 0.013010343909263611, - "step": 1270 - }, - { - "epoch": 0.21739130434782608, - "grad_norm": 0.10493458807468414, - "learning_rate": 7.477390119144353e-05, - "loss": 0.013531042635440827, - "step": 1275 - }, - { - "epoch": 0.21824381926683717, - "grad_norm": 0.07453935593366623, - "learning_rate": 7.477108314639913e-05, - "loss": 0.01330820918083191, - "step": 1280 - }, - { - "epoch": 0.21909633418584826, - "grad_norm": 0.14430643618106842, - "learning_rate": 7.47682477019733e-05, - "loss": 0.015061555802822113, - "step": 1285 - }, - { - "epoch": 0.21994884910485935, - "grad_norm": 0.1073407456278801, - "learning_rate": 7.476539485948973e-05, - "loss": 0.011137319356203079, - "step": 1290 - }, - { - "epoch": 0.2208013640238704, - "grad_norm": 0.16794899106025696, - "learning_rate": 7.476252462028021e-05, - "loss": 0.013223762810230254, - "step": 1295 - }, - { - "epoch": 0.2216538789428815, - "grad_norm": 0.11509175598621368, - "learning_rate": 7.475963698568468e-05, - "loss": 0.012790821492671967, - "step": 1300 - }, - { - "epoch": 0.22250639386189258, - "grad_norm": 0.08615118265151978, - "learning_rate": 7.475673195705116e-05, - "loss": 0.011050455272197723, - "step": 1305 - }, - { - "epoch": 0.22335890878090367, - "grad_norm": 0.1186874732375145, - "learning_rate": 7.475380953573583e-05, - "loss": 0.011253353953361512, - "step": 1310 - }, - { - "epoch": 0.22421142369991476, - "grad_norm": 0.08680208027362823, - "learning_rate": 7.475086972310297e-05, - "loss": 0.010736887156963349, - "step": 1315 - }, - { - "epoch": 0.22506393861892582, - "grad_norm": 0.08204677700996399, - "learning_rate": 7.474791252052498e-05, - "loss": 0.011695361882448196, - "step": 1320 - }, - { - "epoch": 0.2259164535379369, - "grad_norm": 0.08945680409669876, - "learning_rate": 7.47449379293824e-05, - "loss": 0.01684057414531708, - "step": 1325 - }, - { - "epoch": 0.226768968456948, - "grad_norm": 0.12350911647081375, - "learning_rate": 7.474194595106384e-05, - "loss": 0.012560060620307923, - "step": 1330 - }, - { - "epoch": 0.22762148337595908, - "grad_norm": 0.09201455116271973, - "learning_rate": 7.473893658696605e-05, - "loss": 0.01128845140337944, - "step": 1335 - }, - { - "epoch": 0.22847399829497017, - "grad_norm": 0.12938448786735535, - "learning_rate": 7.473590983849396e-05, - "loss": 0.011510471999645232, - "step": 1340 - }, - { - "epoch": 0.22932651321398123, - "grad_norm": 0.0837291032075882, - "learning_rate": 7.473286570706047e-05, - "loss": 0.011677465587854385, - "step": 1345 - }, - { - "epoch": 0.23017902813299232, - "grad_norm": 0.12514546513557434, - "learning_rate": 7.472980419408675e-05, - "loss": 0.01308433711528778, - "step": 1350 - }, - { - "epoch": 0.2310315430520034, - "grad_norm": 0.05483829975128174, - "learning_rate": 7.472672530100199e-05, - "loss": 0.007203156501054764, - "step": 1355 - }, - { - "epoch": 0.2318840579710145, - "grad_norm": 0.13903425633907318, - "learning_rate": 7.472362902924352e-05, - "loss": 0.013231572508811951, - "step": 1360 - }, - { - "epoch": 0.23273657289002558, - "grad_norm": 0.09622041881084442, - "learning_rate": 7.472051538025678e-05, - "loss": 0.013325902819633483, - "step": 1365 - }, - { - "epoch": 0.23358908780903667, - "grad_norm": 0.0792275071144104, - "learning_rate": 7.471738435549533e-05, - "loss": 0.011098403483629227, - "step": 1370 - }, - { - "epoch": 0.23444160272804773, - "grad_norm": 0.11833506077528, - "learning_rate": 7.471423595642084e-05, - "loss": 0.014845246076583862, - "step": 1375 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 0.07531571388244629, - "learning_rate": 7.471107018450309e-05, - "loss": 0.011498528718948364, - "step": 1380 - }, - { - "epoch": 0.2361466325660699, - "grad_norm": 0.08739249408245087, - "learning_rate": 7.470788704121995e-05, - "loss": 0.013241058588027954, - "step": 1385 - }, - { - "epoch": 0.236999147485081, - "grad_norm": 0.07113732397556305, - "learning_rate": 7.470468652805743e-05, - "loss": 0.009632241725921632, - "step": 1390 - }, - { - "epoch": 0.23785166240409208, - "grad_norm": 0.07838430255651474, - "learning_rate": 7.470146864650965e-05, - "loss": 0.009344339370727539, - "step": 1395 - }, - { - "epoch": 0.23870417732310314, - "grad_norm": 0.1086372509598732, - "learning_rate": 7.46982333980788e-05, - "loss": 0.014708395302295684, - "step": 1400 - }, - { - "epoch": 0.23955669224211423, - "grad_norm": 0.06628046184778214, - "learning_rate": 7.469498078427519e-05, - "loss": 0.011472882330417633, - "step": 1405 - }, - { - "epoch": 0.24040920716112532, - "grad_norm": 0.13003386557102203, - "learning_rate": 7.46917108066173e-05, - "loss": 0.011933800578117371, - "step": 1410 - }, - { - "epoch": 0.2412617220801364, - "grad_norm": 0.07365831732749939, - "learning_rate": 7.468842346663162e-05, - "loss": 0.0102902352809906, - "step": 1415 - }, - { - "epoch": 0.2421142369991475, - "grad_norm": 0.11540158838033676, - "learning_rate": 7.468511876585279e-05, - "loss": 0.012016136944293977, - "step": 1420 - }, - { - "epoch": 0.24296675191815856, - "grad_norm": 0.11687944084405899, - "learning_rate": 7.468179670582359e-05, - "loss": 0.01773642897605896, - "step": 1425 - }, - { - "epoch": 0.24381926683716965, - "grad_norm": 0.10464506596326828, - "learning_rate": 7.467845728809483e-05, - "loss": 0.009476778656244278, - "step": 1430 - }, - { - "epoch": 0.24467178175618073, - "grad_norm": 0.11479093879461288, - "learning_rate": 7.46751005142255e-05, - "loss": 0.014100676774978638, - "step": 1435 - }, - { - "epoch": 0.24552429667519182, - "grad_norm": 0.1388610154390335, - "learning_rate": 7.46717263857826e-05, - "loss": 0.012735754251480103, - "step": 1440 - }, - { - "epoch": 0.2463768115942029, - "grad_norm": 0.08022485673427582, - "learning_rate": 7.466833490434132e-05, - "loss": 0.014391189813613892, - "step": 1445 - }, - { - "epoch": 0.24722932651321397, - "grad_norm": 0.12174725532531738, - "learning_rate": 7.466492607148492e-05, - "loss": 0.013387931883335114, - "step": 1450 - }, - { - "epoch": 0.24808184143222506, - "grad_norm": 0.1102161779999733, - "learning_rate": 7.466149988880474e-05, - "loss": 0.01143224760890007, - "step": 1455 - }, - { - "epoch": 0.24893435635123615, - "grad_norm": 0.14878468215465546, - "learning_rate": 7.465805635790024e-05, - "loss": 0.010428830236196517, - "step": 1460 - }, - { - "epoch": 0.24978687127024723, - "grad_norm": 0.3046579658985138, - "learning_rate": 7.4654595480379e-05, - "loss": 0.012648795545101166, - "step": 1465 - }, - { - "epoch": 0.24995737425404946, - "eval_loss": 0.036103956401348114, - "eval_runtime": 3.6524, - "eval_samples_per_second": 68.995, - "eval_steps_per_second": 1.095, - "step": 1466 - }, - { - "eval_cer_subset": 0.01393977885257381, - "eval_cer_subset_edit_distance": 856, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 1466 - } - ], - "logging_steps": 5, - "max_steps": 23460, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 2932, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 4.941642367710904e+17, - "train_batch_size": 32, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/training_args.bin deleted file mode 100644 index f049a0b30d4abb921310cf007655d399147294f8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1466/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6adec376d54028ef57ef3dc856a5cba12bab9c0d580369637fa983b6072064f7 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/adapter_config.json deleted file mode 100644 index dcdb6b17c195950cd12709b48b32f4a0c173c74e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "down_proj", - "v_proj", - "gate_proj", - "up_proj", - "o_proj", - "q_proj", - "k_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/adapter_model.safetensors deleted file mode 100644 index 8dc916bef42e26a8d344c0f27067e1d30f297330..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1767ecc8a0b6be3fcc0530980c0b79b0669b0574b3e5eb9e24fe4f8f2da2d1dd -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/optimizer.pt deleted file mode 100644 index ff652978cc382fcae09806ce32374e465b17e9a6..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9bcb770bf5ce40ac20edec3ef11ea6e5346880a52a385b9e42ba26bbb415f744 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/rng_state.pth deleted file mode 100644 index 6367b74f19467b36e1d46b9724b5c718b09ed5ea..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:419e357cd97f0d0fc2ead1cab9470f1570a1a1a10f851be6facc831c815ac5e0 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/scheduler.pt deleted file mode 100644 index c9eca4b2cca2ffe32d82f2b82ab5ec23c4091689..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d0e65f45b5eb2e23db6a0e693c522a6c06d7e03dd0fb4a5cddc6f9a581bbd5b6 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/trainer_state.json deleted file mode 100644 index e7a7e2ffca49c1fbdee4aca1f4f19216526aab81..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/trainer_state.json +++ /dev/null @@ -1,24852 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.9994884910485933, - "eval_steps": 1466, - "global_step": 17592, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0008525149190110827, - "grad_norm": 1.6653118133544922, - "learning_rate": 6.382978723404255e-07, - "loss": 0.6878558158874511, - "step": 5 - }, - { - "epoch": 0.0017050298380221654, - "grad_norm": 1.6072094440460205, - "learning_rate": 1.4361702127659573e-06, - "loss": 0.7222867965698242, - "step": 10 - }, - { - "epoch": 0.0025575447570332483, - "grad_norm": 1.8327608108520508, - "learning_rate": 2.2340425531914894e-06, - "loss": 0.6977188110351562, - "step": 15 - }, - { - "epoch": 0.0034100596760443308, - "grad_norm": 1.7263766527175903, - "learning_rate": 3.0319148936170214e-06, - "loss": 0.6928215026855469, - "step": 20 - }, - { - "epoch": 0.004262574595055414, - "grad_norm": 1.8482989072799683, - "learning_rate": 3.829787234042553e-06, - "loss": 0.6887872695922852, - "step": 25 - }, - { - "epoch": 0.005115089514066497, - "grad_norm": 2.0272440910339355, - "learning_rate": 4.627659574468085e-06, - "loss": 0.6714544296264648, - "step": 30 - }, - { - "epoch": 0.005967604433077579, - "grad_norm": 2.4286959171295166, - "learning_rate": 5.425531914893616e-06, - "loss": 0.6025971412658692, - "step": 35 - }, - { - "epoch": 0.0068201193520886615, - "grad_norm": 2.26863694190979, - "learning_rate": 6.223404255319148e-06, - "loss": 0.5200423240661621, - "step": 40 - }, - { - "epoch": 0.0076726342710997444, - "grad_norm": 2.0572476387023926, - "learning_rate": 7.02127659574468e-06, - "loss": 0.3741515874862671, - "step": 45 - }, - { - "epoch": 0.008525149190110827, - "grad_norm": 0.9048103094100952, - "learning_rate": 7.819148936170211e-06, - "loss": 0.25923154354095457, - "step": 50 - }, - { - "epoch": 0.00937766410912191, - "grad_norm": 0.5942133069038391, - "learning_rate": 8.617021276595744e-06, - "loss": 0.18589640855789186, - "step": 55 - }, - { - "epoch": 0.010230179028132993, - "grad_norm": 0.40469691157341003, - "learning_rate": 9.414893617021275e-06, - "loss": 0.14508966207504273, - "step": 60 - }, - { - "epoch": 0.011082693947144074, - "grad_norm": 0.319180428981781, - "learning_rate": 1.0212765957446808e-05, - "loss": 0.1287898302078247, - "step": 65 - }, - { - "epoch": 0.011935208866155157, - "grad_norm": 0.2620982229709625, - "learning_rate": 1.101063829787234e-05, - "loss": 0.10825083255767823, - "step": 70 - }, - { - "epoch": 0.01278772378516624, - "grad_norm": 0.2702063024044037, - "learning_rate": 1.1808510638297872e-05, - "loss": 0.09140915870666504, - "step": 75 - }, - { - "epoch": 0.013640238704177323, - "grad_norm": 0.21019567549228668, - "learning_rate": 1.2606382978723403e-05, - "loss": 0.07847741842269898, - "step": 80 - }, - { - "epoch": 0.014492753623188406, - "grad_norm": 0.27865487337112427, - "learning_rate": 1.3404255319148936e-05, - "loss": 0.0681579053401947, - "step": 85 - }, - { - "epoch": 0.015345268542199489, - "grad_norm": 0.2561706006526947, - "learning_rate": 1.4202127659574466e-05, - "loss": 0.06208043098449707, - "step": 90 - }, - { - "epoch": 0.01619778346121057, - "grad_norm": 0.33415308594703674, - "learning_rate": 1.4999999999999999e-05, - "loss": 0.05436077117919922, - "step": 95 - }, - { - "epoch": 0.017050298380221655, - "grad_norm": 0.20705723762512207, - "learning_rate": 1.579787234042553e-05, - "loss": 0.061427068710327146, - "step": 100 - }, - { - "epoch": 0.017902813299232736, - "grad_norm": 0.26269420981407166, - "learning_rate": 1.659574468085106e-05, - "loss": 0.04944279193878174, - "step": 105 - }, - { - "epoch": 0.01875532821824382, - "grad_norm": 0.256533682346344, - "learning_rate": 1.7393617021276596e-05, - "loss": 0.0463131994009018, - "step": 110 - }, - { - "epoch": 0.0196078431372549, - "grad_norm": 0.25077348947525024, - "learning_rate": 1.8191489361702127e-05, - "loss": 0.036723154783248904, - "step": 115 - }, - { - "epoch": 0.020460358056265986, - "grad_norm": 0.2801378071308136, - "learning_rate": 1.8989361702127655e-05, - "loss": 0.04308137893676758, - "step": 120 - }, - { - "epoch": 0.021312872975277068, - "grad_norm": 0.23140908777713776, - "learning_rate": 1.978723404255319e-05, - "loss": 0.0391487717628479, - "step": 125 - }, - { - "epoch": 0.02216538789428815, - "grad_norm": 0.4250975251197815, - "learning_rate": 2.0585106382978724e-05, - "loss": 0.03745899498462677, - "step": 130 - }, - { - "epoch": 0.023017902813299233, - "grad_norm": 0.24533668160438538, - "learning_rate": 2.1382978723404255e-05, - "loss": 0.03649275898933411, - "step": 135 - }, - { - "epoch": 0.023870417732310314, - "grad_norm": 0.2309182733297348, - "learning_rate": 2.2180851063829783e-05, - "loss": 0.036090463399887085, - "step": 140 - }, - { - "epoch": 0.0247229326513214, - "grad_norm": 0.28087928891181946, - "learning_rate": 2.2978723404255317e-05, - "loss": 0.03179733753204346, - "step": 145 - }, - { - "epoch": 0.02557544757033248, - "grad_norm": 0.2785134017467499, - "learning_rate": 2.377659574468085e-05, - "loss": 0.02794267237186432, - "step": 150 - }, - { - "epoch": 0.026427962489343565, - "grad_norm": 0.20468176901340485, - "learning_rate": 2.457446808510638e-05, - "loss": 0.025854837894439698, - "step": 155 - }, - { - "epoch": 0.027280477408354646, - "grad_norm": 0.4069538116455078, - "learning_rate": 2.537234042553191e-05, - "loss": 0.03182780146598816, - "step": 160 - }, - { - "epoch": 0.028132992327365727, - "grad_norm": 0.3025060296058655, - "learning_rate": 2.6170212765957446e-05, - "loss": 0.027975103259086607, - "step": 165 - }, - { - "epoch": 0.028985507246376812, - "grad_norm": 0.22387781739234924, - "learning_rate": 2.6968085106382977e-05, - "loss": 0.025766396522521974, - "step": 170 - }, - { - "epoch": 0.029838022165387893, - "grad_norm": 0.15060213208198547, - "learning_rate": 2.7765957446808508e-05, - "loss": 0.025661858916282653, - "step": 175 - }, - { - "epoch": 0.030690537084398978, - "grad_norm": 0.2912445366382599, - "learning_rate": 2.856382978723404e-05, - "loss": 0.02397846579551697, - "step": 180 - }, - { - "epoch": 0.03154305200341006, - "grad_norm": 0.19390830397605896, - "learning_rate": 2.9361702127659574e-05, - "loss": 0.023744897544384004, - "step": 185 - }, - { - "epoch": 0.03239556692242114, - "grad_norm": 0.24036464095115662, - "learning_rate": 3.0159574468085105e-05, - "loss": 0.0238590270280838, - "step": 190 - }, - { - "epoch": 0.03324808184143223, - "grad_norm": 0.2793026566505432, - "learning_rate": 3.0957446808510636e-05, - "loss": 0.025464403629302978, - "step": 195 - }, - { - "epoch": 0.03410059676044331, - "grad_norm": 0.21164429187774658, - "learning_rate": 3.175531914893617e-05, - "loss": 0.020692554116249085, - "step": 200 - }, - { - "epoch": 0.03495311167945439, - "grad_norm": 0.3742266297340393, - "learning_rate": 3.25531914893617e-05, - "loss": 0.02315486818552017, - "step": 205 - }, - { - "epoch": 0.03580562659846547, - "grad_norm": 0.2690248191356659, - "learning_rate": 3.3351063829787226e-05, - "loss": 0.0174571692943573, - "step": 210 - }, - { - "epoch": 0.03665814151747655, - "grad_norm": 0.3727855980396271, - "learning_rate": 3.414893617021276e-05, - "loss": 0.021705827116966246, - "step": 215 - }, - { - "epoch": 0.03751065643648764, - "grad_norm": 0.27461397647857666, - "learning_rate": 3.4946808510638296e-05, - "loss": 0.028134092688560486, - "step": 220 - }, - { - "epoch": 0.03836317135549872, - "grad_norm": 0.1723722219467163, - "learning_rate": 3.574468085106383e-05, - "loss": 0.02093944847583771, - "step": 225 - }, - { - "epoch": 0.0392156862745098, - "grad_norm": 0.17161321640014648, - "learning_rate": 3.654255319148936e-05, - "loss": 0.021984823048114777, - "step": 230 - }, - { - "epoch": 0.040068201193520885, - "grad_norm": 0.203886479139328, - "learning_rate": 3.734042553191489e-05, - "loss": 0.02234097272157669, - "step": 235 - }, - { - "epoch": 0.04092071611253197, - "grad_norm": 0.2021923065185547, - "learning_rate": 3.813829787234042e-05, - "loss": 0.02671273946762085, - "step": 240 - }, - { - "epoch": 0.041773231031543054, - "grad_norm": 0.24510027468204498, - "learning_rate": 3.8936170212765955e-05, - "loss": 0.016925157606601716, - "step": 245 - }, - { - "epoch": 0.042625745950554135, - "grad_norm": 0.191350057721138, - "learning_rate": 3.973404255319149e-05, - "loss": 0.02067229151725769, - "step": 250 - }, - { - "epoch": 0.043478260869565216, - "grad_norm": 0.20248189568519592, - "learning_rate": 4.053191489361702e-05, - "loss": 0.01805101931095123, - "step": 255 - }, - { - "epoch": 0.0443307757885763, - "grad_norm": 0.22623269259929657, - "learning_rate": 4.1329787234042545e-05, - "loss": 0.019811727106571198, - "step": 260 - }, - { - "epoch": 0.045183290707587385, - "grad_norm": 0.2050849199295044, - "learning_rate": 4.2127659574468086e-05, - "loss": 0.017767781019210817, - "step": 265 - }, - { - "epoch": 0.04603580562659847, - "grad_norm": 0.24867363274097443, - "learning_rate": 4.2925531914893614e-05, - "loss": 0.02127009928226471, - "step": 270 - }, - { - "epoch": 0.04688832054560955, - "grad_norm": 0.20721694827079773, - "learning_rate": 4.372340425531914e-05, - "loss": 0.022770658135414124, - "step": 275 - }, - { - "epoch": 0.04774083546462063, - "grad_norm": 0.19511370360851288, - "learning_rate": 4.452127659574468e-05, - "loss": 0.02165296971797943, - "step": 280 - }, - { - "epoch": 0.04859335038363171, - "grad_norm": 0.19066740572452545, - "learning_rate": 4.5319148936170204e-05, - "loss": 0.020857495069503785, - "step": 285 - }, - { - "epoch": 0.0494458653026428, - "grad_norm": 0.14217901229858398, - "learning_rate": 4.6117021276595746e-05, - "loss": 0.016413891315460206, - "step": 290 - }, - { - "epoch": 0.05029838022165388, - "grad_norm": 0.24177870154380798, - "learning_rate": 4.6914893617021274e-05, - "loss": 0.01902117133140564, - "step": 295 - }, - { - "epoch": 0.05115089514066496, - "grad_norm": 0.2742858827114105, - "learning_rate": 4.77127659574468e-05, - "loss": 0.016554126143455507, - "step": 300 - }, - { - "epoch": 0.05200341005967604, - "grad_norm": 0.24519385397434235, - "learning_rate": 4.851063829787234e-05, - "loss": 0.022036357223987578, - "step": 305 - }, - { - "epoch": 0.05285592497868713, - "grad_norm": 0.296572208404541, - "learning_rate": 4.930851063829787e-05, - "loss": 0.0196581095457077, - "step": 310 - }, - { - "epoch": 0.05370843989769821, - "grad_norm": 0.17092008888721466, - "learning_rate": 5.01063829787234e-05, - "loss": 0.015453743934631347, - "step": 315 - }, - { - "epoch": 0.05456095481670929, - "grad_norm": 0.17074067890644073, - "learning_rate": 5.090425531914893e-05, - "loss": 0.013983796536922454, - "step": 320 - }, - { - "epoch": 0.05541346973572037, - "grad_norm": 0.27302470803260803, - "learning_rate": 5.170212765957446e-05, - "loss": 0.015387402474880218, - "step": 325 - }, - { - "epoch": 0.056265984654731455, - "grad_norm": 0.2603592872619629, - "learning_rate": 5.2499999999999995e-05, - "loss": 0.013557825982570649, - "step": 330 - }, - { - "epoch": 0.05711849957374254, - "grad_norm": 0.23079948127269745, - "learning_rate": 5.329787234042553e-05, - "loss": 0.014809322357177735, - "step": 335 - }, - { - "epoch": 0.057971014492753624, - "grad_norm": 0.15029627084732056, - "learning_rate": 5.409574468085106e-05, - "loss": 0.018204084038734435, - "step": 340 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 0.2930934429168701, - "learning_rate": 5.4893617021276586e-05, - "loss": 0.02035558819770813, - "step": 345 - }, - { - "epoch": 0.059676044330775786, - "grad_norm": 0.19710905849933624, - "learning_rate": 5.569148936170213e-05, - "loss": 0.015025021135807037, - "step": 350 - }, - { - "epoch": 0.060528559249786874, - "grad_norm": 0.13316969573497772, - "learning_rate": 5.6489361702127655e-05, - "loss": 0.01258893460035324, - "step": 355 - }, - { - "epoch": 0.061381074168797956, - "grad_norm": 0.30591729283332825, - "learning_rate": 5.728723404255319e-05, - "loss": 0.0187319278717041, - "step": 360 - }, - { - "epoch": 0.06223358908780904, - "grad_norm": 0.22696663439273834, - "learning_rate": 5.808510638297872e-05, - "loss": 0.015805599093437196, - "step": 365 - }, - { - "epoch": 0.06308610400682012, - "grad_norm": 0.2800130546092987, - "learning_rate": 5.8882978723404245e-05, - "loss": 0.01719983071088791, - "step": 370 - }, - { - "epoch": 0.0639386189258312, - "grad_norm": 0.20671235024929047, - "learning_rate": 5.9680851063829786e-05, - "loss": 0.014449423551559449, - "step": 375 - }, - { - "epoch": 0.06479113384484228, - "grad_norm": 0.1630883365869522, - "learning_rate": 6.0478723404255314e-05, - "loss": 0.017153808474540712, - "step": 380 - }, - { - "epoch": 0.06564364876385337, - "grad_norm": 0.18699344992637634, - "learning_rate": 6.127659574468084e-05, - "loss": 0.015558701753616334, - "step": 385 - }, - { - "epoch": 0.06649616368286446, - "grad_norm": 0.15257929265499115, - "learning_rate": 6.207446808510638e-05, - "loss": 0.018240168690681458, - "step": 390 - }, - { - "epoch": 0.06734867860187553, - "grad_norm": 0.19658304750919342, - "learning_rate": 6.287234042553191e-05, - "loss": 0.015907022356987, - "step": 395 - }, - { - "epoch": 0.06820119352088662, - "grad_norm": 0.3743384778499603, - "learning_rate": 6.367021276595743e-05, - "loss": 0.014921861886978149, - "step": 400 - }, - { - "epoch": 0.06905370843989769, - "grad_norm": 0.254724383354187, - "learning_rate": 6.446808510638298e-05, - "loss": 0.018739913403987885, - "step": 405 - }, - { - "epoch": 0.06990622335890878, - "grad_norm": 0.19365151226520538, - "learning_rate": 6.52659574468085e-05, - "loss": 0.018984711170196532, - "step": 410 - }, - { - "epoch": 0.07075873827791987, - "grad_norm": 0.20728597044944763, - "learning_rate": 6.606382978723404e-05, - "loss": 0.01524859368801117, - "step": 415 - }, - { - "epoch": 0.07161125319693094, - "grad_norm": 0.1415344476699829, - "learning_rate": 6.686170212765957e-05, - "loss": 0.014004582166671753, - "step": 420 - }, - { - "epoch": 0.07246376811594203, - "grad_norm": 0.14169375598430634, - "learning_rate": 6.765957446808509e-05, - "loss": 0.013503608107566834, - "step": 425 - }, - { - "epoch": 0.0733162830349531, - "grad_norm": 0.22518369555473328, - "learning_rate": 6.845744680851064e-05, - "loss": 0.017587104439735414, - "step": 430 - }, - { - "epoch": 0.0741687979539642, - "grad_norm": 0.1091267541050911, - "learning_rate": 6.925531914893616e-05, - "loss": 0.013890406489372254, - "step": 435 - }, - { - "epoch": 0.07502131287297528, - "grad_norm": 0.24982722103595734, - "learning_rate": 7.00531914893617e-05, - "loss": 0.01599075198173523, - "step": 440 - }, - { - "epoch": 0.07587382779198636, - "grad_norm": 0.22311007976531982, - "learning_rate": 7.085106382978723e-05, - "loss": 0.014870230853557587, - "step": 445 - }, - { - "epoch": 0.07672634271099744, - "grad_norm": 0.27064985036849976, - "learning_rate": 7.164893617021276e-05, - "loss": 0.019213163852691652, - "step": 450 - }, - { - "epoch": 0.07757885763000852, - "grad_norm": 0.16876882314682007, - "learning_rate": 7.244680851063829e-05, - "loss": 0.014935044944286347, - "step": 455 - }, - { - "epoch": 0.0784313725490196, - "grad_norm": 0.18644963204860687, - "learning_rate": 7.324468085106382e-05, - "loss": 0.013823372125625611, - "step": 460 - }, - { - "epoch": 0.0792838874680307, - "grad_norm": 0.13067972660064697, - "learning_rate": 7.404255319148935e-05, - "loss": 0.013839980959892273, - "step": 465 - }, - { - "epoch": 0.08013640238704177, - "grad_norm": 0.0976850613951683, - "learning_rate": 7.484042553191489e-05, - "loss": 0.012782379984855652, - "step": 470 - }, - { - "epoch": 0.08098891730605286, - "grad_norm": 0.2523496150970459, - "learning_rate": 7.499999439800074e-05, - "loss": 0.013075053691864014, - "step": 475 - }, - { - "epoch": 0.08184143222506395, - "grad_norm": 0.18349343538284302, - "learning_rate": 7.499997163988164e-05, - "loss": 0.01753528565168381, - "step": 480 - }, - { - "epoch": 0.08269394714407502, - "grad_norm": 0.16528834402561188, - "learning_rate": 7.499993137552834e-05, - "loss": 0.012987489998340606, - "step": 485 - }, - { - "epoch": 0.08354646206308611, - "grad_norm": 0.1918938159942627, - "learning_rate": 7.499987360495964e-05, - "loss": 0.018496687710285186, - "step": 490 - }, - { - "epoch": 0.08439897698209718, - "grad_norm": 0.11452502012252808, - "learning_rate": 7.499979832820255e-05, - "loss": 0.015578755736351013, - "step": 495 - }, - { - "epoch": 0.08525149190110827, - "grad_norm": 0.2084985226392746, - "learning_rate": 7.499970554529216e-05, - "loss": 0.014264023303985596, - "step": 500 - }, - { - "epoch": 0.08610400682011936, - "grad_norm": 0.13777600228786469, - "learning_rate": 7.49995952562718e-05, - "loss": 0.014527246356010437, - "step": 505 - }, - { - "epoch": 0.08695652173913043, - "grad_norm": 0.2515336275100708, - "learning_rate": 7.499946746119296e-05, - "loss": 0.019042417407035828, - "step": 510 - }, - { - "epoch": 0.08780903665814152, - "grad_norm": 0.12859591841697693, - "learning_rate": 7.499932216011531e-05, - "loss": 0.01340993344783783, - "step": 515 - }, - { - "epoch": 0.0886615515771526, - "grad_norm": 0.15603038668632507, - "learning_rate": 7.499915935310667e-05, - "loss": 0.01645509898662567, - "step": 520 - }, - { - "epoch": 0.08951406649616368, - "grad_norm": 0.1493784785270691, - "learning_rate": 7.499897904024303e-05, - "loss": 0.016503919661045075, - "step": 525 - }, - { - "epoch": 0.09036658141517477, - "grad_norm": 0.14551150798797607, - "learning_rate": 7.499878122160858e-05, - "loss": 0.013891169428825378, - "step": 530 - }, - { - "epoch": 0.09121909633418585, - "grad_norm": 0.12197990715503693, - "learning_rate": 7.499856589729569e-05, - "loss": 0.01531532108783722, - "step": 535 - }, - { - "epoch": 0.09207161125319693, - "grad_norm": 0.24787496030330658, - "learning_rate": 7.499833306740485e-05, - "loss": 0.016374409198760986, - "step": 540 - }, - { - "epoch": 0.09292412617220801, - "grad_norm": 0.10902808606624603, - "learning_rate": 7.499808273204476e-05, - "loss": 0.013505718111991883, - "step": 545 - }, - { - "epoch": 0.0937766410912191, - "grad_norm": 0.16540755331516266, - "learning_rate": 7.499781489133228e-05, - "loss": 0.016257329285144805, - "step": 550 - }, - { - "epoch": 0.09462915601023018, - "grad_norm": 0.1228717565536499, - "learning_rate": 7.499752954539245e-05, - "loss": 0.011345921456813813, - "step": 555 - }, - { - "epoch": 0.09548167092924126, - "grad_norm": 0.12704797089099884, - "learning_rate": 7.49972266943585e-05, - "loss": 0.014449910819530487, - "step": 560 - }, - { - "epoch": 0.09633418584825235, - "grad_norm": 0.2014201581478119, - "learning_rate": 7.499690633837178e-05, - "loss": 0.01456935852766037, - "step": 565 - }, - { - "epoch": 0.09718670076726342, - "grad_norm": 0.17480657994747162, - "learning_rate": 7.499656847758187e-05, - "loss": 0.014413098990917205, - "step": 570 - }, - { - "epoch": 0.09803921568627451, - "grad_norm": 0.18946893513202667, - "learning_rate": 7.499621311214646e-05, - "loss": 0.0139508917927742, - "step": 575 - }, - { - "epoch": 0.0988917306052856, - "grad_norm": 0.15367820858955383, - "learning_rate": 7.499584024223149e-05, - "loss": 0.014091435074806213, - "step": 580 - }, - { - "epoch": 0.09974424552429667, - "grad_norm": 0.15018688142299652, - "learning_rate": 7.499544986801099e-05, - "loss": 0.016737687587738036, - "step": 585 - }, - { - "epoch": 0.10059676044330776, - "grad_norm": 0.17522579431533813, - "learning_rate": 7.499504198966722e-05, - "loss": 0.015822765231132508, - "step": 590 - }, - { - "epoch": 0.10144927536231885, - "grad_norm": 0.12634818255901337, - "learning_rate": 7.499461660739059e-05, - "loss": 0.015363042056560517, - "step": 595 - }, - { - "epoch": 0.10230179028132992, - "grad_norm": 0.12466495484113693, - "learning_rate": 7.499417372137968e-05, - "loss": 0.013208043575286866, - "step": 600 - }, - { - "epoch": 0.10315430520034101, - "grad_norm": 0.18877951800823212, - "learning_rate": 7.499371333184125e-05, - "loss": 0.016261917352676392, - "step": 605 - }, - { - "epoch": 0.10400682011935208, - "grad_norm": 0.21521399915218353, - "learning_rate": 7.49932354389902e-05, - "loss": 0.013861049711704255, - "step": 610 - }, - { - "epoch": 0.10485933503836317, - "grad_norm": 0.1375613659620285, - "learning_rate": 7.499274004304964e-05, - "loss": 0.015597744286060334, - "step": 615 - }, - { - "epoch": 0.10571184995737426, - "grad_norm": 0.16078200936317444, - "learning_rate": 7.499222714425087e-05, - "loss": 0.018308353424072266, - "step": 620 - }, - { - "epoch": 0.10656436487638533, - "grad_norm": 0.15485632419586182, - "learning_rate": 7.499169674283328e-05, - "loss": 0.015836401283740996, - "step": 625 - }, - { - "epoch": 0.10741687979539642, - "grad_norm": 0.14019452035427094, - "learning_rate": 7.499114883904451e-05, - "loss": 0.014591602981090546, - "step": 630 - }, - { - "epoch": 0.1082693947144075, - "grad_norm": 0.2042212039232254, - "learning_rate": 7.499058343314031e-05, - "loss": 0.01718664914369583, - "step": 635 - }, - { - "epoch": 0.10912190963341858, - "grad_norm": 0.13801656663417816, - "learning_rate": 7.499000052538467e-05, - "loss": 0.015579727292060853, - "step": 640 - }, - { - "epoch": 0.10997442455242967, - "grad_norm": 0.12787523865699768, - "learning_rate": 7.498940011604968e-05, - "loss": 0.012525486946105956, - "step": 645 - }, - { - "epoch": 0.11082693947144075, - "grad_norm": 0.1752539724111557, - "learning_rate": 7.498878220541564e-05, - "loss": 0.014286568760871888, - "step": 650 - }, - { - "epoch": 0.11167945439045183, - "grad_norm": 0.12485212087631226, - "learning_rate": 7.498814679377101e-05, - "loss": 0.01482405811548233, - "step": 655 - }, - { - "epoch": 0.11253196930946291, - "grad_norm": 0.10980220139026642, - "learning_rate": 7.498749388141243e-05, - "loss": 0.01597980558872223, - "step": 660 - }, - { - "epoch": 0.113384484228474, - "grad_norm": 0.09144977480173111, - "learning_rate": 7.498682346864469e-05, - "loss": 0.011583130061626434, - "step": 665 - }, - { - "epoch": 0.11423699914748509, - "grad_norm": 0.11955336481332779, - "learning_rate": 7.498613555578076e-05, - "loss": 0.013009116053581238, - "step": 670 - }, - { - "epoch": 0.11508951406649616, - "grad_norm": 0.14505070447921753, - "learning_rate": 7.49854301431418e-05, - "loss": 0.013878281414508819, - "step": 675 - }, - { - "epoch": 0.11594202898550725, - "grad_norm": 0.15674598515033722, - "learning_rate": 7.49847072310571e-05, - "loss": 0.014717698097229004, - "step": 680 - }, - { - "epoch": 0.11679454390451834, - "grad_norm": 0.16684003174304962, - "learning_rate": 7.498396681986413e-05, - "loss": 0.015567027032375336, - "step": 685 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 0.13314439356327057, - "learning_rate": 7.498320890990857e-05, - "loss": 0.017679129540920258, - "step": 690 - }, - { - "epoch": 0.1184995737425405, - "grad_norm": 0.1099700927734375, - "learning_rate": 7.498243350154423e-05, - "loss": 0.015660777688026428, - "step": 695 - }, - { - "epoch": 0.11935208866155157, - "grad_norm": 0.09305288642644882, - "learning_rate": 7.498164059513307e-05, - "loss": 0.011864218115806579, - "step": 700 - }, - { - "epoch": 0.12020460358056266, - "grad_norm": 0.1284978687763214, - "learning_rate": 7.498083019104527e-05, - "loss": 0.013128276169300079, - "step": 705 - }, - { - "epoch": 0.12105711849957375, - "grad_norm": 0.19205476343631744, - "learning_rate": 7.498000228965913e-05, - "loss": 0.014518238604068756, - "step": 710 - }, - { - "epoch": 0.12190963341858482, - "grad_norm": 0.25064221024513245, - "learning_rate": 7.497915689136119e-05, - "loss": 0.014235696196556092, - "step": 715 - }, - { - "epoch": 0.12276214833759591, - "grad_norm": 0.11866044998168945, - "learning_rate": 7.497829399654607e-05, - "loss": 0.013622967898845673, - "step": 720 - }, - { - "epoch": 0.12361466325660699, - "grad_norm": 0.1366252452135086, - "learning_rate": 7.49774136056166e-05, - "loss": 0.013592693209648132, - "step": 725 - }, - { - "epoch": 0.12446717817561807, - "grad_norm": 0.12722773849964142, - "learning_rate": 7.497651571898379e-05, - "loss": 0.02055167257785797, - "step": 730 - }, - { - "epoch": 0.12531969309462915, - "grad_norm": 0.07723517715930939, - "learning_rate": 7.49756003370668e-05, - "loss": 0.012377069890499115, - "step": 735 - }, - { - "epoch": 0.12617220801364024, - "grad_norm": 0.11081967502832413, - "learning_rate": 7.497466746029293e-05, - "loss": 0.013797640800476074, - "step": 740 - }, - { - "epoch": 0.12702472293265132, - "grad_norm": 0.1117677390575409, - "learning_rate": 7.497371708909771e-05, - "loss": 0.01338990479707718, - "step": 745 - }, - { - "epoch": 0.1278772378516624, - "grad_norm": 0.24659112095832825, - "learning_rate": 7.497274922392483e-05, - "loss": 0.012844511866569519, - "step": 750 - }, - { - "epoch": 0.1287297527706735, - "grad_norm": 0.24900244176387787, - "learning_rate": 7.497176386522606e-05, - "loss": 0.015656352043151855, - "step": 755 - }, - { - "epoch": 0.12958226768968456, - "grad_norm": 0.1331390142440796, - "learning_rate": 7.497076101346144e-05, - "loss": 0.013722085952758789, - "step": 760 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 0.1224697008728981, - "learning_rate": 7.496974066909913e-05, - "loss": 0.011186304688453674, - "step": 765 - }, - { - "epoch": 0.13128729752770674, - "grad_norm": 0.11053820699453354, - "learning_rate": 7.496870283261546e-05, - "loss": 0.012894454598426818, - "step": 770 - }, - { - "epoch": 0.13213981244671782, - "grad_norm": 0.09663277864456177, - "learning_rate": 7.49676475044949e-05, - "loss": 0.015331995487213135, - "step": 775 - }, - { - "epoch": 0.1329923273657289, - "grad_norm": 0.1483219712972641, - "learning_rate": 7.496657468523014e-05, - "loss": 0.015070399641990662, - "step": 780 - }, - { - "epoch": 0.13384484228473997, - "grad_norm": 0.12375539541244507, - "learning_rate": 7.496548437532202e-05, - "loss": 0.013722005486488342, - "step": 785 - }, - { - "epoch": 0.13469735720375106, - "grad_norm": 0.14662130177021027, - "learning_rate": 7.496437657527949e-05, - "loss": 0.012852996587753296, - "step": 790 - }, - { - "epoch": 0.13554987212276215, - "grad_norm": 0.1740300953388214, - "learning_rate": 7.496325128561975e-05, - "loss": 0.014796209335327149, - "step": 795 - }, - { - "epoch": 0.13640238704177324, - "grad_norm": 0.10670243203639984, - "learning_rate": 7.496210850686809e-05, - "loss": 0.013983005285263061, - "step": 800 - }, - { - "epoch": 0.13725490196078433, - "grad_norm": 0.1362515240907669, - "learning_rate": 7.496094823955801e-05, - "loss": 0.014472618699073792, - "step": 805 - }, - { - "epoch": 0.13810741687979539, - "grad_norm": 0.11403689533472061, - "learning_rate": 7.495977048423117e-05, - "loss": 0.01294848918914795, - "step": 810 - }, - { - "epoch": 0.13895993179880647, - "grad_norm": 0.11532565206289291, - "learning_rate": 7.495857524143738e-05, - "loss": 0.011346425861120224, - "step": 815 - }, - { - "epoch": 0.13981244671781756, - "grad_norm": 0.1506761759519577, - "learning_rate": 7.495736251173463e-05, - "loss": 0.016532811522483825, - "step": 820 - }, - { - "epoch": 0.14066496163682865, - "grad_norm": 0.10915899276733398, - "learning_rate": 7.495613229568903e-05, - "loss": 0.01278679072856903, - "step": 825 - }, - { - "epoch": 0.14151747655583974, - "grad_norm": 0.10394130647182465, - "learning_rate": 7.49548845938749e-05, - "loss": 0.011449626088142395, - "step": 830 - }, - { - "epoch": 0.1423699914748508, - "grad_norm": 0.08730677515268326, - "learning_rate": 7.495361940687475e-05, - "loss": 0.011916645616292954, - "step": 835 - }, - { - "epoch": 0.1432225063938619, - "grad_norm": 0.08257348835468292, - "learning_rate": 7.495233673527914e-05, - "loss": 0.013689276576042176, - "step": 840 - }, - { - "epoch": 0.14407502131287298, - "grad_norm": 0.182078555226326, - "learning_rate": 7.495103657968692e-05, - "loss": 0.019141729176044463, - "step": 845 - }, - { - "epoch": 0.14492753623188406, - "grad_norm": 0.105568528175354, - "learning_rate": 7.494971894070501e-05, - "loss": 0.015522226691246033, - "step": 850 - }, - { - "epoch": 0.14578005115089515, - "grad_norm": 0.09030122309923172, - "learning_rate": 7.494838381894856e-05, - "loss": 0.012900182604789734, - "step": 855 - }, - { - "epoch": 0.1466325660699062, - "grad_norm": 0.11199921369552612, - "learning_rate": 7.494703121504082e-05, - "loss": 0.013011230528354645, - "step": 860 - }, - { - "epoch": 0.1474850809889173, - "grad_norm": 0.11342430859804153, - "learning_rate": 7.494566112961325e-05, - "loss": 0.015477502346038818, - "step": 865 - }, - { - "epoch": 0.1483375959079284, - "grad_norm": 0.07956613600254059, - "learning_rate": 7.494427356330544e-05, - "loss": 0.011270551383495331, - "step": 870 - }, - { - "epoch": 0.14919011082693948, - "grad_norm": 0.13356897234916687, - "learning_rate": 7.494286851676515e-05, - "loss": 0.012481572479009629, - "step": 875 - }, - { - "epoch": 0.15004262574595056, - "grad_norm": 0.14493827521800995, - "learning_rate": 7.494144599064833e-05, - "loss": 0.015318951010704041, - "step": 880 - }, - { - "epoch": 0.15089514066496162, - "grad_norm": 0.13254614174365997, - "learning_rate": 7.494000598561902e-05, - "loss": 0.012697519361972808, - "step": 885 - }, - { - "epoch": 0.1517476555839727, - "grad_norm": 0.12164720892906189, - "learning_rate": 7.49385485023495e-05, - "loss": 0.011361779272556305, - "step": 890 - }, - { - "epoch": 0.1526001705029838, - "grad_norm": 0.10743863880634308, - "learning_rate": 7.493707354152015e-05, - "loss": 0.012824115157127381, - "step": 895 - }, - { - "epoch": 0.1534526854219949, - "grad_norm": 0.0933203473687172, - "learning_rate": 7.493558110381954e-05, - "loss": 0.012234293669462205, - "step": 900 - }, - { - "epoch": 0.15430520034100598, - "grad_norm": 0.10252948850393295, - "learning_rate": 7.493407118994437e-05, - "loss": 0.01874733567237854, - "step": 905 - }, - { - "epoch": 0.15515771526001704, - "grad_norm": 0.1184248998761177, - "learning_rate": 7.493254380059954e-05, - "loss": 0.012014241516590118, - "step": 910 - }, - { - "epoch": 0.15601023017902813, - "grad_norm": 0.10140799731016159, - "learning_rate": 7.49309989364981e-05, - "loss": 0.013288506865501403, - "step": 915 - }, - { - "epoch": 0.1568627450980392, - "grad_norm": 0.15704941749572754, - "learning_rate": 7.492943659836121e-05, - "loss": 0.012907981872558594, - "step": 920 - }, - { - "epoch": 0.1577152600170503, - "grad_norm": 0.10368761420249939, - "learning_rate": 7.492785678691822e-05, - "loss": 0.01384827345609665, - "step": 925 - }, - { - "epoch": 0.1585677749360614, - "grad_norm": 0.13517284393310547, - "learning_rate": 7.492625950290668e-05, - "loss": 0.014644764363765717, - "step": 930 - }, - { - "epoch": 0.15942028985507245, - "grad_norm": 0.08189263194799423, - "learning_rate": 7.492464474707222e-05, - "loss": 0.011974713951349258, - "step": 935 - }, - { - "epoch": 0.16027280477408354, - "grad_norm": 0.11737149208784103, - "learning_rate": 7.492301252016867e-05, - "loss": 0.014023615419864655, - "step": 940 - }, - { - "epoch": 0.16112531969309463, - "grad_norm": 0.15778031945228577, - "learning_rate": 7.492136282295801e-05, - "loss": 0.01533726304769516, - "step": 945 - }, - { - "epoch": 0.16197783461210571, - "grad_norm": 0.14194118976593018, - "learning_rate": 7.491969565621036e-05, - "loss": 0.01569782793521881, - "step": 950 - }, - { - "epoch": 0.1628303495311168, - "grad_norm": 0.12579558789730072, - "learning_rate": 7.491801102070403e-05, - "loss": 0.012844063341617584, - "step": 955 - }, - { - "epoch": 0.1636828644501279, - "grad_norm": 0.10578597337007523, - "learning_rate": 7.491630891722547e-05, - "loss": 0.011186198890209198, - "step": 960 - }, - { - "epoch": 0.16453537936913895, - "grad_norm": 0.12398207187652588, - "learning_rate": 7.491458934656925e-05, - "loss": 0.014328254759311676, - "step": 965 - }, - { - "epoch": 0.16538789428815004, - "grad_norm": 0.09390626102685928, - "learning_rate": 7.491285230953814e-05, - "loss": 0.013881708681583404, - "step": 970 - }, - { - "epoch": 0.16624040920716113, - "grad_norm": 0.17196106910705566, - "learning_rate": 7.491109780694303e-05, - "loss": 0.014424234628677368, - "step": 975 - }, - { - "epoch": 0.16709292412617222, - "grad_norm": 0.06994624435901642, - "learning_rate": 7.490932583960302e-05, - "loss": 0.010434426367282867, - "step": 980 - }, - { - "epoch": 0.1679454390451833, - "grad_norm": 0.09414499998092651, - "learning_rate": 7.490753640834527e-05, - "loss": 0.010483803600072861, - "step": 985 - }, - { - "epoch": 0.16879795396419436, - "grad_norm": 0.11002875864505768, - "learning_rate": 7.490572951400518e-05, - "loss": 0.009266073256731034, - "step": 990 - }, - { - "epoch": 0.16965046888320545, - "grad_norm": 0.14956022799015045, - "learning_rate": 7.490390515742626e-05, - "loss": 0.015529079735279084, - "step": 995 - }, - { - "epoch": 0.17050298380221654, - "grad_norm": 0.13828666508197784, - "learning_rate": 7.490206333946019e-05, - "loss": 0.011511271446943283, - "step": 1000 - }, - { - "epoch": 0.17135549872122763, - "grad_norm": 0.1265997439622879, - "learning_rate": 7.490020406096678e-05, - "loss": 0.010465707629919052, - "step": 1005 - }, - { - "epoch": 0.17220801364023872, - "grad_norm": 0.07772056013345718, - "learning_rate": 7.489832732281401e-05, - "loss": 0.013828210532665253, - "step": 1010 - }, - { - "epoch": 0.17306052855924978, - "grad_norm": 0.08055376261472702, - "learning_rate": 7.489643312587799e-05, - "loss": 0.013010218739509583, - "step": 1015 - }, - { - "epoch": 0.17391304347826086, - "grad_norm": 0.09059367328882217, - "learning_rate": 7.489452147104301e-05, - "loss": 0.013864235579967498, - "step": 1020 - }, - { - "epoch": 0.17476555839727195, - "grad_norm": 0.15273553133010864, - "learning_rate": 7.489259235920149e-05, - "loss": 0.013432112336158753, - "step": 1025 - }, - { - "epoch": 0.17561807331628304, - "grad_norm": 0.11343793570995331, - "learning_rate": 7.489064579125399e-05, - "loss": 0.01213686615228653, - "step": 1030 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 0.11880069226026535, - "learning_rate": 7.488868176810926e-05, - "loss": 0.012188264727592468, - "step": 1035 - }, - { - "epoch": 0.1773231031543052, - "grad_norm": 0.16270127892494202, - "learning_rate": 7.488670029068415e-05, - "loss": 0.015294317901134492, - "step": 1040 - }, - { - "epoch": 0.17817561807331628, - "grad_norm": 0.11542797088623047, - "learning_rate": 7.488470135990369e-05, - "loss": 0.013500772416591644, - "step": 1045 - }, - { - "epoch": 0.17902813299232737, - "grad_norm": 0.09579882025718689, - "learning_rate": 7.488268497670103e-05, - "loss": 0.013453315198421478, - "step": 1050 - }, - { - "epoch": 0.17988064791133845, - "grad_norm": 0.08847666531801224, - "learning_rate": 7.488065114201752e-05, - "loss": 0.0153861403465271, - "step": 1055 - }, - { - "epoch": 0.18073316283034954, - "grad_norm": 0.11167823523283005, - "learning_rate": 7.487859985680257e-05, - "loss": 0.011502107977867127, - "step": 1060 - }, - { - "epoch": 0.1815856777493606, - "grad_norm": 0.11255413293838501, - "learning_rate": 7.487653112201385e-05, - "loss": 0.012194579839706421, - "step": 1065 - }, - { - "epoch": 0.1824381926683717, - "grad_norm": 0.10929680615663528, - "learning_rate": 7.487444493861705e-05, - "loss": 0.015874122083187104, - "step": 1070 - }, - { - "epoch": 0.18329070758738278, - "grad_norm": 0.10753592848777771, - "learning_rate": 7.487234130758613e-05, - "loss": 0.009445396810770034, - "step": 1075 - }, - { - "epoch": 0.18414322250639387, - "grad_norm": 0.11368737369775772, - "learning_rate": 7.487022022990309e-05, - "loss": 0.011674505472183228, - "step": 1080 - }, - { - "epoch": 0.18499573742540495, - "grad_norm": 0.12944836914539337, - "learning_rate": 7.486808170655813e-05, - "loss": 0.011856313049793243, - "step": 1085 - }, - { - "epoch": 0.18584825234441602, - "grad_norm": 0.0833214819431305, - "learning_rate": 7.48659257385496e-05, - "loss": 0.014119544625282287, - "step": 1090 - }, - { - "epoch": 0.1867007672634271, - "grad_norm": 0.11955790221691132, - "learning_rate": 7.486375232688397e-05, - "loss": 0.012977911531925202, - "step": 1095 - }, - { - "epoch": 0.1875532821824382, - "grad_norm": 0.1452455222606659, - "learning_rate": 7.486156147257584e-05, - "loss": 0.015410827100276947, - "step": 1100 - }, - { - "epoch": 0.18840579710144928, - "grad_norm": 0.09630092978477478, - "learning_rate": 7.485935317664801e-05, - "loss": 0.013698874413967133, - "step": 1105 - }, - { - "epoch": 0.18925831202046037, - "grad_norm": 0.11687257140874863, - "learning_rate": 7.485712744013137e-05, - "loss": 0.013196484744548797, - "step": 1110 - }, - { - "epoch": 0.19011082693947143, - "grad_norm": 0.08894632011651993, - "learning_rate": 7.485488426406495e-05, - "loss": 0.01540881097316742, - "step": 1115 - }, - { - "epoch": 0.19096334185848252, - "grad_norm": 0.09196458756923676, - "learning_rate": 7.485262364949597e-05, - "loss": 0.012018527090549468, - "step": 1120 - }, - { - "epoch": 0.1918158567774936, - "grad_norm": 0.12328553944826126, - "learning_rate": 7.485034559747974e-05, - "loss": 0.014925773441791534, - "step": 1125 - }, - { - "epoch": 0.1926683716965047, - "grad_norm": 0.12566202878952026, - "learning_rate": 7.484805010907975e-05, - "loss": 0.01104198843240738, - "step": 1130 - }, - { - "epoch": 0.19352088661551578, - "grad_norm": 0.1022765263915062, - "learning_rate": 7.484573718536758e-05, - "loss": 0.0118367500603199, - "step": 1135 - }, - { - "epoch": 0.19437340153452684, - "grad_norm": 0.09682224690914154, - "learning_rate": 7.4843406827423e-05, - "loss": 0.011423001438379288, - "step": 1140 - }, - { - "epoch": 0.19522591645353793, - "grad_norm": 0.15166254341602325, - "learning_rate": 7.484105903633388e-05, - "loss": 0.014048118889331818, - "step": 1145 - }, - { - "epoch": 0.19607843137254902, - "grad_norm": 0.09285032004117966, - "learning_rate": 7.483869381319627e-05, - "loss": 0.012882034480571746, - "step": 1150 - }, - { - "epoch": 0.1969309462915601, - "grad_norm": 0.09011214971542358, - "learning_rate": 7.483631115911434e-05, - "loss": 0.01419239491224289, - "step": 1155 - }, - { - "epoch": 0.1977834612105712, - "grad_norm": 0.14540383219718933, - "learning_rate": 7.483391107520037e-05, - "loss": 0.014623096585273743, - "step": 1160 - }, - { - "epoch": 0.19863597612958228, - "grad_norm": 0.12326448410749435, - "learning_rate": 7.483149356257479e-05, - "loss": 0.013109591603279114, - "step": 1165 - }, - { - "epoch": 0.19948849104859334, - "grad_norm": 0.07067535817623138, - "learning_rate": 7.482905862236622e-05, - "loss": 0.013740380108356477, - "step": 1170 - }, - { - "epoch": 0.20034100596760443, - "grad_norm": 0.10170529782772064, - "learning_rate": 7.482660625571134e-05, - "loss": 0.011151721328496933, - "step": 1175 - }, - { - "epoch": 0.20119352088661552, - "grad_norm": 0.10074683278799057, - "learning_rate": 7.482413646375498e-05, - "loss": 0.01180294007062912, - "step": 1180 - }, - { - "epoch": 0.2020460358056266, - "grad_norm": 0.047992780804634094, - "learning_rate": 7.482164924765016e-05, - "loss": 0.01065710186958313, - "step": 1185 - }, - { - "epoch": 0.2028985507246377, - "grad_norm": 0.1279197484254837, - "learning_rate": 7.481914460855796e-05, - "loss": 0.012219739705324173, - "step": 1190 - }, - { - "epoch": 0.20375106564364875, - "grad_norm": 0.11339649558067322, - "learning_rate": 7.481662254764765e-05, - "loss": 0.012664712965488434, - "step": 1195 - }, - { - "epoch": 0.20460358056265984, - "grad_norm": 0.10576959699392319, - "learning_rate": 7.481408306609662e-05, - "loss": 0.010009048134088516, - "step": 1200 - }, - { - "epoch": 0.20545609548167093, - "grad_norm": 0.08073283731937408, - "learning_rate": 7.481152616509037e-05, - "loss": 0.011126396805047989, - "step": 1205 - }, - { - "epoch": 0.20630861040068202, - "grad_norm": 0.09888558834791183, - "learning_rate": 7.480895184582253e-05, - "loss": 0.013096305727958679, - "step": 1210 - }, - { - "epoch": 0.2071611253196931, - "grad_norm": 0.09703118354082108, - "learning_rate": 7.48063601094949e-05, - "loss": 0.010653502494096755, - "step": 1215 - }, - { - "epoch": 0.20801364023870417, - "grad_norm": 0.10693266987800598, - "learning_rate": 7.48037509573174e-05, - "loss": 0.012283077836036682, - "step": 1220 - }, - { - "epoch": 0.20886615515771526, - "grad_norm": 0.1024889275431633, - "learning_rate": 7.480112439050804e-05, - "loss": 0.012971282005310059, - "step": 1225 - }, - { - "epoch": 0.20971867007672634, - "grad_norm": 0.10043203830718994, - "learning_rate": 7.4798480410293e-05, - "loss": 0.011451539397239686, - "step": 1230 - }, - { - "epoch": 0.21057118499573743, - "grad_norm": 0.11248672753572464, - "learning_rate": 7.47958190179066e-05, - "loss": 0.012805460393428803, - "step": 1235 - }, - { - "epoch": 0.21142369991474852, - "grad_norm": 0.08651525527238846, - "learning_rate": 7.479314021459123e-05, - "loss": 0.013016811013221741, - "step": 1240 - }, - { - "epoch": 0.21227621483375958, - "grad_norm": 0.06062095984816551, - "learning_rate": 7.479044400159746e-05, - "loss": 0.01299230456352234, - "step": 1245 - }, - { - "epoch": 0.21312872975277067, - "grad_norm": 0.1589215248823166, - "learning_rate": 7.478773038018397e-05, - "loss": 0.012607543170452118, - "step": 1250 - }, - { - "epoch": 0.21398124467178176, - "grad_norm": 0.12105076014995575, - "learning_rate": 7.478499935161758e-05, - "loss": 0.012772174179553985, - "step": 1255 - }, - { - "epoch": 0.21483375959079284, - "grad_norm": 0.0846070721745491, - "learning_rate": 7.478225091717323e-05, - "loss": 0.009387130290269852, - "step": 1260 - }, - { - "epoch": 0.21568627450980393, - "grad_norm": 0.054560381919145584, - "learning_rate": 7.477948507813396e-05, - "loss": 0.013299009203910828, - "step": 1265 - }, - { - "epoch": 0.216538789428815, - "grad_norm": 0.10125566273927689, - "learning_rate": 7.477670183579094e-05, - "loss": 0.013010343909263611, - "step": 1270 - }, - { - "epoch": 0.21739130434782608, - "grad_norm": 0.10493458807468414, - "learning_rate": 7.477390119144353e-05, - "loss": 0.013531042635440827, - "step": 1275 - }, - { - "epoch": 0.21824381926683717, - "grad_norm": 0.07453935593366623, - "learning_rate": 7.477108314639913e-05, - "loss": 0.01330820918083191, - "step": 1280 - }, - { - "epoch": 0.21909633418584826, - "grad_norm": 0.14430643618106842, - "learning_rate": 7.47682477019733e-05, - "loss": 0.015061555802822113, - "step": 1285 - }, - { - "epoch": 0.21994884910485935, - "grad_norm": 0.1073407456278801, - "learning_rate": 7.476539485948973e-05, - "loss": 0.011137319356203079, - "step": 1290 - }, - { - "epoch": 0.2208013640238704, - "grad_norm": 0.16794899106025696, - "learning_rate": 7.476252462028021e-05, - "loss": 0.013223762810230254, - "step": 1295 - }, - { - "epoch": 0.2216538789428815, - "grad_norm": 0.11509175598621368, - "learning_rate": 7.475963698568468e-05, - "loss": 0.012790821492671967, - "step": 1300 - }, - { - "epoch": 0.22250639386189258, - "grad_norm": 0.08615118265151978, - "learning_rate": 7.475673195705116e-05, - "loss": 0.011050455272197723, - "step": 1305 - }, - { - "epoch": 0.22335890878090367, - "grad_norm": 0.1186874732375145, - "learning_rate": 7.475380953573583e-05, - "loss": 0.011253353953361512, - "step": 1310 - }, - { - "epoch": 0.22421142369991476, - "grad_norm": 0.08680208027362823, - "learning_rate": 7.475086972310297e-05, - "loss": 0.010736887156963349, - "step": 1315 - }, - { - "epoch": 0.22506393861892582, - "grad_norm": 0.08204677700996399, - "learning_rate": 7.474791252052498e-05, - "loss": 0.011695361882448196, - "step": 1320 - }, - { - "epoch": 0.2259164535379369, - "grad_norm": 0.08945680409669876, - "learning_rate": 7.47449379293824e-05, - "loss": 0.01684057414531708, - "step": 1325 - }, - { - "epoch": 0.226768968456948, - "grad_norm": 0.12350911647081375, - "learning_rate": 7.474194595106384e-05, - "loss": 0.012560060620307923, - "step": 1330 - }, - { - "epoch": 0.22762148337595908, - "grad_norm": 0.09201455116271973, - "learning_rate": 7.473893658696605e-05, - "loss": 0.01128845140337944, - "step": 1335 - }, - { - "epoch": 0.22847399829497017, - "grad_norm": 0.12938448786735535, - "learning_rate": 7.473590983849396e-05, - "loss": 0.011510471999645232, - "step": 1340 - }, - { - "epoch": 0.22932651321398123, - "grad_norm": 0.0837291032075882, - "learning_rate": 7.473286570706047e-05, - "loss": 0.011677465587854385, - "step": 1345 - }, - { - "epoch": 0.23017902813299232, - "grad_norm": 0.12514546513557434, - "learning_rate": 7.472980419408675e-05, - "loss": 0.01308433711528778, - "step": 1350 - }, - { - "epoch": 0.2310315430520034, - "grad_norm": 0.05483829975128174, - "learning_rate": 7.472672530100199e-05, - "loss": 0.007203156501054764, - "step": 1355 - }, - { - "epoch": 0.2318840579710145, - "grad_norm": 0.13903425633907318, - "learning_rate": 7.472362902924352e-05, - "loss": 0.013231572508811951, - "step": 1360 - }, - { - "epoch": 0.23273657289002558, - "grad_norm": 0.09622041881084442, - "learning_rate": 7.472051538025678e-05, - "loss": 0.013325902819633483, - "step": 1365 - }, - { - "epoch": 0.23358908780903667, - "grad_norm": 0.0792275071144104, - "learning_rate": 7.471738435549533e-05, - "loss": 0.011098403483629227, - "step": 1370 - }, - { - "epoch": 0.23444160272804773, - "grad_norm": 0.11833506077528, - "learning_rate": 7.471423595642084e-05, - "loss": 0.014845246076583862, - "step": 1375 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 0.07531571388244629, - "learning_rate": 7.471107018450309e-05, - "loss": 0.011498528718948364, - "step": 1380 - }, - { - "epoch": 0.2361466325660699, - "grad_norm": 0.08739249408245087, - "learning_rate": 7.470788704121995e-05, - "loss": 0.013241058588027954, - "step": 1385 - }, - { - "epoch": 0.236999147485081, - "grad_norm": 0.07113732397556305, - "learning_rate": 7.470468652805743e-05, - "loss": 0.009632241725921632, - "step": 1390 - }, - { - "epoch": 0.23785166240409208, - "grad_norm": 0.07838430255651474, - "learning_rate": 7.470146864650965e-05, - "loss": 0.009344339370727539, - "step": 1395 - }, - { - "epoch": 0.23870417732310314, - "grad_norm": 0.1086372509598732, - "learning_rate": 7.46982333980788e-05, - "loss": 0.014708395302295684, - "step": 1400 - }, - { - "epoch": 0.23955669224211423, - "grad_norm": 0.06628046184778214, - "learning_rate": 7.469498078427519e-05, - "loss": 0.011472882330417633, - "step": 1405 - }, - { - "epoch": 0.24040920716112532, - "grad_norm": 0.13003386557102203, - "learning_rate": 7.46917108066173e-05, - "loss": 0.011933800578117371, - "step": 1410 - }, - { - "epoch": 0.2412617220801364, - "grad_norm": 0.07365831732749939, - "learning_rate": 7.468842346663162e-05, - "loss": 0.0102902352809906, - "step": 1415 - }, - { - "epoch": 0.2421142369991475, - "grad_norm": 0.11540158838033676, - "learning_rate": 7.468511876585279e-05, - "loss": 0.012016136944293977, - "step": 1420 - }, - { - "epoch": 0.24296675191815856, - "grad_norm": 0.11687944084405899, - "learning_rate": 7.468179670582359e-05, - "loss": 0.01773642897605896, - "step": 1425 - }, - { - "epoch": 0.24381926683716965, - "grad_norm": 0.10464506596326828, - "learning_rate": 7.467845728809483e-05, - "loss": 0.009476778656244278, - "step": 1430 - }, - { - "epoch": 0.24467178175618073, - "grad_norm": 0.11479093879461288, - "learning_rate": 7.46751005142255e-05, - "loss": 0.014100676774978638, - "step": 1435 - }, - { - "epoch": 0.24552429667519182, - "grad_norm": 0.1388610154390335, - "learning_rate": 7.46717263857826e-05, - "loss": 0.012735754251480103, - "step": 1440 - }, - { - "epoch": 0.2463768115942029, - "grad_norm": 0.08022485673427582, - "learning_rate": 7.466833490434132e-05, - "loss": 0.014391189813613892, - "step": 1445 - }, - { - "epoch": 0.24722932651321397, - "grad_norm": 0.12174725532531738, - "learning_rate": 7.466492607148492e-05, - "loss": 0.013387931883335114, - "step": 1450 - }, - { - "epoch": 0.24808184143222506, - "grad_norm": 0.1102161779999733, - "learning_rate": 7.466149988880474e-05, - "loss": 0.01143224760890007, - "step": 1455 - }, - { - "epoch": 0.24893435635123615, - "grad_norm": 0.14878468215465546, - "learning_rate": 7.465805635790024e-05, - "loss": 0.010428830236196517, - "step": 1460 - }, - { - "epoch": 0.24978687127024723, - "grad_norm": 0.3046579658985138, - "learning_rate": 7.4654595480379e-05, - "loss": 0.012648795545101166, - "step": 1465 - }, - { - "epoch": 0.24995737425404946, - "eval_loss": 0.036103956401348114, - "eval_runtime": 3.6524, - "eval_samples_per_second": 68.995, - "eval_steps_per_second": 1.095, - "step": 1466 - }, - { - "eval_cer_subset": 0.01393977885257381, - "eval_cer_subset_edit_distance": 856, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 1466 - }, - { - "epoch": 0.2506393861892583, - "grad_norm": 0.10660448670387268, - "learning_rate": 7.465111725785664e-05, - "loss": 0.013486798107624053, - "step": 1470 - }, - { - "epoch": 0.2514919011082694, - "grad_norm": 0.13888458907604218, - "learning_rate": 7.464762169195693e-05, - "loss": 0.015365575253963471, - "step": 1475 - }, - { - "epoch": 0.25234441602728047, - "grad_norm": 0.14945067465305328, - "learning_rate": 7.464410878431169e-05, - "loss": 0.01226709708571434, - "step": 1480 - }, - { - "epoch": 0.2531969309462916, - "grad_norm": 0.09638198465108871, - "learning_rate": 7.464057853656089e-05, - "loss": 0.012688608467578888, - "step": 1485 - }, - { - "epoch": 0.25404944586530265, - "grad_norm": 0.05725576728582382, - "learning_rate": 7.463703095035256e-05, - "loss": 0.011445847153663636, - "step": 1490 - }, - { - "epoch": 0.2549019607843137, - "grad_norm": 0.08474720269441605, - "learning_rate": 7.463346602734283e-05, - "loss": 0.01112249493598938, - "step": 1495 - }, - { - "epoch": 0.2557544757033248, - "grad_norm": 0.08283067494630814, - "learning_rate": 7.462988376919592e-05, - "loss": 0.01144670695066452, - "step": 1500 - }, - { - "epoch": 0.2566069906223359, - "grad_norm": 0.13687758147716522, - "learning_rate": 7.462628417758415e-05, - "loss": 0.012893360853195191, - "step": 1505 - }, - { - "epoch": 0.257459505541347, - "grad_norm": 0.16319195926189423, - "learning_rate": 7.462266725418793e-05, - "loss": 0.014364737272262573, - "step": 1510 - }, - { - "epoch": 0.25831202046035806, - "grad_norm": 0.0693240761756897, - "learning_rate": 7.461903300069576e-05, - "loss": 0.011550360918045044, - "step": 1515 - }, - { - "epoch": 0.2591645353793691, - "grad_norm": 0.0994478389620781, - "learning_rate": 7.461538141880423e-05, - "loss": 0.011711706221103669, - "step": 1520 - }, - { - "epoch": 0.26001705029838024, - "grad_norm": 0.20310325920581818, - "learning_rate": 7.461171251021802e-05, - "loss": 0.013178233802318574, - "step": 1525 - }, - { - "epoch": 0.2608695652173913, - "grad_norm": 0.07798318564891815, - "learning_rate": 7.460802627664991e-05, - "loss": 0.011273499578237534, - "step": 1530 - }, - { - "epoch": 0.2617220801364024, - "grad_norm": 0.1308072805404663, - "learning_rate": 7.460432271982073e-05, - "loss": 0.008084958046674728, - "step": 1535 - }, - { - "epoch": 0.2625745950554135, - "grad_norm": 0.08926808834075928, - "learning_rate": 7.460060184145944e-05, - "loss": 0.011974562704563142, - "step": 1540 - }, - { - "epoch": 0.26342710997442453, - "grad_norm": 0.07462260872125626, - "learning_rate": 7.459686364330307e-05, - "loss": 0.007739155739545822, - "step": 1545 - }, - { - "epoch": 0.26427962489343565, - "grad_norm": 0.10904734581708908, - "learning_rate": 7.459310812709675e-05, - "loss": 0.012024204432964324, - "step": 1550 - }, - { - "epoch": 0.2651321398124467, - "grad_norm": 0.11935116350650787, - "learning_rate": 7.458933529459364e-05, - "loss": 0.012462839484214783, - "step": 1555 - }, - { - "epoch": 0.2659846547314578, - "grad_norm": 0.08920887112617493, - "learning_rate": 7.458554514755506e-05, - "loss": 0.01472131609916687, - "step": 1560 - }, - { - "epoch": 0.2668371696504689, - "grad_norm": 0.12231490015983582, - "learning_rate": 7.458173768775036e-05, - "loss": 0.014967297017574311, - "step": 1565 - }, - { - "epoch": 0.26768968456947995, - "grad_norm": 0.10691904276609421, - "learning_rate": 7.4577912916957e-05, - "loss": 0.013200350105762482, - "step": 1570 - }, - { - "epoch": 0.26854219948849106, - "grad_norm": 0.06267247349023819, - "learning_rate": 7.457407083696049e-05, - "loss": 0.011946959048509597, - "step": 1575 - }, - { - "epoch": 0.2693947144075021, - "grad_norm": 0.10732340067625046, - "learning_rate": 7.457021144955448e-05, - "loss": 0.012722471356391906, - "step": 1580 - }, - { - "epoch": 0.27024722932651324, - "grad_norm": 0.08628841489553452, - "learning_rate": 7.456633475654061e-05, - "loss": 0.010444843024015427, - "step": 1585 - }, - { - "epoch": 0.2710997442455243, - "grad_norm": 0.1017296314239502, - "learning_rate": 7.456244075972866e-05, - "loss": 0.017299896478652953, - "step": 1590 - }, - { - "epoch": 0.27195225916453536, - "grad_norm": 0.07065381854772568, - "learning_rate": 7.455852946093652e-05, - "loss": 0.01379164457321167, - "step": 1595 - }, - { - "epoch": 0.2728047740835465, - "grad_norm": 0.08550920337438583, - "learning_rate": 7.455460086199008e-05, - "loss": 0.011976235359907151, - "step": 1600 - }, - { - "epoch": 0.27365728900255754, - "grad_norm": 0.08075132966041565, - "learning_rate": 7.455065496472335e-05, - "loss": 0.012481977045536042, - "step": 1605 - }, - { - "epoch": 0.27450980392156865, - "grad_norm": 0.08838896453380585, - "learning_rate": 7.454669177097839e-05, - "loss": 0.011825743317604064, - "step": 1610 - }, - { - "epoch": 0.2753623188405797, - "grad_norm": 0.06823412328958511, - "learning_rate": 7.454271128260537e-05, - "loss": 0.014278222620487214, - "step": 1615 - }, - { - "epoch": 0.27621483375959077, - "grad_norm": 0.09612765908241272, - "learning_rate": 7.45387135014625e-05, - "loss": 0.009220580756664275, - "step": 1620 - }, - { - "epoch": 0.2770673486786019, - "grad_norm": 0.08564051240682602, - "learning_rate": 7.45346984294161e-05, - "loss": 0.015146958827972411, - "step": 1625 - }, - { - "epoch": 0.27791986359761295, - "grad_norm": 0.0729006826877594, - "learning_rate": 7.453066606834052e-05, - "loss": 0.012136349081993103, - "step": 1630 - }, - { - "epoch": 0.27877237851662406, - "grad_norm": 0.10457300394773483, - "learning_rate": 7.452661642011818e-05, - "loss": 0.014803081750869751, - "step": 1635 - }, - { - "epoch": 0.2796248934356351, - "grad_norm": 0.09881619364023209, - "learning_rate": 7.452254948663964e-05, - "loss": 0.012653107941150665, - "step": 1640 - }, - { - "epoch": 0.2804774083546462, - "grad_norm": 0.12094103544950485, - "learning_rate": 7.451846526980343e-05, - "loss": 0.011742380261421204, - "step": 1645 - }, - { - "epoch": 0.2813299232736573, - "grad_norm": 0.06668030470609665, - "learning_rate": 7.451436377151624e-05, - "loss": 0.01095641851425171, - "step": 1650 - }, - { - "epoch": 0.28218243819266836, - "grad_norm": 0.06907116621732712, - "learning_rate": 7.451024499369278e-05, - "loss": 0.01093050017952919, - "step": 1655 - }, - { - "epoch": 0.2830349531116795, - "grad_norm": 0.13372033834457397, - "learning_rate": 7.45061089382558e-05, - "loss": 0.012350015342235565, - "step": 1660 - }, - { - "epoch": 0.28388746803069054, - "grad_norm": 0.06432037055492401, - "learning_rate": 7.450195560713617e-05, - "loss": 0.010150979459285735, - "step": 1665 - }, - { - "epoch": 0.2847399829497016, - "grad_norm": 0.10098759829998016, - "learning_rate": 7.449778500227281e-05, - "loss": 0.01070861890912056, - "step": 1670 - }, - { - "epoch": 0.2855924978687127, - "grad_norm": 0.1708894968032837, - "learning_rate": 7.449359712561269e-05, - "loss": 0.01218695342540741, - "step": 1675 - }, - { - "epoch": 0.2864450127877238, - "grad_norm": 0.15045367181301117, - "learning_rate": 7.448939197911084e-05, - "loss": 0.012416082620620727, - "step": 1680 - }, - { - "epoch": 0.2872975277067349, - "grad_norm": 0.08867572993040085, - "learning_rate": 7.44851695647304e-05, - "loss": 0.011927373707294464, - "step": 1685 - }, - { - "epoch": 0.28815004262574595, - "grad_norm": 0.1402040272951126, - "learning_rate": 7.448092988444247e-05, - "loss": 0.011733450740575791, - "step": 1690 - }, - { - "epoch": 0.289002557544757, - "grad_norm": 0.10436082631349564, - "learning_rate": 7.447667294022631e-05, - "loss": 0.013171072304248809, - "step": 1695 - }, - { - "epoch": 0.2898550724637681, - "grad_norm": 0.10628762096166611, - "learning_rate": 7.447239873406923e-05, - "loss": 0.012366896122694015, - "step": 1700 - }, - { - "epoch": 0.2907075873827792, - "grad_norm": 0.09782184660434723, - "learning_rate": 7.446810726796653e-05, - "loss": 0.011275313794612885, - "step": 1705 - }, - { - "epoch": 0.2915601023017903, - "grad_norm": 0.08403825014829636, - "learning_rate": 7.446379854392162e-05, - "loss": 0.010051032900810242, - "step": 1710 - }, - { - "epoch": 0.29241261722080136, - "grad_norm": 0.07938918471336365, - "learning_rate": 7.445947256394596e-05, - "loss": 0.00972949042916298, - "step": 1715 - }, - { - "epoch": 0.2932651321398124, - "grad_norm": 0.09250234067440033, - "learning_rate": 7.445512933005906e-05, - "loss": 0.009316288679838181, - "step": 1720 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 0.08939237147569656, - "learning_rate": 7.445076884428848e-05, - "loss": 0.007942373305559159, - "step": 1725 - }, - { - "epoch": 0.2949701619778346, - "grad_norm": 0.06440749019384384, - "learning_rate": 7.444639110866985e-05, - "loss": 0.008772502094507218, - "step": 1730 - }, - { - "epoch": 0.2958226768968457, - "grad_norm": 0.0980759784579277, - "learning_rate": 7.444199612524684e-05, - "loss": 0.0127939835190773, - "step": 1735 - }, - { - "epoch": 0.2966751918158568, - "grad_norm": 0.133849635720253, - "learning_rate": 7.443758389607117e-05, - "loss": 0.011026865988969802, - "step": 1740 - }, - { - "epoch": 0.29752770673486784, - "grad_norm": 0.08664857596158981, - "learning_rate": 7.443315442320263e-05, - "loss": 0.010273561626672745, - "step": 1745 - }, - { - "epoch": 0.29838022165387895, - "grad_norm": 0.11462656408548355, - "learning_rate": 7.442870770870902e-05, - "loss": 0.012825533747673035, - "step": 1750 - }, - { - "epoch": 0.29923273657289, - "grad_norm": 0.12586012482643127, - "learning_rate": 7.442424375466624e-05, - "loss": 0.01315489411354065, - "step": 1755 - }, - { - "epoch": 0.30008525149190113, - "grad_norm": 0.07139981538057327, - "learning_rate": 7.441976256315819e-05, - "loss": 0.010728174448013305, - "step": 1760 - }, - { - "epoch": 0.3009377664109122, - "grad_norm": 0.06837856024503708, - "learning_rate": 7.441526413627685e-05, - "loss": 0.012408022582530976, - "step": 1765 - }, - { - "epoch": 0.30179028132992325, - "grad_norm": 0.05851417034864426, - "learning_rate": 7.441074847612224e-05, - "loss": 0.009401807188987732, - "step": 1770 - }, - { - "epoch": 0.30264279624893436, - "grad_norm": 0.09595180302858353, - "learning_rate": 7.44062155848024e-05, - "loss": 0.010888323932886124, - "step": 1775 - }, - { - "epoch": 0.3034953111679454, - "grad_norm": 0.0811101421713829, - "learning_rate": 7.440166546443347e-05, - "loss": 0.00998341292142868, - "step": 1780 - }, - { - "epoch": 0.30434782608695654, - "grad_norm": 0.13257169723510742, - "learning_rate": 7.439709811713958e-05, - "loss": 0.014603719115257263, - "step": 1785 - }, - { - "epoch": 0.3052003410059676, - "grad_norm": 0.1428811252117157, - "learning_rate": 7.439251354505289e-05, - "loss": 0.01388871967792511, - "step": 1790 - }, - { - "epoch": 0.30605285592497866, - "grad_norm": 0.08253402262926102, - "learning_rate": 7.438791175031367e-05, - "loss": 0.010171836614608765, - "step": 1795 - }, - { - "epoch": 0.3069053708439898, - "grad_norm": 0.05991052836179733, - "learning_rate": 7.438329273507019e-05, - "loss": 0.01470649391412735, - "step": 1800 - }, - { - "epoch": 0.30775788576300084, - "grad_norm": 0.10785503685474396, - "learning_rate": 7.437865650147873e-05, - "loss": 0.012740308046340942, - "step": 1805 - }, - { - "epoch": 0.30861040068201195, - "grad_norm": 0.093068428337574, - "learning_rate": 7.437400305170367e-05, - "loss": 0.01183861643075943, - "step": 1810 - }, - { - "epoch": 0.309462915601023, - "grad_norm": 0.08662707358598709, - "learning_rate": 7.436933238791737e-05, - "loss": 0.011762722581624984, - "step": 1815 - }, - { - "epoch": 0.3103154305200341, - "grad_norm": 0.07826617360115051, - "learning_rate": 7.436464451230027e-05, - "loss": 0.009368828684091567, - "step": 1820 - }, - { - "epoch": 0.3111679454390452, - "grad_norm": 0.1295643448829651, - "learning_rate": 7.435993942704082e-05, - "loss": 0.010699732601642609, - "step": 1825 - }, - { - "epoch": 0.31202046035805625, - "grad_norm": 0.1412370651960373, - "learning_rate": 7.43552171343355e-05, - "loss": 0.0124404676258564, - "step": 1830 - }, - { - "epoch": 0.31287297527706737, - "grad_norm": 0.07793306559324265, - "learning_rate": 7.435047763638885e-05, - "loss": 0.010793016105890275, - "step": 1835 - }, - { - "epoch": 0.3137254901960784, - "grad_norm": 0.1273961067199707, - "learning_rate": 7.434572093541341e-05, - "loss": 0.012959575653076172, - "step": 1840 - }, - { - "epoch": 0.3145780051150895, - "grad_norm": 0.10340052098035812, - "learning_rate": 7.434094703362978e-05, - "loss": 0.011804693937301635, - "step": 1845 - }, - { - "epoch": 0.3154305200341006, - "grad_norm": 0.07878883183002472, - "learning_rate": 7.433615593326657e-05, - "loss": 0.011087532341480254, - "step": 1850 - }, - { - "epoch": 0.31628303495311166, - "grad_norm": 0.08166638761758804, - "learning_rate": 7.433134763656042e-05, - "loss": 0.010111966729164123, - "step": 1855 - }, - { - "epoch": 0.3171355498721228, - "grad_norm": 0.12048157304525375, - "learning_rate": 7.432652214575603e-05, - "loss": 0.013003784418106078, - "step": 1860 - }, - { - "epoch": 0.31798806479113384, - "grad_norm": 0.08009333908557892, - "learning_rate": 7.432167946310605e-05, - "loss": 0.01212536245584488, - "step": 1865 - }, - { - "epoch": 0.3188405797101449, - "grad_norm": 0.07344945520162582, - "learning_rate": 7.431681959087126e-05, - "loss": 0.011613032221794129, - "step": 1870 - }, - { - "epoch": 0.319693094629156, - "grad_norm": 0.09358638525009155, - "learning_rate": 7.431194253132037e-05, - "loss": 0.011946377158164979, - "step": 1875 - }, - { - "epoch": 0.3205456095481671, - "grad_norm": 0.14091502130031586, - "learning_rate": 7.430704828673016e-05, - "loss": 0.012845572829246522, - "step": 1880 - }, - { - "epoch": 0.3213981244671782, - "grad_norm": 0.0754130631685257, - "learning_rate": 7.430213685938543e-05, - "loss": 0.011171463876962662, - "step": 1885 - }, - { - "epoch": 0.32225063938618925, - "grad_norm": 0.10210556536912918, - "learning_rate": 7.429720825157901e-05, - "loss": 0.010276605188846589, - "step": 1890 - }, - { - "epoch": 0.32310315430520037, - "grad_norm": 0.10094697028398514, - "learning_rate": 7.429226246561173e-05, - "loss": 0.01233583763241768, - "step": 1895 - }, - { - "epoch": 0.32395566922421143, - "grad_norm": 0.0673881471157074, - "learning_rate": 7.428729950379244e-05, - "loss": 0.008631937205791473, - "step": 1900 - }, - { - "epoch": 0.3248081841432225, - "grad_norm": 0.11807650327682495, - "learning_rate": 7.428231936843803e-05, - "loss": 0.012879209220409393, - "step": 1905 - }, - { - "epoch": 0.3256606990622336, - "grad_norm": 0.0627446100115776, - "learning_rate": 7.427732206187338e-05, - "loss": 0.011548225581645966, - "step": 1910 - }, - { - "epoch": 0.32651321398124467, - "grad_norm": 0.09312627464532852, - "learning_rate": 7.427230758643139e-05, - "loss": 0.012763653695583344, - "step": 1915 - }, - { - "epoch": 0.3273657289002558, - "grad_norm": 0.12694048881530762, - "learning_rate": 7.426727594445302e-05, - "loss": 0.014219759404659272, - "step": 1920 - }, - { - "epoch": 0.32821824381926684, - "grad_norm": 0.09415233880281448, - "learning_rate": 7.426222713828717e-05, - "loss": 0.01088135689496994, - "step": 1925 - }, - { - "epoch": 0.3290707587382779, - "grad_norm": 0.1079363226890564, - "learning_rate": 7.425716117029082e-05, - "loss": 0.013090427219867706, - "step": 1930 - }, - { - "epoch": 0.329923273657289, - "grad_norm": 0.10847736895084381, - "learning_rate": 7.42520780428289e-05, - "loss": 0.011184506118297577, - "step": 1935 - }, - { - "epoch": 0.3307757885763001, - "grad_norm": 0.12416253983974457, - "learning_rate": 7.424697775827442e-05, - "loss": 0.012871085107326508, - "step": 1940 - }, - { - "epoch": 0.3316283034953112, - "grad_norm": 0.08419755846261978, - "learning_rate": 7.424186031900833e-05, - "loss": 0.01026538610458374, - "step": 1945 - }, - { - "epoch": 0.33248081841432225, - "grad_norm": 0.06923236697912216, - "learning_rate": 7.423672572741965e-05, - "loss": 0.012079264223575591, - "step": 1950 - }, - { - "epoch": 0.3333333333333333, - "grad_norm": 0.08516070991754532, - "learning_rate": 7.423157398590534e-05, - "loss": 0.011150284111499787, - "step": 1955 - }, - { - "epoch": 0.33418584825234443, - "grad_norm": 0.054969049990177155, - "learning_rate": 7.422640509687045e-05, - "loss": 0.008261225372552871, - "step": 1960 - }, - { - "epoch": 0.3350383631713555, - "grad_norm": 0.09037495404481888, - "learning_rate": 7.422121906272795e-05, - "loss": 0.015576986968517304, - "step": 1965 - }, - { - "epoch": 0.3358908780903666, - "grad_norm": 0.08676491677761078, - "learning_rate": 7.421601588589889e-05, - "loss": 0.01942193806171417, - "step": 1970 - }, - { - "epoch": 0.33674339300937767, - "grad_norm": 0.09090764820575714, - "learning_rate": 7.421079556881224e-05, - "loss": 0.012568703293800354, - "step": 1975 - }, - { - "epoch": 0.3375959079283887, - "grad_norm": 0.07859542965888977, - "learning_rate": 7.420555811390505e-05, - "loss": 0.011662108451128006, - "step": 1980 - }, - { - "epoch": 0.33844842284739984, - "grad_norm": 0.06368016451597214, - "learning_rate": 7.420030352362235e-05, - "loss": 0.010762494802474976, - "step": 1985 - }, - { - "epoch": 0.3393009377664109, - "grad_norm": 0.10950745642185211, - "learning_rate": 7.419503180041712e-05, - "loss": 0.012577894330024719, - "step": 1990 - }, - { - "epoch": 0.340153452685422, - "grad_norm": 0.07888182997703552, - "learning_rate": 7.41897429467504e-05, - "loss": 0.009134671837091445, - "step": 1995 - }, - { - "epoch": 0.3410059676044331, - "grad_norm": 0.08978903293609619, - "learning_rate": 7.41844369650912e-05, - "loss": 0.011774566024541855, - "step": 2000 - }, - { - "epoch": 0.34185848252344414, - "grad_norm": 0.07103633135557175, - "learning_rate": 7.417911385791653e-05, - "loss": 0.011116493493318558, - "step": 2005 - }, - { - "epoch": 0.34271099744245526, - "grad_norm": 0.07445147633552551, - "learning_rate": 7.417377362771138e-05, - "loss": 0.012135914713144302, - "step": 2010 - }, - { - "epoch": 0.3435635123614663, - "grad_norm": 0.09372841566801071, - "learning_rate": 7.416841627696876e-05, - "loss": 0.014101208746433258, - "step": 2015 - }, - { - "epoch": 0.34441602728047743, - "grad_norm": 0.10181085020303726, - "learning_rate": 7.416304180818966e-05, - "loss": 0.010917666554450988, - "step": 2020 - }, - { - "epoch": 0.3452685421994885, - "grad_norm": 0.08702226728200912, - "learning_rate": 7.415765022388305e-05, - "loss": 0.012508213520050049, - "step": 2025 - }, - { - "epoch": 0.34612105711849955, - "grad_norm": 0.11725348234176636, - "learning_rate": 7.415224152656591e-05, - "loss": 0.012123394012451171, - "step": 2030 - }, - { - "epoch": 0.34697357203751067, - "grad_norm": 0.10797812044620514, - "learning_rate": 7.414681571876321e-05, - "loss": 0.011308898031711579, - "step": 2035 - }, - { - "epoch": 0.34782608695652173, - "grad_norm": 0.07944193482398987, - "learning_rate": 7.414137280300787e-05, - "loss": 0.008828282356262207, - "step": 2040 - }, - { - "epoch": 0.34867860187553285, - "grad_norm": 0.09413408488035202, - "learning_rate": 7.413591278184086e-05, - "loss": 0.010974615067243575, - "step": 2045 - }, - { - "epoch": 0.3495311167945439, - "grad_norm": 0.13984905183315277, - "learning_rate": 7.413043565781107e-05, - "loss": 0.013567428290843963, - "step": 2050 - }, - { - "epoch": 0.35038363171355497, - "grad_norm": 0.09445049613714218, - "learning_rate": 7.41249414334754e-05, - "loss": 0.011122822761535645, - "step": 2055 - }, - { - "epoch": 0.3512361466325661, - "grad_norm": 0.07995510846376419, - "learning_rate": 7.411943011139877e-05, - "loss": 0.009908045828342437, - "step": 2060 - }, - { - "epoch": 0.35208866155157714, - "grad_norm": 0.1185273677110672, - "learning_rate": 7.411390169415402e-05, - "loss": 0.012709785997867585, - "step": 2065 - }, - { - "epoch": 0.35294117647058826, - "grad_norm": 0.11713512986898422, - "learning_rate": 7.4108356184322e-05, - "loss": 0.009765231609344482, - "step": 2070 - }, - { - "epoch": 0.3537936913895993, - "grad_norm": 0.06523539125919342, - "learning_rate": 7.410279358449155e-05, - "loss": 0.0113253653049469, - "step": 2075 - }, - { - "epoch": 0.3546462063086104, - "grad_norm": 0.07587762176990509, - "learning_rate": 7.409721389725948e-05, - "loss": 0.009385265409946442, - "step": 2080 - }, - { - "epoch": 0.3554987212276215, - "grad_norm": 0.05211614444851875, - "learning_rate": 7.409161712523056e-05, - "loss": 0.012498895078897477, - "step": 2085 - }, - { - "epoch": 0.35635123614663256, - "grad_norm": 0.12545894086360931, - "learning_rate": 7.408600327101755e-05, - "loss": 0.012212803959846497, - "step": 2090 - }, - { - "epoch": 0.35720375106564367, - "grad_norm": 0.10047369450330734, - "learning_rate": 7.40803723372412e-05, - "loss": 0.012341489642858505, - "step": 2095 - }, - { - "epoch": 0.35805626598465473, - "grad_norm": 0.13728737831115723, - "learning_rate": 7.40747243265302e-05, - "loss": 0.011351624131202697, - "step": 2100 - }, - { - "epoch": 0.3589087809036658, - "grad_norm": 0.1251213699579239, - "learning_rate": 7.406905924152125e-05, - "loss": 0.013545188307762145, - "step": 2105 - }, - { - "epoch": 0.3597612958226769, - "grad_norm": 0.07805601507425308, - "learning_rate": 7.406337708485897e-05, - "loss": 0.010711775720119476, - "step": 2110 - }, - { - "epoch": 0.36061381074168797, - "grad_norm": 0.08311845362186432, - "learning_rate": 7.405767785919598e-05, - "loss": 0.01128876730799675, - "step": 2115 - }, - { - "epoch": 0.3614663256606991, - "grad_norm": 0.09670841693878174, - "learning_rate": 7.405196156719291e-05, - "loss": 0.013085599243640899, - "step": 2120 - }, - { - "epoch": 0.36231884057971014, - "grad_norm": 0.10827390104532242, - "learning_rate": 7.404622821151829e-05, - "loss": 0.011315967142581939, - "step": 2125 - }, - { - "epoch": 0.3631713554987212, - "grad_norm": 0.08578862994909286, - "learning_rate": 7.404047779484862e-05, - "loss": 0.01172153502702713, - "step": 2130 - }, - { - "epoch": 0.3640238704177323, - "grad_norm": 0.08786064386367798, - "learning_rate": 7.403471031986841e-05, - "loss": 0.010834509134292602, - "step": 2135 - }, - { - "epoch": 0.3648763853367434, - "grad_norm": 0.07956185191869736, - "learning_rate": 7.402892578927012e-05, - "loss": 0.01222250759601593, - "step": 2140 - }, - { - "epoch": 0.3657289002557545, - "grad_norm": 0.10179547220468521, - "learning_rate": 7.402312420575414e-05, - "loss": 0.010667824000120164, - "step": 2145 - }, - { - "epoch": 0.36658141517476556, - "grad_norm": 0.10311263799667358, - "learning_rate": 7.401730557202884e-05, - "loss": 0.014463961124420166, - "step": 2150 - }, - { - "epoch": 0.3674339300937766, - "grad_norm": 0.0935206189751625, - "learning_rate": 7.401146989081058e-05, - "loss": 0.010451390594244003, - "step": 2155 - }, - { - "epoch": 0.36828644501278773, - "grad_norm": 0.08164738863706589, - "learning_rate": 7.400561716482362e-05, - "loss": 0.013009518384933472, - "step": 2160 - }, - { - "epoch": 0.3691389599317988, - "grad_norm": 0.0638088807463646, - "learning_rate": 7.399974739680022e-05, - "loss": 0.0109320767223835, - "step": 2165 - }, - { - "epoch": 0.3699914748508099, - "grad_norm": 0.09591665863990784, - "learning_rate": 7.399386058948057e-05, - "loss": 0.01293652206659317, - "step": 2170 - }, - { - "epoch": 0.37084398976982097, - "grad_norm": 0.08929681777954102, - "learning_rate": 7.398795674561285e-05, - "loss": 0.011034403741359711, - "step": 2175 - }, - { - "epoch": 0.37169650468883203, - "grad_norm": 0.07356081902980804, - "learning_rate": 7.398203586795315e-05, - "loss": 0.010477699339389801, - "step": 2180 - }, - { - "epoch": 0.37254901960784315, - "grad_norm": 0.1117938682436943, - "learning_rate": 7.397609795926555e-05, - "loss": 0.008920109272003174, - "step": 2185 - }, - { - "epoch": 0.3734015345268542, - "grad_norm": 0.10849595069885254, - "learning_rate": 7.397014302232204e-05, - "loss": 0.01170756369829178, - "step": 2190 - }, - { - "epoch": 0.3742540494458653, - "grad_norm": 0.08509895205497742, - "learning_rate": 7.396417105990261e-05, - "loss": 0.010042114555835724, - "step": 2195 - }, - { - "epoch": 0.3751065643648764, - "grad_norm": 0.10500915348529816, - "learning_rate": 7.395818207479515e-05, - "loss": 0.011792914569377899, - "step": 2200 - }, - { - "epoch": 0.37595907928388744, - "grad_norm": 0.06618086993694305, - "learning_rate": 7.395217606979553e-05, - "loss": 0.011084456741809846, - "step": 2205 - }, - { - "epoch": 0.37681159420289856, - "grad_norm": 0.08622384816408157, - "learning_rate": 7.394615304770756e-05, - "loss": 0.010955430567264557, - "step": 2210 - }, - { - "epoch": 0.3776641091219096, - "grad_norm": 0.10002898424863815, - "learning_rate": 7.394011301134296e-05, - "loss": 0.011026810109615325, - "step": 2215 - }, - { - "epoch": 0.37851662404092073, - "grad_norm": 0.16406390070915222, - "learning_rate": 7.393405596352144e-05, - "loss": 0.010511884093284607, - "step": 2220 - }, - { - "epoch": 0.3793691389599318, - "grad_norm": 0.077234648168087, - "learning_rate": 7.392798190707062e-05, - "loss": 0.011723069101572036, - "step": 2225 - }, - { - "epoch": 0.38022165387894286, - "grad_norm": 0.09080372750759125, - "learning_rate": 7.392189084482609e-05, - "loss": 0.010011065006256103, - "step": 2230 - }, - { - "epoch": 0.38107416879795397, - "grad_norm": 0.08161097019910812, - "learning_rate": 7.391578277963134e-05, - "loss": 0.012426529079675674, - "step": 2235 - }, - { - "epoch": 0.38192668371696503, - "grad_norm": 0.09220891445875168, - "learning_rate": 7.390965771433783e-05, - "loss": 0.011983324587345124, - "step": 2240 - }, - { - "epoch": 0.38277919863597615, - "grad_norm": 0.10752015560865402, - "learning_rate": 7.390351565180495e-05, - "loss": 0.014156198501586914, - "step": 2245 - }, - { - "epoch": 0.3836317135549872, - "grad_norm": 0.05059373378753662, - "learning_rate": 7.38973565949e-05, - "loss": 0.00998034030199051, - "step": 2250 - }, - { - "epoch": 0.38448422847399827, - "grad_norm": 0.11214456707239151, - "learning_rate": 7.389118054649824e-05, - "loss": 0.01075390875339508, - "step": 2255 - }, - { - "epoch": 0.3853367433930094, - "grad_norm": 0.07631754130125046, - "learning_rate": 7.388498750948286e-05, - "loss": 0.014638753235340118, - "step": 2260 - }, - { - "epoch": 0.38618925831202044, - "grad_norm": 0.07249671965837479, - "learning_rate": 7.387877748674499e-05, - "loss": 0.011368723213672638, - "step": 2265 - }, - { - "epoch": 0.38704177323103156, - "grad_norm": 0.11984748393297195, - "learning_rate": 7.387255048118364e-05, - "loss": 0.011021500825881958, - "step": 2270 - }, - { - "epoch": 0.3878942881500426, - "grad_norm": 0.08478229492902756, - "learning_rate": 7.386630649570581e-05, - "loss": 0.009952519088983536, - "step": 2275 - }, - { - "epoch": 0.3887468030690537, - "grad_norm": 0.11780049651861191, - "learning_rate": 7.386004553322639e-05, - "loss": 0.009453963488340378, - "step": 2280 - }, - { - "epoch": 0.3895993179880648, - "grad_norm": 0.06949981302022934, - "learning_rate": 7.38537675966682e-05, - "loss": 0.009042493999004364, - "step": 2285 - }, - { - "epoch": 0.39045183290707586, - "grad_norm": 0.11411654949188232, - "learning_rate": 7.3847472688962e-05, - "loss": 0.013985235989093781, - "step": 2290 - }, - { - "epoch": 0.391304347826087, - "grad_norm": 0.11030828952789307, - "learning_rate": 7.384116081304647e-05, - "loss": 0.01135389506816864, - "step": 2295 - }, - { - "epoch": 0.39215686274509803, - "grad_norm": 0.0808996856212616, - "learning_rate": 7.38348319718682e-05, - "loss": 0.011089587211608886, - "step": 2300 - }, - { - "epoch": 0.39300937766410915, - "grad_norm": 0.11319196224212646, - "learning_rate": 7.382848616838167e-05, - "loss": 0.01407056450843811, - "step": 2305 - }, - { - "epoch": 0.3938618925831202, - "grad_norm": 0.09301812201738358, - "learning_rate": 7.382212340554937e-05, - "loss": 0.012283433228731155, - "step": 2310 - }, - { - "epoch": 0.39471440750213127, - "grad_norm": 0.08611076325178146, - "learning_rate": 7.381574368634159e-05, - "loss": 0.012206315249204635, - "step": 2315 - }, - { - "epoch": 0.3955669224211424, - "grad_norm": 0.08104816824197769, - "learning_rate": 7.380934701373665e-05, - "loss": 0.01059889942407608, - "step": 2320 - }, - { - "epoch": 0.39641943734015345, - "grad_norm": 0.09607693552970886, - "learning_rate": 7.380293339072067e-05, - "loss": 0.010189100354909896, - "step": 2325 - }, - { - "epoch": 0.39727195225916456, - "grad_norm": 0.08985438197851181, - "learning_rate": 7.37965028202878e-05, - "loss": 0.01145355924963951, - "step": 2330 - }, - { - "epoch": 0.3981244671781756, - "grad_norm": 0.0767461284995079, - "learning_rate": 7.379005530544e-05, - "loss": 0.012533161044120788, - "step": 2335 - }, - { - "epoch": 0.3989769820971867, - "grad_norm": 0.17541736364364624, - "learning_rate": 7.378359084918724e-05, - "loss": 0.011619434505701066, - "step": 2340 - }, - { - "epoch": 0.3998294970161978, - "grad_norm": 0.07870234549045563, - "learning_rate": 7.377710945454728e-05, - "loss": 0.013362208008766174, - "step": 2345 - }, - { - "epoch": 0.40068201193520886, - "grad_norm": 0.08661636710166931, - "learning_rate": 7.377061112454589e-05, - "loss": 0.011086350679397583, - "step": 2350 - }, - { - "epoch": 0.40153452685422, - "grad_norm": 0.08467904478311539, - "learning_rate": 7.376409586221668e-05, - "loss": 0.008972878754138946, - "step": 2355 - }, - { - "epoch": 0.40238704177323104, - "grad_norm": 0.09345834702253342, - "learning_rate": 7.375756367060121e-05, - "loss": 0.01281469464302063, - "step": 2360 - }, - { - "epoch": 0.4032395566922421, - "grad_norm": 0.10789518058300018, - "learning_rate": 7.375101455274893e-05, - "loss": 0.012343473732471466, - "step": 2365 - }, - { - "epoch": 0.4040920716112532, - "grad_norm": 0.06546701490879059, - "learning_rate": 7.374444851171716e-05, - "loss": 0.012971158325672149, - "step": 2370 - }, - { - "epoch": 0.40494458653026427, - "grad_norm": 0.08968871831893921, - "learning_rate": 7.373786555057117e-05, - "loss": 0.012170027941465378, - "step": 2375 - }, - { - "epoch": 0.4057971014492754, - "grad_norm": 0.058557040989398956, - "learning_rate": 7.373126567238412e-05, - "loss": 0.009915658086538316, - "step": 2380 - }, - { - "epoch": 0.40664961636828645, - "grad_norm": 0.08734243363142014, - "learning_rate": 7.3724648880237e-05, - "loss": 0.009043127298355103, - "step": 2385 - }, - { - "epoch": 0.4075021312872975, - "grad_norm": 0.09249505400657654, - "learning_rate": 7.371801517721879e-05, - "loss": 0.008064758032560349, - "step": 2390 - }, - { - "epoch": 0.4083546462063086, - "grad_norm": 0.09015105664730072, - "learning_rate": 7.371136456642631e-05, - "loss": 0.007721304893493652, - "step": 2395 - }, - { - "epoch": 0.4092071611253197, - "grad_norm": 0.08557724207639694, - "learning_rate": 7.37046970509643e-05, - "loss": 0.010766822844743729, - "step": 2400 - }, - { - "epoch": 0.4100596760443308, - "grad_norm": 0.08001160621643066, - "learning_rate": 7.369801263394536e-05, - "loss": 0.00953015759587288, - "step": 2405 - }, - { - "epoch": 0.41091219096334186, - "grad_norm": 0.08470463752746582, - "learning_rate": 7.369131131849e-05, - "loss": 0.010154610127210617, - "step": 2410 - }, - { - "epoch": 0.4117647058823529, - "grad_norm": 0.07110592722892761, - "learning_rate": 7.368459310772664e-05, - "loss": 0.010146965831518173, - "step": 2415 - }, - { - "epoch": 0.41261722080136404, - "grad_norm": 0.06808072328567505, - "learning_rate": 7.367785800479152e-05, - "loss": 0.01043560653924942, - "step": 2420 - }, - { - "epoch": 0.4134697357203751, - "grad_norm": 0.09226541966199875, - "learning_rate": 7.367110601282884e-05, - "loss": 0.011138775944709777, - "step": 2425 - }, - { - "epoch": 0.4143222506393862, - "grad_norm": 0.08650510013103485, - "learning_rate": 7.366433713499067e-05, - "loss": 0.011451859772205353, - "step": 2430 - }, - { - "epoch": 0.4151747655583973, - "grad_norm": 0.11477349698543549, - "learning_rate": 7.365755137443691e-05, - "loss": 0.013105396926403046, - "step": 2435 - }, - { - "epoch": 0.41602728047740833, - "grad_norm": 0.1117088794708252, - "learning_rate": 7.365074873433541e-05, - "loss": 0.01190647780895233, - "step": 2440 - }, - { - "epoch": 0.41687979539641945, - "grad_norm": 0.058514054864645004, - "learning_rate": 7.364392921786185e-05, - "loss": 0.011006749421358108, - "step": 2445 - }, - { - "epoch": 0.4177323103154305, - "grad_norm": 0.0925084576010704, - "learning_rate": 7.363709282819981e-05, - "loss": 0.011449025571346283, - "step": 2450 - }, - { - "epoch": 0.4185848252344416, - "grad_norm": 0.10087555646896362, - "learning_rate": 7.363023956854074e-05, - "loss": 0.011715477705001831, - "step": 2455 - }, - { - "epoch": 0.4194373401534527, - "grad_norm": 0.08760760724544525, - "learning_rate": 7.362336944208399e-05, - "loss": 0.011089532822370528, - "step": 2460 - }, - { - "epoch": 0.42028985507246375, - "grad_norm": 0.09802501648664474, - "learning_rate": 7.361648245203674e-05, - "loss": 0.012181267142295837, - "step": 2465 - }, - { - "epoch": 0.42114236999147486, - "grad_norm": 0.06908553838729858, - "learning_rate": 7.36095786016141e-05, - "loss": 0.010319410264492035, - "step": 2470 - }, - { - "epoch": 0.4219948849104859, - "grad_norm": 0.07190519571304321, - "learning_rate": 7.360265789403896e-05, - "loss": 0.013445201516151428, - "step": 2475 - }, - { - "epoch": 0.42284739982949704, - "grad_norm": 0.06683836877346039, - "learning_rate": 7.359572033254219e-05, - "loss": 0.008111725002527237, - "step": 2480 - }, - { - "epoch": 0.4236999147485081, - "grad_norm": 0.07094739377498627, - "learning_rate": 7.358876592036245e-05, - "loss": 0.012130254507064819, - "step": 2485 - }, - { - "epoch": 0.42455242966751916, - "grad_norm": 0.11974254250526428, - "learning_rate": 7.358179466074629e-05, - "loss": 0.011426160484552384, - "step": 2490 - }, - { - "epoch": 0.4254049445865303, - "grad_norm": 0.07710634917020798, - "learning_rate": 7.357480655694814e-05, - "loss": 0.010044369101524352, - "step": 2495 - }, - { - "epoch": 0.42625745950554134, - "grad_norm": 0.08417962491512299, - "learning_rate": 7.356780161223026e-05, - "loss": 0.010821688175201415, - "step": 2500 - }, - { - "epoch": 0.42710997442455245, - "grad_norm": 0.11058598011732101, - "learning_rate": 7.35607798298628e-05, - "loss": 0.012949730455875396, - "step": 2505 - }, - { - "epoch": 0.4279624893435635, - "grad_norm": 0.08686384558677673, - "learning_rate": 7.355374121312377e-05, - "loss": 0.009096769988536835, - "step": 2510 - }, - { - "epoch": 0.4288150042625746, - "grad_norm": 0.11153281480073929, - "learning_rate": 7.354668576529903e-05, - "loss": 0.010433172434568405, - "step": 2515 - }, - { - "epoch": 0.4296675191815857, - "grad_norm": 0.08490245044231415, - "learning_rate": 7.353961348968229e-05, - "loss": 0.008478586375713349, - "step": 2520 - }, - { - "epoch": 0.43052003410059675, - "grad_norm": 0.06651579588651657, - "learning_rate": 7.353252438957511e-05, - "loss": 0.012342555820941925, - "step": 2525 - }, - { - "epoch": 0.43137254901960786, - "grad_norm": 0.08961665630340576, - "learning_rate": 7.352541846828694e-05, - "loss": 0.010387994349002838, - "step": 2530 - }, - { - "epoch": 0.4322250639386189, - "grad_norm": 0.08726584166288376, - "learning_rate": 7.351829572913505e-05, - "loss": 0.009760166704654693, - "step": 2535 - }, - { - "epoch": 0.43307757885763, - "grad_norm": 0.06280151754617691, - "learning_rate": 7.351115617544459e-05, - "loss": 0.01087048500776291, - "step": 2540 - }, - { - "epoch": 0.4339300937766411, - "grad_norm": 0.09519831836223602, - "learning_rate": 7.350399981054851e-05, - "loss": 0.011516393721103668, - "step": 2545 - }, - { - "epoch": 0.43478260869565216, - "grad_norm": 0.09179427474737167, - "learning_rate": 7.349682663778766e-05, - "loss": 0.013757939636707305, - "step": 2550 - }, - { - "epoch": 0.4356351236146633, - "grad_norm": 0.10378465801477432, - "learning_rate": 7.34896366605107e-05, - "loss": 0.011337973177433014, - "step": 2555 - }, - { - "epoch": 0.43648763853367434, - "grad_norm": 0.14043129980564117, - "learning_rate": 7.348242988207418e-05, - "loss": 0.01203509122133255, - "step": 2560 - }, - { - "epoch": 0.4373401534526854, - "grad_norm": 0.06442756950855255, - "learning_rate": 7.347520630584243e-05, - "loss": 0.007210708409547806, - "step": 2565 - }, - { - "epoch": 0.4381926683716965, - "grad_norm": 0.05981998145580292, - "learning_rate": 7.346796593518768e-05, - "loss": 0.009825873374938964, - "step": 2570 - }, - { - "epoch": 0.4390451832907076, - "grad_norm": 0.10198855400085449, - "learning_rate": 7.346070877348996e-05, - "loss": 0.013066151738166809, - "step": 2575 - }, - { - "epoch": 0.4398976982097187, - "grad_norm": 0.12545716762542725, - "learning_rate": 7.345343482413716e-05, - "loss": 0.008229418843984603, - "step": 2580 - }, - { - "epoch": 0.44075021312872975, - "grad_norm": 0.1352240890264511, - "learning_rate": 7.344614409052501e-05, - "loss": 0.013183671236038207, - "step": 2585 - }, - { - "epoch": 0.4416027280477408, - "grad_norm": 0.07198570668697357, - "learning_rate": 7.343883657605704e-05, - "loss": 0.010311058908700942, - "step": 2590 - }, - { - "epoch": 0.4424552429667519, - "grad_norm": 0.08454001694917679, - "learning_rate": 7.343151228414469e-05, - "loss": 0.009928110986948013, - "step": 2595 - }, - { - "epoch": 0.443307757885763, - "grad_norm": 0.07289708405733109, - "learning_rate": 7.342417121820714e-05, - "loss": 0.011071844398975373, - "step": 2600 - }, - { - "epoch": 0.4441602728047741, - "grad_norm": 0.12291301786899567, - "learning_rate": 7.341681338167145e-05, - "loss": 0.011248499900102616, - "step": 2605 - }, - { - "epoch": 0.44501278772378516, - "grad_norm": 0.14277565479278564, - "learning_rate": 7.340943877797252e-05, - "loss": 0.010025183856487273, - "step": 2610 - }, - { - "epoch": 0.4458653026427962, - "grad_norm": 0.07569251209497452, - "learning_rate": 7.340204741055304e-05, - "loss": 0.009996208548545837, - "step": 2615 - }, - { - "epoch": 0.44671781756180734, - "grad_norm": 0.10494589060544968, - "learning_rate": 7.339463928286357e-05, - "loss": 0.01392391324043274, - "step": 2620 - }, - { - "epoch": 0.4475703324808184, - "grad_norm": 0.14377856254577637, - "learning_rate": 7.338721439836245e-05, - "loss": 0.012823046743869781, - "step": 2625 - }, - { - "epoch": 0.4484228473998295, - "grad_norm": 0.06943785399198532, - "learning_rate": 7.337977276051586e-05, - "loss": 0.009452010691165923, - "step": 2630 - }, - { - "epoch": 0.4492753623188406, - "grad_norm": 0.09933419525623322, - "learning_rate": 7.337231437279783e-05, - "loss": 0.008945996314287186, - "step": 2635 - }, - { - "epoch": 0.45012787723785164, - "grad_norm": 0.09861225634813309, - "learning_rate": 7.336483923869016e-05, - "loss": 0.010671885311603546, - "step": 2640 - }, - { - "epoch": 0.45098039215686275, - "grad_norm": 0.08303772658109665, - "learning_rate": 7.335734736168249e-05, - "loss": 0.009589634835720062, - "step": 2645 - }, - { - "epoch": 0.4518329070758738, - "grad_norm": 0.08657588064670563, - "learning_rate": 7.334983874527231e-05, - "loss": 0.008064036071300507, - "step": 2650 - }, - { - "epoch": 0.45268542199488493, - "grad_norm": 0.10513710975646973, - "learning_rate": 7.334231339296485e-05, - "loss": 0.01647743284702301, - "step": 2655 - }, - { - "epoch": 0.453537936913896, - "grad_norm": 0.10341943055391312, - "learning_rate": 7.333477130827322e-05, - "loss": 0.009101226180791854, - "step": 2660 - }, - { - "epoch": 0.45439045183290705, - "grad_norm": 0.09740681946277618, - "learning_rate": 7.33272124947183e-05, - "loss": 0.011460770666599274, - "step": 2665 - }, - { - "epoch": 0.45524296675191817, - "grad_norm": 0.06477998197078705, - "learning_rate": 7.331963695582881e-05, - "loss": 0.011711791157722473, - "step": 2670 - }, - { - "epoch": 0.4560954816709292, - "grad_norm": 0.0881948322057724, - "learning_rate": 7.331204469514127e-05, - "loss": 0.009621420502662658, - "step": 2675 - }, - { - "epoch": 0.45694799658994034, - "grad_norm": 0.09553391486406326, - "learning_rate": 7.330443571619998e-05, - "loss": 0.011725078523159026, - "step": 2680 - }, - { - "epoch": 0.4578005115089514, - "grad_norm": 0.10480209439992905, - "learning_rate": 7.329681002255706e-05, - "loss": 0.012353558838367463, - "step": 2685 - }, - { - "epoch": 0.45865302642796246, - "grad_norm": 0.08409439772367477, - "learning_rate": 7.328916761777247e-05, - "loss": 0.01114615797996521, - "step": 2690 - }, - { - "epoch": 0.4595055413469736, - "grad_norm": 0.07166923582553864, - "learning_rate": 7.32815085054139e-05, - "loss": 0.008672221004962921, - "step": 2695 - }, - { - "epoch": 0.46035805626598464, - "grad_norm": 0.07308658212423325, - "learning_rate": 7.327383268905691e-05, - "loss": 0.012448658794164657, - "step": 2700 - }, - { - "epoch": 0.46121057118499575, - "grad_norm": 0.14019793272018433, - "learning_rate": 7.32661401722848e-05, - "loss": 0.013477186858654022, - "step": 2705 - }, - { - "epoch": 0.4620630861040068, - "grad_norm": 0.0753963515162468, - "learning_rate": 7.325843095868872e-05, - "loss": 0.011373884975910187, - "step": 2710 - }, - { - "epoch": 0.4629156010230179, - "grad_norm": 0.07312130182981491, - "learning_rate": 7.325070505186756e-05, - "loss": 0.012329152971506118, - "step": 2715 - }, - { - "epoch": 0.463768115942029, - "grad_norm": 0.06200556829571724, - "learning_rate": 7.324296245542806e-05, - "loss": 0.008847354352474213, - "step": 2720 - }, - { - "epoch": 0.46462063086104005, - "grad_norm": 0.11015846580266953, - "learning_rate": 7.32352031729847e-05, - "loss": 0.013304698467254638, - "step": 2725 - }, - { - "epoch": 0.46547314578005117, - "grad_norm": 0.05926821380853653, - "learning_rate": 7.322742720815978e-05, - "loss": 0.011919337511062621, - "step": 2730 - }, - { - "epoch": 0.4663256606990622, - "grad_norm": 0.102846160531044, - "learning_rate": 7.321963456458337e-05, - "loss": 0.010952814668416976, - "step": 2735 - }, - { - "epoch": 0.46717817561807334, - "grad_norm": 0.10767021775245667, - "learning_rate": 7.321182524589334e-05, - "loss": 0.012438956648111343, - "step": 2740 - }, - { - "epoch": 0.4680306905370844, - "grad_norm": 0.08611919730901718, - "learning_rate": 7.320399925573534e-05, - "loss": 0.008686845004558564, - "step": 2745 - }, - { - "epoch": 0.46888320545609546, - "grad_norm": 0.07483147829771042, - "learning_rate": 7.31961565977628e-05, - "loss": 0.011065713316202163, - "step": 2750 - }, - { - "epoch": 0.4697357203751066, - "grad_norm": 0.08029857277870178, - "learning_rate": 7.318829727563696e-05, - "loss": 0.012208929657936097, - "step": 2755 - }, - { - "epoch": 0.47058823529411764, - "grad_norm": 0.09076030552387238, - "learning_rate": 7.318042129302676e-05, - "loss": 0.010283030569553375, - "step": 2760 - }, - { - "epoch": 0.47144075021312876, - "grad_norm": 0.07009804993867874, - "learning_rate": 7.317252865360902e-05, - "loss": 0.010625988245010376, - "step": 2765 - }, - { - "epoch": 0.4722932651321398, - "grad_norm": 0.07213665544986725, - "learning_rate": 7.316461936106826e-05, - "loss": 0.010299822688102723, - "step": 2770 - }, - { - "epoch": 0.4731457800511509, - "grad_norm": 0.08464398980140686, - "learning_rate": 7.315669341909679e-05, - "loss": 0.010440715402364732, - "step": 2775 - }, - { - "epoch": 0.473998294970162, - "grad_norm": 0.08878160268068314, - "learning_rate": 7.314875083139475e-05, - "loss": 0.01015128344297409, - "step": 2780 - }, - { - "epoch": 0.47485080988917305, - "grad_norm": 0.05885029211640358, - "learning_rate": 7.314079160166996e-05, - "loss": 0.00943310335278511, - "step": 2785 - }, - { - "epoch": 0.47570332480818417, - "grad_norm": 0.07288813591003418, - "learning_rate": 7.313281573363809e-05, - "loss": 0.009116576611995697, - "step": 2790 - }, - { - "epoch": 0.47655583972719523, - "grad_norm": 0.09088344126939774, - "learning_rate": 7.31248232310225e-05, - "loss": 0.010344403237104416, - "step": 2795 - }, - { - "epoch": 0.4774083546462063, - "grad_norm": 0.08182916790246964, - "learning_rate": 7.311681409755437e-05, - "loss": 0.010874876379966735, - "step": 2800 - }, - { - "epoch": 0.4782608695652174, - "grad_norm": 0.08280645310878754, - "learning_rate": 7.310878833697264e-05, - "loss": 0.007568147033452988, - "step": 2805 - }, - { - "epoch": 0.47911338448422847, - "grad_norm": 0.10462478548288345, - "learning_rate": 7.3100745953024e-05, - "loss": 0.011740683764219283, - "step": 2810 - }, - { - "epoch": 0.4799658994032396, - "grad_norm": 0.07685881853103638, - "learning_rate": 7.30926869494629e-05, - "loss": 0.009284010529518128, - "step": 2815 - }, - { - "epoch": 0.48081841432225064, - "grad_norm": 0.05211766064167023, - "learning_rate": 7.308461133005156e-05, - "loss": 0.009633362293243408, - "step": 2820 - }, - { - "epoch": 0.4816709292412617, - "grad_norm": 0.07862114161252975, - "learning_rate": 7.307651909855993e-05, - "loss": 0.012355846166610718, - "step": 2825 - }, - { - "epoch": 0.4825234441602728, - "grad_norm": 0.09950421750545502, - "learning_rate": 7.306841025876573e-05, - "loss": 0.010842062532901764, - "step": 2830 - }, - { - "epoch": 0.4833759590792839, - "grad_norm": 0.08446205407381058, - "learning_rate": 7.306028481445446e-05, - "loss": 0.008424797654151916, - "step": 2835 - }, - { - "epoch": 0.484228473998295, - "grad_norm": 0.1424778699874878, - "learning_rate": 7.305214276941934e-05, - "loss": 0.01177324503660202, - "step": 2840 - }, - { - "epoch": 0.48508098891730606, - "grad_norm": 0.07312945276498795, - "learning_rate": 7.304398412746134e-05, - "loss": 0.010038022696971894, - "step": 2845 - }, - { - "epoch": 0.4859335038363171, - "grad_norm": 0.07043888419866562, - "learning_rate": 7.303580889238917e-05, - "loss": 0.008848214149475097, - "step": 2850 - }, - { - "epoch": 0.48678601875532823, - "grad_norm": 0.09851706773042679, - "learning_rate": 7.302761706801934e-05, - "loss": 0.011452250182628632, - "step": 2855 - }, - { - "epoch": 0.4876385336743393, - "grad_norm": 0.07379815727472305, - "learning_rate": 7.301940865817604e-05, - "loss": 0.010087071359157563, - "step": 2860 - }, - { - "epoch": 0.4884910485933504, - "grad_norm": 0.12832187116146088, - "learning_rate": 7.301118366669123e-05, - "loss": 0.013372799754142762, - "step": 2865 - }, - { - "epoch": 0.48934356351236147, - "grad_norm": 0.06776788830757141, - "learning_rate": 7.300294209740462e-05, - "loss": 0.010031795501708985, - "step": 2870 - }, - { - "epoch": 0.49019607843137253, - "grad_norm": 0.06495808809995651, - "learning_rate": 7.299468395416364e-05, - "loss": 0.011152566224336625, - "step": 2875 - }, - { - "epoch": 0.49104859335038364, - "grad_norm": 0.06433792412281036, - "learning_rate": 7.298640924082346e-05, - "loss": 0.012774203717708588, - "step": 2880 - }, - { - "epoch": 0.4919011082693947, - "grad_norm": 0.066926009953022, - "learning_rate": 7.2978117961247e-05, - "loss": 0.011111211776733399, - "step": 2885 - }, - { - "epoch": 0.4927536231884058, - "grad_norm": 0.08211687207221985, - "learning_rate": 7.296981011930493e-05, - "loss": 0.009508269280195237, - "step": 2890 - }, - { - "epoch": 0.4936061381074169, - "grad_norm": 0.09815993160009384, - "learning_rate": 7.296148571887558e-05, - "loss": 0.0117066890001297, - "step": 2895 - }, - { - "epoch": 0.49445865302642794, - "grad_norm": 0.07543535530567169, - "learning_rate": 7.295314476384508e-05, - "loss": 0.008867967873811722, - "step": 2900 - }, - { - "epoch": 0.49531116794543906, - "grad_norm": 0.07558202743530273, - "learning_rate": 7.294478725810728e-05, - "loss": 0.01093400940299034, - "step": 2905 - }, - { - "epoch": 0.4961636828644501, - "grad_norm": 0.06642191112041473, - "learning_rate": 7.293641320556371e-05, - "loss": 0.008366364240646362, - "step": 2910 - }, - { - "epoch": 0.49701619778346123, - "grad_norm": 0.07226760685443878, - "learning_rate": 7.292802261012368e-05, - "loss": 0.012197307497262954, - "step": 2915 - }, - { - "epoch": 0.4978687127024723, - "grad_norm": 0.08546584844589233, - "learning_rate": 7.29196154757042e-05, - "loss": 0.010272269695997238, - "step": 2920 - }, - { - "epoch": 0.49872122762148335, - "grad_norm": 0.0559270940721035, - "learning_rate": 7.291119180622998e-05, - "loss": 0.009690707921981812, - "step": 2925 - }, - { - "epoch": 0.49957374254049447, - "grad_norm": 0.11211635917425156, - "learning_rate": 7.290275160563349e-05, - "loss": 0.01505405604839325, - "step": 2930 - }, - { - "epoch": 0.4999147485080989, - "eval_loss": 0.035044603049755096, - "eval_runtime": 3.5861, - "eval_samples_per_second": 70.272, - "eval_steps_per_second": 1.115, - "step": 2932 - }, - { - "eval_cer_subset": 0.01374436139202371, - "eval_cer_subset_edit_distance": 844, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 2932 - }, - { - "epoch": 0.5004262574595055, - "grad_norm": 0.08485773205757141, - "learning_rate": 7.289429487785488e-05, - "loss": 0.01260426789522171, - "step": 2935 - }, - { - "epoch": 0.5012787723785166, - "grad_norm": 0.08039058744907379, - "learning_rate": 7.288582162684203e-05, - "loss": 0.012322144955396653, - "step": 2940 - }, - { - "epoch": 0.5021312872975278, - "grad_norm": 0.16017615795135498, - "learning_rate": 7.287733185655057e-05, - "loss": 0.009620364010334014, - "step": 2945 - }, - { - "epoch": 0.5029838022165388, - "grad_norm": 0.06721053272485733, - "learning_rate": 7.286882557094376e-05, - "loss": 0.009893904626369476, - "step": 2950 - }, - { - "epoch": 0.5038363171355499, - "grad_norm": 0.08132930099964142, - "learning_rate": 7.286030277399264e-05, - "loss": 0.012833705544471741, - "step": 2955 - }, - { - "epoch": 0.5046888320545609, - "grad_norm": 0.09076893329620361, - "learning_rate": 7.285176346967595e-05, - "loss": 0.011492121219635009, - "step": 2960 - }, - { - "epoch": 0.505541346973572, - "grad_norm": 0.1023377999663353, - "learning_rate": 7.284320766198008e-05, - "loss": 0.01216188371181488, - "step": 2965 - }, - { - "epoch": 0.5063938618925832, - "grad_norm": 0.07568195462226868, - "learning_rate": 7.283463535489921e-05, - "loss": 0.014794780313968659, - "step": 2970 - }, - { - "epoch": 0.5072463768115942, - "grad_norm": 0.11283870786428452, - "learning_rate": 7.282604655243515e-05, - "loss": 0.012774300575256348, - "step": 2975 - }, - { - "epoch": 0.5080988917306053, - "grad_norm": 0.07101167738437653, - "learning_rate": 7.281744125859746e-05, - "loss": 0.010759322345256806, - "step": 2980 - }, - { - "epoch": 0.5089514066496164, - "grad_norm": 0.07677409052848816, - "learning_rate": 7.280881947740336e-05, - "loss": 0.010482230037450791, - "step": 2985 - }, - { - "epoch": 0.5098039215686274, - "grad_norm": 0.08568017184734344, - "learning_rate": 7.280018121287777e-05, - "loss": 0.012674462795257569, - "step": 2990 - }, - { - "epoch": 0.5106564364876386, - "grad_norm": 0.07830876111984253, - "learning_rate": 7.279152646905336e-05, - "loss": 0.009349775314331055, - "step": 2995 - }, - { - "epoch": 0.5115089514066496, - "grad_norm": 0.07408280670642853, - "learning_rate": 7.278285524997044e-05, - "loss": 0.010303238779306412, - "step": 3000 - }, - { - "epoch": 0.5123614663256607, - "grad_norm": 0.09053376317024231, - "learning_rate": 7.277416755967698e-05, - "loss": 0.012187518179416656, - "step": 3005 - }, - { - "epoch": 0.5132139812446718, - "grad_norm": 0.07432437688112259, - "learning_rate": 7.276546340222875e-05, - "loss": 0.009504207968711853, - "step": 3010 - }, - { - "epoch": 0.5140664961636828, - "grad_norm": 0.09075863659381866, - "learning_rate": 7.275674278168908e-05, - "loss": 0.010764679312705994, - "step": 3015 - }, - { - "epoch": 0.514919011082694, - "grad_norm": 0.08363319933414459, - "learning_rate": 7.274800570212909e-05, - "loss": 0.011034657061100007, - "step": 3020 - }, - { - "epoch": 0.5157715260017051, - "grad_norm": 0.08179081231355667, - "learning_rate": 7.273925216762753e-05, - "loss": 0.012276624888181686, - "step": 3025 - }, - { - "epoch": 0.5166240409207161, - "grad_norm": 0.10797501355409622, - "learning_rate": 7.273048218227083e-05, - "loss": 0.008887678384780884, - "step": 3030 - }, - { - "epoch": 0.5174765558397272, - "grad_norm": 0.08237873017787933, - "learning_rate": 7.27216957501531e-05, - "loss": 0.010879174619913102, - "step": 3035 - }, - { - "epoch": 0.5183290707587382, - "grad_norm": 0.10010047256946564, - "learning_rate": 7.271289287537616e-05, - "loss": 0.0103249654173851, - "step": 3040 - }, - { - "epoch": 0.5191815856777494, - "grad_norm": 0.06411991268396378, - "learning_rate": 7.270407356204948e-05, - "loss": 0.006414853036403656, - "step": 3045 - }, - { - "epoch": 0.5200341005967605, - "grad_norm": 0.09925824403762817, - "learning_rate": 7.26952378142902e-05, - "loss": 0.010811964422464371, - "step": 3050 - }, - { - "epoch": 0.5208866155157715, - "grad_norm": 0.07986702769994736, - "learning_rate": 7.268638563622317e-05, - "loss": 0.011965467780828475, - "step": 3055 - }, - { - "epoch": 0.5217391304347826, - "grad_norm": 0.07426656037569046, - "learning_rate": 7.267751703198082e-05, - "loss": 0.0093523807823658, - "step": 3060 - }, - { - "epoch": 0.5225916453537937, - "grad_norm": 0.11460934579372406, - "learning_rate": 7.266863200570338e-05, - "loss": 0.01224176660180092, - "step": 3065 - }, - { - "epoch": 0.5234441602728048, - "grad_norm": 0.10174648463726044, - "learning_rate": 7.265973056153864e-05, - "loss": 0.011203842610120774, - "step": 3070 - }, - { - "epoch": 0.5242966751918159, - "grad_norm": 0.06445316970348358, - "learning_rate": 7.265081270364209e-05, - "loss": 0.010346656292676925, - "step": 3075 - }, - { - "epoch": 0.525149190110827, - "grad_norm": 0.08397547155618668, - "learning_rate": 7.264187843617688e-05, - "loss": 0.011372068524360656, - "step": 3080 - }, - { - "epoch": 0.526001705029838, - "grad_norm": 0.07325135916471481, - "learning_rate": 7.263292776331384e-05, - "loss": 0.01116851419210434, - "step": 3085 - }, - { - "epoch": 0.5268542199488491, - "grad_norm": 0.1034390926361084, - "learning_rate": 7.262396068923144e-05, - "loss": 0.011953853815793992, - "step": 3090 - }, - { - "epoch": 0.5277067348678602, - "grad_norm": 0.08395690470933914, - "learning_rate": 7.26149772181158e-05, - "loss": 0.011437299847602844, - "step": 3095 - }, - { - "epoch": 0.5285592497868713, - "grad_norm": 0.09495387226343155, - "learning_rate": 7.260597735416068e-05, - "loss": 0.009634804725646973, - "step": 3100 - }, - { - "epoch": 0.5294117647058824, - "grad_norm": 0.07444775849580765, - "learning_rate": 7.259696110156756e-05, - "loss": 0.009771790355443954, - "step": 3105 - }, - { - "epoch": 0.5302642796248934, - "grad_norm": 0.061964571475982666, - "learning_rate": 7.258792846454551e-05, - "loss": 0.007979755848646164, - "step": 3110 - }, - { - "epoch": 0.5311167945439045, - "grad_norm": 0.11025935411453247, - "learning_rate": 7.257887944731125e-05, - "loss": 0.012162110209465027, - "step": 3115 - }, - { - "epoch": 0.5319693094629157, - "grad_norm": 0.07793140411376953, - "learning_rate": 7.256981405408918e-05, - "loss": 0.00897146388888359, - "step": 3120 - }, - { - "epoch": 0.5328218243819267, - "grad_norm": 0.0773436427116394, - "learning_rate": 7.256073228911132e-05, - "loss": 0.009621264040470123, - "step": 3125 - }, - { - "epoch": 0.5336743393009378, - "grad_norm": 0.07340693473815918, - "learning_rate": 7.255163415661735e-05, - "loss": 0.01072111278772354, - "step": 3130 - }, - { - "epoch": 0.5345268542199488, - "grad_norm": 0.0971943810582161, - "learning_rate": 7.254251966085455e-05, - "loss": 0.009457825869321822, - "step": 3135 - }, - { - "epoch": 0.5353793691389599, - "grad_norm": 0.08840794116258621, - "learning_rate": 7.25333888060779e-05, - "loss": 0.015866565704345702, - "step": 3140 - }, - { - "epoch": 0.5362318840579711, - "grad_norm": 0.07126007229089737, - "learning_rate": 7.252424159654999e-05, - "loss": 0.012925322353839874, - "step": 3145 - }, - { - "epoch": 0.5370843989769821, - "grad_norm": 0.05989958345890045, - "learning_rate": 7.251507803654103e-05, - "loss": 0.007374878972768784, - "step": 3150 - }, - { - "epoch": 0.5379369138959932, - "grad_norm": 0.0661931037902832, - "learning_rate": 7.250589813032885e-05, - "loss": 0.009713394194841385, - "step": 3155 - }, - { - "epoch": 0.5387894288150042, - "grad_norm": 0.0813523456454277, - "learning_rate": 7.2496701882199e-05, - "loss": 0.007980254292488099, - "step": 3160 - }, - { - "epoch": 0.5396419437340153, - "grad_norm": 0.0565156452357769, - "learning_rate": 7.248748929644453e-05, - "loss": 0.010806798934936523, - "step": 3165 - }, - { - "epoch": 0.5404944586530265, - "grad_norm": 0.045107364654541016, - "learning_rate": 7.247826037736621e-05, - "loss": 0.013011330366134643, - "step": 3170 - }, - { - "epoch": 0.5413469735720375, - "grad_norm": 0.0623495988547802, - "learning_rate": 7.246901512927241e-05, - "loss": 0.012109772115945817, - "step": 3175 - }, - { - "epoch": 0.5421994884910486, - "grad_norm": 0.09943851083517075, - "learning_rate": 7.24597535564791e-05, - "loss": 0.011384092271327972, - "step": 3180 - }, - { - "epoch": 0.5430520034100597, - "grad_norm": 0.12090208381414413, - "learning_rate": 7.245047566330991e-05, - "loss": 0.011156149953603745, - "step": 3185 - }, - { - "epoch": 0.5439045183290707, - "grad_norm": 0.10226333141326904, - "learning_rate": 7.244118145409607e-05, - "loss": 0.01164291426539421, - "step": 3190 - }, - { - "epoch": 0.5447570332480819, - "grad_norm": 0.09011051058769226, - "learning_rate": 7.24318709331764e-05, - "loss": 0.009608177840709687, - "step": 3195 - }, - { - "epoch": 0.545609548167093, - "grad_norm": 0.08180241286754608, - "learning_rate": 7.24225441048974e-05, - "loss": 0.010098953545093537, - "step": 3200 - }, - { - "epoch": 0.546462063086104, - "grad_norm": 0.08325407654047012, - "learning_rate": 7.241320097361312e-05, - "loss": 0.012687146663665771, - "step": 3205 - }, - { - "epoch": 0.5473145780051151, - "grad_norm": 0.11662351340055466, - "learning_rate": 7.240384154368523e-05, - "loss": 0.012003959715366363, - "step": 3210 - }, - { - "epoch": 0.5481670929241261, - "grad_norm": 0.05904731899499893, - "learning_rate": 7.239446581948306e-05, - "loss": 0.012311330437660218, - "step": 3215 - }, - { - "epoch": 0.5490196078431373, - "grad_norm": 0.12498651444911957, - "learning_rate": 7.238507380538347e-05, - "loss": 0.011272794008255005, - "step": 3220 - }, - { - "epoch": 0.5498721227621484, - "grad_norm": 0.06047634035348892, - "learning_rate": 7.2375665505771e-05, - "loss": 0.010353527963161469, - "step": 3225 - }, - { - "epoch": 0.5507246376811594, - "grad_norm": 0.07596508413553238, - "learning_rate": 7.236624092503774e-05, - "loss": 0.011058451980352402, - "step": 3230 - }, - { - "epoch": 0.5515771526001705, - "grad_norm": 0.10000273585319519, - "learning_rate": 7.235680006758339e-05, - "loss": 0.012288159132003785, - "step": 3235 - }, - { - "epoch": 0.5524296675191815, - "grad_norm": 0.08154033869504929, - "learning_rate": 7.234734293781527e-05, - "loss": 0.015510989725589753, - "step": 3240 - }, - { - "epoch": 0.5532821824381927, - "grad_norm": 0.10024677217006683, - "learning_rate": 7.233786954014828e-05, - "loss": 0.010542219877243042, - "step": 3245 - }, - { - "epoch": 0.5541346973572038, - "grad_norm": 0.08001844584941864, - "learning_rate": 7.232837987900492e-05, - "loss": 0.009433221817016602, - "step": 3250 - }, - { - "epoch": 0.5549872122762148, - "grad_norm": 0.05274324119091034, - "learning_rate": 7.231887395881528e-05, - "loss": 0.010475738346576691, - "step": 3255 - }, - { - "epoch": 0.5558397271952259, - "grad_norm": 0.08753672242164612, - "learning_rate": 7.230935178401703e-05, - "loss": 0.007628431916236878, - "step": 3260 - }, - { - "epoch": 0.556692242114237, - "grad_norm": 0.10221699625253677, - "learning_rate": 7.229981335905545e-05, - "loss": 0.011822684109210968, - "step": 3265 - }, - { - "epoch": 0.5575447570332481, - "grad_norm": 0.07665866613388062, - "learning_rate": 7.229025868838336e-05, - "loss": 0.010916930437088013, - "step": 3270 - }, - { - "epoch": 0.5583972719522592, - "grad_norm": 0.08861260861158371, - "learning_rate": 7.228068777646125e-05, - "loss": 0.008925830572843551, - "step": 3275 - }, - { - "epoch": 0.5592497868712702, - "grad_norm": 0.08963657170534134, - "learning_rate": 7.227110062775712e-05, - "loss": 0.014812195301055908, - "step": 3280 - }, - { - "epoch": 0.5601023017902813, - "grad_norm": 0.28550851345062256, - "learning_rate": 7.226149724674655e-05, - "loss": 0.009522277861833572, - "step": 3285 - }, - { - "epoch": 0.5609548167092924, - "grad_norm": 0.057680875062942505, - "learning_rate": 7.225187763791273e-05, - "loss": 0.012893497943878174, - "step": 3290 - }, - { - "epoch": 0.5618073316283035, - "grad_norm": 0.08956284821033478, - "learning_rate": 7.224224180574642e-05, - "loss": 0.012499228864908219, - "step": 3295 - }, - { - "epoch": 0.5626598465473146, - "grad_norm": 0.11929965764284134, - "learning_rate": 7.223258975474596e-05, - "loss": 0.010640453547239304, - "step": 3300 - }, - { - "epoch": 0.5635123614663257, - "grad_norm": 0.09788426756858826, - "learning_rate": 7.222292148941722e-05, - "loss": 0.014677588641643525, - "step": 3305 - }, - { - "epoch": 0.5643648763853367, - "grad_norm": 0.08845673501491547, - "learning_rate": 7.221323701427368e-05, - "loss": 0.009266233444213868, - "step": 3310 - }, - { - "epoch": 0.5652173913043478, - "grad_norm": 0.07864493131637573, - "learning_rate": 7.220353633383636e-05, - "loss": 0.01019999384880066, - "step": 3315 - }, - { - "epoch": 0.566069906223359, - "grad_norm": 0.07658441364765167, - "learning_rate": 7.21938194526339e-05, - "loss": 0.010098284482955933, - "step": 3320 - }, - { - "epoch": 0.56692242114237, - "grad_norm": 0.058863960206508636, - "learning_rate": 7.218408637520243e-05, - "loss": 0.01043831706047058, - "step": 3325 - }, - { - "epoch": 0.5677749360613811, - "grad_norm": 0.05992535129189491, - "learning_rate": 7.217433710608567e-05, - "loss": 0.010804108530282974, - "step": 3330 - }, - { - "epoch": 0.5686274509803921, - "grad_norm": 0.10607994347810745, - "learning_rate": 7.216457164983494e-05, - "loss": 0.01115414798259735, - "step": 3335 - }, - { - "epoch": 0.5694799658994032, - "grad_norm": 0.07557345181703568, - "learning_rate": 7.215479001100904e-05, - "loss": 0.01279982328414917, - "step": 3340 - }, - { - "epoch": 0.5703324808184144, - "grad_norm": 0.064768947660923, - "learning_rate": 7.214499219417439e-05, - "loss": 0.01112583726644516, - "step": 3345 - }, - { - "epoch": 0.5711849957374254, - "grad_norm": 0.08013112843036652, - "learning_rate": 7.213517820390492e-05, - "loss": 0.01265912652015686, - "step": 3350 - }, - { - "epoch": 0.5720375106564365, - "grad_norm": 0.06619428843259811, - "learning_rate": 7.212534804478214e-05, - "loss": 0.01231289878487587, - "step": 3355 - }, - { - "epoch": 0.5728900255754475, - "grad_norm": 0.06123036891222, - "learning_rate": 7.211550172139507e-05, - "loss": 0.012096628546714783, - "step": 3360 - }, - { - "epoch": 0.5737425404944586, - "grad_norm": 0.10050475597381592, - "learning_rate": 7.210563923834034e-05, - "loss": 0.014050082862377166, - "step": 3365 - }, - { - "epoch": 0.5745950554134698, - "grad_norm": 0.05243556201457977, - "learning_rate": 7.209576060022207e-05, - "loss": 0.009351913630962373, - "step": 3370 - }, - { - "epoch": 0.5754475703324808, - "grad_norm": 0.12591946125030518, - "learning_rate": 7.208586581165192e-05, - "loss": 0.012423963844776153, - "step": 3375 - }, - { - "epoch": 0.5763000852514919, - "grad_norm": 0.11871001869440079, - "learning_rate": 7.207595487724912e-05, - "loss": 0.014398403465747833, - "step": 3380 - }, - { - "epoch": 0.577152600170503, - "grad_norm": 0.09194283187389374, - "learning_rate": 7.206602780164044e-05, - "loss": 0.009020231664180756, - "step": 3385 - }, - { - "epoch": 0.578005115089514, - "grad_norm": 0.1465149074792862, - "learning_rate": 7.205608458946013e-05, - "loss": 0.009870749711990357, - "step": 3390 - }, - { - "epoch": 0.5788576300085252, - "grad_norm": 0.07948209345340729, - "learning_rate": 7.204612524535006e-05, - "loss": 0.013135011494159698, - "step": 3395 - }, - { - "epoch": 0.5797101449275363, - "grad_norm": 0.07187635451555252, - "learning_rate": 7.203614977395952e-05, - "loss": 0.010598786920309067, - "step": 3400 - }, - { - "epoch": 0.5805626598465473, - "grad_norm": 0.05511854961514473, - "learning_rate": 7.202615817994545e-05, - "loss": 0.009227041155099869, - "step": 3405 - }, - { - "epoch": 0.5814151747655584, - "grad_norm": 0.05830230563879013, - "learning_rate": 7.201615046797224e-05, - "loss": 0.008167321979999542, - "step": 3410 - }, - { - "epoch": 0.5822676896845694, - "grad_norm": 0.08624587953090668, - "learning_rate": 7.200612664271184e-05, - "loss": 0.012134125083684921, - "step": 3415 - }, - { - "epoch": 0.5831202046035806, - "grad_norm": 0.0744808092713356, - "learning_rate": 7.199608670884366e-05, - "loss": 0.012493259459733962, - "step": 3420 - }, - { - "epoch": 0.5839727195225917, - "grad_norm": 0.07272766530513763, - "learning_rate": 7.19860306710547e-05, - "loss": 0.00806736946105957, - "step": 3425 - }, - { - "epoch": 0.5848252344416027, - "grad_norm": 0.0804983377456665, - "learning_rate": 7.197595853403946e-05, - "loss": 0.01102890819311142, - "step": 3430 - }, - { - "epoch": 0.5856777493606138, - "grad_norm": 0.05326579511165619, - "learning_rate": 7.196587030249994e-05, - "loss": 0.009381016343832016, - "step": 3435 - }, - { - "epoch": 0.5865302642796248, - "grad_norm": 0.07588013261556625, - "learning_rate": 7.195576598114567e-05, - "loss": 0.010961712896823883, - "step": 3440 - }, - { - "epoch": 0.587382779198636, - "grad_norm": 0.09725244343280792, - "learning_rate": 7.194564557469368e-05, - "loss": 0.012034715712070465, - "step": 3445 - }, - { - "epoch": 0.5882352941176471, - "grad_norm": 0.0938539057970047, - "learning_rate": 7.193550908786851e-05, - "loss": 0.012069541215896606, - "step": 3450 - }, - { - "epoch": 0.5890878090366581, - "grad_norm": 0.052410729229450226, - "learning_rate": 7.19253565254022e-05, - "loss": 0.011174223572015762, - "step": 3455 - }, - { - "epoch": 0.5899403239556692, - "grad_norm": 0.08317258954048157, - "learning_rate": 7.191518789203432e-05, - "loss": 0.014452503621578216, - "step": 3460 - }, - { - "epoch": 0.5907928388746803, - "grad_norm": 0.062619149684906, - "learning_rate": 7.190500319251193e-05, - "loss": 0.012830793857574463, - "step": 3465 - }, - { - "epoch": 0.5916453537936914, - "grad_norm": 0.06287284195423126, - "learning_rate": 7.189480243158956e-05, - "loss": 0.013282649219036102, - "step": 3470 - }, - { - "epoch": 0.5924978687127025, - "grad_norm": 0.07136182487010956, - "learning_rate": 7.188458561402928e-05, - "loss": 0.009024892747402192, - "step": 3475 - }, - { - "epoch": 0.5933503836317136, - "grad_norm": 0.09081269055604935, - "learning_rate": 7.187435274460064e-05, - "loss": 0.012044035643339158, - "step": 3480 - }, - { - "epoch": 0.5942028985507246, - "grad_norm": 0.08475978672504425, - "learning_rate": 7.18641038280807e-05, - "loss": 0.010785829275846481, - "step": 3485 - }, - { - "epoch": 0.5950554134697357, - "grad_norm": 0.06322979927062988, - "learning_rate": 7.185383886925397e-05, - "loss": 0.011609486490488052, - "step": 3490 - }, - { - "epoch": 0.5959079283887468, - "grad_norm": 0.07065978646278381, - "learning_rate": 7.18435578729125e-05, - "loss": 0.01128239706158638, - "step": 3495 - }, - { - "epoch": 0.5967604433077579, - "grad_norm": 0.057962607592344284, - "learning_rate": 7.183326084385577e-05, - "loss": 0.009382489323616027, - "step": 3500 - }, - { - "epoch": 0.597612958226769, - "grad_norm": 0.05717672407627106, - "learning_rate": 7.182294778689079e-05, - "loss": 0.010072773694992066, - "step": 3505 - }, - { - "epoch": 0.59846547314578, - "grad_norm": 0.07161569595336914, - "learning_rate": 7.181261870683205e-05, - "loss": 0.011324245482683182, - "step": 3510 - }, - { - "epoch": 0.5993179880647911, - "grad_norm": 0.07468906790018082, - "learning_rate": 7.180227360850148e-05, - "loss": 0.00984283909201622, - "step": 3515 - }, - { - "epoch": 0.6001705029838023, - "grad_norm": 0.071560800075531, - "learning_rate": 7.179191249672855e-05, - "loss": 0.011276674270629884, - "step": 3520 - }, - { - "epoch": 0.6010230179028133, - "grad_norm": 0.05588390305638313, - "learning_rate": 7.178153537635014e-05, - "loss": 0.008921106159687043, - "step": 3525 - }, - { - "epoch": 0.6018755328218244, - "grad_norm": 0.11065732687711716, - "learning_rate": 7.177114225221066e-05, - "loss": 0.0122377447783947, - "step": 3530 - }, - { - "epoch": 0.6027280477408354, - "grad_norm": 0.10121116787195206, - "learning_rate": 7.176073312916194e-05, - "loss": 0.007999545335769654, - "step": 3535 - }, - { - "epoch": 0.6035805626598465, - "grad_norm": 0.06102030724287033, - "learning_rate": 7.175030801206335e-05, - "loss": 0.008767658472061157, - "step": 3540 - }, - { - "epoch": 0.6044330775788577, - "grad_norm": 0.08233699947595596, - "learning_rate": 7.173986690578164e-05, - "loss": 0.010089017450809479, - "step": 3545 - }, - { - "epoch": 0.6052855924978687, - "grad_norm": 0.1655152440071106, - "learning_rate": 7.172940981519108e-05, - "loss": 0.012077460438013077, - "step": 3550 - }, - { - "epoch": 0.6061381074168798, - "grad_norm": 0.11178915202617645, - "learning_rate": 7.171893674517337e-05, - "loss": 0.009319285303354264, - "step": 3555 - }, - { - "epoch": 0.6069906223358909, - "grad_norm": 0.0778600424528122, - "learning_rate": 7.170844770061772e-05, - "loss": 0.012114962190389633, - "step": 3560 - }, - { - "epoch": 0.6078431372549019, - "grad_norm": 0.08708171546459198, - "learning_rate": 7.169794268642075e-05, - "loss": 0.011569589376449585, - "step": 3565 - }, - { - "epoch": 0.6086956521739131, - "grad_norm": 0.06438080966472626, - "learning_rate": 7.168742170748654e-05, - "loss": 0.010296766459941865, - "step": 3570 - }, - { - "epoch": 0.6095481670929241, - "grad_norm": 0.10569975525140762, - "learning_rate": 7.167688476872664e-05, - "loss": 0.008922196924686432, - "step": 3575 - }, - { - "epoch": 0.6104006820119352, - "grad_norm": 0.07466918975114822, - "learning_rate": 7.166633187506004e-05, - "loss": 0.009365256130695342, - "step": 3580 - }, - { - "epoch": 0.6112531969309463, - "grad_norm": 0.1070641577243805, - "learning_rate": 7.16557630314132e-05, - "loss": 0.011525402963161468, - "step": 3585 - }, - { - "epoch": 0.6121057118499573, - "grad_norm": 0.09534542262554169, - "learning_rate": 7.164517824271999e-05, - "loss": 0.010068083554506302, - "step": 3590 - }, - { - "epoch": 0.6129582267689685, - "grad_norm": 0.0643506869673729, - "learning_rate": 7.163457751392175e-05, - "loss": 0.010679592937231063, - "step": 3595 - }, - { - "epoch": 0.6138107416879796, - "grad_norm": 0.11610018461942673, - "learning_rate": 7.162396084996723e-05, - "loss": 0.010074391961097717, - "step": 3600 - }, - { - "epoch": 0.6146632566069906, - "grad_norm": 0.07560709863901138, - "learning_rate": 7.161332825581269e-05, - "loss": 0.013245916366577149, - "step": 3605 - }, - { - "epoch": 0.6155157715260017, - "grad_norm": 0.06540799885988235, - "learning_rate": 7.160267973642173e-05, - "loss": 0.01055695340037346, - "step": 3610 - }, - { - "epoch": 0.6163682864450127, - "grad_norm": 0.05610837787389755, - "learning_rate": 7.159201529676546e-05, - "loss": 0.010231484472751618, - "step": 3615 - }, - { - "epoch": 0.6172208013640239, - "grad_norm": 0.11630856245756149, - "learning_rate": 7.158133494182237e-05, - "loss": 0.01117742881178856, - "step": 3620 - }, - { - "epoch": 0.618073316283035, - "grad_norm": 0.08508500456809998, - "learning_rate": 7.157063867657844e-05, - "loss": 0.010253986716270447, - "step": 3625 - }, - { - "epoch": 0.618925831202046, - "grad_norm": 0.067935511469841, - "learning_rate": 7.155992650602702e-05, - "loss": 0.009731527417898178, - "step": 3630 - }, - { - "epoch": 0.6197783461210571, - "grad_norm": 0.0784364566206932, - "learning_rate": 7.154919843516892e-05, - "loss": 0.009552852809429168, - "step": 3635 - }, - { - "epoch": 0.6206308610400681, - "grad_norm": 0.10788855701684952, - "learning_rate": 7.153845446901234e-05, - "loss": 0.011269643902778625, - "step": 3640 - }, - { - "epoch": 0.6214833759590793, - "grad_norm": 0.08664087951183319, - "learning_rate": 7.152769461257294e-05, - "loss": 0.010251335799694061, - "step": 3645 - }, - { - "epoch": 0.6223358908780904, - "grad_norm": 0.06885403394699097, - "learning_rate": 7.151691887087377e-05, - "loss": 0.008078257739543914, - "step": 3650 - }, - { - "epoch": 0.6231884057971014, - "grad_norm": 0.09345501661300659, - "learning_rate": 7.150612724894531e-05, - "loss": 0.012022207677364349, - "step": 3655 - }, - { - "epoch": 0.6240409207161125, - "grad_norm": 0.08502865582704544, - "learning_rate": 7.149531975182543e-05, - "loss": 0.00932946428656578, - "step": 3660 - }, - { - "epoch": 0.6248934356351236, - "grad_norm": 0.06249995157122612, - "learning_rate": 7.148449638455947e-05, - "loss": 0.011525212973356246, - "step": 3665 - }, - { - "epoch": 0.6257459505541347, - "grad_norm": 0.04836896434426308, - "learning_rate": 7.14736571522001e-05, - "loss": 0.010347714275121688, - "step": 3670 - }, - { - "epoch": 0.6265984654731458, - "grad_norm": 0.06358285248279572, - "learning_rate": 7.146280205980745e-05, - "loss": 0.009779715538024902, - "step": 3675 - }, - { - "epoch": 0.6274509803921569, - "grad_norm": 0.07596850395202637, - "learning_rate": 7.145193111244903e-05, - "loss": 0.010838811099529267, - "step": 3680 - }, - { - "epoch": 0.6283034953111679, - "grad_norm": 0.05986448749899864, - "learning_rate": 7.144104431519977e-05, - "loss": 0.009671849757432937, - "step": 3685 - }, - { - "epoch": 0.629156010230179, - "grad_norm": 0.047881439328193665, - "learning_rate": 7.143014167314197e-05, - "loss": 0.007660867273807525, - "step": 3690 - }, - { - "epoch": 0.6300085251491901, - "grad_norm": 0.06409293413162231, - "learning_rate": 7.141922319136537e-05, - "loss": 0.013374905288219451, - "step": 3695 - }, - { - "epoch": 0.6308610400682012, - "grad_norm": 0.0767306461930275, - "learning_rate": 7.140828887496707e-05, - "loss": 0.006885652989149093, - "step": 3700 - }, - { - "epoch": 0.6317135549872123, - "grad_norm": 0.08192065358161926, - "learning_rate": 7.139733872905158e-05, - "loss": 0.013760556280612946, - "step": 3705 - }, - { - "epoch": 0.6325660699062233, - "grad_norm": 0.09693574160337448, - "learning_rate": 7.138637275873078e-05, - "loss": 0.009739194065332413, - "step": 3710 - }, - { - "epoch": 0.6334185848252344, - "grad_norm": 0.08232755959033966, - "learning_rate": 7.137539096912395e-05, - "loss": 0.010294197499752045, - "step": 3715 - }, - { - "epoch": 0.6342710997442456, - "grad_norm": 0.06582340598106384, - "learning_rate": 7.136439336535776e-05, - "loss": 0.010686574131250381, - "step": 3720 - }, - { - "epoch": 0.6351236146632566, - "grad_norm": 0.07385887205600739, - "learning_rate": 7.135337995256626e-05, - "loss": 0.011403677612543106, - "step": 3725 - }, - { - "epoch": 0.6359761295822677, - "grad_norm": 0.11854248493909836, - "learning_rate": 7.134235073589087e-05, - "loss": 0.01180308759212494, - "step": 3730 - }, - { - "epoch": 0.6368286445012787, - "grad_norm": 0.076481893658638, - "learning_rate": 7.133130572048041e-05, - "loss": 0.011076596379280091, - "step": 3735 - }, - { - "epoch": 0.6376811594202898, - "grad_norm": 0.09552651643753052, - "learning_rate": 7.132024491149103e-05, - "loss": 0.014420199394226074, - "step": 3740 - }, - { - "epoch": 0.638533674339301, - "grad_norm": 0.04855124279856682, - "learning_rate": 7.130916831408633e-05, - "loss": 0.008350960910320282, - "step": 3745 - }, - { - "epoch": 0.639386189258312, - "grad_norm": 0.0796368345618248, - "learning_rate": 7.12980759334372e-05, - "loss": 0.010764746367931366, - "step": 3750 - }, - { - "epoch": 0.6402387041773231, - "grad_norm": 0.07030697911977768, - "learning_rate": 7.128696777472193e-05, - "loss": 0.010386807471513748, - "step": 3755 - }, - { - "epoch": 0.6410912190963342, - "grad_norm": 0.05930609628558159, - "learning_rate": 7.127584384312619e-05, - "loss": 0.008884093910455703, - "step": 3760 - }, - { - "epoch": 0.6419437340153452, - "grad_norm": 0.07495228201150894, - "learning_rate": 7.126470414384299e-05, - "loss": 0.010249865800142288, - "step": 3765 - }, - { - "epoch": 0.6427962489343564, - "grad_norm": 0.12954963743686676, - "learning_rate": 7.125354868207275e-05, - "loss": 0.013017497956752777, - "step": 3770 - }, - { - "epoch": 0.6436487638533674, - "grad_norm": 0.08893310278654099, - "learning_rate": 7.124237746302317e-05, - "loss": 0.010649867355823517, - "step": 3775 - }, - { - "epoch": 0.6445012787723785, - "grad_norm": 0.08650866150856018, - "learning_rate": 7.123119049190935e-05, - "loss": 0.012544044852256775, - "step": 3780 - }, - { - "epoch": 0.6453537936913896, - "grad_norm": 0.06374052166938782, - "learning_rate": 7.121998777395375e-05, - "loss": 0.007669864594936371, - "step": 3785 - }, - { - "epoch": 0.6462063086104007, - "grad_norm": 0.08226713538169861, - "learning_rate": 7.120876931438618e-05, - "loss": 0.007969621568918228, - "step": 3790 - }, - { - "epoch": 0.6470588235294118, - "grad_norm": 0.10450884699821472, - "learning_rate": 7.119753511844377e-05, - "loss": 0.013088032603263855, - "step": 3795 - }, - { - "epoch": 0.6479113384484229, - "grad_norm": 0.08459076285362244, - "learning_rate": 7.118628519137104e-05, - "loss": 0.01125529408454895, - "step": 3800 - }, - { - "epoch": 0.6487638533674339, - "grad_norm": 0.09018636494874954, - "learning_rate": 7.11750195384198e-05, - "loss": 0.008683501929044723, - "step": 3805 - }, - { - "epoch": 0.649616368286445, - "grad_norm": 0.07949680835008621, - "learning_rate": 7.116373816484927e-05, - "loss": 0.008904790878295899, - "step": 3810 - }, - { - "epoch": 0.6504688832054561, - "grad_norm": 0.14297716319561005, - "learning_rate": 7.115244107592593e-05, - "loss": 0.01503775417804718, - "step": 3815 - }, - { - "epoch": 0.6513213981244672, - "grad_norm": 0.051478032022714615, - "learning_rate": 7.114112827692367e-05, - "loss": 0.011145923286676407, - "step": 3820 - }, - { - "epoch": 0.6521739130434783, - "grad_norm": 0.0686139240860939, - "learning_rate": 7.112979977312365e-05, - "loss": 0.009445450454950332, - "step": 3825 - }, - { - "epoch": 0.6530264279624893, - "grad_norm": 0.08674909919500351, - "learning_rate": 7.111845556981444e-05, - "loss": 0.009345399588346482, - "step": 3830 - }, - { - "epoch": 0.6538789428815004, - "grad_norm": 0.07799270749092102, - "learning_rate": 7.110709567229182e-05, - "loss": 0.009722919762134552, - "step": 3835 - }, - { - "epoch": 0.6547314578005116, - "grad_norm": 0.07891912013292313, - "learning_rate": 7.109572008585905e-05, - "loss": 0.009985177218914032, - "step": 3840 - }, - { - "epoch": 0.6555839727195226, - "grad_norm": 0.07315738499164581, - "learning_rate": 7.108432881582656e-05, - "loss": 0.011729113757610321, - "step": 3845 - }, - { - "epoch": 0.6564364876385337, - "grad_norm": 0.04961124807596207, - "learning_rate": 7.107292186751222e-05, - "loss": 0.008087723702192306, - "step": 3850 - }, - { - "epoch": 0.6572890025575447, - "grad_norm": 0.0745200589299202, - "learning_rate": 7.106149924624115e-05, - "loss": 0.010474404692649842, - "step": 3855 - }, - { - "epoch": 0.6581415174765558, - "grad_norm": 0.06290512531995773, - "learning_rate": 7.105006095734581e-05, - "loss": 0.009356130659580231, - "step": 3860 - }, - { - "epoch": 0.658994032395567, - "grad_norm": 0.058479905128479004, - "learning_rate": 7.1038607006166e-05, - "loss": 0.008637580275535583, - "step": 3865 - }, - { - "epoch": 0.659846547314578, - "grad_norm": 0.07301484048366547, - "learning_rate": 7.102713739804879e-05, - "loss": 0.015610474348068237, - "step": 3870 - }, - { - "epoch": 0.6606990622335891, - "grad_norm": 0.07421465218067169, - "learning_rate": 7.101565213834855e-05, - "loss": 0.011201824992895126, - "step": 3875 - }, - { - "epoch": 0.6615515771526002, - "grad_norm": 0.06928746402263641, - "learning_rate": 7.100415123242701e-05, - "loss": 0.007224821299314499, - "step": 3880 - }, - { - "epoch": 0.6624040920716112, - "grad_norm": 0.0669165551662445, - "learning_rate": 7.099263468565317e-05, - "loss": 0.007274401932954788, - "step": 3885 - }, - { - "epoch": 0.6632566069906224, - "grad_norm": 0.09326919168233871, - "learning_rate": 7.098110250340334e-05, - "loss": 0.008258016407489776, - "step": 3890 - }, - { - "epoch": 0.6641091219096334, - "grad_norm": 0.07563190162181854, - "learning_rate": 7.096955469106111e-05, - "loss": 0.01005811095237732, - "step": 3895 - }, - { - "epoch": 0.6649616368286445, - "grad_norm": 0.10135438293218613, - "learning_rate": 7.09579912540174e-05, - "loss": 0.009129725396633148, - "step": 3900 - }, - { - "epoch": 0.6658141517476556, - "grad_norm": 0.07946127653121948, - "learning_rate": 7.094641219767041e-05, - "loss": 0.013300496339797973, - "step": 3905 - }, - { - "epoch": 0.6666666666666666, - "grad_norm": 0.0727713331580162, - "learning_rate": 7.093481752742561e-05, - "loss": 0.01028701215982437, - "step": 3910 - }, - { - "epoch": 0.6675191815856778, - "grad_norm": 0.0718616396188736, - "learning_rate": 7.092320724869578e-05, - "loss": 0.009694813191890717, - "step": 3915 - }, - { - "epoch": 0.6683716965046889, - "grad_norm": 0.07789818942546844, - "learning_rate": 7.091158136690102e-05, - "loss": 0.009028838574886322, - "step": 3920 - }, - { - "epoch": 0.6692242114236999, - "grad_norm": 0.07319378852844238, - "learning_rate": 7.089993988746862e-05, - "loss": 0.008582034707069397, - "step": 3925 - }, - { - "epoch": 0.670076726342711, - "grad_norm": 0.086976557970047, - "learning_rate": 7.088828281583326e-05, - "loss": 0.013991822302341462, - "step": 3930 - }, - { - "epoch": 0.670929241261722, - "grad_norm": 0.07413294911384583, - "learning_rate": 7.087661015743681e-05, - "loss": 0.010896880924701691, - "step": 3935 - }, - { - "epoch": 0.6717817561807332, - "grad_norm": 0.12066303938627243, - "learning_rate": 7.08649219177285e-05, - "loss": 0.011574408411979676, - "step": 3940 - }, - { - "epoch": 0.6726342710997443, - "grad_norm": 0.11789914965629578, - "learning_rate": 7.085321810216474e-05, - "loss": 0.011523760855197906, - "step": 3945 - }, - { - "epoch": 0.6734867860187553, - "grad_norm": 0.07654725015163422, - "learning_rate": 7.084149871620929e-05, - "loss": 0.010388451814651489, - "step": 3950 - }, - { - "epoch": 0.6743393009377664, - "grad_norm": 0.05072671175003052, - "learning_rate": 7.082976376533315e-05, - "loss": 0.009674163907766343, - "step": 3955 - }, - { - "epoch": 0.6751918158567775, - "grad_norm": 0.08331634104251862, - "learning_rate": 7.081801325501458e-05, - "loss": 0.01096268892288208, - "step": 3960 - }, - { - "epoch": 0.6760443307757886, - "grad_norm": 0.03134739026427269, - "learning_rate": 7.08062471907391e-05, - "loss": 0.009522407501935958, - "step": 3965 - }, - { - "epoch": 0.6768968456947997, - "grad_norm": 0.09123755246400833, - "learning_rate": 7.079446557799951e-05, - "loss": 0.011530914902687072, - "step": 3970 - }, - { - "epoch": 0.6777493606138107, - "grad_norm": 0.04438139498233795, - "learning_rate": 7.078266842229585e-05, - "loss": 0.007757561653852463, - "step": 3975 - }, - { - "epoch": 0.6786018755328218, - "grad_norm": 0.06562457978725433, - "learning_rate": 7.077085572913543e-05, - "loss": 0.010243573784828186, - "step": 3980 - }, - { - "epoch": 0.6794543904518329, - "grad_norm": 0.08872365206480026, - "learning_rate": 7.075902750403283e-05, - "loss": 0.009365381300449371, - "step": 3985 - }, - { - "epoch": 0.680306905370844, - "grad_norm": 0.06977558881044388, - "learning_rate": 7.074718375250982e-05, - "loss": 0.010138784348964692, - "step": 3990 - }, - { - "epoch": 0.6811594202898551, - "grad_norm": 0.08196771889925003, - "learning_rate": 7.073532448009547e-05, - "loss": 0.011172623187303544, - "step": 3995 - }, - { - "epoch": 0.6820119352088662, - "grad_norm": 0.09635947644710541, - "learning_rate": 7.072344969232611e-05, - "loss": 0.011570926010608672, - "step": 4000 - }, - { - "epoch": 0.6828644501278772, - "grad_norm": 0.10703961551189423, - "learning_rate": 7.071155939474525e-05, - "loss": 0.010987398028373719, - "step": 4005 - }, - { - "epoch": 0.6837169650468883, - "grad_norm": 0.08124027401208878, - "learning_rate": 7.06996535929037e-05, - "loss": 0.009500280767679215, - "step": 4010 - }, - { - "epoch": 0.6845694799658995, - "grad_norm": 0.1272915005683899, - "learning_rate": 7.068773229235946e-05, - "loss": 0.011316341906785965, - "step": 4015 - }, - { - "epoch": 0.6854219948849105, - "grad_norm": 0.05040539428591728, - "learning_rate": 7.067579549867782e-05, - "loss": 0.009714095294475556, - "step": 4020 - }, - { - "epoch": 0.6862745098039216, - "grad_norm": 0.0954902172088623, - "learning_rate": 7.066384321743125e-05, - "loss": 0.01280902624130249, - "step": 4025 - }, - { - "epoch": 0.6871270247229326, - "grad_norm": 0.06131720915436745, - "learning_rate": 7.065187545419947e-05, - "loss": 0.00962383598089218, - "step": 4030 - }, - { - "epoch": 0.6879795396419437, - "grad_norm": 0.09720136225223541, - "learning_rate": 7.063989221456946e-05, - "loss": 0.00951121300458908, - "step": 4035 - }, - { - "epoch": 0.6888320545609549, - "grad_norm": 0.09116765856742859, - "learning_rate": 7.062789350413536e-05, - "loss": 0.012013505399227142, - "step": 4040 - }, - { - "epoch": 0.6896845694799659, - "grad_norm": 0.15665945410728455, - "learning_rate": 7.061587932849858e-05, - "loss": 0.012792985141277313, - "step": 4045 - }, - { - "epoch": 0.690537084398977, - "grad_norm": 0.05531725287437439, - "learning_rate": 7.060384969326775e-05, - "loss": 0.009135130047798156, - "step": 4050 - }, - { - "epoch": 0.691389599317988, - "grad_norm": 0.05050938203930855, - "learning_rate": 7.059180460405869e-05, - "loss": 0.01005997508764267, - "step": 4055 - }, - { - "epoch": 0.6922421142369991, - "grad_norm": 0.08903607726097107, - "learning_rate": 7.057974406649444e-05, - "loss": 0.008456438779830933, - "step": 4060 - }, - { - "epoch": 0.6930946291560103, - "grad_norm": 0.1395196169614792, - "learning_rate": 7.056766808620529e-05, - "loss": 0.012946255505084991, - "step": 4065 - }, - { - "epoch": 0.6939471440750213, - "grad_norm": 0.13844923675060272, - "learning_rate": 7.055557666882866e-05, - "loss": 0.008691005408763885, - "step": 4070 - }, - { - "epoch": 0.6947996589940324, - "grad_norm": 0.0776091143488884, - "learning_rate": 7.054346982000928e-05, - "loss": 0.011200450360774994, - "step": 4075 - }, - { - "epoch": 0.6956521739130435, - "grad_norm": 0.06444083154201508, - "learning_rate": 7.0531347545399e-05, - "loss": 0.010937537997961044, - "step": 4080 - }, - { - "epoch": 0.6965046888320545, - "grad_norm": 0.07561453431844711, - "learning_rate": 7.05192098506569e-05, - "loss": 0.00827426165342331, - "step": 4085 - }, - { - "epoch": 0.6973572037510657, - "grad_norm": 0.06595294177532196, - "learning_rate": 7.050705674144927e-05, - "loss": 0.007974696159362794, - "step": 4090 - }, - { - "epoch": 0.6982097186700768, - "grad_norm": 0.08887284994125366, - "learning_rate": 7.049488822344959e-05, - "loss": 0.009547770768404008, - "step": 4095 - }, - { - "epoch": 0.6990622335890878, - "grad_norm": 0.06858290731906891, - "learning_rate": 7.04827043023385e-05, - "loss": 0.012419018894433975, - "step": 4100 - }, - { - "epoch": 0.6999147485080989, - "grad_norm": 0.09107037633657455, - "learning_rate": 7.047050498380391e-05, - "loss": 0.008159243315458298, - "step": 4105 - }, - { - "epoch": 0.7007672634271099, - "grad_norm": 0.062287479639053345, - "learning_rate": 7.045829027354082e-05, - "loss": 0.00995248556137085, - "step": 4110 - }, - { - "epoch": 0.7016197783461211, - "grad_norm": 0.11668206751346588, - "learning_rate": 7.044606017725148e-05, - "loss": 0.012902414798736573, - "step": 4115 - }, - { - "epoch": 0.7024722932651322, - "grad_norm": 0.08674585819244385, - "learning_rate": 7.043381470064532e-05, - "loss": 0.010076310485601425, - "step": 4120 - }, - { - "epoch": 0.7033248081841432, - "grad_norm": 0.09690031409263611, - "learning_rate": 7.042155384943892e-05, - "loss": 0.011086124181747436, - "step": 4125 - }, - { - "epoch": 0.7041773231031543, - "grad_norm": 0.09527027606964111, - "learning_rate": 7.040927762935605e-05, - "loss": 0.010631310194730759, - "step": 4130 - }, - { - "epoch": 0.7050298380221653, - "grad_norm": 0.07089316844940186, - "learning_rate": 7.039698604612765e-05, - "loss": 0.010472215712070465, - "step": 4135 - }, - { - "epoch": 0.7058823529411765, - "grad_norm": 0.07314343005418777, - "learning_rate": 7.038467910549188e-05, - "loss": 0.011205179244279861, - "step": 4140 - }, - { - "epoch": 0.7067348678601876, - "grad_norm": 0.10004976391792297, - "learning_rate": 7.037235681319399e-05, - "loss": 0.011671188473701476, - "step": 4145 - }, - { - "epoch": 0.7075873827791986, - "grad_norm": 0.06655722856521606, - "learning_rate": 7.036001917498645e-05, - "loss": 0.008725546300411224, - "step": 4150 - }, - { - "epoch": 0.7084398976982097, - "grad_norm": 0.0563860684633255, - "learning_rate": 7.034766619662888e-05, - "loss": 0.009952855855226516, - "step": 4155 - }, - { - "epoch": 0.7092924126172208, - "grad_norm": 0.09032288193702698, - "learning_rate": 7.033529788388806e-05, - "loss": 0.010940121859312058, - "step": 4160 - }, - { - "epoch": 0.7101449275362319, - "grad_norm": 0.10090665519237518, - "learning_rate": 7.032291424253793e-05, - "loss": 0.0093452550470829, - "step": 4165 - }, - { - "epoch": 0.710997442455243, - "grad_norm": 0.08737532049417496, - "learning_rate": 7.03105152783596e-05, - "loss": 0.011567962169647217, - "step": 4170 - }, - { - "epoch": 0.711849957374254, - "grad_norm": 0.08184633404016495, - "learning_rate": 7.029810099714128e-05, - "loss": 0.011243235319852829, - "step": 4175 - }, - { - "epoch": 0.7127024722932651, - "grad_norm": 0.10227608680725098, - "learning_rate": 7.028567140467842e-05, - "loss": 0.01062348037958145, - "step": 4180 - }, - { - "epoch": 0.7135549872122762, - "grad_norm": 0.08998764306306839, - "learning_rate": 7.027322650677353e-05, - "loss": 0.01058843582868576, - "step": 4185 - }, - { - "epoch": 0.7144075021312873, - "grad_norm": 0.06969588994979858, - "learning_rate": 7.02607663092363e-05, - "loss": 0.009745591133832932, - "step": 4190 - }, - { - "epoch": 0.7152600170502984, - "grad_norm": 0.08256277441978455, - "learning_rate": 7.024829081788359e-05, - "loss": 0.009450466185808182, - "step": 4195 - }, - { - "epoch": 0.7161125319693095, - "grad_norm": 0.06720574200153351, - "learning_rate": 7.023580003853937e-05, - "loss": 0.006700781732797622, - "step": 4200 - }, - { - "epoch": 0.7169650468883205, - "grad_norm": 0.1428842693567276, - "learning_rate": 7.022329397703474e-05, - "loss": 0.009295140206813813, - "step": 4205 - }, - { - "epoch": 0.7178175618073316, - "grad_norm": 0.11677515506744385, - "learning_rate": 7.021077263920794e-05, - "loss": 0.011417123675346374, - "step": 4210 - }, - { - "epoch": 0.7186700767263428, - "grad_norm": 0.06874742358922958, - "learning_rate": 7.019823603090437e-05, - "loss": 0.013518881797790528, - "step": 4215 - }, - { - "epoch": 0.7195225916453538, - "grad_norm": 0.06695922464132309, - "learning_rate": 7.018568415797651e-05, - "loss": 0.008886832743883133, - "step": 4220 - }, - { - "epoch": 0.7203751065643649, - "grad_norm": 0.09428033232688904, - "learning_rate": 7.017311702628402e-05, - "loss": 0.009926854819059371, - "step": 4225 - }, - { - "epoch": 0.7212276214833759, - "grad_norm": 0.08420582115650177, - "learning_rate": 7.016053464169362e-05, - "loss": 0.011952979117631912, - "step": 4230 - }, - { - "epoch": 0.722080136402387, - "grad_norm": 0.07804932445287704, - "learning_rate": 7.014793701007922e-05, - "loss": 0.009345601499080657, - "step": 4235 - }, - { - "epoch": 0.7229326513213982, - "grad_norm": 0.10204415768384933, - "learning_rate": 7.013532413732179e-05, - "loss": 0.009596188366413117, - "step": 4240 - }, - { - "epoch": 0.7237851662404092, - "grad_norm": 0.06207561865448952, - "learning_rate": 7.012269602930946e-05, - "loss": 0.010864783823490144, - "step": 4245 - }, - { - "epoch": 0.7246376811594203, - "grad_norm": 0.07258995622396469, - "learning_rate": 7.011005269193743e-05, - "loss": 0.010417935252189637, - "step": 4250 - }, - { - "epoch": 0.7254901960784313, - "grad_norm": 0.09797866642475128, - "learning_rate": 7.009739413110803e-05, - "loss": 0.009975450485944748, - "step": 4255 - }, - { - "epoch": 0.7263427109974424, - "grad_norm": 0.14229723811149597, - "learning_rate": 7.008472035273071e-05, - "loss": 0.013093425333499909, - "step": 4260 - }, - { - "epoch": 0.7271952259164536, - "grad_norm": 0.10052912682294846, - "learning_rate": 7.007203136272202e-05, - "loss": 0.008044174313545227, - "step": 4265 - }, - { - "epoch": 0.7280477408354646, - "grad_norm": 0.06391840428113937, - "learning_rate": 7.005932716700558e-05, - "loss": 0.009895801544189453, - "step": 4270 - }, - { - "epoch": 0.7289002557544757, - "grad_norm": 0.08301703631877899, - "learning_rate": 7.004660777151213e-05, - "loss": 0.008399789780378341, - "step": 4275 - }, - { - "epoch": 0.7297527706734868, - "grad_norm": 0.09191301465034485, - "learning_rate": 7.003387318217954e-05, - "loss": 0.010163726657629013, - "step": 4280 - }, - { - "epoch": 0.7306052855924978, - "grad_norm": 0.06292443722486496, - "learning_rate": 7.00211234049527e-05, - "loss": 0.010847686976194381, - "step": 4285 - }, - { - "epoch": 0.731457800511509, - "grad_norm": 0.08020442724227905, - "learning_rate": 7.000835844578365e-05, - "loss": 0.010198664665222169, - "step": 4290 - }, - { - "epoch": 0.73231031543052, - "grad_norm": 0.08008337765932083, - "learning_rate": 6.999557831063152e-05, - "loss": 0.010532062500715256, - "step": 4295 - }, - { - "epoch": 0.7331628303495311, - "grad_norm": 0.09209048002958298, - "learning_rate": 6.998278300546245e-05, - "loss": 0.012655872106552123, - "step": 4300 - }, - { - "epoch": 0.7340153452685422, - "grad_norm": 0.1040966734290123, - "learning_rate": 6.996997253624974e-05, - "loss": 0.009482499212026596, - "step": 4305 - }, - { - "epoch": 0.7348678601875532, - "grad_norm": 0.06724270433187485, - "learning_rate": 6.995714690897376e-05, - "loss": 0.008470554649829865, - "step": 4310 - }, - { - "epoch": 0.7357203751065644, - "grad_norm": 0.050487734377384186, - "learning_rate": 6.994430612962192e-05, - "loss": 0.009740649163722992, - "step": 4315 - }, - { - "epoch": 0.7365728900255755, - "grad_norm": 0.07633031159639359, - "learning_rate": 6.993145020418873e-05, - "loss": 0.009455478191375733, - "step": 4320 - }, - { - "epoch": 0.7374254049445865, - "grad_norm": 0.11053632944822311, - "learning_rate": 6.991857913867575e-05, - "loss": 0.0111383855342865, - "step": 4325 - }, - { - "epoch": 0.7382779198635976, - "grad_norm": 0.07932359725236893, - "learning_rate": 6.990569293909165e-05, - "loss": 0.010170862078666687, - "step": 4330 - }, - { - "epoch": 0.7391304347826086, - "grad_norm": 0.06205928325653076, - "learning_rate": 6.98927916114521e-05, - "loss": 0.009742221236228943, - "step": 4335 - }, - { - "epoch": 0.7399829497016198, - "grad_norm": 0.07431669533252716, - "learning_rate": 6.987987516177989e-05, - "loss": 0.009026934206485749, - "step": 4340 - }, - { - "epoch": 0.7408354646206309, - "grad_norm": 0.06495700776576996, - "learning_rate": 6.986694359610486e-05, - "loss": 0.010289526730775832, - "step": 4345 - }, - { - "epoch": 0.7416879795396419, - "grad_norm": 0.07561559230089188, - "learning_rate": 6.985399692046387e-05, - "loss": 0.012499828636646271, - "step": 4350 - }, - { - "epoch": 0.742540494458653, - "grad_norm": 0.07708913832902908, - "learning_rate": 6.984103514090087e-05, - "loss": 0.01143759787082672, - "step": 4355 - }, - { - "epoch": 0.7433930093776641, - "grad_norm": 0.1315995305776596, - "learning_rate": 6.982805826346687e-05, - "loss": 0.010377982258796692, - "step": 4360 - }, - { - "epoch": 0.7442455242966752, - "grad_norm": 0.08346904814243317, - "learning_rate": 6.981506629421986e-05, - "loss": 0.008995984494686127, - "step": 4365 - }, - { - "epoch": 0.7450980392156863, - "grad_norm": 0.0814853310585022, - "learning_rate": 6.980205923922497e-05, - "loss": 0.009719532728195191, - "step": 4370 - }, - { - "epoch": 0.7459505541346974, - "grad_norm": 0.06594623625278473, - "learning_rate": 6.978903710455431e-05, - "loss": 0.008998245745897294, - "step": 4375 - }, - { - "epoch": 0.7468030690537084, - "grad_norm": 0.09526190906763077, - "learning_rate": 6.977599989628704e-05, - "loss": 0.010040522366762162, - "step": 4380 - }, - { - "epoch": 0.7476555839727195, - "grad_norm": 0.0938214361667633, - "learning_rate": 6.976294762050935e-05, - "loss": 0.010504753142595292, - "step": 4385 - }, - { - "epoch": 0.7485080988917306, - "grad_norm": 0.09816118329763412, - "learning_rate": 6.97498802833145e-05, - "loss": 0.011645899713039398, - "step": 4390 - }, - { - "epoch": 0.7493606138107417, - "grad_norm": 0.0780767872929573, - "learning_rate": 6.973679789080276e-05, - "loss": 0.011689887195825577, - "step": 4395 - }, - { - "epoch": 0.7498721227621483, - "eval_loss": 0.03396161273121834, - "eval_runtime": 3.6324, - "eval_samples_per_second": 69.376, - "eval_steps_per_second": 1.101, - "step": 4398 - }, - { - "eval_cer_subset": 0.01302783070334001, - "eval_cer_subset_edit_distance": 800, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 4398 - }, - { - "epoch": 0.7502131287297528, - "grad_norm": 0.061287231743335724, - "learning_rate": 6.972370044908141e-05, - "loss": 0.011720015108585358, - "step": 4400 - }, - { - "epoch": 0.7510656436487638, - "grad_norm": 0.0668778270483017, - "learning_rate": 6.971058796426478e-05, - "loss": 0.012064819037914277, - "step": 4405 - }, - { - "epoch": 0.7519181585677749, - "grad_norm": 0.07897942513227463, - "learning_rate": 6.969746044247421e-05, - "loss": 0.010592888295650481, - "step": 4410 - }, - { - "epoch": 0.7527706734867861, - "grad_norm": 0.09090534597635269, - "learning_rate": 6.968431788983806e-05, - "loss": 0.011600010097026825, - "step": 4415 - }, - { - "epoch": 0.7536231884057971, - "grad_norm": 0.080506332218647, - "learning_rate": 6.967116031249172e-05, - "loss": 0.013006125390529633, - "step": 4420 - }, - { - "epoch": 0.7544757033248082, - "grad_norm": 0.04851040989160538, - "learning_rate": 6.965798771657758e-05, - "loss": 0.010165790468454361, - "step": 4425 - }, - { - "epoch": 0.7553282182438192, - "grad_norm": 0.06298603117465973, - "learning_rate": 6.964480010824505e-05, - "loss": 0.007800602912902832, - "step": 4430 - }, - { - "epoch": 0.7561807331628303, - "grad_norm": 0.09919694811105728, - "learning_rate": 6.963159749365053e-05, - "loss": 0.010877586901187897, - "step": 4435 - }, - { - "epoch": 0.7570332480818415, - "grad_norm": 0.083896704018116, - "learning_rate": 6.961837987895747e-05, - "loss": 0.011114003509283066, - "step": 4440 - }, - { - "epoch": 0.7578857630008525, - "grad_norm": 0.0452699176967144, - "learning_rate": 6.960514727033626e-05, - "loss": 0.008609171956777573, - "step": 4445 - }, - { - "epoch": 0.7587382779198636, - "grad_norm": 0.08951374143362045, - "learning_rate": 6.959189967396435e-05, - "loss": 0.01193360835313797, - "step": 4450 - }, - { - "epoch": 0.7595907928388747, - "grad_norm": 0.08749551326036453, - "learning_rate": 6.957863709602611e-05, - "loss": 0.009163837879896164, - "step": 4455 - }, - { - "epoch": 0.7604433077578857, - "grad_norm": 0.09304409474134445, - "learning_rate": 6.956535954271301e-05, - "loss": 0.01038273349404335, - "step": 4460 - }, - { - "epoch": 0.7612958226768969, - "grad_norm": 0.06662629544734955, - "learning_rate": 6.955206702022342e-05, - "loss": 0.010570932179689407, - "step": 4465 - }, - { - "epoch": 0.7621483375959079, - "grad_norm": 0.07736595720052719, - "learning_rate": 6.953875953476276e-05, - "loss": 0.009856238961219788, - "step": 4470 - }, - { - "epoch": 0.763000852514919, - "grad_norm": 0.04692552238702774, - "learning_rate": 6.952543709254338e-05, - "loss": 0.006967573612928391, - "step": 4475 - }, - { - "epoch": 0.7638533674339301, - "grad_norm": 0.06901179254055023, - "learning_rate": 6.951209969978464e-05, - "loss": 0.008588603138923645, - "step": 4480 - }, - { - "epoch": 0.7647058823529411, - "grad_norm": 0.07733304053544998, - "learning_rate": 6.949874736271289e-05, - "loss": 0.012579981982707978, - "step": 4485 - }, - { - "epoch": 0.7655583972719523, - "grad_norm": 0.0693301409482956, - "learning_rate": 6.948538008756144e-05, - "loss": 0.009747470915317535, - "step": 4490 - }, - { - "epoch": 0.7664109121909634, - "grad_norm": 0.07054253667593002, - "learning_rate": 6.947199788057059e-05, - "loss": 0.008387601375579834, - "step": 4495 - }, - { - "epoch": 0.7672634271099744, - "grad_norm": 0.06526053696870804, - "learning_rate": 6.945860074798757e-05, - "loss": 0.008389735966920853, - "step": 4500 - }, - { - "epoch": 0.7681159420289855, - "grad_norm": 0.06862380355596542, - "learning_rate": 6.944518869606662e-05, - "loss": 0.008736115694046021, - "step": 4505 - }, - { - "epoch": 0.7689684569479965, - "grad_norm": 0.06233246996998787, - "learning_rate": 6.943176173106897e-05, - "loss": 0.008519527316093446, - "step": 4510 - }, - { - "epoch": 0.7698209718670077, - "grad_norm": 0.07696249336004257, - "learning_rate": 6.941831985926273e-05, - "loss": 0.011381441354751587, - "step": 4515 - }, - { - "epoch": 0.7706734867860188, - "grad_norm": 0.11450641602277756, - "learning_rate": 6.940486308692302e-05, - "loss": 0.012895810604095458, - "step": 4520 - }, - { - "epoch": 0.7715260017050298, - "grad_norm": 0.09141158312559128, - "learning_rate": 6.939139142033191e-05, - "loss": 0.009508632868528367, - "step": 4525 - }, - { - "epoch": 0.7723785166240409, - "grad_norm": 0.09469986706972122, - "learning_rate": 6.937790486577844e-05, - "loss": 0.014812557399272919, - "step": 4530 - }, - { - "epoch": 0.773231031543052, - "grad_norm": 0.08494299650192261, - "learning_rate": 6.936440342955855e-05, - "loss": 0.01355334222316742, - "step": 4535 - }, - { - "epoch": 0.7740835464620631, - "grad_norm": 0.09328251332044601, - "learning_rate": 6.93508871179752e-05, - "loss": 0.011529977619647979, - "step": 4540 - }, - { - "epoch": 0.7749360613810742, - "grad_norm": 0.06696850806474686, - "learning_rate": 6.933735593733821e-05, - "loss": 0.010230815410614014, - "step": 4545 - }, - { - "epoch": 0.7757885763000852, - "grad_norm": 0.07968153059482574, - "learning_rate": 6.932380989396442e-05, - "loss": 0.012129776924848557, - "step": 4550 - }, - { - "epoch": 0.7766410912190963, - "grad_norm": 0.07916650176048279, - "learning_rate": 6.931024899417756e-05, - "loss": 0.009455519914627075, - "step": 4555 - }, - { - "epoch": 0.7774936061381074, - "grad_norm": 0.05229945108294487, - "learning_rate": 6.92966732443083e-05, - "loss": 0.008516684174537659, - "step": 4560 - }, - { - "epoch": 0.7783461210571185, - "grad_norm": 0.08314234018325806, - "learning_rate": 6.928308265069428e-05, - "loss": 0.010914114117622376, - "step": 4565 - }, - { - "epoch": 0.7791986359761296, - "grad_norm": 0.05075672268867493, - "learning_rate": 6.926947721968001e-05, - "loss": 0.008188208192586898, - "step": 4570 - }, - { - "epoch": 0.7800511508951407, - "grad_norm": 0.09465362876653671, - "learning_rate": 6.925585695761697e-05, - "loss": 0.009074468165636063, - "step": 4575 - }, - { - "epoch": 0.7809036658141517, - "grad_norm": 0.09024044126272202, - "learning_rate": 6.924222187086356e-05, - "loss": 0.006571034342050553, - "step": 4580 - }, - { - "epoch": 0.7817561807331628, - "grad_norm": 0.050575681030750275, - "learning_rate": 6.922857196578507e-05, - "loss": 0.008829852938652039, - "step": 4585 - }, - { - "epoch": 0.782608695652174, - "grad_norm": 0.09888230264186859, - "learning_rate": 6.921490724875376e-05, - "loss": 0.01053793728351593, - "step": 4590 - }, - { - "epoch": 0.783461210571185, - "grad_norm": 0.042567264288663864, - "learning_rate": 6.920122772614875e-05, - "loss": 0.009682109951972962, - "step": 4595 - }, - { - "epoch": 0.7843137254901961, - "grad_norm": 0.10021623969078064, - "learning_rate": 6.91875334043561e-05, - "loss": 0.012160807102918624, - "step": 4600 - }, - { - "epoch": 0.7851662404092071, - "grad_norm": 0.07016255706548691, - "learning_rate": 6.917382428976878e-05, - "loss": 0.008590599894523621, - "step": 4605 - }, - { - "epoch": 0.7860187553282183, - "grad_norm": 0.06567320227622986, - "learning_rate": 6.916010038878667e-05, - "loss": 0.00809207409620285, - "step": 4610 - }, - { - "epoch": 0.7868712702472294, - "grad_norm": 0.05032164603471756, - "learning_rate": 6.914636170781652e-05, - "loss": 0.009291460365056991, - "step": 4615 - }, - { - "epoch": 0.7877237851662404, - "grad_norm": 0.06303273886442184, - "learning_rate": 6.913260825327204e-05, - "loss": 0.00837131291627884, - "step": 4620 - }, - { - "epoch": 0.7885763000852515, - "grad_norm": 0.05837355926632881, - "learning_rate": 6.911884003157376e-05, - "loss": 0.007800968736410141, - "step": 4625 - }, - { - "epoch": 0.7894288150042625, - "grad_norm": 0.07052712142467499, - "learning_rate": 6.910505704914916e-05, - "loss": 0.010577390342950821, - "step": 4630 - }, - { - "epoch": 0.7902813299232737, - "grad_norm": 0.08550997078418732, - "learning_rate": 6.909125931243259e-05, - "loss": 0.012821859121322632, - "step": 4635 - }, - { - "epoch": 0.7911338448422848, - "grad_norm": 0.060406558215618134, - "learning_rate": 6.90774468278653e-05, - "loss": 0.0065113060176372525, - "step": 4640 - }, - { - "epoch": 0.7919863597612958, - "grad_norm": 0.13999445736408234, - "learning_rate": 6.906361960189542e-05, - "loss": 0.012699820101261139, - "step": 4645 - }, - { - "epoch": 0.7928388746803069, - "grad_norm": 0.07585978507995605, - "learning_rate": 6.904977764097797e-05, - "loss": 0.008435635268688202, - "step": 4650 - }, - { - "epoch": 0.793691389599318, - "grad_norm": 0.07488108426332474, - "learning_rate": 6.90359209515748e-05, - "loss": 0.011925875395536422, - "step": 4655 - }, - { - "epoch": 0.7945439045183291, - "grad_norm": 0.12476535886526108, - "learning_rate": 6.902204954015471e-05, - "loss": 0.009086847305297852, - "step": 4660 - }, - { - "epoch": 0.7953964194373402, - "grad_norm": 0.08779732137918472, - "learning_rate": 6.900816341319331e-05, - "loss": 0.00962812826037407, - "step": 4665 - }, - { - "epoch": 0.7962489343563512, - "grad_norm": 0.15791405737400055, - "learning_rate": 6.899426257717312e-05, - "loss": 0.011767397075891495, - "step": 4670 - }, - { - "epoch": 0.7971014492753623, - "grad_norm": 0.11228909343481064, - "learning_rate": 6.898034703858352e-05, - "loss": 0.008271434903144836, - "step": 4675 - }, - { - "epoch": 0.7979539641943734, - "grad_norm": 0.07288003712892532, - "learning_rate": 6.896641680392073e-05, - "loss": 0.009384474158287049, - "step": 4680 - }, - { - "epoch": 0.7988064791133845, - "grad_norm": 0.08285173773765564, - "learning_rate": 6.895247187968784e-05, - "loss": 0.012600034475326538, - "step": 4685 - }, - { - "epoch": 0.7996589940323956, - "grad_norm": 0.07812397927045822, - "learning_rate": 6.893851227239484e-05, - "loss": 0.008935874700546265, - "step": 4690 - }, - { - "epoch": 0.8005115089514067, - "grad_norm": 0.07500546425580978, - "learning_rate": 6.892453798855852e-05, - "loss": 0.010619471222162247, - "step": 4695 - }, - { - "epoch": 0.8013640238704177, - "grad_norm": 0.05211177095770836, - "learning_rate": 6.891054903470251e-05, - "loss": 0.008601508289575576, - "step": 4700 - }, - { - "epoch": 0.8022165387894288, - "grad_norm": 0.029616642743349075, - "learning_rate": 6.889654541735738e-05, - "loss": 0.007921247184276581, - "step": 4705 - }, - { - "epoch": 0.80306905370844, - "grad_norm": 0.06894131749868393, - "learning_rate": 6.888252714306044e-05, - "loss": 0.010935742408037186, - "step": 4710 - }, - { - "epoch": 0.803921568627451, - "grad_norm": 0.0766182467341423, - "learning_rate": 6.886849421835587e-05, - "loss": 0.010556706041097642, - "step": 4715 - }, - { - "epoch": 0.8047740835464621, - "grad_norm": 0.09164462238550186, - "learning_rate": 6.885444664979477e-05, - "loss": 0.010812586545944214, - "step": 4720 - }, - { - "epoch": 0.8056265984654731, - "grad_norm": 0.06463408470153809, - "learning_rate": 6.884038444393496e-05, - "loss": 0.009179002791643142, - "step": 4725 - }, - { - "epoch": 0.8064791133844842, - "grad_norm": 0.06639672070741653, - "learning_rate": 6.882630760734118e-05, - "loss": 0.012755092978477479, - "step": 4730 - }, - { - "epoch": 0.8073316283034954, - "grad_norm": 0.062024496495723724, - "learning_rate": 6.881221614658493e-05, - "loss": 0.009655499458312988, - "step": 4735 - }, - { - "epoch": 0.8081841432225064, - "grad_norm": 0.06751494854688644, - "learning_rate": 6.879811006824459e-05, - "loss": 0.010860173404216767, - "step": 4740 - }, - { - "epoch": 0.8090366581415175, - "grad_norm": 0.05535218119621277, - "learning_rate": 6.878398937890535e-05, - "loss": 0.011440058052539826, - "step": 4745 - }, - { - "epoch": 0.8098891730605285, - "grad_norm": 0.08365204185247421, - "learning_rate": 6.876985408515922e-05, - "loss": 0.011058357357978821, - "step": 4750 - }, - { - "epoch": 0.8107416879795396, - "grad_norm": 0.06450537592172623, - "learning_rate": 6.875570419360501e-05, - "loss": 0.01046149879693985, - "step": 4755 - }, - { - "epoch": 0.8115942028985508, - "grad_norm": 0.08542726188898087, - "learning_rate": 6.874153971084837e-05, - "loss": 0.009869573265314102, - "step": 4760 - }, - { - "epoch": 0.8124467178175618, - "grad_norm": 0.08184531331062317, - "learning_rate": 6.872736064350176e-05, - "loss": 0.01054040789604187, - "step": 4765 - }, - { - "epoch": 0.8132992327365729, - "grad_norm": 0.07068512588739395, - "learning_rate": 6.871316699818442e-05, - "loss": 0.009573462605476379, - "step": 4770 - }, - { - "epoch": 0.814151747655584, - "grad_norm": 0.08866564929485321, - "learning_rate": 6.869895878152244e-05, - "loss": 0.008078956604003906, - "step": 4775 - }, - { - "epoch": 0.815004262574595, - "grad_norm": 0.08215270191431046, - "learning_rate": 6.868473600014867e-05, - "loss": 0.010586659610271453, - "step": 4780 - }, - { - "epoch": 0.8158567774936062, - "grad_norm": 0.0449003241956234, - "learning_rate": 6.867049866070278e-05, - "loss": 0.008572281152009965, - "step": 4785 - }, - { - "epoch": 0.8167092924126172, - "grad_norm": 0.0766722783446312, - "learning_rate": 6.865624676983124e-05, - "loss": 0.009015947580337524, - "step": 4790 - }, - { - "epoch": 0.8175618073316283, - "grad_norm": 0.07404733449220657, - "learning_rate": 6.864198033418732e-05, - "loss": 0.014639028906822204, - "step": 4795 - }, - { - "epoch": 0.8184143222506394, - "grad_norm": 0.10437514632940292, - "learning_rate": 6.862769936043102e-05, - "loss": 0.009333716332912445, - "step": 4800 - }, - { - "epoch": 0.8192668371696504, - "grad_norm": 0.06732609868049622, - "learning_rate": 6.861340385522921e-05, - "loss": 0.007169592380523682, - "step": 4805 - }, - { - "epoch": 0.8201193520886616, - "grad_norm": 0.06016068905591965, - "learning_rate": 6.859909382525552e-05, - "loss": 0.009211564064025879, - "step": 4810 - }, - { - "epoch": 0.8209718670076727, - "grad_norm": 0.07302942126989365, - "learning_rate": 6.858476927719031e-05, - "loss": 0.009643231332302094, - "step": 4815 - }, - { - "epoch": 0.8218243819266837, - "grad_norm": 0.07511111348867416, - "learning_rate": 6.857043021772079e-05, - "loss": 0.010751830041408538, - "step": 4820 - }, - { - "epoch": 0.8226768968456948, - "grad_norm": 0.04791528359055519, - "learning_rate": 6.855607665354088e-05, - "loss": 0.008413314074277877, - "step": 4825 - }, - { - "epoch": 0.8235294117647058, - "grad_norm": 0.08279003202915192, - "learning_rate": 6.854170859135132e-05, - "loss": 0.009260118752717972, - "step": 4830 - }, - { - "epoch": 0.824381926683717, - "grad_norm": 0.06907783448696136, - "learning_rate": 6.85273260378596e-05, - "loss": 0.009681220352649688, - "step": 4835 - }, - { - "epoch": 0.8252344416027281, - "grad_norm": 0.09847953170537949, - "learning_rate": 6.851292899977997e-05, - "loss": 0.009847448766231537, - "step": 4840 - }, - { - "epoch": 0.8260869565217391, - "grad_norm": 0.0683966800570488, - "learning_rate": 6.849851748383343e-05, - "loss": 0.007601346075534821, - "step": 4845 - }, - { - "epoch": 0.8269394714407502, - "grad_norm": 0.0523662269115448, - "learning_rate": 6.848409149674779e-05, - "loss": 0.00900915488600731, - "step": 4850 - }, - { - "epoch": 0.8277919863597613, - "grad_norm": 0.07112257927656174, - "learning_rate": 6.846965104525757e-05, - "loss": 0.011230588704347611, - "step": 4855 - }, - { - "epoch": 0.8286445012787724, - "grad_norm": 0.09305348247289658, - "learning_rate": 6.845519613610402e-05, - "loss": 0.01087992861866951, - "step": 4860 - }, - { - "epoch": 0.8294970161977835, - "grad_norm": 0.062347084283828735, - "learning_rate": 6.84407267760352e-05, - "loss": 0.00806276947259903, - "step": 4865 - }, - { - "epoch": 0.8303495311167945, - "grad_norm": 0.09091926366090775, - "learning_rate": 6.84262429718059e-05, - "loss": 0.010562103241682053, - "step": 4870 - }, - { - "epoch": 0.8312020460358056, - "grad_norm": 0.05807443708181381, - "learning_rate": 6.841174473017762e-05, - "loss": 0.010788433253765106, - "step": 4875 - }, - { - "epoch": 0.8320545609548167, - "grad_norm": 0.06664252281188965, - "learning_rate": 6.839723205791863e-05, - "loss": 0.01030244082212448, - "step": 4880 - }, - { - "epoch": 0.8329070758738278, - "grad_norm": 0.07469561696052551, - "learning_rate": 6.838270496180392e-05, - "loss": 0.01250479370355606, - "step": 4885 - }, - { - "epoch": 0.8337595907928389, - "grad_norm": 0.0469096302986145, - "learning_rate": 6.836816344861523e-05, - "loss": 0.010546717792749405, - "step": 4890 - }, - { - "epoch": 0.83461210571185, - "grad_norm": 0.0646355077624321, - "learning_rate": 6.835360752514104e-05, - "loss": 0.008491561561822892, - "step": 4895 - }, - { - "epoch": 0.835464620630861, - "grad_norm": 0.06006006523966789, - "learning_rate": 6.83390371981765e-05, - "loss": 0.010175065696239471, - "step": 4900 - }, - { - "epoch": 0.8363171355498721, - "grad_norm": 0.0595518983900547, - "learning_rate": 6.832445247452355e-05, - "loss": 0.009471315890550613, - "step": 4905 - }, - { - "epoch": 0.8371696504688833, - "grad_norm": 0.0722845196723938, - "learning_rate": 6.830985336099081e-05, - "loss": 0.011656039208173753, - "step": 4910 - }, - { - "epoch": 0.8380221653878943, - "grad_norm": 0.09830670058727264, - "learning_rate": 6.829523986439366e-05, - "loss": 0.0106172576546669, - "step": 4915 - }, - { - "epoch": 0.8388746803069054, - "grad_norm": 0.0725899264216423, - "learning_rate": 6.828061199155413e-05, - "loss": 0.00935768336057663, - "step": 4920 - }, - { - "epoch": 0.8397271952259164, - "grad_norm": 0.06721889227628708, - "learning_rate": 6.826596974930101e-05, - "loss": 0.010951700061559677, - "step": 4925 - }, - { - "epoch": 0.8405797101449275, - "grad_norm": 0.09289079904556274, - "learning_rate": 6.82513131444698e-05, - "loss": 0.010057362169027329, - "step": 4930 - }, - { - "epoch": 0.8414322250639387, - "grad_norm": 0.07667957991361618, - "learning_rate": 6.823664218390267e-05, - "loss": 0.012943412363529205, - "step": 4935 - }, - { - "epoch": 0.8422847399829497, - "grad_norm": 0.041785743087530136, - "learning_rate": 6.822195687444853e-05, - "loss": 0.009845246374607087, - "step": 4940 - }, - { - "epoch": 0.8431372549019608, - "grad_norm": 0.062134817242622375, - "learning_rate": 6.820725722296295e-05, - "loss": 0.010506168007850647, - "step": 4945 - }, - { - "epoch": 0.8439897698209718, - "grad_norm": 0.0870882049202919, - "learning_rate": 6.819254323630825e-05, - "loss": 0.008953387290239334, - "step": 4950 - }, - { - "epoch": 0.8448422847399829, - "grad_norm": 0.06943989545106888, - "learning_rate": 6.817781492135337e-05, - "loss": 0.009975537657737732, - "step": 4955 - }, - { - "epoch": 0.8456947996589941, - "grad_norm": 0.0961625948548317, - "learning_rate": 6.8163072284974e-05, - "loss": 0.010299818217754364, - "step": 4960 - }, - { - "epoch": 0.8465473145780051, - "grad_norm": 0.0685473084449768, - "learning_rate": 6.814831533405249e-05, - "loss": 0.007892660051584243, - "step": 4965 - }, - { - "epoch": 0.8473998294970162, - "grad_norm": 0.06370922178030014, - "learning_rate": 6.813354407547787e-05, - "loss": 0.011673354357481004, - "step": 4970 - }, - { - "epoch": 0.8482523444160273, - "grad_norm": 0.1212867870926857, - "learning_rate": 6.811875851614586e-05, - "loss": 0.01049395203590393, - "step": 4975 - }, - { - "epoch": 0.8491048593350383, - "grad_norm": 0.07590476423501968, - "learning_rate": 6.810395866295885e-05, - "loss": 0.011261900514364242, - "step": 4980 - }, - { - "epoch": 0.8499573742540495, - "grad_norm": 0.06342966109514236, - "learning_rate": 6.808914452282592e-05, - "loss": 0.011414043605327606, - "step": 4985 - }, - { - "epoch": 0.8508098891730606, - "grad_norm": 0.08939556777477264, - "learning_rate": 6.807431610266278e-05, - "loss": 0.008719882369041443, - "step": 4990 - }, - { - "epoch": 0.8516624040920716, - "grad_norm": 0.09418119490146637, - "learning_rate": 6.805947340939183e-05, - "loss": 0.011406099796295166, - "step": 4995 - }, - { - "epoch": 0.8525149190110827, - "grad_norm": 0.09113836288452148, - "learning_rate": 6.804461644994213e-05, - "loss": 0.011070792376995087, - "step": 5000 - }, - { - "epoch": 0.8533674339300937, - "grad_norm": 0.06545080989599228, - "learning_rate": 6.802974523124941e-05, - "loss": 0.009602059423923493, - "step": 5005 - }, - { - "epoch": 0.8542199488491049, - "grad_norm": 0.07779792696237564, - "learning_rate": 6.801485976025607e-05, - "loss": 0.008207190036773681, - "step": 5010 - }, - { - "epoch": 0.855072463768116, - "grad_norm": 0.038606271147727966, - "learning_rate": 6.799996004391113e-05, - "loss": 0.008772189915180206, - "step": 5015 - }, - { - "epoch": 0.855924978687127, - "grad_norm": 0.07329479604959488, - "learning_rate": 6.798504608917025e-05, - "loss": 0.011226999759674072, - "step": 5020 - }, - { - "epoch": 0.8567774936061381, - "grad_norm": 0.08631903678178787, - "learning_rate": 6.797011790299579e-05, - "loss": 0.012361649423837662, - "step": 5025 - }, - { - "epoch": 0.8576300085251491, - "grad_norm": 0.09041957557201385, - "learning_rate": 6.79551754923567e-05, - "loss": 0.010576151311397552, - "step": 5030 - }, - { - "epoch": 0.8584825234441603, - "grad_norm": 0.07003892213106155, - "learning_rate": 6.794021886422861e-05, - "loss": 0.008748160302639007, - "step": 5035 - }, - { - "epoch": 0.8593350383631714, - "grad_norm": 0.06976106762886047, - "learning_rate": 6.792524802559378e-05, - "loss": 0.010539846867322922, - "step": 5040 - }, - { - "epoch": 0.8601875532821824, - "grad_norm": 0.05501266196370125, - "learning_rate": 6.791026298344107e-05, - "loss": 0.01045292615890503, - "step": 5045 - }, - { - "epoch": 0.8610400682011935, - "grad_norm": 0.051503781229257584, - "learning_rate": 6.789526374476602e-05, - "loss": 0.009410140663385391, - "step": 5050 - }, - { - "epoch": 0.8618925831202046, - "grad_norm": 0.05674072727560997, - "learning_rate": 6.788025031657076e-05, - "loss": 0.00899135023355484, - "step": 5055 - }, - { - "epoch": 0.8627450980392157, - "grad_norm": 0.09688259661197662, - "learning_rate": 6.786522270586406e-05, - "loss": 0.010228607058525085, - "step": 5060 - }, - { - "epoch": 0.8635976129582268, - "grad_norm": 0.08542542159557343, - "learning_rate": 6.785018091966131e-05, - "loss": 0.010819461196660995, - "step": 5065 - }, - { - "epoch": 0.8644501278772379, - "grad_norm": 0.05703757330775261, - "learning_rate": 6.783512496498452e-05, - "loss": 0.011345957219600678, - "step": 5070 - }, - { - "epoch": 0.8653026427962489, - "grad_norm": 0.08096349984407425, - "learning_rate": 6.782005484886231e-05, - "loss": 0.011220332235097885, - "step": 5075 - }, - { - "epoch": 0.86615515771526, - "grad_norm": 0.07224266976118088, - "learning_rate": 6.780497057832988e-05, - "loss": 0.012557309865951539, - "step": 5080 - }, - { - "epoch": 0.8670076726342711, - "grad_norm": 0.08718731999397278, - "learning_rate": 6.778987216042912e-05, - "loss": 0.009770408272743225, - "step": 5085 - }, - { - "epoch": 0.8678601875532822, - "grad_norm": 0.05731170251965523, - "learning_rate": 6.777475960220846e-05, - "loss": 0.008567098528146744, - "step": 5090 - }, - { - "epoch": 0.8687127024722933, - "grad_norm": 0.07938708364963531, - "learning_rate": 6.775963291072292e-05, - "loss": 0.010664292424917222, - "step": 5095 - }, - { - "epoch": 0.8695652173913043, - "grad_norm": 0.028421485796570778, - "learning_rate": 6.774449209303416e-05, - "loss": 0.010194088518619537, - "step": 5100 - }, - { - "epoch": 0.8704177323103154, - "grad_norm": 0.06094631180167198, - "learning_rate": 6.772933715621042e-05, - "loss": 0.009481045603752136, - "step": 5105 - }, - { - "epoch": 0.8712702472293266, - "grad_norm": 0.08887558430433273, - "learning_rate": 6.771416810732653e-05, - "loss": 0.012845474481582641, - "step": 5110 - }, - { - "epoch": 0.8721227621483376, - "grad_norm": 0.07431238144636154, - "learning_rate": 6.76989849534639e-05, - "loss": 0.00815560668706894, - "step": 5115 - }, - { - "epoch": 0.8729752770673487, - "grad_norm": 0.09380137175321579, - "learning_rate": 6.768378770171052e-05, - "loss": 0.009580246359109878, - "step": 5120 - }, - { - "epoch": 0.8738277919863597, - "grad_norm": 0.08001488447189331, - "learning_rate": 6.766857635916099e-05, - "loss": 0.00857289507985115, - "step": 5125 - }, - { - "epoch": 0.8746803069053708, - "grad_norm": 0.049355555325746536, - "learning_rate": 6.765335093291647e-05, - "loss": 0.009263276308774947, - "step": 5130 - }, - { - "epoch": 0.875532821824382, - "grad_norm": 0.05873994901776314, - "learning_rate": 6.763811143008469e-05, - "loss": 0.008233514428138734, - "step": 5135 - }, - { - "epoch": 0.876385336743393, - "grad_norm": 0.10190756618976593, - "learning_rate": 6.762285785777995e-05, - "loss": 0.01529676467180252, - "step": 5140 - }, - { - "epoch": 0.8772378516624041, - "grad_norm": 0.08395158499479294, - "learning_rate": 6.760759022312313e-05, - "loss": 0.00961325541138649, - "step": 5145 - }, - { - "epoch": 0.8780903665814151, - "grad_norm": 0.07476748526096344, - "learning_rate": 6.759230853324169e-05, - "loss": 0.010477164387702942, - "step": 5150 - }, - { - "epoch": 0.8789428815004262, - "grad_norm": 0.07773051410913467, - "learning_rate": 6.757701279526961e-05, - "loss": 0.010389962792396545, - "step": 5155 - }, - { - "epoch": 0.8797953964194374, - "grad_norm": 0.07345708459615707, - "learning_rate": 6.756170301634745e-05, - "loss": 0.009174319356679917, - "step": 5160 - }, - { - "epoch": 0.8806479113384484, - "grad_norm": 0.07917368412017822, - "learning_rate": 6.754637920362233e-05, - "loss": 0.012756256759166718, - "step": 5165 - }, - { - "epoch": 0.8815004262574595, - "grad_norm": 0.06016271933913231, - "learning_rate": 6.75310413642479e-05, - "loss": 0.011058077961206437, - "step": 5170 - }, - { - "epoch": 0.8823529411764706, - "grad_norm": 0.06637005507946014, - "learning_rate": 6.751568950538441e-05, - "loss": 0.013590328395366669, - "step": 5175 - }, - { - "epoch": 0.8832054560954816, - "grad_norm": 0.06486016511917114, - "learning_rate": 6.750032363419857e-05, - "loss": 0.010195261240005494, - "step": 5180 - }, - { - "epoch": 0.8840579710144928, - "grad_norm": 0.09800687432289124, - "learning_rate": 6.748494375786372e-05, - "loss": 0.011106249690055848, - "step": 5185 - }, - { - "epoch": 0.8849104859335039, - "grad_norm": 0.04665162041783333, - "learning_rate": 6.746954988355967e-05, - "loss": 0.009880972653627395, - "step": 5190 - }, - { - "epoch": 0.8857630008525149, - "grad_norm": 0.05554487928748131, - "learning_rate": 6.745414201847282e-05, - "loss": 0.009480565786361694, - "step": 5195 - }, - { - "epoch": 0.886615515771526, - "grad_norm": 0.059967316687107086, - "learning_rate": 6.743872016979605e-05, - "loss": 0.010072766989469528, - "step": 5200 - }, - { - "epoch": 0.887468030690537, - "grad_norm": 0.08275031298398972, - "learning_rate": 6.74232843447288e-05, - "loss": 0.010208947211503982, - "step": 5205 - }, - { - "epoch": 0.8883205456095482, - "grad_norm": 0.07091715931892395, - "learning_rate": 6.740783455047704e-05, - "loss": 0.011881709098815918, - "step": 5210 - }, - { - "epoch": 0.8891730605285593, - "grad_norm": 0.1465480923652649, - "learning_rate": 6.739237079425322e-05, - "loss": 0.010970161855220794, - "step": 5215 - }, - { - "epoch": 0.8900255754475703, - "grad_norm": 0.07571437954902649, - "learning_rate": 6.737689308327636e-05, - "loss": 0.010722124576568603, - "step": 5220 - }, - { - "epoch": 0.8908780903665814, - "grad_norm": 0.06671100109815598, - "learning_rate": 6.736140142477194e-05, - "loss": 0.010463282465934753, - "step": 5225 - }, - { - "epoch": 0.8917306052855924, - "grad_norm": 0.05676295980811119, - "learning_rate": 6.734589582597204e-05, - "loss": 0.00933043509721756, - "step": 5230 - }, - { - "epoch": 0.8925831202046036, - "grad_norm": 0.09708777070045471, - "learning_rate": 6.733037629411514e-05, - "loss": 0.011712780594825745, - "step": 5235 - }, - { - "epoch": 0.8934356351236147, - "grad_norm": 0.07073090970516205, - "learning_rate": 6.731484283644626e-05, - "loss": 0.008112293481826783, - "step": 5240 - }, - { - "epoch": 0.8942881500426257, - "grad_norm": 0.08821752667427063, - "learning_rate": 6.7299295460217e-05, - "loss": 0.01026250645518303, - "step": 5245 - }, - { - "epoch": 0.8951406649616368, - "grad_norm": 0.05165687948465347, - "learning_rate": 6.728373417268533e-05, - "loss": 0.008761890232563019, - "step": 5250 - }, - { - "epoch": 0.8959931798806479, - "grad_norm": 0.06872246414422989, - "learning_rate": 6.726815898111581e-05, - "loss": 0.012551462650299073, - "step": 5255 - }, - { - "epoch": 0.896845694799659, - "grad_norm": 0.07550673931837082, - "learning_rate": 6.725256989277944e-05, - "loss": 0.010165071487426758, - "step": 5260 - }, - { - "epoch": 0.8976982097186701, - "grad_norm": 0.05931933969259262, - "learning_rate": 6.723696691495373e-05, - "loss": 0.009457996487617493, - "step": 5265 - }, - { - "epoch": 0.8985507246376812, - "grad_norm": 0.10838861763477325, - "learning_rate": 6.722135005492268e-05, - "loss": 0.012053199112415314, - "step": 5270 - }, - { - "epoch": 0.8994032395566922, - "grad_norm": 0.062102265655994415, - "learning_rate": 6.720571931997676e-05, - "loss": 0.007582514733076096, - "step": 5275 - }, - { - "epoch": 0.9002557544757033, - "grad_norm": 0.05997858941555023, - "learning_rate": 6.719007471741289e-05, - "loss": 0.009348342567682267, - "step": 5280 - }, - { - "epoch": 0.9011082693947144, - "grad_norm": 0.05405129864811897, - "learning_rate": 6.717441625453451e-05, - "loss": 0.009910254925489425, - "step": 5285 - }, - { - "epoch": 0.9019607843137255, - "grad_norm": 0.08071329444646835, - "learning_rate": 6.715874393865152e-05, - "loss": 0.010672248899936676, - "step": 5290 - }, - { - "epoch": 0.9028132992327366, - "grad_norm": 0.06863300502300262, - "learning_rate": 6.714305777708027e-05, - "loss": 0.008570954948663712, - "step": 5295 - }, - { - "epoch": 0.9036658141517476, - "grad_norm": 0.07818135619163513, - "learning_rate": 6.712735777714357e-05, - "loss": 0.008647527545690536, - "step": 5300 - }, - { - "epoch": 0.9045183290707587, - "grad_norm": 0.14757376909255981, - "learning_rate": 6.711164394617072e-05, - "loss": 0.010825049877166749, - "step": 5305 - }, - { - "epoch": 0.9053708439897699, - "grad_norm": 0.07376445829868317, - "learning_rate": 6.709591629149746e-05, - "loss": 0.01299697458744049, - "step": 5310 - }, - { - "epoch": 0.9062233589087809, - "grad_norm": 0.05860469490289688, - "learning_rate": 6.708017482046597e-05, - "loss": 0.009549598395824432, - "step": 5315 - }, - { - "epoch": 0.907075873827792, - "grad_norm": 0.0799872875213623, - "learning_rate": 6.706441954042488e-05, - "loss": 0.009733843803405761, - "step": 5320 - }, - { - "epoch": 0.907928388746803, - "grad_norm": 0.05245954543352127, - "learning_rate": 6.704865045872932e-05, - "loss": 0.009799794852733612, - "step": 5325 - }, - { - "epoch": 0.9087809036658141, - "grad_norm": 0.05515241622924805, - "learning_rate": 6.703286758274079e-05, - "loss": 0.007391643524169922, - "step": 5330 - }, - { - "epoch": 0.9096334185848253, - "grad_norm": 0.05900256708264351, - "learning_rate": 6.701707091982726e-05, - "loss": 0.009107303619384766, - "step": 5335 - }, - { - "epoch": 0.9104859335038363, - "grad_norm": 0.09559495002031326, - "learning_rate": 6.700126047736317e-05, - "loss": 0.009052158147096635, - "step": 5340 - }, - { - "epoch": 0.9113384484228474, - "grad_norm": 0.11189334839582443, - "learning_rate": 6.698543626272932e-05, - "loss": 0.011292549222707749, - "step": 5345 - }, - { - "epoch": 0.9121909633418585, - "grad_norm": 0.07031659781932831, - "learning_rate": 6.6969598283313e-05, - "loss": 0.008589480072259903, - "step": 5350 - }, - { - "epoch": 0.9130434782608695, - "grad_norm": 0.1652907431125641, - "learning_rate": 6.69537465465079e-05, - "loss": 0.010865563899278641, - "step": 5355 - }, - { - "epoch": 0.9138959931798807, - "grad_norm": 0.06157436594367027, - "learning_rate": 6.693788105971413e-05, - "loss": 0.012611952424049378, - "step": 5360 - }, - { - "epoch": 0.9147485080988917, - "grad_norm": 0.03928734362125397, - "learning_rate": 6.692200183033826e-05, - "loss": 0.009115418046712875, - "step": 5365 - }, - { - "epoch": 0.9156010230179028, - "grad_norm": 0.06604880094528198, - "learning_rate": 6.690610886579321e-05, - "loss": 0.010015038400888443, - "step": 5370 - }, - { - "epoch": 0.9164535379369139, - "grad_norm": 0.07625336199998856, - "learning_rate": 6.689020217349835e-05, - "loss": 0.010416677594184876, - "step": 5375 - }, - { - "epoch": 0.9173060528559249, - "grad_norm": 0.07674526423215866, - "learning_rate": 6.687428176087946e-05, - "loss": 0.01016802191734314, - "step": 5380 - }, - { - "epoch": 0.9181585677749361, - "grad_norm": 0.08422617614269257, - "learning_rate": 6.685834763536872e-05, - "loss": 0.011127004027366638, - "step": 5385 - }, - { - "epoch": 0.9190110826939472, - "grad_norm": 0.057719554752111435, - "learning_rate": 6.684239980440472e-05, - "loss": 0.008915853500366212, - "step": 5390 - }, - { - "epoch": 0.9198635976129582, - "grad_norm": 0.056555263698101044, - "learning_rate": 6.682643827543241e-05, - "loss": 0.0095272496342659, - "step": 5395 - }, - { - "epoch": 0.9207161125319693, - "grad_norm": 0.07605638355016708, - "learning_rate": 6.681046305590317e-05, - "loss": 0.010731159895658492, - "step": 5400 - }, - { - "epoch": 0.9215686274509803, - "grad_norm": 0.07499220222234726, - "learning_rate": 6.679447415327479e-05, - "loss": 0.010919998586177825, - "step": 5405 - }, - { - "epoch": 0.9224211423699915, - "grad_norm": 0.052863143384456635, - "learning_rate": 6.677847157501137e-05, - "loss": 0.011300939321517944, - "step": 5410 - }, - { - "epoch": 0.9232736572890026, - "grad_norm": 0.09210597723722458, - "learning_rate": 6.676245532858351e-05, - "loss": 0.013997772336006164, - "step": 5415 - }, - { - "epoch": 0.9241261722080136, - "grad_norm": 0.0746840238571167, - "learning_rate": 6.674642542146807e-05, - "loss": 0.012542533874511718, - "step": 5420 - }, - { - "epoch": 0.9249786871270247, - "grad_norm": 0.06137506663799286, - "learning_rate": 6.67303818611484e-05, - "loss": 0.008029398322105408, - "step": 5425 - }, - { - "epoch": 0.9258312020460358, - "grad_norm": 0.03867131471633911, - "learning_rate": 6.671432465511411e-05, - "loss": 0.009305672347545623, - "step": 5430 - }, - { - "epoch": 0.9266837169650469, - "grad_norm": 0.08291540294885635, - "learning_rate": 6.669825381086128e-05, - "loss": 0.011059926450252533, - "step": 5435 - }, - { - "epoch": 0.927536231884058, - "grad_norm": 0.0689411610364914, - "learning_rate": 6.668216933589228e-05, - "loss": 0.008350597321987152, - "step": 5440 - }, - { - "epoch": 0.928388746803069, - "grad_norm": 0.051505669951438904, - "learning_rate": 6.666607123771591e-05, - "loss": 0.011102759093046189, - "step": 5445 - }, - { - "epoch": 0.9292412617220801, - "grad_norm": 0.08774327486753464, - "learning_rate": 6.664995952384729e-05, - "loss": 0.009498609602451325, - "step": 5450 - }, - { - "epoch": 0.9300937766410913, - "grad_norm": 0.060566093772649765, - "learning_rate": 6.663383420180789e-05, - "loss": 0.007811173051595688, - "step": 5455 - }, - { - "epoch": 0.9309462915601023, - "grad_norm": 0.09952156245708466, - "learning_rate": 6.661769527912555e-05, - "loss": 0.010514630377292633, - "step": 5460 - }, - { - "epoch": 0.9317988064791134, - "grad_norm": 0.04871741309762001, - "learning_rate": 6.660154276333446e-05, - "loss": 0.006630983203649521, - "step": 5465 - }, - { - "epoch": 0.9326513213981245, - "grad_norm": 0.06254981458187103, - "learning_rate": 6.658537666197517e-05, - "loss": 0.009895097464323044, - "step": 5470 - }, - { - "epoch": 0.9335038363171355, - "grad_norm": 0.08351470530033112, - "learning_rate": 6.656919698259452e-05, - "loss": 0.010659988969564438, - "step": 5475 - }, - { - "epoch": 0.9343563512361467, - "grad_norm": 0.07085305452346802, - "learning_rate": 6.655300373274575e-05, - "loss": 0.008971457183361054, - "step": 5480 - }, - { - "epoch": 0.9352088661551577, - "grad_norm": 0.06461923569440842, - "learning_rate": 6.653679691998839e-05, - "loss": 0.009138958156108856, - "step": 5485 - }, - { - "epoch": 0.9360613810741688, - "grad_norm": 0.11675399541854858, - "learning_rate": 6.652057655188832e-05, - "loss": 0.008388948440551759, - "step": 5490 - }, - { - "epoch": 0.9369138959931799, - "grad_norm": 0.09698229283094406, - "learning_rate": 6.650434263601777e-05, - "loss": 0.011885351687669753, - "step": 5495 - }, - { - "epoch": 0.9377664109121909, - "grad_norm": 0.06786464154720306, - "learning_rate": 6.648809517995524e-05, - "loss": 0.012351768463850022, - "step": 5500 - }, - { - "epoch": 0.9386189258312021, - "grad_norm": 0.09192351251840591, - "learning_rate": 6.647183419128561e-05, - "loss": 0.010940471291542053, - "step": 5505 - }, - { - "epoch": 0.9394714407502132, - "grad_norm": 0.05660499259829521, - "learning_rate": 6.645555967760003e-05, - "loss": 0.01160380095243454, - "step": 5510 - }, - { - "epoch": 0.9403239556692242, - "grad_norm": 0.0829106792807579, - "learning_rate": 6.6439271646496e-05, - "loss": 0.009475469589233398, - "step": 5515 - }, - { - "epoch": 0.9411764705882353, - "grad_norm": 0.08007021248340607, - "learning_rate": 6.642297010557733e-05, - "loss": 0.010524801164865493, - "step": 5520 - }, - { - "epoch": 0.9420289855072463, - "grad_norm": 0.07578855752944946, - "learning_rate": 6.640665506245406e-05, - "loss": 0.008864742517471314, - "step": 5525 - }, - { - "epoch": 0.9428815004262575, - "grad_norm": 0.064674511551857, - "learning_rate": 6.639032652474265e-05, - "loss": 0.010619612783193589, - "step": 5530 - }, - { - "epoch": 0.9437340153452686, - "grad_norm": 0.07176528871059418, - "learning_rate": 6.637398450006579e-05, - "loss": 0.011696039140224457, - "step": 5535 - }, - { - "epoch": 0.9445865302642796, - "grad_norm": 0.16578713059425354, - "learning_rate": 6.635762899605248e-05, - "loss": 0.009867334365844726, - "step": 5540 - }, - { - "epoch": 0.9454390451832907, - "grad_norm": 0.069394052028656, - "learning_rate": 6.634126002033802e-05, - "loss": 0.012951886653900147, - "step": 5545 - }, - { - "epoch": 0.9462915601023018, - "grad_norm": 0.07247213274240494, - "learning_rate": 6.632487758056397e-05, - "loss": 0.009127721190452576, - "step": 5550 - }, - { - "epoch": 0.9471440750213129, - "grad_norm": 0.11084317415952682, - "learning_rate": 6.630848168437822e-05, - "loss": 0.009197863936424255, - "step": 5555 - }, - { - "epoch": 0.947996589940324, - "grad_norm": 0.08719248324632645, - "learning_rate": 6.629207233943492e-05, - "loss": 0.010768509656190871, - "step": 5560 - }, - { - "epoch": 0.948849104859335, - "grad_norm": 0.0857851505279541, - "learning_rate": 6.62756495533945e-05, - "loss": 0.009163270145654679, - "step": 5565 - }, - { - "epoch": 0.9497016197783461, - "grad_norm": 0.09808778762817383, - "learning_rate": 6.625921333392362e-05, - "loss": 0.01005362868309021, - "step": 5570 - }, - { - "epoch": 0.9505541346973572, - "grad_norm": 0.08482059836387634, - "learning_rate": 6.624276368869532e-05, - "loss": 0.008303509652614593, - "step": 5575 - }, - { - "epoch": 0.9514066496163683, - "grad_norm": 0.07460886240005493, - "learning_rate": 6.62263006253888e-05, - "loss": 0.00857923850417137, - "step": 5580 - }, - { - "epoch": 0.9522591645353794, - "grad_norm": 0.09494256228208542, - "learning_rate": 6.620982415168956e-05, - "loss": 0.009573552012443542, - "step": 5585 - }, - { - "epoch": 0.9531116794543905, - "grad_norm": 0.08753519505262375, - "learning_rate": 6.61933342752894e-05, - "loss": 0.010430536419153213, - "step": 5590 - }, - { - "epoch": 0.9539641943734015, - "grad_norm": 0.05132949724793434, - "learning_rate": 6.617683100388632e-05, - "loss": 0.009080658107995987, - "step": 5595 - }, - { - "epoch": 0.9548167092924126, - "grad_norm": 0.07516856491565704, - "learning_rate": 6.61603143451846e-05, - "loss": 0.009718524664640427, - "step": 5600 - }, - { - "epoch": 0.9556692242114238, - "grad_norm": 0.12723733484745026, - "learning_rate": 6.614378430689477e-05, - "loss": 0.01136334240436554, - "step": 5605 - }, - { - "epoch": 0.9565217391304348, - "grad_norm": 0.0905863493680954, - "learning_rate": 6.612724089673359e-05, - "loss": 0.013209307193756103, - "step": 5610 - }, - { - "epoch": 0.9573742540494459, - "grad_norm": 0.06084009259939194, - "learning_rate": 6.611068412242409e-05, - "loss": 0.01001257449388504, - "step": 5615 - }, - { - "epoch": 0.9582267689684569, - "grad_norm": 0.11799532175064087, - "learning_rate": 6.60941139916955e-05, - "loss": 0.008610795438289642, - "step": 5620 - }, - { - "epoch": 0.959079283887468, - "grad_norm": 0.08627504110336304, - "learning_rate": 6.607753051228333e-05, - "loss": 0.009049218893051148, - "step": 5625 - }, - { - "epoch": 0.9599317988064792, - "grad_norm": 0.0910186693072319, - "learning_rate": 6.60609336919293e-05, - "loss": 0.01068672090768814, - "step": 5630 - }, - { - "epoch": 0.9607843137254902, - "grad_norm": 0.06503022462129593, - "learning_rate": 6.604432353838134e-05, - "loss": 0.010604655742645264, - "step": 5635 - }, - { - "epoch": 0.9616368286445013, - "grad_norm": 0.07681523263454437, - "learning_rate": 6.602770005939363e-05, - "loss": 0.010527564585208893, - "step": 5640 - }, - { - "epoch": 0.9624893435635123, - "grad_norm": 0.0680806040763855, - "learning_rate": 6.601106326272659e-05, - "loss": 0.009375665336847305, - "step": 5645 - }, - { - "epoch": 0.9633418584825234, - "grad_norm": 0.06601905822753906, - "learning_rate": 6.599441315614678e-05, - "loss": 0.009470004588365555, - "step": 5650 - }, - { - "epoch": 0.9641943734015346, - "grad_norm": 0.06291890889406204, - "learning_rate": 6.597774974742706e-05, - "loss": 0.012320800870656966, - "step": 5655 - }, - { - "epoch": 0.9650468883205456, - "grad_norm": 0.0956176221370697, - "learning_rate": 6.596107304434645e-05, - "loss": 0.01018187329173088, - "step": 5660 - }, - { - "epoch": 0.9658994032395567, - "grad_norm": 0.06642715632915497, - "learning_rate": 6.59443830546902e-05, - "loss": 0.010271859169006348, - "step": 5665 - }, - { - "epoch": 0.9667519181585678, - "grad_norm": 0.06783592700958252, - "learning_rate": 6.592767978624973e-05, - "loss": 0.00986798033118248, - "step": 5670 - }, - { - "epoch": 0.9676044330775788, - "grad_norm": 0.05877846106886864, - "learning_rate": 6.591096324682272e-05, - "loss": 0.009708859026432037, - "step": 5675 - }, - { - "epoch": 0.96845694799659, - "grad_norm": 0.049258604645729065, - "learning_rate": 6.589423344421297e-05, - "loss": 0.008615868538618088, - "step": 5680 - }, - { - "epoch": 0.969309462915601, - "grad_norm": 0.05622515454888344, - "learning_rate": 6.587749038623052e-05, - "loss": 0.010138686001300811, - "step": 5685 - }, - { - "epoch": 0.9701619778346121, - "grad_norm": 0.05719893425703049, - "learning_rate": 6.586073408069159e-05, - "loss": 0.00869678258895874, - "step": 5690 - }, - { - "epoch": 0.9710144927536232, - "grad_norm": 0.07675095647573471, - "learning_rate": 6.584396453541856e-05, - "loss": 0.010557885468006133, - "step": 5695 - }, - { - "epoch": 0.9718670076726342, - "grad_norm": 0.0999779924750328, - "learning_rate": 6.582718175824006e-05, - "loss": 0.010667790472507478, - "step": 5700 - }, - { - "epoch": 0.9727195225916454, - "grad_norm": 0.04620293527841568, - "learning_rate": 6.58103857569908e-05, - "loss": 0.0078192800283432, - "step": 5705 - }, - { - "epoch": 0.9735720375106565, - "grad_norm": 0.06725125759840012, - "learning_rate": 6.579357653951174e-05, - "loss": 0.010441574454307555, - "step": 5710 - }, - { - "epoch": 0.9744245524296675, - "grad_norm": 0.09062530100345612, - "learning_rate": 6.577675411364997e-05, - "loss": 0.011757946014404297, - "step": 5715 - }, - { - "epoch": 0.9752770673486786, - "grad_norm": 0.050651032477617264, - "learning_rate": 6.575991848725876e-05, - "loss": 0.009817829728126526, - "step": 5720 - }, - { - "epoch": 0.9761295822676896, - "grad_norm": 0.06951560825109482, - "learning_rate": 6.574306966819755e-05, - "loss": 0.008903174102306366, - "step": 5725 - }, - { - "epoch": 0.9769820971867008, - "grad_norm": 0.0733589306473732, - "learning_rate": 6.57262076643319e-05, - "loss": 0.009786784648895264, - "step": 5730 - }, - { - "epoch": 0.9778346121057119, - "grad_norm": 0.0736282467842102, - "learning_rate": 6.570933248353359e-05, - "loss": 0.012176553905010223, - "step": 5735 - }, - { - "epoch": 0.9786871270247229, - "grad_norm": 0.069704569876194, - "learning_rate": 6.56924441336805e-05, - "loss": 0.008654942363500595, - "step": 5740 - }, - { - "epoch": 0.979539641943734, - "grad_norm": 0.07497496902942657, - "learning_rate": 6.567554262265668e-05, - "loss": 0.010902392119169236, - "step": 5745 - }, - { - "epoch": 0.9803921568627451, - "grad_norm": 0.07559038698673248, - "learning_rate": 6.56586279583523e-05, - "loss": 0.00921270027756691, - "step": 5750 - }, - { - "epoch": 0.9812446717817562, - "grad_norm": 0.05894545465707779, - "learning_rate": 6.56417001486637e-05, - "loss": 0.009073206037282944, - "step": 5755 - }, - { - "epoch": 0.9820971867007673, - "grad_norm": 0.06555377691984177, - "learning_rate": 6.562475920149335e-05, - "loss": 0.010872729122638702, - "step": 5760 - }, - { - "epoch": 0.9829497016197783, - "grad_norm": 0.15036429464817047, - "learning_rate": 6.560780512474984e-05, - "loss": 0.009879975020885468, - "step": 5765 - }, - { - "epoch": 0.9838022165387894, - "grad_norm": 0.06842299550771713, - "learning_rate": 6.559083792634791e-05, - "loss": 0.00965554341673851, - "step": 5770 - }, - { - "epoch": 0.9846547314578005, - "grad_norm": 0.0486510805785656, - "learning_rate": 6.557385761420839e-05, - "loss": 0.00872802734375, - "step": 5775 - }, - { - "epoch": 0.9855072463768116, - "grad_norm": 0.059796739369630814, - "learning_rate": 6.555686419625826e-05, - "loss": 0.009720289707183838, - "step": 5780 - }, - { - "epoch": 0.9863597612958227, - "grad_norm": 0.07971934229135513, - "learning_rate": 6.553985768043062e-05, - "loss": 0.008043316006660462, - "step": 5785 - }, - { - "epoch": 0.9872122762148338, - "grad_norm": 0.09556971490383148, - "learning_rate": 6.552283807466468e-05, - "loss": 0.009030704945325851, - "step": 5790 - }, - { - "epoch": 0.9880647911338448, - "grad_norm": 0.04631726071238518, - "learning_rate": 6.550580538690577e-05, - "loss": 0.007388583570718765, - "step": 5795 - }, - { - "epoch": 0.9889173060528559, - "grad_norm": 0.08172665536403656, - "learning_rate": 6.548875962510528e-05, - "loss": 0.007863265275955201, - "step": 5800 - }, - { - "epoch": 0.989769820971867, - "grad_norm": 0.11129096895456314, - "learning_rate": 6.547170079722076e-05, - "loss": 0.012218999862670898, - "step": 5805 - }, - { - "epoch": 0.9906223358908781, - "grad_norm": 0.06619804352521896, - "learning_rate": 6.545462891121584e-05, - "loss": 0.007535400986671448, - "step": 5810 - }, - { - "epoch": 0.9914748508098892, - "grad_norm": 0.11495351046323776, - "learning_rate": 6.543754397506025e-05, - "loss": 0.0121284119784832, - "step": 5815 - }, - { - "epoch": 0.9923273657289002, - "grad_norm": 0.06017669290304184, - "learning_rate": 6.542044599672978e-05, - "loss": 0.008776353299617767, - "step": 5820 - }, - { - "epoch": 0.9931798806479113, - "grad_norm": 0.08049561828374863, - "learning_rate": 6.540333498420637e-05, - "loss": 0.010460492223501205, - "step": 5825 - }, - { - "epoch": 0.9940323955669225, - "grad_norm": 0.07041274011135101, - "learning_rate": 6.538621094547798e-05, - "loss": 0.008290639519691468, - "step": 5830 - }, - { - "epoch": 0.9948849104859335, - "grad_norm": 0.061981480568647385, - "learning_rate": 6.53690738885387e-05, - "loss": 0.007011125236749649, - "step": 5835 - }, - { - "epoch": 0.9957374254049446, - "grad_norm": 0.09022640436887741, - "learning_rate": 6.535192382138867e-05, - "loss": 0.012455084919929504, - "step": 5840 - }, - { - "epoch": 0.9965899403239556, - "grad_norm": 0.05652628839015961, - "learning_rate": 6.53347607520341e-05, - "loss": 0.011704784631729127, - "step": 5845 - }, - { - "epoch": 0.9974424552429667, - "grad_norm": 0.0717577114701271, - "learning_rate": 6.531758468848732e-05, - "loss": 0.007738448679447174, - "step": 5850 - }, - { - "epoch": 0.9982949701619779, - "grad_norm": 0.04797588661313057, - "learning_rate": 6.530039563876665e-05, - "loss": 0.00894927978515625, - "step": 5855 - }, - { - "epoch": 0.9991474850809889, - "grad_norm": 0.06541015207767487, - "learning_rate": 6.528319361089651e-05, - "loss": 0.00731588676571846, - "step": 5860 - }, - { - "epoch": 0.9998294970161978, - "eval_loss": 0.03369956836104393, - "eval_runtime": 3.5892, - "eval_samples_per_second": 70.21, - "eval_steps_per_second": 1.114, - "step": 5864 - }, - { - "eval_cer_subset": 0.014444607292328236, - "eval_cer_subset_edit_distance": 887, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 5864 - }, - { - "epoch": 1.0, - "grad_norm": 0.06960473209619522, - "learning_rate": 6.52659786129074e-05, - "loss": 0.009423434734344482, - "step": 5865 - }, - { - "epoch": 1.000852514919011, - "grad_norm": 0.08438396453857422, - "learning_rate": 6.524875065283587e-05, - "loss": 0.009560108184814453, - "step": 5870 - }, - { - "epoch": 1.0017050298380221, - "grad_norm": 0.06281089037656784, - "learning_rate": 6.523150973872446e-05, - "loss": 0.007503298670053482, - "step": 5875 - }, - { - "epoch": 1.0025575447570332, - "grad_norm": 0.11766793578863144, - "learning_rate": 6.52142558786218e-05, - "loss": 0.008890827000141144, - "step": 5880 - }, - { - "epoch": 1.0034100596760442, - "grad_norm": 0.058115314692258835, - "learning_rate": 6.519698908058262e-05, - "loss": 0.006190531700849533, - "step": 5885 - }, - { - "epoch": 1.0042625745950555, - "grad_norm": 0.06857501715421677, - "learning_rate": 6.51797093526676e-05, - "loss": 0.007162582129240036, - "step": 5890 - }, - { - "epoch": 1.0051150895140666, - "grad_norm": 0.03517467528581619, - "learning_rate": 6.51624167029435e-05, - "loss": 0.0060476396232843396, - "step": 5895 - }, - { - "epoch": 1.0059676044330776, - "grad_norm": 0.10047292709350586, - "learning_rate": 6.514511113948307e-05, - "loss": 0.006416718661785126, - "step": 5900 - }, - { - "epoch": 1.0068201193520887, - "grad_norm": 0.07266796380281448, - "learning_rate": 6.512779267036518e-05, - "loss": 0.005519292503595352, - "step": 5905 - }, - { - "epoch": 1.0076726342710998, - "grad_norm": 0.05385264754295349, - "learning_rate": 6.511046130367464e-05, - "loss": 0.006731215119361878, - "step": 5910 - }, - { - "epoch": 1.0085251491901108, - "grad_norm": 0.0927869975566864, - "learning_rate": 6.50931170475023e-05, - "loss": 0.0073065564036369325, - "step": 5915 - }, - { - "epoch": 1.0093776641091219, - "grad_norm": 0.08416371792554855, - "learning_rate": 6.507575990994504e-05, - "loss": 0.005843915045261383, - "step": 5920 - }, - { - "epoch": 1.010230179028133, - "grad_norm": 0.06585095822811127, - "learning_rate": 6.505838989910576e-05, - "loss": 0.006345044076442719, - "step": 5925 - }, - { - "epoch": 1.011082693947144, - "grad_norm": 0.06341785192489624, - "learning_rate": 6.504100702309336e-05, - "loss": 0.005391617119312286, - "step": 5930 - }, - { - "epoch": 1.011935208866155, - "grad_norm": 0.08260001242160797, - "learning_rate": 6.502361129002273e-05, - "loss": 0.008031262457370758, - "step": 5935 - }, - { - "epoch": 1.0127877237851663, - "grad_norm": 0.08805666118860245, - "learning_rate": 6.500620270801478e-05, - "loss": 0.006408621370792389, - "step": 5940 - }, - { - "epoch": 1.0136402387041774, - "grad_norm": 0.0704861581325531, - "learning_rate": 6.498878128519642e-05, - "loss": 0.006208440661430359, - "step": 5945 - }, - { - "epoch": 1.0144927536231885, - "grad_norm": 0.07539117336273193, - "learning_rate": 6.497134702970055e-05, - "loss": 0.005263582617044449, - "step": 5950 - }, - { - "epoch": 1.0153452685421995, - "grad_norm": 0.022507963702082634, - "learning_rate": 6.495389994966606e-05, - "loss": 0.005692056566476822, - "step": 5955 - }, - { - "epoch": 1.0161977834612106, - "grad_norm": 0.05641510710120201, - "learning_rate": 6.493644005323783e-05, - "loss": 0.007954449951648712, - "step": 5960 - }, - { - "epoch": 1.0170502983802217, - "grad_norm": 0.04853788763284683, - "learning_rate": 6.49189673485667e-05, - "loss": 0.006910678744316101, - "step": 5965 - }, - { - "epoch": 1.0179028132992327, - "grad_norm": 0.07868898659944534, - "learning_rate": 6.490148184380956e-05, - "loss": 0.007678037136793136, - "step": 5970 - }, - { - "epoch": 1.0187553282182438, - "grad_norm": 0.08481275290250778, - "learning_rate": 6.488398354712917e-05, - "loss": 0.0060794509947299956, - "step": 5975 - }, - { - "epoch": 1.0196078431372548, - "grad_norm": 0.05573422089219093, - "learning_rate": 6.486647246669435e-05, - "loss": 0.0050107244402170185, - "step": 5980 - }, - { - "epoch": 1.020460358056266, - "grad_norm": 0.10777781158685684, - "learning_rate": 6.484894861067983e-05, - "loss": 0.006611569225788117, - "step": 5985 - }, - { - "epoch": 1.0213128729752772, - "grad_norm": 0.041842151433229446, - "learning_rate": 6.483141198726635e-05, - "loss": 0.0060344856232404705, - "step": 5990 - }, - { - "epoch": 1.0221653878942882, - "grad_norm": 0.05765567347407341, - "learning_rate": 6.48138626046406e-05, - "loss": 0.005772604793310166, - "step": 5995 - }, - { - "epoch": 1.0230179028132993, - "grad_norm": 0.05987582355737686, - "learning_rate": 6.479630047099517e-05, - "loss": 0.006899695098400116, - "step": 6000 - }, - { - "epoch": 1.0238704177323104, - "grad_norm": 0.046085257083177567, - "learning_rate": 6.477872559452867e-05, - "loss": 0.006151453405618667, - "step": 6005 - }, - { - "epoch": 1.0247229326513214, - "grad_norm": 0.05994739755988121, - "learning_rate": 6.476113798344566e-05, - "loss": 0.007787984609603882, - "step": 6010 - }, - { - "epoch": 1.0255754475703325, - "grad_norm": 0.08866287767887115, - "learning_rate": 6.47435376459566e-05, - "loss": 0.007754974067211151, - "step": 6015 - }, - { - "epoch": 1.0264279624893435, - "grad_norm": 0.07492240518331528, - "learning_rate": 6.472592459027793e-05, - "loss": 0.005562775582075119, - "step": 6020 - }, - { - "epoch": 1.0272804774083546, - "grad_norm": 0.058771468698978424, - "learning_rate": 6.470829882463198e-05, - "loss": 0.008101420104503631, - "step": 6025 - }, - { - "epoch": 1.0281329923273657, - "grad_norm": 0.08099868148565292, - "learning_rate": 6.469066035724708e-05, - "loss": 0.007585109025239944, - "step": 6030 - }, - { - "epoch": 1.0289855072463767, - "grad_norm": 0.09368649870157242, - "learning_rate": 6.467300919635743e-05, - "loss": 0.007342393696308136, - "step": 6035 - }, - { - "epoch": 1.029838022165388, - "grad_norm": 0.07358572632074356, - "learning_rate": 6.465534535020317e-05, - "loss": 0.007179292291402817, - "step": 6040 - }, - { - "epoch": 1.030690537084399, - "grad_norm": 0.0542459636926651, - "learning_rate": 6.46376688270304e-05, - "loss": 0.007178785651922226, - "step": 6045 - }, - { - "epoch": 1.0315430520034101, - "grad_norm": 0.04534808546304703, - "learning_rate": 6.461997963509109e-05, - "loss": 0.005939013883471489, - "step": 6050 - }, - { - "epoch": 1.0323955669224212, - "grad_norm": 0.04498334974050522, - "learning_rate": 6.460227778264314e-05, - "loss": 0.007932021468877792, - "step": 6055 - }, - { - "epoch": 1.0332480818414322, - "grad_norm": 0.09503943473100662, - "learning_rate": 6.458456327795038e-05, - "loss": 0.006005316227674484, - "step": 6060 - }, - { - "epoch": 1.0341005967604433, - "grad_norm": 0.06634567677974701, - "learning_rate": 6.456683612928252e-05, - "loss": 0.00472346730530262, - "step": 6065 - }, - { - "epoch": 1.0349531116794544, - "grad_norm": 0.06090138852596283, - "learning_rate": 6.454909634491518e-05, - "loss": 0.0071956045925617215, - "step": 6070 - }, - { - "epoch": 1.0358056265984654, - "grad_norm": 0.09833965450525284, - "learning_rate": 6.453134393312988e-05, - "loss": 0.00738539919257164, - "step": 6075 - }, - { - "epoch": 1.0366581415174765, - "grad_norm": 0.07924133539199829, - "learning_rate": 6.451357890221406e-05, - "loss": 0.008464773744344711, - "step": 6080 - }, - { - "epoch": 1.0375106564364875, - "grad_norm": 0.04132373258471489, - "learning_rate": 6.4495801260461e-05, - "loss": 0.005705388635396958, - "step": 6085 - }, - { - "epoch": 1.0383631713554988, - "grad_norm": 0.08653424680233002, - "learning_rate": 6.44780110161699e-05, - "loss": 0.00777137503027916, - "step": 6090 - }, - { - "epoch": 1.0392156862745099, - "grad_norm": 0.08147025108337402, - "learning_rate": 6.446020817764583e-05, - "loss": 0.005003783106803894, - "step": 6095 - }, - { - "epoch": 1.040068201193521, - "grad_norm": 0.07091398537158966, - "learning_rate": 6.444239275319977e-05, - "loss": 0.005957254022359848, - "step": 6100 - }, - { - "epoch": 1.040920716112532, - "grad_norm": 0.06259306520223618, - "learning_rate": 6.442456475114855e-05, - "loss": 0.005096634104847908, - "step": 6105 - }, - { - "epoch": 1.041773231031543, - "grad_norm": 0.07044103741645813, - "learning_rate": 6.440672417981485e-05, - "loss": 0.00557241328060627, - "step": 6110 - }, - { - "epoch": 1.0426257459505541, - "grad_norm": 0.05029159039258957, - "learning_rate": 6.438887104752726e-05, - "loss": 0.0056043524295091626, - "step": 6115 - }, - { - "epoch": 1.0434782608695652, - "grad_norm": 0.04778699576854706, - "learning_rate": 6.437100536262022e-05, - "loss": 0.00855453684926033, - "step": 6120 - }, - { - "epoch": 1.0443307757885762, - "grad_norm": 0.07467184215784073, - "learning_rate": 6.435312713343401e-05, - "loss": 0.006690071523189544, - "step": 6125 - }, - { - "epoch": 1.0451832907075873, - "grad_norm": 0.07189153879880905, - "learning_rate": 6.433523636831481e-05, - "loss": 0.007009527087211609, - "step": 6130 - }, - { - "epoch": 1.0460358056265984, - "grad_norm": 0.08000020682811737, - "learning_rate": 6.431733307561459e-05, - "loss": 0.007411211729049683, - "step": 6135 - }, - { - "epoch": 1.0468883205456097, - "grad_norm": 0.06737730652093887, - "learning_rate": 6.429941726369124e-05, - "loss": 0.006843548268079758, - "step": 6140 - }, - { - "epoch": 1.0477408354646207, - "grad_norm": 0.09834714978933334, - "learning_rate": 6.428148894090841e-05, - "loss": 0.007167841494083405, - "step": 6145 - }, - { - "epoch": 1.0485933503836318, - "grad_norm": 0.06415695697069168, - "learning_rate": 6.426354811563567e-05, - "loss": 0.005131457373499871, - "step": 6150 - }, - { - "epoch": 1.0494458653026428, - "grad_norm": 0.07823871076107025, - "learning_rate": 6.424559479624839e-05, - "loss": 0.004797356575727463, - "step": 6155 - }, - { - "epoch": 1.050298380221654, - "grad_norm": 0.07165013998746872, - "learning_rate": 6.422762899112777e-05, - "loss": 0.006430945545434952, - "step": 6160 - }, - { - "epoch": 1.051150895140665, - "grad_norm": 0.10924427956342697, - "learning_rate": 6.420965070866086e-05, - "loss": 0.008151047676801682, - "step": 6165 - }, - { - "epoch": 1.052003410059676, - "grad_norm": 0.10381831228733063, - "learning_rate": 6.41916599572405e-05, - "loss": 0.009056917577981948, - "step": 6170 - }, - { - "epoch": 1.052855924978687, - "grad_norm": 0.05251248553395271, - "learning_rate": 6.417365674526539e-05, - "loss": 0.004240944981575012, - "step": 6175 - }, - { - "epoch": 1.0537084398976981, - "grad_norm": 0.0812104344367981, - "learning_rate": 6.415564108114001e-05, - "loss": 0.008805926889181137, - "step": 6180 - }, - { - "epoch": 1.0545609548167092, - "grad_norm": 0.05640942230820656, - "learning_rate": 6.413761297327469e-05, - "loss": 0.005727213248610497, - "step": 6185 - }, - { - "epoch": 1.0554134697357205, - "grad_norm": 0.10114334523677826, - "learning_rate": 6.411957243008552e-05, - "loss": 0.008660107105970382, - "step": 6190 - }, - { - "epoch": 1.0562659846547315, - "grad_norm": 0.06809760630130768, - "learning_rate": 6.410151945999447e-05, - "loss": 0.006786180287599563, - "step": 6195 - }, - { - "epoch": 1.0571184995737426, - "grad_norm": 0.08121974021196365, - "learning_rate": 6.408345407142924e-05, - "loss": 0.004730105027556419, - "step": 6200 - }, - { - "epoch": 1.0579710144927537, - "grad_norm": 0.0630379393696785, - "learning_rate": 6.406537627282336e-05, - "loss": 0.006532897800207138, - "step": 6205 - }, - { - "epoch": 1.0588235294117647, - "grad_norm": 0.09354323893785477, - "learning_rate": 6.404728607261612e-05, - "loss": 0.008165966719388962, - "step": 6210 - }, - { - "epoch": 1.0596760443307758, - "grad_norm": 0.0509798526763916, - "learning_rate": 6.402918347925267e-05, - "loss": 0.006781977415084839, - "step": 6215 - }, - { - "epoch": 1.0605285592497868, - "grad_norm": 0.09830603748559952, - "learning_rate": 6.401106850118389e-05, - "loss": 0.00675075501203537, - "step": 6220 - }, - { - "epoch": 1.061381074168798, - "grad_norm": 0.08417326211929321, - "learning_rate": 6.399294114686645e-05, - "loss": 0.005759935826063156, - "step": 6225 - }, - { - "epoch": 1.062233589087809, - "grad_norm": 0.04999511316418648, - "learning_rate": 6.39748014247628e-05, - "loss": 0.0059943776577711105, - "step": 6230 - }, - { - "epoch": 1.06308610400682, - "grad_norm": 0.0355304591357708, - "learning_rate": 6.395664934334116e-05, - "loss": 0.003978967294096946, - "step": 6235 - }, - { - "epoch": 1.0639386189258313, - "grad_norm": 0.09096778929233551, - "learning_rate": 6.393848491107554e-05, - "loss": 0.006428928673267364, - "step": 6240 - }, - { - "epoch": 1.0647911338448424, - "grad_norm": 0.09047707170248032, - "learning_rate": 6.392030813644569e-05, - "loss": 0.005584535002708435, - "step": 6245 - }, - { - "epoch": 1.0656436487638534, - "grad_norm": 0.07133036106824875, - "learning_rate": 6.390211902793714e-05, - "loss": 0.00610351674258709, - "step": 6250 - }, - { - "epoch": 1.0664961636828645, - "grad_norm": 0.1025620549917221, - "learning_rate": 6.388391759404117e-05, - "loss": 0.006316560506820679, - "step": 6255 - }, - { - "epoch": 1.0673486786018755, - "grad_norm": 0.0922650694847107, - "learning_rate": 6.386570384325482e-05, - "loss": 0.008717238903045654, - "step": 6260 - }, - { - "epoch": 1.0682011935208866, - "grad_norm": 0.094338558614254, - "learning_rate": 6.384747778408085e-05, - "loss": 0.0067199327051639555, - "step": 6265 - }, - { - "epoch": 1.0690537084398977, - "grad_norm": 0.07260075211524963, - "learning_rate": 6.382923942502782e-05, - "loss": 0.007249505072832107, - "step": 6270 - }, - { - "epoch": 1.0699062233589087, - "grad_norm": 0.06572386622428894, - "learning_rate": 6.381098877460999e-05, - "loss": 0.007879015803337098, - "step": 6275 - }, - { - "epoch": 1.0707587382779198, - "grad_norm": 0.11646077036857605, - "learning_rate": 6.379272584134737e-05, - "loss": 0.006477512419223785, - "step": 6280 - }, - { - "epoch": 1.0716112531969308, - "grad_norm": 0.14154180884361267, - "learning_rate": 6.37744506337657e-05, - "loss": 0.0069471016526222226, - "step": 6285 - }, - { - "epoch": 1.0724637681159421, - "grad_norm": 0.113606296479702, - "learning_rate": 6.375616316039647e-05, - "loss": 0.010210946947336198, - "step": 6290 - }, - { - "epoch": 1.0733162830349532, - "grad_norm": 0.07193166017532349, - "learning_rate": 6.373786342977687e-05, - "loss": 0.00820360854268074, - "step": 6295 - }, - { - "epoch": 1.0741687979539642, - "grad_norm": 0.06180251017212868, - "learning_rate": 6.371955145044983e-05, - "loss": 0.006048502773046494, - "step": 6300 - }, - { - "epoch": 1.0750213128729753, - "grad_norm": 0.06956778466701508, - "learning_rate": 6.370122723096398e-05, - "loss": 0.005345676839351654, - "step": 6305 - }, - { - "epoch": 1.0758738277919864, - "grad_norm": 0.09170625358819962, - "learning_rate": 6.368289077987368e-05, - "loss": 0.0068355493247509004, - "step": 6310 - }, - { - "epoch": 1.0767263427109974, - "grad_norm": 0.07023731619119644, - "learning_rate": 6.366454210573901e-05, - "loss": 0.004600600153207779, - "step": 6315 - }, - { - "epoch": 1.0775788576300085, - "grad_norm": 0.07429320365190506, - "learning_rate": 6.36461812171257e-05, - "loss": 0.006272794306278228, - "step": 6320 - }, - { - "epoch": 1.0784313725490196, - "grad_norm": 0.11356805264949799, - "learning_rate": 6.362780812260528e-05, - "loss": 0.0048342026770114895, - "step": 6325 - }, - { - "epoch": 1.0792838874680306, - "grad_norm": 0.11231013387441635, - "learning_rate": 6.360942283075489e-05, - "loss": 0.00653451681137085, - "step": 6330 - }, - { - "epoch": 1.0801364023870417, - "grad_norm": 0.09655431658029556, - "learning_rate": 6.359102535015739e-05, - "loss": 0.008280844241380692, - "step": 6335 - }, - { - "epoch": 1.080988917306053, - "grad_norm": 0.10172779113054276, - "learning_rate": 6.357261568940135e-05, - "loss": 0.007757744938135147, - "step": 6340 - }, - { - "epoch": 1.081841432225064, - "grad_norm": 0.06417235732078552, - "learning_rate": 6.3554193857081e-05, - "loss": 0.007309675216674805, - "step": 6345 - }, - { - "epoch": 1.082693947144075, - "grad_norm": 0.053178418427705765, - "learning_rate": 6.35357598617963e-05, - "loss": 0.007162143290042877, - "step": 6350 - }, - { - "epoch": 1.0835464620630861, - "grad_norm": 0.07408315688371658, - "learning_rate": 6.351731371215278e-05, - "loss": 0.008392173796892166, - "step": 6355 - }, - { - "epoch": 1.0843989769820972, - "grad_norm": 0.03643275052309036, - "learning_rate": 6.349885541676179e-05, - "loss": 0.00610513798892498, - "step": 6360 - }, - { - "epoch": 1.0852514919011083, - "grad_norm": 0.04701307415962219, - "learning_rate": 6.348038498424023e-05, - "loss": 0.00612705871462822, - "step": 6365 - }, - { - "epoch": 1.0861040068201193, - "grad_norm": 0.05050053820014, - "learning_rate": 6.346190242321075e-05, - "loss": 0.005640604719519615, - "step": 6370 - }, - { - "epoch": 1.0869565217391304, - "grad_norm": 0.05203640088438988, - "learning_rate": 6.344340774230159e-05, - "loss": 0.005340654775500298, - "step": 6375 - }, - { - "epoch": 1.0878090366581414, - "grad_norm": 0.07451866567134857, - "learning_rate": 6.342490095014669e-05, - "loss": 0.006459225714206695, - "step": 6380 - }, - { - "epoch": 1.0886615515771525, - "grad_norm": 0.09951499849557877, - "learning_rate": 6.340638205538566e-05, - "loss": 0.008529558777809143, - "step": 6385 - }, - { - "epoch": 1.0895140664961638, - "grad_norm": 0.06064416840672493, - "learning_rate": 6.33878510666637e-05, - "loss": 0.007885071635246276, - "step": 6390 - }, - { - "epoch": 1.0903665814151748, - "grad_norm": 0.09382321685552597, - "learning_rate": 6.33693079926317e-05, - "loss": 0.007992906123399734, - "step": 6395 - }, - { - "epoch": 1.091219096334186, - "grad_norm": 0.054066915065050125, - "learning_rate": 6.335075284194621e-05, - "loss": 0.007473263889551163, - "step": 6400 - }, - { - "epoch": 1.092071611253197, - "grad_norm": 0.06763065606355667, - "learning_rate": 6.333218562326937e-05, - "loss": 0.006374929845333099, - "step": 6405 - }, - { - "epoch": 1.092924126172208, - "grad_norm": 0.0656818076968193, - "learning_rate": 6.331360634526899e-05, - "loss": 0.006085469573736191, - "step": 6410 - }, - { - "epoch": 1.093776641091219, - "grad_norm": 0.060463279485702515, - "learning_rate": 6.329501501661848e-05, - "loss": 0.005605050176382065, - "step": 6415 - }, - { - "epoch": 1.0946291560102301, - "grad_norm": 0.05734890326857567, - "learning_rate": 6.32764116459969e-05, - "loss": 0.00563613623380661, - "step": 6420 - }, - { - "epoch": 1.0954816709292412, - "grad_norm": 0.0502542182803154, - "learning_rate": 6.32577962420889e-05, - "loss": 0.004675766825675965, - "step": 6425 - }, - { - "epoch": 1.0963341858482523, - "grad_norm": 0.06550677120685577, - "learning_rate": 6.32391688135848e-05, - "loss": 0.006265480071306229, - "step": 6430 - }, - { - "epoch": 1.0971867007672633, - "grad_norm": 0.1298699826002121, - "learning_rate": 6.322052936918048e-05, - "loss": 0.008352620899677277, - "step": 6435 - }, - { - "epoch": 1.0980392156862746, - "grad_norm": 0.08422241359949112, - "learning_rate": 6.320187791757748e-05, - "loss": 0.005868597701191902, - "step": 6440 - }, - { - "epoch": 1.0988917306052857, - "grad_norm": 0.07807652652263641, - "learning_rate": 6.318321446748291e-05, - "loss": 0.010353461652994157, - "step": 6445 - }, - { - "epoch": 1.0997442455242967, - "grad_norm": 0.0663999617099762, - "learning_rate": 6.316453902760946e-05, - "loss": 0.00667201578617096, - "step": 6450 - }, - { - "epoch": 1.1005967604433078, - "grad_norm": 0.06362646073102951, - "learning_rate": 6.314585160667547e-05, - "loss": 0.006539353728294372, - "step": 6455 - }, - { - "epoch": 1.1014492753623188, - "grad_norm": 0.04160219058394432, - "learning_rate": 6.312715221340485e-05, - "loss": 0.004082740843296051, - "step": 6460 - }, - { - "epoch": 1.10230179028133, - "grad_norm": 0.05449013039469719, - "learning_rate": 6.31084408565271e-05, - "loss": 0.007537595182657242, - "step": 6465 - }, - { - "epoch": 1.103154305200341, - "grad_norm": 0.06819169223308563, - "learning_rate": 6.308971754477729e-05, - "loss": 0.006866573542356491, - "step": 6470 - }, - { - "epoch": 1.104006820119352, - "grad_norm": 0.06622573733329773, - "learning_rate": 6.307098228689611e-05, - "loss": 0.009055091440677643, - "step": 6475 - }, - { - "epoch": 1.104859335038363, - "grad_norm": 0.05732693895697594, - "learning_rate": 6.305223509162978e-05, - "loss": 0.006077280640602112, - "step": 6480 - }, - { - "epoch": 1.1057118499573741, - "grad_norm": 0.06630431115627289, - "learning_rate": 6.303347596773012e-05, - "loss": 0.0064442440867424015, - "step": 6485 - }, - { - "epoch": 1.1065643648763854, - "grad_norm": 0.06782745569944382, - "learning_rate": 6.301470492395451e-05, - "loss": 0.005072608217597007, - "step": 6490 - }, - { - "epoch": 1.1074168797953965, - "grad_norm": 0.05796601250767708, - "learning_rate": 6.299592196906591e-05, - "loss": 0.0074319586157798765, - "step": 6495 - }, - { - "epoch": 1.1082693947144076, - "grad_norm": 0.04627149552106857, - "learning_rate": 6.297712711183282e-05, - "loss": 0.00512685589492321, - "step": 6500 - }, - { - "epoch": 1.1091219096334186, - "grad_norm": 0.08210720866918564, - "learning_rate": 6.295832036102929e-05, - "loss": 0.006917678564786911, - "step": 6505 - }, - { - "epoch": 1.1099744245524297, - "grad_norm": 0.08367052674293518, - "learning_rate": 6.293950172543496e-05, - "loss": 0.0054212499409914015, - "step": 6510 - }, - { - "epoch": 1.1108269394714407, - "grad_norm": 0.08192700892686844, - "learning_rate": 6.292067121383499e-05, - "loss": 0.00791442021727562, - "step": 6515 - }, - { - "epoch": 1.1116794543904518, - "grad_norm": 0.05766206234693527, - "learning_rate": 6.290182883502008e-05, - "loss": 0.006540960818529129, - "step": 6520 - }, - { - "epoch": 1.1125319693094629, - "grad_norm": 0.07752664387226105, - "learning_rate": 6.28829745977865e-05, - "loss": 0.009196925908327103, - "step": 6525 - }, - { - "epoch": 1.113384484228474, - "grad_norm": 0.07419038563966751, - "learning_rate": 6.2864108510936e-05, - "loss": 0.007524489611387253, - "step": 6530 - }, - { - "epoch": 1.1142369991474852, - "grad_norm": 0.04851066321134567, - "learning_rate": 6.284523058327593e-05, - "loss": 0.004060015082359314, - "step": 6535 - }, - { - "epoch": 1.1150895140664963, - "grad_norm": 0.0764140635728836, - "learning_rate": 6.282634082361911e-05, - "loss": 0.006797254830598831, - "step": 6540 - }, - { - "epoch": 1.1159420289855073, - "grad_norm": 0.06873292475938797, - "learning_rate": 6.280743924078392e-05, - "loss": 0.007637844234704971, - "step": 6545 - }, - { - "epoch": 1.1167945439045184, - "grad_norm": 0.047832686454057693, - "learning_rate": 6.278852584359425e-05, - "loss": 0.00542646199464798, - "step": 6550 - }, - { - "epoch": 1.1176470588235294, - "grad_norm": 0.10061443597078323, - "learning_rate": 6.27696006408795e-05, - "loss": 0.008591605722904206, - "step": 6555 - }, - { - "epoch": 1.1184995737425405, - "grad_norm": 0.09729041159152985, - "learning_rate": 6.27506636414746e-05, - "loss": 0.0064585842192173, - "step": 6560 - }, - { - "epoch": 1.1193520886615516, - "grad_norm": 0.04427873343229294, - "learning_rate": 6.273171485421992e-05, - "loss": 0.004846593365073204, - "step": 6565 - }, - { - "epoch": 1.1202046035805626, - "grad_norm": 0.07813888788223267, - "learning_rate": 6.271275428796146e-05, - "loss": 0.007345958054065705, - "step": 6570 - }, - { - "epoch": 1.1210571184995737, - "grad_norm": 0.12500733137130737, - "learning_rate": 6.269378195155058e-05, - "loss": 0.006376177072525024, - "step": 6575 - }, - { - "epoch": 1.1219096334185847, - "grad_norm": 0.09636004269123077, - "learning_rate": 6.267479785384422e-05, - "loss": 0.0069282323122024534, - "step": 6580 - }, - { - "epoch": 1.1227621483375958, - "grad_norm": 0.06236017122864723, - "learning_rate": 6.265580200370478e-05, - "loss": 0.0050656192004680635, - "step": 6585 - }, - { - "epoch": 1.123614663256607, - "grad_norm": 0.0596463568508625, - "learning_rate": 6.263679441000019e-05, - "loss": 0.006198804825544357, - "step": 6590 - }, - { - "epoch": 1.1244671781756181, - "grad_norm": 0.0846855491399765, - "learning_rate": 6.261777508160378e-05, - "loss": 0.0073812372982501985, - "step": 6595 - }, - { - "epoch": 1.1253196930946292, - "grad_norm": 0.05228402093052864, - "learning_rate": 6.259874402739442e-05, - "loss": 0.006196213513612747, - "step": 6600 - }, - { - "epoch": 1.1261722080136403, - "grad_norm": 0.08395595103502274, - "learning_rate": 6.257970125625647e-05, - "loss": 0.0060448311269283295, - "step": 6605 - }, - { - "epoch": 1.1270247229326513, - "grad_norm": 0.055274344980716705, - "learning_rate": 6.25606467770797e-05, - "loss": 0.006578336656093598, - "step": 6610 - }, - { - "epoch": 1.1278772378516624, - "grad_norm": 0.054609380662441254, - "learning_rate": 6.254158059875936e-05, - "loss": 0.008093905448913575, - "step": 6615 - }, - { - "epoch": 1.1287297527706734, - "grad_norm": 0.05168715491890907, - "learning_rate": 6.25225027301962e-05, - "loss": 0.006086795404553413, - "step": 6620 - }, - { - "epoch": 1.1295822676896845, - "grad_norm": 0.06260590255260468, - "learning_rate": 6.250341318029641e-05, - "loss": 0.007288631051778793, - "step": 6625 - }, - { - "epoch": 1.1304347826086956, - "grad_norm": 0.06585957854986191, - "learning_rate": 6.24843119579716e-05, - "loss": 0.005779954791069031, - "step": 6630 - }, - { - "epoch": 1.1312872975277068, - "grad_norm": 0.05828391760587692, - "learning_rate": 6.246519907213888e-05, - "loss": 0.006320308148860932, - "step": 6635 - }, - { - "epoch": 1.132139812446718, - "grad_norm": 0.08400154113769531, - "learning_rate": 6.244607453172078e-05, - "loss": 0.00452205128967762, - "step": 6640 - }, - { - "epoch": 1.132992327365729, - "grad_norm": 0.059920940548181534, - "learning_rate": 6.242693834564525e-05, - "loss": 0.00620727390050888, - "step": 6645 - }, - { - "epoch": 1.13384484228474, - "grad_norm": 0.1100456491112709, - "learning_rate": 6.240779052284571e-05, - "loss": 0.006768511235713958, - "step": 6650 - }, - { - "epoch": 1.134697357203751, - "grad_norm": 0.07722730189561844, - "learning_rate": 6.238863107226102e-05, - "loss": 0.008207496255636215, - "step": 6655 - }, - { - "epoch": 1.1355498721227621, - "grad_norm": 0.05468403548002243, - "learning_rate": 6.236946000283542e-05, - "loss": 0.005272969231009483, - "step": 6660 - }, - { - "epoch": 1.1364023870417732, - "grad_norm": 0.0685155913233757, - "learning_rate": 6.235027732351863e-05, - "loss": 0.008074409514665603, - "step": 6665 - }, - { - "epoch": 1.1372549019607843, - "grad_norm": 0.07667854428291321, - "learning_rate": 6.233108304326571e-05, - "loss": 0.00863628089427948, - "step": 6670 - }, - { - "epoch": 1.1381074168797953, - "grad_norm": 0.0727204978466034, - "learning_rate": 6.231187717103727e-05, - "loss": 0.004446333646774292, - "step": 6675 - }, - { - "epoch": 1.1389599317988064, - "grad_norm": 0.06465403735637665, - "learning_rate": 6.229265971579918e-05, - "loss": 0.007380707561969757, - "step": 6680 - }, - { - "epoch": 1.1398124467178175, - "grad_norm": 0.04102586954832077, - "learning_rate": 6.227343068652281e-05, - "loss": 0.006103607639670372, - "step": 6685 - }, - { - "epoch": 1.1406649616368287, - "grad_norm": 0.06988929212093353, - "learning_rate": 6.225419009218493e-05, - "loss": 0.007358456403017044, - "step": 6690 - }, - { - "epoch": 1.1415174765558398, - "grad_norm": 0.07802724838256836, - "learning_rate": 6.223493794176767e-05, - "loss": 0.007887010276317597, - "step": 6695 - }, - { - "epoch": 1.1423699914748509, - "grad_norm": 0.10777991265058517, - "learning_rate": 6.221567424425857e-05, - "loss": 0.007653985172510147, - "step": 6700 - }, - { - "epoch": 1.143222506393862, - "grad_norm": 0.0922352522611618, - "learning_rate": 6.219639900865058e-05, - "loss": 0.007459624856710434, - "step": 6705 - }, - { - "epoch": 1.144075021312873, - "grad_norm": 0.06321967393159866, - "learning_rate": 6.217711224394202e-05, - "loss": 0.00540911853313446, - "step": 6710 - }, - { - "epoch": 1.144927536231884, - "grad_norm": 0.09344825148582458, - "learning_rate": 6.215781395913656e-05, - "loss": 0.0053936421871185304, - "step": 6715 - }, - { - "epoch": 1.145780051150895, - "grad_norm": 0.03457584232091904, - "learning_rate": 6.213850416324333e-05, - "loss": 0.006388355046510696, - "step": 6720 - }, - { - "epoch": 1.1466325660699062, - "grad_norm": 0.06931985914707184, - "learning_rate": 6.211918286527676e-05, - "loss": 0.005831810832023621, - "step": 6725 - }, - { - "epoch": 1.1474850809889172, - "grad_norm": 0.05588890612125397, - "learning_rate": 6.209985007425668e-05, - "loss": 0.0041655078530311584, - "step": 6730 - }, - { - "epoch": 1.1483375959079285, - "grad_norm": 0.07582694292068481, - "learning_rate": 6.208050579920826e-05, - "loss": 0.006521198153495789, - "step": 6735 - }, - { - "epoch": 1.1491901108269396, - "grad_norm": 0.07055433094501495, - "learning_rate": 6.206115004916209e-05, - "loss": 0.0066129244863986966, - "step": 6740 - }, - { - "epoch": 1.1500426257459506, - "grad_norm": 0.07039172202348709, - "learning_rate": 6.204178283315405e-05, - "loss": 0.005633739382028579, - "step": 6745 - }, - { - "epoch": 1.1508951406649617, - "grad_norm": 0.07833350449800491, - "learning_rate": 6.202240416022541e-05, - "loss": 0.006761975586414337, - "step": 6750 - }, - { - "epoch": 1.1517476555839727, - "grad_norm": 0.05553733557462692, - "learning_rate": 6.200301403942278e-05, - "loss": 0.006545065343379975, - "step": 6755 - }, - { - "epoch": 1.1526001705029838, - "grad_norm": 0.07306832075119019, - "learning_rate": 6.198361247979809e-05, - "loss": 0.008323725312948227, - "step": 6760 - }, - { - "epoch": 1.1534526854219949, - "grad_norm": 0.04787914454936981, - "learning_rate": 6.196419949040867e-05, - "loss": 0.004425797611474991, - "step": 6765 - }, - { - "epoch": 1.154305200341006, - "grad_norm": 0.08021930605173111, - "learning_rate": 6.194477508031712e-05, - "loss": 0.005103312805294991, - "step": 6770 - }, - { - "epoch": 1.155157715260017, - "grad_norm": 0.0821428894996643, - "learning_rate": 6.192533925859144e-05, - "loss": 0.009274877607822418, - "step": 6775 - }, - { - "epoch": 1.156010230179028, - "grad_norm": 0.09880609810352325, - "learning_rate": 6.190589203430486e-05, - "loss": 0.007863005250692367, - "step": 6780 - }, - { - "epoch": 1.156862745098039, - "grad_norm": 0.08075276762247086, - "learning_rate": 6.188643341653604e-05, - "loss": 0.004675677418708802, - "step": 6785 - }, - { - "epoch": 1.1577152600170504, - "grad_norm": 0.0633573830127716, - "learning_rate": 6.186696341436889e-05, - "loss": 0.007359890639781952, - "step": 6790 - }, - { - "epoch": 1.1585677749360614, - "grad_norm": 0.03828895092010498, - "learning_rate": 6.184748203689265e-05, - "loss": 0.004494070634245872, - "step": 6795 - }, - { - "epoch": 1.1594202898550725, - "grad_norm": 0.07907325774431229, - "learning_rate": 6.18279892932019e-05, - "loss": 0.006256269663572312, - "step": 6800 - }, - { - "epoch": 1.1602728047740836, - "grad_norm": 0.055188342928886414, - "learning_rate": 6.180848519239647e-05, - "loss": 0.009548474848270417, - "step": 6805 - }, - { - "epoch": 1.1611253196930946, - "grad_norm": 0.05850991606712341, - "learning_rate": 6.178896974358154e-05, - "loss": 0.0056076571345329285, - "step": 6810 - }, - { - "epoch": 1.1619778346121057, - "grad_norm": 0.0626932755112648, - "learning_rate": 6.176944295586757e-05, - "loss": 0.005667714029550552, - "step": 6815 - }, - { - "epoch": 1.1628303495311167, - "grad_norm": 0.06506946682929993, - "learning_rate": 6.174990483837031e-05, - "loss": 0.006154880672693252, - "step": 6820 - }, - { - "epoch": 1.1636828644501278, - "grad_norm": 0.0535859651863575, - "learning_rate": 6.17303554002108e-05, - "loss": 0.0042555928230285645, - "step": 6825 - }, - { - "epoch": 1.1645353793691389, - "grad_norm": 0.05577898398041725, - "learning_rate": 6.171079465051538e-05, - "loss": 0.006060104072093964, - "step": 6830 - }, - { - "epoch": 1.1653878942881502, - "grad_norm": 0.05574663355946541, - "learning_rate": 6.169122259841566e-05, - "loss": 0.008667515218257904, - "step": 6835 - }, - { - "epoch": 1.1662404092071612, - "grad_norm": 0.09157130867242813, - "learning_rate": 6.16716392530485e-05, - "loss": 0.007259176671504974, - "step": 6840 - }, - { - "epoch": 1.1670929241261723, - "grad_norm": 0.06404415518045425, - "learning_rate": 6.165204462355608e-05, - "loss": 0.007140593230724334, - "step": 6845 - }, - { - "epoch": 1.1679454390451833, - "grad_norm": 0.0713329091668129, - "learning_rate": 6.163243871908581e-05, - "loss": 0.006118748337030411, - "step": 6850 - }, - { - "epoch": 1.1687979539641944, - "grad_norm": 0.04038231074810028, - "learning_rate": 6.16128215487904e-05, - "loss": 0.006028918176889419, - "step": 6855 - }, - { - "epoch": 1.1696504688832055, - "grad_norm": 0.07793593406677246, - "learning_rate": 6.159319312182777e-05, - "loss": 0.006851163506507873, - "step": 6860 - }, - { - "epoch": 1.1705029838022165, - "grad_norm": 0.07543511688709259, - "learning_rate": 6.157355344736114e-05, - "loss": 0.006878272444009781, - "step": 6865 - }, - { - "epoch": 1.1713554987212276, - "grad_norm": 0.06332696974277496, - "learning_rate": 6.155390253455897e-05, - "loss": 0.006324999034404755, - "step": 6870 - }, - { - "epoch": 1.1722080136402386, - "grad_norm": 0.06920734792947769, - "learning_rate": 6.153424039259495e-05, - "loss": 0.00536310225725174, - "step": 6875 - }, - { - "epoch": 1.1730605285592497, - "grad_norm": 0.09313163161277771, - "learning_rate": 6.151456703064802e-05, - "loss": 0.00795424059033394, - "step": 6880 - }, - { - "epoch": 1.1739130434782608, - "grad_norm": 0.08588451147079468, - "learning_rate": 6.149488245790234e-05, - "loss": 0.006889034807682037, - "step": 6885 - }, - { - "epoch": 1.174765558397272, - "grad_norm": 0.09814538061618805, - "learning_rate": 6.147518668354737e-05, - "loss": 0.007332245260477066, - "step": 6890 - }, - { - "epoch": 1.175618073316283, - "grad_norm": 0.05141104385256767, - "learning_rate": 6.145547971677772e-05, - "loss": 0.00333656407892704, - "step": 6895 - }, - { - "epoch": 1.1764705882352942, - "grad_norm": 0.05575519800186157, - "learning_rate": 6.143576156679327e-05, - "loss": 0.005542768910527229, - "step": 6900 - }, - { - "epoch": 1.1773231031543052, - "grad_norm": 0.04917008429765701, - "learning_rate": 6.14160322427991e-05, - "loss": 0.007007633149623871, - "step": 6905 - }, - { - "epoch": 1.1781756180733163, - "grad_norm": 0.06385336071252823, - "learning_rate": 6.139629175400552e-05, - "loss": 0.007495941221714019, - "step": 6910 - }, - { - "epoch": 1.1790281329923273, - "grad_norm": 0.08664151281118393, - "learning_rate": 6.137654010962805e-05, - "loss": 0.0075534448027610775, - "step": 6915 - }, - { - "epoch": 1.1798806479113384, - "grad_norm": 0.08881189674139023, - "learning_rate": 6.13567773188874e-05, - "loss": 0.0059935558587312695, - "step": 6920 - }, - { - "epoch": 1.1807331628303495, - "grad_norm": 0.07297934591770172, - "learning_rate": 6.133700339100952e-05, - "loss": 0.006142234057188034, - "step": 6925 - }, - { - "epoch": 1.1815856777493605, - "grad_norm": 0.053351663053035736, - "learning_rate": 6.131721833522552e-05, - "loss": 0.006038139387965202, - "step": 6930 - }, - { - "epoch": 1.1824381926683718, - "grad_norm": 0.12923622131347656, - "learning_rate": 6.129742216077172e-05, - "loss": 0.008645947277545928, - "step": 6935 - }, - { - "epoch": 1.1832907075873829, - "grad_norm": 0.095455601811409, - "learning_rate": 6.127761487688964e-05, - "loss": 0.004740688577294349, - "step": 6940 - }, - { - "epoch": 1.184143222506394, - "grad_norm": 0.11498606204986572, - "learning_rate": 6.125779649282599e-05, - "loss": 0.00805070549249649, - "step": 6945 - }, - { - "epoch": 1.184995737425405, - "grad_norm": 0.07489286363124847, - "learning_rate": 6.123796701783264e-05, - "loss": 0.0060746859759092334, - "step": 6950 - }, - { - "epoch": 1.185848252344416, - "grad_norm": 0.07027438282966614, - "learning_rate": 6.121812646116663e-05, - "loss": 0.006816025823354721, - "step": 6955 - }, - { - "epoch": 1.186700767263427, - "grad_norm": 0.08542973548173904, - "learning_rate": 6.119827483209024e-05, - "loss": 0.005315831303596497, - "step": 6960 - }, - { - "epoch": 1.1875532821824382, - "grad_norm": 0.08409032970666885, - "learning_rate": 6.117841213987082e-05, - "loss": 0.0061601437628269196, - "step": 6965 - }, - { - "epoch": 1.1884057971014492, - "grad_norm": 0.10387974232435226, - "learning_rate": 6.115853839378096e-05, - "loss": 0.0071022816002368925, - "step": 6970 - }, - { - "epoch": 1.1892583120204603, - "grad_norm": 0.056441329419612885, - "learning_rate": 6.113865360309838e-05, - "loss": 0.004539218544960022, - "step": 6975 - }, - { - "epoch": 1.1901108269394713, - "grad_norm": 0.10132234543561935, - "learning_rate": 6.111875777710598e-05, - "loss": 0.0060778014361858364, - "step": 6980 - }, - { - "epoch": 1.1909633418584824, - "grad_norm": 0.07129226624965668, - "learning_rate": 6.109885092509177e-05, - "loss": 0.007794113457202911, - "step": 6985 - }, - { - "epoch": 1.1918158567774937, - "grad_norm": 0.09267892688512802, - "learning_rate": 6.107893305634893e-05, - "loss": 0.006867295503616333, - "step": 6990 - }, - { - "epoch": 1.1926683716965047, - "grad_norm": 0.0739888921380043, - "learning_rate": 6.10590041801758e-05, - "loss": 0.006263263523578644, - "step": 6995 - }, - { - "epoch": 1.1935208866155158, - "grad_norm": 0.10201311856508255, - "learning_rate": 6.103906430587583e-05, - "loss": 0.006245525926351548, - "step": 7000 - }, - { - "epoch": 1.1943734015345269, - "grad_norm": 0.08561360090970993, - "learning_rate": 6.1019113442757636e-05, - "loss": 0.005739351361989975, - "step": 7005 - }, - { - "epoch": 1.195225916453538, - "grad_norm": 0.06410367786884308, - "learning_rate": 6.099915160013491e-05, - "loss": 0.00598936527967453, - "step": 7010 - }, - { - "epoch": 1.196078431372549, - "grad_norm": 0.11765716224908829, - "learning_rate": 6.0979178787326544e-05, - "loss": 0.010477253794670105, - "step": 7015 - }, - { - "epoch": 1.19693094629156, - "grad_norm": 0.06659694015979767, - "learning_rate": 6.095919501365648e-05, - "loss": 0.0072447523474693295, - "step": 7020 - }, - { - "epoch": 1.197783461210571, - "grad_norm": 0.05651358142495155, - "learning_rate": 6.093920028845381e-05, - "loss": 0.004644358158111572, - "step": 7025 - }, - { - "epoch": 1.1986359761295824, - "grad_norm": 0.07198809087276459, - "learning_rate": 6.0919194621052764e-05, - "loss": 0.00517328754067421, - "step": 7030 - }, - { - "epoch": 1.1994884910485935, - "grad_norm": 0.06188420578837395, - "learning_rate": 6.0899178020792614e-05, - "loss": 0.005182894691824913, - "step": 7035 - }, - { - "epoch": 1.2003410059676045, - "grad_norm": 0.07731341570615768, - "learning_rate": 6.087915049701783e-05, - "loss": 0.006863371282815933, - "step": 7040 - }, - { - "epoch": 1.2011935208866156, - "grad_norm": 0.07693833857774734, - "learning_rate": 6.0859112059077866e-05, - "loss": 0.008634812384843826, - "step": 7045 - }, - { - "epoch": 1.2020460358056266, - "grad_norm": 0.08118413388729095, - "learning_rate": 6.083906271632736e-05, - "loss": 0.008003174513578414, - "step": 7050 - }, - { - "epoch": 1.2028985507246377, - "grad_norm": 0.07794218510389328, - "learning_rate": 6.0819002478126016e-05, - "loss": 0.005899757146835327, - "step": 7055 - }, - { - "epoch": 1.2037510656436488, - "grad_norm": 0.08107218146324158, - "learning_rate": 6.079893135383861e-05, - "loss": 0.007581057399511338, - "step": 7060 - }, - { - "epoch": 1.2046035805626598, - "grad_norm": 0.06910198926925659, - "learning_rate": 6.077884935283502e-05, - "loss": 0.00794234573841095, - "step": 7065 - }, - { - "epoch": 1.2054560954816709, - "grad_norm": 0.08426421135663986, - "learning_rate": 6.0758756484490186e-05, - "loss": 0.0057635679841041565, - "step": 7070 - }, - { - "epoch": 1.206308610400682, - "grad_norm": 0.08670961856842041, - "learning_rate": 6.073865275818413e-05, - "loss": 0.006369538605213165, - "step": 7075 - }, - { - "epoch": 1.207161125319693, - "grad_norm": 0.04245399683713913, - "learning_rate": 6.071853818330193e-05, - "loss": 0.0067548036575317385, - "step": 7080 - }, - { - "epoch": 1.208013640238704, - "grad_norm": 0.10600235313177109, - "learning_rate": 6.069841276923376e-05, - "loss": 0.006923867762088776, - "step": 7085 - }, - { - "epoch": 1.2088661551577153, - "grad_norm": 0.07093790173530579, - "learning_rate": 6.0678276525374814e-05, - "loss": 0.005083417519927025, - "step": 7090 - }, - { - "epoch": 1.2097186700767264, - "grad_norm": 0.04997126758098602, - "learning_rate": 6.065812946112537e-05, - "loss": 0.006207586824893951, - "step": 7095 - }, - { - "epoch": 1.2105711849957375, - "grad_norm": 0.04425262287259102, - "learning_rate": 6.063797158589074e-05, - "loss": 0.0046977601945400235, - "step": 7100 - }, - { - "epoch": 1.2114236999147485, - "grad_norm": 0.07300136238336563, - "learning_rate": 6.0617802909081295e-05, - "loss": 0.005589437484741211, - "step": 7105 - }, - { - "epoch": 1.2122762148337596, - "grad_norm": 0.0878889262676239, - "learning_rate": 6.0597623440112445e-05, - "loss": 0.005844222381711006, - "step": 7110 - }, - { - "epoch": 1.2131287297527706, - "grad_norm": 0.09079992771148682, - "learning_rate": 6.0577433188404635e-05, - "loss": 0.007329034805297852, - "step": 7115 - }, - { - "epoch": 1.2139812446717817, - "grad_norm": 0.07165387272834778, - "learning_rate": 6.055723216338336e-05, - "loss": 0.006642927974462509, - "step": 7120 - }, - { - "epoch": 1.2148337595907928, - "grad_norm": 0.07113955169916153, - "learning_rate": 6.053702037447911e-05, - "loss": 0.006992670893669129, - "step": 7125 - }, - { - "epoch": 1.215686274509804, - "grad_norm": 0.08737215399742126, - "learning_rate": 6.0516797831127425e-05, - "loss": 0.006702055037021637, - "step": 7130 - }, - { - "epoch": 1.216538789428815, - "grad_norm": 0.07276564091444016, - "learning_rate": 6.049656454276887e-05, - "loss": 0.005692619457840919, - "step": 7135 - }, - { - "epoch": 1.2173913043478262, - "grad_norm": 0.09203831106424332, - "learning_rate": 6.0476320518849006e-05, - "loss": 0.006464710086584091, - "step": 7140 - }, - { - "epoch": 1.2182438192668372, - "grad_norm": 0.07749375700950623, - "learning_rate": 6.045606576881843e-05, - "loss": 0.008598372340202332, - "step": 7145 - }, - { - "epoch": 1.2190963341858483, - "grad_norm": 0.04338040575385094, - "learning_rate": 6.043580030213272e-05, - "loss": 0.006892016530036927, - "step": 7150 - }, - { - "epoch": 1.2199488491048593, - "grad_norm": 0.06691034138202667, - "learning_rate": 6.0415524128252474e-05, - "loss": 0.007622670382261276, - "step": 7155 - }, - { - "epoch": 1.2208013640238704, - "grad_norm": 0.07532396167516708, - "learning_rate": 6.039523725664329e-05, - "loss": 0.00698106437921524, - "step": 7160 - }, - { - "epoch": 1.2216538789428815, - "grad_norm": 0.0408058688044548, - "learning_rate": 6.037493969677575e-05, - "loss": 0.005919446796178817, - "step": 7165 - }, - { - "epoch": 1.2225063938618925, - "grad_norm": 0.07306578010320663, - "learning_rate": 6.0354631458125425e-05, - "loss": 0.008953345566987991, - "step": 7170 - }, - { - "epoch": 1.2233589087809036, - "grad_norm": 0.08269601315259933, - "learning_rate": 6.033431255017289e-05, - "loss": 0.007224951684474945, - "step": 7175 - }, - { - "epoch": 1.2242114236999146, - "grad_norm": 0.045140884816646576, - "learning_rate": 6.0313982982403676e-05, - "loss": 0.006175454705953598, - "step": 7180 - }, - { - "epoch": 1.2250639386189257, - "grad_norm": 0.0721440315246582, - "learning_rate": 6.0293642764308304e-05, - "loss": 0.007267911732196808, - "step": 7185 - }, - { - "epoch": 1.225916453537937, - "grad_norm": 0.081813283264637, - "learning_rate": 6.027329190538227e-05, - "loss": 0.006872846186161042, - "step": 7190 - }, - { - "epoch": 1.226768968456948, - "grad_norm": 0.05662613734602928, - "learning_rate": 6.025293041512602e-05, - "loss": 0.004837202653288841, - "step": 7195 - }, - { - "epoch": 1.227621483375959, - "grad_norm": 0.10023492574691772, - "learning_rate": 6.023255830304498e-05, - "loss": 0.0060194481164217, - "step": 7200 - }, - { - "epoch": 1.2284739982949702, - "grad_norm": 0.06398235261440277, - "learning_rate": 6.021217557864954e-05, - "loss": 0.007653398066759109, - "step": 7205 - }, - { - "epoch": 1.2293265132139812, - "grad_norm": 0.09494475275278091, - "learning_rate": 6.019178225145503e-05, - "loss": 0.007547302544116974, - "step": 7210 - }, - { - "epoch": 1.2301790281329923, - "grad_norm": 0.05356467142701149, - "learning_rate": 6.017137833098171e-05, - "loss": 0.007133310288190841, - "step": 7215 - }, - { - "epoch": 1.2310315430520034, - "grad_norm": 0.09225092083215714, - "learning_rate": 6.0150963826754836e-05, - "loss": 0.006320309638977051, - "step": 7220 - }, - { - "epoch": 1.2318840579710144, - "grad_norm": 0.07072161883115768, - "learning_rate": 6.013053874830458e-05, - "loss": 0.007313568145036697, - "step": 7225 - }, - { - "epoch": 1.2327365728900257, - "grad_norm": 0.07206818461418152, - "learning_rate": 6.0110103105166026e-05, - "loss": 0.0054031949490308765, - "step": 7230 - }, - { - "epoch": 1.2335890878090368, - "grad_norm": 0.08611681312322617, - "learning_rate": 6.008965690687922e-05, - "loss": 0.00670153945684433, - "step": 7235 - }, - { - "epoch": 1.2344416027280478, - "grad_norm": 0.07864221930503845, - "learning_rate": 6.0069200162989154e-05, - "loss": 0.0069690033793449405, - "step": 7240 - }, - { - "epoch": 1.2352941176470589, - "grad_norm": 0.06847227364778519, - "learning_rate": 6.0048732883045665e-05, - "loss": 0.006755173206329346, - "step": 7245 - }, - { - "epoch": 1.23614663256607, - "grad_norm": 0.06264699995517731, - "learning_rate": 6.0028255076603606e-05, - "loss": 0.00519348569214344, - "step": 7250 - }, - { - "epoch": 1.236999147485081, - "grad_norm": 0.04114431515336037, - "learning_rate": 6.0007766753222665e-05, - "loss": 0.006071234866976738, - "step": 7255 - }, - { - "epoch": 1.237851662404092, - "grad_norm": 0.03660140559077263, - "learning_rate": 5.998726792246751e-05, - "loss": 0.007517064362764359, - "step": 7260 - }, - { - "epoch": 1.2387041773231031, - "grad_norm": 0.10343052446842194, - "learning_rate": 5.9966758593907636e-05, - "loss": 0.0070131182670593265, - "step": 7265 - }, - { - "epoch": 1.2395566922421142, - "grad_norm": 0.08169959485530853, - "learning_rate": 5.994623877711751e-05, - "loss": 0.006279528886079788, - "step": 7270 - }, - { - "epoch": 1.2404092071611252, - "grad_norm": 0.06312677264213562, - "learning_rate": 5.992570848167645e-05, - "loss": 0.0041657909750938416, - "step": 7275 - }, - { - "epoch": 1.2412617220801363, - "grad_norm": 0.08725713193416595, - "learning_rate": 5.990516771716869e-05, - "loss": 0.007908149063587189, - "step": 7280 - }, - { - "epoch": 1.2421142369991476, - "grad_norm": 0.05857875198125839, - "learning_rate": 5.988461649318333e-05, - "loss": 0.005137740075588227, - "step": 7285 - }, - { - "epoch": 1.2429667519181586, - "grad_norm": 0.04836168512701988, - "learning_rate": 5.986405481931438e-05, - "loss": 0.005033157765865326, - "step": 7290 - }, - { - "epoch": 1.2438192668371697, - "grad_norm": 0.09514568001031876, - "learning_rate": 5.98434827051607e-05, - "loss": 0.007460397481918335, - "step": 7295 - }, - { - "epoch": 1.2446717817561808, - "grad_norm": 0.049415748566389084, - "learning_rate": 5.982290016032604e-05, - "loss": 0.0049881644546985624, - "step": 7300 - }, - { - "epoch": 1.2455242966751918, - "grad_norm": 0.1069302037358284, - "learning_rate": 5.980230719441903e-05, - "loss": 0.005356961116194725, - "step": 7305 - }, - { - "epoch": 1.2463768115942029, - "grad_norm": 0.1031380444765091, - "learning_rate": 5.9781703817053136e-05, - "loss": 0.0067513369023799895, - "step": 7310 - }, - { - "epoch": 1.247229326513214, - "grad_norm": 0.05909884348511696, - "learning_rate": 5.976109003784671e-05, - "loss": 0.005357486009597778, - "step": 7315 - }, - { - "epoch": 1.248081841432225, - "grad_norm": 0.09306607395410538, - "learning_rate": 5.974046586642295e-05, - "loss": 0.005747456848621368, - "step": 7320 - }, - { - "epoch": 1.248934356351236, - "grad_norm": 0.0688752606511116, - "learning_rate": 5.971983131240988e-05, - "loss": 0.0073902375996112825, - "step": 7325 - }, - { - "epoch": 1.2497868712702473, - "grad_norm": 0.06592141836881638, - "learning_rate": 5.969918638544044e-05, - "loss": 0.008268805593252182, - "step": 7330 - }, - { - "epoch": 1.2497868712702473, - "eval_loss": 0.037298671901226044, - "eval_runtime": 3.6917, - "eval_samples_per_second": 68.261, - "eval_steps_per_second": 1.084, - "step": 7330 - }, - { - "eval_cer_subset": 0.01283241324278991, - "eval_cer_subset_edit_distance": 788, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 7330 - }, - { - "epoch": 1.2506393861892584, - "grad_norm": 0.06094380095601082, - "learning_rate": 5.9678531095152326e-05, - "loss": 0.005528298765420913, - "step": 7335 - }, - { - "epoch": 1.2514919011082695, - "grad_norm": 0.1417030543088913, - "learning_rate": 5.965786545118815e-05, - "loss": 0.00984017476439476, - "step": 7340 - }, - { - "epoch": 1.2523444160272805, - "grad_norm": 0.08209668844938278, - "learning_rate": 5.963718946319529e-05, - "loss": 0.007516486942768097, - "step": 7345 - }, - { - "epoch": 1.2531969309462916, - "grad_norm": 0.06825494766235352, - "learning_rate": 5.9616503140826006e-05, - "loss": 0.005924524366855621, - "step": 7350 - }, - { - "epoch": 1.2540494458653026, - "grad_norm": 0.11229037493467331, - "learning_rate": 5.959580649373736e-05, - "loss": 0.006495627760887146, - "step": 7355 - }, - { - "epoch": 1.2549019607843137, - "grad_norm": 0.13235078752040863, - "learning_rate": 5.957509953159123e-05, - "loss": 0.00942063182592392, - "step": 7360 - }, - { - "epoch": 1.2557544757033248, - "grad_norm": 0.04514055699110031, - "learning_rate": 5.955438226405432e-05, - "loss": 0.006601292639970779, - "step": 7365 - }, - { - "epoch": 1.2566069906223358, - "grad_norm": 0.08192043751478195, - "learning_rate": 5.9533654700798126e-05, - "loss": 0.007403627783060074, - "step": 7370 - }, - { - "epoch": 1.257459505541347, - "grad_norm": 0.07101254910230637, - "learning_rate": 5.951291685149898e-05, - "loss": 0.006301522254943848, - "step": 7375 - }, - { - "epoch": 1.258312020460358, - "grad_norm": 0.05598035827279091, - "learning_rate": 5.949216872583799e-05, - "loss": 0.006812388449907303, - "step": 7380 - }, - { - "epoch": 1.259164535379369, - "grad_norm": 0.06444506347179413, - "learning_rate": 5.9471410333501085e-05, - "loss": 0.005891536176204681, - "step": 7385 - }, - { - "epoch": 1.2600170502983803, - "grad_norm": 0.04921717569231987, - "learning_rate": 5.945064168417895e-05, - "loss": 0.004649973660707474, - "step": 7390 - }, - { - "epoch": 1.2608695652173914, - "grad_norm": 0.09095602482557297, - "learning_rate": 5.94298627875671e-05, - "loss": 0.007515725493431091, - "step": 7395 - }, - { - "epoch": 1.2617220801364024, - "grad_norm": 0.09932803362607956, - "learning_rate": 5.9409073653365816e-05, - "loss": 0.006223166733980179, - "step": 7400 - }, - { - "epoch": 1.2625745950554135, - "grad_norm": 0.08616010844707489, - "learning_rate": 5.938827429128014e-05, - "loss": 0.006999516487121582, - "step": 7405 - }, - { - "epoch": 1.2634271099744245, - "grad_norm": 0.11979297548532486, - "learning_rate": 5.936746471101993e-05, - "loss": 0.00812242105603218, - "step": 7410 - }, - { - "epoch": 1.2642796248934356, - "grad_norm": 0.12872007489204407, - "learning_rate": 5.934664492229976e-05, - "loss": 0.006246988475322723, - "step": 7415 - }, - { - "epoch": 1.2651321398124467, - "grad_norm": 0.0831044539809227, - "learning_rate": 5.932581493483903e-05, - "loss": 0.00590248554944992, - "step": 7420 - }, - { - "epoch": 1.265984654731458, - "grad_norm": 0.09913221001625061, - "learning_rate": 5.9304974758361857e-05, - "loss": 0.007224322855472564, - "step": 7425 - }, - { - "epoch": 1.266837169650469, - "grad_norm": 0.08654595911502838, - "learning_rate": 5.928412440259713e-05, - "loss": 0.007056090980768204, - "step": 7430 - }, - { - "epoch": 1.26768968456948, - "grad_norm": 0.07882801443338394, - "learning_rate": 5.926326387727849e-05, - "loss": 0.00572751946747303, - "step": 7435 - }, - { - "epoch": 1.2685421994884911, - "grad_norm": 0.12886428833007812, - "learning_rate": 5.924239319214432e-05, - "loss": 0.0106881283223629, - "step": 7440 - }, - { - "epoch": 1.2693947144075022, - "grad_norm": 0.05597686767578125, - "learning_rate": 5.922151235693775e-05, - "loss": 0.005041084438562393, - "step": 7445 - }, - { - "epoch": 1.2702472293265132, - "grad_norm": 0.10719682276248932, - "learning_rate": 5.920062138140665e-05, - "loss": 0.007724158465862274, - "step": 7450 - }, - { - "epoch": 1.2710997442455243, - "grad_norm": 0.045485325157642365, - "learning_rate": 5.917972027530363e-05, - "loss": 0.003246675431728363, - "step": 7455 - }, - { - "epoch": 1.2719522591645354, - "grad_norm": 0.09602563083171844, - "learning_rate": 5.9158809048386017e-05, - "loss": 0.006592199206352234, - "step": 7460 - }, - { - "epoch": 1.2728047740835464, - "grad_norm": 0.0555407889187336, - "learning_rate": 5.913788771041586e-05, - "loss": 0.00537751168012619, - "step": 7465 - }, - { - "epoch": 1.2736572890025575, - "grad_norm": 0.15820109844207764, - "learning_rate": 5.911695627115994e-05, - "loss": 0.005968114733695984, - "step": 7470 - }, - { - "epoch": 1.2745098039215685, - "grad_norm": 0.05781199410557747, - "learning_rate": 5.9096014740389754e-05, - "loss": 0.00887204110622406, - "step": 7475 - }, - { - "epoch": 1.2753623188405796, - "grad_norm": 0.07927337288856506, - "learning_rate": 5.90750631278815e-05, - "loss": 0.006439142674207687, - "step": 7480 - }, - { - "epoch": 1.2762148337595907, - "grad_norm": 0.03843824937939644, - "learning_rate": 5.905410144341609e-05, - "loss": 0.007792883366346359, - "step": 7485 - }, - { - "epoch": 1.277067348678602, - "grad_norm": 0.0692640095949173, - "learning_rate": 5.903312969677914e-05, - "loss": 0.006274447590112686, - "step": 7490 - }, - { - "epoch": 1.277919863597613, - "grad_norm": 0.07501527667045593, - "learning_rate": 5.901214789776094e-05, - "loss": 0.007496471703052521, - "step": 7495 - }, - { - "epoch": 1.278772378516624, - "grad_norm": 0.10271260142326355, - "learning_rate": 5.8991156056156514e-05, - "loss": 0.008766942471265794, - "step": 7500 - }, - { - "epoch": 1.2796248934356351, - "grad_norm": 0.03995242714881897, - "learning_rate": 5.897015418176555e-05, - "loss": 0.0055749226361513134, - "step": 7505 - }, - { - "epoch": 1.2804774083546462, - "grad_norm": 0.09215585142374039, - "learning_rate": 5.8949142284392406e-05, - "loss": 0.005763960257172585, - "step": 7510 - }, - { - "epoch": 1.2813299232736572, - "grad_norm": 0.07763402909040451, - "learning_rate": 5.892812037384615e-05, - "loss": 0.006439389288425445, - "step": 7515 - }, - { - "epoch": 1.2821824381926683, - "grad_norm": 0.04945438355207443, - "learning_rate": 5.890708845994049e-05, - "loss": 0.006960665434598922, - "step": 7520 - }, - { - "epoch": 1.2830349531116796, - "grad_norm": 0.05348283797502518, - "learning_rate": 5.888604655249384e-05, - "loss": 0.0061422914266586305, - "step": 7525 - }, - { - "epoch": 1.2838874680306906, - "grad_norm": 0.10389877110719681, - "learning_rate": 5.886499466132926e-05, - "loss": 0.009247081726789475, - "step": 7530 - }, - { - "epoch": 1.2847399829497017, - "grad_norm": 0.07753872126340866, - "learning_rate": 5.884393279627448e-05, - "loss": 0.004902977123856544, - "step": 7535 - }, - { - "epoch": 1.2855924978687128, - "grad_norm": 0.10553103685379028, - "learning_rate": 5.8822860967161856e-05, - "loss": 0.004547145590186119, - "step": 7540 - }, - { - "epoch": 1.2864450127877238, - "grad_norm": 0.08235067129135132, - "learning_rate": 5.880177918382844e-05, - "loss": 0.005282455682754516, - "step": 7545 - }, - { - "epoch": 1.287297527706735, - "grad_norm": 0.08135014772415161, - "learning_rate": 5.878068745611591e-05, - "loss": 0.006127358600497246, - "step": 7550 - }, - { - "epoch": 1.288150042625746, - "grad_norm": 0.04027952626347542, - "learning_rate": 5.875958579387056e-05, - "loss": 0.008251778036355972, - "step": 7555 - }, - { - "epoch": 1.289002557544757, - "grad_norm": 0.1060953438282013, - "learning_rate": 5.8738474206943385e-05, - "loss": 0.008290941268205643, - "step": 7560 - }, - { - "epoch": 1.289855072463768, - "grad_norm": 0.06716421991586685, - "learning_rate": 5.871735270518995e-05, - "loss": 0.004932524263858795, - "step": 7565 - }, - { - "epoch": 1.2907075873827791, - "grad_norm": 0.07644582539796829, - "learning_rate": 5.869622129847048e-05, - "loss": 0.006172410026192665, - "step": 7570 - }, - { - "epoch": 1.2915601023017902, - "grad_norm": 0.06018557399511337, - "learning_rate": 5.867507999664983e-05, - "loss": 0.005532362312078476, - "step": 7575 - }, - { - "epoch": 1.2924126172208013, - "grad_norm": 0.06454342603683472, - "learning_rate": 5.865392880959745e-05, - "loss": 0.005053167790174484, - "step": 7580 - }, - { - "epoch": 1.2932651321398123, - "grad_norm": 0.07618142664432526, - "learning_rate": 5.863276774718742e-05, - "loss": 0.005658206716179848, - "step": 7585 - }, - { - "epoch": 1.2941176470588236, - "grad_norm": 0.05649973824620247, - "learning_rate": 5.8611596819298434e-05, - "loss": 0.007477214187383651, - "step": 7590 - }, - { - "epoch": 1.2949701619778347, - "grad_norm": 0.09222351759672165, - "learning_rate": 5.859041603581377e-05, - "loss": 0.006974493712186813, - "step": 7595 - }, - { - "epoch": 1.2958226768968457, - "grad_norm": 0.07462326437234879, - "learning_rate": 5.856922540662134e-05, - "loss": 0.008175718039274216, - "step": 7600 - }, - { - "epoch": 1.2966751918158568, - "grad_norm": 0.10593193024396896, - "learning_rate": 5.854802494161364e-05, - "loss": 0.006635700166225433, - "step": 7605 - }, - { - "epoch": 1.2975277067348678, - "grad_norm": 0.08673358708620071, - "learning_rate": 5.8526814650687724e-05, - "loss": 0.007347754389047623, - "step": 7610 - }, - { - "epoch": 1.298380221653879, - "grad_norm": 0.10450063645839691, - "learning_rate": 5.850559454374528e-05, - "loss": 0.008085139095783234, - "step": 7615 - }, - { - "epoch": 1.29923273657289, - "grad_norm": 0.04219435900449753, - "learning_rate": 5.848436463069257e-05, - "loss": 0.006296204030513763, - "step": 7620 - }, - { - "epoch": 1.3000852514919012, - "grad_norm": 0.08187524974346161, - "learning_rate": 5.84631249214404e-05, - "loss": 0.007680010050535202, - "step": 7625 - }, - { - "epoch": 1.3009377664109123, - "grad_norm": 0.21044164896011353, - "learning_rate": 5.844187542590418e-05, - "loss": 0.008709554374217988, - "step": 7630 - }, - { - "epoch": 1.3017902813299234, - "grad_norm": 0.09822215139865875, - "learning_rate": 5.842061615400389e-05, - "loss": 0.007412384450435639, - "step": 7635 - }, - { - "epoch": 1.3026427962489344, - "grad_norm": 0.05957398563623428, - "learning_rate": 5.8399347115664053e-05, - "loss": 0.0062717020511627196, - "step": 7640 - }, - { - "epoch": 1.3034953111679455, - "grad_norm": 0.07013436406850815, - "learning_rate": 5.837806832081378e-05, - "loss": 0.005471421033143997, - "step": 7645 - }, - { - "epoch": 1.3043478260869565, - "grad_norm": 0.09616916626691818, - "learning_rate": 5.835677977938671e-05, - "loss": 0.008985907584428788, - "step": 7650 - }, - { - "epoch": 1.3052003410059676, - "grad_norm": 0.07946161180734634, - "learning_rate": 5.833548150132105e-05, - "loss": 0.00563003197312355, - "step": 7655 - }, - { - "epoch": 1.3060528559249787, - "grad_norm": 0.0630686804652214, - "learning_rate": 5.831417349655953e-05, - "loss": 0.007591667026281357, - "step": 7660 - }, - { - "epoch": 1.3069053708439897, - "grad_norm": 0.08530164510011673, - "learning_rate": 5.829285577504944e-05, - "loss": 0.006751708686351776, - "step": 7665 - }, - { - "epoch": 1.3077578857630008, - "grad_norm": 0.045148320496082306, - "learning_rate": 5.8271528346742616e-05, - "loss": 0.0052963607013225555, - "step": 7670 - }, - { - "epoch": 1.3086104006820118, - "grad_norm": 0.07147885859012604, - "learning_rate": 5.82501912215954e-05, - "loss": 0.005282463133335113, - "step": 7675 - }, - { - "epoch": 1.309462915601023, - "grad_norm": 0.0933302789926529, - "learning_rate": 5.8228844409568654e-05, - "loss": 0.0073209434747695925, - "step": 7680 - }, - { - "epoch": 1.310315430520034, - "grad_norm": 0.07449645549058914, - "learning_rate": 5.820748792062781e-05, - "loss": 0.007801111787557602, - "step": 7685 - }, - { - "epoch": 1.3111679454390452, - "grad_norm": 0.04569214582443237, - "learning_rate": 5.8186121764742774e-05, - "loss": 0.006659354269504547, - "step": 7690 - }, - { - "epoch": 1.3120204603580563, - "grad_norm": 0.07046396285295486, - "learning_rate": 5.8164745951887995e-05, - "loss": 0.006448440253734589, - "step": 7695 - }, - { - "epoch": 1.3128729752770674, - "grad_norm": 0.09704319387674332, - "learning_rate": 5.814336049204239e-05, - "loss": 0.008210816234350205, - "step": 7700 - }, - { - "epoch": 1.3137254901960784, - "grad_norm": 0.06477776169776917, - "learning_rate": 5.81219653951894e-05, - "loss": 0.005369330942630768, - "step": 7705 - }, - { - "epoch": 1.3145780051150895, - "grad_norm": 0.11657397449016571, - "learning_rate": 5.810056067131698e-05, - "loss": 0.010190412402153015, - "step": 7710 - }, - { - "epoch": 1.3154305200341005, - "grad_norm": 0.06578268110752106, - "learning_rate": 5.8079146330417575e-05, - "loss": 0.006289052963256836, - "step": 7715 - }, - { - "epoch": 1.3162830349531116, - "grad_norm": 0.06296945363283157, - "learning_rate": 5.80577223824881e-05, - "loss": 0.008120459318161011, - "step": 7720 - }, - { - "epoch": 1.317135549872123, - "grad_norm": 0.08275634050369263, - "learning_rate": 5.803628883752996e-05, - "loss": 0.006926379352807999, - "step": 7725 - }, - { - "epoch": 1.317988064791134, - "grad_norm": 0.0693436712026596, - "learning_rate": 5.8014845705549086e-05, - "loss": 0.006521113961935043, - "step": 7730 - }, - { - "epoch": 1.318840579710145, - "grad_norm": 0.05845775827765465, - "learning_rate": 5.799339299655579e-05, - "loss": 0.00762510895729065, - "step": 7735 - }, - { - "epoch": 1.319693094629156, - "grad_norm": 0.08802217245101929, - "learning_rate": 5.7971930720564947e-05, - "loss": 0.008071760833263397, - "step": 7740 - }, - { - "epoch": 1.3205456095481671, - "grad_norm": 0.08866037428379059, - "learning_rate": 5.795045888759585e-05, - "loss": 0.006111105903983116, - "step": 7745 - }, - { - "epoch": 1.3213981244671782, - "grad_norm": 0.0844360888004303, - "learning_rate": 5.792897750767225e-05, - "loss": 0.005196729302406311, - "step": 7750 - }, - { - "epoch": 1.3222506393861893, - "grad_norm": 0.06763950735330582, - "learning_rate": 5.79074865908224e-05, - "loss": 0.006462454050779343, - "step": 7755 - }, - { - "epoch": 1.3231031543052003, - "grad_norm": 0.06333937495946884, - "learning_rate": 5.7885986147078946e-05, - "loss": 0.0068017512559890745, - "step": 7760 - }, - { - "epoch": 1.3239556692242114, - "grad_norm": 0.05730217695236206, - "learning_rate": 5.786447618647904e-05, - "loss": 0.0065845087170600895, - "step": 7765 - }, - { - "epoch": 1.3248081841432224, - "grad_norm": 0.06838720291852951, - "learning_rate": 5.784295671906422e-05, - "loss": 0.0059626404196023945, - "step": 7770 - }, - { - "epoch": 1.3256606990622335, - "grad_norm": 0.06693503260612488, - "learning_rate": 5.782142775488051e-05, - "loss": 0.008056168258190156, - "step": 7775 - }, - { - "epoch": 1.3265132139812446, - "grad_norm": 0.07886708527803421, - "learning_rate": 5.7799889303978324e-05, - "loss": 0.006670922040939331, - "step": 7780 - }, - { - "epoch": 1.3273657289002558, - "grad_norm": 0.06932322680950165, - "learning_rate": 5.777834137641255e-05, - "loss": 0.006734507530927658, - "step": 7785 - }, - { - "epoch": 1.328218243819267, - "grad_norm": 0.08057818561792374, - "learning_rate": 5.775678398224247e-05, - "loss": 0.005952415242791176, - "step": 7790 - }, - { - "epoch": 1.329070758738278, - "grad_norm": 0.06614059209823608, - "learning_rate": 5.7735217131531785e-05, - "loss": 0.007471600174903869, - "step": 7795 - }, - { - "epoch": 1.329923273657289, - "grad_norm": 0.06335467845201492, - "learning_rate": 5.771364083434862e-05, - "loss": 0.007279399782419205, - "step": 7800 - }, - { - "epoch": 1.3307757885763, - "grad_norm": 0.11745526641607285, - "learning_rate": 5.769205510076552e-05, - "loss": 0.006242561340332031, - "step": 7805 - }, - { - "epoch": 1.3316283034953111, - "grad_norm": 0.0590963289141655, - "learning_rate": 5.7670459940859414e-05, - "loss": 0.006263129413127899, - "step": 7810 - }, - { - "epoch": 1.3324808184143222, - "grad_norm": 0.05416800454258919, - "learning_rate": 5.764885536471164e-05, - "loss": 0.00531160868704319, - "step": 7815 - }, - { - "epoch": 1.3333333333333333, - "grad_norm": 0.05527244135737419, - "learning_rate": 5.7627241382407933e-05, - "loss": 0.005747637152671814, - "step": 7820 - }, - { - "epoch": 1.3341858482523445, - "grad_norm": 0.057753629982471466, - "learning_rate": 5.760561800403844e-05, - "loss": 0.004979781061410904, - "step": 7825 - }, - { - "epoch": 1.3350383631713556, - "grad_norm": 0.10882547497749329, - "learning_rate": 5.758398523969763e-05, - "loss": 0.00589316263794899, - "step": 7830 - }, - { - "epoch": 1.3358908780903667, - "grad_norm": 0.08053787797689438, - "learning_rate": 5.756234309948443e-05, - "loss": 0.004465704411268234, - "step": 7835 - }, - { - "epoch": 1.3367433930093777, - "grad_norm": 0.09168808907270432, - "learning_rate": 5.75406915935021e-05, - "loss": 0.004735191911458969, - "step": 7840 - }, - { - "epoch": 1.3375959079283888, - "grad_norm": 0.0956537052989006, - "learning_rate": 5.751903073185829e-05, - "loss": 0.005519610643386841, - "step": 7845 - }, - { - "epoch": 1.3384484228473998, - "grad_norm": 0.05775619298219681, - "learning_rate": 5.749736052466501e-05, - "loss": 0.005525605380535125, - "step": 7850 - }, - { - "epoch": 1.339300937766411, - "grad_norm": 0.08594895154237747, - "learning_rate": 5.7475680982038616e-05, - "loss": 0.005280618742108345, - "step": 7855 - }, - { - "epoch": 1.340153452685422, - "grad_norm": 0.10326153039932251, - "learning_rate": 5.745399211409987e-05, - "loss": 0.005818159133195877, - "step": 7860 - }, - { - "epoch": 1.341005967604433, - "grad_norm": 0.053448133170604706, - "learning_rate": 5.743229393097384e-05, - "loss": 0.008255011588335037, - "step": 7865 - }, - { - "epoch": 1.341858482523444, - "grad_norm": 0.05307561904191971, - "learning_rate": 5.741058644278995e-05, - "loss": 0.006851959228515625, - "step": 7870 - }, - { - "epoch": 1.3427109974424551, - "grad_norm": 0.050789013504981995, - "learning_rate": 5.738886965968199e-05, - "loss": 0.005396667867898941, - "step": 7875 - }, - { - "epoch": 1.3435635123614662, - "grad_norm": 0.06762190908193588, - "learning_rate": 5.736714359178808e-05, - "loss": 0.005661940947175026, - "step": 7880 - }, - { - "epoch": 1.3444160272804775, - "grad_norm": 0.06955094635486603, - "learning_rate": 5.734540824925066e-05, - "loss": 0.0065834902226924895, - "step": 7885 - }, - { - "epoch": 1.3452685421994885, - "grad_norm": 0.09844111651182175, - "learning_rate": 5.7323663642216525e-05, - "loss": 0.006687184423208236, - "step": 7890 - }, - { - "epoch": 1.3461210571184996, - "grad_norm": 0.05249316990375519, - "learning_rate": 5.7301909780836766e-05, - "loss": 0.00670531764626503, - "step": 7895 - }, - { - "epoch": 1.3469735720375107, - "grad_norm": 0.06578750163316727, - "learning_rate": 5.7280146675266815e-05, - "loss": 0.0063153237104415895, - "step": 7900 - }, - { - "epoch": 1.3478260869565217, - "grad_norm": 0.10460260510444641, - "learning_rate": 5.725837433566643e-05, - "loss": 0.008820119500160217, - "step": 7905 - }, - { - "epoch": 1.3486786018755328, - "grad_norm": 0.06620552390813828, - "learning_rate": 5.7236592772199624e-05, - "loss": 0.006502580642700195, - "step": 7910 - }, - { - "epoch": 1.3495311167945439, - "grad_norm": 0.1033373698592186, - "learning_rate": 5.72148019950348e-05, - "loss": 0.008503454178571701, - "step": 7915 - }, - { - "epoch": 1.350383631713555, - "grad_norm": 0.05790281295776367, - "learning_rate": 5.719300201434458e-05, - "loss": 0.006304294615983963, - "step": 7920 - }, - { - "epoch": 1.3512361466325662, - "grad_norm": 0.06094033271074295, - "learning_rate": 5.717119284030595e-05, - "loss": 0.006775079667568207, - "step": 7925 - }, - { - "epoch": 1.3520886615515773, - "grad_norm": 0.08011666685342789, - "learning_rate": 5.714937448310015e-05, - "loss": 0.0064566083252429966, - "step": 7930 - }, - { - "epoch": 1.3529411764705883, - "grad_norm": 0.06395548582077026, - "learning_rate": 5.7127546952912686e-05, - "loss": 0.009279583394527436, - "step": 7935 - }, - { - "epoch": 1.3537936913895994, - "grad_norm": 0.06697574257850647, - "learning_rate": 5.710571025993342e-05, - "loss": 0.005718713253736496, - "step": 7940 - }, - { - "epoch": 1.3546462063086104, - "grad_norm": 0.08821829408407211, - "learning_rate": 5.7083864414356414e-05, - "loss": 0.008157726377248764, - "step": 7945 - }, - { - "epoch": 1.3554987212276215, - "grad_norm": 0.07097669690847397, - "learning_rate": 5.706200942638006e-05, - "loss": 0.004782359302043915, - "step": 7950 - }, - { - "epoch": 1.3563512361466326, - "grad_norm": 0.05015713721513748, - "learning_rate": 5.7040145306206963e-05, - "loss": 0.004204710572957992, - "step": 7955 - }, - { - "epoch": 1.3572037510656436, - "grad_norm": 0.054049719125032425, - "learning_rate": 5.701827206404406e-05, - "loss": 0.00606432780623436, - "step": 7960 - }, - { - "epoch": 1.3580562659846547, - "grad_norm": 0.0878557488322258, - "learning_rate": 5.6996389710102474e-05, - "loss": 0.008037066459655762, - "step": 7965 - }, - { - "epoch": 1.3589087809036657, - "grad_norm": 0.10873926430940628, - "learning_rate": 5.697449825459762e-05, - "loss": 0.007864821702241898, - "step": 7970 - }, - { - "epoch": 1.3597612958226768, - "grad_norm": 0.05823246389627457, - "learning_rate": 5.695259770774919e-05, - "loss": 0.00715988278388977, - "step": 7975 - }, - { - "epoch": 1.3606138107416879, - "grad_norm": 0.06690117716789246, - "learning_rate": 5.693068807978106e-05, - "loss": 0.006888707727193832, - "step": 7980 - }, - { - "epoch": 1.3614663256606991, - "grad_norm": 0.07290884852409363, - "learning_rate": 5.6908769380921363e-05, - "loss": 0.005684115365147591, - "step": 7985 - }, - { - "epoch": 1.3623188405797102, - "grad_norm": 0.07930465042591095, - "learning_rate": 5.6886841621402504e-05, - "loss": 0.0077220767736434935, - "step": 7990 - }, - { - "epoch": 1.3631713554987213, - "grad_norm": 0.08893048763275146, - "learning_rate": 5.686490481146107e-05, - "loss": 0.007824088633060455, - "step": 7995 - }, - { - "epoch": 1.3640238704177323, - "grad_norm": 0.09335844218730927, - "learning_rate": 5.6842958961337905e-05, - "loss": 0.006522499769926071, - "step": 8000 - }, - { - "epoch": 1.3648763853367434, - "grad_norm": 0.07194571942090988, - "learning_rate": 5.682100408127806e-05, - "loss": 0.008011893928050995, - "step": 8005 - }, - { - "epoch": 1.3657289002557544, - "grad_norm": 0.053429413586854935, - "learning_rate": 5.6799040181530794e-05, - "loss": 0.006260050833225251, - "step": 8010 - }, - { - "epoch": 1.3665814151747655, - "grad_norm": 0.11974478513002396, - "learning_rate": 5.677706727234959e-05, - "loss": 0.006692723929882049, - "step": 8015 - }, - { - "epoch": 1.3674339300937766, - "grad_norm": 0.07810027152299881, - "learning_rate": 5.6755085363992155e-05, - "loss": 0.007429388910531997, - "step": 8020 - }, - { - "epoch": 1.3682864450127878, - "grad_norm": 0.10204190760850906, - "learning_rate": 5.673309446672034e-05, - "loss": 0.005550343170762062, - "step": 8025 - }, - { - "epoch": 1.369138959931799, - "grad_norm": 0.07640541344881058, - "learning_rate": 5.671109459080026e-05, - "loss": 0.006840181350708008, - "step": 8030 - }, - { - "epoch": 1.36999147485081, - "grad_norm": 0.06644181162118912, - "learning_rate": 5.668908574650216e-05, - "loss": 0.005395495146512985, - "step": 8035 - }, - { - "epoch": 1.370843989769821, - "grad_norm": 0.09630967676639557, - "learning_rate": 5.6667067944100526e-05, - "loss": 0.005423872545361519, - "step": 8040 - }, - { - "epoch": 1.371696504688832, - "grad_norm": 0.07114128023386002, - "learning_rate": 5.664504119387398e-05, - "loss": 0.007013414800167084, - "step": 8045 - }, - { - "epoch": 1.3725490196078431, - "grad_norm": 0.07324981689453125, - "learning_rate": 5.662300550610535e-05, - "loss": 0.008274464309215546, - "step": 8050 - }, - { - "epoch": 1.3734015345268542, - "grad_norm": 0.06012870743870735, - "learning_rate": 5.660096089108163e-05, - "loss": 0.00520169697701931, - "step": 8055 - }, - { - "epoch": 1.3742540494458653, - "grad_norm": 0.07458557933568954, - "learning_rate": 5.657890735909397e-05, - "loss": 0.006112886965274811, - "step": 8060 - }, - { - "epoch": 1.3751065643648763, - "grad_norm": 0.0470297709107399, - "learning_rate": 5.655684492043771e-05, - "loss": 0.004435106366872788, - "step": 8065 - }, - { - "epoch": 1.3759590792838874, - "grad_norm": 0.05244847387075424, - "learning_rate": 5.653477358541231e-05, - "loss": 0.006484140455722809, - "step": 8070 - }, - { - "epoch": 1.3768115942028984, - "grad_norm": 0.10809201747179031, - "learning_rate": 5.651269336432142e-05, - "loss": 0.006385499238967895, - "step": 8075 - }, - { - "epoch": 1.3776641091219095, - "grad_norm": 0.11761374026536942, - "learning_rate": 5.649060426747281e-05, - "loss": 0.0056259695440530775, - "step": 8080 - }, - { - "epoch": 1.3785166240409208, - "grad_norm": 0.06250949203968048, - "learning_rate": 5.646850630517842e-05, - "loss": 0.005127568915486336, - "step": 8085 - }, - { - "epoch": 1.3793691389599319, - "grad_norm": 0.07686682790517807, - "learning_rate": 5.6446399487754307e-05, - "loss": 0.006484859436750412, - "step": 8090 - }, - { - "epoch": 1.380221653878943, - "grad_norm": 0.10453952848911285, - "learning_rate": 5.6424283825520656e-05, - "loss": 0.007125881314277649, - "step": 8095 - }, - { - "epoch": 1.381074168797954, - "grad_norm": 0.08170976489782333, - "learning_rate": 5.640215932880181e-05, - "loss": 0.007152590900659561, - "step": 8100 - }, - { - "epoch": 1.381926683716965, - "grad_norm": 0.08639637380838394, - "learning_rate": 5.638002600792621e-05, - "loss": 0.006862475723028183, - "step": 8105 - }, - { - "epoch": 1.382779198635976, - "grad_norm": 0.061349738389253616, - "learning_rate": 5.635788387322642e-05, - "loss": 0.006520121544599533, - "step": 8110 - }, - { - "epoch": 1.3836317135549872, - "grad_norm": 0.09568873792886734, - "learning_rate": 5.633573293503915e-05, - "loss": 0.00690893828868866, - "step": 8115 - }, - { - "epoch": 1.3844842284739982, - "grad_norm": 0.05280910059809685, - "learning_rate": 5.631357320370518e-05, - "loss": 0.0068241022527217865, - "step": 8120 - }, - { - "epoch": 1.3853367433930095, - "grad_norm": 0.08307540416717529, - "learning_rate": 5.6291404689569406e-05, - "loss": 0.009796305000782013, - "step": 8125 - }, - { - "epoch": 1.3861892583120206, - "grad_norm": 0.06511564552783966, - "learning_rate": 5.6269227402980824e-05, - "loss": 0.00675605982542038, - "step": 8130 - }, - { - "epoch": 1.3870417732310316, - "grad_norm": 0.09521665424108505, - "learning_rate": 5.624704135429255e-05, - "loss": 0.00661565363407135, - "step": 8135 - }, - { - "epoch": 1.3878942881500427, - "grad_norm": 0.06467590481042862, - "learning_rate": 5.622484655386175e-05, - "loss": 0.007056808471679688, - "step": 8140 - }, - { - "epoch": 1.3887468030690537, - "grad_norm": 0.04240449517965317, - "learning_rate": 5.62026430120497e-05, - "loss": 0.005277678743004799, - "step": 8145 - }, - { - "epoch": 1.3895993179880648, - "grad_norm": 0.08462672680616379, - "learning_rate": 5.618043073922176e-05, - "loss": 0.005951377004384995, - "step": 8150 - }, - { - "epoch": 1.3904518329070759, - "grad_norm": 0.08304573595523834, - "learning_rate": 5.615820974574735e-05, - "loss": 0.006729351729154587, - "step": 8155 - }, - { - "epoch": 1.391304347826087, - "grad_norm": 0.04584382846951485, - "learning_rate": 5.6135980041999964e-05, - "loss": 0.00490913912653923, - "step": 8160 - }, - { - "epoch": 1.392156862745098, - "grad_norm": 0.06771710515022278, - "learning_rate": 5.6113741638357175e-05, - "loss": 0.007046511024236679, - "step": 8165 - }, - { - "epoch": 1.393009377664109, - "grad_norm": 0.06334209442138672, - "learning_rate": 5.609149454520062e-05, - "loss": 0.006314977258443833, - "step": 8170 - }, - { - "epoch": 1.39386189258312, - "grad_norm": 0.06783269345760345, - "learning_rate": 5.606923877291595e-05, - "loss": 0.006176649779081345, - "step": 8175 - }, - { - "epoch": 1.3947144075021312, - "grad_norm": 0.10245220363140106, - "learning_rate": 5.604697433189293e-05, - "loss": 0.006309907138347626, - "step": 8180 - }, - { - "epoch": 1.3955669224211424, - "grad_norm": 0.07151709496974945, - "learning_rate": 5.6024701232525325e-05, - "loss": 0.005038458108901978, - "step": 8185 - }, - { - "epoch": 1.3964194373401535, - "grad_norm": 0.08811933547258377, - "learning_rate": 5.600241948521099e-05, - "loss": 0.006065644696354866, - "step": 8190 - }, - { - "epoch": 1.3972719522591646, - "grad_norm": 0.07598903775215149, - "learning_rate": 5.5980129100351736e-05, - "loss": 0.006201237812638283, - "step": 8195 - }, - { - "epoch": 1.3981244671781756, - "grad_norm": 0.058092061430215836, - "learning_rate": 5.5957830088353475e-05, - "loss": 0.006383272260427475, - "step": 8200 - }, - { - "epoch": 1.3989769820971867, - "grad_norm": 0.18196560442447662, - "learning_rate": 5.593552245962616e-05, - "loss": 0.004768157005310058, - "step": 8205 - }, - { - "epoch": 1.3998294970161977, - "grad_norm": 0.09071574360132217, - "learning_rate": 5.591320622458369e-05, - "loss": 0.007671289891004562, - "step": 8210 - }, - { - "epoch": 1.4006820119352088, - "grad_norm": 0.09470858424901962, - "learning_rate": 5.589088139364405e-05, - "loss": 0.006691985577344894, - "step": 8215 - }, - { - "epoch": 1.40153452685422, - "grad_norm": 0.05345157906413078, - "learning_rate": 5.58685479772292e-05, - "loss": 0.005832263827323913, - "step": 8220 - }, - { - "epoch": 1.4023870417732311, - "grad_norm": 0.08154480904340744, - "learning_rate": 5.584620598576514e-05, - "loss": 0.00907905399799347, - "step": 8225 - }, - { - "epoch": 1.4032395566922422, - "grad_norm": 0.06621691584587097, - "learning_rate": 5.582385542968185e-05, - "loss": 0.005938088893890381, - "step": 8230 - }, - { - "epoch": 1.4040920716112533, - "grad_norm": 0.0557720884680748, - "learning_rate": 5.580149631941329e-05, - "loss": 0.005079039558768273, - "step": 8235 - }, - { - "epoch": 1.4049445865302643, - "grad_norm": 0.07839027792215347, - "learning_rate": 5.577912866539746e-05, - "loss": 0.006307472288608551, - "step": 8240 - }, - { - "epoch": 1.4057971014492754, - "grad_norm": 0.05926419049501419, - "learning_rate": 5.575675247807632e-05, - "loss": 0.0072102643549442295, - "step": 8245 - }, - { - "epoch": 1.4066496163682864, - "grad_norm": 0.0570182129740715, - "learning_rate": 5.5734367767895814e-05, - "loss": 0.0066485337913036345, - "step": 8250 - }, - { - "epoch": 1.4075021312872975, - "grad_norm": 0.0930657833814621, - "learning_rate": 5.571197454530588e-05, - "loss": 0.005854785442352295, - "step": 8255 - }, - { - "epoch": 1.4083546462063086, - "grad_norm": 0.06018427759408951, - "learning_rate": 5.568957282076041e-05, - "loss": 0.0049718767404556274, - "step": 8260 - }, - { - "epoch": 1.4092071611253196, - "grad_norm": 0.0889105498790741, - "learning_rate": 5.566716260471726e-05, - "loss": 0.005993577092885971, - "step": 8265 - }, - { - "epoch": 1.4100596760443307, - "grad_norm": 0.04429823160171509, - "learning_rate": 5.5644743907638294e-05, - "loss": 0.005357314646244049, - "step": 8270 - }, - { - "epoch": 1.4109121909633418, - "grad_norm": 0.054813142865896225, - "learning_rate": 5.5622316739989296e-05, - "loss": 0.005666692927479744, - "step": 8275 - }, - { - "epoch": 1.4117647058823528, - "grad_norm": 0.06909197568893433, - "learning_rate": 5.559988111224001e-05, - "loss": 0.005058525875210762, - "step": 8280 - }, - { - "epoch": 1.412617220801364, - "grad_norm": 0.10060004889965057, - "learning_rate": 5.557743703486413e-05, - "loss": 0.0070966087281703946, - "step": 8285 - }, - { - "epoch": 1.4134697357203752, - "grad_norm": 0.052008096128702164, - "learning_rate": 5.555498451833929e-05, - "loss": 0.006671085208654404, - "step": 8290 - }, - { - "epoch": 1.4143222506393862, - "grad_norm": 0.06272758543491364, - "learning_rate": 5.5532523573147094e-05, - "loss": 0.006071484088897705, - "step": 8295 - }, - { - "epoch": 1.4151747655583973, - "grad_norm": 0.08005380630493164, - "learning_rate": 5.551005420977304e-05, - "loss": 0.006429645419120789, - "step": 8300 - }, - { - "epoch": 1.4160272804774083, - "grad_norm": 0.08246695250272751, - "learning_rate": 5.548757643870659e-05, - "loss": 0.004599097743630409, - "step": 8305 - }, - { - "epoch": 1.4168797953964194, - "grad_norm": 0.1190599724650383, - "learning_rate": 5.54650902704411e-05, - "loss": 0.00652359127998352, - "step": 8310 - }, - { - "epoch": 1.4177323103154305, - "grad_norm": 0.042393747717142105, - "learning_rate": 5.5442595715473867e-05, - "loss": 0.004551848769187928, - "step": 8315 - }, - { - "epoch": 1.4185848252344417, - "grad_norm": 0.0809137374162674, - "learning_rate": 5.54200927843061e-05, - "loss": 0.0062880381941795346, - "step": 8320 - }, - { - "epoch": 1.4194373401534528, - "grad_norm": 0.09030820429325104, - "learning_rate": 5.5397581487442905e-05, - "loss": 0.007365265488624572, - "step": 8325 - }, - { - "epoch": 1.4202898550724639, - "grad_norm": 0.060766976326704025, - "learning_rate": 5.537506183539333e-05, - "loss": 0.0047208376228809355, - "step": 8330 - }, - { - "epoch": 1.421142369991475, - "grad_norm": 0.0763862356543541, - "learning_rate": 5.535253383867027e-05, - "loss": 0.006549081206321717, - "step": 8335 - }, - { - "epoch": 1.421994884910486, - "grad_norm": 0.13493886590003967, - "learning_rate": 5.532999750779056e-05, - "loss": 0.0075773999094963075, - "step": 8340 - }, - { - "epoch": 1.422847399829497, - "grad_norm": 0.07888541370630264, - "learning_rate": 5.53074528532749e-05, - "loss": 0.007893601059913635, - "step": 8345 - }, - { - "epoch": 1.423699914748508, - "grad_norm": 0.04488302394747734, - "learning_rate": 5.528489988564789e-05, - "loss": 0.006052879989147187, - "step": 8350 - }, - { - "epoch": 1.4245524296675192, - "grad_norm": 0.09534436464309692, - "learning_rate": 5.5262338615438e-05, - "loss": 0.006183170899748802, - "step": 8355 - }, - { - "epoch": 1.4254049445865302, - "grad_norm": 0.0796227753162384, - "learning_rate": 5.523976905317758e-05, - "loss": 0.006368820369243622, - "step": 8360 - }, - { - "epoch": 1.4262574595055413, - "grad_norm": 0.039230771362781525, - "learning_rate": 5.521719120940287e-05, - "loss": 0.005282421037554741, - "step": 8365 - }, - { - "epoch": 1.4271099744245523, - "grad_norm": 0.12020807713270187, - "learning_rate": 5.5194605094653935e-05, - "loss": 0.00718868374824524, - "step": 8370 - }, - { - "epoch": 1.4279624893435634, - "grad_norm": 0.07434894144535065, - "learning_rate": 5.5172010719474746e-05, - "loss": 0.007591472566127777, - "step": 8375 - }, - { - "epoch": 1.4288150042625745, - "grad_norm": 0.0722346156835556, - "learning_rate": 5.514940809441309e-05, - "loss": 0.005854631587862968, - "step": 8380 - }, - { - "epoch": 1.4296675191815857, - "grad_norm": 0.07834078371524811, - "learning_rate": 5.5126797230020634e-05, - "loss": 0.007415357977151871, - "step": 8385 - }, - { - "epoch": 1.4305200341005968, - "grad_norm": 0.08718696236610413, - "learning_rate": 5.5104178136852884e-05, - "loss": 0.007089633494615555, - "step": 8390 - }, - { - "epoch": 1.4313725490196079, - "grad_norm": 0.08823035657405853, - "learning_rate": 5.508155082546918e-05, - "loss": 0.007153714448213578, - "step": 8395 - }, - { - "epoch": 1.432225063938619, - "grad_norm": 0.07249119132757187, - "learning_rate": 5.505891530643269e-05, - "loss": 0.007651906460523605, - "step": 8400 - }, - { - "epoch": 1.43307757885763, - "grad_norm": 0.06284502893686295, - "learning_rate": 5.503627159031046e-05, - "loss": 0.007741397619247437, - "step": 8405 - }, - { - "epoch": 1.433930093776641, - "grad_norm": 0.06557357311248779, - "learning_rate": 5.501361968767331e-05, - "loss": 0.007656902819871902, - "step": 8410 - }, - { - "epoch": 1.434782608695652, - "grad_norm": 0.0775318294763565, - "learning_rate": 5.49909596090959e-05, - "loss": 0.006714560091495514, - "step": 8415 - }, - { - "epoch": 1.4356351236146634, - "grad_norm": 0.05347822234034538, - "learning_rate": 5.496829136515672e-05, - "loss": 0.0048537302762269975, - "step": 8420 - }, - { - "epoch": 1.4364876385336744, - "grad_norm": 0.07040467113256454, - "learning_rate": 5.4945614966438046e-05, - "loss": 0.005411979556083679, - "step": 8425 - }, - { - "epoch": 1.4373401534526855, - "grad_norm": 0.09473410993814468, - "learning_rate": 5.492293042352598e-05, - "loss": 0.008005911856889725, - "step": 8430 - }, - { - "epoch": 1.4381926683716966, - "grad_norm": 0.037446580827236176, - "learning_rate": 5.4900237747010426e-05, - "loss": 0.006237779557704925, - "step": 8435 - }, - { - "epoch": 1.4390451832907076, - "grad_norm": 0.11029476672410965, - "learning_rate": 5.4877536947485074e-05, - "loss": 0.008190502226352692, - "step": 8440 - }, - { - "epoch": 1.4398976982097187, - "grad_norm": 0.0514204315841198, - "learning_rate": 5.4854828035547424e-05, - "loss": 0.006500741839408875, - "step": 8445 - }, - { - "epoch": 1.4407502131287298, - "grad_norm": 0.08411483466625214, - "learning_rate": 5.483211102179873e-05, - "loss": 0.0053235463798046116, - "step": 8450 - }, - { - "epoch": 1.4416027280477408, - "grad_norm": 0.09279052913188934, - "learning_rate": 5.480938591684407e-05, - "loss": 0.006842000037431717, - "step": 8455 - }, - { - "epoch": 1.4424552429667519, - "grad_norm": 0.09881046414375305, - "learning_rate": 5.478665273129228e-05, - "loss": 0.007064050436019898, - "step": 8460 - }, - { - "epoch": 1.443307757885763, - "grad_norm": 0.09018172323703766, - "learning_rate": 5.476391147575595e-05, - "loss": 0.005222787708044052, - "step": 8465 - }, - { - "epoch": 1.444160272804774, - "grad_norm": 0.11489493399858475, - "learning_rate": 5.4741162160851455e-05, - "loss": 0.004823528230190277, - "step": 8470 - }, - { - "epoch": 1.445012787723785, - "grad_norm": 0.11010619252920151, - "learning_rate": 5.4718404797198955e-05, - "loss": 0.007554465532302856, - "step": 8475 - }, - { - "epoch": 1.4458653026427961, - "grad_norm": 0.10469060391187668, - "learning_rate": 5.469563939542233e-05, - "loss": 0.005817038565874099, - "step": 8480 - }, - { - "epoch": 1.4467178175618074, - "grad_norm": 0.06776002794504166, - "learning_rate": 5.467286596614922e-05, - "loss": 0.00899386927485466, - "step": 8485 - }, - { - "epoch": 1.4475703324808185, - "grad_norm": 0.08414942771196365, - "learning_rate": 5.4650084520011026e-05, - "loss": 0.00611347034573555, - "step": 8490 - }, - { - "epoch": 1.4484228473998295, - "grad_norm": 0.09625279158353806, - "learning_rate": 5.462729506764289e-05, - "loss": 0.005645812302827835, - "step": 8495 - }, - { - "epoch": 1.4492753623188406, - "grad_norm": 0.10020645707845688, - "learning_rate": 5.4604497619683674e-05, - "loss": 0.0058198563754558565, - "step": 8500 - }, - { - "epoch": 1.4501278772378516, - "grad_norm": 0.08466780185699463, - "learning_rate": 5.4581692186776e-05, - "loss": 0.005098164081573486, - "step": 8505 - }, - { - "epoch": 1.4509803921568627, - "grad_norm": 0.058955296874046326, - "learning_rate": 5.4558878779566194e-05, - "loss": 0.004072735831141472, - "step": 8510 - }, - { - "epoch": 1.4518329070758738, - "grad_norm": 0.14849397540092468, - "learning_rate": 5.4536057408704304e-05, - "loss": 0.011097650229930877, - "step": 8515 - }, - { - "epoch": 1.452685421994885, - "grad_norm": 0.08641809970140457, - "learning_rate": 5.451322808484413e-05, - "loss": 0.006210784614086151, - "step": 8520 - }, - { - "epoch": 1.453537936913896, - "grad_norm": 0.07506752014160156, - "learning_rate": 5.4490390818643136e-05, - "loss": 0.006071462482213974, - "step": 8525 - }, - { - "epoch": 1.4543904518329072, - "grad_norm": 0.10383405536413193, - "learning_rate": 5.4467545620762545e-05, - "loss": 0.008749781548976899, - "step": 8530 - }, - { - "epoch": 1.4552429667519182, - "grad_norm": 0.08180487155914307, - "learning_rate": 5.444469250186721e-05, - "loss": 0.00613279715180397, - "step": 8535 - }, - { - "epoch": 1.4560954816709293, - "grad_norm": 0.07797367125749588, - "learning_rate": 5.442183147262577e-05, - "loss": 0.005885690450668335, - "step": 8540 - }, - { - "epoch": 1.4569479965899403, - "grad_norm": 0.0780278891324997, - "learning_rate": 5.439896254371049e-05, - "loss": 0.007259850949048996, - "step": 8545 - }, - { - "epoch": 1.4578005115089514, - "grad_norm": 0.10005395114421844, - "learning_rate": 5.437608572579737e-05, - "loss": 0.0061523888260126116, - "step": 8550 - }, - { - "epoch": 1.4586530264279625, - "grad_norm": 0.10141763836145401, - "learning_rate": 5.435320102956604e-05, - "loss": 0.006501191109418869, - "step": 8555 - }, - { - "epoch": 1.4595055413469735, - "grad_norm": 0.0938732773065567, - "learning_rate": 5.4330308465699865e-05, - "loss": 0.008337517827749252, - "step": 8560 - }, - { - "epoch": 1.4603580562659846, - "grad_norm": 0.1085699051618576, - "learning_rate": 5.430740804488582e-05, - "loss": 0.005681714415550232, - "step": 8565 - }, - { - "epoch": 1.4612105711849956, - "grad_norm": 0.07967904955148697, - "learning_rate": 5.428449977781463e-05, - "loss": 0.006299185007810593, - "step": 8570 - }, - { - "epoch": 1.4620630861040067, - "grad_norm": 0.090158611536026, - "learning_rate": 5.426158367518061e-05, - "loss": 0.007821831852197647, - "step": 8575 - }, - { - "epoch": 1.4629156010230178, - "grad_norm": 0.12222256511449814, - "learning_rate": 5.4238659747681736e-05, - "loss": 0.0065193742513656614, - "step": 8580 - }, - { - "epoch": 1.463768115942029, - "grad_norm": 0.07724417746067047, - "learning_rate": 5.421572800601971e-05, - "loss": 0.00850745365023613, - "step": 8585 - }, - { - "epoch": 1.46462063086104, - "grad_norm": 0.07322543114423752, - "learning_rate": 5.4192788460899786e-05, - "loss": 0.006478501856327057, - "step": 8590 - }, - { - "epoch": 1.4654731457800512, - "grad_norm": 0.07086360454559326, - "learning_rate": 5.416984112303095e-05, - "loss": 0.007459370046854019, - "step": 8595 - }, - { - "epoch": 1.4663256606990622, - "grad_norm": 0.08460366725921631, - "learning_rate": 5.414688600312575e-05, - "loss": 0.006461035460233688, - "step": 8600 - }, - { - "epoch": 1.4671781756180733, - "grad_norm": 0.06856394559144974, - "learning_rate": 5.412392311190041e-05, - "loss": 0.007420676201581955, - "step": 8605 - }, - { - "epoch": 1.4680306905370843, - "grad_norm": 0.06801126897335052, - "learning_rate": 5.4100952460074766e-05, - "loss": 0.006456401199102402, - "step": 8610 - }, - { - "epoch": 1.4688832054560954, - "grad_norm": 0.06273184716701508, - "learning_rate": 5.4077974058372295e-05, - "loss": 0.00508052185177803, - "step": 8615 - }, - { - "epoch": 1.4697357203751067, - "grad_norm": 0.07751575112342834, - "learning_rate": 5.405498791752007e-05, - "loss": 0.006596812605857849, - "step": 8620 - }, - { - "epoch": 1.4705882352941178, - "grad_norm": 0.10850238054990768, - "learning_rate": 5.4031994048248776e-05, - "loss": 0.006385332345962525, - "step": 8625 - }, - { - "epoch": 1.4714407502131288, - "grad_norm": 0.07195930927991867, - "learning_rate": 5.4008992461292736e-05, - "loss": 0.007354143261909485, - "step": 8630 - }, - { - "epoch": 1.4722932651321399, - "grad_norm": 0.061606891453266144, - "learning_rate": 5.3985983167389846e-05, - "loss": 0.007285259664058685, - "step": 8635 - }, - { - "epoch": 1.473145780051151, - "grad_norm": 0.059549275785684586, - "learning_rate": 5.3962966177281616e-05, - "loss": 0.005211231112480163, - "step": 8640 - }, - { - "epoch": 1.473998294970162, - "grad_norm": 0.04548822343349457, - "learning_rate": 5.3939941501713146e-05, - "loss": 0.00805831179022789, - "step": 8645 - }, - { - "epoch": 1.474850809889173, - "grad_norm": 0.046682652086019516, - "learning_rate": 5.3916909151433096e-05, - "loss": 0.005787956342101097, - "step": 8650 - }, - { - "epoch": 1.4757033248081841, - "grad_norm": 0.06405246257781982, - "learning_rate": 5.3893869137193755e-05, - "loss": 0.005377359688282013, - "step": 8655 - }, - { - "epoch": 1.4765558397271952, - "grad_norm": 0.09410709887742996, - "learning_rate": 5.3870821469750964e-05, - "loss": 0.006961540877819061, - "step": 8660 - }, - { - "epoch": 1.4774083546462062, - "grad_norm": 0.0637243241071701, - "learning_rate": 5.384776615986414e-05, - "loss": 0.0060172989964485165, - "step": 8665 - }, - { - "epoch": 1.4782608695652173, - "grad_norm": 0.07082457840442657, - "learning_rate": 5.382470321829627e-05, - "loss": 0.005960140377283096, - "step": 8670 - }, - { - "epoch": 1.4791133844842284, - "grad_norm": 0.06502280384302139, - "learning_rate": 5.380163265581391e-05, - "loss": 0.005596417188644409, - "step": 8675 - }, - { - "epoch": 1.4799658994032396, - "grad_norm": 0.07504235208034515, - "learning_rate": 5.3778554483187134e-05, - "loss": 0.007427094876766205, - "step": 8680 - }, - { - "epoch": 1.4808184143222507, - "grad_norm": 0.08005198836326599, - "learning_rate": 5.375546871118964e-05, - "loss": 0.006888572126626968, - "step": 8685 - }, - { - "epoch": 1.4816709292412618, - "grad_norm": 0.1083201915025711, - "learning_rate": 5.373237535059861e-05, - "loss": 0.007253114879131317, - "step": 8690 - }, - { - "epoch": 1.4825234441602728, - "grad_norm": 0.060000013560056686, - "learning_rate": 5.37092744121948e-05, - "loss": 0.005570416525006294, - "step": 8695 - }, - { - "epoch": 1.4833759590792839, - "grad_norm": 0.04832584038376808, - "learning_rate": 5.3686165906762504e-05, - "loss": 0.005356843769550324, - "step": 8700 - }, - { - "epoch": 1.484228473998295, - "grad_norm": 0.061586812138557434, - "learning_rate": 5.3663049845089534e-05, - "loss": 0.005226074159145356, - "step": 8705 - }, - { - "epoch": 1.485080988917306, - "grad_norm": 0.08486256003379822, - "learning_rate": 5.363992623796724e-05, - "loss": 0.007083939760923386, - "step": 8710 - }, - { - "epoch": 1.485933503836317, - "grad_norm": 0.09085836261510849, - "learning_rate": 5.361679509619048e-05, - "loss": 0.005988218262791634, - "step": 8715 - }, - { - "epoch": 1.4867860187553283, - "grad_norm": 0.06301745027303696, - "learning_rate": 5.359365643055765e-05, - "loss": 0.00595020055770874, - "step": 8720 - }, - { - "epoch": 1.4876385336743394, - "grad_norm": 0.07939866930246353, - "learning_rate": 5.3570510251870646e-05, - "loss": 0.006101110950112343, - "step": 8725 - }, - { - "epoch": 1.4884910485933505, - "grad_norm": 0.10560661554336548, - "learning_rate": 5.354735657093487e-05, - "loss": 0.006781180202960968, - "step": 8730 - }, - { - "epoch": 1.4893435635123615, - "grad_norm": 0.10549639165401459, - "learning_rate": 5.352419539855925e-05, - "loss": 0.006455187499523163, - "step": 8735 - }, - { - "epoch": 1.4901960784313726, - "grad_norm": 0.06474289298057556, - "learning_rate": 5.3501026745556157e-05, - "loss": 0.0078111283481121065, - "step": 8740 - }, - { - "epoch": 1.4910485933503836, - "grad_norm": 0.11109986901283264, - "learning_rate": 5.3477850622741525e-05, - "loss": 0.00798504576086998, - "step": 8745 - }, - { - "epoch": 1.4919011082693947, - "grad_norm": 0.0787222608923912, - "learning_rate": 5.3454667040934715e-05, - "loss": 0.007222773879766465, - "step": 8750 - }, - { - "epoch": 1.4927536231884058, - "grad_norm": 0.06622221320867538, - "learning_rate": 5.3431476010958613e-05, - "loss": 0.0064462460577487946, - "step": 8755 - }, - { - "epoch": 1.4936061381074168, - "grad_norm": 0.07526405900716782, - "learning_rate": 5.340827754363955e-05, - "loss": 0.005344667285680771, - "step": 8760 - }, - { - "epoch": 1.4944586530264279, - "grad_norm": 0.08911366015672684, - "learning_rate": 5.338507164980734e-05, - "loss": 0.006722994893789291, - "step": 8765 - }, - { - "epoch": 1.495311167945439, - "grad_norm": 0.03749583289027214, - "learning_rate": 5.336185834029527e-05, - "loss": 0.006120331957936287, - "step": 8770 - }, - { - "epoch": 1.49616368286445, - "grad_norm": 0.08167645335197449, - "learning_rate": 5.333863762594008e-05, - "loss": 0.007496768981218338, - "step": 8775 - }, - { - "epoch": 1.4970161977834613, - "grad_norm": 0.09045904129743576, - "learning_rate": 5.3315409517581996e-05, - "loss": 0.007222528755664826, - "step": 8780 - }, - { - "epoch": 1.4978687127024723, - "grad_norm": 0.06064090132713318, - "learning_rate": 5.329217402606464e-05, - "loss": 0.0044986031949520115, - "step": 8785 - }, - { - "epoch": 1.4987212276214834, - "grad_norm": 0.07282263785600662, - "learning_rate": 5.3268931162235126e-05, - "loss": 0.005251912400126457, - "step": 8790 - }, - { - "epoch": 1.4995737425404945, - "grad_norm": 0.0674249604344368, - "learning_rate": 5.324568093694401e-05, - "loss": 0.006289477646350861, - "step": 8795 - }, - { - "epoch": 1.4997442455242966, - "eval_loss": 0.03760311380028725, - "eval_runtime": 3.668, - "eval_samples_per_second": 68.702, - "eval_steps_per_second": 1.091, - "step": 8796 - }, - { - "eval_cer_subset": 0.014184050678261437, - "eval_cer_subset_edit_distance": 871, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 8796 - }, - { - "epoch": 1.5004262574595055, - "grad_norm": 0.06114037334918976, - "learning_rate": 5.322242336104525e-05, - "loss": 0.005809751898050308, - "step": 8800 - }, - { - "epoch": 1.5012787723785166, - "grad_norm": 0.08830825984477997, - "learning_rate": 5.319915844539626e-05, - "loss": 0.006921032071113586, - "step": 8805 - }, - { - "epoch": 1.5021312872975279, - "grad_norm": 0.10813544690608978, - "learning_rate": 5.3175886200857873e-05, - "loss": 0.007966426759958267, - "step": 8810 - }, - { - "epoch": 1.502983802216539, - "grad_norm": 0.08357173204421997, - "learning_rate": 5.3152606638294355e-05, - "loss": 0.006943506002426147, - "step": 8815 - }, - { - "epoch": 1.50383631713555, - "grad_norm": 0.08059901744127274, - "learning_rate": 5.312931976857339e-05, - "loss": 0.0047626100480556485, - "step": 8820 - }, - { - "epoch": 1.504688832054561, - "grad_norm": 0.07412680238485336, - "learning_rate": 5.310602560256604e-05, - "loss": 0.00709492564201355, - "step": 8825 - }, - { - "epoch": 1.5055413469735721, - "grad_norm": 0.046478480100631714, - "learning_rate": 5.3082724151146814e-05, - "loss": 0.006465598940849304, - "step": 8830 - }, - { - "epoch": 1.5063938618925832, - "grad_norm": 0.11122216284275055, - "learning_rate": 5.30594154251936e-05, - "loss": 0.00888531506061554, - "step": 8835 - }, - { - "epoch": 1.5072463768115942, - "grad_norm": 0.06441432982683182, - "learning_rate": 5.3036099435587685e-05, - "loss": 0.005882937833666802, - "step": 8840 - }, - { - "epoch": 1.5080988917306053, - "grad_norm": 0.05722307041287422, - "learning_rate": 5.301277619321374e-05, - "loss": 0.0059202808886766435, - "step": 8845 - }, - { - "epoch": 1.5089514066496164, - "grad_norm": 0.06677310913801193, - "learning_rate": 5.2989445708959856e-05, - "loss": 0.0064939349889755246, - "step": 8850 - }, - { - "epoch": 1.5098039215686274, - "grad_norm": 0.08854222297668457, - "learning_rate": 5.296610799371745e-05, - "loss": 0.007034827768802643, - "step": 8855 - }, - { - "epoch": 1.5106564364876385, - "grad_norm": 0.059711627662181854, - "learning_rate": 5.2942763058381356e-05, - "loss": 0.007557753473520279, - "step": 8860 - }, - { - "epoch": 1.5115089514066495, - "grad_norm": 0.06355257332324982, - "learning_rate": 5.291941091384977e-05, - "loss": 0.006534597277641297, - "step": 8865 - }, - { - "epoch": 1.5123614663256606, - "grad_norm": 0.05741631239652634, - "learning_rate": 5.2896051571024255e-05, - "loss": 0.006453331559896469, - "step": 8870 - }, - { - "epoch": 1.5132139812446717, - "grad_norm": 0.05809224396944046, - "learning_rate": 5.287268504080972e-05, - "loss": 0.006065556779503822, - "step": 8875 - }, - { - "epoch": 1.5140664961636827, - "grad_norm": 0.04522582143545151, - "learning_rate": 5.284931133411443e-05, - "loss": 0.004097414761781692, - "step": 8880 - }, - { - "epoch": 1.514919011082694, - "grad_norm": 0.09349111467599869, - "learning_rate": 5.2825930461850014e-05, - "loss": 0.005707831308245659, - "step": 8885 - }, - { - "epoch": 1.515771526001705, - "grad_norm": 0.08951391279697418, - "learning_rate": 5.280254243493145e-05, - "loss": 0.00725678950548172, - "step": 8890 - }, - { - "epoch": 1.5166240409207161, - "grad_norm": 0.07826244086027145, - "learning_rate": 5.277914726427705e-05, - "loss": 0.008086606860160828, - "step": 8895 - }, - { - "epoch": 1.5174765558397272, - "grad_norm": 0.0619954876601696, - "learning_rate": 5.2755744960808446e-05, - "loss": 0.005462165176868439, - "step": 8900 - }, - { - "epoch": 1.5183290707587382, - "grad_norm": 0.04414132609963417, - "learning_rate": 5.273233553545062e-05, - "loss": 0.005678927898406983, - "step": 8905 - }, - { - "epoch": 1.5191815856777495, - "grad_norm": 0.07183931767940521, - "learning_rate": 5.2708918999131864e-05, - "loss": 0.007184042781591416, - "step": 8910 - }, - { - "epoch": 1.5200341005967606, - "grad_norm": 0.10447251796722412, - "learning_rate": 5.26854953627838e-05, - "loss": 0.009831231832504273, - "step": 8915 - }, - { - "epoch": 1.5208866155157716, - "grad_norm": 0.04392845183610916, - "learning_rate": 5.266206463734135e-05, - "loss": 0.006517301499843598, - "step": 8920 - }, - { - "epoch": 1.5217391304347827, - "grad_norm": 0.06292697787284851, - "learning_rate": 5.2638626833742776e-05, - "loss": 0.005328541249036789, - "step": 8925 - }, - { - "epoch": 1.5225916453537938, - "grad_norm": 0.06425110250711441, - "learning_rate": 5.2615181962929605e-05, - "loss": 0.006298693269491196, - "step": 8930 - }, - { - "epoch": 1.5234441602728048, - "grad_norm": 0.08059051632881165, - "learning_rate": 5.259173003584669e-05, - "loss": 0.008097793161869048, - "step": 8935 - }, - { - "epoch": 1.5242966751918159, - "grad_norm": 0.0625302791595459, - "learning_rate": 5.256827106344218e-05, - "loss": 0.006664089858531952, - "step": 8940 - }, - { - "epoch": 1.525149190110827, - "grad_norm": 0.06092630326747894, - "learning_rate": 5.254480505666749e-05, - "loss": 0.006084204837679863, - "step": 8945 - }, - { - "epoch": 1.526001705029838, - "grad_norm": 0.07297338545322418, - "learning_rate": 5.2521332026477344e-05, - "loss": 0.006405481696128845, - "step": 8950 - }, - { - "epoch": 1.526854219948849, - "grad_norm": 0.05876631662249565, - "learning_rate": 5.249785198382973e-05, - "loss": 0.006670171767473221, - "step": 8955 - }, - { - "epoch": 1.5277067348678601, - "grad_norm": 0.0633542388677597, - "learning_rate": 5.247436493968589e-05, - "loss": 0.004565924406051636, - "step": 8960 - }, - { - "epoch": 1.5285592497868712, - "grad_norm": 0.09164717048406601, - "learning_rate": 5.2450870905010395e-05, - "loss": 0.005662925541400909, - "step": 8965 - }, - { - "epoch": 1.5294117647058822, - "grad_norm": 0.06646572798490524, - "learning_rate": 5.2427369890771026e-05, - "loss": 0.006319984793663025, - "step": 8970 - }, - { - "epoch": 1.5302642796248933, - "grad_norm": 0.08518269658088684, - "learning_rate": 5.2403861907938826e-05, - "loss": 0.0066184550523757935, - "step": 8975 - }, - { - "epoch": 1.5311167945439044, - "grad_norm": 0.08369076251983643, - "learning_rate": 5.238034696748811e-05, - "loss": 0.005069610476493835, - "step": 8980 - }, - { - "epoch": 1.5319693094629157, - "grad_norm": 0.05607258528470993, - "learning_rate": 5.235682508039646e-05, - "loss": 0.007457223534584045, - "step": 8985 - }, - { - "epoch": 1.5328218243819267, - "grad_norm": 0.0828152522444725, - "learning_rate": 5.2333296257644646e-05, - "loss": 0.007727481424808502, - "step": 8990 - }, - { - "epoch": 1.5336743393009378, - "grad_norm": 0.09770844876766205, - "learning_rate": 5.230976051021671e-05, - "loss": 0.007591258734464645, - "step": 8995 - }, - { - "epoch": 1.5345268542199488, - "grad_norm": 0.05906900763511658, - "learning_rate": 5.2286217849099925e-05, - "loss": 0.008510296791791916, - "step": 9000 - }, - { - "epoch": 1.53537936913896, - "grad_norm": 0.07594765722751617, - "learning_rate": 5.2262668285284785e-05, - "loss": 0.005943647772073746, - "step": 9005 - }, - { - "epoch": 1.5362318840579712, - "grad_norm": 0.056658126413822174, - "learning_rate": 5.223911182976502e-05, - "loss": 0.004702667891979218, - "step": 9010 - }, - { - "epoch": 1.5370843989769822, - "grad_norm": 0.060573313385248184, - "learning_rate": 5.2215548493537556e-05, - "loss": 0.006530648469924927, - "step": 9015 - }, - { - "epoch": 1.5379369138959933, - "grad_norm": 0.06876473873853683, - "learning_rate": 5.219197828760254e-05, - "loss": 0.0070976391434669495, - "step": 9020 - }, - { - "epoch": 1.5387894288150044, - "grad_norm": 0.05402369797229767, - "learning_rate": 5.2168401222963354e-05, - "loss": 0.005997032299637795, - "step": 9025 - }, - { - "epoch": 1.5396419437340154, - "grad_norm": 0.0907805860042572, - "learning_rate": 5.214481731062652e-05, - "loss": 0.007357357442378998, - "step": 9030 - }, - { - "epoch": 1.5404944586530265, - "grad_norm": 0.07572564482688904, - "learning_rate": 5.212122656160182e-05, - "loss": 0.004879472404718399, - "step": 9035 - }, - { - "epoch": 1.5413469735720375, - "grad_norm": 0.05684768036007881, - "learning_rate": 5.209762898690218e-05, - "loss": 0.006248699128627777, - "step": 9040 - }, - { - "epoch": 1.5421994884910486, - "grad_norm": 0.070293128490448, - "learning_rate": 5.2074024597543745e-05, - "loss": 0.005055962502956391, - "step": 9045 - }, - { - "epoch": 1.5430520034100597, - "grad_norm": 0.06611300259828568, - "learning_rate": 5.2050413404545823e-05, - "loss": 0.0048581909388303755, - "step": 9050 - }, - { - "epoch": 1.5439045183290707, - "grad_norm": 0.06960003823041916, - "learning_rate": 5.202679541893092e-05, - "loss": 0.006258350610733032, - "step": 9055 - }, - { - "epoch": 1.5447570332480818, - "grad_norm": 0.059757016599178314, - "learning_rate": 5.2003170651724675e-05, - "loss": 0.006347355991601944, - "step": 9060 - }, - { - "epoch": 1.5456095481670928, - "grad_norm": 0.06531284749507904, - "learning_rate": 5.1979539113955936e-05, - "loss": 0.00543224960565567, - "step": 9065 - }, - { - "epoch": 1.546462063086104, - "grad_norm": 0.08068390935659409, - "learning_rate": 5.195590081665667e-05, - "loss": 0.004933612793684006, - "step": 9070 - }, - { - "epoch": 1.547314578005115, - "grad_norm": 0.06198716536164284, - "learning_rate": 5.193225577086203e-05, - "loss": 0.00523824393749237, - "step": 9075 - }, - { - "epoch": 1.548167092924126, - "grad_norm": 0.07734926789999008, - "learning_rate": 5.190860398761032e-05, - "loss": 0.005699950456619263, - "step": 9080 - }, - { - "epoch": 1.5490196078431373, - "grad_norm": 0.058083925396203995, - "learning_rate": 5.188494547794297e-05, - "loss": 0.006147466972470284, - "step": 9085 - }, - { - "epoch": 1.5498721227621484, - "grad_norm": 0.0675162672996521, - "learning_rate": 5.1861280252904546e-05, - "loss": 0.0059716224670410155, - "step": 9090 - }, - { - "epoch": 1.5507246376811594, - "grad_norm": 0.05415274575352669, - "learning_rate": 5.183760832354278e-05, - "loss": 0.0058246061205863954, - "step": 9095 - }, - { - "epoch": 1.5515771526001705, - "grad_norm": 0.05826190859079361, - "learning_rate": 5.1813929700908523e-05, - "loss": 0.005409573763608932, - "step": 9100 - }, - { - "epoch": 1.5524296675191815, - "grad_norm": 0.07188098877668381, - "learning_rate": 5.179024439605573e-05, - "loss": 0.00541839525103569, - "step": 9105 - }, - { - "epoch": 1.5532821824381928, - "grad_norm": 0.07955330610275269, - "learning_rate": 5.176655242004149e-05, - "loss": 0.007760365307331085, - "step": 9110 - }, - { - "epoch": 1.5541346973572039, - "grad_norm": 0.07923565059900284, - "learning_rate": 5.1742853783926e-05, - "loss": 0.00563618317246437, - "step": 9115 - }, - { - "epoch": 1.554987212276215, - "grad_norm": 0.08301008492708206, - "learning_rate": 5.171914849877258e-05, - "loss": 0.006948529183864594, - "step": 9120 - }, - { - "epoch": 1.555839727195226, - "grad_norm": 0.10905841737985611, - "learning_rate": 5.1695436575647655e-05, - "loss": 0.005861887335777282, - "step": 9125 - }, - { - "epoch": 1.556692242114237, - "grad_norm": 0.06157204881310463, - "learning_rate": 5.167171802562072e-05, - "loss": 0.005052468553185463, - "step": 9130 - }, - { - "epoch": 1.5575447570332481, - "grad_norm": 0.08309191465377808, - "learning_rate": 5.164799285976438e-05, - "loss": 0.006937308609485627, - "step": 9135 - }, - { - "epoch": 1.5583972719522592, - "grad_norm": 0.07454490661621094, - "learning_rate": 5.162426108915437e-05, - "loss": 0.00504121258854866, - "step": 9140 - }, - { - "epoch": 1.5592497868712702, - "grad_norm": 0.07217807322740555, - "learning_rate": 5.160052272486943e-05, - "loss": 0.004582167789340019, - "step": 9145 - }, - { - "epoch": 1.5601023017902813, - "grad_norm": 0.07113789767026901, - "learning_rate": 5.157677777799145e-05, - "loss": 0.0055323362350463865, - "step": 9150 - }, - { - "epoch": 1.5609548167092924, - "grad_norm": 0.10281748324632645, - "learning_rate": 5.1553026259605316e-05, - "loss": 0.006342601776123047, - "step": 9155 - }, - { - "epoch": 1.5618073316283034, - "grad_norm": 0.09731876850128174, - "learning_rate": 5.152926818079906e-05, - "loss": 0.0054936733096838, - "step": 9160 - }, - { - "epoch": 1.5626598465473145, - "grad_norm": 0.09631586819887161, - "learning_rate": 5.1505503552663734e-05, - "loss": 0.0064162641763687136, - "step": 9165 - }, - { - "epoch": 1.5635123614663256, - "grad_norm": 0.07588718831539154, - "learning_rate": 5.148173238629348e-05, - "loss": 0.0069232374429702755, - "step": 9170 - }, - { - "epoch": 1.5643648763853366, - "grad_norm": 0.10357257723808289, - "learning_rate": 5.145795469278544e-05, - "loss": 0.007076382637023926, - "step": 9175 - }, - { - "epoch": 1.5652173913043477, - "grad_norm": 0.07249122112989426, - "learning_rate": 5.1434170483239826e-05, - "loss": 0.005868781358003616, - "step": 9180 - }, - { - "epoch": 1.566069906223359, - "grad_norm": 0.06878417730331421, - "learning_rate": 5.1410379768759934e-05, - "loss": 0.006841042637825012, - "step": 9185 - }, - { - "epoch": 1.56692242114237, - "grad_norm": 0.1096004843711853, - "learning_rate": 5.138658256045203e-05, - "loss": 0.00807877779006958, - "step": 9190 - }, - { - "epoch": 1.567774936061381, - "grad_norm": 0.07194329053163528, - "learning_rate": 5.136277886942547e-05, - "loss": 0.005923056975007057, - "step": 9195 - }, - { - "epoch": 1.5686274509803921, - "grad_norm": 0.08904275298118591, - "learning_rate": 5.133896870679257e-05, - "loss": 0.006372517347335816, - "step": 9200 - }, - { - "epoch": 1.5694799658994032, - "grad_norm": 0.05133598670363426, - "learning_rate": 5.131515208366873e-05, - "loss": 0.00692460760474205, - "step": 9205 - }, - { - "epoch": 1.5703324808184145, - "grad_norm": 0.047151900827884674, - "learning_rate": 5.1291329011172345e-05, - "loss": 0.006545543670654297, - "step": 9210 - }, - { - "epoch": 1.5711849957374255, - "grad_norm": 0.07102219760417938, - "learning_rate": 5.126749950042482e-05, - "loss": 0.006531259417533875, - "step": 9215 - }, - { - "epoch": 1.5720375106564366, - "grad_norm": 0.09585709124803543, - "learning_rate": 5.124366356255056e-05, - "loss": 0.005086017400026321, - "step": 9220 - }, - { - "epoch": 1.5728900255754477, - "grad_norm": 0.06898393481969833, - "learning_rate": 5.121982120867695e-05, - "loss": 0.004247477650642395, - "step": 9225 - }, - { - "epoch": 1.5737425404944587, - "grad_norm": 0.10513560473918915, - "learning_rate": 5.119597244993443e-05, - "loss": 0.006501986831426621, - "step": 9230 - }, - { - "epoch": 1.5745950554134698, - "grad_norm": 0.06671630591154099, - "learning_rate": 5.1172117297456366e-05, - "loss": 0.007658005505800247, - "step": 9235 - }, - { - "epoch": 1.5754475703324808, - "grad_norm": 0.09480880945920944, - "learning_rate": 5.1148255762379156e-05, - "loss": 0.006366011500358581, - "step": 9240 - }, - { - "epoch": 1.576300085251492, - "grad_norm": 0.06769633293151855, - "learning_rate": 5.112438785584215e-05, - "loss": 0.00625738725066185, - "step": 9245 - }, - { - "epoch": 1.577152600170503, - "grad_norm": 0.03695152327418327, - "learning_rate": 5.1100513588987665e-05, - "loss": 0.006924654543399811, - "step": 9250 - }, - { - "epoch": 1.578005115089514, - "grad_norm": 0.05657009407877922, - "learning_rate": 5.107663297296104e-05, - "loss": 0.005848415940999985, - "step": 9255 - }, - { - "epoch": 1.578857630008525, - "grad_norm": 0.11228469014167786, - "learning_rate": 5.105274601891051e-05, - "loss": 0.005637861788272858, - "step": 9260 - }, - { - "epoch": 1.5797101449275361, - "grad_norm": 0.06454899162054062, - "learning_rate": 5.102885273798732e-05, - "loss": 0.0066472023725509645, - "step": 9265 - }, - { - "epoch": 1.5805626598465472, - "grad_norm": 0.05328953638672829, - "learning_rate": 5.1004953141345637e-05, - "loss": 0.008773463219404221, - "step": 9270 - }, - { - "epoch": 1.5814151747655583, - "grad_norm": 0.05827401205897331, - "learning_rate": 5.0981047240142576e-05, - "loss": 0.0075307883322238926, - "step": 9275 - }, - { - "epoch": 1.5822676896845693, - "grad_norm": 0.0719359889626503, - "learning_rate": 5.095713504553822e-05, - "loss": 0.007532978057861328, - "step": 9280 - }, - { - "epoch": 1.5831202046035806, - "grad_norm": 0.08982953429222107, - "learning_rate": 5.0933216568695596e-05, - "loss": 0.007915425300598144, - "step": 9285 - }, - { - "epoch": 1.5839727195225917, - "grad_norm": 0.0919221043586731, - "learning_rate": 5.090929182078061e-05, - "loss": 0.005685590207576752, - "step": 9290 - }, - { - "epoch": 1.5848252344416027, - "grad_norm": 0.0840388685464859, - "learning_rate": 5.088536081296215e-05, - "loss": 0.0070190995931625364, - "step": 9295 - }, - { - "epoch": 1.5856777493606138, - "grad_norm": 0.08340579271316528, - "learning_rate": 5.086142355641199e-05, - "loss": 0.005871276929974556, - "step": 9300 - }, - { - "epoch": 1.5865302642796248, - "grad_norm": 0.0840516984462738, - "learning_rate": 5.0837480062304865e-05, - "loss": 0.007803326845169068, - "step": 9305 - }, - { - "epoch": 1.5873827791986361, - "grad_norm": 0.08378542214632034, - "learning_rate": 5.0813530341818377e-05, - "loss": 0.005085055530071258, - "step": 9310 - }, - { - "epoch": 1.5882352941176472, - "grad_norm": 0.10764650255441666, - "learning_rate": 5.078957440613305e-05, - "loss": 0.007959616929292678, - "step": 9315 - }, - { - "epoch": 1.5890878090366582, - "grad_norm": 0.07483979314565659, - "learning_rate": 5.076561226643231e-05, - "loss": 0.004332176968455314, - "step": 9320 - }, - { - "epoch": 1.5899403239556693, - "grad_norm": 0.06658382713794708, - "learning_rate": 5.074164393390249e-05, - "loss": 0.006168607249855995, - "step": 9325 - }, - { - "epoch": 1.5907928388746804, - "grad_norm": 0.09388890862464905, - "learning_rate": 5.071766941973282e-05, - "loss": 0.006460639089345932, - "step": 9330 - }, - { - "epoch": 1.5916453537936914, - "grad_norm": 0.051856543868780136, - "learning_rate": 5.0693688735115364e-05, - "loss": 0.005657953023910522, - "step": 9335 - }, - { - "epoch": 1.5924978687127025, - "grad_norm": 0.0785013884305954, - "learning_rate": 5.066970189124513e-05, - "loss": 0.008378601819276809, - "step": 9340 - }, - { - "epoch": 1.5933503836317136, - "grad_norm": 0.0653534010052681, - "learning_rate": 5.0645708899319956e-05, - "loss": 0.006928309798240662, - "step": 9345 - }, - { - "epoch": 1.5942028985507246, - "grad_norm": 0.047050826251506805, - "learning_rate": 5.062170977054058e-05, - "loss": 0.005722399801015854, - "step": 9350 - }, - { - "epoch": 1.5950554134697357, - "grad_norm": 0.10868531465530396, - "learning_rate": 5.059770451611061e-05, - "loss": 0.009898315370082855, - "step": 9355 - }, - { - "epoch": 1.5959079283887467, - "grad_norm": 0.0615832693874836, - "learning_rate": 5.0573693147236465e-05, - "loss": 0.007755370438098907, - "step": 9360 - }, - { - "epoch": 1.5967604433077578, - "grad_norm": 0.10720556974411011, - "learning_rate": 5.054967567512747e-05, - "loss": 0.006318587809801102, - "step": 9365 - }, - { - "epoch": 1.5976129582267689, - "grad_norm": 0.06587128341197968, - "learning_rate": 5.052565211099578e-05, - "loss": 0.004849371314048767, - "step": 9370 - }, - { - "epoch": 1.59846547314578, - "grad_norm": 0.07305008918046951, - "learning_rate": 5.050162246605638e-05, - "loss": 0.005983927100896835, - "step": 9375 - }, - { - "epoch": 1.599317988064791, - "grad_norm": 0.06641892343759537, - "learning_rate": 5.0477586751527124e-05, - "loss": 0.007008136063814163, - "step": 9380 - }, - { - "epoch": 1.6001705029838023, - "grad_norm": 0.06871581077575684, - "learning_rate": 5.045354497862868e-05, - "loss": 0.0066993959248065945, - "step": 9385 - }, - { - "epoch": 1.6010230179028133, - "grad_norm": 0.07417753338813782, - "learning_rate": 5.042949715858453e-05, - "loss": 0.006360804289579391, - "step": 9390 - }, - { - "epoch": 1.6018755328218244, - "grad_norm": 0.09202401340007782, - "learning_rate": 5.040544330262102e-05, - "loss": 0.006207296252250671, - "step": 9395 - }, - { - "epoch": 1.6027280477408354, - "grad_norm": 0.06747353821992874, - "learning_rate": 5.0381383421967276e-05, - "loss": 0.006196716427803039, - "step": 9400 - }, - { - "epoch": 1.6035805626598465, - "grad_norm": 0.06609310954809189, - "learning_rate": 5.0357317527855266e-05, - "loss": 0.005642791092395782, - "step": 9405 - }, - { - "epoch": 1.6044330775788578, - "grad_norm": 0.039614174515008926, - "learning_rate": 5.0333245631519716e-05, - "loss": 0.005146804824471473, - "step": 9410 - }, - { - "epoch": 1.6052855924978688, - "grad_norm": 0.0902944952249527, - "learning_rate": 5.0309167744198234e-05, - "loss": 0.005218298360705376, - "step": 9415 - }, - { - "epoch": 1.60613810741688, - "grad_norm": 0.06527641415596008, - "learning_rate": 5.028508387713114e-05, - "loss": 0.006157718971371651, - "step": 9420 - }, - { - "epoch": 1.606990622335891, - "grad_norm": 0.10824134200811386, - "learning_rate": 5.026099404156161e-05, - "loss": 0.00577687993645668, - "step": 9425 - }, - { - "epoch": 1.607843137254902, - "grad_norm": 0.091335728764534, - "learning_rate": 5.023689824873556e-05, - "loss": 0.005114461481571198, - "step": 9430 - }, - { - "epoch": 1.608695652173913, - "grad_norm": 0.047340504825115204, - "learning_rate": 5.021279650990173e-05, - "loss": 0.005150845646858216, - "step": 9435 - }, - { - "epoch": 1.6095481670929241, - "grad_norm": 0.05847655236721039, - "learning_rate": 5.01886888363116e-05, - "loss": 0.006019642949104309, - "step": 9440 - }, - { - "epoch": 1.6104006820119352, - "grad_norm": 0.10413257032632828, - "learning_rate": 5.016457523921943e-05, - "loss": 0.0097243569791317, - "step": 9445 - }, - { - "epoch": 1.6112531969309463, - "grad_norm": 0.06559625267982483, - "learning_rate": 5.014045572988226e-05, - "loss": 0.006743426620960236, - "step": 9450 - }, - { - "epoch": 1.6121057118499573, - "grad_norm": 0.07541610300540924, - "learning_rate": 5.0116330319559865e-05, - "loss": 0.004393500834703445, - "step": 9455 - }, - { - "epoch": 1.6129582267689684, - "grad_norm": 0.04757530242204666, - "learning_rate": 5.00921990195148e-05, - "loss": 0.004641738906502724, - "step": 9460 - }, - { - "epoch": 1.6138107416879794, - "grad_norm": 0.10010012239217758, - "learning_rate": 5.0068061841012355e-05, - "loss": 0.005677872523665428, - "step": 9465 - }, - { - "epoch": 1.6146632566069905, - "grad_norm": 0.08248613774776459, - "learning_rate": 5.0043918795320576e-05, - "loss": 0.006557486951351166, - "step": 9470 - }, - { - "epoch": 1.6155157715260016, - "grad_norm": 0.06300318241119385, - "learning_rate": 5.001976989371023e-05, - "loss": 0.0052742622792720795, - "step": 9475 - }, - { - "epoch": 1.6163682864450126, - "grad_norm": 0.06455430388450623, - "learning_rate": 4.999561514745482e-05, - "loss": 0.0061374582350254055, - "step": 9480 - }, - { - "epoch": 1.617220801364024, - "grad_norm": 0.04623732715845108, - "learning_rate": 4.997145456783062e-05, - "loss": 0.007861848175525665, - "step": 9485 - }, - { - "epoch": 1.618073316283035, - "grad_norm": 0.05294455960392952, - "learning_rate": 4.994728816611655e-05, - "loss": 0.005468960478901863, - "step": 9490 - }, - { - "epoch": 1.618925831202046, - "grad_norm": 0.04539628326892853, - "learning_rate": 4.992311595359431e-05, - "loss": 0.005490221083164215, - "step": 9495 - }, - { - "epoch": 1.619778346121057, - "grad_norm": 0.04033574461936951, - "learning_rate": 4.98989379415483e-05, - "loss": 0.005296828970313072, - "step": 9500 - }, - { - "epoch": 1.6206308610400681, - "grad_norm": 0.10801003128290176, - "learning_rate": 4.98747541412656e-05, - "loss": 0.007847490906715392, - "step": 9505 - }, - { - "epoch": 1.6214833759590794, - "grad_norm": 0.05979831889271736, - "learning_rate": 4.985056456403603e-05, - "loss": 0.005352787673473358, - "step": 9510 - }, - { - "epoch": 1.6223358908780905, - "grad_norm": 0.07628990709781647, - "learning_rate": 4.9826369221152086e-05, - "loss": 0.005436672642827034, - "step": 9515 - }, - { - "epoch": 1.6231884057971016, - "grad_norm": 0.0654626339673996, - "learning_rate": 4.9802168123908955e-05, - "loss": 0.004777481406927108, - "step": 9520 - }, - { - "epoch": 1.6240409207161126, - "grad_norm": 0.08487557619810104, - "learning_rate": 4.97779612836045e-05, - "loss": 0.006834116578102112, - "step": 9525 - }, - { - "epoch": 1.6248934356351237, - "grad_norm": 0.09151525050401688, - "learning_rate": 4.9753748711539316e-05, - "loss": 0.006389729678630829, - "step": 9530 - }, - { - "epoch": 1.6257459505541347, - "grad_norm": 0.10458851605653763, - "learning_rate": 4.972953041901661e-05, - "loss": 0.005984527617692947, - "step": 9535 - }, - { - "epoch": 1.6265984654731458, - "grad_norm": 0.08780983090400696, - "learning_rate": 4.970530641734229e-05, - "loss": 0.0068392202258110045, - "step": 9540 - }, - { - "epoch": 1.6274509803921569, - "grad_norm": 0.04871044307947159, - "learning_rate": 4.968107671782493e-05, - "loss": 0.005444938316941261, - "step": 9545 - }, - { - "epoch": 1.628303495311168, - "grad_norm": 0.05514970421791077, - "learning_rate": 4.9656841331775745e-05, - "loss": 0.005353255197405815, - "step": 9550 - }, - { - "epoch": 1.629156010230179, - "grad_norm": 0.057791441679000854, - "learning_rate": 4.9632600270508655e-05, - "loss": 0.005117457732558251, - "step": 9555 - }, - { - "epoch": 1.63000852514919, - "grad_norm": 0.0816815048456192, - "learning_rate": 4.960835354534015e-05, - "loss": 0.005405401438474655, - "step": 9560 - }, - { - "epoch": 1.630861040068201, - "grad_norm": 0.087788425385952, - "learning_rate": 4.958410116758945e-05, - "loss": 0.006124432012438774, - "step": 9565 - }, - { - "epoch": 1.6317135549872122, - "grad_norm": 0.08500470966100693, - "learning_rate": 4.955984314857832e-05, - "loss": 0.00581449456512928, - "step": 9570 - }, - { - "epoch": 1.6325660699062232, - "grad_norm": 0.042804375290870667, - "learning_rate": 4.9535579499631264e-05, - "loss": 0.007793295383453369, - "step": 9575 - }, - { - "epoch": 1.6334185848252343, - "grad_norm": 0.08767658472061157, - "learning_rate": 4.951131023207533e-05, - "loss": 0.006432226300239563, - "step": 9580 - }, - { - "epoch": 1.6342710997442456, - "grad_norm": 0.0693424716591835, - "learning_rate": 4.948703535724023e-05, - "loss": 0.006517377495765686, - "step": 9585 - }, - { - "epoch": 1.6351236146632566, - "grad_norm": 0.08574991673231125, - "learning_rate": 4.9462754886458276e-05, - "loss": 0.009532185643911362, - "step": 9590 - }, - { - "epoch": 1.6359761295822677, - "grad_norm": 0.04135733097791672, - "learning_rate": 4.94384688310644e-05, - "loss": 0.005358002707362175, - "step": 9595 - }, - { - "epoch": 1.6368286445012787, - "grad_norm": 0.09947369992733002, - "learning_rate": 4.941417720239616e-05, - "loss": 0.005965238064527511, - "step": 9600 - }, - { - "epoch": 1.6376811594202898, - "grad_norm": 0.038376711308956146, - "learning_rate": 4.9389880011793665e-05, - "loss": 0.00521450936794281, - "step": 9605 - }, - { - "epoch": 1.638533674339301, - "grad_norm": 0.05022123083472252, - "learning_rate": 4.9365577270599675e-05, - "loss": 0.006678921729326248, - "step": 9610 - }, - { - "epoch": 1.6393861892583121, - "grad_norm": 0.06687050312757492, - "learning_rate": 4.93412689901595e-05, - "loss": 0.006315051764249802, - "step": 9615 - }, - { - "epoch": 1.6402387041773232, - "grad_norm": 0.08563709259033203, - "learning_rate": 4.931695518182107e-05, - "loss": 0.005977614223957062, - "step": 9620 - }, - { - "epoch": 1.6410912190963343, - "grad_norm": 0.07901418209075928, - "learning_rate": 4.929263585693486e-05, - "loss": 0.004367914795875549, - "step": 9625 - }, - { - "epoch": 1.6419437340153453, - "grad_norm": 0.05929172784090042, - "learning_rate": 4.9268311026853974e-05, - "loss": 0.00466451421380043, - "step": 9630 - }, - { - "epoch": 1.6427962489343564, - "grad_norm": 0.09167131781578064, - "learning_rate": 4.924398070293403e-05, - "loss": 0.0063233010470867155, - "step": 9635 - }, - { - "epoch": 1.6436487638533674, - "grad_norm": 0.053217221051454544, - "learning_rate": 4.921964489653321e-05, - "loss": 0.005829869210720063, - "step": 9640 - }, - { - "epoch": 1.6445012787723785, - "grad_norm": 0.05341719463467598, - "learning_rate": 4.919530361901232e-05, - "loss": 0.005165425688028335, - "step": 9645 - }, - { - "epoch": 1.6453537936913896, - "grad_norm": 0.0763968899846077, - "learning_rate": 4.917095688173466e-05, - "loss": 0.008034119009971618, - "step": 9650 - }, - { - "epoch": 1.6462063086104006, - "grad_norm": 0.07722017168998718, - "learning_rate": 4.9146604696066095e-05, - "loss": 0.008911440521478653, - "step": 9655 - }, - { - "epoch": 1.6470588235294117, - "grad_norm": 0.0639941543340683, - "learning_rate": 4.912224707337504e-05, - "loss": 0.0066375695168972015, - "step": 9660 - }, - { - "epoch": 1.6479113384484227, - "grad_norm": 0.05451088026165962, - "learning_rate": 4.9097884025032425e-05, - "loss": 0.004018183052539826, - "step": 9665 - }, - { - "epoch": 1.6487638533674338, - "grad_norm": 0.06928657740354538, - "learning_rate": 4.907351556241176e-05, - "loss": 0.0061560459434986115, - "step": 9670 - }, - { - "epoch": 1.6496163682864449, - "grad_norm": 0.0672740638256073, - "learning_rate": 4.904914169688903e-05, - "loss": 0.005010559782385826, - "step": 9675 - }, - { - "epoch": 1.6504688832054561, - "grad_norm": 0.05115605145692825, - "learning_rate": 4.902476243984279e-05, - "loss": 0.005690005421638489, - "step": 9680 - }, - { - "epoch": 1.6513213981244672, - "grad_norm": 0.08852645754814148, - "learning_rate": 4.9000377802654055e-05, - "loss": 0.0067652732133865355, - "step": 9685 - }, - { - "epoch": 1.6521739130434783, - "grad_norm": 0.08289605379104614, - "learning_rate": 4.897598779670643e-05, - "loss": 0.005946322903037071, - "step": 9690 - }, - { - "epoch": 1.6530264279624893, - "grad_norm": 0.08343428373336792, - "learning_rate": 4.895159243338594e-05, - "loss": 0.006231371313333511, - "step": 9695 - }, - { - "epoch": 1.6538789428815004, - "grad_norm": 0.08138900995254517, - "learning_rate": 4.892719172408117e-05, - "loss": 0.006785771995782852, - "step": 9700 - }, - { - "epoch": 1.6547314578005117, - "grad_norm": 0.07599585503339767, - "learning_rate": 4.890278568018318e-05, - "loss": 0.00609181635081768, - "step": 9705 - }, - { - "epoch": 1.6555839727195227, - "grad_norm": 0.07918383926153183, - "learning_rate": 4.887837431308552e-05, - "loss": 0.006991502642631531, - "step": 9710 - }, - { - "epoch": 1.6564364876385338, - "grad_norm": 0.048750922083854675, - "learning_rate": 4.8853957634184246e-05, - "loss": 0.00639684796333313, - "step": 9715 - }, - { - "epoch": 1.6572890025575449, - "grad_norm": 0.07931654155254364, - "learning_rate": 4.882953565487785e-05, - "loss": 0.004780232906341553, - "step": 9720 - }, - { - "epoch": 1.658141517476556, - "grad_norm": 0.07394375652074814, - "learning_rate": 4.8805108386567345e-05, - "loss": 0.005560039728879929, - "step": 9725 - }, - { - "epoch": 1.658994032395567, - "grad_norm": 0.07906223088502884, - "learning_rate": 4.8780675840656175e-05, - "loss": 0.006233107298612595, - "step": 9730 - }, - { - "epoch": 1.659846547314578, - "grad_norm": 0.05145291984081268, - "learning_rate": 4.875623802855027e-05, - "loss": 0.0049663417041301726, - "step": 9735 - }, - { - "epoch": 1.660699062233589, - "grad_norm": 0.06227492541074753, - "learning_rate": 4.873179496165802e-05, - "loss": 0.006139815598726272, - "step": 9740 - }, - { - "epoch": 1.6615515771526002, - "grad_norm": 0.08176816254854202, - "learning_rate": 4.870734665139028e-05, - "loss": 0.007625886052846908, - "step": 9745 - }, - { - "epoch": 1.6624040920716112, - "grad_norm": 0.06774444133043289, - "learning_rate": 4.868289310916029e-05, - "loss": 0.006510105729103088, - "step": 9750 - }, - { - "epoch": 1.6632566069906223, - "grad_norm": 0.07336006313562393, - "learning_rate": 4.8658434346383805e-05, - "loss": 0.0068834669888019565, - "step": 9755 - }, - { - "epoch": 1.6641091219096333, - "grad_norm": 0.07233051210641861, - "learning_rate": 4.863397037447899e-05, - "loss": 0.005505643784999847, - "step": 9760 - }, - { - "epoch": 1.6649616368286444, - "grad_norm": 0.037355873733758926, - "learning_rate": 4.860950120486643e-05, - "loss": 0.005151794478297234, - "step": 9765 - }, - { - "epoch": 1.6658141517476555, - "grad_norm": 0.10907282680273056, - "learning_rate": 4.8585026848969164e-05, - "loss": 0.007589263468980789, - "step": 9770 - }, - { - "epoch": 1.6666666666666665, - "grad_norm": 0.10357582569122314, - "learning_rate": 4.856054731821261e-05, - "loss": 0.006011854484677314, - "step": 9775 - }, - { - "epoch": 1.6675191815856778, - "grad_norm": 0.0713953971862793, - "learning_rate": 4.853606262402465e-05, - "loss": 0.006342334300279617, - "step": 9780 - }, - { - "epoch": 1.6683716965046889, - "grad_norm": 0.07772944122552872, - "learning_rate": 4.851157277783555e-05, - "loss": 0.005903373658657074, - "step": 9785 - }, - { - "epoch": 1.6692242114237, - "grad_norm": 0.1249493658542633, - "learning_rate": 4.848707779107797e-05, - "loss": 0.006542833894491196, - "step": 9790 - }, - { - "epoch": 1.670076726342711, - "grad_norm": 0.05137734115123749, - "learning_rate": 4.8462577675187e-05, - "loss": 0.004380676150321961, - "step": 9795 - }, - { - "epoch": 1.670929241261722, - "grad_norm": 0.09491576999425888, - "learning_rate": 4.8438072441600095e-05, - "loss": 0.005311820283532142, - "step": 9800 - }, - { - "epoch": 1.6717817561807333, - "grad_norm": 0.09257746487855911, - "learning_rate": 4.8413562101757134e-05, - "loss": 0.006033014133572578, - "step": 9805 - }, - { - "epoch": 1.6726342710997444, - "grad_norm": 0.045860812067985535, - "learning_rate": 4.838904666710034e-05, - "loss": 0.008368080109357834, - "step": 9810 - }, - { - "epoch": 1.6734867860187554, - "grad_norm": 0.033777810633182526, - "learning_rate": 4.836452614907435e-05, - "loss": 0.0045743979513645176, - "step": 9815 - }, - { - "epoch": 1.6743393009377665, - "grad_norm": 0.12888991832733154, - "learning_rate": 4.834000055912614e-05, - "loss": 0.005997149646282196, - "step": 9820 - }, - { - "epoch": 1.6751918158567776, - "grad_norm": 0.08622048050165176, - "learning_rate": 4.8315469908705074e-05, - "loss": 0.007002732157707215, - "step": 9825 - }, - { - "epoch": 1.6760443307757886, - "grad_norm": 0.04722774773836136, - "learning_rate": 4.82909342092629e-05, - "loss": 0.005374876409769058, - "step": 9830 - }, - { - "epoch": 1.6768968456947997, - "grad_norm": 0.08596520870923996, - "learning_rate": 4.826639347225366e-05, - "loss": 0.0066084228456020355, - "step": 9835 - }, - { - "epoch": 1.6777493606138107, - "grad_norm": 0.09831524640321732, - "learning_rate": 4.824184770913381e-05, - "loss": 0.004402932524681091, - "step": 9840 - }, - { - "epoch": 1.6786018755328218, - "grad_norm": 0.10586824268102646, - "learning_rate": 4.821729693136214e-05, - "loss": 0.006442143023014069, - "step": 9845 - }, - { - "epoch": 1.6794543904518329, - "grad_norm": 0.11845403164625168, - "learning_rate": 4.8192741150399735e-05, - "loss": 0.006300021708011627, - "step": 9850 - }, - { - "epoch": 1.680306905370844, - "grad_norm": 0.08749356120824814, - "learning_rate": 4.816818037771007e-05, - "loss": 0.0060168147087097164, - "step": 9855 - }, - { - "epoch": 1.681159420289855, - "grad_norm": 0.06483060121536255, - "learning_rate": 4.814361462475895e-05, - "loss": 0.00717247799038887, - "step": 9860 - }, - { - "epoch": 1.682011935208866, - "grad_norm": 0.09276239573955536, - "learning_rate": 4.811904390301444e-05, - "loss": 0.006788758933544159, - "step": 9865 - }, - { - "epoch": 1.682864450127877, - "grad_norm": 0.05662832781672478, - "learning_rate": 4.809446822394701e-05, - "loss": 0.0068000413477420805, - "step": 9870 - }, - { - "epoch": 1.6837169650468882, - "grad_norm": 0.07508451491594315, - "learning_rate": 4.80698875990294e-05, - "loss": 0.006339512765407562, - "step": 9875 - }, - { - "epoch": 1.6845694799658995, - "grad_norm": 0.06525320559740067, - "learning_rate": 4.804530203973664e-05, - "loss": 0.010082229971885681, - "step": 9880 - }, - { - "epoch": 1.6854219948849105, - "grad_norm": 0.07791458070278168, - "learning_rate": 4.8020711557546104e-05, - "loss": 0.006830710172653198, - "step": 9885 - }, - { - "epoch": 1.6862745098039216, - "grad_norm": 0.05997749790549278, - "learning_rate": 4.799611616393745e-05, - "loss": 0.00666801705956459, - "step": 9890 - }, - { - "epoch": 1.6871270247229326, - "grad_norm": 0.07050258666276932, - "learning_rate": 4.797151587039261e-05, - "loss": 0.0059244450181722644, - "step": 9895 - }, - { - "epoch": 1.6879795396419437, - "grad_norm": 0.06760186702013016, - "learning_rate": 4.794691068839585e-05, - "loss": 0.006415641307830811, - "step": 9900 - }, - { - "epoch": 1.688832054560955, - "grad_norm": 0.07285474240779877, - "learning_rate": 4.792230062943364e-05, - "loss": 0.004972729086875916, - "step": 9905 - }, - { - "epoch": 1.689684569479966, - "grad_norm": 0.02914854884147644, - "learning_rate": 4.789768570499481e-05, - "loss": 0.004819701239466667, - "step": 9910 - }, - { - "epoch": 1.690537084398977, - "grad_norm": 0.058768294751644135, - "learning_rate": 4.787306592657042e-05, - "loss": 0.00581958070397377, - "step": 9915 - }, - { - "epoch": 1.6913895993179882, - "grad_norm": 0.08694405853748322, - "learning_rate": 4.7848441305653804e-05, - "loss": 0.004998849332332611, - "step": 9920 - }, - { - "epoch": 1.6922421142369992, - "grad_norm": 0.10194200277328491, - "learning_rate": 4.782381185374054e-05, - "loss": 0.00809016153216362, - "step": 9925 - }, - { - "epoch": 1.6930946291560103, - "grad_norm": 0.04976386949419975, - "learning_rate": 4.779917758232849e-05, - "loss": 0.00392133817076683, - "step": 9930 - }, - { - "epoch": 1.6939471440750213, - "grad_norm": 0.04324428364634514, - "learning_rate": 4.777453850291774e-05, - "loss": 0.005488916113972664, - "step": 9935 - }, - { - "epoch": 1.6947996589940324, - "grad_norm": 0.128068745136261, - "learning_rate": 4.774989462701063e-05, - "loss": 0.008696570992469788, - "step": 9940 - }, - { - "epoch": 1.6956521739130435, - "grad_norm": 0.06357335299253464, - "learning_rate": 4.7725245966111764e-05, - "loss": 0.00657767504453659, - "step": 9945 - }, - { - "epoch": 1.6965046888320545, - "grad_norm": 0.09200388938188553, - "learning_rate": 4.770059253172793e-05, - "loss": 0.00511985532939434, - "step": 9950 - }, - { - "epoch": 1.6973572037510656, - "grad_norm": 0.0898200049996376, - "learning_rate": 4.767593433536819e-05, - "loss": 0.005805553123354912, - "step": 9955 - }, - { - "epoch": 1.6982097186700766, - "grad_norm": 0.06495708227157593, - "learning_rate": 4.765127138854379e-05, - "loss": 0.005122709274291992, - "step": 9960 - }, - { - "epoch": 1.6990622335890877, - "grad_norm": 0.06079862266778946, - "learning_rate": 4.762660370276824e-05, - "loss": 0.005829216912388802, - "step": 9965 - }, - { - "epoch": 1.6999147485080988, - "grad_norm": 0.07300638407468796, - "learning_rate": 4.760193128955721e-05, - "loss": 0.0057421475648880005, - "step": 9970 - }, - { - "epoch": 1.7007672634271098, - "grad_norm": 0.09826004505157471, - "learning_rate": 4.757725416042863e-05, - "loss": 0.007709302753210068, - "step": 9975 - }, - { - "epoch": 1.701619778346121, - "grad_norm": 0.08353756368160248, - "learning_rate": 4.755257232690258e-05, - "loss": 0.007458946853876114, - "step": 9980 - }, - { - "epoch": 1.7024722932651322, - "grad_norm": 0.057993657886981964, - "learning_rate": 4.752788580050137e-05, - "loss": 0.0048107530921697615, - "step": 9985 - }, - { - "epoch": 1.7033248081841432, - "grad_norm": 0.08480621874332428, - "learning_rate": 4.750319459274951e-05, - "loss": 0.007556724548339844, - "step": 9990 - }, - { - "epoch": 1.7041773231031543, - "grad_norm": 0.06563637405633926, - "learning_rate": 4.747849871517364e-05, - "loss": 0.00476250983774662, - "step": 9995 - }, - { - "epoch": 1.7050298380221653, - "grad_norm": 0.06217886507511139, - "learning_rate": 4.7453798179302656e-05, - "loss": 0.008565887063741683, - "step": 10000 - }, - { - "epoch": 1.7058823529411766, - "grad_norm": 0.07285669445991516, - "learning_rate": 4.742909299666756e-05, - "loss": 0.0062899492681026455, - "step": 10005 - }, - { - "epoch": 1.7067348678601877, - "grad_norm": 0.043275732547044754, - "learning_rate": 4.7404383178801564e-05, - "loss": 0.005467301979660988, - "step": 10010 - }, - { - "epoch": 1.7075873827791987, - "grad_norm": 0.09345486015081406, - "learning_rate": 4.7379668737240044e-05, - "loss": 0.007198603451251983, - "step": 10015 - }, - { - "epoch": 1.7084398976982098, - "grad_norm": 0.09792933613061905, - "learning_rate": 4.735494968352049e-05, - "loss": 0.009155672788619996, - "step": 10020 - }, - { - "epoch": 1.7092924126172209, - "grad_norm": 0.03888144716620445, - "learning_rate": 4.733022602918263e-05, - "loss": 0.00484597384929657, - "step": 10025 - }, - { - "epoch": 1.710144927536232, - "grad_norm": 0.050344232469797134, - "learning_rate": 4.7305497785768235e-05, - "loss": 0.00478862039744854, - "step": 10030 - }, - { - "epoch": 1.710997442455243, - "grad_norm": 0.0724092647433281, - "learning_rate": 4.728076496482131e-05, - "loss": 0.005028426647186279, - "step": 10035 - }, - { - "epoch": 1.711849957374254, - "grad_norm": 0.10781413316726685, - "learning_rate": 4.725602757788794e-05, - "loss": 0.00789962187409401, - "step": 10040 - }, - { - "epoch": 1.712702472293265, - "grad_norm": 0.0828569084405899, - "learning_rate": 4.723128563651637e-05, - "loss": 0.006212035566568375, - "step": 10045 - }, - { - "epoch": 1.7135549872122762, - "grad_norm": 0.06634854525327682, - "learning_rate": 4.720653915225695e-05, - "loss": 0.00550018809735775, - "step": 10050 - }, - { - "epoch": 1.7144075021312872, - "grad_norm": 0.07699137926101685, - "learning_rate": 4.718178813666217e-05, - "loss": 0.007427608966827393, - "step": 10055 - }, - { - "epoch": 1.7152600170502983, - "grad_norm": 0.08237455785274506, - "learning_rate": 4.715703260128663e-05, - "loss": 0.0049440376460552216, - "step": 10060 - }, - { - "epoch": 1.7161125319693094, - "grad_norm": 0.0423310324549675, - "learning_rate": 4.7132272557687034e-05, - "loss": 0.005643930658698082, - "step": 10065 - }, - { - "epoch": 1.7169650468883204, - "grad_norm": 0.08052363246679306, - "learning_rate": 4.71075080174222e-05, - "loss": 0.005594046413898468, - "step": 10070 - }, - { - "epoch": 1.7178175618073315, - "grad_norm": 0.05388827249407768, - "learning_rate": 4.7082738992053004e-05, - "loss": 0.005239073187112808, - "step": 10075 - }, - { - "epoch": 1.7186700767263428, - "grad_norm": 0.0699780210852623, - "learning_rate": 4.70579654931425e-05, - "loss": 0.004442551359534264, - "step": 10080 - }, - { - "epoch": 1.7195225916453538, - "grad_norm": 0.07259970158338547, - "learning_rate": 4.7033187532255765e-05, - "loss": 0.004775180667638779, - "step": 10085 - }, - { - "epoch": 1.7203751065643649, - "grad_norm": 0.10291304439306259, - "learning_rate": 4.700840512095995e-05, - "loss": 0.009148158878087998, - "step": 10090 - }, - { - "epoch": 1.721227621483376, - "grad_norm": 0.09639768302440643, - "learning_rate": 4.698361827082435e-05, - "loss": 0.008357913047075272, - "step": 10095 - }, - { - "epoch": 1.722080136402387, - "grad_norm": 0.08128193765878677, - "learning_rate": 4.695882699342026e-05, - "loss": 0.006467945128679276, - "step": 10100 - }, - { - "epoch": 1.7229326513213983, - "grad_norm": 0.0678371787071228, - "learning_rate": 4.6934031300321094e-05, - "loss": 0.005760467797517777, - "step": 10105 - }, - { - "epoch": 1.7237851662404093, - "grad_norm": 0.0766267478466034, - "learning_rate": 4.6909231203102285e-05, - "loss": 0.0068340465426445, - "step": 10110 - }, - { - "epoch": 1.7246376811594204, - "grad_norm": 0.04263419657945633, - "learning_rate": 4.6884426713341366e-05, - "loss": 0.005921339616179466, - "step": 10115 - }, - { - "epoch": 1.7254901960784315, - "grad_norm": 0.10168195515871048, - "learning_rate": 4.6859617842617874e-05, - "loss": 0.006926319003105164, - "step": 10120 - }, - { - "epoch": 1.7263427109974425, - "grad_norm": 0.07910803705453873, - "learning_rate": 4.683480460251343e-05, - "loss": 0.006997878849506378, - "step": 10125 - }, - { - "epoch": 1.7271952259164536, - "grad_norm": 0.045049965381622314, - "learning_rate": 4.680998700461169e-05, - "loss": 0.005594813078641891, - "step": 10130 - }, - { - "epoch": 1.7280477408354646, - "grad_norm": 0.07185275852680206, - "learning_rate": 4.678516506049832e-05, - "loss": 0.006092778965830803, - "step": 10135 - }, - { - "epoch": 1.7289002557544757, - "grad_norm": 0.07003147900104523, - "learning_rate": 4.676033878176102e-05, - "loss": 0.007595886290073395, - "step": 10140 - }, - { - "epoch": 1.7297527706734868, - "grad_norm": 0.06360077112913132, - "learning_rate": 4.6735508179989536e-05, - "loss": 0.00546439029276371, - "step": 10145 - }, - { - "epoch": 1.7306052855924978, - "grad_norm": 0.07347442954778671, - "learning_rate": 4.671067326677563e-05, - "loss": 0.004961185902357101, - "step": 10150 - }, - { - "epoch": 1.7314578005115089, - "grad_norm": 0.056153345853090286, - "learning_rate": 4.6685834053713035e-05, - "loss": 0.006820976734161377, - "step": 10155 - }, - { - "epoch": 1.73231031543052, - "grad_norm": 0.09868444502353668, - "learning_rate": 4.666099055239755e-05, - "loss": 0.004829689115285874, - "step": 10160 - }, - { - "epoch": 1.733162830349531, - "grad_norm": 0.07029838860034943, - "learning_rate": 4.663614277442694e-05, - "loss": 0.006708820164203644, - "step": 10165 - }, - { - "epoch": 1.734015345268542, - "grad_norm": 0.0785607323050499, - "learning_rate": 4.661129073140096e-05, - "loss": 0.0093411885201931, - "step": 10170 - }, - { - "epoch": 1.7348678601875531, - "grad_norm": 0.05867304652929306, - "learning_rate": 4.658643443492139e-05, - "loss": 0.004420546442270279, - "step": 10175 - }, - { - "epoch": 1.7357203751065644, - "grad_norm": 0.08736653625965118, - "learning_rate": 4.656157389659196e-05, - "loss": 0.0049125440418720245, - "step": 10180 - }, - { - "epoch": 1.7365728900255755, - "grad_norm": 0.10769468545913696, - "learning_rate": 4.653670912801842e-05, - "loss": 0.006663528829813003, - "step": 10185 - }, - { - "epoch": 1.7374254049445865, - "grad_norm": 0.054130490869283676, - "learning_rate": 4.651184014080843e-05, - "loss": 0.005649637803435326, - "step": 10190 - }, - { - "epoch": 1.7382779198635976, - "grad_norm": 0.0760764479637146, - "learning_rate": 4.648696694657171e-05, - "loss": 0.00803508386015892, - "step": 10195 - }, - { - "epoch": 1.7391304347826086, - "grad_norm": 0.08103618025779724, - "learning_rate": 4.646208955691987e-05, - "loss": 0.005645860359072686, - "step": 10200 - }, - { - "epoch": 1.73998294970162, - "grad_norm": 0.060226406902074814, - "learning_rate": 4.643720798346649e-05, - "loss": 0.005114502459764481, - "step": 10205 - }, - { - "epoch": 1.740835464620631, - "grad_norm": 0.08842508494853973, - "learning_rate": 4.641232223782713e-05, - "loss": 0.004128537327051163, - "step": 10210 - }, - { - "epoch": 1.741687979539642, - "grad_norm": 0.03715536370873451, - "learning_rate": 4.6387432331619284e-05, - "loss": 0.005640536174178123, - "step": 10215 - }, - { - "epoch": 1.742540494458653, - "grad_norm": 0.09130766242742538, - "learning_rate": 4.636253827646239e-05, - "loss": 0.0074319176375865935, - "step": 10220 - }, - { - "epoch": 1.7433930093776642, - "grad_norm": 0.08204436302185059, - "learning_rate": 4.6337640083977826e-05, - "loss": 0.006443107873201371, - "step": 10225 - }, - { - "epoch": 1.7442455242966752, - "grad_norm": 0.09834989905357361, - "learning_rate": 4.6312737765788883e-05, - "loss": 0.00825996845960617, - "step": 10230 - }, - { - "epoch": 1.7450980392156863, - "grad_norm": 0.07453756034374237, - "learning_rate": 4.628783133352078e-05, - "loss": 0.005153121426701546, - "step": 10235 - }, - { - "epoch": 1.7459505541346974, - "grad_norm": 0.0658891350030899, - "learning_rate": 4.626292079880071e-05, - "loss": 0.005568725615739822, - "step": 10240 - }, - { - "epoch": 1.7468030690537084, - "grad_norm": 0.08673261851072311, - "learning_rate": 4.623800617325772e-05, - "loss": 0.00687919333577156, - "step": 10245 - }, - { - "epoch": 1.7476555839727195, - "grad_norm": 0.08707419037818909, - "learning_rate": 4.621308746852276e-05, - "loss": 0.009814801812171935, - "step": 10250 - }, - { - "epoch": 1.7485080988917305, - "grad_norm": 0.07168986648321152, - "learning_rate": 4.618816469622874e-05, - "loss": 0.004722443222999573, - "step": 10255 - }, - { - "epoch": 1.7493606138107416, - "grad_norm": 0.07987508177757263, - "learning_rate": 4.616323786801042e-05, - "loss": 0.006749927252531052, - "step": 10260 - }, - { - "epoch": 1.7497016197783462, - "eval_loss": 0.03619376942515373, - "eval_runtime": 3.6854, - "eval_samples_per_second": 68.379, - "eval_steps_per_second": 1.085, - "step": 10262 - }, - { - "eval_cer_subset": 0.014314328985294836, - "eval_cer_subset_edit_distance": 879, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 10262 - }, - { - "epoch": 1.7502131287297527, - "grad_norm": 0.10899413377046585, - "learning_rate": 4.6138306995504495e-05, - "loss": 0.006938809901475907, - "step": 10265 - }, - { - "epoch": 1.7510656436487637, - "grad_norm": 0.10073213279247284, - "learning_rate": 4.6113372090349516e-05, - "loss": 0.00795048326253891, - "step": 10270 - }, - { - "epoch": 1.7519181585677748, - "grad_norm": 0.04800979420542717, - "learning_rate": 4.608843316418592e-05, - "loss": 0.007616385817527771, - "step": 10275 - }, - { - "epoch": 1.752770673486786, - "grad_norm": 0.09020161628723145, - "learning_rate": 4.6063490228656025e-05, - "loss": 0.005228221416473389, - "step": 10280 - }, - { - "epoch": 1.7536231884057971, - "grad_norm": 0.083438441157341, - "learning_rate": 4.603854329540403e-05, - "loss": 0.00726160854101181, - "step": 10285 - }, - { - "epoch": 1.7544757033248082, - "grad_norm": 0.07851024717092514, - "learning_rate": 4.6013592376076e-05, - "loss": 0.006890790909528733, - "step": 10290 - }, - { - "epoch": 1.7553282182438192, - "grad_norm": 0.09015098959207535, - "learning_rate": 4.598863748231985e-05, - "loss": 0.007083073258399963, - "step": 10295 - }, - { - "epoch": 1.7561807331628303, - "grad_norm": 0.04751535877585411, - "learning_rate": 4.596367862578534e-05, - "loss": 0.005376371741294861, - "step": 10300 - }, - { - "epoch": 1.7570332480818416, - "grad_norm": 0.07547739148139954, - "learning_rate": 4.5938715818124094e-05, - "loss": 0.008766484260559083, - "step": 10305 - }, - { - "epoch": 1.7578857630008526, - "grad_norm": 0.052052512764930725, - "learning_rate": 4.5913749070989616e-05, - "loss": 0.005375667661428452, - "step": 10310 - }, - { - "epoch": 1.7587382779198637, - "grad_norm": 0.11575129628181458, - "learning_rate": 4.5888778396037187e-05, - "loss": 0.006675881892442703, - "step": 10315 - }, - { - "epoch": 1.7595907928388748, - "grad_norm": 0.05995294824242592, - "learning_rate": 4.586380380492394e-05, - "loss": 0.007097356766462326, - "step": 10320 - }, - { - "epoch": 1.7604433077578858, - "grad_norm": 0.049236129969358444, - "learning_rate": 4.583882530930887e-05, - "loss": 0.004433324560523033, - "step": 10325 - }, - { - "epoch": 1.7612958226768969, - "grad_norm": 0.048296503722667694, - "learning_rate": 4.581384292085274e-05, - "loss": 0.0051886774599552155, - "step": 10330 - }, - { - "epoch": 1.762148337595908, - "grad_norm": 0.09939385205507278, - "learning_rate": 4.57888566512182e-05, - "loss": 0.006426715105772018, - "step": 10335 - }, - { - "epoch": 1.763000852514919, - "grad_norm": 0.08810277283191681, - "learning_rate": 4.5763866512069626e-05, - "loss": 0.00727301687002182, - "step": 10340 - }, - { - "epoch": 1.76385336743393, - "grad_norm": 0.05262129753828049, - "learning_rate": 4.573887251507328e-05, - "loss": 0.004860313236713409, - "step": 10345 - }, - { - "epoch": 1.7647058823529411, - "grad_norm": 0.09755868464708328, - "learning_rate": 4.571387467189718e-05, - "loss": 0.00684543177485466, - "step": 10350 - }, - { - "epoch": 1.7655583972719522, - "grad_norm": 0.08306272327899933, - "learning_rate": 4.568887299421115e-05, - "loss": 0.005363506823778152, - "step": 10355 - }, - { - "epoch": 1.7664109121909632, - "grad_norm": 0.06304962188005447, - "learning_rate": 4.566386749368681e-05, - "loss": 0.006262359023094177, - "step": 10360 - }, - { - "epoch": 1.7672634271099743, - "grad_norm": 0.099216029047966, - "learning_rate": 4.5638858181997544e-05, - "loss": 0.005263365060091019, - "step": 10365 - }, - { - "epoch": 1.7681159420289854, - "grad_norm": 0.06316341459751129, - "learning_rate": 4.5613845070818544e-05, - "loss": 0.0053974583745002745, - "step": 10370 - }, - { - "epoch": 1.7689684569479964, - "grad_norm": 0.08523725718259811, - "learning_rate": 4.5588828171826755e-05, - "loss": 0.006064000725746155, - "step": 10375 - }, - { - "epoch": 1.7698209718670077, - "grad_norm": 0.0663699060678482, - "learning_rate": 4.5563807496700925e-05, - "loss": 0.00665600374341011, - "step": 10380 - }, - { - "epoch": 1.7706734867860188, - "grad_norm": 0.10673311352729797, - "learning_rate": 4.55387830571215e-05, - "loss": 0.006540966033935547, - "step": 10385 - }, - { - "epoch": 1.7715260017050298, - "grad_norm": 0.08779574930667877, - "learning_rate": 4.551375486477074e-05, - "loss": 0.00547558106482029, - "step": 10390 - }, - { - "epoch": 1.772378516624041, - "grad_norm": 0.07451514899730682, - "learning_rate": 4.5488722931332625e-05, - "loss": 0.008499838411808014, - "step": 10395 - }, - { - "epoch": 1.773231031543052, - "grad_norm": 0.06014202535152435, - "learning_rate": 4.5463687268492904e-05, - "loss": 0.006278771907091141, - "step": 10400 - }, - { - "epoch": 1.7740835464620632, - "grad_norm": 0.039256151765584946, - "learning_rate": 4.543864788793907e-05, - "loss": 0.0037193533033132555, - "step": 10405 - }, - { - "epoch": 1.7749360613810743, - "grad_norm": 0.09449942409992218, - "learning_rate": 4.541360480136031e-05, - "loss": 0.006574592739343643, - "step": 10410 - }, - { - "epoch": 1.7757885763000854, - "grad_norm": 0.07616980373859406, - "learning_rate": 4.53885580204476e-05, - "loss": 0.006042734161019326, - "step": 10415 - }, - { - "epoch": 1.7766410912190964, - "grad_norm": 0.07019155472517014, - "learning_rate": 4.5363507556893574e-05, - "loss": 0.006044945493340492, - "step": 10420 - }, - { - "epoch": 1.7774936061381075, - "grad_norm": 0.0616939477622509, - "learning_rate": 4.533845342239266e-05, - "loss": 0.004315405339002609, - "step": 10425 - }, - { - "epoch": 1.7783461210571185, - "grad_norm": 0.09354502707719803, - "learning_rate": 4.5313395628640943e-05, - "loss": 0.005719271302223205, - "step": 10430 - }, - { - "epoch": 1.7791986359761296, - "grad_norm": 0.08747732639312744, - "learning_rate": 4.528833418733623e-05, - "loss": 0.00472431555390358, - "step": 10435 - }, - { - "epoch": 1.7800511508951407, - "grad_norm": 0.09513017535209656, - "learning_rate": 4.5263269110178034e-05, - "loss": 0.006968998908996582, - "step": 10440 - }, - { - "epoch": 1.7809036658141517, - "grad_norm": 0.09208676964044571, - "learning_rate": 4.523820040886759e-05, - "loss": 0.006609047204256058, - "step": 10445 - }, - { - "epoch": 1.7817561807331628, - "grad_norm": 0.09964144974946976, - "learning_rate": 4.521312809510778e-05, - "loss": 0.0056272163987159726, - "step": 10450 - }, - { - "epoch": 1.7826086956521738, - "grad_norm": 0.06850367784500122, - "learning_rate": 4.51880521806032e-05, - "loss": 0.005562498047947883, - "step": 10455 - }, - { - "epoch": 1.783461210571185, - "grad_norm": 0.0654430240392685, - "learning_rate": 4.5162972677060124e-05, - "loss": 0.0059367924928665165, - "step": 10460 - }, - { - "epoch": 1.784313725490196, - "grad_norm": 0.0449560284614563, - "learning_rate": 4.513788959618649e-05, - "loss": 0.005458919331431389, - "step": 10465 - }, - { - "epoch": 1.785166240409207, - "grad_norm": 0.14256814122200012, - "learning_rate": 4.511280294969192e-05, - "loss": 0.0066184431314468386, - "step": 10470 - }, - { - "epoch": 1.7860187553282183, - "grad_norm": 0.08284557610750198, - "learning_rate": 4.508771274928771e-05, - "loss": 0.007388219982385635, - "step": 10475 - }, - { - "epoch": 1.7868712702472294, - "grad_norm": 0.05675457417964935, - "learning_rate": 4.506261900668676e-05, - "loss": 0.005572458356618881, - "step": 10480 - }, - { - "epoch": 1.7877237851662404, - "grad_norm": 0.05767322704195976, - "learning_rate": 4.50375217336037e-05, - "loss": 0.0058133058249950405, - "step": 10485 - }, - { - "epoch": 1.7885763000852515, - "grad_norm": 0.03421638533473015, - "learning_rate": 4.501242094175476e-05, - "loss": 0.005268872529268265, - "step": 10490 - }, - { - "epoch": 1.7894288150042625, - "grad_norm": 0.07319685071706772, - "learning_rate": 4.4987316642857836e-05, - "loss": 0.008701664954423904, - "step": 10495 - }, - { - "epoch": 1.7902813299232738, - "grad_norm": 0.04271615296602249, - "learning_rate": 4.4962208848632426e-05, - "loss": 0.005680259317159653, - "step": 10500 - }, - { - "epoch": 1.7911338448422849, - "grad_norm": 0.05916997417807579, - "learning_rate": 4.493709757079971e-05, - "loss": 0.004779224097728729, - "step": 10505 - }, - { - "epoch": 1.791986359761296, - "grad_norm": 0.04994066804647446, - "learning_rate": 4.491198282108244e-05, - "loss": 0.00443916954100132, - "step": 10510 - }, - { - "epoch": 1.792838874680307, - "grad_norm": 0.09032617509365082, - "learning_rate": 4.488686461120504e-05, - "loss": 0.007850547134876252, - "step": 10515 - }, - { - "epoch": 1.793691389599318, - "grad_norm": 0.05055975914001465, - "learning_rate": 4.4861742952893525e-05, - "loss": 0.005925046652555466, - "step": 10520 - }, - { - "epoch": 1.7945439045183291, - "grad_norm": 0.07521310448646545, - "learning_rate": 4.48366178578755e-05, - "loss": 0.006785632669925689, - "step": 10525 - }, - { - "epoch": 1.7953964194373402, - "grad_norm": 0.06577371805906296, - "learning_rate": 4.4811489337880216e-05, - "loss": 0.005300462618470192, - "step": 10530 - }, - { - "epoch": 1.7962489343563512, - "grad_norm": 0.0451020710170269, - "learning_rate": 4.4786357404638485e-05, - "loss": 0.00612550750374794, - "step": 10535 - }, - { - "epoch": 1.7971014492753623, - "grad_norm": 0.08968023955821991, - "learning_rate": 4.4761222069882754e-05, - "loss": 0.00558510459959507, - "step": 10540 - }, - { - "epoch": 1.7979539641943734, - "grad_norm": 0.0945729911327362, - "learning_rate": 4.4736083345347015e-05, - "loss": 0.007513274252414703, - "step": 10545 - }, - { - "epoch": 1.7988064791133844, - "grad_norm": 0.10392102599143982, - "learning_rate": 4.4710941242766844e-05, - "loss": 0.006224355846643448, - "step": 10550 - }, - { - "epoch": 1.7996589940323955, - "grad_norm": 0.10485874116420746, - "learning_rate": 4.4685795773879446e-05, - "loss": 0.005821261927485466, - "step": 10555 - }, - { - "epoch": 1.8005115089514065, - "grad_norm": 0.0689731314778328, - "learning_rate": 4.466064695042355e-05, - "loss": 0.0062000565230846405, - "step": 10560 - }, - { - "epoch": 1.8013640238704176, - "grad_norm": 0.07008705288171768, - "learning_rate": 4.4635494784139463e-05, - "loss": 0.006286797672510147, - "step": 10565 - }, - { - "epoch": 1.8022165387894287, - "grad_norm": 0.07595150172710419, - "learning_rate": 4.461033928676904e-05, - "loss": 0.006704485416412354, - "step": 10570 - }, - { - "epoch": 1.80306905370844, - "grad_norm": 0.07564863562583923, - "learning_rate": 4.458518047005572e-05, - "loss": 0.005777762830257415, - "step": 10575 - }, - { - "epoch": 1.803921568627451, - "grad_norm": 0.07202555984258652, - "learning_rate": 4.4560018345744466e-05, - "loss": 0.00602865107357502, - "step": 10580 - }, - { - "epoch": 1.804774083546462, - "grad_norm": 0.10462740063667297, - "learning_rate": 4.453485292558179e-05, - "loss": 0.007622111588716507, - "step": 10585 - }, - { - "epoch": 1.8056265984654731, - "grad_norm": 0.05587150529026985, - "learning_rate": 4.450968422131578e-05, - "loss": 0.00641121193766594, - "step": 10590 - }, - { - "epoch": 1.8064791133844842, - "grad_norm": 0.0603446289896965, - "learning_rate": 4.448451224469598e-05, - "loss": 0.0073586970567703245, - "step": 10595 - }, - { - "epoch": 1.8073316283034955, - "grad_norm": 0.04228143393993378, - "learning_rate": 4.445933700747353e-05, - "loss": 0.005406339466571808, - "step": 10600 - }, - { - "epoch": 1.8081841432225065, - "grad_norm": 0.04840795323252678, - "learning_rate": 4.4434158521401065e-05, - "loss": 0.0041844088584184645, - "step": 10605 - }, - { - "epoch": 1.8090366581415176, - "grad_norm": 0.08334027975797653, - "learning_rate": 4.440897679823275e-05, - "loss": 0.008376862108707427, - "step": 10610 - }, - { - "epoch": 1.8098891730605287, - "grad_norm": 0.07879523187875748, - "learning_rate": 4.438379184972423e-05, - "loss": 0.0053595036268234255, - "step": 10615 - }, - { - "epoch": 1.8107416879795397, - "grad_norm": 0.0689932182431221, - "learning_rate": 4.435860368763269e-05, - "loss": 0.005961846932768822, - "step": 10620 - }, - { - "epoch": 1.8115942028985508, - "grad_norm": 0.07035796344280243, - "learning_rate": 4.43334123237168e-05, - "loss": 0.005833951756358147, - "step": 10625 - }, - { - "epoch": 1.8124467178175618, - "grad_norm": 0.06488184630870819, - "learning_rate": 4.4308217769736715e-05, - "loss": 0.006380685418844223, - "step": 10630 - }, - { - "epoch": 1.813299232736573, - "grad_norm": 0.1095893532037735, - "learning_rate": 4.428302003745412e-05, - "loss": 0.006500106304883957, - "step": 10635 - }, - { - "epoch": 1.814151747655584, - "grad_norm": 0.07402926683425903, - "learning_rate": 4.425781913863212e-05, - "loss": 0.010839180648326873, - "step": 10640 - }, - { - "epoch": 1.815004262574595, - "grad_norm": 0.07752810418605804, - "learning_rate": 4.4232615085035354e-05, - "loss": 0.0053322531282901766, - "step": 10645 - }, - { - "epoch": 1.815856777493606, - "grad_norm": 0.06572280824184418, - "learning_rate": 4.420740788842991e-05, - "loss": 0.0072415158152580265, - "step": 10650 - }, - { - "epoch": 1.8167092924126171, - "grad_norm": 0.07175682485103607, - "learning_rate": 4.418219756058335e-05, - "loss": 0.007061149924993515, - "step": 10655 - }, - { - "epoch": 1.8175618073316282, - "grad_norm": 0.0702451840043068, - "learning_rate": 4.4156984113264684e-05, - "loss": 0.0050024140626192095, - "step": 10660 - }, - { - "epoch": 1.8184143222506393, - "grad_norm": 0.05054900422692299, - "learning_rate": 4.4131767558244375e-05, - "loss": 0.004906433075666428, - "step": 10665 - }, - { - "epoch": 1.8192668371696503, - "grad_norm": 0.07256589829921722, - "learning_rate": 4.410654790729438e-05, - "loss": 0.006986310333013534, - "step": 10670 - }, - { - "epoch": 1.8201193520886616, - "grad_norm": 0.06617925316095352, - "learning_rate": 4.408132517218805e-05, - "loss": 0.007973263412714005, - "step": 10675 - }, - { - "epoch": 1.8209718670076727, - "grad_norm": 0.09039802104234695, - "learning_rate": 4.405609936470022e-05, - "loss": 0.007263268530368805, - "step": 10680 - }, - { - "epoch": 1.8218243819266837, - "grad_norm": 0.03763730078935623, - "learning_rate": 4.40308704966071e-05, - "loss": 0.005709199234843254, - "step": 10685 - }, - { - "epoch": 1.8226768968456948, - "grad_norm": 0.09264735877513885, - "learning_rate": 4.400563857968639e-05, - "loss": 0.006996266543865204, - "step": 10690 - }, - { - "epoch": 1.8235294117647058, - "grad_norm": 0.0882507711648941, - "learning_rate": 4.398040362571719e-05, - "loss": 0.007461686432361603, - "step": 10695 - }, - { - "epoch": 1.8243819266837171, - "grad_norm": 0.07662846893072128, - "learning_rate": 4.395516564648e-05, - "loss": 0.006977429986000061, - "step": 10700 - }, - { - "epoch": 1.8252344416027282, - "grad_norm": 0.07431378960609436, - "learning_rate": 4.392992465375676e-05, - "loss": 0.004957346618175507, - "step": 10705 - }, - { - "epoch": 1.8260869565217392, - "grad_norm": 0.06182624027132988, - "learning_rate": 4.39046806593308e-05, - "loss": 0.006677946448326111, - "step": 10710 - }, - { - "epoch": 1.8269394714407503, - "grad_norm": 0.06389910727739334, - "learning_rate": 4.3879433674986856e-05, - "loss": 0.006449097394943237, - "step": 10715 - }, - { - "epoch": 1.8277919863597614, - "grad_norm": 0.06772691756486893, - "learning_rate": 4.385418371251107e-05, - "loss": 0.004998251050710678, - "step": 10720 - }, - { - "epoch": 1.8286445012787724, - "grad_norm": 0.07048022747039795, - "learning_rate": 4.3828930783690955e-05, - "loss": 0.006418389827013015, - "step": 10725 - }, - { - "epoch": 1.8294970161977835, - "grad_norm": 0.09442687779664993, - "learning_rate": 4.3803674900315424e-05, - "loss": 0.006921603530645371, - "step": 10730 - }, - { - "epoch": 1.8303495311167945, - "grad_norm": 0.0578981414437294, - "learning_rate": 4.377841607417475e-05, - "loss": 0.007038000971078873, - "step": 10735 - }, - { - "epoch": 1.8312020460358056, - "grad_norm": 0.06990659236907959, - "learning_rate": 4.37531543170606e-05, - "loss": 0.005136258527636528, - "step": 10740 - }, - { - "epoch": 1.8320545609548167, - "grad_norm": 0.05566668137907982, - "learning_rate": 4.372788964076601e-05, - "loss": 0.005333118140697479, - "step": 10745 - }, - { - "epoch": 1.8329070758738277, - "grad_norm": 0.09198274463415146, - "learning_rate": 4.3702622057085376e-05, - "loss": 0.005783502757549286, - "step": 10750 - }, - { - "epoch": 1.8337595907928388, - "grad_norm": 0.12995415925979614, - "learning_rate": 4.3677351577814423e-05, - "loss": 0.005794361606240273, - "step": 10755 - }, - { - "epoch": 1.8346121057118498, - "grad_norm": 0.0827256515622139, - "learning_rate": 4.3652078214750264e-05, - "loss": 0.00593951866030693, - "step": 10760 - }, - { - "epoch": 1.835464620630861, - "grad_norm": 0.09131235629320145, - "learning_rate": 4.362680197969136e-05, - "loss": 0.006387272477149963, - "step": 10765 - }, - { - "epoch": 1.836317135549872, - "grad_norm": 0.06061462685465813, - "learning_rate": 4.360152288443748e-05, - "loss": 0.006085103005170822, - "step": 10770 - }, - { - "epoch": 1.8371696504688833, - "grad_norm": 0.05650132894515991, - "learning_rate": 4.357624094078976e-05, - "loss": 0.004817041009664536, - "step": 10775 - }, - { - "epoch": 1.8380221653878943, - "grad_norm": 0.09250559657812119, - "learning_rate": 4.355095616055063e-05, - "loss": 0.006116693839430809, - "step": 10780 - }, - { - "epoch": 1.8388746803069054, - "grad_norm": 0.06575264036655426, - "learning_rate": 4.352566855552389e-05, - "loss": 0.006027846410870552, - "step": 10785 - }, - { - "epoch": 1.8397271952259164, - "grad_norm": 0.07538174092769623, - "learning_rate": 4.350037813751462e-05, - "loss": 0.006624206900596619, - "step": 10790 - }, - { - "epoch": 1.8405797101449275, - "grad_norm": 0.06000296771526337, - "learning_rate": 4.347508491832924e-05, - "loss": 0.006386204063892365, - "step": 10795 - }, - { - "epoch": 1.8414322250639388, - "grad_norm": 0.058621276170015335, - "learning_rate": 4.3449788909775455e-05, - "loss": 0.006246517226099968, - "step": 10800 - }, - { - "epoch": 1.8422847399829498, - "grad_norm": 0.10082551836967468, - "learning_rate": 4.34244901236623e-05, - "loss": 0.006916524469852447, - "step": 10805 - }, - { - "epoch": 1.843137254901961, - "grad_norm": 0.07926804572343826, - "learning_rate": 4.3399188571800064e-05, - "loss": 0.006270130723714828, - "step": 10810 - }, - { - "epoch": 1.843989769820972, - "grad_norm": 0.14256511628627777, - "learning_rate": 4.3373884266000375e-05, - "loss": 0.008555002510547638, - "step": 10815 - }, - { - "epoch": 1.844842284739983, - "grad_norm": 0.0711030438542366, - "learning_rate": 4.334857721807612e-05, - "loss": 0.004097539931535721, - "step": 10820 - }, - { - "epoch": 1.845694799658994, - "grad_norm": 0.05918106436729431, - "learning_rate": 4.3323267439841464e-05, - "loss": 0.006263938546180725, - "step": 10825 - }, - { - "epoch": 1.8465473145780051, - "grad_norm": 0.06577462702989578, - "learning_rate": 4.329795494311186e-05, - "loss": 0.004532983154058456, - "step": 10830 - }, - { - "epoch": 1.8473998294970162, - "grad_norm": 0.07599867880344391, - "learning_rate": 4.327263973970401e-05, - "loss": 0.006951173394918441, - "step": 10835 - }, - { - "epoch": 1.8482523444160273, - "grad_norm": 0.055239275097846985, - "learning_rate": 4.324732184143592e-05, - "loss": 0.00514591783285141, - "step": 10840 - }, - { - "epoch": 1.8491048593350383, - "grad_norm": 0.10522980988025665, - "learning_rate": 4.322200126012681e-05, - "loss": 0.00747048556804657, - "step": 10845 - }, - { - "epoch": 1.8499573742540494, - "grad_norm": 0.08132579177618027, - "learning_rate": 4.319667800759716e-05, - "loss": 0.005432958528399467, - "step": 10850 - }, - { - "epoch": 1.8508098891730604, - "grad_norm": 0.04027591645717621, - "learning_rate": 4.3171352095668726e-05, - "loss": 0.004450181499123573, - "step": 10855 - }, - { - "epoch": 1.8516624040920715, - "grad_norm": 0.0873839259147644, - "learning_rate": 4.314602353616446e-05, - "loss": 0.006079509109258652, - "step": 10860 - }, - { - "epoch": 1.8525149190110826, - "grad_norm": 0.04989013075828552, - "learning_rate": 4.312069234090862e-05, - "loss": 0.003988634794950485, - "step": 10865 - }, - { - "epoch": 1.8533674339300936, - "grad_norm": 0.061433590948581696, - "learning_rate": 4.309535852172661e-05, - "loss": 0.0056050091981887816, - "step": 10870 - }, - { - "epoch": 1.854219948849105, - "grad_norm": 0.07007768750190735, - "learning_rate": 4.3070022090445114e-05, - "loss": 0.006938119232654571, - "step": 10875 - }, - { - "epoch": 1.855072463768116, - "grad_norm": 0.03557104617357254, - "learning_rate": 4.3044683058892024e-05, - "loss": 0.0061099715530872345, - "step": 10880 - }, - { - "epoch": 1.855924978687127, - "grad_norm": 0.07706935703754425, - "learning_rate": 4.3019341438896446e-05, - "loss": 0.0050103053450584415, - "step": 10885 - }, - { - "epoch": 1.856777493606138, - "grad_norm": 0.06719083338975906, - "learning_rate": 4.2993997242288686e-05, - "loss": 0.005047342553734779, - "step": 10890 - }, - { - "epoch": 1.8576300085251491, - "grad_norm": 0.05179615691304207, - "learning_rate": 4.296865048090024e-05, - "loss": 0.004692831635475158, - "step": 10895 - }, - { - "epoch": 1.8584825234441604, - "grad_norm": 0.08594074845314026, - "learning_rate": 4.294330116656385e-05, - "loss": 0.006039778143167496, - "step": 10900 - }, - { - "epoch": 1.8593350383631715, - "grad_norm": 0.11285590380430222, - "learning_rate": 4.291794931111339e-05, - "loss": 0.005857323482632637, - "step": 10905 - }, - { - "epoch": 1.8601875532821825, - "grad_norm": 0.056068334728479385, - "learning_rate": 4.289259492638399e-05, - "loss": 0.006339801102876663, - "step": 10910 - }, - { - "epoch": 1.8610400682011936, - "grad_norm": 0.1027015820145607, - "learning_rate": 4.2867238024211873e-05, - "loss": 0.007628202438354492, - "step": 10915 - }, - { - "epoch": 1.8618925831202047, - "grad_norm": 0.06938920170068741, - "learning_rate": 4.2841878616434516e-05, - "loss": 0.005421775206923485, - "step": 10920 - }, - { - "epoch": 1.8627450980392157, - "grad_norm": 0.06613876670598984, - "learning_rate": 4.2816516714890525e-05, - "loss": 0.00747835859656334, - "step": 10925 - }, - { - "epoch": 1.8635976129582268, - "grad_norm": 0.07735379040241241, - "learning_rate": 4.279115233141967e-05, - "loss": 0.006907149404287338, - "step": 10930 - }, - { - "epoch": 1.8644501278772379, - "grad_norm": 0.06285069137811661, - "learning_rate": 4.276578547786291e-05, - "loss": 0.005340246856212616, - "step": 10935 - }, - { - "epoch": 1.865302642796249, - "grad_norm": 0.0670136883854866, - "learning_rate": 4.274041616606232e-05, - "loss": 0.0067828245460987095, - "step": 10940 - }, - { - "epoch": 1.86615515771526, - "grad_norm": 0.07944425195455551, - "learning_rate": 4.2715044407861144e-05, - "loss": 0.006403806060552597, - "step": 10945 - }, - { - "epoch": 1.867007672634271, - "grad_norm": 0.07202634960412979, - "learning_rate": 4.268967021510375e-05, - "loss": 0.004467373341321945, - "step": 10950 - }, - { - "epoch": 1.867860187553282, - "grad_norm": 0.08753371983766556, - "learning_rate": 4.266429359963568e-05, - "loss": 0.006740668416023254, - "step": 10955 - }, - { - "epoch": 1.8687127024722932, - "grad_norm": 0.0920538380742073, - "learning_rate": 4.263891457330357e-05, - "loss": 0.007489701360464096, - "step": 10960 - }, - { - "epoch": 1.8695652173913042, - "grad_norm": 0.11196473985910416, - "learning_rate": 4.261353314795519e-05, - "loss": 0.007533909380435943, - "step": 10965 - }, - { - "epoch": 1.8704177323103153, - "grad_norm": 0.08394299447536469, - "learning_rate": 4.258814933543943e-05, - "loss": 0.005159291997551918, - "step": 10970 - }, - { - "epoch": 1.8712702472293266, - "grad_norm": 0.08024156838655472, - "learning_rate": 4.25627631476063e-05, - "loss": 0.00543709248304367, - "step": 10975 - }, - { - "epoch": 1.8721227621483376, - "grad_norm": 0.052640948444604874, - "learning_rate": 4.253737459630694e-05, - "loss": 0.004067758470773697, - "step": 10980 - }, - { - "epoch": 1.8729752770673487, - "grad_norm": 0.08472926914691925, - "learning_rate": 4.251198369339353e-05, - "loss": 0.0077335178852081295, - "step": 10985 - }, - { - "epoch": 1.8738277919863597, - "grad_norm": 0.09794485569000244, - "learning_rate": 4.248659045071942e-05, - "loss": 0.0055429480969905855, - "step": 10990 - }, - { - "epoch": 1.8746803069053708, - "grad_norm": 0.07767575234174728, - "learning_rate": 4.2461194880139016e-05, - "loss": 0.008025288581848145, - "step": 10995 - }, - { - "epoch": 1.875532821824382, - "grad_norm": 0.07427361607551575, - "learning_rate": 4.2435796993507794e-05, - "loss": 0.006437119096517563, - "step": 11000 - }, - { - "epoch": 1.8763853367433931, - "grad_norm": 0.07420040667057037, - "learning_rate": 4.241039680268237e-05, - "loss": 0.0051200386136770245, - "step": 11005 - }, - { - "epoch": 1.8772378516624042, - "grad_norm": 0.09004204720258713, - "learning_rate": 4.2384994319520355e-05, - "loss": 0.007488063722848892, - "step": 11010 - }, - { - "epoch": 1.8780903665814153, - "grad_norm": 0.060929473489522934, - "learning_rate": 4.235958955588049e-05, - "loss": 0.00483398288488388, - "step": 11015 - }, - { - "epoch": 1.8789428815004263, - "grad_norm": 0.08116185665130615, - "learning_rate": 4.2334182523622584e-05, - "loss": 0.007078002393245697, - "step": 11020 - }, - { - "epoch": 1.8797953964194374, - "grad_norm": 0.0908491238951683, - "learning_rate": 4.230877323460746e-05, - "loss": 0.007228228449821472, - "step": 11025 - }, - { - "epoch": 1.8806479113384484, - "grad_norm": 0.08618480712175369, - "learning_rate": 4.228336170069703e-05, - "loss": 0.005402455478906632, - "step": 11030 - }, - { - "epoch": 1.8815004262574595, - "grad_norm": 0.06869816035032272, - "learning_rate": 4.2257947933754236e-05, - "loss": 0.006180650368332863, - "step": 11035 - }, - { - "epoch": 1.8823529411764706, - "grad_norm": 0.0904744416475296, - "learning_rate": 4.223253194564309e-05, - "loss": 0.00636049136519432, - "step": 11040 - }, - { - "epoch": 1.8832054560954816, - "grad_norm": 0.04902644082903862, - "learning_rate": 4.220711374822859e-05, - "loss": 0.0062784947454929355, - "step": 11045 - }, - { - "epoch": 1.8840579710144927, - "grad_norm": 0.060081589967012405, - "learning_rate": 4.2181693353376817e-05, - "loss": 0.005494052171707153, - "step": 11050 - }, - { - "epoch": 1.8849104859335037, - "grad_norm": 0.058530837297439575, - "learning_rate": 4.215627077295485e-05, - "loss": 0.005457080900669098, - "step": 11055 - }, - { - "epoch": 1.8857630008525148, - "grad_norm": 0.15006953477859497, - "learning_rate": 4.2130846018830795e-05, - "loss": 0.0062985971570014955, - "step": 11060 - }, - { - "epoch": 1.8866155157715259, - "grad_norm": 0.04498155787587166, - "learning_rate": 4.210541910287377e-05, - "loss": 0.004242038726806641, - "step": 11065 - }, - { - "epoch": 1.887468030690537, - "grad_norm": 0.09093966335058212, - "learning_rate": 4.207999003695392e-05, - "loss": 0.00554364025592804, - "step": 11070 - }, - { - "epoch": 1.8883205456095482, - "grad_norm": 0.06531018018722534, - "learning_rate": 4.2054558832942365e-05, - "loss": 0.0063869751989841465, - "step": 11075 - }, - { - "epoch": 1.8891730605285593, - "grad_norm": 0.059213872998952866, - "learning_rate": 4.202912550271124e-05, - "loss": 0.004836349189281464, - "step": 11080 - }, - { - "epoch": 1.8900255754475703, - "grad_norm": 0.11074823886156082, - "learning_rate": 4.200369005813367e-05, - "loss": 0.00584055446088314, - "step": 11085 - }, - { - "epoch": 1.8908780903665814, - "grad_norm": 0.09352346509695053, - "learning_rate": 4.197825251108376e-05, - "loss": 0.006423837691545487, - "step": 11090 - }, - { - "epoch": 1.8917306052855924, - "grad_norm": 0.10930176079273224, - "learning_rate": 4.195281287343662e-05, - "loss": 0.007819923013448716, - "step": 11095 - }, - { - "epoch": 1.8925831202046037, - "grad_norm": 0.10935486853122711, - "learning_rate": 4.19273711570683e-05, - "loss": 0.008524692058563233, - "step": 11100 - }, - { - "epoch": 1.8934356351236148, - "grad_norm": 0.07407546788454056, - "learning_rate": 4.190192737385586e-05, - "loss": 0.006353407353162766, - "step": 11105 - }, - { - "epoch": 1.8942881500426259, - "grad_norm": 0.11030165106058121, - "learning_rate": 4.187648153567729e-05, - "loss": 0.007683426141738892, - "step": 11110 - }, - { - "epoch": 1.895140664961637, - "grad_norm": 0.09419413655996323, - "learning_rate": 4.185103365441155e-05, - "loss": 0.005654521286487579, - "step": 11115 - }, - { - "epoch": 1.895993179880648, - "grad_norm": 0.06284896284341812, - "learning_rate": 4.1825583741938576e-05, - "loss": 0.0048633765429258345, - "step": 11120 - }, - { - "epoch": 1.896845694799659, - "grad_norm": 0.06429705023765564, - "learning_rate": 4.180013181013921e-05, - "loss": 0.006907754391431808, - "step": 11125 - }, - { - "epoch": 1.89769820971867, - "grad_norm": 0.1234050914645195, - "learning_rate": 4.177467787089527e-05, - "loss": 0.008531783521175385, - "step": 11130 - }, - { - "epoch": 1.8985507246376812, - "grad_norm": 0.04056263715028763, - "learning_rate": 4.174922193608951e-05, - "loss": 0.006784418225288391, - "step": 11135 - }, - { - "epoch": 1.8994032395566922, - "grad_norm": 0.048422425985336304, - "learning_rate": 4.172376401760561e-05, - "loss": 0.006587067246437072, - "step": 11140 - }, - { - "epoch": 1.9002557544757033, - "grad_norm": 0.10680951178073883, - "learning_rate": 4.169830412732815e-05, - "loss": 0.005700337141752243, - "step": 11145 - }, - { - "epoch": 1.9011082693947143, - "grad_norm": 0.09418217837810516, - "learning_rate": 4.167284227714267e-05, - "loss": 0.0059782925993204115, - "step": 11150 - }, - { - "epoch": 1.9019607843137254, - "grad_norm": 0.12511073052883148, - "learning_rate": 4.1647378478935614e-05, - "loss": 0.006256800889968872, - "step": 11155 - }, - { - "epoch": 1.9028132992327365, - "grad_norm": 0.06957859545946121, - "learning_rate": 4.1621912744594316e-05, - "loss": 0.008690094202756881, - "step": 11160 - }, - { - "epoch": 1.9036658141517475, - "grad_norm": 0.10859719663858414, - "learning_rate": 4.159644508600704e-05, - "loss": 0.008262380957603455, - "step": 11165 - }, - { - "epoch": 1.9045183290707586, - "grad_norm": 0.08408714830875397, - "learning_rate": 4.157097551506292e-05, - "loss": 0.005347007513046264, - "step": 11170 - }, - { - "epoch": 1.9053708439897699, - "grad_norm": 0.05623621866106987, - "learning_rate": 4.1545504043652014e-05, - "loss": 0.005091758817434311, - "step": 11175 - }, - { - "epoch": 1.906223358908781, - "grad_norm": 0.06791777908802032, - "learning_rate": 4.1520030683665246e-05, - "loss": 0.006755101680755615, - "step": 11180 - }, - { - "epoch": 1.907075873827792, - "grad_norm": 0.039112675935029984, - "learning_rate": 4.149455544699444e-05, - "loss": 0.0063312210142612456, - "step": 11185 - }, - { - "epoch": 1.907928388746803, - "grad_norm": 0.05682097375392914, - "learning_rate": 4.146907834553227e-05, - "loss": 0.005028403550386429, - "step": 11190 - }, - { - "epoch": 1.908780903665814, - "grad_norm": 0.07670710980892181, - "learning_rate": 4.144359939117229e-05, - "loss": 0.006438900530338287, - "step": 11195 - }, - { - "epoch": 1.9096334185848254, - "grad_norm": 0.06266012787818909, - "learning_rate": 4.141811859580894e-05, - "loss": 0.006153284758329392, - "step": 11200 - }, - { - "epoch": 1.9104859335038364, - "grad_norm": 0.06892232596874237, - "learning_rate": 4.139263597133749e-05, - "loss": 0.0042446799576282505, - "step": 11205 - }, - { - "epoch": 1.9113384484228475, - "grad_norm": 0.08733050525188446, - "learning_rate": 4.136715152965409e-05, - "loss": 0.0048094093799591064, - "step": 11210 - }, - { - "epoch": 1.9121909633418586, - "grad_norm": 0.06578327715396881, - "learning_rate": 4.13416652826557e-05, - "loss": 0.0047289058566093445, - "step": 11215 - }, - { - "epoch": 1.9130434782608696, - "grad_norm": 0.06382749229669571, - "learning_rate": 4.1316177242240174e-05, - "loss": 0.004200926423072815, - "step": 11220 - }, - { - "epoch": 1.9138959931798807, - "grad_norm": 0.07368794828653336, - "learning_rate": 4.129068742030617e-05, - "loss": 0.0063028551638126375, - "step": 11225 - }, - { - "epoch": 1.9147485080988917, - "grad_norm": 0.09302657842636108, - "learning_rate": 4.1265195828753176e-05, - "loss": 0.008124063909053802, - "step": 11230 - }, - { - "epoch": 1.9156010230179028, - "grad_norm": 0.08030751347541809, - "learning_rate": 4.123970247948153e-05, - "loss": 0.009628574550151824, - "step": 11235 - }, - { - "epoch": 1.9164535379369139, - "grad_norm": 0.08395590633153915, - "learning_rate": 4.1214207384392356e-05, - "loss": 0.007773591578006745, - "step": 11240 - }, - { - "epoch": 1.917306052855925, - "grad_norm": 0.09472183138132095, - "learning_rate": 4.118871055538762e-05, - "loss": 0.005461954325437546, - "step": 11245 - }, - { - "epoch": 1.918158567774936, - "grad_norm": 0.095457524061203, - "learning_rate": 4.11632120043701e-05, - "loss": 0.005725187063217163, - "step": 11250 - }, - { - "epoch": 1.919011082693947, - "grad_norm": 0.10508730262517929, - "learning_rate": 4.113771174324336e-05, - "loss": 0.006902433931827545, - "step": 11255 - }, - { - "epoch": 1.919863597612958, - "grad_norm": 0.08675665408372879, - "learning_rate": 4.111220978391176e-05, - "loss": 0.007470531016588211, - "step": 11260 - }, - { - "epoch": 1.9207161125319692, - "grad_norm": 0.08215013146400452, - "learning_rate": 4.108670613828049e-05, - "loss": 0.005732448399066925, - "step": 11265 - }, - { - "epoch": 1.9215686274509802, - "grad_norm": 0.054156310856342316, - "learning_rate": 4.1061200818255476e-05, - "loss": 0.005808809399604797, - "step": 11270 - }, - { - "epoch": 1.9224211423699915, - "grad_norm": 0.09332830458879471, - "learning_rate": 4.103569383574346e-05, - "loss": 0.005646481737494468, - "step": 11275 - }, - { - "epoch": 1.9232736572890026, - "grad_norm": 0.05589313432574272, - "learning_rate": 4.101018520265195e-05, - "loss": 0.005581434443593025, - "step": 11280 - }, - { - "epoch": 1.9241261722080136, - "grad_norm": 0.0465618334710598, - "learning_rate": 4.098467493088922e-05, - "loss": 0.005028170347213745, - "step": 11285 - }, - { - "epoch": 1.9249786871270247, - "grad_norm": 0.07304909080266953, - "learning_rate": 4.095916303236431e-05, - "loss": 0.007494028657674789, - "step": 11290 - }, - { - "epoch": 1.9258312020460358, - "grad_norm": 0.09532103687524796, - "learning_rate": 4.0933649518987025e-05, - "loss": 0.006374432146549225, - "step": 11295 - }, - { - "epoch": 1.926683716965047, - "grad_norm": 0.07364784181118011, - "learning_rate": 4.090813440266794e-05, - "loss": 0.0053088821470737456, - "step": 11300 - }, - { - "epoch": 1.927536231884058, - "grad_norm": 0.0804903507232666, - "learning_rate": 4.088261769531834e-05, - "loss": 0.0069495439529418945, - "step": 11305 - }, - { - "epoch": 1.9283887468030692, - "grad_norm": 0.07125549763441086, - "learning_rate": 4.0857099408850264e-05, - "loss": 0.005846098065376282, - "step": 11310 - }, - { - "epoch": 1.9292412617220802, - "grad_norm": 0.017375558614730835, - "learning_rate": 4.083157955517653e-05, - "loss": 0.004308582097291946, - "step": 11315 - }, - { - "epoch": 1.9300937766410913, - "grad_norm": 0.07655836641788483, - "learning_rate": 4.080605814621063e-05, - "loss": 0.006030111759901047, - "step": 11320 - }, - { - "epoch": 1.9309462915601023, - "grad_norm": 0.05411117896437645, - "learning_rate": 4.078053519386681e-05, - "loss": 0.0069768443703651425, - "step": 11325 - }, - { - "epoch": 1.9317988064791134, - "grad_norm": 0.08431188017129898, - "learning_rate": 4.0755010710060035e-05, - "loss": 0.006973695755004883, - "step": 11330 - }, - { - "epoch": 1.9326513213981245, - "grad_norm": 0.08480583131313324, - "learning_rate": 4.072948470670598e-05, - "loss": 0.006525547057390213, - "step": 11335 - }, - { - "epoch": 1.9335038363171355, - "grad_norm": 0.073171466588974, - "learning_rate": 4.070395719572104e-05, - "loss": 0.0054599311202764515, - "step": 11340 - }, - { - "epoch": 1.9343563512361466, - "grad_norm": 0.06951522827148438, - "learning_rate": 4.0678428189022304e-05, - "loss": 0.008897364884614945, - "step": 11345 - }, - { - "epoch": 1.9352088661551576, - "grad_norm": 0.08654197305440903, - "learning_rate": 4.0652897698527557e-05, - "loss": 0.005458325147628784, - "step": 11350 - }, - { - "epoch": 1.9360613810741687, - "grad_norm": 0.07929553836584091, - "learning_rate": 4.0627365736155285e-05, - "loss": 0.00710543841123581, - "step": 11355 - }, - { - "epoch": 1.9369138959931798, - "grad_norm": 0.12434503436088562, - "learning_rate": 4.060183231382466e-05, - "loss": 0.0071723200380802155, - "step": 11360 - }, - { - "epoch": 1.9377664109121908, - "grad_norm": 0.06440022587776184, - "learning_rate": 4.057629744345551e-05, - "loss": 0.006010268628597259, - "step": 11365 - }, - { - "epoch": 1.938618925831202, - "grad_norm": 0.09477414190769196, - "learning_rate": 4.0550761136968404e-05, - "loss": 0.007152469456195831, - "step": 11370 - }, - { - "epoch": 1.9394714407502132, - "grad_norm": 0.06758873164653778, - "learning_rate": 4.0525223406284516e-05, - "loss": 0.004493400454521179, - "step": 11375 - }, - { - "epoch": 1.9403239556692242, - "grad_norm": 0.06823158264160156, - "learning_rate": 4.0499684263325695e-05, - "loss": 0.0058505676686763765, - "step": 11380 - }, - { - "epoch": 1.9411764705882353, - "grad_norm": 0.10731697082519531, - "learning_rate": 4.0474143720014485e-05, - "loss": 0.00592585802078247, - "step": 11385 - }, - { - "epoch": 1.9420289855072463, - "grad_norm": 0.09786538779735565, - "learning_rate": 4.044860178827405e-05, - "loss": 0.008860854804515839, - "step": 11390 - }, - { - "epoch": 1.9428815004262576, - "grad_norm": 0.08662491291761398, - "learning_rate": 4.042305848002822e-05, - "loss": 0.00579673945903778, - "step": 11395 - }, - { - "epoch": 1.9437340153452687, - "grad_norm": 0.08446741849184036, - "learning_rate": 4.039751380720145e-05, - "loss": 0.0067916139960289, - "step": 11400 - }, - { - "epoch": 1.9445865302642797, - "grad_norm": 0.08059567958116531, - "learning_rate": 4.037196778171885e-05, - "loss": 0.007273902744054794, - "step": 11405 - }, - { - "epoch": 1.9454390451832908, - "grad_norm": 0.067914679646492, - "learning_rate": 4.0346420415506156e-05, - "loss": 0.00854090303182602, - "step": 11410 - }, - { - "epoch": 1.9462915601023019, - "grad_norm": 0.06519316136837006, - "learning_rate": 4.032087172048973e-05, - "loss": 0.006127477809786797, - "step": 11415 - }, - { - "epoch": 1.947144075021313, - "grad_norm": 0.10216967016458511, - "learning_rate": 4.029532170859655e-05, - "loss": 0.007330343872308731, - "step": 11420 - }, - { - "epoch": 1.947996589940324, - "grad_norm": 0.07684756815433502, - "learning_rate": 4.02697703917542e-05, - "loss": 0.006121716648340225, - "step": 11425 - }, - { - "epoch": 1.948849104859335, - "grad_norm": 0.08026126027107239, - "learning_rate": 4.0244217781890906e-05, - "loss": 0.006386417150497437, - "step": 11430 - }, - { - "epoch": 1.949701619778346, - "grad_norm": 0.09047527611255646, - "learning_rate": 4.021866389093546e-05, - "loss": 0.004208286106586456, - "step": 11435 - }, - { - "epoch": 1.9505541346973572, - "grad_norm": 0.047482747584581375, - "learning_rate": 4.0193108730817284e-05, - "loss": 0.005754061415791512, - "step": 11440 - }, - { - "epoch": 1.9514066496163682, - "grad_norm": 0.054364416748285294, - "learning_rate": 4.0167552313466355e-05, - "loss": 0.004412830248475075, - "step": 11445 - }, - { - "epoch": 1.9522591645353793, - "grad_norm": 0.07640549540519714, - "learning_rate": 4.014199465081327e-05, - "loss": 0.005214530602097511, - "step": 11450 - }, - { - "epoch": 1.9531116794543903, - "grad_norm": 0.07241252809762955, - "learning_rate": 4.0116435754789206e-05, - "loss": 0.005129393562674523, - "step": 11455 - }, - { - "epoch": 1.9539641943734014, - "grad_norm": 0.048170432448387146, - "learning_rate": 4.009087563732589e-05, - "loss": 0.005180074647068977, - "step": 11460 - }, - { - "epoch": 1.9548167092924125, - "grad_norm": 0.07336216419935226, - "learning_rate": 4.006531431035566e-05, - "loss": 0.009098170697689057, - "step": 11465 - }, - { - "epoch": 1.9556692242114238, - "grad_norm": 0.04934614151716232, - "learning_rate": 4.0039751785811346e-05, - "loss": 0.005307629331946373, - "step": 11470 - }, - { - "epoch": 1.9565217391304348, - "grad_norm": 0.08941303193569183, - "learning_rate": 4.001418807562643e-05, - "loss": 0.0069742932915687565, - "step": 11475 - }, - { - "epoch": 1.9573742540494459, - "grad_norm": 0.05791569501161575, - "learning_rate": 3.998862319173488e-05, - "loss": 0.0050424404442310335, - "step": 11480 - }, - { - "epoch": 1.958226768968457, - "grad_norm": 0.04596787318587303, - "learning_rate": 3.996305714607125e-05, - "loss": 0.004805172979831696, - "step": 11485 - }, - { - "epoch": 1.959079283887468, - "grad_norm": 0.07698309421539307, - "learning_rate": 3.993748995057061e-05, - "loss": 0.006605527549982071, - "step": 11490 - }, - { - "epoch": 1.9599317988064793, - "grad_norm": 0.08400565385818481, - "learning_rate": 3.9911921617168565e-05, - "loss": 0.0085490882396698, - "step": 11495 - }, - { - "epoch": 1.9607843137254903, - "grad_norm": 0.1446380764245987, - "learning_rate": 3.9886352157801296e-05, - "loss": 0.005958027392625809, - "step": 11500 - }, - { - "epoch": 1.9616368286445014, - "grad_norm": 0.06108809635043144, - "learning_rate": 3.986078158440544e-05, - "loss": 0.0054461218416690825, - "step": 11505 - }, - { - "epoch": 1.9624893435635125, - "grad_norm": 0.1163720190525055, - "learning_rate": 3.983520990891823e-05, - "loss": 0.0065662160515785216, - "step": 11510 - }, - { - "epoch": 1.9633418584825235, - "grad_norm": 0.08339548110961914, - "learning_rate": 3.980963714327734e-05, - "loss": 0.007503192871809006, - "step": 11515 - }, - { - "epoch": 1.9641943734015346, - "grad_norm": 0.07774331420660019, - "learning_rate": 3.9784063299421e-05, - "loss": 0.005831217020750045, - "step": 11520 - }, - { - "epoch": 1.9650468883205456, - "grad_norm": 0.08897018432617188, - "learning_rate": 3.9758488389287936e-05, - "loss": 0.006972354650497436, - "step": 11525 - }, - { - "epoch": 1.9658994032395567, - "grad_norm": 0.07708834111690521, - "learning_rate": 3.9732912424817374e-05, - "loss": 0.0059847764670848845, - "step": 11530 - }, - { - "epoch": 1.9667519181585678, - "grad_norm": 0.133201003074646, - "learning_rate": 3.9707335417949015e-05, - "loss": 0.005828146636486053, - "step": 11535 - }, - { - "epoch": 1.9676044330775788, - "grad_norm": 0.05620214343070984, - "learning_rate": 3.968175738062303e-05, - "loss": 0.004607116058468819, - "step": 11540 - }, - { - "epoch": 1.9684569479965899, - "grad_norm": 0.05371567979454994, - "learning_rate": 3.965617832478015e-05, - "loss": 0.004455961659550667, - "step": 11545 - }, - { - "epoch": 1.969309462915601, - "grad_norm": 0.10317978262901306, - "learning_rate": 3.96305982623615e-05, - "loss": 0.004697806015610695, - "step": 11550 - }, - { - "epoch": 1.970161977834612, - "grad_norm": 0.08786958456039429, - "learning_rate": 3.96050172053087e-05, - "loss": 0.005183818191289902, - "step": 11555 - }, - { - "epoch": 1.971014492753623, - "grad_norm": 0.07750507444143295, - "learning_rate": 3.957943516556385e-05, - "loss": 0.005475999787449837, - "step": 11560 - }, - { - "epoch": 1.9718670076726341, - "grad_norm": 0.07066313922405243, - "learning_rate": 3.955385215506949e-05, - "loss": 0.005772550404071808, - "step": 11565 - }, - { - "epoch": 1.9727195225916454, - "grad_norm": 0.08183038979768753, - "learning_rate": 3.952826818576863e-05, - "loss": 0.005305550992488861, - "step": 11570 - }, - { - "epoch": 1.9735720375106565, - "grad_norm": 0.075381800532341, - "learning_rate": 3.95026832696047e-05, - "loss": 0.00803310126066208, - "step": 11575 - }, - { - "epoch": 1.9744245524296675, - "grad_norm": 0.09064166992902756, - "learning_rate": 3.9477097418521616e-05, - "loss": 0.006380292773246765, - "step": 11580 - }, - { - "epoch": 1.9752770673486786, - "grad_norm": 0.09140465408563614, - "learning_rate": 3.945151064446367e-05, - "loss": 0.00863645225763321, - "step": 11585 - }, - { - "epoch": 1.9761295822676896, - "grad_norm": 0.09985008090734482, - "learning_rate": 3.942592295937565e-05, - "loss": 0.005205995962023735, - "step": 11590 - }, - { - "epoch": 1.976982097186701, - "grad_norm": 0.07968702167272568, - "learning_rate": 3.940033437520273e-05, - "loss": 0.006467466801404953, - "step": 11595 - }, - { - "epoch": 1.977834612105712, - "grad_norm": 0.0925409123301506, - "learning_rate": 3.937474490389051e-05, - "loss": 0.006804432719945908, - "step": 11600 - }, - { - "epoch": 1.978687127024723, - "grad_norm": 0.053421750664711, - "learning_rate": 3.9349154557385e-05, - "loss": 0.0067677564918994905, - "step": 11605 - }, - { - "epoch": 1.979539641943734, - "grad_norm": 0.07791347801685333, - "learning_rate": 3.9323563347632624e-05, - "loss": 0.006826826930046081, - "step": 11610 - }, - { - "epoch": 1.9803921568627452, - "grad_norm": 0.08627293258905411, - "learning_rate": 3.929797128658024e-05, - "loss": 0.00804663747549057, - "step": 11615 - }, - { - "epoch": 1.9812446717817562, - "grad_norm": 0.06506595015525818, - "learning_rate": 3.927237838617503e-05, - "loss": 0.005456966534256935, - "step": 11620 - }, - { - "epoch": 1.9820971867007673, - "grad_norm": 0.09555826336145401, - "learning_rate": 3.924678465836465e-05, - "loss": 0.005365721881389618, - "step": 11625 - }, - { - "epoch": 1.9829497016197783, - "grad_norm": 0.09176401793956757, - "learning_rate": 3.922119011509706e-05, - "loss": 0.006210924685001373, - "step": 11630 - }, - { - "epoch": 1.9838022165387894, - "grad_norm": 0.05260130763053894, - "learning_rate": 3.919559476832069e-05, - "loss": 0.004408955946564675, - "step": 11635 - }, - { - "epoch": 1.9846547314578005, - "grad_norm": 0.0875319391489029, - "learning_rate": 3.916999862998427e-05, - "loss": 0.005069036781787872, - "step": 11640 - }, - { - "epoch": 1.9855072463768115, - "grad_norm": 0.10335614532232285, - "learning_rate": 3.9144401712036936e-05, - "loss": 0.007199827581644058, - "step": 11645 - }, - { - "epoch": 1.9863597612958226, - "grad_norm": 0.09518889337778091, - "learning_rate": 3.9118804026428194e-05, - "loss": 0.00541754923760891, - "step": 11650 - }, - { - "epoch": 1.9872122762148337, - "grad_norm": 0.06707368791103363, - "learning_rate": 3.9093205585107863e-05, - "loss": 0.00641927570104599, - "step": 11655 - }, - { - "epoch": 1.9880647911338447, - "grad_norm": 0.10102292895317078, - "learning_rate": 3.906760640002618e-05, - "loss": 0.007096148282289505, - "step": 11660 - }, - { - "epoch": 1.9889173060528558, - "grad_norm": 0.0690481886267662, - "learning_rate": 3.904200648313368e-05, - "loss": 0.0063364550471305845, - "step": 11665 - }, - { - "epoch": 1.989769820971867, - "grad_norm": 0.1051480695605278, - "learning_rate": 3.901640584638126e-05, - "loss": 0.009133437275886535, - "step": 11670 - }, - { - "epoch": 1.9906223358908781, - "grad_norm": 0.0857042595744133, - "learning_rate": 3.899080450172015e-05, - "loss": 0.007245839387178421, - "step": 11675 - }, - { - "epoch": 1.9914748508098892, - "grad_norm": 0.04038793221116066, - "learning_rate": 3.8965202461101904e-05, - "loss": 0.005575920641422272, - "step": 11680 - }, - { - "epoch": 1.9923273657289002, - "grad_norm": 0.06331093609333038, - "learning_rate": 3.893959973647842e-05, - "loss": 0.004866635054349899, - "step": 11685 - }, - { - "epoch": 1.9931798806479113, - "grad_norm": 0.08694019168615341, - "learning_rate": 3.891399633980188e-05, - "loss": 0.004249059408903122, - "step": 11690 - }, - { - "epoch": 1.9940323955669226, - "grad_norm": 0.06739087402820587, - "learning_rate": 3.888839228302482e-05, - "loss": 0.006520142406225204, - "step": 11695 - }, - { - "epoch": 1.9948849104859336, - "grad_norm": 0.09432726353406906, - "learning_rate": 3.886278757810005e-05, - "loss": 0.006377060711383819, - "step": 11700 - }, - { - "epoch": 1.9957374254049447, - "grad_norm": 0.040565814822912216, - "learning_rate": 3.883718223698071e-05, - "loss": 0.0062430910766124725, - "step": 11705 - }, - { - "epoch": 1.9965899403239558, - "grad_norm": 0.09249477833509445, - "learning_rate": 3.881157627162022e-05, - "loss": 0.005447167158126831, - "step": 11710 - }, - { - "epoch": 1.9974424552429668, - "grad_norm": 0.08561582118272781, - "learning_rate": 3.87859696939723e-05, - "loss": 0.0067646786570549015, - "step": 11715 - }, - { - "epoch": 1.9982949701619779, - "grad_norm": 0.0771077573299408, - "learning_rate": 3.876036251599094e-05, - "loss": 0.006473222374916076, - "step": 11720 - }, - { - "epoch": 1.999147485080989, - "grad_norm": 0.047942496836185455, - "learning_rate": 3.873475474963044e-05, - "loss": 0.004876254498958588, - "step": 11725 - }, - { - "epoch": 1.9996589940323957, - "eval_loss": 0.03507082909345627, - "eval_runtime": 3.6311, - "eval_samples_per_second": 69.4, - "eval_steps_per_second": 1.102, - "step": 11728 - }, - { - "eval_cer_subset": 0.01172504763300601, - "eval_cer_subset_edit_distance": 720, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 11728 - }, - { - "epoch": 2.0, - "grad_norm": 0.09595198184251785, - "learning_rate": 3.8709146406845345e-05, - "loss": 0.005297505855560302, - "step": 11730 - }, - { - "epoch": 2.000852514919011, - "grad_norm": 0.0500548854470253, - "learning_rate": 3.8683537499590486e-05, - "loss": 0.0029773740097880363, - "step": 11735 - }, - { - "epoch": 2.001705029838022, - "grad_norm": 0.043145813047885895, - "learning_rate": 3.865792803982097e-05, - "loss": 0.0026956576853990554, - "step": 11740 - }, - { - "epoch": 2.002557544757033, - "grad_norm": 0.06828423589468002, - "learning_rate": 3.86323180394921e-05, - "loss": 0.0032785605639219285, - "step": 11745 - }, - { - "epoch": 2.0034100596760442, - "grad_norm": 0.05070719122886658, - "learning_rate": 3.8606707510559514e-05, - "loss": 0.0025875838473439217, - "step": 11750 - }, - { - "epoch": 2.0042625745950553, - "grad_norm": 0.03793288394808769, - "learning_rate": 3.8581096464979046e-05, - "loss": 0.003196726739406586, - "step": 11755 - }, - { - "epoch": 2.0051150895140664, - "grad_norm": 0.058611899614334106, - "learning_rate": 3.8555484914706783e-05, - "loss": 0.0025842227041721344, - "step": 11760 - }, - { - "epoch": 2.0059676044330774, - "grad_norm": 0.05239633843302727, - "learning_rate": 3.8529872871699064e-05, - "loss": 0.0033856891095638275, - "step": 11765 - }, - { - "epoch": 2.0068201193520885, - "grad_norm": 0.0694168210029602, - "learning_rate": 3.8504260347912414e-05, - "loss": 0.0036750052124261854, - "step": 11770 - }, - { - "epoch": 2.0076726342710995, - "grad_norm": 0.05401293560862541, - "learning_rate": 3.847864735530364e-05, - "loss": 0.0020644858479499815, - "step": 11775 - }, - { - "epoch": 2.008525149190111, - "grad_norm": 0.024288944900035858, - "learning_rate": 3.8453033905829715e-05, - "loss": 0.0030498920008540154, - "step": 11780 - }, - { - "epoch": 2.009377664109122, - "grad_norm": 0.07617825269699097, - "learning_rate": 3.842742001144787e-05, - "loss": 0.002012002095580101, - "step": 11785 - }, - { - "epoch": 2.010230179028133, - "grad_norm": 0.05394979938864708, - "learning_rate": 3.8401805684115514e-05, - "loss": 0.0013803424313664435, - "step": 11790 - }, - { - "epoch": 2.0110826939471442, - "grad_norm": 0.04705117642879486, - "learning_rate": 3.837619093579025e-05, - "loss": 0.0019010987132787704, - "step": 11795 - }, - { - "epoch": 2.0119352088661553, - "grad_norm": 0.04174034297466278, - "learning_rate": 3.835057577842993e-05, - "loss": 0.00339580737054348, - "step": 11800 - }, - { - "epoch": 2.0127877237851663, - "grad_norm": 0.05027524381875992, - "learning_rate": 3.832496022399254e-05, - "loss": 0.003779648244380951, - "step": 11805 - }, - { - "epoch": 2.0136402387041774, - "grad_norm": 0.06344325840473175, - "learning_rate": 3.829934428443627e-05, - "loss": 0.003120606765151024, - "step": 11810 - }, - { - "epoch": 2.0144927536231885, - "grad_norm": 0.04142893850803375, - "learning_rate": 3.827372797171949e-05, - "loss": 0.001541936956346035, - "step": 11815 - }, - { - "epoch": 2.0153452685421995, - "grad_norm": 0.05739682540297508, - "learning_rate": 3.8248111297800766e-05, - "loss": 0.002022813446819782, - "step": 11820 - }, - { - "epoch": 2.0161977834612106, - "grad_norm": 0.05701421573758125, - "learning_rate": 3.82224942746388e-05, - "loss": 0.0032159242779016494, - "step": 11825 - }, - { - "epoch": 2.0170502983802217, - "grad_norm": 0.05839217081665993, - "learning_rate": 3.8196876914192476e-05, - "loss": 0.0019759060814976692, - "step": 11830 - }, - { - "epoch": 2.0179028132992327, - "grad_norm": 0.04104325920343399, - "learning_rate": 3.8171259228420824e-05, - "loss": 0.0030811641365289686, - "step": 11835 - }, - { - "epoch": 2.0187553282182438, - "grad_norm": 0.05367572233080864, - "learning_rate": 3.814564122928303e-05, - "loss": 0.0024660680443048476, - "step": 11840 - }, - { - "epoch": 2.019607843137255, - "grad_norm": 0.07062884420156479, - "learning_rate": 3.8120022928738444e-05, - "loss": 0.0028638459742069246, - "step": 11845 - }, - { - "epoch": 2.020460358056266, - "grad_norm": 0.1098889485001564, - "learning_rate": 3.809440433874652e-05, - "loss": 0.002245648391544819, - "step": 11850 - }, - { - "epoch": 2.021312872975277, - "grad_norm": 0.11214791238307953, - "learning_rate": 3.806878547126689e-05, - "loss": 0.0025152696296572687, - "step": 11855 - }, - { - "epoch": 2.022165387894288, - "grad_norm": 0.0809246301651001, - "learning_rate": 3.804316633825926e-05, - "loss": 0.0030847292393445967, - "step": 11860 - }, - { - "epoch": 2.023017902813299, - "grad_norm": 0.05590866506099701, - "learning_rate": 3.801754695168353e-05, - "loss": 0.002259066328406334, - "step": 11865 - }, - { - "epoch": 2.02387041773231, - "grad_norm": 0.061719413846731186, - "learning_rate": 3.799192732349967e-05, - "loss": 0.003117647022008896, - "step": 11870 - }, - { - "epoch": 2.024722932651321, - "grad_norm": 0.05439555272459984, - "learning_rate": 3.796630746566777e-05, - "loss": 0.00280950702726841, - "step": 11875 - }, - { - "epoch": 2.0255754475703327, - "grad_norm": 0.07110737264156342, - "learning_rate": 3.794068739014802e-05, - "loss": 0.0022924147546291352, - "step": 11880 - }, - { - "epoch": 2.0264279624893438, - "grad_norm": 0.035215508192777634, - "learning_rate": 3.791506710890075e-05, - "loss": 0.0014670810662209988, - "step": 11885 - }, - { - "epoch": 2.027280477408355, - "grad_norm": 0.04196110740303993, - "learning_rate": 3.7889446633886345e-05, - "loss": 0.002128283493220806, - "step": 11890 - }, - { - "epoch": 2.028132992327366, - "grad_norm": 0.02117479033768177, - "learning_rate": 3.7863825977065295e-05, - "loss": 0.002085634134709835, - "step": 11895 - }, - { - "epoch": 2.028985507246377, - "grad_norm": 0.137400820851326, - "learning_rate": 3.783820515039818e-05, - "loss": 0.003697726130485535, - "step": 11900 - }, - { - "epoch": 2.029838022165388, - "grad_norm": 0.05773406848311424, - "learning_rate": 3.781258416584565e-05, - "loss": 0.0020811671391129495, - "step": 11905 - }, - { - "epoch": 2.030690537084399, - "grad_norm": 0.02017928846180439, - "learning_rate": 3.7786963035368425e-05, - "loss": 0.002070310711860657, - "step": 11910 - }, - { - "epoch": 2.03154305200341, - "grad_norm": 0.023989839479327202, - "learning_rate": 3.7761341770927314e-05, - "loss": 0.0035201530903577805, - "step": 11915 - }, - { - "epoch": 2.032395566922421, - "grad_norm": 0.05773681029677391, - "learning_rate": 3.7735720384483176e-05, - "loss": 0.002326706610620022, - "step": 11920 - }, - { - "epoch": 2.0332480818414322, - "grad_norm": 0.06733391433954239, - "learning_rate": 3.771009888799692e-05, - "loss": 0.0019989268854260444, - "step": 11925 - }, - { - "epoch": 2.0341005967604433, - "grad_norm": 0.09590540081262589, - "learning_rate": 3.76844772934295e-05, - "loss": 0.0024355117231607435, - "step": 11930 - }, - { - "epoch": 2.0349531116794544, - "grad_norm": 0.027501709759235382, - "learning_rate": 3.765885561274196e-05, - "loss": 0.0011640249751508236, - "step": 11935 - }, - { - "epoch": 2.0358056265984654, - "grad_norm": 0.031739719212055206, - "learning_rate": 3.7633233857895326e-05, - "loss": 0.0022482817992568018, - "step": 11940 - }, - { - "epoch": 2.0366581415174765, - "grad_norm": 0.027232196182012558, - "learning_rate": 3.760761204085071e-05, - "loss": 0.0018043683841824532, - "step": 11945 - }, - { - "epoch": 2.0375106564364875, - "grad_norm": 0.08634094148874283, - "learning_rate": 3.75819901735692e-05, - "loss": 0.0024119339883327483, - "step": 11950 - }, - { - "epoch": 2.0383631713554986, - "grad_norm": 0.05877285450696945, - "learning_rate": 3.755636826801194e-05, - "loss": 0.0009346712380647659, - "step": 11955 - }, - { - "epoch": 2.0392156862745097, - "grad_norm": 0.034714680165052414, - "learning_rate": 3.7530746336140115e-05, - "loss": 0.0021316718310117723, - "step": 11960 - }, - { - "epoch": 2.0400682011935207, - "grad_norm": 0.05897806957364082, - "learning_rate": 3.750512438991487e-05, - "loss": 0.0029691245406866075, - "step": 11965 - }, - { - "epoch": 2.040920716112532, - "grad_norm": 0.07302019745111465, - "learning_rate": 3.747950244129739e-05, - "loss": 0.0023437861353158953, - "step": 11970 - }, - { - "epoch": 2.041773231031543, - "grad_norm": 0.07187193632125854, - "learning_rate": 3.745388050224885e-05, - "loss": 0.0016971008852124214, - "step": 11975 - }, - { - "epoch": 2.0426257459505544, - "grad_norm": 0.05619340017437935, - "learning_rate": 3.742825858473044e-05, - "loss": 0.0021343151107430457, - "step": 11980 - }, - { - "epoch": 2.0434782608695654, - "grad_norm": 0.08814098685979843, - "learning_rate": 3.7402636700703295e-05, - "loss": 0.0026463912799954414, - "step": 11985 - }, - { - "epoch": 2.0443307757885765, - "grad_norm": 0.10130181908607483, - "learning_rate": 3.737701486212859e-05, - "loss": 0.0020437544211745263, - "step": 11990 - }, - { - "epoch": 2.0451832907075875, - "grad_norm": 0.06105076149106026, - "learning_rate": 3.7351393080967416e-05, - "loss": 0.002344959042966366, - "step": 11995 - }, - { - "epoch": 2.0460358056265986, - "grad_norm": 0.052822742611169815, - "learning_rate": 3.732577136918091e-05, - "loss": 0.0020566854625940325, - "step": 12000 - }, - { - "epoch": 2.0468883205456097, - "grad_norm": 0.10074899345636368, - "learning_rate": 3.730014973873013e-05, - "loss": 0.0026124339550733567, - "step": 12005 - }, - { - "epoch": 2.0477408354646207, - "grad_norm": 0.025597436353564262, - "learning_rate": 3.7274528201576095e-05, - "loss": 0.001905813068151474, - "step": 12010 - }, - { - "epoch": 2.0485933503836318, - "grad_norm": 0.05437781289219856, - "learning_rate": 3.7248906769679776e-05, - "loss": 0.0025912046432495115, - "step": 12015 - }, - { - "epoch": 2.049445865302643, - "grad_norm": 0.07095912098884583, - "learning_rate": 3.722328545500215e-05, - "loss": 0.002769463881850243, - "step": 12020 - }, - { - "epoch": 2.050298380221654, - "grad_norm": 0.14383850991725922, - "learning_rate": 3.719766426950408e-05, - "loss": 0.0038499854505062102, - "step": 12025 - }, - { - "epoch": 2.051150895140665, - "grad_norm": 0.06089269369840622, - "learning_rate": 3.7172043225146386e-05, - "loss": 0.002288899011909962, - "step": 12030 - }, - { - "epoch": 2.052003410059676, - "grad_norm": 0.05808301270008087, - "learning_rate": 3.7146422333889824e-05, - "loss": 0.0028305932879447936, - "step": 12035 - }, - { - "epoch": 2.052855924978687, - "grad_norm": 0.13334520161151886, - "learning_rate": 3.712080160769506e-05, - "loss": 0.00331525094807148, - "step": 12040 - }, - { - "epoch": 2.053708439897698, - "grad_norm": 0.03266080096364021, - "learning_rate": 3.709518105852273e-05, - "loss": 0.0020869884639978407, - "step": 12045 - }, - { - "epoch": 2.054560954816709, - "grad_norm": 0.07307332009077072, - "learning_rate": 3.706956069833336e-05, - "loss": 0.0023028414696455004, - "step": 12050 - }, - { - "epoch": 2.0554134697357203, - "grad_norm": 0.06093568354845047, - "learning_rate": 3.7043940539087366e-05, - "loss": 0.0022027945145964623, - "step": 12055 - }, - { - "epoch": 2.0562659846547313, - "grad_norm": 0.04207700863480568, - "learning_rate": 3.70183205927451e-05, - "loss": 0.0016738155856728554, - "step": 12060 - }, - { - "epoch": 2.0571184995737424, - "grad_norm": 0.046319641172885895, - "learning_rate": 3.699270087126679e-05, - "loss": 0.002480871044099331, - "step": 12065 - }, - { - "epoch": 2.0579710144927534, - "grad_norm": 0.042888909578323364, - "learning_rate": 3.69670813866126e-05, - "loss": 0.0020912257954478265, - "step": 12070 - }, - { - "epoch": 2.0588235294117645, - "grad_norm": 0.05530136078596115, - "learning_rate": 3.694146215074256e-05, - "loss": 0.0021427463740110396, - "step": 12075 - }, - { - "epoch": 2.059676044330776, - "grad_norm": 0.04992877319455147, - "learning_rate": 3.6915843175616555e-05, - "loss": 0.001970967650413513, - "step": 12080 - }, - { - "epoch": 2.060528559249787, - "grad_norm": 0.07341081649065018, - "learning_rate": 3.6890224473194373e-05, - "loss": 0.003120069019496441, - "step": 12085 - }, - { - "epoch": 2.061381074168798, - "grad_norm": 0.05361134931445122, - "learning_rate": 3.686460605543571e-05, - "loss": 0.0030833475291728975, - "step": 12090 - }, - { - "epoch": 2.062233589087809, - "grad_norm": 0.0904894769191742, - "learning_rate": 3.683898793430008e-05, - "loss": 0.0020733945071697234, - "step": 12095 - }, - { - "epoch": 2.0630861040068202, - "grad_norm": 0.03312591835856438, - "learning_rate": 3.681337012174686e-05, - "loss": 0.002308916300535202, - "step": 12100 - }, - { - "epoch": 2.0639386189258313, - "grad_norm": 0.05372268706560135, - "learning_rate": 3.6787752629735314e-05, - "loss": 0.0024915780872106553, - "step": 12105 - }, - { - "epoch": 2.0647911338448424, - "grad_norm": 0.08257800340652466, - "learning_rate": 3.676213547022452e-05, - "loss": 0.001413002610206604, - "step": 12110 - }, - { - "epoch": 2.0656436487638534, - "grad_norm": 0.037859030067920685, - "learning_rate": 3.673651865517344e-05, - "loss": 0.002315748296678066, - "step": 12115 - }, - { - "epoch": 2.0664961636828645, - "grad_norm": 0.04125140607357025, - "learning_rate": 3.6710902196540856e-05, - "loss": 0.0022393757477402686, - "step": 12120 - }, - { - "epoch": 2.0673486786018755, - "grad_norm": 0.09325335919857025, - "learning_rate": 3.668528610628538e-05, - "loss": 0.003246062248945236, - "step": 12125 - }, - { - "epoch": 2.0682011935208866, - "grad_norm": 0.09278098493814468, - "learning_rate": 3.665967039636543e-05, - "loss": 0.0027722738683223723, - "step": 12130 - }, - { - "epoch": 2.0690537084398977, - "grad_norm": 0.07906672358512878, - "learning_rate": 3.663405507873931e-05, - "loss": 0.0035691894590854645, - "step": 12135 - }, - { - "epoch": 2.0699062233589087, - "grad_norm": 0.04077119752764702, - "learning_rate": 3.660844016536507e-05, - "loss": 0.0018417894840240478, - "step": 12140 - }, - { - "epoch": 2.07075873827792, - "grad_norm": 0.08916836231946945, - "learning_rate": 3.6582825668200636e-05, - "loss": 0.0019499020650982856, - "step": 12145 - }, - { - "epoch": 2.071611253196931, - "grad_norm": 0.017643144354224205, - "learning_rate": 3.655721159920368e-05, - "loss": 0.0018016694113612175, - "step": 12150 - }, - { - "epoch": 2.072463768115942, - "grad_norm": 0.046675924211740494, - "learning_rate": 3.6531597970331704e-05, - "loss": 0.0023558875545859337, - "step": 12155 - }, - { - "epoch": 2.073316283034953, - "grad_norm": 0.06159510463476181, - "learning_rate": 3.650598479354202e-05, - "loss": 0.003485919535160065, - "step": 12160 - }, - { - "epoch": 2.074168797953964, - "grad_norm": 0.10620608925819397, - "learning_rate": 3.64803720807917e-05, - "loss": 0.0021355047821998594, - "step": 12165 - }, - { - "epoch": 2.075021312872975, - "grad_norm": 0.03321434184908867, - "learning_rate": 3.645475984403761e-05, - "loss": 0.0027330033481121063, - "step": 12170 - }, - { - "epoch": 2.075873827791986, - "grad_norm": 0.05574263632297516, - "learning_rate": 3.642914809523639e-05, - "loss": 0.0017123395577073098, - "step": 12175 - }, - { - "epoch": 2.0767263427109977, - "grad_norm": 0.045334603637456894, - "learning_rate": 3.640353684634446e-05, - "loss": 0.001525832526385784, - "step": 12180 - }, - { - "epoch": 2.0775788576300087, - "grad_norm": 0.05117806792259216, - "learning_rate": 3.6377926109318005e-05, - "loss": 0.0022421007975935935, - "step": 12185 - }, - { - "epoch": 2.0784313725490198, - "grad_norm": 0.02836792916059494, - "learning_rate": 3.635231589611297e-05, - "loss": 0.003241851553320885, - "step": 12190 - }, - { - "epoch": 2.079283887468031, - "grad_norm": 0.13245631754398346, - "learning_rate": 3.632670621868506e-05, - "loss": 0.0028171174228191374, - "step": 12195 - }, - { - "epoch": 2.080136402387042, - "grad_norm": 0.04175787791609764, - "learning_rate": 3.63010970889897e-05, - "loss": 0.0026013338938355446, - "step": 12200 - }, - { - "epoch": 2.080988917306053, - "grad_norm": 0.022211721166968346, - "learning_rate": 3.6275488518982104e-05, - "loss": 0.0029422508552670477, - "step": 12205 - }, - { - "epoch": 2.081841432225064, - "grad_norm": 0.0889682024717331, - "learning_rate": 3.6249880520617205e-05, - "loss": 0.002521348185837269, - "step": 12210 - }, - { - "epoch": 2.082693947144075, - "grad_norm": 0.022678803652524948, - "learning_rate": 3.622427310584967e-05, - "loss": 0.0010427280329167842, - "step": 12215 - }, - { - "epoch": 2.083546462063086, - "grad_norm": 0.07812847197055817, - "learning_rate": 3.6198666286633886e-05, - "loss": 0.002325686253607273, - "step": 12220 - }, - { - "epoch": 2.084398976982097, - "grad_norm": 0.06912051141262054, - "learning_rate": 3.6173060074923945e-05, - "loss": 0.0022675972431898117, - "step": 12225 - }, - { - "epoch": 2.0852514919011083, - "grad_norm": 0.02951810136437416, - "learning_rate": 3.6147454482673715e-05, - "loss": 0.00159697774797678, - "step": 12230 - }, - { - "epoch": 2.0861040068201193, - "grad_norm": 0.11821833997964859, - "learning_rate": 3.6121849521836735e-05, - "loss": 0.002206057496368885, - "step": 12235 - }, - { - "epoch": 2.0869565217391304, - "grad_norm": 0.05461777001619339, - "learning_rate": 3.609624520436624e-05, - "loss": 0.0012241648510098457, - "step": 12240 - }, - { - "epoch": 2.0878090366581414, - "grad_norm": 0.05038715526461601, - "learning_rate": 3.607064154221516e-05, - "loss": 0.002225806750357151, - "step": 12245 - }, - { - "epoch": 2.0886615515771525, - "grad_norm": 0.03050738200545311, - "learning_rate": 3.604503854733617e-05, - "loss": 0.0020998189225792884, - "step": 12250 - }, - { - "epoch": 2.0895140664961636, - "grad_norm": 0.07000287622213364, - "learning_rate": 3.6019436231681585e-05, - "loss": 0.0022106122225522993, - "step": 12255 - }, - { - "epoch": 2.0903665814151746, - "grad_norm": 0.0332137756049633, - "learning_rate": 3.5993834607203416e-05, - "loss": 0.0020401908084750177, - "step": 12260 - }, - { - "epoch": 2.0912190963341857, - "grad_norm": 0.0996270552277565, - "learning_rate": 3.596823368585336e-05, - "loss": 0.002487153559923172, - "step": 12265 - }, - { - "epoch": 2.0920716112531967, - "grad_norm": 0.1305847465991974, - "learning_rate": 3.594263347958276e-05, - "loss": 0.0028627485036849974, - "step": 12270 - }, - { - "epoch": 2.092924126172208, - "grad_norm": 0.0762234702706337, - "learning_rate": 3.5917034000342664e-05, - "loss": 0.0020220713689923287, - "step": 12275 - }, - { - "epoch": 2.0937766410912193, - "grad_norm": 0.015480007976293564, - "learning_rate": 3.589143526008376e-05, - "loss": 0.00215108972042799, - "step": 12280 - }, - { - "epoch": 2.0946291560102304, - "grad_norm": 0.0862250104546547, - "learning_rate": 3.5865837270756385e-05, - "loss": 0.0020705640316009523, - "step": 12285 - }, - { - "epoch": 2.0954816709292414, - "grad_norm": 0.03390849754214287, - "learning_rate": 3.584024004431052e-05, - "loss": 0.002040168456733227, - "step": 12290 - }, - { - "epoch": 2.0963341858482525, - "grad_norm": 0.07754851132631302, - "learning_rate": 3.581464359269582e-05, - "loss": 0.0029265256598591805, - "step": 12295 - }, - { - "epoch": 2.0971867007672635, - "grad_norm": 0.0625162348151207, - "learning_rate": 3.578904792786155e-05, - "loss": 0.0020755715668201447, - "step": 12300 - }, - { - "epoch": 2.0980392156862746, - "grad_norm": 0.10999561101198196, - "learning_rate": 3.576345306175663e-05, - "loss": 0.0027062267065048216, - "step": 12305 - }, - { - "epoch": 2.0988917306052857, - "grad_norm": 0.03573682904243469, - "learning_rate": 3.573785900632959e-05, - "loss": 0.00178314708173275, - "step": 12310 - }, - { - "epoch": 2.0997442455242967, - "grad_norm": 0.07235981523990631, - "learning_rate": 3.5712265773528564e-05, - "loss": 0.00233871191740036, - "step": 12315 - }, - { - "epoch": 2.100596760443308, - "grad_norm": 0.054438747465610504, - "learning_rate": 3.568667337530135e-05, - "loss": 0.0031350374221801756, - "step": 12320 - }, - { - "epoch": 2.101449275362319, - "grad_norm": 0.07696446031332016, - "learning_rate": 3.566108182359533e-05, - "loss": 0.0019116310402750968, - "step": 12325 - }, - { - "epoch": 2.10230179028133, - "grad_norm": 0.0676850974559784, - "learning_rate": 3.563549113035749e-05, - "loss": 0.0011704936623573303, - "step": 12330 - }, - { - "epoch": 2.103154305200341, - "grad_norm": 0.07241418212652206, - "learning_rate": 3.5609901307534416e-05, - "loss": 0.002332131937146187, - "step": 12335 - }, - { - "epoch": 2.104006820119352, - "grad_norm": 0.0832296758890152, - "learning_rate": 3.558431236707227e-05, - "loss": 0.002539648115634918, - "step": 12340 - }, - { - "epoch": 2.104859335038363, - "grad_norm": 0.046911224722862244, - "learning_rate": 3.555872432091684e-05, - "loss": 0.0015112090855836867, - "step": 12345 - }, - { - "epoch": 2.105711849957374, - "grad_norm": 0.09462827444076538, - "learning_rate": 3.553313718101348e-05, - "loss": 0.0024237846955657005, - "step": 12350 - }, - { - "epoch": 2.106564364876385, - "grad_norm": 0.06934045255184174, - "learning_rate": 3.550755095930711e-05, - "loss": 0.0014186175540089607, - "step": 12355 - }, - { - "epoch": 2.1074168797953963, - "grad_norm": 0.05409622564911842, - "learning_rate": 3.5481965667742216e-05, - "loss": 0.0016573246568441391, - "step": 12360 - }, - { - "epoch": 2.1082693947144073, - "grad_norm": 0.05712766572833061, - "learning_rate": 3.545638131826289e-05, - "loss": 0.0029039720073342325, - "step": 12365 - }, - { - "epoch": 2.1091219096334184, - "grad_norm": 0.05685155466198921, - "learning_rate": 3.543079792281274e-05, - "loss": 0.0016390934586524963, - "step": 12370 - }, - { - "epoch": 2.10997442455243, - "grad_norm": 0.06140974909067154, - "learning_rate": 3.5405215493334966e-05, - "loss": 0.0038812048733234406, - "step": 12375 - }, - { - "epoch": 2.110826939471441, - "grad_norm": 0.0662747323513031, - "learning_rate": 3.537963404177227e-05, - "loss": 0.0029465768486261366, - "step": 12380 - }, - { - "epoch": 2.111679454390452, - "grad_norm": 0.05666056647896767, - "learning_rate": 3.535405358006694e-05, - "loss": 0.0028562054038047792, - "step": 12385 - }, - { - "epoch": 2.112531969309463, - "grad_norm": 0.02187039703130722, - "learning_rate": 3.532847412016077e-05, - "loss": 0.0017194624990224839, - "step": 12390 - }, - { - "epoch": 2.113384484228474, - "grad_norm": 0.040781840682029724, - "learning_rate": 3.530289567399513e-05, - "loss": 0.0026536308228969573, - "step": 12395 - }, - { - "epoch": 2.114236999147485, - "grad_norm": 0.05844609811902046, - "learning_rate": 3.527731825351088e-05, - "loss": 0.0018391696736216544, - "step": 12400 - }, - { - "epoch": 2.1150895140664963, - "grad_norm": 0.08661946654319763, - "learning_rate": 3.52517418706484e-05, - "loss": 0.0028108954429626465, - "step": 12405 - }, - { - "epoch": 2.1159420289855073, - "grad_norm": 0.05540858209133148, - "learning_rate": 3.52261665373476e-05, - "loss": 0.001869852840900421, - "step": 12410 - }, - { - "epoch": 2.1167945439045184, - "grad_norm": 0.05183592066168785, - "learning_rate": 3.520059226554789e-05, - "loss": 0.0038085319101810455, - "step": 12415 - }, - { - "epoch": 2.1176470588235294, - "grad_norm": 0.09019337594509125, - "learning_rate": 3.517501906718822e-05, - "loss": 0.0025485800579190254, - "step": 12420 - }, - { - "epoch": 2.1184995737425405, - "grad_norm": 0.05994381010532379, - "learning_rate": 3.514944695420698e-05, - "loss": 0.0023555709049105644, - "step": 12425 - }, - { - "epoch": 2.1193520886615516, - "grad_norm": 0.07013200968503952, - "learning_rate": 3.512387593854208e-05, - "loss": 0.0023415835574269296, - "step": 12430 - }, - { - "epoch": 2.1202046035805626, - "grad_norm": 0.0558604821562767, - "learning_rate": 3.509830603213094e-05, - "loss": 0.002999695762991905, - "step": 12435 - }, - { - "epoch": 2.1210571184995737, - "grad_norm": 0.054457131773233414, - "learning_rate": 3.507273724691045e-05, - "loss": 0.0022147590294480323, - "step": 12440 - }, - { - "epoch": 2.1219096334185847, - "grad_norm": 0.052365075796842575, - "learning_rate": 3.5047169594816955e-05, - "loss": 0.0023635342717170716, - "step": 12445 - }, - { - "epoch": 2.122762148337596, - "grad_norm": 0.047059565782547, - "learning_rate": 3.502160308778627e-05, - "loss": 0.0015694497153162957, - "step": 12450 - }, - { - "epoch": 2.123614663256607, - "grad_norm": 0.03100336343050003, - "learning_rate": 3.499603773775371e-05, - "loss": 0.0020049646496772765, - "step": 12455 - }, - { - "epoch": 2.124467178175618, - "grad_norm": 0.07436710596084595, - "learning_rate": 3.4970473556654027e-05, - "loss": 0.004277446493506432, - "step": 12460 - }, - { - "epoch": 2.125319693094629, - "grad_norm": 0.044698864221572876, - "learning_rate": 3.4944910556421444e-05, - "loss": 0.0032587334513664245, - "step": 12465 - }, - { - "epoch": 2.12617220801364, - "grad_norm": 0.04725298285484314, - "learning_rate": 3.491934874898961e-05, - "loss": 0.0018061451613903047, - "step": 12470 - }, - { - "epoch": 2.127024722932651, - "grad_norm": 0.04054245352745056, - "learning_rate": 3.4893788146291604e-05, - "loss": 0.0017766639590263366, - "step": 12475 - }, - { - "epoch": 2.1278772378516626, - "grad_norm": 0.06061461195349693, - "learning_rate": 3.486822876025999e-05, - "loss": 0.0025131702423095703, - "step": 12480 - }, - { - "epoch": 2.1287297527706737, - "grad_norm": 0.058438993990421295, - "learning_rate": 3.4842670602826744e-05, - "loss": 0.002218405343592167, - "step": 12485 - }, - { - "epoch": 2.1295822676896847, - "grad_norm": 0.057440634816884995, - "learning_rate": 3.481711368592327e-05, - "loss": 0.0015549706295132637, - "step": 12490 - }, - { - "epoch": 2.130434782608696, - "grad_norm": 0.06638845056295395, - "learning_rate": 3.4791558021480355e-05, - "loss": 0.002662469446659088, - "step": 12495 - }, - { - "epoch": 2.131287297527707, - "grad_norm": 0.06725790351629257, - "learning_rate": 3.476600362142824e-05, - "loss": 0.0024463947862386703, - "step": 12500 - }, - { - "epoch": 2.132139812446718, - "grad_norm": 0.07708985358476639, - "learning_rate": 3.474045049769659e-05, - "loss": 0.0034916583448648454, - "step": 12505 - }, - { - "epoch": 2.132992327365729, - "grad_norm": 0.06412148475646973, - "learning_rate": 3.4714898662214454e-05, - "loss": 0.002831364795565605, - "step": 12510 - }, - { - "epoch": 2.13384484228474, - "grad_norm": 0.04649505391716957, - "learning_rate": 3.468934812691027e-05, - "loss": 0.002048753574490547, - "step": 12515 - }, - { - "epoch": 2.134697357203751, - "grad_norm": 0.04807932674884796, - "learning_rate": 3.4663798903711865e-05, - "loss": 0.0018209950998425485, - "step": 12520 - }, - { - "epoch": 2.135549872122762, - "grad_norm": 0.043283116072416306, - "learning_rate": 3.4638251004546476e-05, - "loss": 0.001797056198120117, - "step": 12525 - }, - { - "epoch": 2.136402387041773, - "grad_norm": 0.015419692732393742, - "learning_rate": 3.4612704441340716e-05, - "loss": 0.002100854739546776, - "step": 12530 - }, - { - "epoch": 2.1372549019607843, - "grad_norm": 0.05244193226099014, - "learning_rate": 3.458715922602057e-05, - "loss": 0.002430478297173977, - "step": 12535 - }, - { - "epoch": 2.1381074168797953, - "grad_norm": 0.08995307981967926, - "learning_rate": 3.4561615370511394e-05, - "loss": 0.0023157089948654176, - "step": 12540 - }, - { - "epoch": 2.1389599317988064, - "grad_norm": 0.06513174623250961, - "learning_rate": 3.4536072886737894e-05, - "loss": 0.002109107933938503, - "step": 12545 - }, - { - "epoch": 2.1398124467178175, - "grad_norm": 0.12199243903160095, - "learning_rate": 3.4510531786624176e-05, - "loss": 0.0016247857362031936, - "step": 12550 - }, - { - "epoch": 2.1406649616368285, - "grad_norm": 0.06062543764710426, - "learning_rate": 3.4484992082093665e-05, - "loss": 0.0033494606614112854, - "step": 12555 - }, - { - "epoch": 2.1415174765558396, - "grad_norm": 0.08636222034692764, - "learning_rate": 3.445945378506915e-05, - "loss": 0.0037529505789279938, - "step": 12560 - }, - { - "epoch": 2.1423699914748506, - "grad_norm": 0.027961688116192818, - "learning_rate": 3.443391690747274e-05, - "loss": 0.0016466494649648666, - "step": 12565 - }, - { - "epoch": 2.1432225063938617, - "grad_norm": 0.033621031790971756, - "learning_rate": 3.440838146122591e-05, - "loss": 0.002477872557938099, - "step": 12570 - }, - { - "epoch": 2.144075021312873, - "grad_norm": 0.08104594051837921, - "learning_rate": 3.4382847458249453e-05, - "loss": 0.0031348835676908494, - "step": 12575 - }, - { - "epoch": 2.1449275362318843, - "grad_norm": 0.07412353157997131, - "learning_rate": 3.4357314910463506e-05, - "loss": 0.002509618178009987, - "step": 12580 - }, - { - "epoch": 2.1457800511508953, - "grad_norm": 0.04261288791894913, - "learning_rate": 3.43317838297875e-05, - "loss": 0.0021477997303009032, - "step": 12585 - }, - { - "epoch": 2.1466325660699064, - "grad_norm": 0.15133292973041534, - "learning_rate": 3.430625422814018e-05, - "loss": 0.0033604972064495086, - "step": 12590 - }, - { - "epoch": 2.1474850809889174, - "grad_norm": 0.08455967903137207, - "learning_rate": 3.428072611743962e-05, - "loss": 0.0035134248435497286, - "step": 12595 - }, - { - "epoch": 2.1483375959079285, - "grad_norm": 0.10830427706241608, - "learning_rate": 3.425519950960321e-05, - "loss": 0.003783620521426201, - "step": 12600 - }, - { - "epoch": 2.1491901108269396, - "grad_norm": 0.05701782926917076, - "learning_rate": 3.422967441654761e-05, - "loss": 0.0017763100564479827, - "step": 12605 - }, - { - "epoch": 2.1500426257459506, - "grad_norm": 0.058323513716459274, - "learning_rate": 3.420415085018878e-05, - "loss": 0.003765106201171875, - "step": 12610 - }, - { - "epoch": 2.1508951406649617, - "grad_norm": 0.08780697733163834, - "learning_rate": 3.417862882244195e-05, - "loss": 0.0021065909415483473, - "step": 12615 - }, - { - "epoch": 2.1517476555839727, - "grad_norm": 0.08741293847560883, - "learning_rate": 3.415310834522168e-05, - "loss": 0.0022673629224300384, - "step": 12620 - }, - { - "epoch": 2.152600170502984, - "grad_norm": 0.08681067824363708, - "learning_rate": 3.412758943044177e-05, - "loss": 0.0029561318457126617, - "step": 12625 - }, - { - "epoch": 2.153452685421995, - "grad_norm": 0.05104825645685196, - "learning_rate": 3.4102072090015306e-05, - "loss": 0.0028430519625544546, - "step": 12630 - }, - { - "epoch": 2.154305200341006, - "grad_norm": 0.05437494069337845, - "learning_rate": 3.4076556335854606e-05, - "loss": 0.0026259947568178176, - "step": 12635 - }, - { - "epoch": 2.155157715260017, - "grad_norm": 0.016572406515479088, - "learning_rate": 3.4051042179871286e-05, - "loss": 0.00198390893638134, - "step": 12640 - }, - { - "epoch": 2.156010230179028, - "grad_norm": 0.04134957864880562, - "learning_rate": 3.4025529633976216e-05, - "loss": 0.0017651205882430077, - "step": 12645 - }, - { - "epoch": 2.156862745098039, - "grad_norm": 0.04091856628656387, - "learning_rate": 3.400001871007949e-05, - "loss": 0.002631684020161629, - "step": 12650 - }, - { - "epoch": 2.15771526001705, - "grad_norm": 0.08851557224988937, - "learning_rate": 3.397450942009046e-05, - "loss": 0.004056418687105179, - "step": 12655 - }, - { - "epoch": 2.1585677749360612, - "grad_norm": 0.09870146960020065, - "learning_rate": 3.3949001775917686e-05, - "loss": 0.0017272619530558585, - "step": 12660 - }, - { - "epoch": 2.1594202898550723, - "grad_norm": 0.059828147292137146, - "learning_rate": 3.3923495789469016e-05, - "loss": 0.0018833462148904801, - "step": 12665 - }, - { - "epoch": 2.1602728047740833, - "grad_norm": 0.04078202694654465, - "learning_rate": 3.3897991472651495e-05, - "loss": 0.0015183920040726662, - "step": 12670 - }, - { - "epoch": 2.1611253196930944, - "grad_norm": 0.09713901579380035, - "learning_rate": 3.387248883737137e-05, - "loss": 0.002313835546374321, - "step": 12675 - }, - { - "epoch": 2.161977834612106, - "grad_norm": 0.13590694963932037, - "learning_rate": 3.3846987895534116e-05, - "loss": 0.002948279120028019, - "step": 12680 - }, - { - "epoch": 2.162830349531117, - "grad_norm": 0.05830051749944687, - "learning_rate": 3.3821488659044435e-05, - "loss": 0.002866750955581665, - "step": 12685 - }, - { - "epoch": 2.163682864450128, - "grad_norm": 0.08523424714803696, - "learning_rate": 3.3795991139806205e-05, - "loss": 0.001992848888039589, - "step": 12690 - }, - { - "epoch": 2.164535379369139, - "grad_norm": 0.07573958486318588, - "learning_rate": 3.3770495349722534e-05, - "loss": 0.003222312778234482, - "step": 12695 - }, - { - "epoch": 2.16538789428815, - "grad_norm": 0.1144784539937973, - "learning_rate": 3.374500130069569e-05, - "loss": 0.0023121457546949387, - "step": 12700 - }, - { - "epoch": 2.166240409207161, - "grad_norm": 0.037679724395275116, - "learning_rate": 3.371950900462716e-05, - "loss": 0.0022720521315932273, - "step": 12705 - }, - { - "epoch": 2.1670929241261723, - "grad_norm": 0.09523876011371613, - "learning_rate": 3.369401847341756e-05, - "loss": 0.0026744550094008447, - "step": 12710 - }, - { - "epoch": 2.1679454390451833, - "grad_norm": 0.08904188126325607, - "learning_rate": 3.3668529718966753e-05, - "loss": 0.0025367535650730132, - "step": 12715 - }, - { - "epoch": 2.1687979539641944, - "grad_norm": 0.065862737596035, - "learning_rate": 3.364304275317373e-05, - "loss": 0.0017513807862997055, - "step": 12720 - }, - { - "epoch": 2.1696504688832055, - "grad_norm": 0.03308388963341713, - "learning_rate": 3.361755758793665e-05, - "loss": 0.001534645166248083, - "step": 12725 - }, - { - "epoch": 2.1705029838022165, - "grad_norm": 0.11249089986085892, - "learning_rate": 3.359207423515283e-05, - "loss": 0.0012927086092531681, - "step": 12730 - }, - { - "epoch": 2.1713554987212276, - "grad_norm": 0.09918250143527985, - "learning_rate": 3.356659270671875e-05, - "loss": 0.0035567093640565873, - "step": 12735 - }, - { - "epoch": 2.1722080136402386, - "grad_norm": 0.008674295619130135, - "learning_rate": 3.354111301453005e-05, - "loss": 0.0013304737396538258, - "step": 12740 - }, - { - "epoch": 2.1730605285592497, - "grad_norm": 0.09038940817117691, - "learning_rate": 3.351563517048149e-05, - "loss": 0.0022449616342782976, - "step": 12745 - }, - { - "epoch": 2.1739130434782608, - "grad_norm": 0.11863812804222107, - "learning_rate": 3.349015918646695e-05, - "loss": 0.0029456689953804016, - "step": 12750 - }, - { - "epoch": 2.174765558397272, - "grad_norm": 0.055224135518074036, - "learning_rate": 3.34646850743795e-05, - "loss": 0.0021983785554766655, - "step": 12755 - }, - { - "epoch": 2.175618073316283, - "grad_norm": 0.05251838266849518, - "learning_rate": 3.34392128461113e-05, - "loss": 0.0018048876896500587, - "step": 12760 - }, - { - "epoch": 2.176470588235294, - "grad_norm": 0.07146445661783218, - "learning_rate": 3.341374251355361e-05, - "loss": 0.0030932359397411345, - "step": 12765 - }, - { - "epoch": 2.177323103154305, - "grad_norm": 0.03640792518854141, - "learning_rate": 3.338827408859686e-05, - "loss": 0.0016893571242690085, - "step": 12770 - }, - { - "epoch": 2.1781756180733165, - "grad_norm": 0.0680721327662468, - "learning_rate": 3.336280758313052e-05, - "loss": 0.0037735387682914733, - "step": 12775 - }, - { - "epoch": 2.1790281329923276, - "grad_norm": 0.047598470002412796, - "learning_rate": 3.333734300904322e-05, - "loss": 0.002026566304266453, - "step": 12780 - }, - { - "epoch": 2.1798806479113386, - "grad_norm": 0.08361580222845078, - "learning_rate": 3.3311880378222695e-05, - "loss": 0.002865005284547806, - "step": 12785 - }, - { - "epoch": 2.1807331628303497, - "grad_norm": 0.04869835823774338, - "learning_rate": 3.328641970255572e-05, - "loss": 0.0018146531656384468, - "step": 12790 - }, - { - "epoch": 2.1815856777493607, - "grad_norm": 0.06970708072185516, - "learning_rate": 3.326096099392819e-05, - "loss": 0.0022316936403512953, - "step": 12795 - }, - { - "epoch": 2.182438192668372, - "grad_norm": 0.07073621451854706, - "learning_rate": 3.323550426422508e-05, - "loss": 0.0021546846255660057, - "step": 12800 - }, - { - "epoch": 2.183290707587383, - "grad_norm": 0.0552116334438324, - "learning_rate": 3.3210049525330426e-05, - "loss": 0.0022750692442059517, - "step": 12805 - }, - { - "epoch": 2.184143222506394, - "grad_norm": 0.08244488388299942, - "learning_rate": 3.318459678912737e-05, - "loss": 0.0027180306613445284, - "step": 12810 - }, - { - "epoch": 2.184995737425405, - "grad_norm": 0.07275483757257462, - "learning_rate": 3.315914606749808e-05, - "loss": 0.002150987088680267, - "step": 12815 - }, - { - "epoch": 2.185848252344416, - "grad_norm": 0.06152818351984024, - "learning_rate": 3.3133697372323804e-05, - "loss": 0.002709987387061119, - "step": 12820 - }, - { - "epoch": 2.186700767263427, - "grad_norm": 0.07358045876026154, - "learning_rate": 3.310825071548483e-05, - "loss": 0.0029207577928900717, - "step": 12825 - }, - { - "epoch": 2.187553282182438, - "grad_norm": 0.07633842527866364, - "learning_rate": 3.3082806108860516e-05, - "loss": 0.0028854381293058396, - "step": 12830 - }, - { - "epoch": 2.1884057971014492, - "grad_norm": 0.0533052496612072, - "learning_rate": 3.305736356432926e-05, - "loss": 0.0023338528349995612, - "step": 12835 - }, - { - "epoch": 2.1892583120204603, - "grad_norm": 0.09400077164173126, - "learning_rate": 3.303192309376846e-05, - "loss": 0.00362023301422596, - "step": 12840 - }, - { - "epoch": 2.1901108269394713, - "grad_norm": 0.09847433120012283, - "learning_rate": 3.300648470905459e-05, - "loss": 0.003238249197602272, - "step": 12845 - }, - { - "epoch": 2.1909633418584824, - "grad_norm": 0.09695439040660858, - "learning_rate": 3.298104842206314e-05, - "loss": 0.002254056558012962, - "step": 12850 - }, - { - "epoch": 2.1918158567774935, - "grad_norm": 0.07510244101285934, - "learning_rate": 3.295561424466861e-05, - "loss": 0.002555438503623009, - "step": 12855 - }, - { - "epoch": 2.1926683716965045, - "grad_norm": 0.07085850089788437, - "learning_rate": 3.2930182188744524e-05, - "loss": 0.0029295925050973892, - "step": 12860 - }, - { - "epoch": 2.1935208866155156, - "grad_norm": 0.12662498652935028, - "learning_rate": 3.290475226616339e-05, - "loss": 0.0019443847239017486, - "step": 12865 - }, - { - "epoch": 2.1943734015345266, - "grad_norm": 0.08738470077514648, - "learning_rate": 3.2879324488796755e-05, - "loss": 0.002229847013950348, - "step": 12870 - }, - { - "epoch": 2.1952259164535377, - "grad_norm": 0.04957102984189987, - "learning_rate": 3.285389886851517e-05, - "loss": 0.0017434298992156983, - "step": 12875 - }, - { - "epoch": 2.196078431372549, - "grad_norm": 0.057968392968177795, - "learning_rate": 3.282847541718814e-05, - "loss": 0.003453432023525238, - "step": 12880 - }, - { - "epoch": 2.1969309462915603, - "grad_norm": 0.1128922700881958, - "learning_rate": 3.280305414668419e-05, - "loss": 0.0025962982326745987, - "step": 12885 - }, - { - "epoch": 2.1977834612105713, - "grad_norm": 0.0661446675658226, - "learning_rate": 3.2777635068870784e-05, - "loss": 0.002279244549572468, - "step": 12890 - }, - { - "epoch": 2.1986359761295824, - "grad_norm": 0.09260411560535431, - "learning_rate": 3.275221819561443e-05, - "loss": 0.002637815475463867, - "step": 12895 - }, - { - "epoch": 2.1994884910485935, - "grad_norm": 0.08168021589517593, - "learning_rate": 3.272680353878056e-05, - "loss": 0.0029386602342128753, - "step": 12900 - }, - { - "epoch": 2.2003410059676045, - "grad_norm": 0.06187237799167633, - "learning_rate": 3.270139111023358e-05, - "loss": 0.0018257955089211464, - "step": 12905 - }, - { - "epoch": 2.2011935208866156, - "grad_norm": 0.09450117498636246, - "learning_rate": 3.267598092183684e-05, - "loss": 0.0023655250668525698, - "step": 12910 - }, - { - "epoch": 2.2020460358056266, - "grad_norm": 0.060870688408613205, - "learning_rate": 3.2650572985452685e-05, - "loss": 0.001705418713390827, - "step": 12915 - }, - { - "epoch": 2.2028985507246377, - "grad_norm": 0.06867264956235886, - "learning_rate": 3.262516731294237e-05, - "loss": 0.00248488187789917, - "step": 12920 - }, - { - "epoch": 2.2037510656436488, - "grad_norm": 0.07654258608818054, - "learning_rate": 3.259976391616612e-05, - "loss": 0.002200855314731598, - "step": 12925 - }, - { - "epoch": 2.20460358056266, - "grad_norm": 0.06781245768070221, - "learning_rate": 3.257436280698308e-05, - "loss": 0.002006441354751587, - "step": 12930 - }, - { - "epoch": 2.205456095481671, - "grad_norm": 0.045858342200517654, - "learning_rate": 3.254896399725132e-05, - "loss": 0.0020667938515543938, - "step": 12935 - }, - { - "epoch": 2.206308610400682, - "grad_norm": 0.06805605441331863, - "learning_rate": 3.2523567498827865e-05, - "loss": 0.002215307205915451, - "step": 12940 - }, - { - "epoch": 2.207161125319693, - "grad_norm": 0.07554472237825394, - "learning_rate": 3.2498173323568645e-05, - "loss": 0.0021156981587409974, - "step": 12945 - }, - { - "epoch": 2.208013640238704, - "grad_norm": 0.049611154943704605, - "learning_rate": 3.2472781483328506e-05, - "loss": 0.0037985272705554963, - "step": 12950 - }, - { - "epoch": 2.208866155157715, - "grad_norm": 0.04867832362651825, - "learning_rate": 3.24473919899612e-05, - "loss": 0.0011579260230064393, - "step": 12955 - }, - { - "epoch": 2.209718670076726, - "grad_norm": 0.04439609497785568, - "learning_rate": 3.2422004855319376e-05, - "loss": 0.0033864513039588927, - "step": 12960 - }, - { - "epoch": 2.2105711849957372, - "grad_norm": 0.054114069789648056, - "learning_rate": 3.23966200912546e-05, - "loss": 0.0017186013981699943, - "step": 12965 - }, - { - "epoch": 2.2114236999147483, - "grad_norm": 0.03286417946219444, - "learning_rate": 3.237123770961735e-05, - "loss": 0.0013779066503047943, - "step": 12970 - }, - { - "epoch": 2.21227621483376, - "grad_norm": 0.05740232393145561, - "learning_rate": 3.234585772225694e-05, - "loss": 0.00376686155796051, - "step": 12975 - }, - { - "epoch": 2.213128729752771, - "grad_norm": 0.11821190267801285, - "learning_rate": 3.232048014102158e-05, - "loss": 0.003515421971678734, - "step": 12980 - }, - { - "epoch": 2.213981244671782, - "grad_norm": 0.06561318039894104, - "learning_rate": 3.229510497775838e-05, - "loss": 0.0034034676849842072, - "step": 12985 - }, - { - "epoch": 2.214833759590793, - "grad_norm": 0.06076068431138992, - "learning_rate": 3.226973224431333e-05, - "loss": 0.0018323207274079322, - "step": 12990 - }, - { - "epoch": 2.215686274509804, - "grad_norm": 0.05743642896413803, - "learning_rate": 3.2244361952531266e-05, - "loss": 0.002844391018152237, - "step": 12995 - }, - { - "epoch": 2.216538789428815, - "grad_norm": 0.0632607489824295, - "learning_rate": 3.221899411425586e-05, - "loss": 0.003329380601644516, - "step": 13000 - }, - { - "epoch": 2.217391304347826, - "grad_norm": 0.06082088127732277, - "learning_rate": 3.219362874132966e-05, - "loss": 0.0026398774236440657, - "step": 13005 - }, - { - "epoch": 2.2182438192668372, - "grad_norm": 0.07731121778488159, - "learning_rate": 3.2168265845594075e-05, - "loss": 0.00193992517888546, - "step": 13010 - }, - { - "epoch": 2.2190963341858483, - "grad_norm": 0.08783961087465286, - "learning_rate": 3.214290543888938e-05, - "loss": 0.0019096124917268753, - "step": 13015 - }, - { - "epoch": 2.2199488491048593, - "grad_norm": 0.07576426863670349, - "learning_rate": 3.211754753305461e-05, - "loss": 0.002824045717716217, - "step": 13020 - }, - { - "epoch": 2.2208013640238704, - "grad_norm": 0.0671941265463829, - "learning_rate": 3.20921921399277e-05, - "loss": 0.0025903450325131415, - "step": 13025 - }, - { - "epoch": 2.2216538789428815, - "grad_norm": 0.025313038378953934, - "learning_rate": 3.206683927134538e-05, - "loss": 0.001357127632945776, - "step": 13030 - }, - { - "epoch": 2.2225063938618925, - "grad_norm": 0.0281735397875309, - "learning_rate": 3.204148893914323e-05, - "loss": 0.0018472330644726752, - "step": 13035 - }, - { - "epoch": 2.2233589087809036, - "grad_norm": 0.027222834527492523, - "learning_rate": 3.2016141155155625e-05, - "loss": 0.0018411261960864067, - "step": 13040 - }, - { - "epoch": 2.2242114236999146, - "grad_norm": 0.04794001951813698, - "learning_rate": 3.199079593121574e-05, - "loss": 0.0015307093039155007, - "step": 13045 - }, - { - "epoch": 2.2250639386189257, - "grad_norm": 0.05856316536664963, - "learning_rate": 3.196545327915558e-05, - "loss": 0.001051103323698044, - "step": 13050 - }, - { - "epoch": 2.2259164535379368, - "grad_norm": 0.037851642817258835, - "learning_rate": 3.194011321080592e-05, - "loss": 0.0020413145422935484, - "step": 13055 - }, - { - "epoch": 2.226768968456948, - "grad_norm": 0.04197809472680092, - "learning_rate": 3.191477573799638e-05, - "loss": 0.0025324104353785515, - "step": 13060 - }, - { - "epoch": 2.227621483375959, - "grad_norm": 0.04126058518886566, - "learning_rate": 3.188944087255531e-05, - "loss": 0.001765124499797821, - "step": 13065 - }, - { - "epoch": 2.2284739982949704, - "grad_norm": 0.13436861336231232, - "learning_rate": 3.186410862630988e-05, - "loss": 0.003620542213320732, - "step": 13070 - }, - { - "epoch": 2.229326513213981, - "grad_norm": 0.05177616328001022, - "learning_rate": 3.183877901108601e-05, - "loss": 0.001679854467511177, - "step": 13075 - }, - { - "epoch": 2.2301790281329925, - "grad_norm": 0.03360729292035103, - "learning_rate": 3.1813452038708415e-05, - "loss": 0.002009689994156361, - "step": 13080 - }, - { - "epoch": 2.2310315430520036, - "grad_norm": 0.102437824010849, - "learning_rate": 3.178812772100058e-05, - "loss": 0.002533908933401108, - "step": 13085 - }, - { - "epoch": 2.2318840579710146, - "grad_norm": 0.045174695551395416, - "learning_rate": 3.176280606978473e-05, - "loss": 0.0023472383618354797, - "step": 13090 - }, - { - "epoch": 2.2327365728900257, - "grad_norm": 0.0679149329662323, - "learning_rate": 3.173748709688184e-05, - "loss": 0.00249241441488266, - "step": 13095 - }, - { - "epoch": 2.2335890878090368, - "grad_norm": 0.1367262750864029, - "learning_rate": 3.171217081411166e-05, - "loss": 0.002387053519487381, - "step": 13100 - }, - { - "epoch": 2.234441602728048, - "grad_norm": 0.06661707162857056, - "learning_rate": 3.168685723329269e-05, - "loss": 0.002376999333500862, - "step": 13105 - }, - { - "epoch": 2.235294117647059, - "grad_norm": 0.08916410058736801, - "learning_rate": 3.166154636624214e-05, - "loss": 0.0027421964332461357, - "step": 13110 - }, - { - "epoch": 2.23614663256607, - "grad_norm": 0.058119386434555054, - "learning_rate": 3.163623822477595e-05, - "loss": 0.0018962904810905456, - "step": 13115 - }, - { - "epoch": 2.236999147485081, - "grad_norm": 0.06457269936800003, - "learning_rate": 3.161093282070882e-05, - "loss": 0.001441392581909895, - "step": 13120 - }, - { - "epoch": 2.237851662404092, - "grad_norm": 0.1250019371509552, - "learning_rate": 3.158563016585412e-05, - "loss": 0.002274188958108425, - "step": 13125 - }, - { - "epoch": 2.238704177323103, - "grad_norm": 0.03324245661497116, - "learning_rate": 3.156033027202403e-05, - "loss": 0.002002820558845997, - "step": 13130 - }, - { - "epoch": 2.239556692242114, - "grad_norm": 0.01897227205336094, - "learning_rate": 3.153503315102934e-05, - "loss": 0.0016582176089286805, - "step": 13135 - }, - { - "epoch": 2.2404092071611252, - "grad_norm": 0.07142049074172974, - "learning_rate": 3.15097388146796e-05, - "loss": 0.002489439025521278, - "step": 13140 - }, - { - "epoch": 2.2412617220801363, - "grad_norm": 0.05619347095489502, - "learning_rate": 3.148444727478303e-05, - "loss": 0.0021767957136034966, - "step": 13145 - }, - { - "epoch": 2.2421142369991474, - "grad_norm": 0.0950259119272232, - "learning_rate": 3.14591585431466e-05, - "loss": 0.001732981950044632, - "step": 13150 - }, - { - "epoch": 2.2429667519181584, - "grad_norm": 0.06186724454164505, - "learning_rate": 3.143387263157591e-05, - "loss": 0.001604793407022953, - "step": 13155 - }, - { - "epoch": 2.2438192668371695, - "grad_norm": 0.0921434834599495, - "learning_rate": 3.1408589551875256e-05, - "loss": 0.001957142725586891, - "step": 13160 - }, - { - "epoch": 2.2446717817561805, - "grad_norm": 0.05556231364607811, - "learning_rate": 3.138330931584763e-05, - "loss": 0.002686610072851181, - "step": 13165 - }, - { - "epoch": 2.2455242966751916, - "grad_norm": 0.10184850543737411, - "learning_rate": 3.1358031935294666e-05, - "loss": 0.0019098062068223954, - "step": 13170 - }, - { - "epoch": 2.246376811594203, - "grad_norm": 0.08860436826944351, - "learning_rate": 3.133275742201673e-05, - "loss": 0.002402664348483086, - "step": 13175 - }, - { - "epoch": 2.247229326513214, - "grad_norm": 0.06324724107980728, - "learning_rate": 3.130748578781278e-05, - "loss": 0.0018930312246084214, - "step": 13180 - }, - { - "epoch": 2.2480818414322252, - "grad_norm": 0.07382629811763763, - "learning_rate": 3.128221704448045e-05, - "loss": 0.0026824956759810446, - "step": 13185 - }, - { - "epoch": 2.2489343563512363, - "grad_norm": 0.1002819687128067, - "learning_rate": 3.125695120381603e-05, - "loss": 0.0030449360609054567, - "step": 13190 - }, - { - "epoch": 2.249616368286445, - "eval_loss": 0.046705588698387146, - "eval_runtime": 3.7196, - "eval_samples_per_second": 67.748, - "eval_steps_per_second": 1.075, - "step": 13194 - }, - { - "eval_cer_subset": 0.013842070122298761, - "eval_cer_subset_edit_distance": 850, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 13194 - }, - { - "epoch": 2.2497868712702473, - "grad_norm": 0.030392520129680634, - "learning_rate": 3.123168827761447e-05, - "loss": 0.0015232504345476627, - "step": 13195 - }, - { - "epoch": 2.2506393861892584, - "grad_norm": 0.04160630702972412, - "learning_rate": 3.1206428277669336e-05, - "loss": 0.0026638204231858253, - "step": 13200 - }, - { - "epoch": 2.2514919011082695, - "grad_norm": 0.06140404939651489, - "learning_rate": 3.118117121577284e-05, - "loss": 0.003001154027879238, - "step": 13205 - }, - { - "epoch": 2.2523444160272805, - "grad_norm": 0.06974830478429794, - "learning_rate": 3.115591710371581e-05, - "loss": 0.0032261811196804047, - "step": 13210 - }, - { - "epoch": 2.2531969309462916, - "grad_norm": 0.09120716899633408, - "learning_rate": 3.1130665953287695e-05, - "loss": 0.001386938989162445, - "step": 13215 - }, - { - "epoch": 2.2540494458653026, - "grad_norm": 0.06130429729819298, - "learning_rate": 3.110541777627661e-05, - "loss": 0.0014743787236511708, - "step": 13220 - }, - { - "epoch": 2.2549019607843137, - "grad_norm": 0.07033205777406693, - "learning_rate": 3.108017258446921e-05, - "loss": 0.003749256581068039, - "step": 13225 - }, - { - "epoch": 2.2557544757033248, - "grad_norm": 0.08650046586990356, - "learning_rate": 3.1054930389650804e-05, - "loss": 0.0023554276674985887, - "step": 13230 - }, - { - "epoch": 2.256606990622336, - "grad_norm": 0.06045643612742424, - "learning_rate": 3.102969120360529e-05, - "loss": 0.0019686706364154816, - "step": 13235 - }, - { - "epoch": 2.257459505541347, - "grad_norm": 0.1004268005490303, - "learning_rate": 3.100445503811514e-05, - "loss": 0.003136196732521057, - "step": 13240 - }, - { - "epoch": 2.258312020460358, - "grad_norm": 0.08810209482908249, - "learning_rate": 3.097922190496146e-05, - "loss": 0.002239716053009033, - "step": 13245 - }, - { - "epoch": 2.259164535379369, - "grad_norm": 0.10518727451562881, - "learning_rate": 3.095399181592392e-05, - "loss": 0.002447150461375713, - "step": 13250 - }, - { - "epoch": 2.26001705029838, - "grad_norm": 0.049536559730768204, - "learning_rate": 3.092876478278074e-05, - "loss": 0.0023296492174267767, - "step": 13255 - }, - { - "epoch": 2.260869565217391, - "grad_norm": 0.057701822370290756, - "learning_rate": 3.0903540817308734e-05, - "loss": 0.0018970953300595284, - "step": 13260 - }, - { - "epoch": 2.261722080136402, - "grad_norm": 0.04391616955399513, - "learning_rate": 3.087831993128333e-05, - "loss": 0.0026229951530694962, - "step": 13265 - }, - { - "epoch": 2.2625745950554137, - "grad_norm": 0.048150911927223206, - "learning_rate": 3.0853102136478444e-05, - "loss": 0.0015288691036403179, - "step": 13270 - }, - { - "epoch": 2.2634271099744243, - "grad_norm": 0.12074416130781174, - "learning_rate": 3.082788744466659e-05, - "loss": 0.0025329213589429856, - "step": 13275 - }, - { - "epoch": 2.264279624893436, - "grad_norm": 0.05400107055902481, - "learning_rate": 3.080267586761881e-05, - "loss": 0.0017294475808739662, - "step": 13280 - }, - { - "epoch": 2.265132139812447, - "grad_norm": 0.07027488201856613, - "learning_rate": 3.0777467417104717e-05, - "loss": 0.0026237966492772104, - "step": 13285 - }, - { - "epoch": 2.265984654731458, - "grad_norm": 0.06868001073598862, - "learning_rate": 3.075226210489247e-05, - "loss": 0.0021411897614598274, - "step": 13290 - }, - { - "epoch": 2.266837169650469, - "grad_norm": 0.07447243481874466, - "learning_rate": 3.072705994274874e-05, - "loss": 0.002808676287531853, - "step": 13295 - }, - { - "epoch": 2.26768968456948, - "grad_norm": 0.04292432591319084, - "learning_rate": 3.070186094243872e-05, - "loss": 0.001994679495692253, - "step": 13300 - }, - { - "epoch": 2.268542199488491, - "grad_norm": 0.06083334609866142, - "learning_rate": 3.067666511572614e-05, - "loss": 0.001621294766664505, - "step": 13305 - }, - { - "epoch": 2.269394714407502, - "grad_norm": 0.04339296743273735, - "learning_rate": 3.065147247437327e-05, - "loss": 0.002122482657432556, - "step": 13310 - }, - { - "epoch": 2.2702472293265132, - "grad_norm": 0.07901404052972794, - "learning_rate": 3.062628303014087e-05, - "loss": 0.0030757525935769083, - "step": 13315 - }, - { - "epoch": 2.2710997442455243, - "grad_norm": 0.046554502099752426, - "learning_rate": 3.060109679478821e-05, - "loss": 0.0022816451266407965, - "step": 13320 - }, - { - "epoch": 2.2719522591645354, - "grad_norm": 0.03428821638226509, - "learning_rate": 3.0575913780073036e-05, - "loss": 0.002047870494425297, - "step": 13325 - }, - { - "epoch": 2.2728047740835464, - "grad_norm": 0.09298217296600342, - "learning_rate": 3.0550733997751634e-05, - "loss": 0.002046193927526474, - "step": 13330 - }, - { - "epoch": 2.2736572890025575, - "grad_norm": 0.08458553999662399, - "learning_rate": 3.0525557459578786e-05, - "loss": 0.002566727437078953, - "step": 13335 - }, - { - "epoch": 2.2745098039215685, - "grad_norm": 0.10309132188558578, - "learning_rate": 3.050038417730772e-05, - "loss": 0.00358976349234581, - "step": 13340 - }, - { - "epoch": 2.2753623188405796, - "grad_norm": 0.08116701990365982, - "learning_rate": 3.0475214162690144e-05, - "loss": 0.003372102603316307, - "step": 13345 - }, - { - "epoch": 2.2762148337595907, - "grad_norm": 0.09258918464183807, - "learning_rate": 3.0450047427476292e-05, - "loss": 0.0037133049219846724, - "step": 13350 - }, - { - "epoch": 2.2770673486786017, - "grad_norm": 0.09618882834911346, - "learning_rate": 3.0424883983414797e-05, - "loss": 0.0024330444633960725, - "step": 13355 - }, - { - "epoch": 2.277919863597613, - "grad_norm": 0.04637463390827179, - "learning_rate": 3.039972384225282e-05, - "loss": 0.0011583495885133742, - "step": 13360 - }, - { - "epoch": 2.2787723785166243, - "grad_norm": 0.04919019341468811, - "learning_rate": 3.0374567015735953e-05, - "loss": 0.0017433254048228263, - "step": 13365 - }, - { - "epoch": 2.279624893435635, - "grad_norm": 0.07092445343732834, - "learning_rate": 3.0349413515608213e-05, - "loss": 0.0010275598615407944, - "step": 13370 - }, - { - "epoch": 2.2804774083546464, - "grad_norm": 0.06819095462560654, - "learning_rate": 3.03242633536121e-05, - "loss": 0.0018655678257346153, - "step": 13375 - }, - { - "epoch": 2.2813299232736575, - "grad_norm": 0.13397860527038574, - "learning_rate": 3.029911654148857e-05, - "loss": 0.0029754094779491425, - "step": 13380 - }, - { - "epoch": 2.2821824381926685, - "grad_norm": 0.09142930805683136, - "learning_rate": 3.0273973090976974e-05, - "loss": 0.0027707524597644804, - "step": 13385 - }, - { - "epoch": 2.2830349531116796, - "grad_norm": 0.06282728165388107, - "learning_rate": 3.0248833013815112e-05, - "loss": 0.0018412042409181595, - "step": 13390 - }, - { - "epoch": 2.2838874680306906, - "grad_norm": 0.05533494055271149, - "learning_rate": 3.0223696321739196e-05, - "loss": 0.0025158364325761793, - "step": 13395 - }, - { - "epoch": 2.2847399829497017, - "grad_norm": 0.08349598199129105, - "learning_rate": 3.0198563026483876e-05, - "loss": 0.002777436375617981, - "step": 13400 - }, - { - "epoch": 2.2855924978687128, - "grad_norm": 0.07469198107719421, - "learning_rate": 3.0173433139782227e-05, - "loss": 0.001951916702091694, - "step": 13405 - }, - { - "epoch": 2.286445012787724, - "grad_norm": 0.07126526534557343, - "learning_rate": 3.0148306673365708e-05, - "loss": 0.0031182590872049333, - "step": 13410 - }, - { - "epoch": 2.287297527706735, - "grad_norm": 0.06499479711055756, - "learning_rate": 3.0123183638964183e-05, - "loss": 0.001717902161180973, - "step": 13415 - }, - { - "epoch": 2.288150042625746, - "grad_norm": 0.03133346140384674, - "learning_rate": 3.0098064048305917e-05, - "loss": 0.0015830917283892632, - "step": 13420 - }, - { - "epoch": 2.289002557544757, - "grad_norm": 0.06725561618804932, - "learning_rate": 3.0072947913117573e-05, - "loss": 0.004541714489459991, - "step": 13425 - }, - { - "epoch": 2.289855072463768, - "grad_norm": 0.13644525408744812, - "learning_rate": 3.0047835245124216e-05, - "loss": 0.0027179479598999023, - "step": 13430 - }, - { - "epoch": 2.290707587382779, - "grad_norm": 0.06966832280158997, - "learning_rate": 3.0022726056049262e-05, - "loss": 0.0026542846113443374, - "step": 13435 - }, - { - "epoch": 2.29156010230179, - "grad_norm": 0.04449222609400749, - "learning_rate": 2.999762035761451e-05, - "loss": 0.0014596210792660713, - "step": 13440 - }, - { - "epoch": 2.2924126172208013, - "grad_norm": 0.05453059822320938, - "learning_rate": 2.9972518161540124e-05, - "loss": 0.0024629242718219755, - "step": 13445 - }, - { - "epoch": 2.2932651321398123, - "grad_norm": 0.06370346248149872, - "learning_rate": 2.9947419479544677e-05, - "loss": 0.0018157381564378738, - "step": 13450 - }, - { - "epoch": 2.2941176470588234, - "grad_norm": 0.149154931306839, - "learning_rate": 2.992232432334505e-05, - "loss": 0.0038953136652708055, - "step": 13455 - }, - { - "epoch": 2.2949701619778344, - "grad_norm": 0.08758609741926193, - "learning_rate": 2.9897232704656494e-05, - "loss": 0.00197781715542078, - "step": 13460 - }, - { - "epoch": 2.2958226768968455, - "grad_norm": 0.04877983406186104, - "learning_rate": 2.9872144635192625e-05, - "loss": 0.0018029011785984038, - "step": 13465 - }, - { - "epoch": 2.296675191815857, - "grad_norm": 0.029492873698472977, - "learning_rate": 2.984706012666536e-05, - "loss": 0.00226336307823658, - "step": 13470 - }, - { - "epoch": 2.2975277067348676, - "grad_norm": 0.09038830548524857, - "learning_rate": 2.982197919078502e-05, - "loss": 0.0024063091725111006, - "step": 13475 - }, - { - "epoch": 2.298380221653879, - "grad_norm": 0.08629653602838516, - "learning_rate": 2.97969018392602e-05, - "loss": 0.0019390033558011055, - "step": 13480 - }, - { - "epoch": 2.29923273657289, - "grad_norm": 0.08667116612195969, - "learning_rate": 2.9771828083797832e-05, - "loss": 0.003171199932694435, - "step": 13485 - }, - { - "epoch": 2.3000852514919012, - "grad_norm": 0.07069036364555359, - "learning_rate": 2.974675793610318e-05, - "loss": 0.002098524570465088, - "step": 13490 - }, - { - "epoch": 2.3009377664109123, - "grad_norm": 0.0887150913476944, - "learning_rate": 2.972169140787985e-05, - "loss": 0.001710166409611702, - "step": 13495 - }, - { - "epoch": 2.3017902813299234, - "grad_norm": 0.08873872458934784, - "learning_rate": 2.969662851082972e-05, - "loss": 0.002029442973434925, - "step": 13500 - }, - { - "epoch": 2.3026427962489344, - "grad_norm": 0.09199293702840805, - "learning_rate": 2.9671569256652976e-05, - "loss": 0.0015904868021607399, - "step": 13505 - }, - { - "epoch": 2.3034953111679455, - "grad_norm": 0.07347019016742706, - "learning_rate": 2.9646513657048106e-05, - "loss": 0.002239963971078396, - "step": 13510 - }, - { - "epoch": 2.3043478260869565, - "grad_norm": 0.056011516600847244, - "learning_rate": 2.9621461723711897e-05, - "loss": 0.003089374490082264, - "step": 13515 - }, - { - "epoch": 2.3052003410059676, - "grad_norm": 0.05805368721485138, - "learning_rate": 2.9596413468339447e-05, - "loss": 0.0011475264094769956, - "step": 13520 - }, - { - "epoch": 2.3060528559249787, - "grad_norm": 0.08263146877288818, - "learning_rate": 2.95713689026241e-05, - "loss": 0.0027705669403076173, - "step": 13525 - }, - { - "epoch": 2.3069053708439897, - "grad_norm": 0.10079067945480347, - "learning_rate": 2.954632803825749e-05, - "loss": 0.0038317229598760607, - "step": 13530 - }, - { - "epoch": 2.307757885763001, - "grad_norm": 0.07248156517744064, - "learning_rate": 2.9521290886929514e-05, - "loss": 0.0017008930444717407, - "step": 13535 - }, - { - "epoch": 2.308610400682012, - "grad_norm": 0.09252380579710007, - "learning_rate": 2.949625746032838e-05, - "loss": 0.0021895600482821466, - "step": 13540 - }, - { - "epoch": 2.309462915601023, - "grad_norm": 0.03231853246688843, - "learning_rate": 2.947122777014051e-05, - "loss": 0.002471560053527355, - "step": 13545 - }, - { - "epoch": 2.310315430520034, - "grad_norm": 0.09625072032213211, - "learning_rate": 2.944620182805059e-05, - "loss": 0.002643503434956074, - "step": 13550 - }, - { - "epoch": 2.311167945439045, - "grad_norm": 0.11135435849428177, - "learning_rate": 2.9421179645741552e-05, - "loss": 0.0015677452087402345, - "step": 13555 - }, - { - "epoch": 2.312020460358056, - "grad_norm": 0.07239774614572525, - "learning_rate": 2.939616123489459e-05, - "loss": 0.0020940851420164107, - "step": 13560 - }, - { - "epoch": 2.3128729752770676, - "grad_norm": 0.0686500295996666, - "learning_rate": 2.937114660718915e-05, - "loss": 0.004896241426467896, - "step": 13565 - }, - { - "epoch": 2.313725490196078, - "grad_norm": 0.04634196311235428, - "learning_rate": 2.934613577430288e-05, - "loss": 0.0017542928457260133, - "step": 13570 - }, - { - "epoch": 2.3145780051150897, - "grad_norm": 0.08693452924489975, - "learning_rate": 2.9321128747911657e-05, - "loss": 0.003124900534749031, - "step": 13575 - }, - { - "epoch": 2.3154305200341008, - "grad_norm": 0.053911175578832626, - "learning_rate": 2.9296125539689615e-05, - "loss": 0.001699080690741539, - "step": 13580 - }, - { - "epoch": 2.316283034953112, - "grad_norm": 0.07346964627504349, - "learning_rate": 2.9271126161309052e-05, - "loss": 0.0027174966409802435, - "step": 13585 - }, - { - "epoch": 2.317135549872123, - "grad_norm": 0.07157005369663239, - "learning_rate": 2.9246130624440546e-05, - "loss": 0.0026199813932180406, - "step": 13590 - }, - { - "epoch": 2.317988064791134, - "grad_norm": 0.0852048397064209, - "learning_rate": 2.922113894075282e-05, - "loss": 0.002349478751420975, - "step": 13595 - }, - { - "epoch": 2.318840579710145, - "grad_norm": 0.069539375603199, - "learning_rate": 2.9196151121912828e-05, - "loss": 0.002428753860294819, - "step": 13600 - }, - { - "epoch": 2.319693094629156, - "grad_norm": 0.06993792951107025, - "learning_rate": 2.9171167179585712e-05, - "loss": 0.0025543162599205972, - "step": 13605 - }, - { - "epoch": 2.320545609548167, - "grad_norm": 0.09210001677274704, - "learning_rate": 2.9146187125434826e-05, - "loss": 0.004642657563090324, - "step": 13610 - }, - { - "epoch": 2.321398124467178, - "grad_norm": 0.03652270883321762, - "learning_rate": 2.9121210971121674e-05, - "loss": 0.0019740790128707887, - "step": 13615 - }, - { - "epoch": 2.3222506393861893, - "grad_norm": 0.032051410526037216, - "learning_rate": 2.9096238728305957e-05, - "loss": 0.0020309314131736755, - "step": 13620 - }, - { - "epoch": 2.3231031543052003, - "grad_norm": 0.08898582309484482, - "learning_rate": 2.907127040864556e-05, - "loss": 0.0012645654380321503, - "step": 13625 - }, - { - "epoch": 2.3239556692242114, - "grad_norm": 0.21863117814064026, - "learning_rate": 2.9046306023796493e-05, - "loss": 0.0025009674951434135, - "step": 13630 - }, - { - "epoch": 2.3248081841432224, - "grad_norm": 0.06401807814836502, - "learning_rate": 2.9021345585413004e-05, - "loss": 0.002794540859758854, - "step": 13635 - }, - { - "epoch": 2.3256606990622335, - "grad_norm": 0.049468256533145905, - "learning_rate": 2.8996389105147437e-05, - "loss": 0.0024725871160626413, - "step": 13640 - }, - { - "epoch": 2.3265132139812446, - "grad_norm": 0.0904751718044281, - "learning_rate": 2.8971436594650292e-05, - "loss": 0.0033982183784246446, - "step": 13645 - }, - { - "epoch": 2.3273657289002556, - "grad_norm": 0.11576029658317566, - "learning_rate": 2.8946488065570242e-05, - "loss": 0.004228492826223373, - "step": 13650 - }, - { - "epoch": 2.3282182438192667, - "grad_norm": 0.08191253244876862, - "learning_rate": 2.892154352955411e-05, - "loss": 0.0015400771982967854, - "step": 13655 - }, - { - "epoch": 2.3290707587382777, - "grad_norm": 0.03641185909509659, - "learning_rate": 2.8896602998246817e-05, - "loss": 0.002032958157360554, - "step": 13660 - }, - { - "epoch": 2.329923273657289, - "grad_norm": 0.09123575687408447, - "learning_rate": 2.8871666483291433e-05, - "loss": 0.00326089970767498, - "step": 13665 - }, - { - "epoch": 2.3307757885763003, - "grad_norm": 0.07897967845201492, - "learning_rate": 2.8846733996329148e-05, - "loss": 0.0022133901715278626, - "step": 13670 - }, - { - "epoch": 2.3316283034953114, - "grad_norm": 0.0802898034453392, - "learning_rate": 2.8821805548999275e-05, - "loss": 0.002646717242896557, - "step": 13675 - }, - { - "epoch": 2.3324808184143224, - "grad_norm": 0.05337275192141533, - "learning_rate": 2.879688115293926e-05, - "loss": 0.0022310430184006693, - "step": 13680 - }, - { - "epoch": 2.3333333333333335, - "grad_norm": 0.026133684441447258, - "learning_rate": 2.8771960819784635e-05, - "loss": 0.0013902435079216958, - "step": 13685 - }, - { - "epoch": 2.3341858482523445, - "grad_norm": 0.0701865404844284, - "learning_rate": 2.8747044561169026e-05, - "loss": 0.0030527923256158827, - "step": 13690 - }, - { - "epoch": 2.3350383631713556, - "grad_norm": 0.023815227672457695, - "learning_rate": 2.8722132388724187e-05, - "loss": 0.001688534766435623, - "step": 13695 - }, - { - "epoch": 2.3358908780903667, - "grad_norm": 0.0819278210401535, - "learning_rate": 2.8697224314079928e-05, - "loss": 0.0028546562418341635, - "step": 13700 - }, - { - "epoch": 2.3367433930093777, - "grad_norm": 0.03683038055896759, - "learning_rate": 2.86723203488642e-05, - "loss": 0.0024238623678684234, - "step": 13705 - }, - { - "epoch": 2.337595907928389, - "grad_norm": 0.050080958753824234, - "learning_rate": 2.8647420504702977e-05, - "loss": 0.001459009852260351, - "step": 13710 - }, - { - "epoch": 2.3384484228474, - "grad_norm": 0.04246260225772858, - "learning_rate": 2.8622524793220336e-05, - "loss": 0.0024909645318984984, - "step": 13715 - }, - { - "epoch": 2.339300937766411, - "grad_norm": 0.04298778250813484, - "learning_rate": 2.8597633226038422e-05, - "loss": 0.0017042815685272216, - "step": 13720 - }, - { - "epoch": 2.340153452685422, - "grad_norm": 0.08792980760335922, - "learning_rate": 2.857274581477747e-05, - "loss": 0.0021930102258920668, - "step": 13725 - }, - { - "epoch": 2.341005967604433, - "grad_norm": 0.030293628573417664, - "learning_rate": 2.854786257105573e-05, - "loss": 0.002472694218158722, - "step": 13730 - }, - { - "epoch": 2.341858482523444, - "grad_norm": 0.100398488342762, - "learning_rate": 2.852298350648953e-05, - "loss": 0.0016385417431592942, - "step": 13735 - }, - { - "epoch": 2.342710997442455, - "grad_norm": 0.056936830282211304, - "learning_rate": 2.849810863269325e-05, - "loss": 0.0014652124606072902, - "step": 13740 - }, - { - "epoch": 2.343563512361466, - "grad_norm": 0.04332558810710907, - "learning_rate": 2.8473237961279293e-05, - "loss": 0.0029267419129610063, - "step": 13745 - }, - { - "epoch": 2.3444160272804773, - "grad_norm": 0.051982469856739044, - "learning_rate": 2.8448371503858143e-05, - "loss": 0.001836571842432022, - "step": 13750 - }, - { - "epoch": 2.3452685421994883, - "grad_norm": 0.1215415671467781, - "learning_rate": 2.8423509272038276e-05, - "loss": 0.002749188058078289, - "step": 13755 - }, - { - "epoch": 2.3461210571184994, - "grad_norm": 0.044508881866931915, - "learning_rate": 2.8398651277426203e-05, - "loss": 0.0023854803293943405, - "step": 13760 - }, - { - "epoch": 2.346973572037511, - "grad_norm": 0.09419308602809906, - "learning_rate": 2.837379753162647e-05, - "loss": 0.00259498693048954, - "step": 13765 - }, - { - "epoch": 2.3478260869565215, - "grad_norm": 0.0996370017528534, - "learning_rate": 2.8348948046241616e-05, - "loss": 0.003275657445192337, - "step": 13770 - }, - { - "epoch": 2.348678601875533, - "grad_norm": 0.0585092268884182, - "learning_rate": 2.8324102832872238e-05, - "loss": 0.0023032236844301225, - "step": 13775 - }, - { - "epoch": 2.349531116794544, - "grad_norm": 0.06259947270154953, - "learning_rate": 2.829926190311689e-05, - "loss": 0.0022853843867778776, - "step": 13780 - }, - { - "epoch": 2.350383631713555, - "grad_norm": 0.1343093067407608, - "learning_rate": 2.827442526857214e-05, - "loss": 0.0019558047875761985, - "step": 13785 - }, - { - "epoch": 2.351236146632566, - "grad_norm": 0.03901712968945503, - "learning_rate": 2.8249592940832552e-05, - "loss": 0.0019383212551474572, - "step": 13790 - }, - { - "epoch": 2.3520886615515773, - "grad_norm": 0.08933644741773605, - "learning_rate": 2.8224764931490707e-05, - "loss": 0.0019501563161611556, - "step": 13795 - }, - { - "epoch": 2.3529411764705883, - "grad_norm": 0.06790988147258759, - "learning_rate": 2.819994125213713e-05, - "loss": 0.0018905265256762504, - "step": 13800 - }, - { - "epoch": 2.3537936913895994, - "grad_norm": 0.10576235502958298, - "learning_rate": 2.817512191436033e-05, - "loss": 0.0017807571217417716, - "step": 13805 - }, - { - "epoch": 2.3546462063086104, - "grad_norm": 0.07914351671934128, - "learning_rate": 2.8150306929746826e-05, - "loss": 0.002854841575026512, - "step": 13810 - }, - { - "epoch": 2.3554987212276215, - "grad_norm": 0.10912367701530457, - "learning_rate": 2.812549630988104e-05, - "loss": 0.0028494328260421755, - "step": 13815 - }, - { - "epoch": 2.3563512361466326, - "grad_norm": 0.07309834659099579, - "learning_rate": 2.8100690066345434e-05, - "loss": 0.001808878593146801, - "step": 13820 - }, - { - "epoch": 2.3572037510656436, - "grad_norm": 0.07053545117378235, - "learning_rate": 2.807588821072037e-05, - "loss": 0.0024722769856452944, - "step": 13825 - }, - { - "epoch": 2.3580562659846547, - "grad_norm": 0.06512318551540375, - "learning_rate": 2.8051090754584176e-05, - "loss": 0.0025828687474131586, - "step": 13830 - }, - { - "epoch": 2.3589087809036657, - "grad_norm": 0.06797149777412415, - "learning_rate": 2.8026297709513125e-05, - "loss": 0.0021874068304896356, - "step": 13835 - }, - { - "epoch": 2.359761295822677, - "grad_norm": 0.12261441349983215, - "learning_rate": 2.800150908708145e-05, - "loss": 0.00291924811899662, - "step": 13840 - }, - { - "epoch": 2.360613810741688, - "grad_norm": 0.05696386098861694, - "learning_rate": 2.797672489886131e-05, - "loss": 0.003488580882549286, - "step": 13845 - }, - { - "epoch": 2.361466325660699, - "grad_norm": 0.3340120315551758, - "learning_rate": 2.795194515642276e-05, - "loss": 0.0033275336027145386, - "step": 13850 - }, - { - "epoch": 2.36231884057971, - "grad_norm": 0.08209964632987976, - "learning_rate": 2.7927169871333836e-05, - "loss": 0.0020242417231202126, - "step": 13855 - }, - { - "epoch": 2.363171355498721, - "grad_norm": 0.04942183569073677, - "learning_rate": 2.7902399055160435e-05, - "loss": 0.0015470117330551147, - "step": 13860 - }, - { - "epoch": 2.364023870417732, - "grad_norm": 0.07711990922689438, - "learning_rate": 2.7877632719466438e-05, - "loss": 0.002402086555957794, - "step": 13865 - }, - { - "epoch": 2.3648763853367436, - "grad_norm": 0.06835886090993881, - "learning_rate": 2.7852870875813572e-05, - "loss": 0.002709807641804218, - "step": 13870 - }, - { - "epoch": 2.3657289002557547, - "grad_norm": 0.01572684571146965, - "learning_rate": 2.7828113535761476e-05, - "loss": 0.0037427868694067, - "step": 13875 - }, - { - "epoch": 2.3665814151747657, - "grad_norm": 0.03897464647889137, - "learning_rate": 2.7803360710867728e-05, - "loss": 0.0029004696756601334, - "step": 13880 - }, - { - "epoch": 2.367433930093777, - "grad_norm": 0.1281740814447403, - "learning_rate": 2.777861241268774e-05, - "loss": 0.0021549168974161147, - "step": 13885 - }, - { - "epoch": 2.368286445012788, - "grad_norm": 0.04390920698642731, - "learning_rate": 2.7753868652774873e-05, - "loss": 0.0019567809998989106, - "step": 13890 - }, - { - "epoch": 2.369138959931799, - "grad_norm": 0.09526315331459045, - "learning_rate": 2.7729129442680314e-05, - "loss": 0.001414876524358988, - "step": 13895 - }, - { - "epoch": 2.36999147485081, - "grad_norm": 0.041541386395692825, - "learning_rate": 2.7704394793953162e-05, - "loss": 0.0023986730724573136, - "step": 13900 - }, - { - "epoch": 2.370843989769821, - "grad_norm": 0.056684307754039764, - "learning_rate": 2.7679664718140354e-05, - "loss": 0.0023011576384305956, - "step": 13905 - }, - { - "epoch": 2.371696504688832, - "grad_norm": 0.04548821225762367, - "learning_rate": 2.765493922678674e-05, - "loss": 0.002776668407022953, - "step": 13910 - }, - { - "epoch": 2.372549019607843, - "grad_norm": 0.05635173246264458, - "learning_rate": 2.763021833143499e-05, - "loss": 0.0021549917757511137, - "step": 13915 - }, - { - "epoch": 2.373401534526854, - "grad_norm": 0.06744635850191116, - "learning_rate": 2.7605502043625636e-05, - "loss": 0.0014210479333996774, - "step": 13920 - }, - { - "epoch": 2.3742540494458653, - "grad_norm": 0.03131572902202606, - "learning_rate": 2.758079037489707e-05, - "loss": 0.002670668438076973, - "step": 13925 - }, - { - "epoch": 2.3751065643648763, - "grad_norm": 0.1132262721657753, - "learning_rate": 2.75560833367855e-05, - "loss": 0.004025829955935478, - "step": 13930 - }, - { - "epoch": 2.3759590792838874, - "grad_norm": 0.08719862997531891, - "learning_rate": 2.753138094082502e-05, - "loss": 0.0026264961808919905, - "step": 13935 - }, - { - "epoch": 2.3768115942028984, - "grad_norm": 0.045282550156116486, - "learning_rate": 2.7506683198547527e-05, - "loss": 0.0016890913248062134, - "step": 13940 - }, - { - "epoch": 2.3776641091219095, - "grad_norm": 0.03815371170639992, - "learning_rate": 2.7481990121482737e-05, - "loss": 0.0017980627715587616, - "step": 13945 - }, - { - "epoch": 2.3785166240409206, - "grad_norm": 0.05136419087648392, - "learning_rate": 2.745730172115819e-05, - "loss": 0.0017518583685159684, - "step": 13950 - }, - { - "epoch": 2.3793691389599316, - "grad_norm": 0.076651431620121, - "learning_rate": 2.743261800909929e-05, - "loss": 0.0021933792158961296, - "step": 13955 - }, - { - "epoch": 2.3802216538789427, - "grad_norm": 0.04328504204750061, - "learning_rate": 2.740793899682919e-05, - "loss": 0.0015049883164465427, - "step": 13960 - }, - { - "epoch": 2.381074168797954, - "grad_norm": 0.029004819691181183, - "learning_rate": 2.7383264695868863e-05, - "loss": 0.0023387337103486063, - "step": 13965 - }, - { - "epoch": 2.381926683716965, - "grad_norm": 0.11483976989984512, - "learning_rate": 2.7358595117737118e-05, - "loss": 0.00246519148349762, - "step": 13970 - }, - { - "epoch": 2.3827791986359763, - "grad_norm": 0.09073470532894135, - "learning_rate": 2.733393027395051e-05, - "loss": 0.0031791247427463533, - "step": 13975 - }, - { - "epoch": 2.3836317135549874, - "grad_norm": 0.12094864249229431, - "learning_rate": 2.7309270176023436e-05, - "loss": 0.0025795340538024903, - "step": 13980 - }, - { - "epoch": 2.3844842284739984, - "grad_norm": 0.13568098843097687, - "learning_rate": 2.7284614835468035e-05, - "loss": 0.0057578980922698975, - "step": 13985 - }, - { - "epoch": 2.3853367433930095, - "grad_norm": 0.06415567547082901, - "learning_rate": 2.725996426379423e-05, - "loss": 0.0024575673043727873, - "step": 13990 - }, - { - "epoch": 2.3861892583120206, - "grad_norm": 0.05898221582174301, - "learning_rate": 2.723531847250975e-05, - "loss": 0.0013358716852962971, - "step": 13995 - }, - { - "epoch": 2.3870417732310316, - "grad_norm": 0.019117049872875214, - "learning_rate": 2.721067747312004e-05, - "loss": 0.0016026780009269713, - "step": 14000 - }, - { - "epoch": 2.3878942881500427, - "grad_norm": 0.028591491281986237, - "learning_rate": 2.7186041277128383e-05, - "loss": 0.001663113385438919, - "step": 14005 - }, - { - "epoch": 2.3887468030690537, - "grad_norm": 0.03701665997505188, - "learning_rate": 2.7161409896035733e-05, - "loss": 0.0012899260967969895, - "step": 14010 - }, - { - "epoch": 2.389599317988065, - "grad_norm": 0.05777057632803917, - "learning_rate": 2.7136783341340862e-05, - "loss": 0.0018556809052824974, - "step": 14015 - }, - { - "epoch": 2.390451832907076, - "grad_norm": 0.04922354966402054, - "learning_rate": 2.711216162454024e-05, - "loss": 0.002131880074739456, - "step": 14020 - }, - { - "epoch": 2.391304347826087, - "grad_norm": 0.045851659029722214, - "learning_rate": 2.708754475712814e-05, - "loss": 0.001147150807082653, - "step": 14025 - }, - { - "epoch": 2.392156862745098, - "grad_norm": 0.11482678353786469, - "learning_rate": 2.7062932750596514e-05, - "loss": 0.0027298804372549055, - "step": 14030 - }, - { - "epoch": 2.393009377664109, - "grad_norm": 0.054821670055389404, - "learning_rate": 2.7038325616435058e-05, - "loss": 0.0018268844112753868, - "step": 14035 - }, - { - "epoch": 2.39386189258312, - "grad_norm": 0.09821441024541855, - "learning_rate": 2.701372336613122e-05, - "loss": 0.002109052799642086, - "step": 14040 - }, - { - "epoch": 2.394714407502131, - "grad_norm": 0.04923141747713089, - "learning_rate": 2.6989126011170115e-05, - "loss": 0.0021799976006150247, - "step": 14045 - }, - { - "epoch": 2.395566922421142, - "grad_norm": 0.1223372220993042, - "learning_rate": 2.6964533563034648e-05, - "loss": 0.00261150524020195, - "step": 14050 - }, - { - "epoch": 2.3964194373401533, - "grad_norm": 0.04964495450258255, - "learning_rate": 2.6939946033205374e-05, - "loss": 0.001747405156493187, - "step": 14055 - }, - { - "epoch": 2.397271952259165, - "grad_norm": 0.05354087054729462, - "learning_rate": 2.6915363433160562e-05, - "loss": 0.0017880409955978393, - "step": 14060 - }, - { - "epoch": 2.3981244671781754, - "grad_norm": 0.0796194076538086, - "learning_rate": 2.6890785774376188e-05, - "loss": 0.002280256152153015, - "step": 14065 - }, - { - "epoch": 2.398976982097187, - "grad_norm": 0.048979468643665314, - "learning_rate": 2.6866213068325942e-05, - "loss": 0.0034266695380210876, - "step": 14070 - }, - { - "epoch": 2.399829497016198, - "grad_norm": 0.11115774512290955, - "learning_rate": 2.6841645326481166e-05, - "loss": 0.0014098694548010827, - "step": 14075 - }, - { - "epoch": 2.400682011935209, - "grad_norm": 0.14144426584243774, - "learning_rate": 2.681708256031089e-05, - "loss": 0.0017399771139025688, - "step": 14080 - }, - { - "epoch": 2.40153452685422, - "grad_norm": 0.060562510043382645, - "learning_rate": 2.6792524781281846e-05, - "loss": 0.0031288094818592072, - "step": 14085 - }, - { - "epoch": 2.402387041773231, - "grad_norm": 0.08271291851997375, - "learning_rate": 2.6767972000858402e-05, - "loss": 0.002268883027136326, - "step": 14090 - }, - { - "epoch": 2.403239556692242, - "grad_norm": 0.08203598111867905, - "learning_rate": 2.674342423050264e-05, - "loss": 0.0017265897244215012, - "step": 14095 - }, - { - "epoch": 2.4040920716112533, - "grad_norm": 0.07809042930603027, - "learning_rate": 2.6718881481674265e-05, - "loss": 0.0032232727855443953, - "step": 14100 - }, - { - "epoch": 2.4049445865302643, - "grad_norm": 0.043053366243839264, - "learning_rate": 2.6694343765830633e-05, - "loss": 0.0014350255951285362, - "step": 14105 - }, - { - "epoch": 2.4057971014492754, - "grad_norm": 0.2139715999364853, - "learning_rate": 2.666981109442679e-05, - "loss": 0.002208554185926914, - "step": 14110 - }, - { - "epoch": 2.4066496163682864, - "grad_norm": 0.028433851897716522, - "learning_rate": 2.6645283478915373e-05, - "loss": 0.0033426061272621155, - "step": 14115 - }, - { - "epoch": 2.4075021312872975, - "grad_norm": 0.03152618184685707, - "learning_rate": 2.6620760930746726e-05, - "loss": 0.0017683111131191255, - "step": 14120 - }, - { - "epoch": 2.4083546462063086, - "grad_norm": 0.11559031158685684, - "learning_rate": 2.6596243461368762e-05, - "loss": 0.0027762461453676225, - "step": 14125 - }, - { - "epoch": 2.4092071611253196, - "grad_norm": 0.08188942819833755, - "learning_rate": 2.6571731082227068e-05, - "loss": 0.0029629599303007126, - "step": 14130 - }, - { - "epoch": 2.4100596760443307, - "grad_norm": 0.03179270401597023, - "learning_rate": 2.654722380476482e-05, - "loss": 0.001593652181327343, - "step": 14135 - }, - { - "epoch": 2.4109121909633418, - "grad_norm": 0.03763008117675781, - "learning_rate": 2.652272164042285e-05, - "loss": 0.003974568471312523, - "step": 14140 - }, - { - "epoch": 2.411764705882353, - "grad_norm": 0.06221388280391693, - "learning_rate": 2.649822460063958e-05, - "loss": 0.0021382227540016176, - "step": 14145 - }, - { - "epoch": 2.412617220801364, - "grad_norm": 0.13541199266910553, - "learning_rate": 2.6473732696851025e-05, - "loss": 0.0030446551740169526, - "step": 14150 - }, - { - "epoch": 2.413469735720375, - "grad_norm": 0.07515605539083481, - "learning_rate": 2.6449245940490843e-05, - "loss": 0.0023170780390501023, - "step": 14155 - }, - { - "epoch": 2.414322250639386, - "grad_norm": 0.029287993907928467, - "learning_rate": 2.6424764342990247e-05, - "loss": 0.002732834219932556, - "step": 14160 - }, - { - "epoch": 2.4151747655583975, - "grad_norm": 0.056158751249313354, - "learning_rate": 2.6400287915778073e-05, - "loss": 0.0026283055543899537, - "step": 14165 - }, - { - "epoch": 2.416027280477408, - "grad_norm": 0.05005735903978348, - "learning_rate": 2.6375816670280742e-05, - "loss": 0.0021377095952630045, - "step": 14170 - }, - { - "epoch": 2.4168797953964196, - "grad_norm": 0.039338257163763046, - "learning_rate": 2.6351350617922217e-05, - "loss": 0.0010171877220273018, - "step": 14175 - }, - { - "epoch": 2.4177323103154307, - "grad_norm": 0.054605189710855484, - "learning_rate": 2.6326889770124074e-05, - "loss": 0.0015358464792370797, - "step": 14180 - }, - { - "epoch": 2.4185848252344417, - "grad_norm": 0.05107913911342621, - "learning_rate": 2.630243413830547e-05, - "loss": 0.0014638695865869522, - "step": 14185 - }, - { - "epoch": 2.419437340153453, - "grad_norm": 0.14121516048908234, - "learning_rate": 2.62779837338831e-05, - "loss": 0.0018762655556201935, - "step": 14190 - }, - { - "epoch": 2.420289855072464, - "grad_norm": 0.13554073870182037, - "learning_rate": 2.625353856827121e-05, - "loss": 0.002315247431397438, - "step": 14195 - }, - { - "epoch": 2.421142369991475, - "grad_norm": 0.07378100603818893, - "learning_rate": 2.6229098652881636e-05, - "loss": 0.0017681105062365531, - "step": 14200 - }, - { - "epoch": 2.421994884910486, - "grad_norm": 0.0729142278432846, - "learning_rate": 2.6204663999123712e-05, - "loss": 0.0013508319854736329, - "step": 14205 - }, - { - "epoch": 2.422847399829497, - "grad_norm": 0.09028290957212448, - "learning_rate": 2.6180234618404393e-05, - "loss": 0.0023917261511087417, - "step": 14210 - }, - { - "epoch": 2.423699914748508, - "grad_norm": 0.042102012783288956, - "learning_rate": 2.6155810522128105e-05, - "loss": 0.001337253674864769, - "step": 14215 - }, - { - "epoch": 2.424552429667519, - "grad_norm": 0.126102477312088, - "learning_rate": 2.6131391721696812e-05, - "loss": 0.0030670080333948134, - "step": 14220 - }, - { - "epoch": 2.42540494458653, - "grad_norm": 0.08583983033895493, - "learning_rate": 2.6106978228510047e-05, - "loss": 0.0025723014026880265, - "step": 14225 - }, - { - "epoch": 2.4262574595055413, - "grad_norm": 0.0516071692109108, - "learning_rate": 2.608257005396482e-05, - "loss": 0.0020857708528637885, - "step": 14230 - }, - { - "epoch": 2.4271099744245523, - "grad_norm": 0.08321108669042587, - "learning_rate": 2.6058167209455697e-05, - "loss": 0.0023237552493810655, - "step": 14235 - }, - { - "epoch": 2.4279624893435634, - "grad_norm": 0.04344337806105614, - "learning_rate": 2.6033769706374727e-05, - "loss": 0.0016502588987350464, - "step": 14240 - }, - { - "epoch": 2.4288150042625745, - "grad_norm": 0.10716593265533447, - "learning_rate": 2.6009377556111488e-05, - "loss": 0.002213199995458126, - "step": 14245 - }, - { - "epoch": 2.4296675191815855, - "grad_norm": 0.08346270024776459, - "learning_rate": 2.598499077005302e-05, - "loss": 0.0023431163281202316, - "step": 14250 - }, - { - "epoch": 2.4305200341005966, - "grad_norm": 0.032770343124866486, - "learning_rate": 2.596060935958392e-05, - "loss": 0.0011562082916498184, - "step": 14255 - }, - { - "epoch": 2.431372549019608, - "grad_norm": 0.09246552735567093, - "learning_rate": 2.593623333608623e-05, - "loss": 0.002459176816046238, - "step": 14260 - }, - { - "epoch": 2.4322250639386187, - "grad_norm": 0.05482151731848717, - "learning_rate": 2.5911862710939474e-05, - "loss": 0.0019333874806761742, - "step": 14265 - }, - { - "epoch": 2.43307757885763, - "grad_norm": 0.03243163228034973, - "learning_rate": 2.588749749552069e-05, - "loss": 0.0017584215849637986, - "step": 14270 - }, - { - "epoch": 2.4339300937766413, - "grad_norm": 0.07286939024925232, - "learning_rate": 2.586313770120434e-05, - "loss": 0.002444162592291832, - "step": 14275 - }, - { - "epoch": 2.4347826086956523, - "grad_norm": 0.05575154721736908, - "learning_rate": 2.583878333936243e-05, - "loss": 0.0024999476969242095, - "step": 14280 - }, - { - "epoch": 2.4356351236146634, - "grad_norm": 0.10262400656938553, - "learning_rate": 2.5814434421364354e-05, - "loss": 0.0018360136076807977, - "step": 14285 - }, - { - "epoch": 2.4364876385336744, - "grad_norm": 0.023329658433794975, - "learning_rate": 2.5790090958577017e-05, - "loss": 0.002157992497086525, - "step": 14290 - }, - { - "epoch": 2.4373401534526855, - "grad_norm": 0.11155838519334793, - "learning_rate": 2.576575296236473e-05, - "loss": 0.002236923947930336, - "step": 14295 - }, - { - "epoch": 2.4381926683716966, - "grad_norm": 0.015751022845506668, - "learning_rate": 2.5741420444089317e-05, - "loss": 0.0023830370977520944, - "step": 14300 - }, - { - "epoch": 2.4390451832907076, - "grad_norm": 0.06451129168272018, - "learning_rate": 2.5717093415109982e-05, - "loss": 0.0012244164943695068, - "step": 14305 - }, - { - "epoch": 2.4398976982097187, - "grad_norm": 0.05141889676451683, - "learning_rate": 2.569277188678339e-05, - "loss": 0.0008386586792767048, - "step": 14310 - }, - { - "epoch": 2.4407502131287298, - "grad_norm": 0.07528503239154816, - "learning_rate": 2.5668455870463654e-05, - "loss": 0.0027780460193753244, - "step": 14315 - }, - { - "epoch": 2.441602728047741, - "grad_norm": 0.0676177367568016, - "learning_rate": 2.5644145377502277e-05, - "loss": 0.002171286940574646, - "step": 14320 - }, - { - "epoch": 2.442455242966752, - "grad_norm": 0.03209437057375908, - "learning_rate": 2.5619840419248228e-05, - "loss": 0.0011549444869160652, - "step": 14325 - }, - { - "epoch": 2.443307757885763, - "grad_norm": 0.0711345300078392, - "learning_rate": 2.559554100704787e-05, - "loss": 0.0029217278584837913, - "step": 14330 - }, - { - "epoch": 2.444160272804774, - "grad_norm": 0.07314640283584595, - "learning_rate": 2.5571247152244955e-05, - "loss": 0.0019763100892305372, - "step": 14335 - }, - { - "epoch": 2.445012787723785, - "grad_norm": 0.058573171496391296, - "learning_rate": 2.5546958866180686e-05, - "loss": 0.0023175042122602465, - "step": 14340 - }, - { - "epoch": 2.445865302642796, - "grad_norm": 0.06780791282653809, - "learning_rate": 2.552267616019362e-05, - "loss": 0.0022560084238648415, - "step": 14345 - }, - { - "epoch": 2.446717817561807, - "grad_norm": 0.0834873840212822, - "learning_rate": 2.5498399045619755e-05, - "loss": 0.0015980398282408714, - "step": 14350 - }, - { - "epoch": 2.4475703324808182, - "grad_norm": 0.06677491962909698, - "learning_rate": 2.5474127533792443e-05, - "loss": 0.002242721430957317, - "step": 14355 - }, - { - "epoch": 2.4484228473998293, - "grad_norm": 0.11220566183328629, - "learning_rate": 2.5449861636042443e-05, - "loss": 0.001862034946680069, - "step": 14360 - }, - { - "epoch": 2.449275362318841, - "grad_norm": 0.05493709817528725, - "learning_rate": 2.542560136369786e-05, - "loss": 0.0020324042066931724, - "step": 14365 - }, - { - "epoch": 2.4501278772378514, - "grad_norm": 0.09586431086063385, - "learning_rate": 2.5401346728084225e-05, - "loss": 0.001961209811270237, - "step": 14370 - }, - { - "epoch": 2.450980392156863, - "grad_norm": 0.06384766101837158, - "learning_rate": 2.5377097740524402e-05, - "loss": 0.002969523146748543, - "step": 14375 - }, - { - "epoch": 2.451832907075874, - "grad_norm": 0.11584383249282837, - "learning_rate": 2.5352854412338607e-05, - "loss": 0.0037360407412052156, - "step": 14380 - }, - { - "epoch": 2.452685421994885, - "grad_norm": 0.05268854275345802, - "learning_rate": 2.5328616754844447e-05, - "loss": 0.0024207277223467828, - "step": 14385 - }, - { - "epoch": 2.453537936913896, - "grad_norm": 0.10550973564386368, - "learning_rate": 2.5304384779356855e-05, - "loss": 0.002147519588470459, - "step": 14390 - }, - { - "epoch": 2.454390451832907, - "grad_norm": 0.11402281373739243, - "learning_rate": 2.5280158497188144e-05, - "loss": 0.0030479192733764648, - "step": 14395 - }, - { - "epoch": 2.455242966751918, - "grad_norm": 0.042928412556648254, - "learning_rate": 2.5255937919647928e-05, - "loss": 0.0009582490660250187, - "step": 14400 - }, - { - "epoch": 2.4560954816709293, - "grad_norm": 0.09466255456209183, - "learning_rate": 2.52317230580432e-05, - "loss": 0.0028877202421426773, - "step": 14405 - }, - { - "epoch": 2.4569479965899403, - "grad_norm": 0.0167491864413023, - "learning_rate": 2.5207513923678246e-05, - "loss": 0.002237674966454506, - "step": 14410 - }, - { - "epoch": 2.4578005115089514, - "grad_norm": 0.11767696589231491, - "learning_rate": 2.518331052785468e-05, - "loss": 0.00270021203905344, - "step": 14415 - }, - { - "epoch": 2.4586530264279625, - "grad_norm": 0.13400165736675262, - "learning_rate": 2.5159112881871494e-05, - "loss": 0.0025584336370229723, - "step": 14420 - }, - { - "epoch": 2.4595055413469735, - "grad_norm": 0.051460813730955124, - "learning_rate": 2.5134920997024915e-05, - "loss": 0.001182288955897093, - "step": 14425 - }, - { - "epoch": 2.4603580562659846, - "grad_norm": 0.05078651383519173, - "learning_rate": 2.511073488460855e-05, - "loss": 0.001340255793184042, - "step": 14430 - }, - { - "epoch": 2.4612105711849956, - "grad_norm": 0.06714113801717758, - "learning_rate": 2.5086554555913245e-05, - "loss": 0.0019190860912203789, - "step": 14435 - }, - { - "epoch": 2.4620630861040067, - "grad_norm": 0.05757109820842743, - "learning_rate": 2.5062380022227226e-05, - "loss": 0.0016031917184591293, - "step": 14440 - }, - { - "epoch": 2.4629156010230178, - "grad_norm": 0.045739807188510895, - "learning_rate": 2.5038211294835944e-05, - "loss": 0.0020723894238471987, - "step": 14445 - }, - { - "epoch": 2.463768115942029, - "grad_norm": 0.06381653994321823, - "learning_rate": 2.5014048385022156e-05, - "loss": 0.002237732522189617, - "step": 14450 - }, - { - "epoch": 2.46462063086104, - "grad_norm": 0.08096056431531906, - "learning_rate": 2.498989130406594e-05, - "loss": 0.0017275322228670121, - "step": 14455 - }, - { - "epoch": 2.4654731457800514, - "grad_norm": 0.04627775028347969, - "learning_rate": 2.4965740063244582e-05, - "loss": 0.0028135737404227255, - "step": 14460 - }, - { - "epoch": 2.466325660699062, - "grad_norm": 0.07789458334445953, - "learning_rate": 2.4941594673832737e-05, - "loss": 0.0017165482044219972, - "step": 14465 - }, - { - "epoch": 2.4671781756180735, - "grad_norm": 0.03633275255560875, - "learning_rate": 2.491745514710224e-05, - "loss": 0.003017013892531395, - "step": 14470 - }, - { - "epoch": 2.4680306905370846, - "grad_norm": 0.07425010204315186, - "learning_rate": 2.489332149432224e-05, - "loss": 0.002849162742495537, - "step": 14475 - }, - { - "epoch": 2.4688832054560956, - "grad_norm": 0.08738066256046295, - "learning_rate": 2.486919372675911e-05, - "loss": 0.003103286027908325, - "step": 14480 - }, - { - "epoch": 2.4697357203751067, - "grad_norm": 0.059462107717990875, - "learning_rate": 2.4845071855676526e-05, - "loss": 0.003129242733120918, - "step": 14485 - }, - { - "epoch": 2.4705882352941178, - "grad_norm": 0.12157633155584335, - "learning_rate": 2.4820955892335358e-05, - "loss": 0.00188961960375309, - "step": 14490 - }, - { - "epoch": 2.471440750213129, - "grad_norm": 0.04780135303735733, - "learning_rate": 2.4796845847993743e-05, - "loss": 0.001777658425271511, - "step": 14495 - }, - { - "epoch": 2.47229326513214, - "grad_norm": 0.08734847605228424, - "learning_rate": 2.477274173390706e-05, - "loss": 0.0025872459635138513, - "step": 14500 - }, - { - "epoch": 2.473145780051151, - "grad_norm": 0.08637238293886185, - "learning_rate": 2.4748643561327887e-05, - "loss": 0.0034623559564352035, - "step": 14505 - }, - { - "epoch": 2.473998294970162, - "grad_norm": 0.1351020187139511, - "learning_rate": 2.4724551341506083e-05, - "loss": 0.0025932226330041886, - "step": 14510 - }, - { - "epoch": 2.474850809889173, - "grad_norm": 0.0965266153216362, - "learning_rate": 2.4700465085688678e-05, - "loss": 0.0021650340408086778, - "step": 14515 - }, - { - "epoch": 2.475703324808184, - "grad_norm": 0.06353217363357544, - "learning_rate": 2.4676384805119954e-05, - "loss": 0.0017436511814594268, - "step": 14520 - }, - { - "epoch": 2.476555839727195, - "grad_norm": 0.09694099426269531, - "learning_rate": 2.4652310511041376e-05, - "loss": 0.002511733956634998, - "step": 14525 - }, - { - "epoch": 2.4774083546462062, - "grad_norm": 0.13362912833690643, - "learning_rate": 2.4628242214691614e-05, - "loss": 0.0020636413246393204, - "step": 14530 - }, - { - "epoch": 2.4782608695652173, - "grad_norm": 0.05283635854721069, - "learning_rate": 2.4604179927306575e-05, - "loss": 0.002218991331756115, - "step": 14535 - }, - { - "epoch": 2.4791133844842284, - "grad_norm": 0.062003809958696365, - "learning_rate": 2.4580123660119317e-05, - "loss": 0.0021969690918922425, - "step": 14540 - }, - { - "epoch": 2.4799658994032394, - "grad_norm": 0.1058121919631958, - "learning_rate": 2.4556073424360115e-05, - "loss": 0.002514044567942619, - "step": 14545 - }, - { - "epoch": 2.4808184143222505, - "grad_norm": 0.06746378540992737, - "learning_rate": 2.4532029231256397e-05, - "loss": 0.001485797483474016, - "step": 14550 - }, - { - "epoch": 2.4816709292412615, - "grad_norm": 0.043892405927181244, - "learning_rate": 2.4507991092032832e-05, - "loss": 0.0021189235150814055, - "step": 14555 - }, - { - "epoch": 2.4825234441602726, - "grad_norm": 0.04537670686841011, - "learning_rate": 2.4483959017911195e-05, - "loss": 0.0018616810441017151, - "step": 14560 - }, - { - "epoch": 2.483375959079284, - "grad_norm": 0.04895998165011406, - "learning_rate": 2.445993302011046e-05, - "loss": 0.0016737811267375946, - "step": 14565 - }, - { - "epoch": 2.484228473998295, - "grad_norm": 0.07096420228481293, - "learning_rate": 2.4435913109846773e-05, - "loss": 0.0032933827489614485, - "step": 14570 - }, - { - "epoch": 2.485080988917306, - "grad_norm": 0.07391496002674103, - "learning_rate": 2.4411899298333403e-05, - "loss": 0.0021815944463014604, - "step": 14575 - }, - { - "epoch": 2.4859335038363173, - "grad_norm": 0.12835897505283356, - "learning_rate": 2.438789159678083e-05, - "loss": 0.0032001670449972154, - "step": 14580 - }, - { - "epoch": 2.4867860187553283, - "grad_norm": 0.0947527140378952, - "learning_rate": 2.436389001639662e-05, - "loss": 0.002512381225824356, - "step": 14585 - }, - { - "epoch": 2.4876385336743394, - "grad_norm": 0.06699662655591965, - "learning_rate": 2.4339894568385526e-05, - "loss": 0.0014906782656908036, - "step": 14590 - }, - { - "epoch": 2.4884910485933505, - "grad_norm": 0.042523179203271866, - "learning_rate": 2.4315905263949404e-05, - "loss": 0.0012685291469097138, - "step": 14595 - }, - { - "epoch": 2.4893435635123615, - "grad_norm": 0.03687009960412979, - "learning_rate": 2.4291922114287286e-05, - "loss": 0.0016289660707116127, - "step": 14600 - }, - { - "epoch": 2.4901960784313726, - "grad_norm": 0.07698170840740204, - "learning_rate": 2.4267945130595287e-05, - "loss": 0.002090749144554138, - "step": 14605 - }, - { - "epoch": 2.4910485933503836, - "grad_norm": 0.08533983677625656, - "learning_rate": 2.4243974324066653e-05, - "loss": 0.002234157919883728, - "step": 14610 - }, - { - "epoch": 2.4919011082693947, - "grad_norm": 0.10050603002309799, - "learning_rate": 2.422000970589177e-05, - "loss": 0.002818283811211586, - "step": 14615 - }, - { - "epoch": 2.4927536231884058, - "grad_norm": 0.057129960507154465, - "learning_rate": 2.4196051287258095e-05, - "loss": 0.004226747527718544, - "step": 14620 - }, - { - "epoch": 2.493606138107417, - "grad_norm": 0.08218846470117569, - "learning_rate": 2.4172099079350256e-05, - "loss": 0.0016387354582548142, - "step": 14625 - }, - { - "epoch": 2.494458653026428, - "grad_norm": 0.07963220775127411, - "learning_rate": 2.4148153093349894e-05, - "loss": 0.002778450772166252, - "step": 14630 - }, - { - "epoch": 2.495311167945439, - "grad_norm": 0.058049995452165604, - "learning_rate": 2.4124213340435834e-05, - "loss": 0.0024016743525862696, - "step": 14635 - }, - { - "epoch": 2.49616368286445, - "grad_norm": 0.13127438724040985, - "learning_rate": 2.410027983178392e-05, - "loss": 0.0038317706435918807, - "step": 14640 - }, - { - "epoch": 2.497016197783461, - "grad_norm": 0.048698920756578445, - "learning_rate": 2.407635257856711e-05, - "loss": 0.00152621790766716, - "step": 14645 - }, - { - "epoch": 2.497868712702472, - "grad_norm": 0.02338201180100441, - "learning_rate": 2.405243159195546e-05, - "loss": 0.0027417311444878577, - "step": 14650 - }, - { - "epoch": 2.498721227621483, - "grad_norm": 0.07108049094676971, - "learning_rate": 2.402851688311607e-05, - "loss": 0.001716497913002968, - "step": 14655 - }, - { - "epoch": 2.4995737425404947, - "grad_norm": 0.028342491015791893, - "learning_rate": 2.4004608463213126e-05, - "loss": 0.0013954185880720616, - "step": 14660 - }, - { - "epoch": 2.4995737425404947, - "eval_loss": 0.04806143045425415, - "eval_runtime": 3.6619, - "eval_samples_per_second": 68.816, - "eval_steps_per_second": 1.092, - "step": 14660 - }, - { - "eval_cer_subset": 0.01446089208070741, - "eval_cer_subset_edit_distance": 888, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 14660 - }, - { - "epoch": 2.5004262574595053, - "grad_norm": 0.0640476793050766, - "learning_rate": 2.398070634340786e-05, - "loss": 0.002191193774342537, - "step": 14665 - }, - { - "epoch": 2.501278772378517, - "grad_norm": 0.034168582409620285, - "learning_rate": 2.3956810534858607e-05, - "loss": 0.0013371256180107593, - "step": 14670 - }, - { - "epoch": 2.502131287297528, - "grad_norm": 0.07181207835674286, - "learning_rate": 2.3932921048720698e-05, - "loss": 0.0019236713647842406, - "step": 14675 - }, - { - "epoch": 2.502983802216539, - "grad_norm": 0.0469810888171196, - "learning_rate": 2.3909037896146552e-05, - "loss": 0.0018267782405018806, - "step": 14680 - }, - { - "epoch": 2.50383631713555, - "grad_norm": 0.028826232999563217, - "learning_rate": 2.3885161088285617e-05, - "loss": 0.0010010387748479843, - "step": 14685 - }, - { - "epoch": 2.504688832054561, - "grad_norm": 0.10193654894828796, - "learning_rate": 2.386129063628437e-05, - "loss": 0.0024211697280406954, - "step": 14690 - }, - { - "epoch": 2.505541346973572, - "grad_norm": 0.14754284918308258, - "learning_rate": 2.3837426551286357e-05, - "loss": 0.0020253278315067293, - "step": 14695 - }, - { - "epoch": 2.506393861892583, - "grad_norm": 0.12155842036008835, - "learning_rate": 2.3813568844432103e-05, - "loss": 0.002546634152531624, - "step": 14700 - }, - { - "epoch": 2.5072463768115942, - "grad_norm": 0.07209643721580505, - "learning_rate": 2.378971752685919e-05, - "loss": 0.002657034806907177, - "step": 14705 - }, - { - "epoch": 2.5080988917306053, - "grad_norm": 0.1210934966802597, - "learning_rate": 2.3765872609702192e-05, - "loss": 0.002788655459880829, - "step": 14710 - }, - { - "epoch": 2.5089514066496164, - "grad_norm": 0.05647290125489235, - "learning_rate": 2.374203410409274e-05, - "loss": 0.0022313324734568596, - "step": 14715 - }, - { - "epoch": 2.5098039215686274, - "grad_norm": 0.18282383680343628, - "learning_rate": 2.371820202115942e-05, - "loss": 0.0019404273480176926, - "step": 14720 - }, - { - "epoch": 2.5106564364876385, - "grad_norm": 0.022936735302209854, - "learning_rate": 2.369437637202784e-05, - "loss": 0.0015677658841013909, - "step": 14725 - }, - { - "epoch": 2.5115089514066495, - "grad_norm": 0.023840973153710365, - "learning_rate": 2.3670557167820614e-05, - "loss": 0.0017029233276844025, - "step": 14730 - }, - { - "epoch": 2.5123614663256606, - "grad_norm": 0.01897628791630268, - "learning_rate": 2.3646744419657323e-05, - "loss": 0.001359708234667778, - "step": 14735 - }, - { - "epoch": 2.5132139812446717, - "grad_norm": 0.03326602280139923, - "learning_rate": 2.3622938138654575e-05, - "loss": 0.0006220267619937659, - "step": 14740 - }, - { - "epoch": 2.5140664961636827, - "grad_norm": 0.0619979090988636, - "learning_rate": 2.3599138335925914e-05, - "loss": 0.002030659094452858, - "step": 14745 - }, - { - "epoch": 2.514919011082694, - "grad_norm": 0.03451136127114296, - "learning_rate": 2.3575345022581896e-05, - "loss": 0.0016797656193375588, - "step": 14750 - }, - { - "epoch": 2.5157715260017053, - "grad_norm": 0.0554860420525074, - "learning_rate": 2.3551558209730018e-05, - "loss": 0.0016403241083025933, - "step": 14755 - }, - { - "epoch": 2.516624040920716, - "grad_norm": 0.08686158061027527, - "learning_rate": 2.3527777908474744e-05, - "loss": 0.003415975719690323, - "step": 14760 - }, - { - "epoch": 2.5174765558397274, - "grad_norm": 0.06636729836463928, - "learning_rate": 2.3504004129917542e-05, - "loss": 0.0018416630104184152, - "step": 14765 - }, - { - "epoch": 2.518329070758738, - "grad_norm": 0.08038193732500076, - "learning_rate": 2.3480236885156776e-05, - "loss": 0.002185085415840149, - "step": 14770 - }, - { - "epoch": 2.5191815856777495, - "grad_norm": 0.06370148062705994, - "learning_rate": 2.3456476185287802e-05, - "loss": 0.001394746359437704, - "step": 14775 - }, - { - "epoch": 2.5200341005967606, - "grad_norm": 0.05585980415344238, - "learning_rate": 2.3432722041402886e-05, - "loss": 0.003035778924822807, - "step": 14780 - }, - { - "epoch": 2.5208866155157716, - "grad_norm": 0.08676521480083466, - "learning_rate": 2.340897446459128e-05, - "loss": 0.002279702201485634, - "step": 14785 - }, - { - "epoch": 2.5217391304347827, - "grad_norm": 0.0421539731323719, - "learning_rate": 2.3385233465939124e-05, - "loss": 0.0015795350074768067, - "step": 14790 - }, - { - "epoch": 2.5225916453537938, - "grad_norm": 0.09380512684583664, - "learning_rate": 2.3361499056529516e-05, - "loss": 0.0024957180023193358, - "step": 14795 - }, - { - "epoch": 2.523444160272805, - "grad_norm": 0.05541060492396355, - "learning_rate": 2.3337771247442457e-05, - "loss": 0.0022170023992657663, - "step": 14800 - }, - { - "epoch": 2.524296675191816, - "grad_norm": 0.030795352533459663, - "learning_rate": 2.3314050049754872e-05, - "loss": 0.0015011204406619072, - "step": 14805 - }, - { - "epoch": 2.525149190110827, - "grad_norm": 0.040677715092897415, - "learning_rate": 2.329033547454063e-05, - "loss": 0.0023216739296913146, - "step": 14810 - }, - { - "epoch": 2.526001705029838, - "grad_norm": 0.036884501576423645, - "learning_rate": 2.3266627532870462e-05, - "loss": 0.0025786716490983964, - "step": 14815 - }, - { - "epoch": 2.526854219948849, - "grad_norm": 0.02398660033941269, - "learning_rate": 2.324292623581204e-05, - "loss": 0.0017933860421180726, - "step": 14820 - }, - { - "epoch": 2.52770673486786, - "grad_norm": 0.06601176410913467, - "learning_rate": 2.321923159442989e-05, - "loss": 0.002885073609650135, - "step": 14825 - }, - { - "epoch": 2.528559249786871, - "grad_norm": 0.08684834837913513, - "learning_rate": 2.3195543619785496e-05, - "loss": 0.0026486974209547045, - "step": 14830 - }, - { - "epoch": 2.5294117647058822, - "grad_norm": 0.04674920067191124, - "learning_rate": 2.3171862322937173e-05, - "loss": 0.0025192024186253548, - "step": 14835 - }, - { - "epoch": 2.5302642796248933, - "grad_norm": 0.059271425008773804, - "learning_rate": 2.314818771494013e-05, - "loss": 0.001517033763229847, - "step": 14840 - }, - { - "epoch": 2.5311167945439044, - "grad_norm": 0.03094577044248581, - "learning_rate": 2.312451980684648e-05, - "loss": 0.001731237769126892, - "step": 14845 - }, - { - "epoch": 2.531969309462916, - "grad_norm": 0.04043465852737427, - "learning_rate": 2.3100858609705167e-05, - "loss": 0.002348044328391552, - "step": 14850 - }, - { - "epoch": 2.5328218243819265, - "grad_norm": 0.05144953727722168, - "learning_rate": 2.3077204134562054e-05, - "loss": 0.0019841600209474564, - "step": 14855 - }, - { - "epoch": 2.533674339300938, - "grad_norm": 0.07220125198364258, - "learning_rate": 2.3053556392459813e-05, - "loss": 0.002818341739475727, - "step": 14860 - }, - { - "epoch": 2.5345268542199486, - "grad_norm": 0.08199959248304367, - "learning_rate": 2.302991539443801e-05, - "loss": 0.0024914808571338655, - "step": 14865 - }, - { - "epoch": 2.53537936913896, - "grad_norm": 0.07761549204587936, - "learning_rate": 2.3006281151533047e-05, - "loss": 0.003526497259736061, - "step": 14870 - }, - { - "epoch": 2.536231884057971, - "grad_norm": 0.1002337783575058, - "learning_rate": 2.298265367477816e-05, - "loss": 0.0022296305745840073, - "step": 14875 - }, - { - "epoch": 2.5370843989769822, - "grad_norm": 0.05918731540441513, - "learning_rate": 2.295903297520346e-05, - "loss": 0.0012512234039604663, - "step": 14880 - }, - { - "epoch": 2.5379369138959933, - "grad_norm": 0.053112782537937164, - "learning_rate": 2.2935419063835868e-05, - "loss": 0.0017477553337812424, - "step": 14885 - }, - { - "epoch": 2.5387894288150044, - "grad_norm": 0.061820488423109055, - "learning_rate": 2.2911811951699155e-05, - "loss": 0.0022626927122473715, - "step": 14890 - }, - { - "epoch": 2.5396419437340154, - "grad_norm": 0.11703728139400482, - "learning_rate": 2.288821164981387e-05, - "loss": 0.0025926090776920317, - "step": 14895 - }, - { - "epoch": 2.5404944586530265, - "grad_norm": 0.04635873809456825, - "learning_rate": 2.2864618169197468e-05, - "loss": 0.0017809070646762847, - "step": 14900 - }, - { - "epoch": 2.5413469735720375, - "grad_norm": 0.11287315934896469, - "learning_rate": 2.2841031520864136e-05, - "loss": 0.00238190982490778, - "step": 14905 - }, - { - "epoch": 2.5421994884910486, - "grad_norm": 0.026871057227253914, - "learning_rate": 2.2817451715824924e-05, - "loss": 0.0015336395241320132, - "step": 14910 - }, - { - "epoch": 2.5430520034100597, - "grad_norm": 0.06438672542572021, - "learning_rate": 2.279387876508766e-05, - "loss": 0.001995333656668663, - "step": 14915 - }, - { - "epoch": 2.5439045183290707, - "grad_norm": 0.06547024846076965, - "learning_rate": 2.277031267965697e-05, - "loss": 0.002060149237513542, - "step": 14920 - }, - { - "epoch": 2.544757033248082, - "grad_norm": 0.07563283294439316, - "learning_rate": 2.2746753470534322e-05, - "loss": 0.0017446789890527726, - "step": 14925 - }, - { - "epoch": 2.545609548167093, - "grad_norm": 0.028652798384428024, - "learning_rate": 2.27232011487179e-05, - "loss": 0.0022552181035280228, - "step": 14930 - }, - { - "epoch": 2.546462063086104, - "grad_norm": 0.0893654152750969, - "learning_rate": 2.269965572520274e-05, - "loss": 0.0029813185334205627, - "step": 14935 - }, - { - "epoch": 2.547314578005115, - "grad_norm": 0.04628995433449745, - "learning_rate": 2.26761172109806e-05, - "loss": 0.0025255372747778893, - "step": 14940 - }, - { - "epoch": 2.548167092924126, - "grad_norm": 0.09175702929496765, - "learning_rate": 2.2652585617040076e-05, - "loss": 0.004577648639678955, - "step": 14945 - }, - { - "epoch": 2.549019607843137, - "grad_norm": 0.041957542300224304, - "learning_rate": 2.262906095436648e-05, - "loss": 0.002292825095355511, - "step": 14950 - }, - { - "epoch": 2.5498721227621486, - "grad_norm": 0.061231136322021484, - "learning_rate": 2.2605543233941904e-05, - "loss": 0.002193107083439827, - "step": 14955 - }, - { - "epoch": 2.550724637681159, - "grad_norm": 0.08939548581838608, - "learning_rate": 2.2582032466745206e-05, - "loss": 0.0013388695195317268, - "step": 14960 - }, - { - "epoch": 2.5515771526001707, - "grad_norm": 0.10106560587882996, - "learning_rate": 2.255852866375199e-05, - "loss": 0.004271790385246277, - "step": 14965 - }, - { - "epoch": 2.5524296675191813, - "grad_norm": 0.04756918177008629, - "learning_rate": 2.253503183593463e-05, - "loss": 0.002253059670329094, - "step": 14970 - }, - { - "epoch": 2.553282182438193, - "grad_norm": 0.06336323171854019, - "learning_rate": 2.2511541994262203e-05, - "loss": 0.0019065763801336289, - "step": 14975 - }, - { - "epoch": 2.554134697357204, - "grad_norm": 0.021801188588142395, - "learning_rate": 2.2488059149700568e-05, - "loss": 0.001671123132109642, - "step": 14980 - }, - { - "epoch": 2.554987212276215, - "grad_norm": 0.07580327987670898, - "learning_rate": 2.2464583313212294e-05, - "loss": 0.0031477130949497225, - "step": 14985 - }, - { - "epoch": 2.555839727195226, - "grad_norm": 0.07757267355918884, - "learning_rate": 2.244111449575666e-05, - "loss": 0.0026445770636200905, - "step": 14990 - }, - { - "epoch": 2.556692242114237, - "grad_norm": 0.043020669370889664, - "learning_rate": 2.2417652708289726e-05, - "loss": 0.002764601819217205, - "step": 14995 - }, - { - "epoch": 2.557544757033248, - "grad_norm": 0.03915635868906975, - "learning_rate": 2.2394197961764212e-05, - "loss": 0.002291044779121876, - "step": 15000 - }, - { - "epoch": 2.558397271952259, - "grad_norm": 0.0665091872215271, - "learning_rate": 2.2370750267129586e-05, - "loss": 0.0017822932451963425, - "step": 15005 - }, - { - "epoch": 2.5592497868712702, - "grad_norm": 0.08525653183460236, - "learning_rate": 2.234730963533199e-05, - "loss": 0.0018473496660590173, - "step": 15010 - }, - { - "epoch": 2.5601023017902813, - "grad_norm": 0.05346886068582535, - "learning_rate": 2.2323876077314327e-05, - "loss": 0.002567983791232109, - "step": 15015 - }, - { - "epoch": 2.5609548167092924, - "grad_norm": 0.04240184277296066, - "learning_rate": 2.2300449604016123e-05, - "loss": 0.0021606752648949622, - "step": 15020 - }, - { - "epoch": 2.5618073316283034, - "grad_norm": 0.08507288247346878, - "learning_rate": 2.2277030226373667e-05, - "loss": 0.0023022485896945, - "step": 15025 - }, - { - "epoch": 2.5626598465473145, - "grad_norm": 0.07468844205141068, - "learning_rate": 2.225361795531989e-05, - "loss": 0.0030104584991931917, - "step": 15030 - }, - { - "epoch": 2.5635123614663256, - "grad_norm": 0.03731158375740051, - "learning_rate": 2.22302128017844e-05, - "loss": 0.0019535191357135774, - "step": 15035 - }, - { - "epoch": 2.5643648763853366, - "grad_norm": 0.09111307561397552, - "learning_rate": 2.2206814776693536e-05, - "loss": 0.0016553621739149094, - "step": 15040 - }, - { - "epoch": 2.5652173913043477, - "grad_norm": 0.04197632521390915, - "learning_rate": 2.2183423890970255e-05, - "loss": 0.0018846508115530013, - "step": 15045 - }, - { - "epoch": 2.566069906223359, - "grad_norm": 0.09259206801652908, - "learning_rate": 2.2160040155534206e-05, - "loss": 0.0028481241315603256, - "step": 15050 - }, - { - "epoch": 2.56692242114237, - "grad_norm": 0.07880257815122604, - "learning_rate": 2.2136663581301696e-05, - "loss": 0.002117951214313507, - "step": 15055 - }, - { - "epoch": 2.5677749360613813, - "grad_norm": 0.0969267189502716, - "learning_rate": 2.2113294179185667e-05, - "loss": 0.00240680705755949, - "step": 15060 - }, - { - "epoch": 2.568627450980392, - "grad_norm": 0.06295698881149292, - "learning_rate": 2.2089931960095754e-05, - "loss": 0.0012395468540489674, - "step": 15065 - }, - { - "epoch": 2.5694799658994034, - "grad_norm": 0.0716724842786789, - "learning_rate": 2.2066576934938224e-05, - "loss": 0.004721567407250405, - "step": 15070 - }, - { - "epoch": 2.5703324808184145, - "grad_norm": 0.04790467768907547, - "learning_rate": 2.2043229114615967e-05, - "loss": 0.0016566522419452668, - "step": 15075 - }, - { - "epoch": 2.5711849957374255, - "grad_norm": 0.014919254928827286, - "learning_rate": 2.2019888510028515e-05, - "loss": 0.00200834795832634, - "step": 15080 - }, - { - "epoch": 2.5720375106564366, - "grad_norm": 0.07281307876110077, - "learning_rate": 2.1996555132072063e-05, - "loss": 0.0021370718255639075, - "step": 15085 - }, - { - "epoch": 2.5728900255754477, - "grad_norm": 0.04918764904141426, - "learning_rate": 2.197322899163938e-05, - "loss": 0.002188747748732567, - "step": 15090 - }, - { - "epoch": 2.5737425404944587, - "grad_norm": 0.05246208980679512, - "learning_rate": 2.1949910099619913e-05, - "loss": 0.002106213942170143, - "step": 15095 - }, - { - "epoch": 2.57459505541347, - "grad_norm": 0.07900833338499069, - "learning_rate": 2.1926598466899674e-05, - "loss": 0.0014828240498900413, - "step": 15100 - }, - { - "epoch": 2.575447570332481, - "grad_norm": 0.1235758364200592, - "learning_rate": 2.19032941043613e-05, - "loss": 0.0033482640981674196, - "step": 15105 - }, - { - "epoch": 2.576300085251492, - "grad_norm": 0.06170985475182533, - "learning_rate": 2.187999702288408e-05, - "loss": 0.0019921788945794104, - "step": 15110 - }, - { - "epoch": 2.577152600170503, - "grad_norm": 0.1210661381483078, - "learning_rate": 2.185670723334384e-05, - "loss": 0.0019077232107520103, - "step": 15115 - }, - { - "epoch": 2.578005115089514, - "grad_norm": 0.06942020356655121, - "learning_rate": 2.1833424746613026e-05, - "loss": 0.0019503291696310043, - "step": 15120 - }, - { - "epoch": 2.578857630008525, - "grad_norm": 0.09329917281866074, - "learning_rate": 2.1810149573560693e-05, - "loss": 0.0026118636131286623, - "step": 15125 - }, - { - "epoch": 2.579710144927536, - "grad_norm": 0.1026659607887268, - "learning_rate": 2.1786881725052445e-05, - "loss": 0.002567945420742035, - "step": 15130 - }, - { - "epoch": 2.580562659846547, - "grad_norm": 0.06306809186935425, - "learning_rate": 2.1763621211950517e-05, - "loss": 0.001768135279417038, - "step": 15135 - }, - { - "epoch": 2.5814151747655583, - "grad_norm": 0.07647090405225754, - "learning_rate": 2.174036804511367e-05, - "loss": 0.0015752470120787621, - "step": 15140 - }, - { - "epoch": 2.5822676896845693, - "grad_norm": 0.045121923089027405, - "learning_rate": 2.171712223539726e-05, - "loss": 0.0025726621970534325, - "step": 15145 - }, - { - "epoch": 2.5831202046035804, - "grad_norm": 0.040667545050382614, - "learning_rate": 2.1693883793653188e-05, - "loss": 0.002222199738025665, - "step": 15150 - }, - { - "epoch": 2.583972719522592, - "grad_norm": 0.08505896478891373, - "learning_rate": 2.1670652730729968e-05, - "loss": 0.0030935727059841155, - "step": 15155 - }, - { - "epoch": 2.5848252344416025, - "grad_norm": 0.05064573138952255, - "learning_rate": 2.164742905747261e-05, - "loss": 0.002387380041182041, - "step": 15160 - }, - { - "epoch": 2.585677749360614, - "grad_norm": 0.0372583344578743, - "learning_rate": 2.1624212784722684e-05, - "loss": 0.0026363788172602655, - "step": 15165 - }, - { - "epoch": 2.5865302642796246, - "grad_norm": 0.06209828332066536, - "learning_rate": 2.1601003923318344e-05, - "loss": 0.0029974017292261125, - "step": 15170 - }, - { - "epoch": 2.587382779198636, - "grad_norm": 0.049798715859651566, - "learning_rate": 2.157780248409424e-05, - "loss": 0.0016345694661140443, - "step": 15175 - }, - { - "epoch": 2.588235294117647, - "grad_norm": 0.06752602010965347, - "learning_rate": 2.1554608477881597e-05, - "loss": 0.0025367341935634614, - "step": 15180 - }, - { - "epoch": 2.5890878090366582, - "grad_norm": 0.10456907004117966, - "learning_rate": 2.1531421915508137e-05, - "loss": 0.002495551109313965, - "step": 15185 - }, - { - "epoch": 2.5899403239556693, - "grad_norm": 0.0790029838681221, - "learning_rate": 2.1508242807798114e-05, - "loss": 0.0025735165923833846, - "step": 15190 - }, - { - "epoch": 2.5907928388746804, - "grad_norm": 0.030237069353461266, - "learning_rate": 2.1485071165572298e-05, - "loss": 0.0018124323338270187, - "step": 15195 - }, - { - "epoch": 2.5916453537936914, - "grad_norm": 0.06030745431780815, - "learning_rate": 2.1461906999648008e-05, - "loss": 0.002845403365790844, - "step": 15200 - }, - { - "epoch": 2.5924978687127025, - "grad_norm": 0.10071806609630585, - "learning_rate": 2.1438750320839037e-05, - "loss": 0.002326494827866554, - "step": 15205 - }, - { - "epoch": 2.5933503836317136, - "grad_norm": 0.050379570573568344, - "learning_rate": 2.1415601139955686e-05, - "loss": 0.0019888151437044144, - "step": 15210 - }, - { - "epoch": 2.5942028985507246, - "grad_norm": 0.09101511538028717, - "learning_rate": 2.1392459467804753e-05, - "loss": 0.003049125336110592, - "step": 15215 - }, - { - "epoch": 2.5950554134697357, - "grad_norm": 0.03804527968168259, - "learning_rate": 2.1369325315189553e-05, - "loss": 0.0016767382621765137, - "step": 15220 - }, - { - "epoch": 2.5959079283887467, - "grad_norm": 0.0779503807425499, - "learning_rate": 2.1346198692909895e-05, - "loss": 0.001964661478996277, - "step": 15225 - }, - { - "epoch": 2.596760443307758, - "grad_norm": 0.07922998070716858, - "learning_rate": 2.1323079611762033e-05, - "loss": 0.001821339875459671, - "step": 15230 - }, - { - "epoch": 2.597612958226769, - "grad_norm": 0.045152947306632996, - "learning_rate": 2.1299968082538734e-05, - "loss": 0.0011449499055743218, - "step": 15235 - }, - { - "epoch": 2.59846547314578, - "grad_norm": 0.026626303791999817, - "learning_rate": 2.1276864116029207e-05, - "loss": 0.0016753975301980971, - "step": 15240 - }, - { - "epoch": 2.599317988064791, - "grad_norm": 0.10935933142900467, - "learning_rate": 2.1253767723019188e-05, - "loss": 0.0026281427592039107, - "step": 15245 - }, - { - "epoch": 2.6001705029838025, - "grad_norm": 0.08133106678724289, - "learning_rate": 2.123067891429082e-05, - "loss": 0.001925770938396454, - "step": 15250 - }, - { - "epoch": 2.601023017902813, - "grad_norm": 0.04865674301981926, - "learning_rate": 2.1207597700622728e-05, - "loss": 0.0019936567172408105, - "step": 15255 - }, - { - "epoch": 2.6018755328218246, - "grad_norm": 0.11841622740030289, - "learning_rate": 2.1184524092789982e-05, - "loss": 0.00298205092549324, - "step": 15260 - }, - { - "epoch": 2.602728047740835, - "grad_norm": 0.04416264593601227, - "learning_rate": 2.1161458101564115e-05, - "loss": 0.0036853265017271044, - "step": 15265 - }, - { - "epoch": 2.6035805626598467, - "grad_norm": 0.08603575825691223, - "learning_rate": 2.1138399737713118e-05, - "loss": 0.004533383995294571, - "step": 15270 - }, - { - "epoch": 2.604433077578858, - "grad_norm": 0.0626961886882782, - "learning_rate": 2.1115349012001388e-05, - "loss": 0.0017330382019281388, - "step": 15275 - }, - { - "epoch": 2.605285592497869, - "grad_norm": 0.12894456088542938, - "learning_rate": 2.1092305935189773e-05, - "loss": 0.0037327542901039123, - "step": 15280 - }, - { - "epoch": 2.60613810741688, - "grad_norm": 0.10542263090610504, - "learning_rate": 2.106927051803554e-05, - "loss": 0.0026806583628058434, - "step": 15285 - }, - { - "epoch": 2.606990622335891, - "grad_norm": 0.05068397521972656, - "learning_rate": 2.1046242771292386e-05, - "loss": 0.0014822190627455712, - "step": 15290 - }, - { - "epoch": 2.607843137254902, - "grad_norm": 0.08927716314792633, - "learning_rate": 2.102322270571045e-05, - "loss": 0.003242380917072296, - "step": 15295 - }, - { - "epoch": 2.608695652173913, - "grad_norm": 0.05792883411049843, - "learning_rate": 2.1000210332036248e-05, - "loss": 0.0017583563923835755, - "step": 15300 - }, - { - "epoch": 2.609548167092924, - "grad_norm": 0.0648881196975708, - "learning_rate": 2.09772056610127e-05, - "loss": 0.002197427675127983, - "step": 15305 - }, - { - "epoch": 2.610400682011935, - "grad_norm": 0.060977645218372345, - "learning_rate": 2.095420870337919e-05, - "loss": 0.002055848389863968, - "step": 15310 - }, - { - "epoch": 2.6112531969309463, - "grad_norm": 0.04654461517930031, - "learning_rate": 2.093121946987146e-05, - "loss": 0.002073242887854576, - "step": 15315 - }, - { - "epoch": 2.6121057118499573, - "grad_norm": 0.04738753288984299, - "learning_rate": 2.0908237971221634e-05, - "loss": 0.0017290839925408364, - "step": 15320 - }, - { - "epoch": 2.6129582267689684, - "grad_norm": 0.07519782334566116, - "learning_rate": 2.0885264218158248e-05, - "loss": 0.0012821624055504798, - "step": 15325 - }, - { - "epoch": 2.6138107416879794, - "grad_norm": 0.06078832224011421, - "learning_rate": 2.0862298221406206e-05, - "loss": 0.0019888199865818025, - "step": 15330 - }, - { - "epoch": 2.6146632566069905, - "grad_norm": 0.04823920503258705, - "learning_rate": 2.083933999168679e-05, - "loss": 0.0015226650051772595, - "step": 15335 - }, - { - "epoch": 2.6155157715260016, - "grad_norm": 0.04050251096487045, - "learning_rate": 2.0816389539717694e-05, - "loss": 0.0024490740150213243, - "step": 15340 - }, - { - "epoch": 2.6163682864450126, - "grad_norm": 0.08443193882703781, - "learning_rate": 2.0793446876212937e-05, - "loss": 0.0027990926057100294, - "step": 15345 - }, - { - "epoch": 2.6172208013640237, - "grad_norm": 0.03322751075029373, - "learning_rate": 2.07705120118829e-05, - "loss": 0.0011861051432788372, - "step": 15350 - }, - { - "epoch": 2.618073316283035, - "grad_norm": 0.06874673068523407, - "learning_rate": 2.0747584957434375e-05, - "loss": 0.0018939610570669174, - "step": 15355 - }, - { - "epoch": 2.618925831202046, - "grad_norm": 0.04990018159151077, - "learning_rate": 2.0724665723570437e-05, - "loss": 0.0013890796341001987, - "step": 15360 - }, - { - "epoch": 2.6197783461210573, - "grad_norm": 0.06342940032482147, - "learning_rate": 2.0701754320990586e-05, - "loss": 0.0019270982593297958, - "step": 15365 - }, - { - "epoch": 2.620630861040068, - "grad_norm": 0.05647345632314682, - "learning_rate": 2.0678850760390607e-05, - "loss": 0.0019773844629526137, - "step": 15370 - }, - { - "epoch": 2.6214833759590794, - "grad_norm": 0.09767530113458633, - "learning_rate": 2.0655955052462643e-05, - "loss": 0.0025425378233194353, - "step": 15375 - }, - { - "epoch": 2.6223358908780905, - "grad_norm": 0.05659051984548569, - "learning_rate": 2.063306720789516e-05, - "loss": 0.0016861587762832641, - "step": 15380 - }, - { - "epoch": 2.6231884057971016, - "grad_norm": 0.07679109275341034, - "learning_rate": 2.061018723737299e-05, - "loss": 0.0012974600307643414, - "step": 15385 - }, - { - "epoch": 2.6240409207161126, - "grad_norm": 0.032084014266729355, - "learning_rate": 2.0587315151577257e-05, - "loss": 0.0009737671352922916, - "step": 15390 - }, - { - "epoch": 2.6248934356351237, - "grad_norm": 0.07588861882686615, - "learning_rate": 2.056445096118539e-05, - "loss": 0.002771071344614029, - "step": 15395 - }, - { - "epoch": 2.6257459505541347, - "grad_norm": 0.07706267386674881, - "learning_rate": 2.0541594676871188e-05, - "loss": 0.002124561369419098, - "step": 15400 - }, - { - "epoch": 2.626598465473146, - "grad_norm": 0.05501805245876312, - "learning_rate": 2.051874630930469e-05, - "loss": 0.001449206192046404, - "step": 15405 - }, - { - "epoch": 2.627450980392157, - "grad_norm": 0.07360731810331345, - "learning_rate": 2.0495905869152303e-05, - "loss": 0.0014007428660988807, - "step": 15410 - }, - { - "epoch": 2.628303495311168, - "grad_norm": 0.03651239722967148, - "learning_rate": 2.04730733670767e-05, - "loss": 0.0013091465458273887, - "step": 15415 - }, - { - "epoch": 2.629156010230179, - "grad_norm": 0.05154712125658989, - "learning_rate": 2.0450248813736842e-05, - "loss": 0.0017904775217175485, - "step": 15420 - }, - { - "epoch": 2.63000852514919, - "grad_norm": 0.03202452138066292, - "learning_rate": 2.0427432219787978e-05, - "loss": 0.002919047139585018, - "step": 15425 - }, - { - "epoch": 2.630861040068201, - "grad_norm": 0.08954522758722305, - "learning_rate": 2.040462359588169e-05, - "loss": 0.0031249357387423517, - "step": 15430 - }, - { - "epoch": 2.631713554987212, - "grad_norm": 0.09551462531089783, - "learning_rate": 2.038182295266577e-05, - "loss": 0.0016073914244771003, - "step": 15435 - }, - { - "epoch": 2.632566069906223, - "grad_norm": 0.0576794371008873, - "learning_rate": 2.035903030078432e-05, - "loss": 0.0028427325189113615, - "step": 15440 - }, - { - "epoch": 2.6334185848252343, - "grad_norm": 0.0879262238740921, - "learning_rate": 2.0336245650877728e-05, - "loss": 0.0012862576171755792, - "step": 15445 - }, - { - "epoch": 2.634271099744246, - "grad_norm": 0.1022641509771347, - "learning_rate": 2.03134690135826e-05, - "loss": 0.002296357229351997, - "step": 15450 - }, - { - "epoch": 2.6351236146632564, - "grad_norm": 0.07090801000595093, - "learning_rate": 2.029070039953186e-05, - "loss": 0.0032129865139722824, - "step": 15455 - }, - { - "epoch": 2.635976129582268, - "grad_norm": 0.06394338607788086, - "learning_rate": 2.026793981935463e-05, - "loss": 0.0022887293249368667, - "step": 15460 - }, - { - "epoch": 2.6368286445012785, - "grad_norm": 0.03828660771250725, - "learning_rate": 2.0245187283676316e-05, - "loss": 0.0023141488432884215, - "step": 15465 - }, - { - "epoch": 2.63768115942029, - "grad_norm": 0.0748148262500763, - "learning_rate": 2.0222442803118537e-05, - "loss": 0.002477791905403137, - "step": 15470 - }, - { - "epoch": 2.638533674339301, - "grad_norm": 0.02352295070886612, - "learning_rate": 2.019970638829921e-05, - "loss": 0.0021653104573488235, - "step": 15475 - }, - { - "epoch": 2.639386189258312, - "grad_norm": 0.019303878769278526, - "learning_rate": 2.017697804983243e-05, - "loss": 0.0014067382551729679, - "step": 15480 - }, - { - "epoch": 2.640238704177323, - "grad_norm": 0.036747269332408905, - "learning_rate": 2.015425779832854e-05, - "loss": 0.002145359478890896, - "step": 15485 - }, - { - "epoch": 2.6410912190963343, - "grad_norm": 0.04195109382271767, - "learning_rate": 2.0131545644394096e-05, - "loss": 0.0014138499274849892, - "step": 15490 - }, - { - "epoch": 2.6419437340153453, - "grad_norm": 0.07388610392808914, - "learning_rate": 2.0108841598631904e-05, - "loss": 0.0025294892489910125, - "step": 15495 - }, - { - "epoch": 2.6427962489343564, - "grad_norm": 0.0890735536813736, - "learning_rate": 2.0086145671640973e-05, - "loss": 0.0026762137189507485, - "step": 15500 - }, - { - "epoch": 2.6436487638533674, - "grad_norm": 0.07587535679340363, - "learning_rate": 2.006345787401652e-05, - "loss": 0.0031544029712677, - "step": 15505 - }, - { - "epoch": 2.6445012787723785, - "grad_norm": 0.10948733240365982, - "learning_rate": 2.004077821634995e-05, - "loss": 0.0023899499326944353, - "step": 15510 - }, - { - "epoch": 2.6453537936913896, - "grad_norm": 0.07914752513170242, - "learning_rate": 2.0018106709228886e-05, - "loss": 0.004097612574696541, - "step": 15515 - }, - { - "epoch": 2.6462063086104006, - "grad_norm": 0.07947845757007599, - "learning_rate": 1.9995443363237126e-05, - "loss": 0.0022834014147520066, - "step": 15520 - }, - { - "epoch": 2.6470588235294117, - "grad_norm": 0.05973362177610397, - "learning_rate": 1.9972788188954704e-05, - "loss": 0.001445610448718071, - "step": 15525 - }, - { - "epoch": 2.6479113384484227, - "grad_norm": 0.07292830944061279, - "learning_rate": 1.9950141196957792e-05, - "loss": 0.0023502418771386147, - "step": 15530 - }, - { - "epoch": 2.648763853367434, - "grad_norm": 0.09226574003696442, - "learning_rate": 1.9927502397818745e-05, - "loss": 0.002285385876893997, - "step": 15535 - }, - { - "epoch": 2.649616368286445, - "grad_norm": 0.08981240540742874, - "learning_rate": 1.9904871802106124e-05, - "loss": 0.0023617954924702645, - "step": 15540 - }, - { - "epoch": 2.6504688832054564, - "grad_norm": 0.07505398988723755, - "learning_rate": 1.988224942038466e-05, - "loss": 0.0016136666759848594, - "step": 15545 - }, - { - "epoch": 2.651321398124467, - "grad_norm": 0.06795456260442734, - "learning_rate": 1.9859635263215215e-05, - "loss": 0.0014020048081874847, - "step": 15550 - }, - { - "epoch": 2.6521739130434785, - "grad_norm": 0.07863990217447281, - "learning_rate": 1.983702934115483e-05, - "loss": 0.0016099724918603898, - "step": 15555 - }, - { - "epoch": 2.653026427962489, - "grad_norm": 0.15475937724113464, - "learning_rate": 1.9814431664756705e-05, - "loss": 0.0028660917654633523, - "step": 15560 - }, - { - "epoch": 2.6538789428815006, - "grad_norm": 0.09072619676589966, - "learning_rate": 1.979184224457017e-05, - "loss": 0.0038232788443565368, - "step": 15565 - }, - { - "epoch": 2.6547314578005117, - "grad_norm": 0.04944036900997162, - "learning_rate": 1.9769261091140746e-05, - "loss": 0.002762124501168728, - "step": 15570 - }, - { - "epoch": 2.6555839727195227, - "grad_norm": 0.07315114885568619, - "learning_rate": 1.974668821501005e-05, - "loss": 0.0018053753301501274, - "step": 15575 - }, - { - "epoch": 2.656436487638534, - "grad_norm": 0.03133604675531387, - "learning_rate": 1.972412362671584e-05, - "loss": 0.0012923687696456908, - "step": 15580 - }, - { - "epoch": 2.657289002557545, - "grad_norm": 0.07396573573350906, - "learning_rate": 1.9701567336792037e-05, - "loss": 0.004405549541115761, - "step": 15585 - }, - { - "epoch": 2.658141517476556, - "grad_norm": 0.05702332779765129, - "learning_rate": 1.967901935576867e-05, - "loss": 0.001864958368241787, - "step": 15590 - }, - { - "epoch": 2.658994032395567, - "grad_norm": 0.06003536656498909, - "learning_rate": 1.9656479694171882e-05, - "loss": 0.0025712646543979644, - "step": 15595 - }, - { - "epoch": 2.659846547314578, - "grad_norm": 0.06424745172262192, - "learning_rate": 1.963394836252393e-05, - "loss": 0.002156762033700943, - "step": 15600 - }, - { - "epoch": 2.660699062233589, - "grad_norm": 0.0703018307685852, - "learning_rate": 1.9611425371343193e-05, - "loss": 0.0034677576273679732, - "step": 15605 - }, - { - "epoch": 2.6615515771526, - "grad_norm": 0.06616941094398499, - "learning_rate": 1.958891073114414e-05, - "loss": 0.002393544837832451, - "step": 15610 - }, - { - "epoch": 2.662404092071611, - "grad_norm": 0.04988931491971016, - "learning_rate": 1.9566404452437374e-05, - "loss": 0.00223421361297369, - "step": 15615 - }, - { - "epoch": 2.6632566069906223, - "grad_norm": 0.04869920015335083, - "learning_rate": 1.9543906545729573e-05, - "loss": 0.0013234581798315047, - "step": 15620 - }, - { - "epoch": 2.6641091219096333, - "grad_norm": 0.06417152285575867, - "learning_rate": 1.9521417021523482e-05, - "loss": 0.002075556293129921, - "step": 15625 - }, - { - "epoch": 2.6649616368286444, - "grad_norm": 0.07077648490667343, - "learning_rate": 1.949893589031799e-05, - "loss": 0.002136031910777092, - "step": 15630 - }, - { - "epoch": 2.6658141517476555, - "grad_norm": 0.060446277260780334, - "learning_rate": 1.9476463162608016e-05, - "loss": 0.0025891490280628205, - "step": 15635 - }, - { - "epoch": 2.6666666666666665, - "grad_norm": 0.0396479107439518, - "learning_rate": 1.94539988488846e-05, - "loss": 0.0018554994836449622, - "step": 15640 - }, - { - "epoch": 2.6675191815856776, - "grad_norm": 0.06787905842065811, - "learning_rate": 1.9431542959634817e-05, - "loss": 0.0017763305455446242, - "step": 15645 - }, - { - "epoch": 2.668371696504689, - "grad_norm": 0.08236365765333176, - "learning_rate": 1.940909550534182e-05, - "loss": 0.0018430478870868684, - "step": 15650 - }, - { - "epoch": 2.6692242114236997, - "grad_norm": 0.10013429820537567, - "learning_rate": 1.9386656496484816e-05, - "loss": 0.0021154627203941343, - "step": 15655 - }, - { - "epoch": 2.670076726342711, - "grad_norm": 0.046891309320926666, - "learning_rate": 1.936422594353911e-05, - "loss": 0.0018745694309473039, - "step": 15660 - }, - { - "epoch": 2.670929241261722, - "grad_norm": 0.03863799199461937, - "learning_rate": 1.9341803856976016e-05, - "loss": 0.002184972912073135, - "step": 15665 - }, - { - "epoch": 2.6717817561807333, - "grad_norm": 0.05828642472624779, - "learning_rate": 1.9319390247262896e-05, - "loss": 0.0022127529606223106, - "step": 15670 - }, - { - "epoch": 2.6726342710997444, - "grad_norm": 0.08675269782543182, - "learning_rate": 1.9296985124863194e-05, - "loss": 0.002008732967078686, - "step": 15675 - }, - { - "epoch": 2.6734867860187554, - "grad_norm": 0.0700579434633255, - "learning_rate": 1.9274588500236345e-05, - "loss": 0.0024785833433270455, - "step": 15680 - }, - { - "epoch": 2.6743393009377665, - "grad_norm": 0.10120563209056854, - "learning_rate": 1.9252200383837854e-05, - "loss": 0.002890965715050697, - "step": 15685 - }, - { - "epoch": 2.6751918158567776, - "grad_norm": 0.10622604191303253, - "learning_rate": 1.9229820786119235e-05, - "loss": 0.002458018809556961, - "step": 15690 - }, - { - "epoch": 2.6760443307757886, - "grad_norm": 0.07292070984840393, - "learning_rate": 1.920744971752803e-05, - "loss": 0.0030672624707221985, - "step": 15695 - }, - { - "epoch": 2.6768968456947997, - "grad_norm": 0.030893906950950623, - "learning_rate": 1.918508718850779e-05, - "loss": 0.002947884239256382, - "step": 15700 - }, - { - "epoch": 2.6777493606138107, - "grad_norm": 0.07428406924009323, - "learning_rate": 1.9162733209498077e-05, - "loss": 0.003342418372631073, - "step": 15705 - }, - { - "epoch": 2.678601875532822, - "grad_norm": 0.014073869213461876, - "learning_rate": 1.9140387790934502e-05, - "loss": 0.0020058237016201018, - "step": 15710 - }, - { - "epoch": 2.679454390451833, - "grad_norm": 0.08076811581850052, - "learning_rate": 1.911805094324863e-05, - "loss": 0.0020218659192323685, - "step": 15715 - }, - { - "epoch": 2.680306905370844, - "grad_norm": 0.059959858655929565, - "learning_rate": 1.909572267686804e-05, - "loss": 0.0012911208905279636, - "step": 15720 - }, - { - "epoch": 2.681159420289855, - "grad_norm": 0.08688201755285263, - "learning_rate": 1.9073403002216313e-05, - "loss": 0.001681494526565075, - "step": 15725 - }, - { - "epoch": 2.682011935208866, - "grad_norm": 0.08083862066268921, - "learning_rate": 1.905109192971304e-05, - "loss": 0.002467956393957138, - "step": 15730 - }, - { - "epoch": 2.682864450127877, - "grad_norm": 0.06145070865750313, - "learning_rate": 1.902878946977375e-05, - "loss": 0.004355132207274437, - "step": 15735 - }, - { - "epoch": 2.683716965046888, - "grad_norm": 0.07410819083452225, - "learning_rate": 1.900649563280997e-05, - "loss": 0.0029904641211032867, - "step": 15740 - }, - { - "epoch": 2.6845694799658997, - "grad_norm": 0.03833797574043274, - "learning_rate": 1.8984210429229217e-05, - "loss": 0.0012984732165932655, - "step": 15745 - }, - { - "epoch": 2.6854219948849103, - "grad_norm": 0.023088248446583748, - "learning_rate": 1.896193386943494e-05, - "loss": 0.001397434249520302, - "step": 15750 - }, - { - "epoch": 2.686274509803922, - "grad_norm": 0.06918703764677048, - "learning_rate": 1.8939665963826616e-05, - "loss": 0.0015222997404634952, - "step": 15755 - }, - { - "epoch": 2.6871270247229324, - "grad_norm": 0.0286374781280756, - "learning_rate": 1.891740672279962e-05, - "loss": 0.0015881337225437165, - "step": 15760 - }, - { - "epoch": 2.687979539641944, - "grad_norm": 0.05485616624355316, - "learning_rate": 1.88951561567453e-05, - "loss": 0.0034288309514522554, - "step": 15765 - }, - { - "epoch": 2.688832054560955, - "grad_norm": 0.05021583288908005, - "learning_rate": 1.887291427605097e-05, - "loss": 0.0013944344595074654, - "step": 15770 - }, - { - "epoch": 2.689684569479966, - "grad_norm": 0.06752395629882812, - "learning_rate": 1.8850681091099895e-05, - "loss": 0.002590004727244377, - "step": 15775 - }, - { - "epoch": 2.690537084398977, - "grad_norm": 0.04273150861263275, - "learning_rate": 1.8828456612271255e-05, - "loss": 0.0019359454512596131, - "step": 15780 - }, - { - "epoch": 2.691389599317988, - "grad_norm": 0.0928453654050827, - "learning_rate": 1.8806240849940167e-05, - "loss": 0.003046049177646637, - "step": 15785 - }, - { - "epoch": 2.692242114236999, - "grad_norm": 0.025754287838935852, - "learning_rate": 1.8784033814477692e-05, - "loss": 0.0018295232206583024, - "step": 15790 - }, - { - "epoch": 2.6930946291560103, - "grad_norm": 0.07345419377088547, - "learning_rate": 1.8761835516250806e-05, - "loss": 0.0018985627219080925, - "step": 15795 - }, - { - "epoch": 2.6939471440750213, - "grad_norm": 0.08317514508962631, - "learning_rate": 1.873964596562243e-05, - "loss": 0.0030419353395700456, - "step": 15800 - }, - { - "epoch": 2.6947996589940324, - "grad_norm": 0.07300770282745361, - "learning_rate": 1.8717465172951377e-05, - "loss": 0.002040323428809643, - "step": 15805 - }, - { - "epoch": 2.6956521739130435, - "grad_norm": 0.07284363359212875, - "learning_rate": 1.8695293148592362e-05, - "loss": 0.001639954373240471, - "step": 15810 - }, - { - "epoch": 2.6965046888320545, - "grad_norm": 0.05817059800028801, - "learning_rate": 1.867312990289606e-05, - "loss": 0.0015234597958624363, - "step": 15815 - }, - { - "epoch": 2.6973572037510656, - "grad_norm": 0.11319714039564133, - "learning_rate": 1.865097544620897e-05, - "loss": 0.0018295228481292724, - "step": 15820 - }, - { - "epoch": 2.6982097186700766, - "grad_norm": 0.10493957251310349, - "learning_rate": 1.8628829788873567e-05, - "loss": 0.0025029994547367098, - "step": 15825 - }, - { - "epoch": 2.6990622335890877, - "grad_norm": 0.03161423280835152, - "learning_rate": 1.860669294122816e-05, - "loss": 0.0014271627180278302, - "step": 15830 - }, - { - "epoch": 2.6999147485080988, - "grad_norm": 0.03267689794301987, - "learning_rate": 1.858456491360697e-05, - "loss": 0.0012216478586196899, - "step": 15835 - }, - { - "epoch": 2.70076726342711, - "grad_norm": 0.07986247539520264, - "learning_rate": 1.856244571634008e-05, - "loss": 0.0018704459071159363, - "step": 15840 - }, - { - "epoch": 2.701619778346121, - "grad_norm": 0.10120461881160736, - "learning_rate": 1.85403353597535e-05, - "loss": 0.0020706810057163237, - "step": 15845 - }, - { - "epoch": 2.7024722932651324, - "grad_norm": 0.05339881405234337, - "learning_rate": 1.8518233854169056e-05, - "loss": 0.0017986055463552475, - "step": 15850 - }, - { - "epoch": 2.703324808184143, - "grad_norm": 0.11433786898851395, - "learning_rate": 1.8496141209904464e-05, - "loss": 0.0034054510295391084, - "step": 15855 - }, - { - "epoch": 2.7041773231031545, - "grad_norm": 0.061081189662218094, - "learning_rate": 1.8474057437273328e-05, - "loss": 0.002348882704973221, - "step": 15860 - }, - { - "epoch": 2.705029838022165, - "grad_norm": 0.055195316672325134, - "learning_rate": 1.8451982546585055e-05, - "loss": 0.0015221487730741501, - "step": 15865 - }, - { - "epoch": 2.7058823529411766, - "grad_norm": 0.06800514459609985, - "learning_rate": 1.8429916548144973e-05, - "loss": 0.0023088542744517325, - "step": 15870 - }, - { - "epoch": 2.7067348678601877, - "grad_norm": 0.05646739527583122, - "learning_rate": 1.8407859452254206e-05, - "loss": 0.0024141166359186172, - "step": 15875 - }, - { - "epoch": 2.7075873827791987, - "grad_norm": 0.10886628180742264, - "learning_rate": 1.8385811269209743e-05, - "loss": 0.0019476715475320815, - "step": 15880 - }, - { - "epoch": 2.70843989769821, - "grad_norm": 0.04279763624072075, - "learning_rate": 1.8363772009304395e-05, - "loss": 0.002021237276494503, - "step": 15885 - }, - { - "epoch": 2.709292412617221, - "grad_norm": 0.09583209455013275, - "learning_rate": 1.8341741682826852e-05, - "loss": 0.002025018632411957, - "step": 15890 - }, - { - "epoch": 2.710144927536232, - "grad_norm": 0.06695323437452316, - "learning_rate": 1.8319720300061582e-05, - "loss": 0.0026269391179084777, - "step": 15895 - }, - { - "epoch": 2.710997442455243, - "grad_norm": 0.07438764721155167, - "learning_rate": 1.829770787128889e-05, - "loss": 0.0014647828415036202, - "step": 15900 - }, - { - "epoch": 2.711849957374254, - "grad_norm": 0.05395448952913284, - "learning_rate": 1.8275704406784933e-05, - "loss": 0.0024559808894991874, - "step": 15905 - }, - { - "epoch": 2.712702472293265, - "grad_norm": 0.03163938969373703, - "learning_rate": 1.825370991682164e-05, - "loss": 0.0022430509328842164, - "step": 15910 - }, - { - "epoch": 2.713554987212276, - "grad_norm": 0.104282446205616, - "learning_rate": 1.8231724411666794e-05, - "loss": 0.001472956594079733, - "step": 15915 - }, - { - "epoch": 2.7144075021312872, - "grad_norm": 0.07355596870183945, - "learning_rate": 1.8209747901583944e-05, - "loss": 0.0023859225213527678, - "step": 15920 - }, - { - "epoch": 2.7152600170502983, - "grad_norm": 0.06525922566652298, - "learning_rate": 1.8187780396832463e-05, - "loss": 0.00265895314514637, - "step": 15925 - }, - { - "epoch": 2.7161125319693094, - "grad_norm": 0.09379115700721741, - "learning_rate": 1.8165821907667505e-05, - "loss": 0.002496413141489029, - "step": 15930 - }, - { - "epoch": 2.7169650468883204, - "grad_norm": 0.05254679545760155, - "learning_rate": 1.8143872444340017e-05, - "loss": 0.0022162407636642455, - "step": 15935 - }, - { - "epoch": 2.7178175618073315, - "grad_norm": 0.06203889846801758, - "learning_rate": 1.8121932017096758e-05, - "loss": 0.0016900423914194107, - "step": 15940 - }, - { - "epoch": 2.718670076726343, - "grad_norm": 0.08532653003931046, - "learning_rate": 1.810000063618023e-05, - "loss": 0.0028453752398490905, - "step": 15945 - }, - { - "epoch": 2.7195225916453536, - "grad_norm": 0.08361469209194183, - "learning_rate": 1.807807831182875e-05, - "loss": 0.0029737703502178193, - "step": 15950 - }, - { - "epoch": 2.720375106564365, - "grad_norm": 0.06439653784036636, - "learning_rate": 1.805616505427637e-05, - "loss": 0.002233676239848137, - "step": 15955 - }, - { - "epoch": 2.7212276214833757, - "grad_norm": 0.09197837114334106, - "learning_rate": 1.803426087375295e-05, - "loss": 0.0020749013870954513, - "step": 15960 - }, - { - "epoch": 2.722080136402387, - "grad_norm": 0.055145513266325, - "learning_rate": 1.8012365780484074e-05, - "loss": 0.0013141044415533542, - "step": 15965 - }, - { - "epoch": 2.7229326513213983, - "grad_norm": 0.06788767874240875, - "learning_rate": 1.7990479784691105e-05, - "loss": 0.0023008717224001886, - "step": 15970 - }, - { - "epoch": 2.7237851662404093, - "grad_norm": 0.10216987133026123, - "learning_rate": 1.7968602896591152e-05, - "loss": 0.002799564599990845, - "step": 15975 - }, - { - "epoch": 2.7246376811594204, - "grad_norm": 0.0995464101433754, - "learning_rate": 1.7946735126397056e-05, - "loss": 0.0023927824571728707, - "step": 15980 - }, - { - "epoch": 2.7254901960784315, - "grad_norm": 0.05936437472701073, - "learning_rate": 1.7924876484317453e-05, - "loss": 0.001253789383918047, - "step": 15985 - }, - { - "epoch": 2.7263427109974425, - "grad_norm": 0.06160435080528259, - "learning_rate": 1.7903026980556672e-05, - "loss": 0.00238239299505949, - "step": 15990 - }, - { - "epoch": 2.7271952259164536, - "grad_norm": 0.05691118910908699, - "learning_rate": 1.788118662531477e-05, - "loss": 0.0015995081514120102, - "step": 15995 - }, - { - "epoch": 2.7280477408354646, - "grad_norm": 0.0878402590751648, - "learning_rate": 1.7859355428787564e-05, - "loss": 0.002066444233059883, - "step": 16000 - }, - { - "epoch": 2.7289002557544757, - "grad_norm": 0.04065166413784027, - "learning_rate": 1.7837533401166598e-05, - "loss": 0.0022698283195495606, - "step": 16005 - }, - { - "epoch": 2.7297527706734868, - "grad_norm": 0.08980758488178253, - "learning_rate": 1.7815720552639105e-05, - "loss": 0.0016043156385421753, - "step": 16010 - }, - { - "epoch": 2.730605285592498, - "grad_norm": 0.05619784817099571, - "learning_rate": 1.7793916893388055e-05, - "loss": 0.0025583259761333466, - "step": 16015 - }, - { - "epoch": 2.731457800511509, - "grad_norm": 0.09853291511535645, - "learning_rate": 1.7772122433592116e-05, - "loss": 0.0025311170145869257, - "step": 16020 - }, - { - "epoch": 2.73231031543052, - "grad_norm": 0.044340990483760834, - "learning_rate": 1.7750337183425652e-05, - "loss": 0.0020809115841984747, - "step": 16025 - }, - { - "epoch": 2.733162830349531, - "grad_norm": 0.024994025006890297, - "learning_rate": 1.772856115305877e-05, - "loss": 0.001932576857507229, - "step": 16030 - }, - { - "epoch": 2.734015345268542, - "grad_norm": 0.07059352099895477, - "learning_rate": 1.770679435265724e-05, - "loss": 0.002347341552376747, - "step": 16035 - }, - { - "epoch": 2.734867860187553, - "grad_norm": 0.08121193200349808, - "learning_rate": 1.7685036792382506e-05, - "loss": 0.0015123223885893822, - "step": 16040 - }, - { - "epoch": 2.735720375106564, - "grad_norm": 0.07900503277778625, - "learning_rate": 1.766328848239175e-05, - "loss": 0.0028667191043496134, - "step": 16045 - }, - { - "epoch": 2.7365728900255757, - "grad_norm": 0.08574212342500687, - "learning_rate": 1.7641549432837778e-05, - "loss": 0.002038617432117462, - "step": 16050 - }, - { - "epoch": 2.7374254049445863, - "grad_norm": 0.06154071167111397, - "learning_rate": 1.7619819653869132e-05, - "loss": 0.0017743892967700958, - "step": 16055 - }, - { - "epoch": 2.738277919863598, - "grad_norm": 0.06745338439941406, - "learning_rate": 1.7598099155629982e-05, - "loss": 0.0018204674124717712, - "step": 16060 - }, - { - "epoch": 2.7391304347826084, - "grad_norm": 0.029756128787994385, - "learning_rate": 1.7576387948260175e-05, - "loss": 0.0020426372066140175, - "step": 16065 - }, - { - "epoch": 2.73998294970162, - "grad_norm": 0.13447973132133484, - "learning_rate": 1.7554686041895217e-05, - "loss": 0.0023698143661022185, - "step": 16070 - }, - { - "epoch": 2.740835464620631, - "grad_norm": 0.09888533502817154, - "learning_rate": 1.7532993446666298e-05, - "loss": 0.0024117348715662957, - "step": 16075 - }, - { - "epoch": 2.741687979539642, - "grad_norm": 0.05919703096151352, - "learning_rate": 1.751131017270024e-05, - "loss": 0.0027751058340072634, - "step": 16080 - }, - { - "epoch": 2.742540494458653, - "grad_norm": 0.04920949414372444, - "learning_rate": 1.74896362301195e-05, - "loss": 0.0022046850994229318, - "step": 16085 - }, - { - "epoch": 2.743393009377664, - "grad_norm": 0.028095668181777, - "learning_rate": 1.746797162904222e-05, - "loss": 0.001455264538526535, - "step": 16090 - }, - { - "epoch": 2.7442455242966752, - "grad_norm": 0.03558868542313576, - "learning_rate": 1.7446316379582125e-05, - "loss": 0.0023241037502884864, - "step": 16095 - }, - { - "epoch": 2.7450980392156863, - "grad_norm": 0.07124538719654083, - "learning_rate": 1.742467049184864e-05, - "loss": 0.0014614716172218322, - "step": 16100 - }, - { - "epoch": 2.7459505541346974, - "grad_norm": 0.07355284690856934, - "learning_rate": 1.7403033975946774e-05, - "loss": 0.0018932107836008073, - "step": 16105 - }, - { - "epoch": 2.7468030690537084, - "grad_norm": 0.06485545635223389, - "learning_rate": 1.738140684197717e-05, - "loss": 0.0021881703287363052, - "step": 16110 - }, - { - "epoch": 2.7476555839727195, - "grad_norm": 0.05748758837580681, - "learning_rate": 1.735978910003607e-05, - "loss": 0.0019190840423107148, - "step": 16115 - }, - { - "epoch": 2.7485080988917305, - "grad_norm": 0.04986255615949631, - "learning_rate": 1.7338180760215395e-05, - "loss": 0.001525167189538479, - "step": 16120 - }, - { - "epoch": 2.7493606138107416, - "grad_norm": 0.06383983045816422, - "learning_rate": 1.731658183260262e-05, - "loss": 0.0026792695745825766, - "step": 16125 - }, - { - "epoch": 2.749531116794544, - "eval_loss": 0.047858335077762604, - "eval_runtime": 3.7263, - "eval_samples_per_second": 67.627, - "eval_steps_per_second": 1.073, - "step": 16126 - }, - { - "eval_cer_subset": 0.01459117038774081, - "eval_cer_subset_edit_distance": 896, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 16126 - }, - { - "epoch": 2.7502131287297527, - "grad_norm": 0.10121899098157883, - "learning_rate": 1.7294992327280826e-05, - "loss": 0.0027876641601324082, - "step": 16130 - }, - { - "epoch": 2.7510656436487637, - "grad_norm": 0.029004251584410667, - "learning_rate": 1.7273412254328743e-05, - "loss": 0.0015729216858744622, - "step": 16135 - }, - { - "epoch": 2.7519181585677748, - "grad_norm": 0.06613736599683762, - "learning_rate": 1.7251841623820638e-05, - "loss": 0.0020701587200164795, - "step": 16140 - }, - { - "epoch": 2.7527706734867863, - "grad_norm": 0.09212189167737961, - "learning_rate": 1.7230280445826422e-05, - "loss": 0.0026726944372057913, - "step": 16145 - }, - { - "epoch": 2.753623188405797, - "grad_norm": 0.12724192440509796, - "learning_rate": 1.720872873041157e-05, - "loss": 0.002590762265026569, - "step": 16150 - }, - { - "epoch": 2.7544757033248084, - "grad_norm": 0.043855708092451096, - "learning_rate": 1.7187186487637124e-05, - "loss": 0.001780974492430687, - "step": 16155 - }, - { - "epoch": 2.755328218243819, - "grad_norm": 0.10562611371278763, - "learning_rate": 1.7165653727559725e-05, - "loss": 0.002336742728948593, - "step": 16160 - }, - { - "epoch": 2.7561807331628305, - "grad_norm": 0.05162282660603523, - "learning_rate": 1.7144130460231574e-05, - "loss": 0.0018106916919350623, - "step": 16165 - }, - { - "epoch": 2.7570332480818416, - "grad_norm": 0.020110471174120903, - "learning_rate": 1.7122616695700467e-05, - "loss": 0.0014431983232498168, - "step": 16170 - }, - { - "epoch": 2.7578857630008526, - "grad_norm": 0.15154017508029938, - "learning_rate": 1.7101112444009725e-05, - "loss": 0.0019074320793151856, - "step": 16175 - }, - { - "epoch": 2.7587382779198637, - "grad_norm": 0.03481750935316086, - "learning_rate": 1.7079617715198264e-05, - "loss": 0.0037923645228147506, - "step": 16180 - }, - { - "epoch": 2.7595907928388748, - "grad_norm": 0.024081731215119362, - "learning_rate": 1.7058132519300524e-05, - "loss": 0.002791491337120533, - "step": 16185 - }, - { - "epoch": 2.760443307757886, - "grad_norm": 0.07880852371454239, - "learning_rate": 1.703665686634653e-05, - "loss": 0.0028480572625994684, - "step": 16190 - }, - { - "epoch": 2.761295822676897, - "grad_norm": 0.06910362094640732, - "learning_rate": 1.701519076636182e-05, - "loss": 0.0018049828708171845, - "step": 16195 - }, - { - "epoch": 2.762148337595908, - "grad_norm": 0.09321995079517365, - "learning_rate": 1.699373422936748e-05, - "loss": 0.001952703855931759, - "step": 16200 - }, - { - "epoch": 2.763000852514919, - "grad_norm": 0.05871212109923363, - "learning_rate": 1.6972287265380137e-05, - "loss": 0.00121518075466156, - "step": 16205 - }, - { - "epoch": 2.76385336743393, - "grad_norm": 0.10542161762714386, - "learning_rate": 1.6950849884411936e-05, - "loss": 0.0024038642644882203, - "step": 16210 - }, - { - "epoch": 2.764705882352941, - "grad_norm": 0.0580933652818203, - "learning_rate": 1.6929422096470582e-05, - "loss": 0.0021081961691379546, - "step": 16215 - }, - { - "epoch": 2.765558397271952, - "grad_norm": 0.024878472089767456, - "learning_rate": 1.6908003911559256e-05, - "loss": 0.0022545790299773215, - "step": 16220 - }, - { - "epoch": 2.7664109121909632, - "grad_norm": 0.060553766787052155, - "learning_rate": 1.6886595339676703e-05, - "loss": 0.0015277018770575523, - "step": 16225 - }, - { - "epoch": 2.7672634271099743, - "grad_norm": 0.06857582181692123, - "learning_rate": 1.6865196390817137e-05, - "loss": 0.001996198855340481, - "step": 16230 - }, - { - "epoch": 2.7681159420289854, - "grad_norm": 0.06866193562746048, - "learning_rate": 1.6843807074970316e-05, - "loss": 0.0014093054458498954, - "step": 16235 - }, - { - "epoch": 2.7689684569479964, - "grad_norm": 0.12889426946640015, - "learning_rate": 1.6822427402121476e-05, - "loss": 0.0029415406286716463, - "step": 16240 - }, - { - "epoch": 2.7698209718670075, - "grad_norm": 0.05907638370990753, - "learning_rate": 1.6801057382251363e-05, - "loss": 0.0020356021821498873, - "step": 16245 - }, - { - "epoch": 2.770673486786019, - "grad_norm": 0.05899703502655029, - "learning_rate": 1.6779697025336205e-05, - "loss": 0.0010949989780783653, - "step": 16250 - }, - { - "epoch": 2.7715260017050296, - "grad_norm": 0.048360541462898254, - "learning_rate": 1.6758346341347716e-05, - "loss": 0.002375531755387783, - "step": 16255 - }, - { - "epoch": 2.772378516624041, - "grad_norm": 0.06712590157985687, - "learning_rate": 1.6737005340253134e-05, - "loss": 0.0016120089218020438, - "step": 16260 - }, - { - "epoch": 2.7732310315430517, - "grad_norm": 0.04694962501525879, - "learning_rate": 1.6715674032015137e-05, - "loss": 0.0010866542346775533, - "step": 16265 - }, - { - "epoch": 2.7740835464620632, - "grad_norm": 0.06813527643680573, - "learning_rate": 1.6694352426591873e-05, - "loss": 0.001494432892650366, - "step": 16270 - }, - { - "epoch": 2.7749360613810743, - "grad_norm": 0.12899814546108246, - "learning_rate": 1.6673040533937004e-05, - "loss": 0.003590015694499016, - "step": 16275 - }, - { - "epoch": 2.7757885763000854, - "grad_norm": 0.013963109813630581, - "learning_rate": 1.6651738363999604e-05, - "loss": 0.0019298167899250984, - "step": 16280 - }, - { - "epoch": 2.7766410912190964, - "grad_norm": 0.03605286777019501, - "learning_rate": 1.6630445926724262e-05, - "loss": 0.0031480703502893446, - "step": 16285 - }, - { - "epoch": 2.7774936061381075, - "grad_norm": 0.10986622422933578, - "learning_rate": 1.660916323205098e-05, - "loss": 0.002917572297155857, - "step": 16290 - }, - { - "epoch": 2.7783461210571185, - "grad_norm": 0.057930365204811096, - "learning_rate": 1.658789028991523e-05, - "loss": 0.0026299282908439636, - "step": 16295 - }, - { - "epoch": 2.7791986359761296, - "grad_norm": 0.029447276145219803, - "learning_rate": 1.6566627110247917e-05, - "loss": 0.0022400498390197756, - "step": 16300 - }, - { - "epoch": 2.7800511508951407, - "grad_norm": 0.045625604689121246, - "learning_rate": 1.6545373702975423e-05, - "loss": 0.0010993774980306626, - "step": 16305 - }, - { - "epoch": 2.7809036658141517, - "grad_norm": 0.03276116028428078, - "learning_rate": 1.6524130078019536e-05, - "loss": 0.0017030857503414153, - "step": 16310 - }, - { - "epoch": 2.7817561807331628, - "grad_norm": 0.07950260490179062, - "learning_rate": 1.650289624529747e-05, - "loss": 0.0029186248779296876, - "step": 16315 - }, - { - "epoch": 2.782608695652174, - "grad_norm": 0.03196907788515091, - "learning_rate": 1.6481672214721915e-05, - "loss": 0.0021285150200128556, - "step": 16320 - }, - { - "epoch": 2.783461210571185, - "grad_norm": 0.08347548544406891, - "learning_rate": 1.6460457996200926e-05, - "loss": 0.0018467068672180175, - "step": 16325 - }, - { - "epoch": 2.784313725490196, - "grad_norm": 0.062316033989191055, - "learning_rate": 1.643925359963803e-05, - "loss": 0.002080459892749786, - "step": 16330 - }, - { - "epoch": 2.785166240409207, - "grad_norm": 0.06067380681633949, - "learning_rate": 1.641805903493214e-05, - "loss": 0.0014378841035068036, - "step": 16335 - }, - { - "epoch": 2.786018755328218, - "grad_norm": 0.19668881595134735, - "learning_rate": 1.6396874311977574e-05, - "loss": 0.0018663834780454636, - "step": 16340 - }, - { - "epoch": 2.7868712702472296, - "grad_norm": 0.03857511281967163, - "learning_rate": 1.637569944066407e-05, - "loss": 0.0017508219927549363, - "step": 16345 - }, - { - "epoch": 2.78772378516624, - "grad_norm": 0.06684751063585281, - "learning_rate": 1.6354534430876746e-05, - "loss": 0.0021339647471904756, - "step": 16350 - }, - { - "epoch": 2.7885763000852517, - "grad_norm": 0.0722980946302414, - "learning_rate": 1.633337929249616e-05, - "loss": 0.002456018142402172, - "step": 16355 - }, - { - "epoch": 2.7894288150042623, - "grad_norm": 0.013861587271094322, - "learning_rate": 1.6312234035398214e-05, - "loss": 0.0013738014735281468, - "step": 16360 - }, - { - "epoch": 2.790281329923274, - "grad_norm": 0.05103524401783943, - "learning_rate": 1.6291098669454237e-05, - "loss": 0.0012777662836015225, - "step": 16365 - }, - { - "epoch": 2.791133844842285, - "grad_norm": 0.08019815385341644, - "learning_rate": 1.6269973204530896e-05, - "loss": 0.0021779144182801245, - "step": 16370 - }, - { - "epoch": 2.791986359761296, - "grad_norm": 0.11674029380083084, - "learning_rate": 1.6248857650490287e-05, - "loss": 0.003945905342698097, - "step": 16375 - }, - { - "epoch": 2.792838874680307, - "grad_norm": 0.10289142280817032, - "learning_rate": 1.622775201718984e-05, - "loss": 0.0033991221338510514, - "step": 16380 - }, - { - "epoch": 2.793691389599318, - "grad_norm": 0.08295715600252151, - "learning_rate": 1.6206656314482372e-05, - "loss": 0.0025476697832345963, - "step": 16385 - }, - { - "epoch": 2.794543904518329, - "grad_norm": 0.09820916503667831, - "learning_rate": 1.618557055221605e-05, - "loss": 0.002469751611351967, - "step": 16390 - }, - { - "epoch": 2.79539641943734, - "grad_norm": 0.04547690227627754, - "learning_rate": 1.61644947402344e-05, - "loss": 0.0017419423907995223, - "step": 16395 - }, - { - "epoch": 2.7962489343563512, - "grad_norm": 0.09098807722330093, - "learning_rate": 1.6143428888376336e-05, - "loss": 0.0025540072470903396, - "step": 16400 - }, - { - "epoch": 2.7971014492753623, - "grad_norm": 0.06253538280725479, - "learning_rate": 1.6122373006476078e-05, - "loss": 0.00161474347114563, - "step": 16405 - }, - { - "epoch": 2.7979539641943734, - "grad_norm": 0.10068398714065552, - "learning_rate": 1.6101327104363236e-05, - "loss": 0.0030464882031083105, - "step": 16410 - }, - { - "epoch": 2.7988064791133844, - "grad_norm": 0.04052518680691719, - "learning_rate": 1.6080291191862708e-05, - "loss": 0.001292982418090105, - "step": 16415 - }, - { - "epoch": 2.7996589940323955, - "grad_norm": 0.09480784833431244, - "learning_rate": 1.605926527879478e-05, - "loss": 0.002949811331927776, - "step": 16420 - }, - { - "epoch": 2.8005115089514065, - "grad_norm": 0.08064186573028564, - "learning_rate": 1.603824937497505e-05, - "loss": 0.001863202080130577, - "step": 16425 - }, - { - "epoch": 2.8013640238704176, - "grad_norm": 0.03577118366956711, - "learning_rate": 1.601724349021443e-05, - "loss": 0.0015472200699150561, - "step": 16430 - }, - { - "epoch": 2.8022165387894287, - "grad_norm": 0.04698857292532921, - "learning_rate": 1.5996247634319162e-05, - "loss": 0.002168430760502815, - "step": 16435 - }, - { - "epoch": 2.80306905370844, - "grad_norm": 0.09031593799591064, - "learning_rate": 1.5975261817090803e-05, - "loss": 0.0017798427492380143, - "step": 16440 - }, - { - "epoch": 2.803921568627451, - "grad_norm": 0.24021683633327484, - "learning_rate": 1.5954286048326258e-05, - "loss": 0.0024022582918405535, - "step": 16445 - }, - { - "epoch": 2.8047740835464623, - "grad_norm": 0.07379221171140671, - "learning_rate": 1.5933320337817685e-05, - "loss": 0.0016447069123387338, - "step": 16450 - }, - { - "epoch": 2.805626598465473, - "grad_norm": 0.07145442813634872, - "learning_rate": 1.59123646953526e-05, - "loss": 0.002100140042603016, - "step": 16455 - }, - { - "epoch": 2.8064791133844844, - "grad_norm": 0.06444204598665237, - "learning_rate": 1.5891419130713783e-05, - "loss": 0.0022544978186488152, - "step": 16460 - }, - { - "epoch": 2.8073316283034955, - "grad_norm": 0.07764707505702972, - "learning_rate": 1.5870483653679307e-05, - "loss": 0.002028309740126133, - "step": 16465 - }, - { - "epoch": 2.8081841432225065, - "grad_norm": 0.13890637457370758, - "learning_rate": 1.584955827402257e-05, - "loss": 0.001833663322031498, - "step": 16470 - }, - { - "epoch": 2.8090366581415176, - "grad_norm": 0.06412612646818161, - "learning_rate": 1.5828643001512236e-05, - "loss": 0.0017296869307756424, - "step": 16475 - }, - { - "epoch": 2.8098891730605287, - "grad_norm": 0.05978688597679138, - "learning_rate": 1.5807737845912234e-05, - "loss": 0.001933468133211136, - "step": 16480 - }, - { - "epoch": 2.8107416879795397, - "grad_norm": 0.1131395548582077, - "learning_rate": 1.5786842816981778e-05, - "loss": 0.003291580080986023, - "step": 16485 - }, - { - "epoch": 2.8115942028985508, - "grad_norm": 0.0549713559448719, - "learning_rate": 1.5765957924475394e-05, - "loss": 0.0019789932295680044, - "step": 16490 - }, - { - "epoch": 2.812446717817562, - "grad_norm": 0.08038460463285446, - "learning_rate": 1.5745083178142833e-05, - "loss": 0.002347235009074211, - "step": 16495 - }, - { - "epoch": 2.813299232736573, - "grad_norm": 0.05014783889055252, - "learning_rate": 1.5724218587729098e-05, - "loss": 0.0016623528674244881, - "step": 16500 - }, - { - "epoch": 2.814151747655584, - "grad_norm": 0.05042316019535065, - "learning_rate": 1.5703364162974503e-05, - "loss": 0.0018199939280748368, - "step": 16505 - }, - { - "epoch": 2.815004262574595, - "grad_norm": 0.056051138788461685, - "learning_rate": 1.5682519913614565e-05, - "loss": 0.0016215803101658822, - "step": 16510 - }, - { - "epoch": 2.815856777493606, - "grad_norm": 0.04295732453465462, - "learning_rate": 1.5661685849380098e-05, - "loss": 0.0020044256001710893, - "step": 16515 - }, - { - "epoch": 2.816709292412617, - "grad_norm": 0.02020161598920822, - "learning_rate": 1.564086197999712e-05, - "loss": 0.0018876813352108, - "step": 16520 - }, - { - "epoch": 2.817561807331628, - "grad_norm": 0.09220346808433533, - "learning_rate": 1.562004831518691e-05, - "loss": 0.0015535833314061164, - "step": 16525 - }, - { - "epoch": 2.8184143222506393, - "grad_norm": 0.09728234261274338, - "learning_rate": 1.5599244864665966e-05, - "loss": 0.0015536649152636528, - "step": 16530 - }, - { - "epoch": 2.8192668371696503, - "grad_norm": 0.17288024723529816, - "learning_rate": 1.5578451638146053e-05, - "loss": 0.0021170184016227724, - "step": 16535 - }, - { - "epoch": 2.8201193520886614, - "grad_norm": 0.056582558900117874, - "learning_rate": 1.5557668645334132e-05, - "loss": 0.0030540911480784415, - "step": 16540 - }, - { - "epoch": 2.820971867007673, - "grad_norm": 0.17674417793750763, - "learning_rate": 1.553689589593238e-05, - "loss": 0.001543693896383047, - "step": 16545 - }, - { - "epoch": 2.8218243819266835, - "grad_norm": 0.06186344474554062, - "learning_rate": 1.551613339963823e-05, - "loss": 0.001764528639614582, - "step": 16550 - }, - { - "epoch": 2.822676896845695, - "grad_norm": 0.13224560022354126, - "learning_rate": 1.5495381166144288e-05, - "loss": 0.004735496640205383, - "step": 16555 - }, - { - "epoch": 2.8235294117647056, - "grad_norm": 0.1427813619375229, - "learning_rate": 1.5474639205138406e-05, - "loss": 0.003041662834584713, - "step": 16560 - }, - { - "epoch": 2.824381926683717, - "grad_norm": 0.09970462322235107, - "learning_rate": 1.5453907526303614e-05, - "loss": 0.0025150768458843233, - "step": 16565 - }, - { - "epoch": 2.825234441602728, - "grad_norm": 0.02305634692311287, - "learning_rate": 1.5433186139318144e-05, - "loss": 0.001219399645924568, - "step": 16570 - }, - { - "epoch": 2.8260869565217392, - "grad_norm": 0.04805911332368851, - "learning_rate": 1.541247505385543e-05, - "loss": 0.0012801218777894973, - "step": 16575 - }, - { - "epoch": 2.8269394714407503, - "grad_norm": 0.08059800416231155, - "learning_rate": 1.539177427958408e-05, - "loss": 0.0031003907322883608, - "step": 16580 - }, - { - "epoch": 2.8277919863597614, - "grad_norm": 0.05763188377022743, - "learning_rate": 1.537108382616794e-05, - "loss": 0.002337191253900528, - "step": 16585 - }, - { - "epoch": 2.8286445012787724, - "grad_norm": 0.06821907311677933, - "learning_rate": 1.535040370326597e-05, - "loss": 0.0030008716508746146, - "step": 16590 - }, - { - "epoch": 2.8294970161977835, - "grad_norm": 0.12901924550533295, - "learning_rate": 1.5329733920532358e-05, - "loss": 0.0035179533064365388, - "step": 16595 - }, - { - "epoch": 2.8303495311167945, - "grad_norm": 0.040896832942962646, - "learning_rate": 1.5309074487616435e-05, - "loss": 0.0020170003175735475, - "step": 16600 - }, - { - "epoch": 2.8312020460358056, - "grad_norm": 0.06776095926761627, - "learning_rate": 1.5288425414162725e-05, - "loss": 0.0017662534490227699, - "step": 16605 - }, - { - "epoch": 2.8320545609548167, - "grad_norm": 0.08130808174610138, - "learning_rate": 1.5267786709810897e-05, - "loss": 0.0018257603049278259, - "step": 16610 - }, - { - "epoch": 2.8329070758738277, - "grad_norm": 0.05846976861357689, - "learning_rate": 1.5247158384195778e-05, - "loss": 0.0013240544125437737, - "step": 16615 - }, - { - "epoch": 2.833759590792839, - "grad_norm": 0.113974429666996, - "learning_rate": 1.522654044694736e-05, - "loss": 0.002671768143773079, - "step": 16620 - }, - { - "epoch": 2.83461210571185, - "grad_norm": 0.03519630804657936, - "learning_rate": 1.5205932907690771e-05, - "loss": 0.001667863130569458, - "step": 16625 - }, - { - "epoch": 2.835464620630861, - "grad_norm": 0.014673003926873207, - "learning_rate": 1.5185335776046322e-05, - "loss": 0.002035524509847164, - "step": 16630 - }, - { - "epoch": 2.836317135549872, - "grad_norm": 0.05683857575058937, - "learning_rate": 1.5164749061629407e-05, - "loss": 0.0021878147497773172, - "step": 16635 - }, - { - "epoch": 2.8371696504688835, - "grad_norm": 0.08671200275421143, - "learning_rate": 1.5144172774050623e-05, - "loss": 0.002064511738717556, - "step": 16640 - }, - { - "epoch": 2.838022165387894, - "grad_norm": 0.041581057012081146, - "learning_rate": 1.512360692291563e-05, - "loss": 0.0019536083564162254, - "step": 16645 - }, - { - "epoch": 2.8388746803069056, - "grad_norm": 0.10846979171037674, - "learning_rate": 1.5103051517825288e-05, - "loss": 0.0026564691215753555, - "step": 16650 - }, - { - "epoch": 2.839727195225916, - "grad_norm": 0.026884516701102257, - "learning_rate": 1.5082506568375526e-05, - "loss": 0.0026851309463381766, - "step": 16655 - }, - { - "epoch": 2.8405797101449277, - "grad_norm": 0.0613347552716732, - "learning_rate": 1.506197208415741e-05, - "loss": 0.0014739801175892354, - "step": 16660 - }, - { - "epoch": 2.8414322250639388, - "grad_norm": 0.06315013766288757, - "learning_rate": 1.504144807475712e-05, - "loss": 0.0026756677776575088, - "step": 16665 - }, - { - "epoch": 2.84228473998295, - "grad_norm": 0.04869166761636734, - "learning_rate": 1.5020934549755933e-05, - "loss": 0.0020816361531615256, - "step": 16670 - }, - { - "epoch": 2.843137254901961, - "grad_norm": 0.07282520830631256, - "learning_rate": 1.5000431518730273e-05, - "loss": 0.0008225045166909695, - "step": 16675 - }, - { - "epoch": 2.843989769820972, - "grad_norm": 0.051693812012672424, - "learning_rate": 1.4979938991251607e-05, - "loss": 0.002745438739657402, - "step": 16680 - }, - { - "epoch": 2.844842284739983, - "grad_norm": 0.1495431363582611, - "learning_rate": 1.4959456976886558e-05, - "loss": 0.001805400662124157, - "step": 16685 - }, - { - "epoch": 2.845694799658994, - "grad_norm": 0.05393834039568901, - "learning_rate": 1.4938985485196799e-05, - "loss": 0.0017135551199316979, - "step": 16690 - }, - { - "epoch": 2.846547314578005, - "grad_norm": 0.06205644831061363, - "learning_rate": 1.4918524525739088e-05, - "loss": 0.002358596958220005, - "step": 16695 - }, - { - "epoch": 2.847399829497016, - "grad_norm": 0.1177382543683052, - "learning_rate": 1.4898074108065306e-05, - "loss": 0.00382155142724514, - "step": 16700 - }, - { - "epoch": 2.8482523444160273, - "grad_norm": 0.06532850116491318, - "learning_rate": 1.487763424172238e-05, - "loss": 0.002384480834007263, - "step": 16705 - }, - { - "epoch": 2.8491048593350383, - "grad_norm": 0.05195530876517296, - "learning_rate": 1.4857204936252313e-05, - "loss": 0.0030395207926630975, - "step": 16710 - }, - { - "epoch": 2.8499573742540494, - "grad_norm": 0.06609994173049927, - "learning_rate": 1.4836786201192182e-05, - "loss": 0.002476612851023674, - "step": 16715 - }, - { - "epoch": 2.8508098891730604, - "grad_norm": 0.07928726077079773, - "learning_rate": 1.4816378046074146e-05, - "loss": 0.001881701312959194, - "step": 16720 - }, - { - "epoch": 2.8516624040920715, - "grad_norm": 0.08206343650817871, - "learning_rate": 1.4795980480425392e-05, - "loss": 0.0017553886398673057, - "step": 16725 - }, - { - "epoch": 2.8525149190110826, - "grad_norm": 0.08301947265863419, - "learning_rate": 1.4775593513768202e-05, - "loss": 0.0031315773725509644, - "step": 16730 - }, - { - "epoch": 2.8533674339300936, - "grad_norm": 0.034867819398641586, - "learning_rate": 1.4755217155619887e-05, - "loss": 0.0016052091494202613, - "step": 16735 - }, - { - "epoch": 2.8542199488491047, - "grad_norm": 0.03188352286815643, - "learning_rate": 1.4734851415492789e-05, - "loss": 0.002192831225693226, - "step": 16740 - }, - { - "epoch": 2.855072463768116, - "grad_norm": 0.07953578233718872, - "learning_rate": 1.4714496302894339e-05, - "loss": 0.002898801490664482, - "step": 16745 - }, - { - "epoch": 2.855924978687127, - "grad_norm": 0.06410107016563416, - "learning_rate": 1.4694151827326966e-05, - "loss": 0.0023399315774440765, - "step": 16750 - }, - { - "epoch": 2.8567774936061383, - "grad_norm": 0.09000501781702042, - "learning_rate": 1.4673817998288152e-05, - "loss": 0.003346502408385277, - "step": 16755 - }, - { - "epoch": 2.857630008525149, - "grad_norm": 0.07080121338367462, - "learning_rate": 1.465349482527039e-05, - "loss": 0.002062254026532173, - "step": 16760 - }, - { - "epoch": 2.8584825234441604, - "grad_norm": 0.03813991695642471, - "learning_rate": 1.4633182317761244e-05, - "loss": 0.0037174589931964876, - "step": 16765 - }, - { - "epoch": 2.8593350383631715, - "grad_norm": 0.035782843828201294, - "learning_rate": 1.4612880485243246e-05, - "loss": 0.0017096459865570067, - "step": 16770 - }, - { - "epoch": 2.8601875532821825, - "grad_norm": 0.058607637882232666, - "learning_rate": 1.4592589337193962e-05, - "loss": 0.0013915538787841798, - "step": 16775 - }, - { - "epoch": 2.8610400682011936, - "grad_norm": 0.06763444095849991, - "learning_rate": 1.4572308883085995e-05, - "loss": 0.0025088803842663763, - "step": 16780 - }, - { - "epoch": 2.8618925831202047, - "grad_norm": 0.08015233278274536, - "learning_rate": 1.4552039132386913e-05, - "loss": 0.001922524720430374, - "step": 16785 - }, - { - "epoch": 2.8627450980392157, - "grad_norm": 0.07501938939094543, - "learning_rate": 1.4531780094559332e-05, - "loss": 0.0023180417716503142, - "step": 16790 - }, - { - "epoch": 2.863597612958227, - "grad_norm": 0.1105467900633812, - "learning_rate": 1.4511531779060838e-05, - "loss": 0.0017500972375273705, - "step": 16795 - }, - { - "epoch": 2.864450127877238, - "grad_norm": 0.016127226874232292, - "learning_rate": 1.4491294195344016e-05, - "loss": 0.0029237957671284674, - "step": 16800 - }, - { - "epoch": 2.865302642796249, - "grad_norm": 0.06432373076677322, - "learning_rate": 1.447106735285644e-05, - "loss": 0.002439063973724842, - "step": 16805 - }, - { - "epoch": 2.86615515771526, - "grad_norm": 0.07629001885652542, - "learning_rate": 1.4450851261040664e-05, - "loss": 0.0021009005606174467, - "step": 16810 - }, - { - "epoch": 2.867007672634271, - "grad_norm": 0.05186440795660019, - "learning_rate": 1.4430645929334253e-05, - "loss": 0.0010275249369442463, - "step": 16815 - }, - { - "epoch": 2.867860187553282, - "grad_norm": 0.06517529487609863, - "learning_rate": 1.4410451367169705e-05, - "loss": 0.0022583767771720887, - "step": 16820 - }, - { - "epoch": 2.868712702472293, - "grad_norm": 0.03262385353446007, - "learning_rate": 1.4390267583974544e-05, - "loss": 0.002132249251008034, - "step": 16825 - }, - { - "epoch": 2.869565217391304, - "grad_norm": 0.04578368365764618, - "learning_rate": 1.4370094589171199e-05, - "loss": 0.0015474225394427777, - "step": 16830 - }, - { - "epoch": 2.8704177323103153, - "grad_norm": 0.11160826683044434, - "learning_rate": 1.4349932392177122e-05, - "loss": 0.001869696006178856, - "step": 16835 - }, - { - "epoch": 2.8712702472293268, - "grad_norm": 0.07949322462081909, - "learning_rate": 1.4329781002404687e-05, - "loss": 0.002716188505291939, - "step": 16840 - }, - { - "epoch": 2.8721227621483374, - "grad_norm": 0.12685050070285797, - "learning_rate": 1.430964042926123e-05, - "loss": 0.0026786208152770998, - "step": 16845 - }, - { - "epoch": 2.872975277067349, - "grad_norm": 0.03826960548758507, - "learning_rate": 1.428951068214904e-05, - "loss": 0.0015644762665033341, - "step": 16850 - }, - { - "epoch": 2.8738277919863595, - "grad_norm": 0.0909774899482727, - "learning_rate": 1.4269391770465346e-05, - "loss": 0.0020492007955908776, - "step": 16855 - }, - { - "epoch": 2.874680306905371, - "grad_norm": 0.09891391545534134, - "learning_rate": 1.4249283703602345e-05, - "loss": 0.0028120437636971474, - "step": 16860 - }, - { - "epoch": 2.875532821824382, - "grad_norm": 0.06281251460313797, - "learning_rate": 1.4229186490947126e-05, - "loss": 0.001888560503721237, - "step": 16865 - }, - { - "epoch": 2.876385336743393, - "grad_norm": 0.0330815464258194, - "learning_rate": 1.4209100141881763e-05, - "loss": 0.002112870290875435, - "step": 16870 - }, - { - "epoch": 2.877237851662404, - "grad_norm": 0.053650904446840286, - "learning_rate": 1.4189024665783207e-05, - "loss": 0.0012864695861935615, - "step": 16875 - }, - { - "epoch": 2.8780903665814153, - "grad_norm": 0.035941146314144135, - "learning_rate": 1.4168960072023384e-05, - "loss": 0.0028607305139303207, - "step": 16880 - }, - { - "epoch": 2.8789428815004263, - "grad_norm": 0.025085339322686195, - "learning_rate": 1.41489063699691e-05, - "loss": 0.001800362393260002, - "step": 16885 - }, - { - "epoch": 2.8797953964194374, - "grad_norm": 0.08627615869045258, - "learning_rate": 1.4128863568982088e-05, - "loss": 0.0023837506771087645, - "step": 16890 - }, - { - "epoch": 2.8806479113384484, - "grad_norm": 0.11542297154664993, - "learning_rate": 1.4108831678419e-05, - "loss": 0.003114992380142212, - "step": 16895 - }, - { - "epoch": 2.8815004262574595, - "grad_norm": 0.04762958735227585, - "learning_rate": 1.4088810707631375e-05, - "loss": 0.0020215384662151336, - "step": 16900 - }, - { - "epoch": 2.8823529411764706, - "grad_norm": 0.08232380449771881, - "learning_rate": 1.4068800665965687e-05, - "loss": 0.002120315283536911, - "step": 16905 - }, - { - "epoch": 2.8832054560954816, - "grad_norm": 0.04248562082648277, - "learning_rate": 1.4048801562763272e-05, - "loss": 0.001563185639679432, - "step": 16910 - }, - { - "epoch": 2.8840579710144927, - "grad_norm": 0.058416519314050674, - "learning_rate": 1.4028813407360393e-05, - "loss": 0.0017185319215059281, - "step": 16915 - }, - { - "epoch": 2.8849104859335037, - "grad_norm": 0.03542419150471687, - "learning_rate": 1.4008836209088185e-05, - "loss": 0.0017645543441176415, - "step": 16920 - }, - { - "epoch": 2.885763000852515, - "grad_norm": 0.055227622389793396, - "learning_rate": 1.3988869977272645e-05, - "loss": 0.002331301011145115, - "step": 16925 - }, - { - "epoch": 2.886615515771526, - "grad_norm": 0.02851465903222561, - "learning_rate": 1.3968914721234703e-05, - "loss": 0.00188722126185894, - "step": 16930 - }, - { - "epoch": 2.887468030690537, - "grad_norm": 0.10346336662769318, - "learning_rate": 1.3948970450290129e-05, - "loss": 0.003334081172943115, - "step": 16935 - }, - { - "epoch": 2.888320545609548, - "grad_norm": 0.040114935487508774, - "learning_rate": 1.3929037173749564e-05, - "loss": 0.002542957104742527, - "step": 16940 - }, - { - "epoch": 2.8891730605285595, - "grad_norm": 0.06734409183263779, - "learning_rate": 1.3909114900918517e-05, - "loss": 0.002022533863782883, - "step": 16945 - }, - { - "epoch": 2.89002557544757, - "grad_norm": 0.03672570362687111, - "learning_rate": 1.3889203641097392e-05, - "loss": 0.0017688646912574768, - "step": 16950 - }, - { - "epoch": 2.8908780903665816, - "grad_norm": 0.016099590808153152, - "learning_rate": 1.3869303403581397e-05, - "loss": 0.002179678343236446, - "step": 16955 - }, - { - "epoch": 2.8917306052855922, - "grad_norm": 0.0655573159456253, - "learning_rate": 1.384941419766066e-05, - "loss": 0.0020285720005631448, - "step": 16960 - }, - { - "epoch": 2.8925831202046037, - "grad_norm": 0.03573548421263695, - "learning_rate": 1.3829536032620105e-05, - "loss": 0.002248694933950901, - "step": 16965 - }, - { - "epoch": 2.893435635123615, - "grad_norm": 0.06182318180799484, - "learning_rate": 1.3809668917739507e-05, - "loss": 0.002159777097404003, - "step": 16970 - }, - { - "epoch": 2.894288150042626, - "grad_norm": 0.09492490440607071, - "learning_rate": 1.3789812862293527e-05, - "loss": 0.0027505803853273393, - "step": 16975 - }, - { - "epoch": 2.895140664961637, - "grad_norm": 0.043292637914419174, - "learning_rate": 1.3769967875551613e-05, - "loss": 0.0018307223916053772, - "step": 16980 - }, - { - "epoch": 2.895993179880648, - "grad_norm": 0.08455146849155426, - "learning_rate": 1.375013396677807e-05, - "loss": 0.0019843194633722304, - "step": 16985 - }, - { - "epoch": 2.896845694799659, - "grad_norm": 0.06926032900810242, - "learning_rate": 1.3730311145232023e-05, - "loss": 0.0024761717766523363, - "step": 16990 - }, - { - "epoch": 2.89769820971867, - "grad_norm": 0.0860179215669632, - "learning_rate": 1.3710499420167413e-05, - "loss": 0.002175389975309372, - "step": 16995 - }, - { - "epoch": 2.898550724637681, - "grad_norm": 0.10651890188455582, - "learning_rate": 1.3690698800833026e-05, - "loss": 0.0033860310912132265, - "step": 17000 - }, - { - "epoch": 2.899403239556692, - "grad_norm": 0.09691976010799408, - "learning_rate": 1.3670909296472464e-05, - "loss": 0.0021878845989704134, - "step": 17005 - }, - { - "epoch": 2.9002557544757033, - "grad_norm": 0.11704960465431213, - "learning_rate": 1.3651130916324107e-05, - "loss": 0.00286871287971735, - "step": 17010 - }, - { - "epoch": 2.9011082693947143, - "grad_norm": 0.09645909816026688, - "learning_rate": 1.3631363669621153e-05, - "loss": 0.001873398572206497, - "step": 17015 - }, - { - "epoch": 2.9019607843137254, - "grad_norm": 0.13174127042293549, - "learning_rate": 1.3611607565591639e-05, - "loss": 0.00285712368786335, - "step": 17020 - }, - { - "epoch": 2.9028132992327365, - "grad_norm": 0.07539260387420654, - "learning_rate": 1.359186261345835e-05, - "loss": 0.0027119526639580727, - "step": 17025 - }, - { - "epoch": 2.9036658141517475, - "grad_norm": 0.06165684387087822, - "learning_rate": 1.3572128822438892e-05, - "loss": 0.0018354985862970353, - "step": 17030 - }, - { - "epoch": 2.9045183290707586, - "grad_norm": 0.06021244078874588, - "learning_rate": 1.3552406201745654e-05, - "loss": 0.0016940701752901077, - "step": 17035 - }, - { - "epoch": 2.90537084398977, - "grad_norm": 0.09488464146852493, - "learning_rate": 1.3532694760585795e-05, - "loss": 0.0019129924476146698, - "step": 17040 - }, - { - "epoch": 2.9062233589087807, - "grad_norm": 0.04894041642546654, - "learning_rate": 1.3512994508161307e-05, - "loss": 0.002598444186151028, - "step": 17045 - }, - { - "epoch": 2.907075873827792, - "grad_norm": 0.045589860528707504, - "learning_rate": 1.349330545366889e-05, - "loss": 0.0018267668783664703, - "step": 17050 - }, - { - "epoch": 2.907928388746803, - "grad_norm": 0.04273771867156029, - "learning_rate": 1.3473627606300071e-05, - "loss": 0.0013479530811309815, - "step": 17055 - }, - { - "epoch": 2.9087809036658143, - "grad_norm": 0.050675440579652786, - "learning_rate": 1.345396097524111e-05, - "loss": 0.001664750650525093, - "step": 17060 - }, - { - "epoch": 2.9096334185848254, - "grad_norm": 0.07637523114681244, - "learning_rate": 1.3434305569673059e-05, - "loss": 0.001719363033771515, - "step": 17065 - }, - { - "epoch": 2.9104859335038364, - "grad_norm": 0.03540422394871712, - "learning_rate": 1.3414661398771711e-05, - "loss": 0.002338713780045509, - "step": 17070 - }, - { - "epoch": 2.9113384484228475, - "grad_norm": 0.09252000600099564, - "learning_rate": 1.3395028471707613e-05, - "loss": 0.0018722079694271088, - "step": 17075 - }, - { - "epoch": 2.9121909633418586, - "grad_norm": 0.08759574592113495, - "learning_rate": 1.3375406797646068e-05, - "loss": 0.003211042284965515, - "step": 17080 - }, - { - "epoch": 2.9130434782608696, - "grad_norm": 0.07291707396507263, - "learning_rate": 1.3355796385747121e-05, - "loss": 0.002141663059592247, - "step": 17085 - }, - { - "epoch": 2.9138959931798807, - "grad_norm": 0.03608965128660202, - "learning_rate": 1.3336197245165578e-05, - "loss": 0.0015133512206375599, - "step": 17090 - }, - { - "epoch": 2.9147485080988917, - "grad_norm": 0.0686686635017395, - "learning_rate": 1.3316609385050954e-05, - "loss": 0.0015084316954016685, - "step": 17095 - }, - { - "epoch": 2.915601023017903, - "grad_norm": 0.052468664944171906, - "learning_rate": 1.3297032814547539e-05, - "loss": 0.00120701240375638, - "step": 17100 - }, - { - "epoch": 2.916453537936914, - "grad_norm": 0.06129363924264908, - "learning_rate": 1.3277467542794304e-05, - "loss": 0.002575872652232647, - "step": 17105 - }, - { - "epoch": 2.917306052855925, - "grad_norm": 0.06045043095946312, - "learning_rate": 1.3257913578924969e-05, - "loss": 0.0022510627284646036, - "step": 17110 - }, - { - "epoch": 2.918158567774936, - "grad_norm": 0.09090365469455719, - "learning_rate": 1.3238370932067996e-05, - "loss": 0.002203880250453949, - "step": 17115 - }, - { - "epoch": 2.919011082693947, - "grad_norm": 0.03382663428783417, - "learning_rate": 1.3218839611346522e-05, - "loss": 0.0009420939721167087, - "step": 17120 - }, - { - "epoch": 2.919863597612958, - "grad_norm": 0.06900735199451447, - "learning_rate": 1.3199319625878431e-05, - "loss": 0.0021647622808814047, - "step": 17125 - }, - { - "epoch": 2.920716112531969, - "grad_norm": 0.04494655504822731, - "learning_rate": 1.3179810984776277e-05, - "loss": 0.0027208495885133743, - "step": 17130 - }, - { - "epoch": 2.9215686274509802, - "grad_norm": 0.05262625217437744, - "learning_rate": 1.3160313697147373e-05, - "loss": 0.0015311154536902904, - "step": 17135 - }, - { - "epoch": 2.9224211423699913, - "grad_norm": 0.025083297863602638, - "learning_rate": 1.314082777209368e-05, - "loss": 0.00193443913012743, - "step": 17140 - }, - { - "epoch": 2.923273657289003, - "grad_norm": 0.08246373385190964, - "learning_rate": 1.3121353218711892e-05, - "loss": 0.0019143052399158479, - "step": 17145 - }, - { - "epoch": 2.9241261722080134, - "grad_norm": 0.1049862802028656, - "learning_rate": 1.3101890046093376e-05, - "loss": 0.002230258658528328, - "step": 17150 - }, - { - "epoch": 2.924978687127025, - "grad_norm": 0.042054325342178345, - "learning_rate": 1.3082438263324169e-05, - "loss": 0.0011081861332058907, - "step": 17155 - }, - { - "epoch": 2.9258312020460355, - "grad_norm": 0.0713399276137352, - "learning_rate": 1.3062997879485033e-05, - "loss": 0.0015817128121852874, - "step": 17160 - }, - { - "epoch": 2.926683716965047, - "grad_norm": 0.07212921977043152, - "learning_rate": 1.3043568903651381e-05, - "loss": 0.002985073998570442, - "step": 17165 - }, - { - "epoch": 2.927536231884058, - "grad_norm": 0.14285585284233093, - "learning_rate": 1.3024151344893299e-05, - "loss": 0.0019961275160312653, - "step": 17170 - }, - { - "epoch": 2.928388746803069, - "grad_norm": 0.06164155155420303, - "learning_rate": 1.3004745212275543e-05, - "loss": 0.0017055023461580276, - "step": 17175 - }, - { - "epoch": 2.92924126172208, - "grad_norm": 0.02376371994614601, - "learning_rate": 1.298535051485756e-05, - "loss": 0.0013552471064031124, - "step": 17180 - }, - { - "epoch": 2.9300937766410913, - "grad_norm": 0.07454569637775421, - "learning_rate": 1.296596726169342e-05, - "loss": 0.002513031102716923, - "step": 17185 - }, - { - "epoch": 2.9309462915601023, - "grad_norm": 0.0765121579170227, - "learning_rate": 1.2946595461831892e-05, - "loss": 0.0019039563834667207, - "step": 17190 - }, - { - "epoch": 2.9317988064791134, - "grad_norm": 0.07360806316137314, - "learning_rate": 1.2927235124316362e-05, - "loss": 0.001339799538254738, - "step": 17195 - }, - { - "epoch": 2.9326513213981245, - "grad_norm": 0.18903285264968872, - "learning_rate": 1.2907886258184876e-05, - "loss": 0.003720489144325256, - "step": 17200 - }, - { - "epoch": 2.9335038363171355, - "grad_norm": 0.07760016620159149, - "learning_rate": 1.2888548872470143e-05, - "loss": 0.0015237806364893913, - "step": 17205 - }, - { - "epoch": 2.9343563512361466, - "grad_norm": 0.055864643305540085, - "learning_rate": 1.286922297619949e-05, - "loss": 0.0014091457240283489, - "step": 17210 - }, - { - "epoch": 2.9352088661551576, - "grad_norm": 0.08161517977714539, - "learning_rate": 1.2849908578394888e-05, - "loss": 0.002047298289835453, - "step": 17215 - }, - { - "epoch": 2.9360613810741687, - "grad_norm": 0.11219590902328491, - "learning_rate": 1.283060568807294e-05, - "loss": 0.0023268122225999833, - "step": 17220 - }, - { - "epoch": 2.9369138959931798, - "grad_norm": 0.10008323192596436, - "learning_rate": 1.2811314314244867e-05, - "loss": 0.002319963276386261, - "step": 17225 - }, - { - "epoch": 2.937766410912191, - "grad_norm": 0.077080138027668, - "learning_rate": 1.2792034465916536e-05, - "loss": 0.0020459359511733055, - "step": 17230 - }, - { - "epoch": 2.938618925831202, - "grad_norm": 0.09049349278211594, - "learning_rate": 1.2772766152088431e-05, - "loss": 0.0038630947470664977, - "step": 17235 - }, - { - "epoch": 2.9394714407502134, - "grad_norm": 0.09306768327951431, - "learning_rate": 1.275350938175563e-05, - "loss": 0.0017305316403508186, - "step": 17240 - }, - { - "epoch": 2.940323955669224, - "grad_norm": 0.061699800193309784, - "learning_rate": 1.2734264163907824e-05, - "loss": 0.00341113954782486, - "step": 17245 - }, - { - "epoch": 2.9411764705882355, - "grad_norm": 0.11029893159866333, - "learning_rate": 1.2715030507529347e-05, - "loss": 0.0023353056982159614, - "step": 17250 - }, - { - "epoch": 2.942028985507246, - "grad_norm": 0.06272252649068832, - "learning_rate": 1.2695808421599087e-05, - "loss": 0.0012727061286568642, - "step": 17255 - }, - { - "epoch": 2.9428815004262576, - "grad_norm": 0.02106044627726078, - "learning_rate": 1.2676597915090567e-05, - "loss": 0.0020675512030720712, - "step": 17260 - }, - { - "epoch": 2.9437340153452687, - "grad_norm": 0.08245997875928879, - "learning_rate": 1.2657398996971883e-05, - "loss": 0.002128716930747032, - "step": 17265 - }, - { - "epoch": 2.9445865302642797, - "grad_norm": 0.10804266482591629, - "learning_rate": 1.2638211676205718e-05, - "loss": 0.0012407343834638595, - "step": 17270 - }, - { - "epoch": 2.945439045183291, - "grad_norm": 0.0485721081495285, - "learning_rate": 1.2619035961749375e-05, - "loss": 0.0019056517630815506, - "step": 17275 - }, - { - "epoch": 2.946291560102302, - "grad_norm": 0.04094598814845085, - "learning_rate": 1.2599871862554694e-05, - "loss": 0.0014778503216803073, - "step": 17280 - }, - { - "epoch": 2.947144075021313, - "grad_norm": 0.08831547200679779, - "learning_rate": 1.2580719387568133e-05, - "loss": 0.002304557338356972, - "step": 17285 - }, - { - "epoch": 2.947996589940324, - "grad_norm": 0.02547610178589821, - "learning_rate": 1.2561578545730685e-05, - "loss": 0.0010631450451910496, - "step": 17290 - }, - { - "epoch": 2.948849104859335, - "grad_norm": 0.09562932699918747, - "learning_rate": 1.2542449345977952e-05, - "loss": 0.0021377883851528166, - "step": 17295 - }, - { - "epoch": 2.949701619778346, - "grad_norm": 0.02090577222406864, - "learning_rate": 1.2523331797240072e-05, - "loss": 0.001333952508866787, - "step": 17300 - }, - { - "epoch": 2.950554134697357, - "grad_norm": 0.12461904436349869, - "learning_rate": 1.2504225908441751e-05, - "loss": 0.0025647601112723352, - "step": 17305 - }, - { - "epoch": 2.9514066496163682, - "grad_norm": 0.047791410237550735, - "learning_rate": 1.2485131688502254e-05, - "loss": 0.0014650242403149605, - "step": 17310 - }, - { - "epoch": 2.9522591645353793, - "grad_norm": 0.055085547268390656, - "learning_rate": 1.2466049146335387e-05, - "loss": 0.002520528435707092, - "step": 17315 - }, - { - "epoch": 2.9531116794543903, - "grad_norm": 0.09370748698711395, - "learning_rate": 1.2446978290849538e-05, - "loss": 0.002327192947268486, - "step": 17320 - }, - { - "epoch": 2.9539641943734014, - "grad_norm": 0.06663045287132263, - "learning_rate": 1.242791913094759e-05, - "loss": 0.0025285203009843826, - "step": 17325 - }, - { - "epoch": 2.9548167092924125, - "grad_norm": 0.06620613485574722, - "learning_rate": 1.2408871675527022e-05, - "loss": 0.001520772185176611, - "step": 17330 - }, - { - "epoch": 2.955669224211424, - "grad_norm": 0.08397935330867767, - "learning_rate": 1.2389835933479805e-05, - "loss": 0.001917354017496109, - "step": 17335 - }, - { - "epoch": 2.9565217391304346, - "grad_norm": 0.037347212433815, - "learning_rate": 1.2370811913692447e-05, - "loss": 0.001991302520036697, - "step": 17340 - }, - { - "epoch": 2.957374254049446, - "grad_norm": 0.09309769421815872, - "learning_rate": 1.2351799625046013e-05, - "loss": 0.0028038494288921355, - "step": 17345 - }, - { - "epoch": 2.9582267689684567, - "grad_norm": 0.03684366121888161, - "learning_rate": 1.2332799076416064e-05, - "loss": 0.0017773956060409546, - "step": 17350 - }, - { - "epoch": 2.959079283887468, - "grad_norm": 0.05473257228732109, - "learning_rate": 1.2313810276672687e-05, - "loss": 0.0012853020802140237, - "step": 17355 - }, - { - "epoch": 2.9599317988064793, - "grad_norm": 0.042117465287446976, - "learning_rate": 1.2294833234680473e-05, - "loss": 0.001919369027018547, - "step": 17360 - }, - { - "epoch": 2.9607843137254903, - "grad_norm": 0.05097515508532524, - "learning_rate": 1.2275867959298559e-05, - "loss": 0.001891462691128254, - "step": 17365 - }, - { - "epoch": 2.9616368286445014, - "grad_norm": 0.09409259259700775, - "learning_rate": 1.2256914459380544e-05, - "loss": 0.0014902386814355851, - "step": 17370 - }, - { - "epoch": 2.9624893435635125, - "grad_norm": 0.09465356171131134, - "learning_rate": 1.2237972743774576e-05, - "loss": 0.002463678829371929, - "step": 17375 - }, - { - "epoch": 2.9633418584825235, - "grad_norm": 0.02534087561070919, - "learning_rate": 1.221904282132327e-05, - "loss": 0.0023292653262615205, - "step": 17380 - }, - { - "epoch": 2.9641943734015346, - "grad_norm": 0.1058032363653183, - "learning_rate": 1.2200124700863723e-05, - "loss": 0.002900855429470539, - "step": 17385 - }, - { - "epoch": 2.9650468883205456, - "grad_norm": 0.07726191729307175, - "learning_rate": 1.218121839122757e-05, - "loss": 0.0014870663173496724, - "step": 17390 - }, - { - "epoch": 2.9658994032395567, - "grad_norm": 0.0792614072561264, - "learning_rate": 1.21623239012409e-05, - "loss": 0.001744781993329525, - "step": 17395 - }, - { - "epoch": 2.9667519181585678, - "grad_norm": 0.07266564667224884, - "learning_rate": 1.214344123972428e-05, - "loss": 0.002622047811746597, - "step": 17400 - }, - { - "epoch": 2.967604433077579, - "grad_norm": 0.06203412637114525, - "learning_rate": 1.2124570415492758e-05, - "loss": 0.002504969388246536, - "step": 17405 - }, - { - "epoch": 2.96845694799659, - "grad_norm": 0.07259709388017654, - "learning_rate": 1.2105711437355884e-05, - "loss": 0.0018782744184136391, - "step": 17410 - }, - { - "epoch": 2.969309462915601, - "grad_norm": 0.05496470257639885, - "learning_rate": 1.2086864314117633e-05, - "loss": 0.0018179532140493392, - "step": 17415 - }, - { - "epoch": 2.970161977834612, - "grad_norm": 0.0235351100564003, - "learning_rate": 1.2068029054576496e-05, - "loss": 0.0015613840892910956, - "step": 17420 - }, - { - "epoch": 2.971014492753623, - "grad_norm": 0.046441882848739624, - "learning_rate": 1.2049205667525383e-05, - "loss": 0.0014228712767362594, - "step": 17425 - }, - { - "epoch": 2.971867007672634, - "grad_norm": 0.06290153414011002, - "learning_rate": 1.2030394161751664e-05, - "loss": 0.0011624433100223541, - "step": 17430 - }, - { - "epoch": 2.972719522591645, - "grad_norm": 0.0662989467382431, - "learning_rate": 1.2011594546037205e-05, - "loss": 0.002170179411768913, - "step": 17435 - }, - { - "epoch": 2.9735720375106567, - "grad_norm": 0.06470426172018051, - "learning_rate": 1.1992806829158275e-05, - "loss": 0.0010997526347637176, - "step": 17440 - }, - { - "epoch": 2.9744245524296673, - "grad_norm": 0.039091553539037704, - "learning_rate": 1.1974031019885612e-05, - "loss": 0.0014238604344427586, - "step": 17445 - }, - { - "epoch": 2.975277067348679, - "grad_norm": 0.03796529024839401, - "learning_rate": 1.1955267126984376e-05, - "loss": 0.002270728349685669, - "step": 17450 - }, - { - "epoch": 2.9761295822676894, - "grad_norm": 0.09608127176761627, - "learning_rate": 1.1936515159214177e-05, - "loss": 0.0030095497146248817, - "step": 17455 - }, - { - "epoch": 2.976982097186701, - "grad_norm": 0.09011568874120712, - "learning_rate": 1.1917775125329063e-05, - "loss": 0.0031840000301599503, - "step": 17460 - }, - { - "epoch": 2.977834612105712, - "grad_norm": 0.057273294776678085, - "learning_rate": 1.1899047034077522e-05, - "loss": 0.0011888986453413963, - "step": 17465 - }, - { - "epoch": 2.978687127024723, - "grad_norm": 0.14515799283981323, - "learning_rate": 1.1880330894202432e-05, - "loss": 0.001710682176053524, - "step": 17470 - }, - { - "epoch": 2.979539641943734, - "grad_norm": 0.09522838145494461, - "learning_rate": 1.1861626714441096e-05, - "loss": 0.002519896999001503, - "step": 17475 - }, - { - "epoch": 2.980392156862745, - "grad_norm": 0.08164853602647781, - "learning_rate": 1.1842934503525282e-05, - "loss": 0.002578527852892876, - "step": 17480 - }, - { - "epoch": 2.9812446717817562, - "grad_norm": 0.08428774774074554, - "learning_rate": 1.1824254270181112e-05, - "loss": 0.0012953916564583778, - "step": 17485 - }, - { - "epoch": 2.9820971867007673, - "grad_norm": 0.07469037175178528, - "learning_rate": 1.180558602312915e-05, - "loss": 0.0037867244333028792, - "step": 17490 - }, - { - "epoch": 2.9829497016197783, - "grad_norm": 0.08371725678443909, - "learning_rate": 1.1786929771084346e-05, - "loss": 0.002791520766913891, - "step": 17495 - }, - { - "epoch": 2.9838022165387894, - "grad_norm": 0.014852025546133518, - "learning_rate": 1.1768285522756056e-05, - "loss": 0.0014176778495311737, - "step": 17500 - }, - { - "epoch": 2.9846547314578005, - "grad_norm": 0.04576858505606651, - "learning_rate": 1.174965328684804e-05, - "loss": 0.002578184753656387, - "step": 17505 - }, - { - "epoch": 2.9855072463768115, - "grad_norm": 0.05726059526205063, - "learning_rate": 1.1731033072058464e-05, - "loss": 0.0016687212511897088, - "step": 17510 - }, - { - "epoch": 2.9863597612958226, - "grad_norm": 0.0770409107208252, - "learning_rate": 1.171242488707984e-05, - "loss": 0.0013428821228444576, - "step": 17515 - }, - { - "epoch": 2.9872122762148337, - "grad_norm": 0.10322020947933197, - "learning_rate": 1.1693828740599093e-05, - "loss": 0.0019340002909302711, - "step": 17520 - }, - { - "epoch": 2.9880647911338447, - "grad_norm": 0.08900497853755951, - "learning_rate": 1.1675244641297531e-05, - "loss": 0.002262430638074875, - "step": 17525 - }, - { - "epoch": 2.9889173060528558, - "grad_norm": 0.06439421325922012, - "learning_rate": 1.1656672597850828e-05, - "loss": 0.003663495182991028, - "step": 17530 - }, - { - "epoch": 2.9897698209718673, - "grad_norm": 0.032524604350328445, - "learning_rate": 1.1638112618929023e-05, - "loss": 0.00146266371011734, - "step": 17535 - }, - { - "epoch": 2.990622335890878, - "grad_norm": 0.09089723974466324, - "learning_rate": 1.1619564713196542e-05, - "loss": 0.002597668394446373, - "step": 17540 - }, - { - "epoch": 2.9914748508098894, - "grad_norm": 0.11931595206260681, - "learning_rate": 1.1601028889312144e-05, - "loss": 0.0025284418836236, - "step": 17545 - }, - { - "epoch": 2.9923273657289, - "grad_norm": 0.05474149063229561, - "learning_rate": 1.1582505155928994e-05, - "loss": 0.002077813073992729, - "step": 17550 - }, - { - "epoch": 2.9931798806479115, - "grad_norm": 0.060414139181375504, - "learning_rate": 1.1563993521694564e-05, - "loss": 0.0014027852565050125, - "step": 17555 - }, - { - "epoch": 2.9940323955669226, - "grad_norm": 0.03036579303443432, - "learning_rate": 1.1545493995250727e-05, - "loss": 0.0008949190378189087, - "step": 17560 - }, - { - "epoch": 2.9948849104859336, - "grad_norm": 0.030154328793287277, - "learning_rate": 1.1527006585233662e-05, - "loss": 0.002073490060865879, - "step": 17565 - }, - { - "epoch": 2.9957374254049447, - "grad_norm": 0.04413657262921333, - "learning_rate": 1.1508531300273893e-05, - "loss": 0.0018356587737798692, - "step": 17570 - }, - { - "epoch": 2.9965899403239558, - "grad_norm": 0.022916359826922417, - "learning_rate": 1.1490068148996329e-05, - "loss": 0.0018058544024825095, - "step": 17575 - }, - { - "epoch": 2.997442455242967, - "grad_norm": 0.059595149010419846, - "learning_rate": 1.1471617140020162e-05, - "loss": 0.0019177049398422241, - "step": 17580 - }, - { - "epoch": 2.998294970161978, - "grad_norm": 0.038439393043518066, - "learning_rate": 1.1453178281958944e-05, - "loss": 0.002159320004284382, - "step": 17585 - }, - { - "epoch": 2.999147485080989, - "grad_norm": 0.021210921928286552, - "learning_rate": 1.1434751583420536e-05, - "loss": 0.0014576959423720838, - "step": 17590 - }, - { - "epoch": 2.9994884910485933, - "eval_loss": 0.04721549153327942, - "eval_runtime": 3.7007, - "eval_samples_per_second": 68.095, - "eval_steps_per_second": 1.081, - "step": 17592 - }, - { - "eval_cer_subset": 0.014346898562053186, - "eval_cer_subset_edit_distance": 881, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 17592 - } - ], - "logging_steps": 5, - "max_steps": 23460, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 2932, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 5.930443211653472e+18, - "train_batch_size": 32, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/training_args.bin deleted file mode 100644 index f049a0b30d4abb921310cf007655d399147294f8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-17592/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6adec376d54028ef57ef3dc856a5cba12bab9c0d580369637fa983b6072064f7 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/adapter_model.safetensors deleted file mode 100644 index 19e650309b2d236f0160931a75a69ac52c3d75ff..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:be7d6022ef168903e30a9dff4cfaf5faff2f4835a52ea03b71b558cfaff397be -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/optimizer.pt deleted file mode 100644 index 095dfbf057d9095b4682865ca8042b3b7327bd93..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:435930853716076c9104a36e22ec9a7661704fb2e726546fb6e48cc618125215 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/rng_state.pth deleted file mode 100644 index 625df13af9c361a10b1f62dae2674ca7cca689bb..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:301716f43dd7c92a67e80d7d8700f1333280e9a03747298651c9c41715ace6de -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/scheduler.pt deleted file mode 100644 index 2201e42361382ea61de6152825be9ecf652be644..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:65c4ed07ead38044e922d2ece4dcae16145544e57bfa5b43d1942110585da0d3 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/trainer_state.json deleted file mode 100644 index 11e72d5a8aa4cb226cefffafe5521555502c2c1f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/trainer_state.json +++ /dev/null @@ -1,2676 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.2482946793997272, - "eval_steps": 366, - "global_step": 1830, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 1.9132920319205376e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-1830/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/adapter_config.json deleted file mode 100644 index dcdb6b17c195950cd12709b48b32f4a0c173c74e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "down_proj", - "v_proj", - "gate_proj", - "up_proj", - "o_proj", - "q_proj", - "k_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/adapter_model.safetensors deleted file mode 100644 index bc7275c273578f48654d49d8f8d63a3c51f66cdd..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2330f7dce17c9c339af629bd48174635e6eb3f4b6fd7cad587f362c0c7bcb52a -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/optimizer.pt deleted file mode 100644 index 328838652f73729da27474f88b7ef7afd1b0392a..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aa03c6fa79547ad35d6c78377b0e3b3e8691ba1a6b6f42b3acc4f71fada708c7 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/rng_state.pth deleted file mode 100644 index a1212d03b765eaaf9ef7ed52fadb49a6fa1c7008..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:21c07160197b34717d595cac163969653154415c96eda4bdeb7cf2d945992899 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/scheduler.pt deleted file mode 100644 index ea8e443c68d6e9b204be406bbe689a68ca779d39..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9af8a82defeaada0adaac504d5a308b0760b6e9179c4b01165a78bea4e1b8ff8 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/trainer_state.json deleted file mode 100644 index ecc69c4b63ff0dea0d3123d5600a923a6d4e0358..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/trainer_state.json +++ /dev/null @@ -1,28986 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 3.4994032395566923, - "eval_steps": 1466, - "global_step": 20524, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0008525149190110827, - "grad_norm": 1.6653118133544922, - "learning_rate": 6.382978723404255e-07, - "loss": 0.6878558158874511, - "step": 5 - }, - { - "epoch": 0.0017050298380221654, - "grad_norm": 1.6072094440460205, - "learning_rate": 1.4361702127659573e-06, - "loss": 0.7222867965698242, - "step": 10 - }, - { - "epoch": 0.0025575447570332483, - "grad_norm": 1.8327608108520508, - "learning_rate": 2.2340425531914894e-06, - "loss": 0.6977188110351562, - "step": 15 - }, - { - "epoch": 0.0034100596760443308, - "grad_norm": 1.7263766527175903, - "learning_rate": 3.0319148936170214e-06, - "loss": 0.6928215026855469, - "step": 20 - }, - { - "epoch": 0.004262574595055414, - "grad_norm": 1.8482989072799683, - "learning_rate": 3.829787234042553e-06, - "loss": 0.6887872695922852, - "step": 25 - }, - { - "epoch": 0.005115089514066497, - "grad_norm": 2.0272440910339355, - "learning_rate": 4.627659574468085e-06, - "loss": 0.6714544296264648, - "step": 30 - }, - { - "epoch": 0.005967604433077579, - "grad_norm": 2.4286959171295166, - "learning_rate": 5.425531914893616e-06, - "loss": 0.6025971412658692, - "step": 35 - }, - { - "epoch": 0.0068201193520886615, - "grad_norm": 2.26863694190979, - "learning_rate": 6.223404255319148e-06, - "loss": 0.5200423240661621, - "step": 40 - }, - { - "epoch": 0.0076726342710997444, - "grad_norm": 2.0572476387023926, - "learning_rate": 7.02127659574468e-06, - "loss": 0.3741515874862671, - "step": 45 - }, - { - "epoch": 0.008525149190110827, - "grad_norm": 0.9048103094100952, - "learning_rate": 7.819148936170211e-06, - "loss": 0.25923154354095457, - "step": 50 - }, - { - "epoch": 0.00937766410912191, - "grad_norm": 0.5942133069038391, - "learning_rate": 8.617021276595744e-06, - "loss": 0.18589640855789186, - "step": 55 - }, - { - "epoch": 0.010230179028132993, - "grad_norm": 0.40469691157341003, - "learning_rate": 9.414893617021275e-06, - "loss": 0.14508966207504273, - "step": 60 - }, - { - "epoch": 0.011082693947144074, - "grad_norm": 0.319180428981781, - "learning_rate": 1.0212765957446808e-05, - "loss": 0.1287898302078247, - "step": 65 - }, - { - "epoch": 0.011935208866155157, - "grad_norm": 0.2620982229709625, - "learning_rate": 1.101063829787234e-05, - "loss": 0.10825083255767823, - "step": 70 - }, - { - "epoch": 0.01278772378516624, - "grad_norm": 0.2702063024044037, - "learning_rate": 1.1808510638297872e-05, - "loss": 0.09140915870666504, - "step": 75 - }, - { - "epoch": 0.013640238704177323, - "grad_norm": 0.21019567549228668, - "learning_rate": 1.2606382978723403e-05, - "loss": 0.07847741842269898, - "step": 80 - }, - { - "epoch": 0.014492753623188406, - "grad_norm": 0.27865487337112427, - "learning_rate": 1.3404255319148936e-05, - "loss": 0.0681579053401947, - "step": 85 - }, - { - "epoch": 0.015345268542199489, - "grad_norm": 0.2561706006526947, - "learning_rate": 1.4202127659574466e-05, - "loss": 0.06208043098449707, - "step": 90 - }, - { - "epoch": 0.01619778346121057, - "grad_norm": 0.33415308594703674, - "learning_rate": 1.4999999999999999e-05, - "loss": 0.05436077117919922, - "step": 95 - }, - { - "epoch": 0.017050298380221655, - "grad_norm": 0.20705723762512207, - "learning_rate": 1.579787234042553e-05, - "loss": 0.061427068710327146, - "step": 100 - }, - { - "epoch": 0.017902813299232736, - "grad_norm": 0.26269420981407166, - "learning_rate": 1.659574468085106e-05, - "loss": 0.04944279193878174, - "step": 105 - }, - { - "epoch": 0.01875532821824382, - "grad_norm": 0.256533682346344, - "learning_rate": 1.7393617021276596e-05, - "loss": 0.0463131994009018, - "step": 110 - }, - { - "epoch": 0.0196078431372549, - "grad_norm": 0.25077348947525024, - "learning_rate": 1.8191489361702127e-05, - "loss": 0.036723154783248904, - "step": 115 - }, - { - "epoch": 0.020460358056265986, - "grad_norm": 0.2801378071308136, - "learning_rate": 1.8989361702127655e-05, - "loss": 0.04308137893676758, - "step": 120 - }, - { - "epoch": 0.021312872975277068, - "grad_norm": 0.23140908777713776, - "learning_rate": 1.978723404255319e-05, - "loss": 0.0391487717628479, - "step": 125 - }, - { - "epoch": 0.02216538789428815, - "grad_norm": 0.4250975251197815, - "learning_rate": 2.0585106382978724e-05, - "loss": 0.03745899498462677, - "step": 130 - }, - { - "epoch": 0.023017902813299233, - "grad_norm": 0.24533668160438538, - "learning_rate": 2.1382978723404255e-05, - "loss": 0.03649275898933411, - "step": 135 - }, - { - "epoch": 0.023870417732310314, - "grad_norm": 0.2309182733297348, - "learning_rate": 2.2180851063829783e-05, - "loss": 0.036090463399887085, - "step": 140 - }, - { - "epoch": 0.0247229326513214, - "grad_norm": 0.28087928891181946, - "learning_rate": 2.2978723404255317e-05, - "loss": 0.03179733753204346, - "step": 145 - }, - { - "epoch": 0.02557544757033248, - "grad_norm": 0.2785134017467499, - "learning_rate": 2.377659574468085e-05, - "loss": 0.02794267237186432, - "step": 150 - }, - { - "epoch": 0.026427962489343565, - "grad_norm": 0.20468176901340485, - "learning_rate": 2.457446808510638e-05, - "loss": 0.025854837894439698, - "step": 155 - }, - { - "epoch": 0.027280477408354646, - "grad_norm": 0.4069538116455078, - "learning_rate": 2.537234042553191e-05, - "loss": 0.03182780146598816, - "step": 160 - }, - { - "epoch": 0.028132992327365727, - "grad_norm": 0.3025060296058655, - "learning_rate": 2.6170212765957446e-05, - "loss": 0.027975103259086607, - "step": 165 - }, - { - "epoch": 0.028985507246376812, - "grad_norm": 0.22387781739234924, - "learning_rate": 2.6968085106382977e-05, - "loss": 0.025766396522521974, - "step": 170 - }, - { - "epoch": 0.029838022165387893, - "grad_norm": 0.15060213208198547, - "learning_rate": 2.7765957446808508e-05, - "loss": 0.025661858916282653, - "step": 175 - }, - { - "epoch": 0.030690537084398978, - "grad_norm": 0.2912445366382599, - "learning_rate": 2.856382978723404e-05, - "loss": 0.02397846579551697, - "step": 180 - }, - { - "epoch": 0.03154305200341006, - "grad_norm": 0.19390830397605896, - "learning_rate": 2.9361702127659574e-05, - "loss": 0.023744897544384004, - "step": 185 - }, - { - "epoch": 0.03239556692242114, - "grad_norm": 0.24036464095115662, - "learning_rate": 3.0159574468085105e-05, - "loss": 0.0238590270280838, - "step": 190 - }, - { - "epoch": 0.03324808184143223, - "grad_norm": 0.2793026566505432, - "learning_rate": 3.0957446808510636e-05, - "loss": 0.025464403629302978, - "step": 195 - }, - { - "epoch": 0.03410059676044331, - "grad_norm": 0.21164429187774658, - "learning_rate": 3.175531914893617e-05, - "loss": 0.020692554116249085, - "step": 200 - }, - { - "epoch": 0.03495311167945439, - "grad_norm": 0.3742266297340393, - "learning_rate": 3.25531914893617e-05, - "loss": 0.02315486818552017, - "step": 205 - }, - { - "epoch": 0.03580562659846547, - "grad_norm": 0.2690248191356659, - "learning_rate": 3.3351063829787226e-05, - "loss": 0.0174571692943573, - "step": 210 - }, - { - "epoch": 0.03665814151747655, - "grad_norm": 0.3727855980396271, - "learning_rate": 3.414893617021276e-05, - "loss": 0.021705827116966246, - "step": 215 - }, - { - "epoch": 0.03751065643648764, - "grad_norm": 0.27461397647857666, - "learning_rate": 3.4946808510638296e-05, - "loss": 0.028134092688560486, - "step": 220 - }, - { - "epoch": 0.03836317135549872, - "grad_norm": 0.1723722219467163, - "learning_rate": 3.574468085106383e-05, - "loss": 0.02093944847583771, - "step": 225 - }, - { - "epoch": 0.0392156862745098, - "grad_norm": 0.17161321640014648, - "learning_rate": 3.654255319148936e-05, - "loss": 0.021984823048114777, - "step": 230 - }, - { - "epoch": 0.040068201193520885, - "grad_norm": 0.203886479139328, - "learning_rate": 3.734042553191489e-05, - "loss": 0.02234097272157669, - "step": 235 - }, - { - "epoch": 0.04092071611253197, - "grad_norm": 0.2021923065185547, - "learning_rate": 3.813829787234042e-05, - "loss": 0.02671273946762085, - "step": 240 - }, - { - "epoch": 0.041773231031543054, - "grad_norm": 0.24510027468204498, - "learning_rate": 3.8936170212765955e-05, - "loss": 0.016925157606601716, - "step": 245 - }, - { - "epoch": 0.042625745950554135, - "grad_norm": 0.191350057721138, - "learning_rate": 3.973404255319149e-05, - "loss": 0.02067229151725769, - "step": 250 - }, - { - "epoch": 0.043478260869565216, - "grad_norm": 0.20248189568519592, - "learning_rate": 4.053191489361702e-05, - "loss": 0.01805101931095123, - "step": 255 - }, - { - "epoch": 0.0443307757885763, - "grad_norm": 0.22623269259929657, - "learning_rate": 4.1329787234042545e-05, - "loss": 0.019811727106571198, - "step": 260 - }, - { - "epoch": 0.045183290707587385, - "grad_norm": 0.2050849199295044, - "learning_rate": 4.2127659574468086e-05, - "loss": 0.017767781019210817, - "step": 265 - }, - { - "epoch": 0.04603580562659847, - "grad_norm": 0.24867363274097443, - "learning_rate": 4.2925531914893614e-05, - "loss": 0.02127009928226471, - "step": 270 - }, - { - "epoch": 0.04688832054560955, - "grad_norm": 0.20721694827079773, - "learning_rate": 4.372340425531914e-05, - "loss": 0.022770658135414124, - "step": 275 - }, - { - "epoch": 0.04774083546462063, - "grad_norm": 0.19511370360851288, - "learning_rate": 4.452127659574468e-05, - "loss": 0.02165296971797943, - "step": 280 - }, - { - "epoch": 0.04859335038363171, - "grad_norm": 0.19066740572452545, - "learning_rate": 4.5319148936170204e-05, - "loss": 0.020857495069503785, - "step": 285 - }, - { - "epoch": 0.0494458653026428, - "grad_norm": 0.14217901229858398, - "learning_rate": 4.6117021276595746e-05, - "loss": 0.016413891315460206, - "step": 290 - }, - { - "epoch": 0.05029838022165388, - "grad_norm": 0.24177870154380798, - "learning_rate": 4.6914893617021274e-05, - "loss": 0.01902117133140564, - "step": 295 - }, - { - "epoch": 0.05115089514066496, - "grad_norm": 0.2742858827114105, - "learning_rate": 4.77127659574468e-05, - "loss": 0.016554126143455507, - "step": 300 - }, - { - "epoch": 0.05200341005967604, - "grad_norm": 0.24519385397434235, - "learning_rate": 4.851063829787234e-05, - "loss": 0.022036357223987578, - "step": 305 - }, - { - "epoch": 0.05285592497868713, - "grad_norm": 0.296572208404541, - "learning_rate": 4.930851063829787e-05, - "loss": 0.0196581095457077, - "step": 310 - }, - { - "epoch": 0.05370843989769821, - "grad_norm": 0.17092008888721466, - "learning_rate": 5.01063829787234e-05, - "loss": 0.015453743934631347, - "step": 315 - }, - { - "epoch": 0.05456095481670929, - "grad_norm": 0.17074067890644073, - "learning_rate": 5.090425531914893e-05, - "loss": 0.013983796536922454, - "step": 320 - }, - { - "epoch": 0.05541346973572037, - "grad_norm": 0.27302470803260803, - "learning_rate": 5.170212765957446e-05, - "loss": 0.015387402474880218, - "step": 325 - }, - { - "epoch": 0.056265984654731455, - "grad_norm": 0.2603592872619629, - "learning_rate": 5.2499999999999995e-05, - "loss": 0.013557825982570649, - "step": 330 - }, - { - "epoch": 0.05711849957374254, - "grad_norm": 0.23079948127269745, - "learning_rate": 5.329787234042553e-05, - "loss": 0.014809322357177735, - "step": 335 - }, - { - "epoch": 0.057971014492753624, - "grad_norm": 0.15029627084732056, - "learning_rate": 5.409574468085106e-05, - "loss": 0.018204084038734435, - "step": 340 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 0.2930934429168701, - "learning_rate": 5.4893617021276586e-05, - "loss": 0.02035558819770813, - "step": 345 - }, - { - "epoch": 0.059676044330775786, - "grad_norm": 0.19710905849933624, - "learning_rate": 5.569148936170213e-05, - "loss": 0.015025021135807037, - "step": 350 - }, - { - "epoch": 0.060528559249786874, - "grad_norm": 0.13316969573497772, - "learning_rate": 5.6489361702127655e-05, - "loss": 0.01258893460035324, - "step": 355 - }, - { - "epoch": 0.061381074168797956, - "grad_norm": 0.30591729283332825, - "learning_rate": 5.728723404255319e-05, - "loss": 0.0187319278717041, - "step": 360 - }, - { - "epoch": 0.06223358908780904, - "grad_norm": 0.22696663439273834, - "learning_rate": 5.808510638297872e-05, - "loss": 0.015805599093437196, - "step": 365 - }, - { - "epoch": 0.06308610400682012, - "grad_norm": 0.2800130546092987, - "learning_rate": 5.8882978723404245e-05, - "loss": 0.01719983071088791, - "step": 370 - }, - { - "epoch": 0.0639386189258312, - "grad_norm": 0.20671235024929047, - "learning_rate": 5.9680851063829786e-05, - "loss": 0.014449423551559449, - "step": 375 - }, - { - "epoch": 0.06479113384484228, - "grad_norm": 0.1630883365869522, - "learning_rate": 6.0478723404255314e-05, - "loss": 0.017153808474540712, - "step": 380 - }, - { - "epoch": 0.06564364876385337, - "grad_norm": 0.18699344992637634, - "learning_rate": 6.127659574468084e-05, - "loss": 0.015558701753616334, - "step": 385 - }, - { - "epoch": 0.06649616368286446, - "grad_norm": 0.15257929265499115, - "learning_rate": 6.207446808510638e-05, - "loss": 0.018240168690681458, - "step": 390 - }, - { - "epoch": 0.06734867860187553, - "grad_norm": 0.19658304750919342, - "learning_rate": 6.287234042553191e-05, - "loss": 0.015907022356987, - "step": 395 - }, - { - "epoch": 0.06820119352088662, - "grad_norm": 0.3743384778499603, - "learning_rate": 6.367021276595743e-05, - "loss": 0.014921861886978149, - "step": 400 - }, - { - "epoch": 0.06905370843989769, - "grad_norm": 0.254724383354187, - "learning_rate": 6.446808510638298e-05, - "loss": 0.018739913403987885, - "step": 405 - }, - { - "epoch": 0.06990622335890878, - "grad_norm": 0.19365151226520538, - "learning_rate": 6.52659574468085e-05, - "loss": 0.018984711170196532, - "step": 410 - }, - { - "epoch": 0.07075873827791987, - "grad_norm": 0.20728597044944763, - "learning_rate": 6.606382978723404e-05, - "loss": 0.01524859368801117, - "step": 415 - }, - { - "epoch": 0.07161125319693094, - "grad_norm": 0.1415344476699829, - "learning_rate": 6.686170212765957e-05, - "loss": 0.014004582166671753, - "step": 420 - }, - { - "epoch": 0.07246376811594203, - "grad_norm": 0.14169375598430634, - "learning_rate": 6.765957446808509e-05, - "loss": 0.013503608107566834, - "step": 425 - }, - { - "epoch": 0.0733162830349531, - "grad_norm": 0.22518369555473328, - "learning_rate": 6.845744680851064e-05, - "loss": 0.017587104439735414, - "step": 430 - }, - { - "epoch": 0.0741687979539642, - "grad_norm": 0.1091267541050911, - "learning_rate": 6.925531914893616e-05, - "loss": 0.013890406489372254, - "step": 435 - }, - { - "epoch": 0.07502131287297528, - "grad_norm": 0.24982722103595734, - "learning_rate": 7.00531914893617e-05, - "loss": 0.01599075198173523, - "step": 440 - }, - { - "epoch": 0.07587382779198636, - "grad_norm": 0.22311007976531982, - "learning_rate": 7.085106382978723e-05, - "loss": 0.014870230853557587, - "step": 445 - }, - { - "epoch": 0.07672634271099744, - "grad_norm": 0.27064985036849976, - "learning_rate": 7.164893617021276e-05, - "loss": 0.019213163852691652, - "step": 450 - }, - { - "epoch": 0.07757885763000852, - "grad_norm": 0.16876882314682007, - "learning_rate": 7.244680851063829e-05, - "loss": 0.014935044944286347, - "step": 455 - }, - { - "epoch": 0.0784313725490196, - "grad_norm": 0.18644963204860687, - "learning_rate": 7.324468085106382e-05, - "loss": 0.013823372125625611, - "step": 460 - }, - { - "epoch": 0.0792838874680307, - "grad_norm": 0.13067972660064697, - "learning_rate": 7.404255319148935e-05, - "loss": 0.013839980959892273, - "step": 465 - }, - { - "epoch": 0.08013640238704177, - "grad_norm": 0.0976850613951683, - "learning_rate": 7.484042553191489e-05, - "loss": 0.012782379984855652, - "step": 470 - }, - { - "epoch": 0.08098891730605286, - "grad_norm": 0.2523496150970459, - "learning_rate": 7.499999439800074e-05, - "loss": 0.013075053691864014, - "step": 475 - }, - { - "epoch": 0.08184143222506395, - "grad_norm": 0.18349343538284302, - "learning_rate": 7.499997163988164e-05, - "loss": 0.01753528565168381, - "step": 480 - }, - { - "epoch": 0.08269394714407502, - "grad_norm": 0.16528834402561188, - "learning_rate": 7.499993137552834e-05, - "loss": 0.012987489998340606, - "step": 485 - }, - { - "epoch": 0.08354646206308611, - "grad_norm": 0.1918938159942627, - "learning_rate": 7.499987360495964e-05, - "loss": 0.018496687710285186, - "step": 490 - }, - { - "epoch": 0.08439897698209718, - "grad_norm": 0.11452502012252808, - "learning_rate": 7.499979832820255e-05, - "loss": 0.015578755736351013, - "step": 495 - }, - { - "epoch": 0.08525149190110827, - "grad_norm": 0.2084985226392746, - "learning_rate": 7.499970554529216e-05, - "loss": 0.014264023303985596, - "step": 500 - }, - { - "epoch": 0.08610400682011936, - "grad_norm": 0.13777600228786469, - "learning_rate": 7.49995952562718e-05, - "loss": 0.014527246356010437, - "step": 505 - }, - { - "epoch": 0.08695652173913043, - "grad_norm": 0.2515336275100708, - "learning_rate": 7.499946746119296e-05, - "loss": 0.019042417407035828, - "step": 510 - }, - { - "epoch": 0.08780903665814152, - "grad_norm": 0.12859591841697693, - "learning_rate": 7.499932216011531e-05, - "loss": 0.01340993344783783, - "step": 515 - }, - { - "epoch": 0.0886615515771526, - "grad_norm": 0.15603038668632507, - "learning_rate": 7.499915935310667e-05, - "loss": 0.01645509898662567, - "step": 520 - }, - { - "epoch": 0.08951406649616368, - "grad_norm": 0.1493784785270691, - "learning_rate": 7.499897904024303e-05, - "loss": 0.016503919661045075, - "step": 525 - }, - { - "epoch": 0.09036658141517477, - "grad_norm": 0.14551150798797607, - "learning_rate": 7.499878122160858e-05, - "loss": 0.013891169428825378, - "step": 530 - }, - { - "epoch": 0.09121909633418585, - "grad_norm": 0.12197990715503693, - "learning_rate": 7.499856589729569e-05, - "loss": 0.01531532108783722, - "step": 535 - }, - { - "epoch": 0.09207161125319693, - "grad_norm": 0.24787496030330658, - "learning_rate": 7.499833306740485e-05, - "loss": 0.016374409198760986, - "step": 540 - }, - { - "epoch": 0.09292412617220801, - "grad_norm": 0.10902808606624603, - "learning_rate": 7.499808273204476e-05, - "loss": 0.013505718111991883, - "step": 545 - }, - { - "epoch": 0.0937766410912191, - "grad_norm": 0.16540755331516266, - "learning_rate": 7.499781489133228e-05, - "loss": 0.016257329285144805, - "step": 550 - }, - { - "epoch": 0.09462915601023018, - "grad_norm": 0.1228717565536499, - "learning_rate": 7.499752954539245e-05, - "loss": 0.011345921456813813, - "step": 555 - }, - { - "epoch": 0.09548167092924126, - "grad_norm": 0.12704797089099884, - "learning_rate": 7.49972266943585e-05, - "loss": 0.014449910819530487, - "step": 560 - }, - { - "epoch": 0.09633418584825235, - "grad_norm": 0.2014201581478119, - "learning_rate": 7.499690633837178e-05, - "loss": 0.01456935852766037, - "step": 565 - }, - { - "epoch": 0.09718670076726342, - "grad_norm": 0.17480657994747162, - "learning_rate": 7.499656847758187e-05, - "loss": 0.014413098990917205, - "step": 570 - }, - { - "epoch": 0.09803921568627451, - "grad_norm": 0.18946893513202667, - "learning_rate": 7.499621311214646e-05, - "loss": 0.0139508917927742, - "step": 575 - }, - { - "epoch": 0.0988917306052856, - "grad_norm": 0.15367820858955383, - "learning_rate": 7.499584024223149e-05, - "loss": 0.014091435074806213, - "step": 580 - }, - { - "epoch": 0.09974424552429667, - "grad_norm": 0.15018688142299652, - "learning_rate": 7.499544986801099e-05, - "loss": 0.016737687587738036, - "step": 585 - }, - { - "epoch": 0.10059676044330776, - "grad_norm": 0.17522579431533813, - "learning_rate": 7.499504198966722e-05, - "loss": 0.015822765231132508, - "step": 590 - }, - { - "epoch": 0.10144927536231885, - "grad_norm": 0.12634818255901337, - "learning_rate": 7.499461660739059e-05, - "loss": 0.015363042056560517, - "step": 595 - }, - { - "epoch": 0.10230179028132992, - "grad_norm": 0.12466495484113693, - "learning_rate": 7.499417372137968e-05, - "loss": 0.013208043575286866, - "step": 600 - }, - { - "epoch": 0.10315430520034101, - "grad_norm": 0.18877951800823212, - "learning_rate": 7.499371333184125e-05, - "loss": 0.016261917352676392, - "step": 605 - }, - { - "epoch": 0.10400682011935208, - "grad_norm": 0.21521399915218353, - "learning_rate": 7.49932354389902e-05, - "loss": 0.013861049711704255, - "step": 610 - }, - { - "epoch": 0.10485933503836317, - "grad_norm": 0.1375613659620285, - "learning_rate": 7.499274004304964e-05, - "loss": 0.015597744286060334, - "step": 615 - }, - { - "epoch": 0.10571184995737426, - "grad_norm": 0.16078200936317444, - "learning_rate": 7.499222714425087e-05, - "loss": 0.018308353424072266, - "step": 620 - }, - { - "epoch": 0.10656436487638533, - "grad_norm": 0.15485632419586182, - "learning_rate": 7.499169674283328e-05, - "loss": 0.015836401283740996, - "step": 625 - }, - { - "epoch": 0.10741687979539642, - "grad_norm": 0.14019452035427094, - "learning_rate": 7.499114883904451e-05, - "loss": 0.014591602981090546, - "step": 630 - }, - { - "epoch": 0.1082693947144075, - "grad_norm": 0.2042212039232254, - "learning_rate": 7.499058343314031e-05, - "loss": 0.01718664914369583, - "step": 635 - }, - { - "epoch": 0.10912190963341858, - "grad_norm": 0.13801656663417816, - "learning_rate": 7.499000052538467e-05, - "loss": 0.015579727292060853, - "step": 640 - }, - { - "epoch": 0.10997442455242967, - "grad_norm": 0.12787523865699768, - "learning_rate": 7.498940011604968e-05, - "loss": 0.012525486946105956, - "step": 645 - }, - { - "epoch": 0.11082693947144075, - "grad_norm": 0.1752539724111557, - "learning_rate": 7.498878220541564e-05, - "loss": 0.014286568760871888, - "step": 650 - }, - { - "epoch": 0.11167945439045183, - "grad_norm": 0.12485212087631226, - "learning_rate": 7.498814679377101e-05, - "loss": 0.01482405811548233, - "step": 655 - }, - { - "epoch": 0.11253196930946291, - "grad_norm": 0.10980220139026642, - "learning_rate": 7.498749388141243e-05, - "loss": 0.01597980558872223, - "step": 660 - }, - { - "epoch": 0.113384484228474, - "grad_norm": 0.09144977480173111, - "learning_rate": 7.498682346864469e-05, - "loss": 0.011583130061626434, - "step": 665 - }, - { - "epoch": 0.11423699914748509, - "grad_norm": 0.11955336481332779, - "learning_rate": 7.498613555578076e-05, - "loss": 0.013009116053581238, - "step": 670 - }, - { - "epoch": 0.11508951406649616, - "grad_norm": 0.14505070447921753, - "learning_rate": 7.49854301431418e-05, - "loss": 0.013878281414508819, - "step": 675 - }, - { - "epoch": 0.11594202898550725, - "grad_norm": 0.15674598515033722, - "learning_rate": 7.49847072310571e-05, - "loss": 0.014717698097229004, - "step": 680 - }, - { - "epoch": 0.11679454390451834, - "grad_norm": 0.16684003174304962, - "learning_rate": 7.498396681986413e-05, - "loss": 0.015567027032375336, - "step": 685 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 0.13314439356327057, - "learning_rate": 7.498320890990857e-05, - "loss": 0.017679129540920258, - "step": 690 - }, - { - "epoch": 0.1184995737425405, - "grad_norm": 0.1099700927734375, - "learning_rate": 7.498243350154423e-05, - "loss": 0.015660777688026428, - "step": 695 - }, - { - "epoch": 0.11935208866155157, - "grad_norm": 0.09305288642644882, - "learning_rate": 7.498164059513307e-05, - "loss": 0.011864218115806579, - "step": 700 - }, - { - "epoch": 0.12020460358056266, - "grad_norm": 0.1284978687763214, - "learning_rate": 7.498083019104527e-05, - "loss": 0.013128276169300079, - "step": 705 - }, - { - "epoch": 0.12105711849957375, - "grad_norm": 0.19205476343631744, - "learning_rate": 7.498000228965913e-05, - "loss": 0.014518238604068756, - "step": 710 - }, - { - "epoch": 0.12190963341858482, - "grad_norm": 0.25064221024513245, - "learning_rate": 7.497915689136119e-05, - "loss": 0.014235696196556092, - "step": 715 - }, - { - "epoch": 0.12276214833759591, - "grad_norm": 0.11866044998168945, - "learning_rate": 7.497829399654607e-05, - "loss": 0.013622967898845673, - "step": 720 - }, - { - "epoch": 0.12361466325660699, - "grad_norm": 0.1366252452135086, - "learning_rate": 7.49774136056166e-05, - "loss": 0.013592693209648132, - "step": 725 - }, - { - "epoch": 0.12446717817561807, - "grad_norm": 0.12722773849964142, - "learning_rate": 7.497651571898379e-05, - "loss": 0.02055167257785797, - "step": 730 - }, - { - "epoch": 0.12531969309462915, - "grad_norm": 0.07723517715930939, - "learning_rate": 7.49756003370668e-05, - "loss": 0.012377069890499115, - "step": 735 - }, - { - "epoch": 0.12617220801364024, - "grad_norm": 0.11081967502832413, - "learning_rate": 7.497466746029293e-05, - "loss": 0.013797640800476074, - "step": 740 - }, - { - "epoch": 0.12702472293265132, - "grad_norm": 0.1117677390575409, - "learning_rate": 7.497371708909771e-05, - "loss": 0.01338990479707718, - "step": 745 - }, - { - "epoch": 0.1278772378516624, - "grad_norm": 0.24659112095832825, - "learning_rate": 7.497274922392483e-05, - "loss": 0.012844511866569519, - "step": 750 - }, - { - "epoch": 0.1287297527706735, - "grad_norm": 0.24900244176387787, - "learning_rate": 7.497176386522606e-05, - "loss": 0.015656352043151855, - "step": 755 - }, - { - "epoch": 0.12958226768968456, - "grad_norm": 0.1331390142440796, - "learning_rate": 7.497076101346144e-05, - "loss": 0.013722085952758789, - "step": 760 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 0.1224697008728981, - "learning_rate": 7.496974066909913e-05, - "loss": 0.011186304688453674, - "step": 765 - }, - { - "epoch": 0.13128729752770674, - "grad_norm": 0.11053820699453354, - "learning_rate": 7.496870283261546e-05, - "loss": 0.012894454598426818, - "step": 770 - }, - { - "epoch": 0.13213981244671782, - "grad_norm": 0.09663277864456177, - "learning_rate": 7.49676475044949e-05, - "loss": 0.015331995487213135, - "step": 775 - }, - { - "epoch": 0.1329923273657289, - "grad_norm": 0.1483219712972641, - "learning_rate": 7.496657468523014e-05, - "loss": 0.015070399641990662, - "step": 780 - }, - { - "epoch": 0.13384484228473997, - "grad_norm": 0.12375539541244507, - "learning_rate": 7.496548437532202e-05, - "loss": 0.013722005486488342, - "step": 785 - }, - { - "epoch": 0.13469735720375106, - "grad_norm": 0.14662130177021027, - "learning_rate": 7.496437657527949e-05, - "loss": 0.012852996587753296, - "step": 790 - }, - { - "epoch": 0.13554987212276215, - "grad_norm": 0.1740300953388214, - "learning_rate": 7.496325128561975e-05, - "loss": 0.014796209335327149, - "step": 795 - }, - { - "epoch": 0.13640238704177324, - "grad_norm": 0.10670243203639984, - "learning_rate": 7.496210850686809e-05, - "loss": 0.013983005285263061, - "step": 800 - }, - { - "epoch": 0.13725490196078433, - "grad_norm": 0.1362515240907669, - "learning_rate": 7.496094823955801e-05, - "loss": 0.014472618699073792, - "step": 805 - }, - { - "epoch": 0.13810741687979539, - "grad_norm": 0.11403689533472061, - "learning_rate": 7.495977048423117e-05, - "loss": 0.01294848918914795, - "step": 810 - }, - { - "epoch": 0.13895993179880647, - "grad_norm": 0.11532565206289291, - "learning_rate": 7.495857524143738e-05, - "loss": 0.011346425861120224, - "step": 815 - }, - { - "epoch": 0.13981244671781756, - "grad_norm": 0.1506761759519577, - "learning_rate": 7.495736251173463e-05, - "loss": 0.016532811522483825, - "step": 820 - }, - { - "epoch": 0.14066496163682865, - "grad_norm": 0.10915899276733398, - "learning_rate": 7.495613229568903e-05, - "loss": 0.01278679072856903, - "step": 825 - }, - { - "epoch": 0.14151747655583974, - "grad_norm": 0.10394130647182465, - "learning_rate": 7.49548845938749e-05, - "loss": 0.011449626088142395, - "step": 830 - }, - { - "epoch": 0.1423699914748508, - "grad_norm": 0.08730677515268326, - "learning_rate": 7.495361940687475e-05, - "loss": 0.011916645616292954, - "step": 835 - }, - { - "epoch": 0.1432225063938619, - "grad_norm": 0.08257348835468292, - "learning_rate": 7.495233673527914e-05, - "loss": 0.013689276576042176, - "step": 840 - }, - { - "epoch": 0.14407502131287298, - "grad_norm": 0.182078555226326, - "learning_rate": 7.495103657968692e-05, - "loss": 0.019141729176044463, - "step": 845 - }, - { - "epoch": 0.14492753623188406, - "grad_norm": 0.105568528175354, - "learning_rate": 7.494971894070501e-05, - "loss": 0.015522226691246033, - "step": 850 - }, - { - "epoch": 0.14578005115089515, - "grad_norm": 0.09030122309923172, - "learning_rate": 7.494838381894856e-05, - "loss": 0.012900182604789734, - "step": 855 - }, - { - "epoch": 0.1466325660699062, - "grad_norm": 0.11199921369552612, - "learning_rate": 7.494703121504082e-05, - "loss": 0.013011230528354645, - "step": 860 - }, - { - "epoch": 0.1474850809889173, - "grad_norm": 0.11342430859804153, - "learning_rate": 7.494566112961325e-05, - "loss": 0.015477502346038818, - "step": 865 - }, - { - "epoch": 0.1483375959079284, - "grad_norm": 0.07956613600254059, - "learning_rate": 7.494427356330544e-05, - "loss": 0.011270551383495331, - "step": 870 - }, - { - "epoch": 0.14919011082693948, - "grad_norm": 0.13356897234916687, - "learning_rate": 7.494286851676515e-05, - "loss": 0.012481572479009629, - "step": 875 - }, - { - "epoch": 0.15004262574595056, - "grad_norm": 0.14493827521800995, - "learning_rate": 7.494144599064833e-05, - "loss": 0.015318951010704041, - "step": 880 - }, - { - "epoch": 0.15089514066496162, - "grad_norm": 0.13254614174365997, - "learning_rate": 7.494000598561902e-05, - "loss": 0.012697519361972808, - "step": 885 - }, - { - "epoch": 0.1517476555839727, - "grad_norm": 0.12164720892906189, - "learning_rate": 7.49385485023495e-05, - "loss": 0.011361779272556305, - "step": 890 - }, - { - "epoch": 0.1526001705029838, - "grad_norm": 0.10743863880634308, - "learning_rate": 7.493707354152015e-05, - "loss": 0.012824115157127381, - "step": 895 - }, - { - "epoch": 0.1534526854219949, - "grad_norm": 0.0933203473687172, - "learning_rate": 7.493558110381954e-05, - "loss": 0.012234293669462205, - "step": 900 - }, - { - "epoch": 0.15430520034100598, - "grad_norm": 0.10252948850393295, - "learning_rate": 7.493407118994437e-05, - "loss": 0.01874733567237854, - "step": 905 - }, - { - "epoch": 0.15515771526001704, - "grad_norm": 0.1184248998761177, - "learning_rate": 7.493254380059954e-05, - "loss": 0.012014241516590118, - "step": 910 - }, - { - "epoch": 0.15601023017902813, - "grad_norm": 0.10140799731016159, - "learning_rate": 7.49309989364981e-05, - "loss": 0.013288506865501403, - "step": 915 - }, - { - "epoch": 0.1568627450980392, - "grad_norm": 0.15704941749572754, - "learning_rate": 7.492943659836121e-05, - "loss": 0.012907981872558594, - "step": 920 - }, - { - "epoch": 0.1577152600170503, - "grad_norm": 0.10368761420249939, - "learning_rate": 7.492785678691822e-05, - "loss": 0.01384827345609665, - "step": 925 - }, - { - "epoch": 0.1585677749360614, - "grad_norm": 0.13517284393310547, - "learning_rate": 7.492625950290668e-05, - "loss": 0.014644764363765717, - "step": 930 - }, - { - "epoch": 0.15942028985507245, - "grad_norm": 0.08189263194799423, - "learning_rate": 7.492464474707222e-05, - "loss": 0.011974713951349258, - "step": 935 - }, - { - "epoch": 0.16027280477408354, - "grad_norm": 0.11737149208784103, - "learning_rate": 7.492301252016867e-05, - "loss": 0.014023615419864655, - "step": 940 - }, - { - "epoch": 0.16112531969309463, - "grad_norm": 0.15778031945228577, - "learning_rate": 7.492136282295801e-05, - "loss": 0.01533726304769516, - "step": 945 - }, - { - "epoch": 0.16197783461210571, - "grad_norm": 0.14194118976593018, - "learning_rate": 7.491969565621036e-05, - "loss": 0.01569782793521881, - "step": 950 - }, - { - "epoch": 0.1628303495311168, - "grad_norm": 0.12579558789730072, - "learning_rate": 7.491801102070403e-05, - "loss": 0.012844063341617584, - "step": 955 - }, - { - "epoch": 0.1636828644501279, - "grad_norm": 0.10578597337007523, - "learning_rate": 7.491630891722547e-05, - "loss": 0.011186198890209198, - "step": 960 - }, - { - "epoch": 0.16453537936913895, - "grad_norm": 0.12398207187652588, - "learning_rate": 7.491458934656925e-05, - "loss": 0.014328254759311676, - "step": 965 - }, - { - "epoch": 0.16538789428815004, - "grad_norm": 0.09390626102685928, - "learning_rate": 7.491285230953814e-05, - "loss": 0.013881708681583404, - "step": 970 - }, - { - "epoch": 0.16624040920716113, - "grad_norm": 0.17196106910705566, - "learning_rate": 7.491109780694303e-05, - "loss": 0.014424234628677368, - "step": 975 - }, - { - "epoch": 0.16709292412617222, - "grad_norm": 0.06994624435901642, - "learning_rate": 7.490932583960302e-05, - "loss": 0.010434426367282867, - "step": 980 - }, - { - "epoch": 0.1679454390451833, - "grad_norm": 0.09414499998092651, - "learning_rate": 7.490753640834527e-05, - "loss": 0.010483803600072861, - "step": 985 - }, - { - "epoch": 0.16879795396419436, - "grad_norm": 0.11002875864505768, - "learning_rate": 7.490572951400518e-05, - "loss": 0.009266073256731034, - "step": 990 - }, - { - "epoch": 0.16965046888320545, - "grad_norm": 0.14956022799015045, - "learning_rate": 7.490390515742626e-05, - "loss": 0.015529079735279084, - "step": 995 - }, - { - "epoch": 0.17050298380221654, - "grad_norm": 0.13828666508197784, - "learning_rate": 7.490206333946019e-05, - "loss": 0.011511271446943283, - "step": 1000 - }, - { - "epoch": 0.17135549872122763, - "grad_norm": 0.1265997439622879, - "learning_rate": 7.490020406096678e-05, - "loss": 0.010465707629919052, - "step": 1005 - }, - { - "epoch": 0.17220801364023872, - "grad_norm": 0.07772056013345718, - "learning_rate": 7.489832732281401e-05, - "loss": 0.013828210532665253, - "step": 1010 - }, - { - "epoch": 0.17306052855924978, - "grad_norm": 0.08055376261472702, - "learning_rate": 7.489643312587799e-05, - "loss": 0.013010218739509583, - "step": 1015 - }, - { - "epoch": 0.17391304347826086, - "grad_norm": 0.09059367328882217, - "learning_rate": 7.489452147104301e-05, - "loss": 0.013864235579967498, - "step": 1020 - }, - { - "epoch": 0.17476555839727195, - "grad_norm": 0.15273553133010864, - "learning_rate": 7.489259235920149e-05, - "loss": 0.013432112336158753, - "step": 1025 - }, - { - "epoch": 0.17561807331628304, - "grad_norm": 0.11343793570995331, - "learning_rate": 7.489064579125399e-05, - "loss": 0.01213686615228653, - "step": 1030 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 0.11880069226026535, - "learning_rate": 7.488868176810926e-05, - "loss": 0.012188264727592468, - "step": 1035 - }, - { - "epoch": 0.1773231031543052, - "grad_norm": 0.16270127892494202, - "learning_rate": 7.488670029068415e-05, - "loss": 0.015294317901134492, - "step": 1040 - }, - { - "epoch": 0.17817561807331628, - "grad_norm": 0.11542797088623047, - "learning_rate": 7.488470135990369e-05, - "loss": 0.013500772416591644, - "step": 1045 - }, - { - "epoch": 0.17902813299232737, - "grad_norm": 0.09579882025718689, - "learning_rate": 7.488268497670103e-05, - "loss": 0.013453315198421478, - "step": 1050 - }, - { - "epoch": 0.17988064791133845, - "grad_norm": 0.08847666531801224, - "learning_rate": 7.488065114201752e-05, - "loss": 0.0153861403465271, - "step": 1055 - }, - { - "epoch": 0.18073316283034954, - "grad_norm": 0.11167823523283005, - "learning_rate": 7.487859985680257e-05, - "loss": 0.011502107977867127, - "step": 1060 - }, - { - "epoch": 0.1815856777493606, - "grad_norm": 0.11255413293838501, - "learning_rate": 7.487653112201385e-05, - "loss": 0.012194579839706421, - "step": 1065 - }, - { - "epoch": 0.1824381926683717, - "grad_norm": 0.10929680615663528, - "learning_rate": 7.487444493861705e-05, - "loss": 0.015874122083187104, - "step": 1070 - }, - { - "epoch": 0.18329070758738278, - "grad_norm": 0.10753592848777771, - "learning_rate": 7.487234130758613e-05, - "loss": 0.009445396810770034, - "step": 1075 - }, - { - "epoch": 0.18414322250639387, - "grad_norm": 0.11368737369775772, - "learning_rate": 7.487022022990309e-05, - "loss": 0.011674505472183228, - "step": 1080 - }, - { - "epoch": 0.18499573742540495, - "grad_norm": 0.12944836914539337, - "learning_rate": 7.486808170655813e-05, - "loss": 0.011856313049793243, - "step": 1085 - }, - { - "epoch": 0.18584825234441602, - "grad_norm": 0.0833214819431305, - "learning_rate": 7.48659257385496e-05, - "loss": 0.014119544625282287, - "step": 1090 - }, - { - "epoch": 0.1867007672634271, - "grad_norm": 0.11955790221691132, - "learning_rate": 7.486375232688397e-05, - "loss": 0.012977911531925202, - "step": 1095 - }, - { - "epoch": 0.1875532821824382, - "grad_norm": 0.1452455222606659, - "learning_rate": 7.486156147257584e-05, - "loss": 0.015410827100276947, - "step": 1100 - }, - { - "epoch": 0.18840579710144928, - "grad_norm": 0.09630092978477478, - "learning_rate": 7.485935317664801e-05, - "loss": 0.013698874413967133, - "step": 1105 - }, - { - "epoch": 0.18925831202046037, - "grad_norm": 0.11687257140874863, - "learning_rate": 7.485712744013137e-05, - "loss": 0.013196484744548797, - "step": 1110 - }, - { - "epoch": 0.19011082693947143, - "grad_norm": 0.08894632011651993, - "learning_rate": 7.485488426406495e-05, - "loss": 0.01540881097316742, - "step": 1115 - }, - { - "epoch": 0.19096334185848252, - "grad_norm": 0.09196458756923676, - "learning_rate": 7.485262364949597e-05, - "loss": 0.012018527090549468, - "step": 1120 - }, - { - "epoch": 0.1918158567774936, - "grad_norm": 0.12328553944826126, - "learning_rate": 7.485034559747974e-05, - "loss": 0.014925773441791534, - "step": 1125 - }, - { - "epoch": 0.1926683716965047, - "grad_norm": 0.12566202878952026, - "learning_rate": 7.484805010907975e-05, - "loss": 0.01104198843240738, - "step": 1130 - }, - { - "epoch": 0.19352088661551578, - "grad_norm": 0.1022765263915062, - "learning_rate": 7.484573718536758e-05, - "loss": 0.0118367500603199, - "step": 1135 - }, - { - "epoch": 0.19437340153452684, - "grad_norm": 0.09682224690914154, - "learning_rate": 7.4843406827423e-05, - "loss": 0.011423001438379288, - "step": 1140 - }, - { - "epoch": 0.19522591645353793, - "grad_norm": 0.15166254341602325, - "learning_rate": 7.484105903633388e-05, - "loss": 0.014048118889331818, - "step": 1145 - }, - { - "epoch": 0.19607843137254902, - "grad_norm": 0.09285032004117966, - "learning_rate": 7.483869381319627e-05, - "loss": 0.012882034480571746, - "step": 1150 - }, - { - "epoch": 0.1969309462915601, - "grad_norm": 0.09011214971542358, - "learning_rate": 7.483631115911434e-05, - "loss": 0.01419239491224289, - "step": 1155 - }, - { - "epoch": 0.1977834612105712, - "grad_norm": 0.14540383219718933, - "learning_rate": 7.483391107520037e-05, - "loss": 0.014623096585273743, - "step": 1160 - }, - { - "epoch": 0.19863597612958228, - "grad_norm": 0.12326448410749435, - "learning_rate": 7.483149356257479e-05, - "loss": 0.013109591603279114, - "step": 1165 - }, - { - "epoch": 0.19948849104859334, - "grad_norm": 0.07067535817623138, - "learning_rate": 7.482905862236622e-05, - "loss": 0.013740380108356477, - "step": 1170 - }, - { - "epoch": 0.20034100596760443, - "grad_norm": 0.10170529782772064, - "learning_rate": 7.482660625571134e-05, - "loss": 0.011151721328496933, - "step": 1175 - }, - { - "epoch": 0.20119352088661552, - "grad_norm": 0.10074683278799057, - "learning_rate": 7.482413646375498e-05, - "loss": 0.01180294007062912, - "step": 1180 - }, - { - "epoch": 0.2020460358056266, - "grad_norm": 0.047992780804634094, - "learning_rate": 7.482164924765016e-05, - "loss": 0.01065710186958313, - "step": 1185 - }, - { - "epoch": 0.2028985507246377, - "grad_norm": 0.1279197484254837, - "learning_rate": 7.481914460855796e-05, - "loss": 0.012219739705324173, - "step": 1190 - }, - { - "epoch": 0.20375106564364875, - "grad_norm": 0.11339649558067322, - "learning_rate": 7.481662254764765e-05, - "loss": 0.012664712965488434, - "step": 1195 - }, - { - "epoch": 0.20460358056265984, - "grad_norm": 0.10576959699392319, - "learning_rate": 7.481408306609662e-05, - "loss": 0.010009048134088516, - "step": 1200 - }, - { - "epoch": 0.20545609548167093, - "grad_norm": 0.08073283731937408, - "learning_rate": 7.481152616509037e-05, - "loss": 0.011126396805047989, - "step": 1205 - }, - { - "epoch": 0.20630861040068202, - "grad_norm": 0.09888558834791183, - "learning_rate": 7.480895184582253e-05, - "loss": 0.013096305727958679, - "step": 1210 - }, - { - "epoch": 0.2071611253196931, - "grad_norm": 0.09703118354082108, - "learning_rate": 7.48063601094949e-05, - "loss": 0.010653502494096755, - "step": 1215 - }, - { - "epoch": 0.20801364023870417, - "grad_norm": 0.10693266987800598, - "learning_rate": 7.48037509573174e-05, - "loss": 0.012283077836036682, - "step": 1220 - }, - { - "epoch": 0.20886615515771526, - "grad_norm": 0.1024889275431633, - "learning_rate": 7.480112439050804e-05, - "loss": 0.012971282005310059, - "step": 1225 - }, - { - "epoch": 0.20971867007672634, - "grad_norm": 0.10043203830718994, - "learning_rate": 7.4798480410293e-05, - "loss": 0.011451539397239686, - "step": 1230 - }, - { - "epoch": 0.21057118499573743, - "grad_norm": 0.11248672753572464, - "learning_rate": 7.47958190179066e-05, - "loss": 0.012805460393428803, - "step": 1235 - }, - { - "epoch": 0.21142369991474852, - "grad_norm": 0.08651525527238846, - "learning_rate": 7.479314021459123e-05, - "loss": 0.013016811013221741, - "step": 1240 - }, - { - "epoch": 0.21227621483375958, - "grad_norm": 0.06062095984816551, - "learning_rate": 7.479044400159746e-05, - "loss": 0.01299230456352234, - "step": 1245 - }, - { - "epoch": 0.21312872975277067, - "grad_norm": 0.1589215248823166, - "learning_rate": 7.478773038018397e-05, - "loss": 0.012607543170452118, - "step": 1250 - }, - { - "epoch": 0.21398124467178176, - "grad_norm": 0.12105076014995575, - "learning_rate": 7.478499935161758e-05, - "loss": 0.012772174179553985, - "step": 1255 - }, - { - "epoch": 0.21483375959079284, - "grad_norm": 0.0846070721745491, - "learning_rate": 7.478225091717323e-05, - "loss": 0.009387130290269852, - "step": 1260 - }, - { - "epoch": 0.21568627450980393, - "grad_norm": 0.054560381919145584, - "learning_rate": 7.477948507813396e-05, - "loss": 0.013299009203910828, - "step": 1265 - }, - { - "epoch": 0.216538789428815, - "grad_norm": 0.10125566273927689, - "learning_rate": 7.477670183579094e-05, - "loss": 0.013010343909263611, - "step": 1270 - }, - { - "epoch": 0.21739130434782608, - "grad_norm": 0.10493458807468414, - "learning_rate": 7.477390119144353e-05, - "loss": 0.013531042635440827, - "step": 1275 - }, - { - "epoch": 0.21824381926683717, - "grad_norm": 0.07453935593366623, - "learning_rate": 7.477108314639913e-05, - "loss": 0.01330820918083191, - "step": 1280 - }, - { - "epoch": 0.21909633418584826, - "grad_norm": 0.14430643618106842, - "learning_rate": 7.47682477019733e-05, - "loss": 0.015061555802822113, - "step": 1285 - }, - { - "epoch": 0.21994884910485935, - "grad_norm": 0.1073407456278801, - "learning_rate": 7.476539485948973e-05, - "loss": 0.011137319356203079, - "step": 1290 - }, - { - "epoch": 0.2208013640238704, - "grad_norm": 0.16794899106025696, - "learning_rate": 7.476252462028021e-05, - "loss": 0.013223762810230254, - "step": 1295 - }, - { - "epoch": 0.2216538789428815, - "grad_norm": 0.11509175598621368, - "learning_rate": 7.475963698568468e-05, - "loss": 0.012790821492671967, - "step": 1300 - }, - { - "epoch": 0.22250639386189258, - "grad_norm": 0.08615118265151978, - "learning_rate": 7.475673195705116e-05, - "loss": 0.011050455272197723, - "step": 1305 - }, - { - "epoch": 0.22335890878090367, - "grad_norm": 0.1186874732375145, - "learning_rate": 7.475380953573583e-05, - "loss": 0.011253353953361512, - "step": 1310 - }, - { - "epoch": 0.22421142369991476, - "grad_norm": 0.08680208027362823, - "learning_rate": 7.475086972310297e-05, - "loss": 0.010736887156963349, - "step": 1315 - }, - { - "epoch": 0.22506393861892582, - "grad_norm": 0.08204677700996399, - "learning_rate": 7.474791252052498e-05, - "loss": 0.011695361882448196, - "step": 1320 - }, - { - "epoch": 0.2259164535379369, - "grad_norm": 0.08945680409669876, - "learning_rate": 7.47449379293824e-05, - "loss": 0.01684057414531708, - "step": 1325 - }, - { - "epoch": 0.226768968456948, - "grad_norm": 0.12350911647081375, - "learning_rate": 7.474194595106384e-05, - "loss": 0.012560060620307923, - "step": 1330 - }, - { - "epoch": 0.22762148337595908, - "grad_norm": 0.09201455116271973, - "learning_rate": 7.473893658696605e-05, - "loss": 0.01128845140337944, - "step": 1335 - }, - { - "epoch": 0.22847399829497017, - "grad_norm": 0.12938448786735535, - "learning_rate": 7.473590983849396e-05, - "loss": 0.011510471999645232, - "step": 1340 - }, - { - "epoch": 0.22932651321398123, - "grad_norm": 0.0837291032075882, - "learning_rate": 7.473286570706047e-05, - "loss": 0.011677465587854385, - "step": 1345 - }, - { - "epoch": 0.23017902813299232, - "grad_norm": 0.12514546513557434, - "learning_rate": 7.472980419408675e-05, - "loss": 0.01308433711528778, - "step": 1350 - }, - { - "epoch": 0.2310315430520034, - "grad_norm": 0.05483829975128174, - "learning_rate": 7.472672530100199e-05, - "loss": 0.007203156501054764, - "step": 1355 - }, - { - "epoch": 0.2318840579710145, - "grad_norm": 0.13903425633907318, - "learning_rate": 7.472362902924352e-05, - "loss": 0.013231572508811951, - "step": 1360 - }, - { - "epoch": 0.23273657289002558, - "grad_norm": 0.09622041881084442, - "learning_rate": 7.472051538025678e-05, - "loss": 0.013325902819633483, - "step": 1365 - }, - { - "epoch": 0.23358908780903667, - "grad_norm": 0.0792275071144104, - "learning_rate": 7.471738435549533e-05, - "loss": 0.011098403483629227, - "step": 1370 - }, - { - "epoch": 0.23444160272804773, - "grad_norm": 0.11833506077528, - "learning_rate": 7.471423595642084e-05, - "loss": 0.014845246076583862, - "step": 1375 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 0.07531571388244629, - "learning_rate": 7.471107018450309e-05, - "loss": 0.011498528718948364, - "step": 1380 - }, - { - "epoch": 0.2361466325660699, - "grad_norm": 0.08739249408245087, - "learning_rate": 7.470788704121995e-05, - "loss": 0.013241058588027954, - "step": 1385 - }, - { - "epoch": 0.236999147485081, - "grad_norm": 0.07113732397556305, - "learning_rate": 7.470468652805743e-05, - "loss": 0.009632241725921632, - "step": 1390 - }, - { - "epoch": 0.23785166240409208, - "grad_norm": 0.07838430255651474, - "learning_rate": 7.470146864650965e-05, - "loss": 0.009344339370727539, - "step": 1395 - }, - { - "epoch": 0.23870417732310314, - "grad_norm": 0.1086372509598732, - "learning_rate": 7.46982333980788e-05, - "loss": 0.014708395302295684, - "step": 1400 - }, - { - "epoch": 0.23955669224211423, - "grad_norm": 0.06628046184778214, - "learning_rate": 7.469498078427519e-05, - "loss": 0.011472882330417633, - "step": 1405 - }, - { - "epoch": 0.24040920716112532, - "grad_norm": 0.13003386557102203, - "learning_rate": 7.46917108066173e-05, - "loss": 0.011933800578117371, - "step": 1410 - }, - { - "epoch": 0.2412617220801364, - "grad_norm": 0.07365831732749939, - "learning_rate": 7.468842346663162e-05, - "loss": 0.0102902352809906, - "step": 1415 - }, - { - "epoch": 0.2421142369991475, - "grad_norm": 0.11540158838033676, - "learning_rate": 7.468511876585279e-05, - "loss": 0.012016136944293977, - "step": 1420 - }, - { - "epoch": 0.24296675191815856, - "grad_norm": 0.11687944084405899, - "learning_rate": 7.468179670582359e-05, - "loss": 0.01773642897605896, - "step": 1425 - }, - { - "epoch": 0.24381926683716965, - "grad_norm": 0.10464506596326828, - "learning_rate": 7.467845728809483e-05, - "loss": 0.009476778656244278, - "step": 1430 - }, - { - "epoch": 0.24467178175618073, - "grad_norm": 0.11479093879461288, - "learning_rate": 7.46751005142255e-05, - "loss": 0.014100676774978638, - "step": 1435 - }, - { - "epoch": 0.24552429667519182, - "grad_norm": 0.1388610154390335, - "learning_rate": 7.46717263857826e-05, - "loss": 0.012735754251480103, - "step": 1440 - }, - { - "epoch": 0.2463768115942029, - "grad_norm": 0.08022485673427582, - "learning_rate": 7.466833490434132e-05, - "loss": 0.014391189813613892, - "step": 1445 - }, - { - "epoch": 0.24722932651321397, - "grad_norm": 0.12174725532531738, - "learning_rate": 7.466492607148492e-05, - "loss": 0.013387931883335114, - "step": 1450 - }, - { - "epoch": 0.24808184143222506, - "grad_norm": 0.1102161779999733, - "learning_rate": 7.466149988880474e-05, - "loss": 0.01143224760890007, - "step": 1455 - }, - { - "epoch": 0.24893435635123615, - "grad_norm": 0.14878468215465546, - "learning_rate": 7.465805635790024e-05, - "loss": 0.010428830236196517, - "step": 1460 - }, - { - "epoch": 0.24978687127024723, - "grad_norm": 0.3046579658985138, - "learning_rate": 7.4654595480379e-05, - "loss": 0.012648795545101166, - "step": 1465 - }, - { - "epoch": 0.24995737425404946, - "eval_loss": 0.036103956401348114, - "eval_runtime": 3.6524, - "eval_samples_per_second": 68.995, - "eval_steps_per_second": 1.095, - "step": 1466 - }, - { - "eval_cer_subset": 0.01393977885257381, - "eval_cer_subset_edit_distance": 856, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 1466 - }, - { - "epoch": 0.2506393861892583, - "grad_norm": 0.10660448670387268, - "learning_rate": 7.465111725785664e-05, - "loss": 0.013486798107624053, - "step": 1470 - }, - { - "epoch": 0.2514919011082694, - "grad_norm": 0.13888458907604218, - "learning_rate": 7.464762169195693e-05, - "loss": 0.015365575253963471, - "step": 1475 - }, - { - "epoch": 0.25234441602728047, - "grad_norm": 0.14945067465305328, - "learning_rate": 7.464410878431169e-05, - "loss": 0.01226709708571434, - "step": 1480 - }, - { - "epoch": 0.2531969309462916, - "grad_norm": 0.09638198465108871, - "learning_rate": 7.464057853656089e-05, - "loss": 0.012688608467578888, - "step": 1485 - }, - { - "epoch": 0.25404944586530265, - "grad_norm": 0.05725576728582382, - "learning_rate": 7.463703095035256e-05, - "loss": 0.011445847153663636, - "step": 1490 - }, - { - "epoch": 0.2549019607843137, - "grad_norm": 0.08474720269441605, - "learning_rate": 7.463346602734283e-05, - "loss": 0.01112249493598938, - "step": 1495 - }, - { - "epoch": 0.2557544757033248, - "grad_norm": 0.08283067494630814, - "learning_rate": 7.462988376919592e-05, - "loss": 0.01144670695066452, - "step": 1500 - }, - { - "epoch": 0.2566069906223359, - "grad_norm": 0.13687758147716522, - "learning_rate": 7.462628417758415e-05, - "loss": 0.012893360853195191, - "step": 1505 - }, - { - "epoch": 0.257459505541347, - "grad_norm": 0.16319195926189423, - "learning_rate": 7.462266725418793e-05, - "loss": 0.014364737272262573, - "step": 1510 - }, - { - "epoch": 0.25831202046035806, - "grad_norm": 0.0693240761756897, - "learning_rate": 7.461903300069576e-05, - "loss": 0.011550360918045044, - "step": 1515 - }, - { - "epoch": 0.2591645353793691, - "grad_norm": 0.0994478389620781, - "learning_rate": 7.461538141880423e-05, - "loss": 0.011711706221103669, - "step": 1520 - }, - { - "epoch": 0.26001705029838024, - "grad_norm": 0.20310325920581818, - "learning_rate": 7.461171251021802e-05, - "loss": 0.013178233802318574, - "step": 1525 - }, - { - "epoch": 0.2608695652173913, - "grad_norm": 0.07798318564891815, - "learning_rate": 7.460802627664991e-05, - "loss": 0.011273499578237534, - "step": 1530 - }, - { - "epoch": 0.2617220801364024, - "grad_norm": 0.1308072805404663, - "learning_rate": 7.460432271982073e-05, - "loss": 0.008084958046674728, - "step": 1535 - }, - { - "epoch": 0.2625745950554135, - "grad_norm": 0.08926808834075928, - "learning_rate": 7.460060184145944e-05, - "loss": 0.011974562704563142, - "step": 1540 - }, - { - "epoch": 0.26342710997442453, - "grad_norm": 0.07462260872125626, - "learning_rate": 7.459686364330307e-05, - "loss": 0.007739155739545822, - "step": 1545 - }, - { - "epoch": 0.26427962489343565, - "grad_norm": 0.10904734581708908, - "learning_rate": 7.459310812709675e-05, - "loss": 0.012024204432964324, - "step": 1550 - }, - { - "epoch": 0.2651321398124467, - "grad_norm": 0.11935116350650787, - "learning_rate": 7.458933529459364e-05, - "loss": 0.012462839484214783, - "step": 1555 - }, - { - "epoch": 0.2659846547314578, - "grad_norm": 0.08920887112617493, - "learning_rate": 7.458554514755506e-05, - "loss": 0.01472131609916687, - "step": 1560 - }, - { - "epoch": 0.2668371696504689, - "grad_norm": 0.12231490015983582, - "learning_rate": 7.458173768775036e-05, - "loss": 0.014967297017574311, - "step": 1565 - }, - { - "epoch": 0.26768968456947995, - "grad_norm": 0.10691904276609421, - "learning_rate": 7.4577912916957e-05, - "loss": 0.013200350105762482, - "step": 1570 - }, - { - "epoch": 0.26854219948849106, - "grad_norm": 0.06267247349023819, - "learning_rate": 7.457407083696049e-05, - "loss": 0.011946959048509597, - "step": 1575 - }, - { - "epoch": 0.2693947144075021, - "grad_norm": 0.10732340067625046, - "learning_rate": 7.457021144955448e-05, - "loss": 0.012722471356391906, - "step": 1580 - }, - { - "epoch": 0.27024722932651324, - "grad_norm": 0.08628841489553452, - "learning_rate": 7.456633475654061e-05, - "loss": 0.010444843024015427, - "step": 1585 - }, - { - "epoch": 0.2710997442455243, - "grad_norm": 0.1017296314239502, - "learning_rate": 7.456244075972866e-05, - "loss": 0.017299896478652953, - "step": 1590 - }, - { - "epoch": 0.27195225916453536, - "grad_norm": 0.07065381854772568, - "learning_rate": 7.455852946093652e-05, - "loss": 0.01379164457321167, - "step": 1595 - }, - { - "epoch": 0.2728047740835465, - "grad_norm": 0.08550920337438583, - "learning_rate": 7.455460086199008e-05, - "loss": 0.011976235359907151, - "step": 1600 - }, - { - "epoch": 0.27365728900255754, - "grad_norm": 0.08075132966041565, - "learning_rate": 7.455065496472335e-05, - "loss": 0.012481977045536042, - "step": 1605 - }, - { - "epoch": 0.27450980392156865, - "grad_norm": 0.08838896453380585, - "learning_rate": 7.454669177097839e-05, - "loss": 0.011825743317604064, - "step": 1610 - }, - { - "epoch": 0.2753623188405797, - "grad_norm": 0.06823412328958511, - "learning_rate": 7.454271128260537e-05, - "loss": 0.014278222620487214, - "step": 1615 - }, - { - "epoch": 0.27621483375959077, - "grad_norm": 0.09612765908241272, - "learning_rate": 7.45387135014625e-05, - "loss": 0.009220580756664275, - "step": 1620 - }, - { - "epoch": 0.2770673486786019, - "grad_norm": 0.08564051240682602, - "learning_rate": 7.45346984294161e-05, - "loss": 0.015146958827972411, - "step": 1625 - }, - { - "epoch": 0.27791986359761295, - "grad_norm": 0.0729006826877594, - "learning_rate": 7.453066606834052e-05, - "loss": 0.012136349081993103, - "step": 1630 - }, - { - "epoch": 0.27877237851662406, - "grad_norm": 0.10457300394773483, - "learning_rate": 7.452661642011818e-05, - "loss": 0.014803081750869751, - "step": 1635 - }, - { - "epoch": 0.2796248934356351, - "grad_norm": 0.09881619364023209, - "learning_rate": 7.452254948663964e-05, - "loss": 0.012653107941150665, - "step": 1640 - }, - { - "epoch": 0.2804774083546462, - "grad_norm": 0.12094103544950485, - "learning_rate": 7.451846526980343e-05, - "loss": 0.011742380261421204, - "step": 1645 - }, - { - "epoch": 0.2813299232736573, - "grad_norm": 0.06668030470609665, - "learning_rate": 7.451436377151624e-05, - "loss": 0.01095641851425171, - "step": 1650 - }, - { - "epoch": 0.28218243819266836, - "grad_norm": 0.06907116621732712, - "learning_rate": 7.451024499369278e-05, - "loss": 0.01093050017952919, - "step": 1655 - }, - { - "epoch": 0.2830349531116795, - "grad_norm": 0.13372033834457397, - "learning_rate": 7.45061089382558e-05, - "loss": 0.012350015342235565, - "step": 1660 - }, - { - "epoch": 0.28388746803069054, - "grad_norm": 0.06432037055492401, - "learning_rate": 7.450195560713617e-05, - "loss": 0.010150979459285735, - "step": 1665 - }, - { - "epoch": 0.2847399829497016, - "grad_norm": 0.10098759829998016, - "learning_rate": 7.449778500227281e-05, - "loss": 0.01070861890912056, - "step": 1670 - }, - { - "epoch": 0.2855924978687127, - "grad_norm": 0.1708894968032837, - "learning_rate": 7.449359712561269e-05, - "loss": 0.01218695342540741, - "step": 1675 - }, - { - "epoch": 0.2864450127877238, - "grad_norm": 0.15045367181301117, - "learning_rate": 7.448939197911084e-05, - "loss": 0.012416082620620727, - "step": 1680 - }, - { - "epoch": 0.2872975277067349, - "grad_norm": 0.08867572993040085, - "learning_rate": 7.44851695647304e-05, - "loss": 0.011927373707294464, - "step": 1685 - }, - { - "epoch": 0.28815004262574595, - "grad_norm": 0.1402040272951126, - "learning_rate": 7.448092988444247e-05, - "loss": 0.011733450740575791, - "step": 1690 - }, - { - "epoch": 0.289002557544757, - "grad_norm": 0.10436082631349564, - "learning_rate": 7.447667294022631e-05, - "loss": 0.013171072304248809, - "step": 1695 - }, - { - "epoch": 0.2898550724637681, - "grad_norm": 0.10628762096166611, - "learning_rate": 7.447239873406923e-05, - "loss": 0.012366896122694015, - "step": 1700 - }, - { - "epoch": 0.2907075873827792, - "grad_norm": 0.09782184660434723, - "learning_rate": 7.446810726796653e-05, - "loss": 0.011275313794612885, - "step": 1705 - }, - { - "epoch": 0.2915601023017903, - "grad_norm": 0.08403825014829636, - "learning_rate": 7.446379854392162e-05, - "loss": 0.010051032900810242, - "step": 1710 - }, - { - "epoch": 0.29241261722080136, - "grad_norm": 0.07938918471336365, - "learning_rate": 7.445947256394596e-05, - "loss": 0.00972949042916298, - "step": 1715 - }, - { - "epoch": 0.2932651321398124, - "grad_norm": 0.09250234067440033, - "learning_rate": 7.445512933005906e-05, - "loss": 0.009316288679838181, - "step": 1720 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 0.08939237147569656, - "learning_rate": 7.445076884428848e-05, - "loss": 0.007942373305559159, - "step": 1725 - }, - { - "epoch": 0.2949701619778346, - "grad_norm": 0.06440749019384384, - "learning_rate": 7.444639110866985e-05, - "loss": 0.008772502094507218, - "step": 1730 - }, - { - "epoch": 0.2958226768968457, - "grad_norm": 0.0980759784579277, - "learning_rate": 7.444199612524684e-05, - "loss": 0.0127939835190773, - "step": 1735 - }, - { - "epoch": 0.2966751918158568, - "grad_norm": 0.133849635720253, - "learning_rate": 7.443758389607117e-05, - "loss": 0.011026865988969802, - "step": 1740 - }, - { - "epoch": 0.29752770673486784, - "grad_norm": 0.08664857596158981, - "learning_rate": 7.443315442320263e-05, - "loss": 0.010273561626672745, - "step": 1745 - }, - { - "epoch": 0.29838022165387895, - "grad_norm": 0.11462656408548355, - "learning_rate": 7.442870770870902e-05, - "loss": 0.012825533747673035, - "step": 1750 - }, - { - "epoch": 0.29923273657289, - "grad_norm": 0.12586012482643127, - "learning_rate": 7.442424375466624e-05, - "loss": 0.01315489411354065, - "step": 1755 - }, - { - "epoch": 0.30008525149190113, - "grad_norm": 0.07139981538057327, - "learning_rate": 7.441976256315819e-05, - "loss": 0.010728174448013305, - "step": 1760 - }, - { - "epoch": 0.3009377664109122, - "grad_norm": 0.06837856024503708, - "learning_rate": 7.441526413627685e-05, - "loss": 0.012408022582530976, - "step": 1765 - }, - { - "epoch": 0.30179028132992325, - "grad_norm": 0.05851417034864426, - "learning_rate": 7.441074847612224e-05, - "loss": 0.009401807188987732, - "step": 1770 - }, - { - "epoch": 0.30264279624893436, - "grad_norm": 0.09595180302858353, - "learning_rate": 7.44062155848024e-05, - "loss": 0.010888323932886124, - "step": 1775 - }, - { - "epoch": 0.3034953111679454, - "grad_norm": 0.0811101421713829, - "learning_rate": 7.440166546443347e-05, - "loss": 0.00998341292142868, - "step": 1780 - }, - { - "epoch": 0.30434782608695654, - "grad_norm": 0.13257169723510742, - "learning_rate": 7.439709811713958e-05, - "loss": 0.014603719115257263, - "step": 1785 - }, - { - "epoch": 0.3052003410059676, - "grad_norm": 0.1428811252117157, - "learning_rate": 7.439251354505289e-05, - "loss": 0.01388871967792511, - "step": 1790 - }, - { - "epoch": 0.30605285592497866, - "grad_norm": 0.08253402262926102, - "learning_rate": 7.438791175031367e-05, - "loss": 0.010171836614608765, - "step": 1795 - }, - { - "epoch": 0.3069053708439898, - "grad_norm": 0.05991052836179733, - "learning_rate": 7.438329273507019e-05, - "loss": 0.01470649391412735, - "step": 1800 - }, - { - "epoch": 0.30775788576300084, - "grad_norm": 0.10785503685474396, - "learning_rate": 7.437865650147873e-05, - "loss": 0.012740308046340942, - "step": 1805 - }, - { - "epoch": 0.30861040068201195, - "grad_norm": 0.093068428337574, - "learning_rate": 7.437400305170367e-05, - "loss": 0.01183861643075943, - "step": 1810 - }, - { - "epoch": 0.309462915601023, - "grad_norm": 0.08662707358598709, - "learning_rate": 7.436933238791737e-05, - "loss": 0.011762722581624984, - "step": 1815 - }, - { - "epoch": 0.3103154305200341, - "grad_norm": 0.07826617360115051, - "learning_rate": 7.436464451230027e-05, - "loss": 0.009368828684091567, - "step": 1820 - }, - { - "epoch": 0.3111679454390452, - "grad_norm": 0.1295643448829651, - "learning_rate": 7.435993942704082e-05, - "loss": 0.010699732601642609, - "step": 1825 - }, - { - "epoch": 0.31202046035805625, - "grad_norm": 0.1412370651960373, - "learning_rate": 7.43552171343355e-05, - "loss": 0.0124404676258564, - "step": 1830 - }, - { - "epoch": 0.31287297527706737, - "grad_norm": 0.07793306559324265, - "learning_rate": 7.435047763638885e-05, - "loss": 0.010793016105890275, - "step": 1835 - }, - { - "epoch": 0.3137254901960784, - "grad_norm": 0.1273961067199707, - "learning_rate": 7.434572093541341e-05, - "loss": 0.012959575653076172, - "step": 1840 - }, - { - "epoch": 0.3145780051150895, - "grad_norm": 0.10340052098035812, - "learning_rate": 7.434094703362978e-05, - "loss": 0.011804693937301635, - "step": 1845 - }, - { - "epoch": 0.3154305200341006, - "grad_norm": 0.07878883183002472, - "learning_rate": 7.433615593326657e-05, - "loss": 0.011087532341480254, - "step": 1850 - }, - { - "epoch": 0.31628303495311166, - "grad_norm": 0.08166638761758804, - "learning_rate": 7.433134763656042e-05, - "loss": 0.010111966729164123, - "step": 1855 - }, - { - "epoch": 0.3171355498721228, - "grad_norm": 0.12048157304525375, - "learning_rate": 7.432652214575603e-05, - "loss": 0.013003784418106078, - "step": 1860 - }, - { - "epoch": 0.31798806479113384, - "grad_norm": 0.08009333908557892, - "learning_rate": 7.432167946310605e-05, - "loss": 0.01212536245584488, - "step": 1865 - }, - { - "epoch": 0.3188405797101449, - "grad_norm": 0.07344945520162582, - "learning_rate": 7.431681959087126e-05, - "loss": 0.011613032221794129, - "step": 1870 - }, - { - "epoch": 0.319693094629156, - "grad_norm": 0.09358638525009155, - "learning_rate": 7.431194253132037e-05, - "loss": 0.011946377158164979, - "step": 1875 - }, - { - "epoch": 0.3205456095481671, - "grad_norm": 0.14091502130031586, - "learning_rate": 7.430704828673016e-05, - "loss": 0.012845572829246522, - "step": 1880 - }, - { - "epoch": 0.3213981244671782, - "grad_norm": 0.0754130631685257, - "learning_rate": 7.430213685938543e-05, - "loss": 0.011171463876962662, - "step": 1885 - }, - { - "epoch": 0.32225063938618925, - "grad_norm": 0.10210556536912918, - "learning_rate": 7.429720825157901e-05, - "loss": 0.010276605188846589, - "step": 1890 - }, - { - "epoch": 0.32310315430520037, - "grad_norm": 0.10094697028398514, - "learning_rate": 7.429226246561173e-05, - "loss": 0.01233583763241768, - "step": 1895 - }, - { - "epoch": 0.32395566922421143, - "grad_norm": 0.0673881471157074, - "learning_rate": 7.428729950379244e-05, - "loss": 0.008631937205791473, - "step": 1900 - }, - { - "epoch": 0.3248081841432225, - "grad_norm": 0.11807650327682495, - "learning_rate": 7.428231936843803e-05, - "loss": 0.012879209220409393, - "step": 1905 - }, - { - "epoch": 0.3256606990622336, - "grad_norm": 0.0627446100115776, - "learning_rate": 7.427732206187338e-05, - "loss": 0.011548225581645966, - "step": 1910 - }, - { - "epoch": 0.32651321398124467, - "grad_norm": 0.09312627464532852, - "learning_rate": 7.427230758643139e-05, - "loss": 0.012763653695583344, - "step": 1915 - }, - { - "epoch": 0.3273657289002558, - "grad_norm": 0.12694048881530762, - "learning_rate": 7.426727594445302e-05, - "loss": 0.014219759404659272, - "step": 1920 - }, - { - "epoch": 0.32821824381926684, - "grad_norm": 0.09415233880281448, - "learning_rate": 7.426222713828717e-05, - "loss": 0.01088135689496994, - "step": 1925 - }, - { - "epoch": 0.3290707587382779, - "grad_norm": 0.1079363226890564, - "learning_rate": 7.425716117029082e-05, - "loss": 0.013090427219867706, - "step": 1930 - }, - { - "epoch": 0.329923273657289, - "grad_norm": 0.10847736895084381, - "learning_rate": 7.42520780428289e-05, - "loss": 0.011184506118297577, - "step": 1935 - }, - { - "epoch": 0.3307757885763001, - "grad_norm": 0.12416253983974457, - "learning_rate": 7.424697775827442e-05, - "loss": 0.012871085107326508, - "step": 1940 - }, - { - "epoch": 0.3316283034953112, - "grad_norm": 0.08419755846261978, - "learning_rate": 7.424186031900833e-05, - "loss": 0.01026538610458374, - "step": 1945 - }, - { - "epoch": 0.33248081841432225, - "grad_norm": 0.06923236697912216, - "learning_rate": 7.423672572741965e-05, - "loss": 0.012079264223575591, - "step": 1950 - }, - { - "epoch": 0.3333333333333333, - "grad_norm": 0.08516070991754532, - "learning_rate": 7.423157398590534e-05, - "loss": 0.011150284111499787, - "step": 1955 - }, - { - "epoch": 0.33418584825234443, - "grad_norm": 0.054969049990177155, - "learning_rate": 7.422640509687045e-05, - "loss": 0.008261225372552871, - "step": 1960 - }, - { - "epoch": 0.3350383631713555, - "grad_norm": 0.09037495404481888, - "learning_rate": 7.422121906272795e-05, - "loss": 0.015576986968517304, - "step": 1965 - }, - { - "epoch": 0.3358908780903666, - "grad_norm": 0.08676491677761078, - "learning_rate": 7.421601588589889e-05, - "loss": 0.01942193806171417, - "step": 1970 - }, - { - "epoch": 0.33674339300937767, - "grad_norm": 0.09090764820575714, - "learning_rate": 7.421079556881224e-05, - "loss": 0.012568703293800354, - "step": 1975 - }, - { - "epoch": 0.3375959079283887, - "grad_norm": 0.07859542965888977, - "learning_rate": 7.420555811390505e-05, - "loss": 0.011662108451128006, - "step": 1980 - }, - { - "epoch": 0.33844842284739984, - "grad_norm": 0.06368016451597214, - "learning_rate": 7.420030352362235e-05, - "loss": 0.010762494802474976, - "step": 1985 - }, - { - "epoch": 0.3393009377664109, - "grad_norm": 0.10950745642185211, - "learning_rate": 7.419503180041712e-05, - "loss": 0.012577894330024719, - "step": 1990 - }, - { - "epoch": 0.340153452685422, - "grad_norm": 0.07888182997703552, - "learning_rate": 7.41897429467504e-05, - "loss": 0.009134671837091445, - "step": 1995 - }, - { - "epoch": 0.3410059676044331, - "grad_norm": 0.08978903293609619, - "learning_rate": 7.41844369650912e-05, - "loss": 0.011774566024541855, - "step": 2000 - }, - { - "epoch": 0.34185848252344414, - "grad_norm": 0.07103633135557175, - "learning_rate": 7.417911385791653e-05, - "loss": 0.011116493493318558, - "step": 2005 - }, - { - "epoch": 0.34271099744245526, - "grad_norm": 0.07445147633552551, - "learning_rate": 7.417377362771138e-05, - "loss": 0.012135914713144302, - "step": 2010 - }, - { - "epoch": 0.3435635123614663, - "grad_norm": 0.09372841566801071, - "learning_rate": 7.416841627696876e-05, - "loss": 0.014101208746433258, - "step": 2015 - }, - { - "epoch": 0.34441602728047743, - "grad_norm": 0.10181085020303726, - "learning_rate": 7.416304180818966e-05, - "loss": 0.010917666554450988, - "step": 2020 - }, - { - "epoch": 0.3452685421994885, - "grad_norm": 0.08702226728200912, - "learning_rate": 7.415765022388305e-05, - "loss": 0.012508213520050049, - "step": 2025 - }, - { - "epoch": 0.34612105711849955, - "grad_norm": 0.11725348234176636, - "learning_rate": 7.415224152656591e-05, - "loss": 0.012123394012451171, - "step": 2030 - }, - { - "epoch": 0.34697357203751067, - "grad_norm": 0.10797812044620514, - "learning_rate": 7.414681571876321e-05, - "loss": 0.011308898031711579, - "step": 2035 - }, - { - "epoch": 0.34782608695652173, - "grad_norm": 0.07944193482398987, - "learning_rate": 7.414137280300787e-05, - "loss": 0.008828282356262207, - "step": 2040 - }, - { - "epoch": 0.34867860187553285, - "grad_norm": 0.09413408488035202, - "learning_rate": 7.413591278184086e-05, - "loss": 0.010974615067243575, - "step": 2045 - }, - { - "epoch": 0.3495311167945439, - "grad_norm": 0.13984905183315277, - "learning_rate": 7.413043565781107e-05, - "loss": 0.013567428290843963, - "step": 2050 - }, - { - "epoch": 0.35038363171355497, - "grad_norm": 0.09445049613714218, - "learning_rate": 7.41249414334754e-05, - "loss": 0.011122822761535645, - "step": 2055 - }, - { - "epoch": 0.3512361466325661, - "grad_norm": 0.07995510846376419, - "learning_rate": 7.411943011139877e-05, - "loss": 0.009908045828342437, - "step": 2060 - }, - { - "epoch": 0.35208866155157714, - "grad_norm": 0.1185273677110672, - "learning_rate": 7.411390169415402e-05, - "loss": 0.012709785997867585, - "step": 2065 - }, - { - "epoch": 0.35294117647058826, - "grad_norm": 0.11713512986898422, - "learning_rate": 7.4108356184322e-05, - "loss": 0.009765231609344482, - "step": 2070 - }, - { - "epoch": 0.3537936913895993, - "grad_norm": 0.06523539125919342, - "learning_rate": 7.410279358449155e-05, - "loss": 0.0113253653049469, - "step": 2075 - }, - { - "epoch": 0.3546462063086104, - "grad_norm": 0.07587762176990509, - "learning_rate": 7.409721389725948e-05, - "loss": 0.009385265409946442, - "step": 2080 - }, - { - "epoch": 0.3554987212276215, - "grad_norm": 0.05211614444851875, - "learning_rate": 7.409161712523056e-05, - "loss": 0.012498895078897477, - "step": 2085 - }, - { - "epoch": 0.35635123614663256, - "grad_norm": 0.12545894086360931, - "learning_rate": 7.408600327101755e-05, - "loss": 0.012212803959846497, - "step": 2090 - }, - { - "epoch": 0.35720375106564367, - "grad_norm": 0.10047369450330734, - "learning_rate": 7.40803723372412e-05, - "loss": 0.012341489642858505, - "step": 2095 - }, - { - "epoch": 0.35805626598465473, - "grad_norm": 0.13728737831115723, - "learning_rate": 7.40747243265302e-05, - "loss": 0.011351624131202697, - "step": 2100 - }, - { - "epoch": 0.3589087809036658, - "grad_norm": 0.1251213699579239, - "learning_rate": 7.406905924152125e-05, - "loss": 0.013545188307762145, - "step": 2105 - }, - { - "epoch": 0.3597612958226769, - "grad_norm": 0.07805601507425308, - "learning_rate": 7.406337708485897e-05, - "loss": 0.010711775720119476, - "step": 2110 - }, - { - "epoch": 0.36061381074168797, - "grad_norm": 0.08311845362186432, - "learning_rate": 7.405767785919598e-05, - "loss": 0.01128876730799675, - "step": 2115 - }, - { - "epoch": 0.3614663256606991, - "grad_norm": 0.09670841693878174, - "learning_rate": 7.405196156719291e-05, - "loss": 0.013085599243640899, - "step": 2120 - }, - { - "epoch": 0.36231884057971014, - "grad_norm": 0.10827390104532242, - "learning_rate": 7.404622821151829e-05, - "loss": 0.011315967142581939, - "step": 2125 - }, - { - "epoch": 0.3631713554987212, - "grad_norm": 0.08578862994909286, - "learning_rate": 7.404047779484862e-05, - "loss": 0.01172153502702713, - "step": 2130 - }, - { - "epoch": 0.3640238704177323, - "grad_norm": 0.08786064386367798, - "learning_rate": 7.403471031986841e-05, - "loss": 0.010834509134292602, - "step": 2135 - }, - { - "epoch": 0.3648763853367434, - "grad_norm": 0.07956185191869736, - "learning_rate": 7.402892578927012e-05, - "loss": 0.01222250759601593, - "step": 2140 - }, - { - "epoch": 0.3657289002557545, - "grad_norm": 0.10179547220468521, - "learning_rate": 7.402312420575414e-05, - "loss": 0.010667824000120164, - "step": 2145 - }, - { - "epoch": 0.36658141517476556, - "grad_norm": 0.10311263799667358, - "learning_rate": 7.401730557202884e-05, - "loss": 0.014463961124420166, - "step": 2150 - }, - { - "epoch": 0.3674339300937766, - "grad_norm": 0.0935206189751625, - "learning_rate": 7.401146989081058e-05, - "loss": 0.010451390594244003, - "step": 2155 - }, - { - "epoch": 0.36828644501278773, - "grad_norm": 0.08164738863706589, - "learning_rate": 7.400561716482362e-05, - "loss": 0.013009518384933472, - "step": 2160 - }, - { - "epoch": 0.3691389599317988, - "grad_norm": 0.0638088807463646, - "learning_rate": 7.399974739680022e-05, - "loss": 0.0109320767223835, - "step": 2165 - }, - { - "epoch": 0.3699914748508099, - "grad_norm": 0.09591665863990784, - "learning_rate": 7.399386058948057e-05, - "loss": 0.01293652206659317, - "step": 2170 - }, - { - "epoch": 0.37084398976982097, - "grad_norm": 0.08929681777954102, - "learning_rate": 7.398795674561285e-05, - "loss": 0.011034403741359711, - "step": 2175 - }, - { - "epoch": 0.37169650468883203, - "grad_norm": 0.07356081902980804, - "learning_rate": 7.398203586795315e-05, - "loss": 0.010477699339389801, - "step": 2180 - }, - { - "epoch": 0.37254901960784315, - "grad_norm": 0.1117938682436943, - "learning_rate": 7.397609795926555e-05, - "loss": 0.008920109272003174, - "step": 2185 - }, - { - "epoch": 0.3734015345268542, - "grad_norm": 0.10849595069885254, - "learning_rate": 7.397014302232204e-05, - "loss": 0.01170756369829178, - "step": 2190 - }, - { - "epoch": 0.3742540494458653, - "grad_norm": 0.08509895205497742, - "learning_rate": 7.396417105990261e-05, - "loss": 0.010042114555835724, - "step": 2195 - }, - { - "epoch": 0.3751065643648764, - "grad_norm": 0.10500915348529816, - "learning_rate": 7.395818207479515e-05, - "loss": 0.011792914569377899, - "step": 2200 - }, - { - "epoch": 0.37595907928388744, - "grad_norm": 0.06618086993694305, - "learning_rate": 7.395217606979553e-05, - "loss": 0.011084456741809846, - "step": 2205 - }, - { - "epoch": 0.37681159420289856, - "grad_norm": 0.08622384816408157, - "learning_rate": 7.394615304770756e-05, - "loss": 0.010955430567264557, - "step": 2210 - }, - { - "epoch": 0.3776641091219096, - "grad_norm": 0.10002898424863815, - "learning_rate": 7.394011301134296e-05, - "loss": 0.011026810109615325, - "step": 2215 - }, - { - "epoch": 0.37851662404092073, - "grad_norm": 0.16406390070915222, - "learning_rate": 7.393405596352144e-05, - "loss": 0.010511884093284607, - "step": 2220 - }, - { - "epoch": 0.3793691389599318, - "grad_norm": 0.077234648168087, - "learning_rate": 7.392798190707062e-05, - "loss": 0.011723069101572036, - "step": 2225 - }, - { - "epoch": 0.38022165387894286, - "grad_norm": 0.09080372750759125, - "learning_rate": 7.392189084482609e-05, - "loss": 0.010011065006256103, - "step": 2230 - }, - { - "epoch": 0.38107416879795397, - "grad_norm": 0.08161097019910812, - "learning_rate": 7.391578277963134e-05, - "loss": 0.012426529079675674, - "step": 2235 - }, - { - "epoch": 0.38192668371696503, - "grad_norm": 0.09220891445875168, - "learning_rate": 7.390965771433783e-05, - "loss": 0.011983324587345124, - "step": 2240 - }, - { - "epoch": 0.38277919863597615, - "grad_norm": 0.10752015560865402, - "learning_rate": 7.390351565180495e-05, - "loss": 0.014156198501586914, - "step": 2245 - }, - { - "epoch": 0.3836317135549872, - "grad_norm": 0.05059373378753662, - "learning_rate": 7.38973565949e-05, - "loss": 0.00998034030199051, - "step": 2250 - }, - { - "epoch": 0.38448422847399827, - "grad_norm": 0.11214456707239151, - "learning_rate": 7.389118054649824e-05, - "loss": 0.01075390875339508, - "step": 2255 - }, - { - "epoch": 0.3853367433930094, - "grad_norm": 0.07631754130125046, - "learning_rate": 7.388498750948286e-05, - "loss": 0.014638753235340118, - "step": 2260 - }, - { - "epoch": 0.38618925831202044, - "grad_norm": 0.07249671965837479, - "learning_rate": 7.387877748674499e-05, - "loss": 0.011368723213672638, - "step": 2265 - }, - { - "epoch": 0.38704177323103156, - "grad_norm": 0.11984748393297195, - "learning_rate": 7.387255048118364e-05, - "loss": 0.011021500825881958, - "step": 2270 - }, - { - "epoch": 0.3878942881500426, - "grad_norm": 0.08478229492902756, - "learning_rate": 7.386630649570581e-05, - "loss": 0.009952519088983536, - "step": 2275 - }, - { - "epoch": 0.3887468030690537, - "grad_norm": 0.11780049651861191, - "learning_rate": 7.386004553322639e-05, - "loss": 0.009453963488340378, - "step": 2280 - }, - { - "epoch": 0.3895993179880648, - "grad_norm": 0.06949981302022934, - "learning_rate": 7.38537675966682e-05, - "loss": 0.009042493999004364, - "step": 2285 - }, - { - "epoch": 0.39045183290707586, - "grad_norm": 0.11411654949188232, - "learning_rate": 7.3847472688962e-05, - "loss": 0.013985235989093781, - "step": 2290 - }, - { - "epoch": 0.391304347826087, - "grad_norm": 0.11030828952789307, - "learning_rate": 7.384116081304647e-05, - "loss": 0.01135389506816864, - "step": 2295 - }, - { - "epoch": 0.39215686274509803, - "grad_norm": 0.0808996856212616, - "learning_rate": 7.38348319718682e-05, - "loss": 0.011089587211608886, - "step": 2300 - }, - { - "epoch": 0.39300937766410915, - "grad_norm": 0.11319196224212646, - "learning_rate": 7.382848616838167e-05, - "loss": 0.01407056450843811, - "step": 2305 - }, - { - "epoch": 0.3938618925831202, - "grad_norm": 0.09301812201738358, - "learning_rate": 7.382212340554937e-05, - "loss": 0.012283433228731155, - "step": 2310 - }, - { - "epoch": 0.39471440750213127, - "grad_norm": 0.08611076325178146, - "learning_rate": 7.381574368634159e-05, - "loss": 0.012206315249204635, - "step": 2315 - }, - { - "epoch": 0.3955669224211424, - "grad_norm": 0.08104816824197769, - "learning_rate": 7.380934701373665e-05, - "loss": 0.01059889942407608, - "step": 2320 - }, - { - "epoch": 0.39641943734015345, - "grad_norm": 0.09607693552970886, - "learning_rate": 7.380293339072067e-05, - "loss": 0.010189100354909896, - "step": 2325 - }, - { - "epoch": 0.39727195225916456, - "grad_norm": 0.08985438197851181, - "learning_rate": 7.37965028202878e-05, - "loss": 0.01145355924963951, - "step": 2330 - }, - { - "epoch": 0.3981244671781756, - "grad_norm": 0.0767461284995079, - "learning_rate": 7.379005530544e-05, - "loss": 0.012533161044120788, - "step": 2335 - }, - { - "epoch": 0.3989769820971867, - "grad_norm": 0.17541736364364624, - "learning_rate": 7.378359084918724e-05, - "loss": 0.011619434505701066, - "step": 2340 - }, - { - "epoch": 0.3998294970161978, - "grad_norm": 0.07870234549045563, - "learning_rate": 7.377710945454728e-05, - "loss": 0.013362208008766174, - "step": 2345 - }, - { - "epoch": 0.40068201193520886, - "grad_norm": 0.08661636710166931, - "learning_rate": 7.377061112454589e-05, - "loss": 0.011086350679397583, - "step": 2350 - }, - { - "epoch": 0.40153452685422, - "grad_norm": 0.08467904478311539, - "learning_rate": 7.376409586221668e-05, - "loss": 0.008972878754138946, - "step": 2355 - }, - { - "epoch": 0.40238704177323104, - "grad_norm": 0.09345834702253342, - "learning_rate": 7.375756367060121e-05, - "loss": 0.01281469464302063, - "step": 2360 - }, - { - "epoch": 0.4032395566922421, - "grad_norm": 0.10789518058300018, - "learning_rate": 7.375101455274893e-05, - "loss": 0.012343473732471466, - "step": 2365 - }, - { - "epoch": 0.4040920716112532, - "grad_norm": 0.06546701490879059, - "learning_rate": 7.374444851171716e-05, - "loss": 0.012971158325672149, - "step": 2370 - }, - { - "epoch": 0.40494458653026427, - "grad_norm": 0.08968871831893921, - "learning_rate": 7.373786555057117e-05, - "loss": 0.012170027941465378, - "step": 2375 - }, - { - "epoch": 0.4057971014492754, - "grad_norm": 0.058557040989398956, - "learning_rate": 7.373126567238412e-05, - "loss": 0.009915658086538316, - "step": 2380 - }, - { - "epoch": 0.40664961636828645, - "grad_norm": 0.08734243363142014, - "learning_rate": 7.3724648880237e-05, - "loss": 0.009043127298355103, - "step": 2385 - }, - { - "epoch": 0.4075021312872975, - "grad_norm": 0.09249505400657654, - "learning_rate": 7.371801517721879e-05, - "loss": 0.008064758032560349, - "step": 2390 - }, - { - "epoch": 0.4083546462063086, - "grad_norm": 0.09015105664730072, - "learning_rate": 7.371136456642631e-05, - "loss": 0.007721304893493652, - "step": 2395 - }, - { - "epoch": 0.4092071611253197, - "grad_norm": 0.08557724207639694, - "learning_rate": 7.37046970509643e-05, - "loss": 0.010766822844743729, - "step": 2400 - }, - { - "epoch": 0.4100596760443308, - "grad_norm": 0.08001160621643066, - "learning_rate": 7.369801263394536e-05, - "loss": 0.00953015759587288, - "step": 2405 - }, - { - "epoch": 0.41091219096334186, - "grad_norm": 0.08470463752746582, - "learning_rate": 7.369131131849e-05, - "loss": 0.010154610127210617, - "step": 2410 - }, - { - "epoch": 0.4117647058823529, - "grad_norm": 0.07110592722892761, - "learning_rate": 7.368459310772664e-05, - "loss": 0.010146965831518173, - "step": 2415 - }, - { - "epoch": 0.41261722080136404, - "grad_norm": 0.06808072328567505, - "learning_rate": 7.367785800479152e-05, - "loss": 0.01043560653924942, - "step": 2420 - }, - { - "epoch": 0.4134697357203751, - "grad_norm": 0.09226541966199875, - "learning_rate": 7.367110601282884e-05, - "loss": 0.011138775944709777, - "step": 2425 - }, - { - "epoch": 0.4143222506393862, - "grad_norm": 0.08650510013103485, - "learning_rate": 7.366433713499067e-05, - "loss": 0.011451859772205353, - "step": 2430 - }, - { - "epoch": 0.4151747655583973, - "grad_norm": 0.11477349698543549, - "learning_rate": 7.365755137443691e-05, - "loss": 0.013105396926403046, - "step": 2435 - }, - { - "epoch": 0.41602728047740833, - "grad_norm": 0.1117088794708252, - "learning_rate": 7.365074873433541e-05, - "loss": 0.01190647780895233, - "step": 2440 - }, - { - "epoch": 0.41687979539641945, - "grad_norm": 0.058514054864645004, - "learning_rate": 7.364392921786185e-05, - "loss": 0.011006749421358108, - "step": 2445 - }, - { - "epoch": 0.4177323103154305, - "grad_norm": 0.0925084576010704, - "learning_rate": 7.363709282819981e-05, - "loss": 0.011449025571346283, - "step": 2450 - }, - { - "epoch": 0.4185848252344416, - "grad_norm": 0.10087555646896362, - "learning_rate": 7.363023956854074e-05, - "loss": 0.011715477705001831, - "step": 2455 - }, - { - "epoch": 0.4194373401534527, - "grad_norm": 0.08760760724544525, - "learning_rate": 7.362336944208399e-05, - "loss": 0.011089532822370528, - "step": 2460 - }, - { - "epoch": 0.42028985507246375, - "grad_norm": 0.09802501648664474, - "learning_rate": 7.361648245203674e-05, - "loss": 0.012181267142295837, - "step": 2465 - }, - { - "epoch": 0.42114236999147486, - "grad_norm": 0.06908553838729858, - "learning_rate": 7.36095786016141e-05, - "loss": 0.010319410264492035, - "step": 2470 - }, - { - "epoch": 0.4219948849104859, - "grad_norm": 0.07190519571304321, - "learning_rate": 7.360265789403896e-05, - "loss": 0.013445201516151428, - "step": 2475 - }, - { - "epoch": 0.42284739982949704, - "grad_norm": 0.06683836877346039, - "learning_rate": 7.359572033254219e-05, - "loss": 0.008111725002527237, - "step": 2480 - }, - { - "epoch": 0.4236999147485081, - "grad_norm": 0.07094739377498627, - "learning_rate": 7.358876592036245e-05, - "loss": 0.012130254507064819, - "step": 2485 - }, - { - "epoch": 0.42455242966751916, - "grad_norm": 0.11974254250526428, - "learning_rate": 7.358179466074629e-05, - "loss": 0.011426160484552384, - "step": 2490 - }, - { - "epoch": 0.4254049445865303, - "grad_norm": 0.07710634917020798, - "learning_rate": 7.357480655694814e-05, - "loss": 0.010044369101524352, - "step": 2495 - }, - { - "epoch": 0.42625745950554134, - "grad_norm": 0.08417962491512299, - "learning_rate": 7.356780161223026e-05, - "loss": 0.010821688175201415, - "step": 2500 - }, - { - "epoch": 0.42710997442455245, - "grad_norm": 0.11058598011732101, - "learning_rate": 7.35607798298628e-05, - "loss": 0.012949730455875396, - "step": 2505 - }, - { - "epoch": 0.4279624893435635, - "grad_norm": 0.08686384558677673, - "learning_rate": 7.355374121312377e-05, - "loss": 0.009096769988536835, - "step": 2510 - }, - { - "epoch": 0.4288150042625746, - "grad_norm": 0.11153281480073929, - "learning_rate": 7.354668576529903e-05, - "loss": 0.010433172434568405, - "step": 2515 - }, - { - "epoch": 0.4296675191815857, - "grad_norm": 0.08490245044231415, - "learning_rate": 7.353961348968229e-05, - "loss": 0.008478586375713349, - "step": 2520 - }, - { - "epoch": 0.43052003410059675, - "grad_norm": 0.06651579588651657, - "learning_rate": 7.353252438957511e-05, - "loss": 0.012342555820941925, - "step": 2525 - }, - { - "epoch": 0.43137254901960786, - "grad_norm": 0.08961665630340576, - "learning_rate": 7.352541846828694e-05, - "loss": 0.010387994349002838, - "step": 2530 - }, - { - "epoch": 0.4322250639386189, - "grad_norm": 0.08726584166288376, - "learning_rate": 7.351829572913505e-05, - "loss": 0.009760166704654693, - "step": 2535 - }, - { - "epoch": 0.43307757885763, - "grad_norm": 0.06280151754617691, - "learning_rate": 7.351115617544459e-05, - "loss": 0.01087048500776291, - "step": 2540 - }, - { - "epoch": 0.4339300937766411, - "grad_norm": 0.09519831836223602, - "learning_rate": 7.350399981054851e-05, - "loss": 0.011516393721103668, - "step": 2545 - }, - { - "epoch": 0.43478260869565216, - "grad_norm": 0.09179427474737167, - "learning_rate": 7.349682663778766e-05, - "loss": 0.013757939636707305, - "step": 2550 - }, - { - "epoch": 0.4356351236146633, - "grad_norm": 0.10378465801477432, - "learning_rate": 7.34896366605107e-05, - "loss": 0.011337973177433014, - "step": 2555 - }, - { - "epoch": 0.43648763853367434, - "grad_norm": 0.14043129980564117, - "learning_rate": 7.348242988207418e-05, - "loss": 0.01203509122133255, - "step": 2560 - }, - { - "epoch": 0.4373401534526854, - "grad_norm": 0.06442756950855255, - "learning_rate": 7.347520630584243e-05, - "loss": 0.007210708409547806, - "step": 2565 - }, - { - "epoch": 0.4381926683716965, - "grad_norm": 0.05981998145580292, - "learning_rate": 7.346796593518768e-05, - "loss": 0.009825873374938964, - "step": 2570 - }, - { - "epoch": 0.4390451832907076, - "grad_norm": 0.10198855400085449, - "learning_rate": 7.346070877348996e-05, - "loss": 0.013066151738166809, - "step": 2575 - }, - { - "epoch": 0.4398976982097187, - "grad_norm": 0.12545716762542725, - "learning_rate": 7.345343482413716e-05, - "loss": 0.008229418843984603, - "step": 2580 - }, - { - "epoch": 0.44075021312872975, - "grad_norm": 0.1352240890264511, - "learning_rate": 7.344614409052501e-05, - "loss": 0.013183671236038207, - "step": 2585 - }, - { - "epoch": 0.4416027280477408, - "grad_norm": 0.07198570668697357, - "learning_rate": 7.343883657605704e-05, - "loss": 0.010311058908700942, - "step": 2590 - }, - { - "epoch": 0.4424552429667519, - "grad_norm": 0.08454001694917679, - "learning_rate": 7.343151228414469e-05, - "loss": 0.009928110986948013, - "step": 2595 - }, - { - "epoch": 0.443307757885763, - "grad_norm": 0.07289708405733109, - "learning_rate": 7.342417121820714e-05, - "loss": 0.011071844398975373, - "step": 2600 - }, - { - "epoch": 0.4441602728047741, - "grad_norm": 0.12291301786899567, - "learning_rate": 7.341681338167145e-05, - "loss": 0.011248499900102616, - "step": 2605 - }, - { - "epoch": 0.44501278772378516, - "grad_norm": 0.14277565479278564, - "learning_rate": 7.340943877797252e-05, - "loss": 0.010025183856487273, - "step": 2610 - }, - { - "epoch": 0.4458653026427962, - "grad_norm": 0.07569251209497452, - "learning_rate": 7.340204741055304e-05, - "loss": 0.009996208548545837, - "step": 2615 - }, - { - "epoch": 0.44671781756180734, - "grad_norm": 0.10494589060544968, - "learning_rate": 7.339463928286357e-05, - "loss": 0.01392391324043274, - "step": 2620 - }, - { - "epoch": 0.4475703324808184, - "grad_norm": 0.14377856254577637, - "learning_rate": 7.338721439836245e-05, - "loss": 0.012823046743869781, - "step": 2625 - }, - { - "epoch": 0.4484228473998295, - "grad_norm": 0.06943785399198532, - "learning_rate": 7.337977276051586e-05, - "loss": 0.009452010691165923, - "step": 2630 - }, - { - "epoch": 0.4492753623188406, - "grad_norm": 0.09933419525623322, - "learning_rate": 7.337231437279783e-05, - "loss": 0.008945996314287186, - "step": 2635 - }, - { - "epoch": 0.45012787723785164, - "grad_norm": 0.09861225634813309, - "learning_rate": 7.336483923869016e-05, - "loss": 0.010671885311603546, - "step": 2640 - }, - { - "epoch": 0.45098039215686275, - "grad_norm": 0.08303772658109665, - "learning_rate": 7.335734736168249e-05, - "loss": 0.009589634835720062, - "step": 2645 - }, - { - "epoch": 0.4518329070758738, - "grad_norm": 0.08657588064670563, - "learning_rate": 7.334983874527231e-05, - "loss": 0.008064036071300507, - "step": 2650 - }, - { - "epoch": 0.45268542199488493, - "grad_norm": 0.10513710975646973, - "learning_rate": 7.334231339296485e-05, - "loss": 0.01647743284702301, - "step": 2655 - }, - { - "epoch": 0.453537936913896, - "grad_norm": 0.10341943055391312, - "learning_rate": 7.333477130827322e-05, - "loss": 0.009101226180791854, - "step": 2660 - }, - { - "epoch": 0.45439045183290705, - "grad_norm": 0.09740681946277618, - "learning_rate": 7.33272124947183e-05, - "loss": 0.011460770666599274, - "step": 2665 - }, - { - "epoch": 0.45524296675191817, - "grad_norm": 0.06477998197078705, - "learning_rate": 7.331963695582881e-05, - "loss": 0.011711791157722473, - "step": 2670 - }, - { - "epoch": 0.4560954816709292, - "grad_norm": 0.0881948322057724, - "learning_rate": 7.331204469514127e-05, - "loss": 0.009621420502662658, - "step": 2675 - }, - { - "epoch": 0.45694799658994034, - "grad_norm": 0.09553391486406326, - "learning_rate": 7.330443571619998e-05, - "loss": 0.011725078523159026, - "step": 2680 - }, - { - "epoch": 0.4578005115089514, - "grad_norm": 0.10480209439992905, - "learning_rate": 7.329681002255706e-05, - "loss": 0.012353558838367463, - "step": 2685 - }, - { - "epoch": 0.45865302642796246, - "grad_norm": 0.08409439772367477, - "learning_rate": 7.328916761777247e-05, - "loss": 0.01114615797996521, - "step": 2690 - }, - { - "epoch": 0.4595055413469736, - "grad_norm": 0.07166923582553864, - "learning_rate": 7.32815085054139e-05, - "loss": 0.008672221004962921, - "step": 2695 - }, - { - "epoch": 0.46035805626598464, - "grad_norm": 0.07308658212423325, - "learning_rate": 7.327383268905691e-05, - "loss": 0.012448658794164657, - "step": 2700 - }, - { - "epoch": 0.46121057118499575, - "grad_norm": 0.14019793272018433, - "learning_rate": 7.32661401722848e-05, - "loss": 0.013477186858654022, - "step": 2705 - }, - { - "epoch": 0.4620630861040068, - "grad_norm": 0.0753963515162468, - "learning_rate": 7.325843095868872e-05, - "loss": 0.011373884975910187, - "step": 2710 - }, - { - "epoch": 0.4629156010230179, - "grad_norm": 0.07312130182981491, - "learning_rate": 7.325070505186756e-05, - "loss": 0.012329152971506118, - "step": 2715 - }, - { - "epoch": 0.463768115942029, - "grad_norm": 0.06200556829571724, - "learning_rate": 7.324296245542806e-05, - "loss": 0.008847354352474213, - "step": 2720 - }, - { - "epoch": 0.46462063086104005, - "grad_norm": 0.11015846580266953, - "learning_rate": 7.32352031729847e-05, - "loss": 0.013304698467254638, - "step": 2725 - }, - { - "epoch": 0.46547314578005117, - "grad_norm": 0.05926821380853653, - "learning_rate": 7.322742720815978e-05, - "loss": 0.011919337511062621, - "step": 2730 - }, - { - "epoch": 0.4663256606990622, - "grad_norm": 0.102846160531044, - "learning_rate": 7.321963456458337e-05, - "loss": 0.010952814668416976, - "step": 2735 - }, - { - "epoch": 0.46717817561807334, - "grad_norm": 0.10767021775245667, - "learning_rate": 7.321182524589334e-05, - "loss": 0.012438956648111343, - "step": 2740 - }, - { - "epoch": 0.4680306905370844, - "grad_norm": 0.08611919730901718, - "learning_rate": 7.320399925573534e-05, - "loss": 0.008686845004558564, - "step": 2745 - }, - { - "epoch": 0.46888320545609546, - "grad_norm": 0.07483147829771042, - "learning_rate": 7.31961565977628e-05, - "loss": 0.011065713316202163, - "step": 2750 - }, - { - "epoch": 0.4697357203751066, - "grad_norm": 0.08029857277870178, - "learning_rate": 7.318829727563696e-05, - "loss": 0.012208929657936097, - "step": 2755 - }, - { - "epoch": 0.47058823529411764, - "grad_norm": 0.09076030552387238, - "learning_rate": 7.318042129302676e-05, - "loss": 0.010283030569553375, - "step": 2760 - }, - { - "epoch": 0.47144075021312876, - "grad_norm": 0.07009804993867874, - "learning_rate": 7.317252865360902e-05, - "loss": 0.010625988245010376, - "step": 2765 - }, - { - "epoch": 0.4722932651321398, - "grad_norm": 0.07213665544986725, - "learning_rate": 7.316461936106826e-05, - "loss": 0.010299822688102723, - "step": 2770 - }, - { - "epoch": 0.4731457800511509, - "grad_norm": 0.08464398980140686, - "learning_rate": 7.315669341909679e-05, - "loss": 0.010440715402364732, - "step": 2775 - }, - { - "epoch": 0.473998294970162, - "grad_norm": 0.08878160268068314, - "learning_rate": 7.314875083139475e-05, - "loss": 0.01015128344297409, - "step": 2780 - }, - { - "epoch": 0.47485080988917305, - "grad_norm": 0.05885029211640358, - "learning_rate": 7.314079160166996e-05, - "loss": 0.00943310335278511, - "step": 2785 - }, - { - "epoch": 0.47570332480818417, - "grad_norm": 0.07288813591003418, - "learning_rate": 7.313281573363809e-05, - "loss": 0.009116576611995697, - "step": 2790 - }, - { - "epoch": 0.47655583972719523, - "grad_norm": 0.09088344126939774, - "learning_rate": 7.31248232310225e-05, - "loss": 0.010344403237104416, - "step": 2795 - }, - { - "epoch": 0.4774083546462063, - "grad_norm": 0.08182916790246964, - "learning_rate": 7.311681409755437e-05, - "loss": 0.010874876379966735, - "step": 2800 - }, - { - "epoch": 0.4782608695652174, - "grad_norm": 0.08280645310878754, - "learning_rate": 7.310878833697264e-05, - "loss": 0.007568147033452988, - "step": 2805 - }, - { - "epoch": 0.47911338448422847, - "grad_norm": 0.10462478548288345, - "learning_rate": 7.3100745953024e-05, - "loss": 0.011740683764219283, - "step": 2810 - }, - { - "epoch": 0.4799658994032396, - "grad_norm": 0.07685881853103638, - "learning_rate": 7.30926869494629e-05, - "loss": 0.009284010529518128, - "step": 2815 - }, - { - "epoch": 0.48081841432225064, - "grad_norm": 0.05211766064167023, - "learning_rate": 7.308461133005156e-05, - "loss": 0.009633362293243408, - "step": 2820 - }, - { - "epoch": 0.4816709292412617, - "grad_norm": 0.07862114161252975, - "learning_rate": 7.307651909855993e-05, - "loss": 0.012355846166610718, - "step": 2825 - }, - { - "epoch": 0.4825234441602728, - "grad_norm": 0.09950421750545502, - "learning_rate": 7.306841025876573e-05, - "loss": 0.010842062532901764, - "step": 2830 - }, - { - "epoch": 0.4833759590792839, - "grad_norm": 0.08446205407381058, - "learning_rate": 7.306028481445446e-05, - "loss": 0.008424797654151916, - "step": 2835 - }, - { - "epoch": 0.484228473998295, - "grad_norm": 0.1424778699874878, - "learning_rate": 7.305214276941934e-05, - "loss": 0.01177324503660202, - "step": 2840 - }, - { - "epoch": 0.48508098891730606, - "grad_norm": 0.07312945276498795, - "learning_rate": 7.304398412746134e-05, - "loss": 0.010038022696971894, - "step": 2845 - }, - { - "epoch": 0.4859335038363171, - "grad_norm": 0.07043888419866562, - "learning_rate": 7.303580889238917e-05, - "loss": 0.008848214149475097, - "step": 2850 - }, - { - "epoch": 0.48678601875532823, - "grad_norm": 0.09851706773042679, - "learning_rate": 7.302761706801934e-05, - "loss": 0.011452250182628632, - "step": 2855 - }, - { - "epoch": 0.4876385336743393, - "grad_norm": 0.07379815727472305, - "learning_rate": 7.301940865817604e-05, - "loss": 0.010087071359157563, - "step": 2860 - }, - { - "epoch": 0.4884910485933504, - "grad_norm": 0.12832187116146088, - "learning_rate": 7.301118366669123e-05, - "loss": 0.013372799754142762, - "step": 2865 - }, - { - "epoch": 0.48934356351236147, - "grad_norm": 0.06776788830757141, - "learning_rate": 7.300294209740462e-05, - "loss": 0.010031795501708985, - "step": 2870 - }, - { - "epoch": 0.49019607843137253, - "grad_norm": 0.06495808809995651, - "learning_rate": 7.299468395416364e-05, - "loss": 0.011152566224336625, - "step": 2875 - }, - { - "epoch": 0.49104859335038364, - "grad_norm": 0.06433792412281036, - "learning_rate": 7.298640924082346e-05, - "loss": 0.012774203717708588, - "step": 2880 - }, - { - "epoch": 0.4919011082693947, - "grad_norm": 0.066926009953022, - "learning_rate": 7.2978117961247e-05, - "loss": 0.011111211776733399, - "step": 2885 - }, - { - "epoch": 0.4927536231884058, - "grad_norm": 0.08211687207221985, - "learning_rate": 7.296981011930493e-05, - "loss": 0.009508269280195237, - "step": 2890 - }, - { - "epoch": 0.4936061381074169, - "grad_norm": 0.09815993160009384, - "learning_rate": 7.296148571887558e-05, - "loss": 0.0117066890001297, - "step": 2895 - }, - { - "epoch": 0.49445865302642794, - "grad_norm": 0.07543535530567169, - "learning_rate": 7.295314476384508e-05, - "loss": 0.008867967873811722, - "step": 2900 - }, - { - "epoch": 0.49531116794543906, - "grad_norm": 0.07558202743530273, - "learning_rate": 7.294478725810728e-05, - "loss": 0.01093400940299034, - "step": 2905 - }, - { - "epoch": 0.4961636828644501, - "grad_norm": 0.06642191112041473, - "learning_rate": 7.293641320556371e-05, - "loss": 0.008366364240646362, - "step": 2910 - }, - { - "epoch": 0.49701619778346123, - "grad_norm": 0.07226760685443878, - "learning_rate": 7.292802261012368e-05, - "loss": 0.012197307497262954, - "step": 2915 - }, - { - "epoch": 0.4978687127024723, - "grad_norm": 0.08546584844589233, - "learning_rate": 7.29196154757042e-05, - "loss": 0.010272269695997238, - "step": 2920 - }, - { - "epoch": 0.49872122762148335, - "grad_norm": 0.0559270940721035, - "learning_rate": 7.291119180622998e-05, - "loss": 0.009690707921981812, - "step": 2925 - }, - { - "epoch": 0.49957374254049447, - "grad_norm": 0.11211635917425156, - "learning_rate": 7.290275160563349e-05, - "loss": 0.01505405604839325, - "step": 2930 - }, - { - "epoch": 0.4999147485080989, - "eval_loss": 0.035044603049755096, - "eval_runtime": 3.5861, - "eval_samples_per_second": 70.272, - "eval_steps_per_second": 1.115, - "step": 2932 - }, - { - "eval_cer_subset": 0.01374436139202371, - "eval_cer_subset_edit_distance": 844, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 2932 - }, - { - "epoch": 0.5004262574595055, - "grad_norm": 0.08485773205757141, - "learning_rate": 7.289429487785488e-05, - "loss": 0.01260426789522171, - "step": 2935 - }, - { - "epoch": 0.5012787723785166, - "grad_norm": 0.08039058744907379, - "learning_rate": 7.288582162684203e-05, - "loss": 0.012322144955396653, - "step": 2940 - }, - { - "epoch": 0.5021312872975278, - "grad_norm": 0.16017615795135498, - "learning_rate": 7.287733185655057e-05, - "loss": 0.009620364010334014, - "step": 2945 - }, - { - "epoch": 0.5029838022165388, - "grad_norm": 0.06721053272485733, - "learning_rate": 7.286882557094376e-05, - "loss": 0.009893904626369476, - "step": 2950 - }, - { - "epoch": 0.5038363171355499, - "grad_norm": 0.08132930099964142, - "learning_rate": 7.286030277399264e-05, - "loss": 0.012833705544471741, - "step": 2955 - }, - { - "epoch": 0.5046888320545609, - "grad_norm": 0.09076893329620361, - "learning_rate": 7.285176346967595e-05, - "loss": 0.011492121219635009, - "step": 2960 - }, - { - "epoch": 0.505541346973572, - "grad_norm": 0.1023377999663353, - "learning_rate": 7.284320766198008e-05, - "loss": 0.01216188371181488, - "step": 2965 - }, - { - "epoch": 0.5063938618925832, - "grad_norm": 0.07568195462226868, - "learning_rate": 7.283463535489921e-05, - "loss": 0.014794780313968659, - "step": 2970 - }, - { - "epoch": 0.5072463768115942, - "grad_norm": 0.11283870786428452, - "learning_rate": 7.282604655243515e-05, - "loss": 0.012774300575256348, - "step": 2975 - }, - { - "epoch": 0.5080988917306053, - "grad_norm": 0.07101167738437653, - "learning_rate": 7.281744125859746e-05, - "loss": 0.010759322345256806, - "step": 2980 - }, - { - "epoch": 0.5089514066496164, - "grad_norm": 0.07677409052848816, - "learning_rate": 7.280881947740336e-05, - "loss": 0.010482230037450791, - "step": 2985 - }, - { - "epoch": 0.5098039215686274, - "grad_norm": 0.08568017184734344, - "learning_rate": 7.280018121287777e-05, - "loss": 0.012674462795257569, - "step": 2990 - }, - { - "epoch": 0.5106564364876386, - "grad_norm": 0.07830876111984253, - "learning_rate": 7.279152646905336e-05, - "loss": 0.009349775314331055, - "step": 2995 - }, - { - "epoch": 0.5115089514066496, - "grad_norm": 0.07408280670642853, - "learning_rate": 7.278285524997044e-05, - "loss": 0.010303238779306412, - "step": 3000 - }, - { - "epoch": 0.5123614663256607, - "grad_norm": 0.09053376317024231, - "learning_rate": 7.277416755967698e-05, - "loss": 0.012187518179416656, - "step": 3005 - }, - { - "epoch": 0.5132139812446718, - "grad_norm": 0.07432437688112259, - "learning_rate": 7.276546340222875e-05, - "loss": 0.009504207968711853, - "step": 3010 - }, - { - "epoch": 0.5140664961636828, - "grad_norm": 0.09075863659381866, - "learning_rate": 7.275674278168908e-05, - "loss": 0.010764679312705994, - "step": 3015 - }, - { - "epoch": 0.514919011082694, - "grad_norm": 0.08363319933414459, - "learning_rate": 7.274800570212909e-05, - "loss": 0.011034657061100007, - "step": 3020 - }, - { - "epoch": 0.5157715260017051, - "grad_norm": 0.08179081231355667, - "learning_rate": 7.273925216762753e-05, - "loss": 0.012276624888181686, - "step": 3025 - }, - { - "epoch": 0.5166240409207161, - "grad_norm": 0.10797501355409622, - "learning_rate": 7.273048218227083e-05, - "loss": 0.008887678384780884, - "step": 3030 - }, - { - "epoch": 0.5174765558397272, - "grad_norm": 0.08237873017787933, - "learning_rate": 7.27216957501531e-05, - "loss": 0.010879174619913102, - "step": 3035 - }, - { - "epoch": 0.5183290707587382, - "grad_norm": 0.10010047256946564, - "learning_rate": 7.271289287537616e-05, - "loss": 0.0103249654173851, - "step": 3040 - }, - { - "epoch": 0.5191815856777494, - "grad_norm": 0.06411991268396378, - "learning_rate": 7.270407356204948e-05, - "loss": 0.006414853036403656, - "step": 3045 - }, - { - "epoch": 0.5200341005967605, - "grad_norm": 0.09925824403762817, - "learning_rate": 7.26952378142902e-05, - "loss": 0.010811964422464371, - "step": 3050 - }, - { - "epoch": 0.5208866155157715, - "grad_norm": 0.07986702769994736, - "learning_rate": 7.268638563622317e-05, - "loss": 0.011965467780828475, - "step": 3055 - }, - { - "epoch": 0.5217391304347826, - "grad_norm": 0.07426656037569046, - "learning_rate": 7.267751703198082e-05, - "loss": 0.0093523807823658, - "step": 3060 - }, - { - "epoch": 0.5225916453537937, - "grad_norm": 0.11460934579372406, - "learning_rate": 7.266863200570338e-05, - "loss": 0.01224176660180092, - "step": 3065 - }, - { - "epoch": 0.5234441602728048, - "grad_norm": 0.10174648463726044, - "learning_rate": 7.265973056153864e-05, - "loss": 0.011203842610120774, - "step": 3070 - }, - { - "epoch": 0.5242966751918159, - "grad_norm": 0.06445316970348358, - "learning_rate": 7.265081270364209e-05, - "loss": 0.010346656292676925, - "step": 3075 - }, - { - "epoch": 0.525149190110827, - "grad_norm": 0.08397547155618668, - "learning_rate": 7.264187843617688e-05, - "loss": 0.011372068524360656, - "step": 3080 - }, - { - "epoch": 0.526001705029838, - "grad_norm": 0.07325135916471481, - "learning_rate": 7.263292776331384e-05, - "loss": 0.01116851419210434, - "step": 3085 - }, - { - "epoch": 0.5268542199488491, - "grad_norm": 0.1034390926361084, - "learning_rate": 7.262396068923144e-05, - "loss": 0.011953853815793992, - "step": 3090 - }, - { - "epoch": 0.5277067348678602, - "grad_norm": 0.08395690470933914, - "learning_rate": 7.26149772181158e-05, - "loss": 0.011437299847602844, - "step": 3095 - }, - { - "epoch": 0.5285592497868713, - "grad_norm": 0.09495387226343155, - "learning_rate": 7.260597735416068e-05, - "loss": 0.009634804725646973, - "step": 3100 - }, - { - "epoch": 0.5294117647058824, - "grad_norm": 0.07444775849580765, - "learning_rate": 7.259696110156756e-05, - "loss": 0.009771790355443954, - "step": 3105 - }, - { - "epoch": 0.5302642796248934, - "grad_norm": 0.061964571475982666, - "learning_rate": 7.258792846454551e-05, - "loss": 0.007979755848646164, - "step": 3110 - }, - { - "epoch": 0.5311167945439045, - "grad_norm": 0.11025935411453247, - "learning_rate": 7.257887944731125e-05, - "loss": 0.012162110209465027, - "step": 3115 - }, - { - "epoch": 0.5319693094629157, - "grad_norm": 0.07793140411376953, - "learning_rate": 7.256981405408918e-05, - "loss": 0.00897146388888359, - "step": 3120 - }, - { - "epoch": 0.5328218243819267, - "grad_norm": 0.0773436427116394, - "learning_rate": 7.256073228911132e-05, - "loss": 0.009621264040470123, - "step": 3125 - }, - { - "epoch": 0.5336743393009378, - "grad_norm": 0.07340693473815918, - "learning_rate": 7.255163415661735e-05, - "loss": 0.01072111278772354, - "step": 3130 - }, - { - "epoch": 0.5345268542199488, - "grad_norm": 0.0971943810582161, - "learning_rate": 7.254251966085455e-05, - "loss": 0.009457825869321822, - "step": 3135 - }, - { - "epoch": 0.5353793691389599, - "grad_norm": 0.08840794116258621, - "learning_rate": 7.25333888060779e-05, - "loss": 0.015866565704345702, - "step": 3140 - }, - { - "epoch": 0.5362318840579711, - "grad_norm": 0.07126007229089737, - "learning_rate": 7.252424159654999e-05, - "loss": 0.012925322353839874, - "step": 3145 - }, - { - "epoch": 0.5370843989769821, - "grad_norm": 0.05989958345890045, - "learning_rate": 7.251507803654103e-05, - "loss": 0.007374878972768784, - "step": 3150 - }, - { - "epoch": 0.5379369138959932, - "grad_norm": 0.0661931037902832, - "learning_rate": 7.250589813032885e-05, - "loss": 0.009713394194841385, - "step": 3155 - }, - { - "epoch": 0.5387894288150042, - "grad_norm": 0.0813523456454277, - "learning_rate": 7.2496701882199e-05, - "loss": 0.007980254292488099, - "step": 3160 - }, - { - "epoch": 0.5396419437340153, - "grad_norm": 0.0565156452357769, - "learning_rate": 7.248748929644453e-05, - "loss": 0.010806798934936523, - "step": 3165 - }, - { - "epoch": 0.5404944586530265, - "grad_norm": 0.045107364654541016, - "learning_rate": 7.247826037736621e-05, - "loss": 0.013011330366134643, - "step": 3170 - }, - { - "epoch": 0.5413469735720375, - "grad_norm": 0.0623495988547802, - "learning_rate": 7.246901512927241e-05, - "loss": 0.012109772115945817, - "step": 3175 - }, - { - "epoch": 0.5421994884910486, - "grad_norm": 0.09943851083517075, - "learning_rate": 7.24597535564791e-05, - "loss": 0.011384092271327972, - "step": 3180 - }, - { - "epoch": 0.5430520034100597, - "grad_norm": 0.12090208381414413, - "learning_rate": 7.245047566330991e-05, - "loss": 0.011156149953603745, - "step": 3185 - }, - { - "epoch": 0.5439045183290707, - "grad_norm": 0.10226333141326904, - "learning_rate": 7.244118145409607e-05, - "loss": 0.01164291426539421, - "step": 3190 - }, - { - "epoch": 0.5447570332480819, - "grad_norm": 0.09011051058769226, - "learning_rate": 7.24318709331764e-05, - "loss": 0.009608177840709687, - "step": 3195 - }, - { - "epoch": 0.545609548167093, - "grad_norm": 0.08180241286754608, - "learning_rate": 7.24225441048974e-05, - "loss": 0.010098953545093537, - "step": 3200 - }, - { - "epoch": 0.546462063086104, - "grad_norm": 0.08325407654047012, - "learning_rate": 7.241320097361312e-05, - "loss": 0.012687146663665771, - "step": 3205 - }, - { - "epoch": 0.5473145780051151, - "grad_norm": 0.11662351340055466, - "learning_rate": 7.240384154368523e-05, - "loss": 0.012003959715366363, - "step": 3210 - }, - { - "epoch": 0.5481670929241261, - "grad_norm": 0.05904731899499893, - "learning_rate": 7.239446581948306e-05, - "loss": 0.012311330437660218, - "step": 3215 - }, - { - "epoch": 0.5490196078431373, - "grad_norm": 0.12498651444911957, - "learning_rate": 7.238507380538347e-05, - "loss": 0.011272794008255005, - "step": 3220 - }, - { - "epoch": 0.5498721227621484, - "grad_norm": 0.06047634035348892, - "learning_rate": 7.2375665505771e-05, - "loss": 0.010353527963161469, - "step": 3225 - }, - { - "epoch": 0.5507246376811594, - "grad_norm": 0.07596508413553238, - "learning_rate": 7.236624092503774e-05, - "loss": 0.011058451980352402, - "step": 3230 - }, - { - "epoch": 0.5515771526001705, - "grad_norm": 0.10000273585319519, - "learning_rate": 7.235680006758339e-05, - "loss": 0.012288159132003785, - "step": 3235 - }, - { - "epoch": 0.5524296675191815, - "grad_norm": 0.08154033869504929, - "learning_rate": 7.234734293781527e-05, - "loss": 0.015510989725589753, - "step": 3240 - }, - { - "epoch": 0.5532821824381927, - "grad_norm": 0.10024677217006683, - "learning_rate": 7.233786954014828e-05, - "loss": 0.010542219877243042, - "step": 3245 - }, - { - "epoch": 0.5541346973572038, - "grad_norm": 0.08001844584941864, - "learning_rate": 7.232837987900492e-05, - "loss": 0.009433221817016602, - "step": 3250 - }, - { - "epoch": 0.5549872122762148, - "grad_norm": 0.05274324119091034, - "learning_rate": 7.231887395881528e-05, - "loss": 0.010475738346576691, - "step": 3255 - }, - { - "epoch": 0.5558397271952259, - "grad_norm": 0.08753672242164612, - "learning_rate": 7.230935178401703e-05, - "loss": 0.007628431916236878, - "step": 3260 - }, - { - "epoch": 0.556692242114237, - "grad_norm": 0.10221699625253677, - "learning_rate": 7.229981335905545e-05, - "loss": 0.011822684109210968, - "step": 3265 - }, - { - "epoch": 0.5575447570332481, - "grad_norm": 0.07665866613388062, - "learning_rate": 7.229025868838336e-05, - "loss": 0.010916930437088013, - "step": 3270 - }, - { - "epoch": 0.5583972719522592, - "grad_norm": 0.08861260861158371, - "learning_rate": 7.228068777646125e-05, - "loss": 0.008925830572843551, - "step": 3275 - }, - { - "epoch": 0.5592497868712702, - "grad_norm": 0.08963657170534134, - "learning_rate": 7.227110062775712e-05, - "loss": 0.014812195301055908, - "step": 3280 - }, - { - "epoch": 0.5601023017902813, - "grad_norm": 0.28550851345062256, - "learning_rate": 7.226149724674655e-05, - "loss": 0.009522277861833572, - "step": 3285 - }, - { - "epoch": 0.5609548167092924, - "grad_norm": 0.057680875062942505, - "learning_rate": 7.225187763791273e-05, - "loss": 0.012893497943878174, - "step": 3290 - }, - { - "epoch": 0.5618073316283035, - "grad_norm": 0.08956284821033478, - "learning_rate": 7.224224180574642e-05, - "loss": 0.012499228864908219, - "step": 3295 - }, - { - "epoch": 0.5626598465473146, - "grad_norm": 0.11929965764284134, - "learning_rate": 7.223258975474596e-05, - "loss": 0.010640453547239304, - "step": 3300 - }, - { - "epoch": 0.5635123614663257, - "grad_norm": 0.09788426756858826, - "learning_rate": 7.222292148941722e-05, - "loss": 0.014677588641643525, - "step": 3305 - }, - { - "epoch": 0.5643648763853367, - "grad_norm": 0.08845673501491547, - "learning_rate": 7.221323701427368e-05, - "loss": 0.009266233444213868, - "step": 3310 - }, - { - "epoch": 0.5652173913043478, - "grad_norm": 0.07864493131637573, - "learning_rate": 7.220353633383636e-05, - "loss": 0.01019999384880066, - "step": 3315 - }, - { - "epoch": 0.566069906223359, - "grad_norm": 0.07658441364765167, - "learning_rate": 7.21938194526339e-05, - "loss": 0.010098284482955933, - "step": 3320 - }, - { - "epoch": 0.56692242114237, - "grad_norm": 0.058863960206508636, - "learning_rate": 7.218408637520243e-05, - "loss": 0.01043831706047058, - "step": 3325 - }, - { - "epoch": 0.5677749360613811, - "grad_norm": 0.05992535129189491, - "learning_rate": 7.217433710608567e-05, - "loss": 0.010804108530282974, - "step": 3330 - }, - { - "epoch": 0.5686274509803921, - "grad_norm": 0.10607994347810745, - "learning_rate": 7.216457164983494e-05, - "loss": 0.01115414798259735, - "step": 3335 - }, - { - "epoch": 0.5694799658994032, - "grad_norm": 0.07557345181703568, - "learning_rate": 7.215479001100904e-05, - "loss": 0.01279982328414917, - "step": 3340 - }, - { - "epoch": 0.5703324808184144, - "grad_norm": 0.064768947660923, - "learning_rate": 7.214499219417439e-05, - "loss": 0.01112583726644516, - "step": 3345 - }, - { - "epoch": 0.5711849957374254, - "grad_norm": 0.08013112843036652, - "learning_rate": 7.213517820390492e-05, - "loss": 0.01265912652015686, - "step": 3350 - }, - { - "epoch": 0.5720375106564365, - "grad_norm": 0.06619428843259811, - "learning_rate": 7.212534804478214e-05, - "loss": 0.01231289878487587, - "step": 3355 - }, - { - "epoch": 0.5728900255754475, - "grad_norm": 0.06123036891222, - "learning_rate": 7.211550172139507e-05, - "loss": 0.012096628546714783, - "step": 3360 - }, - { - "epoch": 0.5737425404944586, - "grad_norm": 0.10050475597381592, - "learning_rate": 7.210563923834034e-05, - "loss": 0.014050082862377166, - "step": 3365 - }, - { - "epoch": 0.5745950554134698, - "grad_norm": 0.05243556201457977, - "learning_rate": 7.209576060022207e-05, - "loss": 0.009351913630962373, - "step": 3370 - }, - { - "epoch": 0.5754475703324808, - "grad_norm": 0.12591946125030518, - "learning_rate": 7.208586581165192e-05, - "loss": 0.012423963844776153, - "step": 3375 - }, - { - "epoch": 0.5763000852514919, - "grad_norm": 0.11871001869440079, - "learning_rate": 7.207595487724912e-05, - "loss": 0.014398403465747833, - "step": 3380 - }, - { - "epoch": 0.577152600170503, - "grad_norm": 0.09194283187389374, - "learning_rate": 7.206602780164044e-05, - "loss": 0.009020231664180756, - "step": 3385 - }, - { - "epoch": 0.578005115089514, - "grad_norm": 0.1465149074792862, - "learning_rate": 7.205608458946013e-05, - "loss": 0.009870749711990357, - "step": 3390 - }, - { - "epoch": 0.5788576300085252, - "grad_norm": 0.07948209345340729, - "learning_rate": 7.204612524535006e-05, - "loss": 0.013135011494159698, - "step": 3395 - }, - { - "epoch": 0.5797101449275363, - "grad_norm": 0.07187635451555252, - "learning_rate": 7.203614977395952e-05, - "loss": 0.010598786920309067, - "step": 3400 - }, - { - "epoch": 0.5805626598465473, - "grad_norm": 0.05511854961514473, - "learning_rate": 7.202615817994545e-05, - "loss": 0.009227041155099869, - "step": 3405 - }, - { - "epoch": 0.5814151747655584, - "grad_norm": 0.05830230563879013, - "learning_rate": 7.201615046797224e-05, - "loss": 0.008167321979999542, - "step": 3410 - }, - { - "epoch": 0.5822676896845694, - "grad_norm": 0.08624587953090668, - "learning_rate": 7.200612664271184e-05, - "loss": 0.012134125083684921, - "step": 3415 - }, - { - "epoch": 0.5831202046035806, - "grad_norm": 0.0744808092713356, - "learning_rate": 7.199608670884366e-05, - "loss": 0.012493259459733962, - "step": 3420 - }, - { - "epoch": 0.5839727195225917, - "grad_norm": 0.07272766530513763, - "learning_rate": 7.19860306710547e-05, - "loss": 0.00806736946105957, - "step": 3425 - }, - { - "epoch": 0.5848252344416027, - "grad_norm": 0.0804983377456665, - "learning_rate": 7.197595853403946e-05, - "loss": 0.01102890819311142, - "step": 3430 - }, - { - "epoch": 0.5856777493606138, - "grad_norm": 0.05326579511165619, - "learning_rate": 7.196587030249994e-05, - "loss": 0.009381016343832016, - "step": 3435 - }, - { - "epoch": 0.5865302642796248, - "grad_norm": 0.07588013261556625, - "learning_rate": 7.195576598114567e-05, - "loss": 0.010961712896823883, - "step": 3440 - }, - { - "epoch": 0.587382779198636, - "grad_norm": 0.09725244343280792, - "learning_rate": 7.194564557469368e-05, - "loss": 0.012034715712070465, - "step": 3445 - }, - { - "epoch": 0.5882352941176471, - "grad_norm": 0.0938539057970047, - "learning_rate": 7.193550908786851e-05, - "loss": 0.012069541215896606, - "step": 3450 - }, - { - "epoch": 0.5890878090366581, - "grad_norm": 0.052410729229450226, - "learning_rate": 7.19253565254022e-05, - "loss": 0.011174223572015762, - "step": 3455 - }, - { - "epoch": 0.5899403239556692, - "grad_norm": 0.08317258954048157, - "learning_rate": 7.191518789203432e-05, - "loss": 0.014452503621578216, - "step": 3460 - }, - { - "epoch": 0.5907928388746803, - "grad_norm": 0.062619149684906, - "learning_rate": 7.190500319251193e-05, - "loss": 0.012830793857574463, - "step": 3465 - }, - { - "epoch": 0.5916453537936914, - "grad_norm": 0.06287284195423126, - "learning_rate": 7.189480243158956e-05, - "loss": 0.013282649219036102, - "step": 3470 - }, - { - "epoch": 0.5924978687127025, - "grad_norm": 0.07136182487010956, - "learning_rate": 7.188458561402928e-05, - "loss": 0.009024892747402192, - "step": 3475 - }, - { - "epoch": 0.5933503836317136, - "grad_norm": 0.09081269055604935, - "learning_rate": 7.187435274460064e-05, - "loss": 0.012044035643339158, - "step": 3480 - }, - { - "epoch": 0.5942028985507246, - "grad_norm": 0.08475978672504425, - "learning_rate": 7.18641038280807e-05, - "loss": 0.010785829275846481, - "step": 3485 - }, - { - "epoch": 0.5950554134697357, - "grad_norm": 0.06322979927062988, - "learning_rate": 7.185383886925397e-05, - "loss": 0.011609486490488052, - "step": 3490 - }, - { - "epoch": 0.5959079283887468, - "grad_norm": 0.07065978646278381, - "learning_rate": 7.18435578729125e-05, - "loss": 0.01128239706158638, - "step": 3495 - }, - { - "epoch": 0.5967604433077579, - "grad_norm": 0.057962607592344284, - "learning_rate": 7.183326084385577e-05, - "loss": 0.009382489323616027, - "step": 3500 - }, - { - "epoch": 0.597612958226769, - "grad_norm": 0.05717672407627106, - "learning_rate": 7.182294778689079e-05, - "loss": 0.010072773694992066, - "step": 3505 - }, - { - "epoch": 0.59846547314578, - "grad_norm": 0.07161569595336914, - "learning_rate": 7.181261870683205e-05, - "loss": 0.011324245482683182, - "step": 3510 - }, - { - "epoch": 0.5993179880647911, - "grad_norm": 0.07468906790018082, - "learning_rate": 7.180227360850148e-05, - "loss": 0.00984283909201622, - "step": 3515 - }, - { - "epoch": 0.6001705029838023, - "grad_norm": 0.071560800075531, - "learning_rate": 7.179191249672855e-05, - "loss": 0.011276674270629884, - "step": 3520 - }, - { - "epoch": 0.6010230179028133, - "grad_norm": 0.05588390305638313, - "learning_rate": 7.178153537635014e-05, - "loss": 0.008921106159687043, - "step": 3525 - }, - { - "epoch": 0.6018755328218244, - "grad_norm": 0.11065732687711716, - "learning_rate": 7.177114225221066e-05, - "loss": 0.0122377447783947, - "step": 3530 - }, - { - "epoch": 0.6027280477408354, - "grad_norm": 0.10121116787195206, - "learning_rate": 7.176073312916194e-05, - "loss": 0.007999545335769654, - "step": 3535 - }, - { - "epoch": 0.6035805626598465, - "grad_norm": 0.06102030724287033, - "learning_rate": 7.175030801206335e-05, - "loss": 0.008767658472061157, - "step": 3540 - }, - { - "epoch": 0.6044330775788577, - "grad_norm": 0.08233699947595596, - "learning_rate": 7.173986690578164e-05, - "loss": 0.010089017450809479, - "step": 3545 - }, - { - "epoch": 0.6052855924978687, - "grad_norm": 0.1655152440071106, - "learning_rate": 7.172940981519108e-05, - "loss": 0.012077460438013077, - "step": 3550 - }, - { - "epoch": 0.6061381074168798, - "grad_norm": 0.11178915202617645, - "learning_rate": 7.171893674517337e-05, - "loss": 0.009319285303354264, - "step": 3555 - }, - { - "epoch": 0.6069906223358909, - "grad_norm": 0.0778600424528122, - "learning_rate": 7.170844770061772e-05, - "loss": 0.012114962190389633, - "step": 3560 - }, - { - "epoch": 0.6078431372549019, - "grad_norm": 0.08708171546459198, - "learning_rate": 7.169794268642075e-05, - "loss": 0.011569589376449585, - "step": 3565 - }, - { - "epoch": 0.6086956521739131, - "grad_norm": 0.06438080966472626, - "learning_rate": 7.168742170748654e-05, - "loss": 0.010296766459941865, - "step": 3570 - }, - { - "epoch": 0.6095481670929241, - "grad_norm": 0.10569975525140762, - "learning_rate": 7.167688476872664e-05, - "loss": 0.008922196924686432, - "step": 3575 - }, - { - "epoch": 0.6104006820119352, - "grad_norm": 0.07466918975114822, - "learning_rate": 7.166633187506004e-05, - "loss": 0.009365256130695342, - "step": 3580 - }, - { - "epoch": 0.6112531969309463, - "grad_norm": 0.1070641577243805, - "learning_rate": 7.16557630314132e-05, - "loss": 0.011525402963161468, - "step": 3585 - }, - { - "epoch": 0.6121057118499573, - "grad_norm": 0.09534542262554169, - "learning_rate": 7.164517824271999e-05, - "loss": 0.010068083554506302, - "step": 3590 - }, - { - "epoch": 0.6129582267689685, - "grad_norm": 0.0643506869673729, - "learning_rate": 7.163457751392175e-05, - "loss": 0.010679592937231063, - "step": 3595 - }, - { - "epoch": 0.6138107416879796, - "grad_norm": 0.11610018461942673, - "learning_rate": 7.162396084996723e-05, - "loss": 0.010074391961097717, - "step": 3600 - }, - { - "epoch": 0.6146632566069906, - "grad_norm": 0.07560709863901138, - "learning_rate": 7.161332825581269e-05, - "loss": 0.013245916366577149, - "step": 3605 - }, - { - "epoch": 0.6155157715260017, - "grad_norm": 0.06540799885988235, - "learning_rate": 7.160267973642173e-05, - "loss": 0.01055695340037346, - "step": 3610 - }, - { - "epoch": 0.6163682864450127, - "grad_norm": 0.05610837787389755, - "learning_rate": 7.159201529676546e-05, - "loss": 0.010231484472751618, - "step": 3615 - }, - { - "epoch": 0.6172208013640239, - "grad_norm": 0.11630856245756149, - "learning_rate": 7.158133494182237e-05, - "loss": 0.01117742881178856, - "step": 3620 - }, - { - "epoch": 0.618073316283035, - "grad_norm": 0.08508500456809998, - "learning_rate": 7.157063867657844e-05, - "loss": 0.010253986716270447, - "step": 3625 - }, - { - "epoch": 0.618925831202046, - "grad_norm": 0.067935511469841, - "learning_rate": 7.155992650602702e-05, - "loss": 0.009731527417898178, - "step": 3630 - }, - { - "epoch": 0.6197783461210571, - "grad_norm": 0.0784364566206932, - "learning_rate": 7.154919843516892e-05, - "loss": 0.009552852809429168, - "step": 3635 - }, - { - "epoch": 0.6206308610400681, - "grad_norm": 0.10788855701684952, - "learning_rate": 7.153845446901234e-05, - "loss": 0.011269643902778625, - "step": 3640 - }, - { - "epoch": 0.6214833759590793, - "grad_norm": 0.08664087951183319, - "learning_rate": 7.152769461257294e-05, - "loss": 0.010251335799694061, - "step": 3645 - }, - { - "epoch": 0.6223358908780904, - "grad_norm": 0.06885403394699097, - "learning_rate": 7.151691887087377e-05, - "loss": 0.008078257739543914, - "step": 3650 - }, - { - "epoch": 0.6231884057971014, - "grad_norm": 0.09345501661300659, - "learning_rate": 7.150612724894531e-05, - "loss": 0.012022207677364349, - "step": 3655 - }, - { - "epoch": 0.6240409207161125, - "grad_norm": 0.08502865582704544, - "learning_rate": 7.149531975182543e-05, - "loss": 0.00932946428656578, - "step": 3660 - }, - { - "epoch": 0.6248934356351236, - "grad_norm": 0.06249995157122612, - "learning_rate": 7.148449638455947e-05, - "loss": 0.011525212973356246, - "step": 3665 - }, - { - "epoch": 0.6257459505541347, - "grad_norm": 0.04836896434426308, - "learning_rate": 7.14736571522001e-05, - "loss": 0.010347714275121688, - "step": 3670 - }, - { - "epoch": 0.6265984654731458, - "grad_norm": 0.06358285248279572, - "learning_rate": 7.146280205980745e-05, - "loss": 0.009779715538024902, - "step": 3675 - }, - { - "epoch": 0.6274509803921569, - "grad_norm": 0.07596850395202637, - "learning_rate": 7.145193111244903e-05, - "loss": 0.010838811099529267, - "step": 3680 - }, - { - "epoch": 0.6283034953111679, - "grad_norm": 0.05986448749899864, - "learning_rate": 7.144104431519977e-05, - "loss": 0.009671849757432937, - "step": 3685 - }, - { - "epoch": 0.629156010230179, - "grad_norm": 0.047881439328193665, - "learning_rate": 7.143014167314197e-05, - "loss": 0.007660867273807525, - "step": 3690 - }, - { - "epoch": 0.6300085251491901, - "grad_norm": 0.06409293413162231, - "learning_rate": 7.141922319136537e-05, - "loss": 0.013374905288219451, - "step": 3695 - }, - { - "epoch": 0.6308610400682012, - "grad_norm": 0.0767306461930275, - "learning_rate": 7.140828887496707e-05, - "loss": 0.006885652989149093, - "step": 3700 - }, - { - "epoch": 0.6317135549872123, - "grad_norm": 0.08192065358161926, - "learning_rate": 7.139733872905158e-05, - "loss": 0.013760556280612946, - "step": 3705 - }, - { - "epoch": 0.6325660699062233, - "grad_norm": 0.09693574160337448, - "learning_rate": 7.138637275873078e-05, - "loss": 0.009739194065332413, - "step": 3710 - }, - { - "epoch": 0.6334185848252344, - "grad_norm": 0.08232755959033966, - "learning_rate": 7.137539096912395e-05, - "loss": 0.010294197499752045, - "step": 3715 - }, - { - "epoch": 0.6342710997442456, - "grad_norm": 0.06582340598106384, - "learning_rate": 7.136439336535776e-05, - "loss": 0.010686574131250381, - "step": 3720 - }, - { - "epoch": 0.6351236146632566, - "grad_norm": 0.07385887205600739, - "learning_rate": 7.135337995256626e-05, - "loss": 0.011403677612543106, - "step": 3725 - }, - { - "epoch": 0.6359761295822677, - "grad_norm": 0.11854248493909836, - "learning_rate": 7.134235073589087e-05, - "loss": 0.01180308759212494, - "step": 3730 - }, - { - "epoch": 0.6368286445012787, - "grad_norm": 0.076481893658638, - "learning_rate": 7.133130572048041e-05, - "loss": 0.011076596379280091, - "step": 3735 - }, - { - "epoch": 0.6376811594202898, - "grad_norm": 0.09552651643753052, - "learning_rate": 7.132024491149103e-05, - "loss": 0.014420199394226074, - "step": 3740 - }, - { - "epoch": 0.638533674339301, - "grad_norm": 0.04855124279856682, - "learning_rate": 7.130916831408633e-05, - "loss": 0.008350960910320282, - "step": 3745 - }, - { - "epoch": 0.639386189258312, - "grad_norm": 0.0796368345618248, - "learning_rate": 7.12980759334372e-05, - "loss": 0.010764746367931366, - "step": 3750 - }, - { - "epoch": 0.6402387041773231, - "grad_norm": 0.07030697911977768, - "learning_rate": 7.128696777472193e-05, - "loss": 0.010386807471513748, - "step": 3755 - }, - { - "epoch": 0.6410912190963342, - "grad_norm": 0.05930609628558159, - "learning_rate": 7.127584384312619e-05, - "loss": 0.008884093910455703, - "step": 3760 - }, - { - "epoch": 0.6419437340153452, - "grad_norm": 0.07495228201150894, - "learning_rate": 7.126470414384299e-05, - "loss": 0.010249865800142288, - "step": 3765 - }, - { - "epoch": 0.6427962489343564, - "grad_norm": 0.12954963743686676, - "learning_rate": 7.125354868207275e-05, - "loss": 0.013017497956752777, - "step": 3770 - }, - { - "epoch": 0.6436487638533674, - "grad_norm": 0.08893310278654099, - "learning_rate": 7.124237746302317e-05, - "loss": 0.010649867355823517, - "step": 3775 - }, - { - "epoch": 0.6445012787723785, - "grad_norm": 0.08650866150856018, - "learning_rate": 7.123119049190935e-05, - "loss": 0.012544044852256775, - "step": 3780 - }, - { - "epoch": 0.6453537936913896, - "grad_norm": 0.06374052166938782, - "learning_rate": 7.121998777395375e-05, - "loss": 0.007669864594936371, - "step": 3785 - }, - { - "epoch": 0.6462063086104007, - "grad_norm": 0.08226713538169861, - "learning_rate": 7.120876931438618e-05, - "loss": 0.007969621568918228, - "step": 3790 - }, - { - "epoch": 0.6470588235294118, - "grad_norm": 0.10450884699821472, - "learning_rate": 7.119753511844377e-05, - "loss": 0.013088032603263855, - "step": 3795 - }, - { - "epoch": 0.6479113384484229, - "grad_norm": 0.08459076285362244, - "learning_rate": 7.118628519137104e-05, - "loss": 0.01125529408454895, - "step": 3800 - }, - { - "epoch": 0.6487638533674339, - "grad_norm": 0.09018636494874954, - "learning_rate": 7.11750195384198e-05, - "loss": 0.008683501929044723, - "step": 3805 - }, - { - "epoch": 0.649616368286445, - "grad_norm": 0.07949680835008621, - "learning_rate": 7.116373816484927e-05, - "loss": 0.008904790878295899, - "step": 3810 - }, - { - "epoch": 0.6504688832054561, - "grad_norm": 0.14297716319561005, - "learning_rate": 7.115244107592593e-05, - "loss": 0.01503775417804718, - "step": 3815 - }, - { - "epoch": 0.6513213981244672, - "grad_norm": 0.051478032022714615, - "learning_rate": 7.114112827692367e-05, - "loss": 0.011145923286676407, - "step": 3820 - }, - { - "epoch": 0.6521739130434783, - "grad_norm": 0.0686139240860939, - "learning_rate": 7.112979977312365e-05, - "loss": 0.009445450454950332, - "step": 3825 - }, - { - "epoch": 0.6530264279624893, - "grad_norm": 0.08674909919500351, - "learning_rate": 7.111845556981444e-05, - "loss": 0.009345399588346482, - "step": 3830 - }, - { - "epoch": 0.6538789428815004, - "grad_norm": 0.07799270749092102, - "learning_rate": 7.110709567229182e-05, - "loss": 0.009722919762134552, - "step": 3835 - }, - { - "epoch": 0.6547314578005116, - "grad_norm": 0.07891912013292313, - "learning_rate": 7.109572008585905e-05, - "loss": 0.009985177218914032, - "step": 3840 - }, - { - "epoch": 0.6555839727195226, - "grad_norm": 0.07315738499164581, - "learning_rate": 7.108432881582656e-05, - "loss": 0.011729113757610321, - "step": 3845 - }, - { - "epoch": 0.6564364876385337, - "grad_norm": 0.04961124807596207, - "learning_rate": 7.107292186751222e-05, - "loss": 0.008087723702192306, - "step": 3850 - }, - { - "epoch": 0.6572890025575447, - "grad_norm": 0.0745200589299202, - "learning_rate": 7.106149924624115e-05, - "loss": 0.010474404692649842, - "step": 3855 - }, - { - "epoch": 0.6581415174765558, - "grad_norm": 0.06290512531995773, - "learning_rate": 7.105006095734581e-05, - "loss": 0.009356130659580231, - "step": 3860 - }, - { - "epoch": 0.658994032395567, - "grad_norm": 0.058479905128479004, - "learning_rate": 7.1038607006166e-05, - "loss": 0.008637580275535583, - "step": 3865 - }, - { - "epoch": 0.659846547314578, - "grad_norm": 0.07301484048366547, - "learning_rate": 7.102713739804879e-05, - "loss": 0.015610474348068237, - "step": 3870 - }, - { - "epoch": 0.6606990622335891, - "grad_norm": 0.07421465218067169, - "learning_rate": 7.101565213834855e-05, - "loss": 0.011201824992895126, - "step": 3875 - }, - { - "epoch": 0.6615515771526002, - "grad_norm": 0.06928746402263641, - "learning_rate": 7.100415123242701e-05, - "loss": 0.007224821299314499, - "step": 3880 - }, - { - "epoch": 0.6624040920716112, - "grad_norm": 0.0669165551662445, - "learning_rate": 7.099263468565317e-05, - "loss": 0.007274401932954788, - "step": 3885 - }, - { - "epoch": 0.6632566069906224, - "grad_norm": 0.09326919168233871, - "learning_rate": 7.098110250340334e-05, - "loss": 0.008258016407489776, - "step": 3890 - }, - { - "epoch": 0.6641091219096334, - "grad_norm": 0.07563190162181854, - "learning_rate": 7.096955469106111e-05, - "loss": 0.01005811095237732, - "step": 3895 - }, - { - "epoch": 0.6649616368286445, - "grad_norm": 0.10135438293218613, - "learning_rate": 7.09579912540174e-05, - "loss": 0.009129725396633148, - "step": 3900 - }, - { - "epoch": 0.6658141517476556, - "grad_norm": 0.07946127653121948, - "learning_rate": 7.094641219767041e-05, - "loss": 0.013300496339797973, - "step": 3905 - }, - { - "epoch": 0.6666666666666666, - "grad_norm": 0.0727713331580162, - "learning_rate": 7.093481752742561e-05, - "loss": 0.01028701215982437, - "step": 3910 - }, - { - "epoch": 0.6675191815856778, - "grad_norm": 0.0718616396188736, - "learning_rate": 7.092320724869578e-05, - "loss": 0.009694813191890717, - "step": 3915 - }, - { - "epoch": 0.6683716965046889, - "grad_norm": 0.07789818942546844, - "learning_rate": 7.091158136690102e-05, - "loss": 0.009028838574886322, - "step": 3920 - }, - { - "epoch": 0.6692242114236999, - "grad_norm": 0.07319378852844238, - "learning_rate": 7.089993988746862e-05, - "loss": 0.008582034707069397, - "step": 3925 - }, - { - "epoch": 0.670076726342711, - "grad_norm": 0.086976557970047, - "learning_rate": 7.088828281583326e-05, - "loss": 0.013991822302341462, - "step": 3930 - }, - { - "epoch": 0.670929241261722, - "grad_norm": 0.07413294911384583, - "learning_rate": 7.087661015743681e-05, - "loss": 0.010896880924701691, - "step": 3935 - }, - { - "epoch": 0.6717817561807332, - "grad_norm": 0.12066303938627243, - "learning_rate": 7.08649219177285e-05, - "loss": 0.011574408411979676, - "step": 3940 - }, - { - "epoch": 0.6726342710997443, - "grad_norm": 0.11789914965629578, - "learning_rate": 7.085321810216474e-05, - "loss": 0.011523760855197906, - "step": 3945 - }, - { - "epoch": 0.6734867860187553, - "grad_norm": 0.07654725015163422, - "learning_rate": 7.084149871620929e-05, - "loss": 0.010388451814651489, - "step": 3950 - }, - { - "epoch": 0.6743393009377664, - "grad_norm": 0.05072671175003052, - "learning_rate": 7.082976376533315e-05, - "loss": 0.009674163907766343, - "step": 3955 - }, - { - "epoch": 0.6751918158567775, - "grad_norm": 0.08331634104251862, - "learning_rate": 7.081801325501458e-05, - "loss": 0.01096268892288208, - "step": 3960 - }, - { - "epoch": 0.6760443307757886, - "grad_norm": 0.03134739026427269, - "learning_rate": 7.08062471907391e-05, - "loss": 0.009522407501935958, - "step": 3965 - }, - { - "epoch": 0.6768968456947997, - "grad_norm": 0.09123755246400833, - "learning_rate": 7.079446557799951e-05, - "loss": 0.011530914902687072, - "step": 3970 - }, - { - "epoch": 0.6777493606138107, - "grad_norm": 0.04438139498233795, - "learning_rate": 7.078266842229585e-05, - "loss": 0.007757561653852463, - "step": 3975 - }, - { - "epoch": 0.6786018755328218, - "grad_norm": 0.06562457978725433, - "learning_rate": 7.077085572913543e-05, - "loss": 0.010243573784828186, - "step": 3980 - }, - { - "epoch": 0.6794543904518329, - "grad_norm": 0.08872365206480026, - "learning_rate": 7.075902750403283e-05, - "loss": 0.009365381300449371, - "step": 3985 - }, - { - "epoch": 0.680306905370844, - "grad_norm": 0.06977558881044388, - "learning_rate": 7.074718375250982e-05, - "loss": 0.010138784348964692, - "step": 3990 - }, - { - "epoch": 0.6811594202898551, - "grad_norm": 0.08196771889925003, - "learning_rate": 7.073532448009547e-05, - "loss": 0.011172623187303544, - "step": 3995 - }, - { - "epoch": 0.6820119352088662, - "grad_norm": 0.09635947644710541, - "learning_rate": 7.072344969232611e-05, - "loss": 0.011570926010608672, - "step": 4000 - }, - { - "epoch": 0.6828644501278772, - "grad_norm": 0.10703961551189423, - "learning_rate": 7.071155939474525e-05, - "loss": 0.010987398028373719, - "step": 4005 - }, - { - "epoch": 0.6837169650468883, - "grad_norm": 0.08124027401208878, - "learning_rate": 7.06996535929037e-05, - "loss": 0.009500280767679215, - "step": 4010 - }, - { - "epoch": 0.6845694799658995, - "grad_norm": 0.1272915005683899, - "learning_rate": 7.068773229235946e-05, - "loss": 0.011316341906785965, - "step": 4015 - }, - { - "epoch": 0.6854219948849105, - "grad_norm": 0.05040539428591728, - "learning_rate": 7.067579549867782e-05, - "loss": 0.009714095294475556, - "step": 4020 - }, - { - "epoch": 0.6862745098039216, - "grad_norm": 0.0954902172088623, - "learning_rate": 7.066384321743125e-05, - "loss": 0.01280902624130249, - "step": 4025 - }, - { - "epoch": 0.6871270247229326, - "grad_norm": 0.06131720915436745, - "learning_rate": 7.065187545419947e-05, - "loss": 0.00962383598089218, - "step": 4030 - }, - { - "epoch": 0.6879795396419437, - "grad_norm": 0.09720136225223541, - "learning_rate": 7.063989221456946e-05, - "loss": 0.00951121300458908, - "step": 4035 - }, - { - "epoch": 0.6888320545609549, - "grad_norm": 0.09116765856742859, - "learning_rate": 7.062789350413536e-05, - "loss": 0.012013505399227142, - "step": 4040 - }, - { - "epoch": 0.6896845694799659, - "grad_norm": 0.15665945410728455, - "learning_rate": 7.061587932849858e-05, - "loss": 0.012792985141277313, - "step": 4045 - }, - { - "epoch": 0.690537084398977, - "grad_norm": 0.05531725287437439, - "learning_rate": 7.060384969326775e-05, - "loss": 0.009135130047798156, - "step": 4050 - }, - { - "epoch": 0.691389599317988, - "grad_norm": 0.05050938203930855, - "learning_rate": 7.059180460405869e-05, - "loss": 0.01005997508764267, - "step": 4055 - }, - { - "epoch": 0.6922421142369991, - "grad_norm": 0.08903607726097107, - "learning_rate": 7.057974406649444e-05, - "loss": 0.008456438779830933, - "step": 4060 - }, - { - "epoch": 0.6930946291560103, - "grad_norm": 0.1395196169614792, - "learning_rate": 7.056766808620529e-05, - "loss": 0.012946255505084991, - "step": 4065 - }, - { - "epoch": 0.6939471440750213, - "grad_norm": 0.13844923675060272, - "learning_rate": 7.055557666882866e-05, - "loss": 0.008691005408763885, - "step": 4070 - }, - { - "epoch": 0.6947996589940324, - "grad_norm": 0.0776091143488884, - "learning_rate": 7.054346982000928e-05, - "loss": 0.011200450360774994, - "step": 4075 - }, - { - "epoch": 0.6956521739130435, - "grad_norm": 0.06444083154201508, - "learning_rate": 7.0531347545399e-05, - "loss": 0.010937537997961044, - "step": 4080 - }, - { - "epoch": 0.6965046888320545, - "grad_norm": 0.07561453431844711, - "learning_rate": 7.05192098506569e-05, - "loss": 0.00827426165342331, - "step": 4085 - }, - { - "epoch": 0.6973572037510657, - "grad_norm": 0.06595294177532196, - "learning_rate": 7.050705674144927e-05, - "loss": 0.007974696159362794, - "step": 4090 - }, - { - "epoch": 0.6982097186700768, - "grad_norm": 0.08887284994125366, - "learning_rate": 7.049488822344959e-05, - "loss": 0.009547770768404008, - "step": 4095 - }, - { - "epoch": 0.6990622335890878, - "grad_norm": 0.06858290731906891, - "learning_rate": 7.04827043023385e-05, - "loss": 0.012419018894433975, - "step": 4100 - }, - { - "epoch": 0.6999147485080989, - "grad_norm": 0.09107037633657455, - "learning_rate": 7.047050498380391e-05, - "loss": 0.008159243315458298, - "step": 4105 - }, - { - "epoch": 0.7007672634271099, - "grad_norm": 0.062287479639053345, - "learning_rate": 7.045829027354082e-05, - "loss": 0.00995248556137085, - "step": 4110 - }, - { - "epoch": 0.7016197783461211, - "grad_norm": 0.11668206751346588, - "learning_rate": 7.044606017725148e-05, - "loss": 0.012902414798736573, - "step": 4115 - }, - { - "epoch": 0.7024722932651322, - "grad_norm": 0.08674585819244385, - "learning_rate": 7.043381470064532e-05, - "loss": 0.010076310485601425, - "step": 4120 - }, - { - "epoch": 0.7033248081841432, - "grad_norm": 0.09690031409263611, - "learning_rate": 7.042155384943892e-05, - "loss": 0.011086124181747436, - "step": 4125 - }, - { - "epoch": 0.7041773231031543, - "grad_norm": 0.09527027606964111, - "learning_rate": 7.040927762935605e-05, - "loss": 0.010631310194730759, - "step": 4130 - }, - { - "epoch": 0.7050298380221653, - "grad_norm": 0.07089316844940186, - "learning_rate": 7.039698604612765e-05, - "loss": 0.010472215712070465, - "step": 4135 - }, - { - "epoch": 0.7058823529411765, - "grad_norm": 0.07314343005418777, - "learning_rate": 7.038467910549188e-05, - "loss": 0.011205179244279861, - "step": 4140 - }, - { - "epoch": 0.7067348678601876, - "grad_norm": 0.10004976391792297, - "learning_rate": 7.037235681319399e-05, - "loss": 0.011671188473701476, - "step": 4145 - }, - { - "epoch": 0.7075873827791986, - "grad_norm": 0.06655722856521606, - "learning_rate": 7.036001917498645e-05, - "loss": 0.008725546300411224, - "step": 4150 - }, - { - "epoch": 0.7084398976982097, - "grad_norm": 0.0563860684633255, - "learning_rate": 7.034766619662888e-05, - "loss": 0.009952855855226516, - "step": 4155 - }, - { - "epoch": 0.7092924126172208, - "grad_norm": 0.09032288193702698, - "learning_rate": 7.033529788388806e-05, - "loss": 0.010940121859312058, - "step": 4160 - }, - { - "epoch": 0.7101449275362319, - "grad_norm": 0.10090665519237518, - "learning_rate": 7.032291424253793e-05, - "loss": 0.0093452550470829, - "step": 4165 - }, - { - "epoch": 0.710997442455243, - "grad_norm": 0.08737532049417496, - "learning_rate": 7.03105152783596e-05, - "loss": 0.011567962169647217, - "step": 4170 - }, - { - "epoch": 0.711849957374254, - "grad_norm": 0.08184633404016495, - "learning_rate": 7.029810099714128e-05, - "loss": 0.011243235319852829, - "step": 4175 - }, - { - "epoch": 0.7127024722932651, - "grad_norm": 0.10227608680725098, - "learning_rate": 7.028567140467842e-05, - "loss": 0.01062348037958145, - "step": 4180 - }, - { - "epoch": 0.7135549872122762, - "grad_norm": 0.08998764306306839, - "learning_rate": 7.027322650677353e-05, - "loss": 0.01058843582868576, - "step": 4185 - }, - { - "epoch": 0.7144075021312873, - "grad_norm": 0.06969588994979858, - "learning_rate": 7.02607663092363e-05, - "loss": 0.009745591133832932, - "step": 4190 - }, - { - "epoch": 0.7152600170502984, - "grad_norm": 0.08256277441978455, - "learning_rate": 7.024829081788359e-05, - "loss": 0.009450466185808182, - "step": 4195 - }, - { - "epoch": 0.7161125319693095, - "grad_norm": 0.06720574200153351, - "learning_rate": 7.023580003853937e-05, - "loss": 0.006700781732797622, - "step": 4200 - }, - { - "epoch": 0.7169650468883205, - "grad_norm": 0.1428842693567276, - "learning_rate": 7.022329397703474e-05, - "loss": 0.009295140206813813, - "step": 4205 - }, - { - "epoch": 0.7178175618073316, - "grad_norm": 0.11677515506744385, - "learning_rate": 7.021077263920794e-05, - "loss": 0.011417123675346374, - "step": 4210 - }, - { - "epoch": 0.7186700767263428, - "grad_norm": 0.06874742358922958, - "learning_rate": 7.019823603090437e-05, - "loss": 0.013518881797790528, - "step": 4215 - }, - { - "epoch": 0.7195225916453538, - "grad_norm": 0.06695922464132309, - "learning_rate": 7.018568415797651e-05, - "loss": 0.008886832743883133, - "step": 4220 - }, - { - "epoch": 0.7203751065643649, - "grad_norm": 0.09428033232688904, - "learning_rate": 7.017311702628402e-05, - "loss": 0.009926854819059371, - "step": 4225 - }, - { - "epoch": 0.7212276214833759, - "grad_norm": 0.08420582115650177, - "learning_rate": 7.016053464169362e-05, - "loss": 0.011952979117631912, - "step": 4230 - }, - { - "epoch": 0.722080136402387, - "grad_norm": 0.07804932445287704, - "learning_rate": 7.014793701007922e-05, - "loss": 0.009345601499080657, - "step": 4235 - }, - { - "epoch": 0.7229326513213982, - "grad_norm": 0.10204415768384933, - "learning_rate": 7.013532413732179e-05, - "loss": 0.009596188366413117, - "step": 4240 - }, - { - "epoch": 0.7237851662404092, - "grad_norm": 0.06207561865448952, - "learning_rate": 7.012269602930946e-05, - "loss": 0.010864783823490144, - "step": 4245 - }, - { - "epoch": 0.7246376811594203, - "grad_norm": 0.07258995622396469, - "learning_rate": 7.011005269193743e-05, - "loss": 0.010417935252189637, - "step": 4250 - }, - { - "epoch": 0.7254901960784313, - "grad_norm": 0.09797866642475128, - "learning_rate": 7.009739413110803e-05, - "loss": 0.009975450485944748, - "step": 4255 - }, - { - "epoch": 0.7263427109974424, - "grad_norm": 0.14229723811149597, - "learning_rate": 7.008472035273071e-05, - "loss": 0.013093425333499909, - "step": 4260 - }, - { - "epoch": 0.7271952259164536, - "grad_norm": 0.10052912682294846, - "learning_rate": 7.007203136272202e-05, - "loss": 0.008044174313545227, - "step": 4265 - }, - { - "epoch": 0.7280477408354646, - "grad_norm": 0.06391840428113937, - "learning_rate": 7.005932716700558e-05, - "loss": 0.009895801544189453, - "step": 4270 - }, - { - "epoch": 0.7289002557544757, - "grad_norm": 0.08301703631877899, - "learning_rate": 7.004660777151213e-05, - "loss": 0.008399789780378341, - "step": 4275 - }, - { - "epoch": 0.7297527706734868, - "grad_norm": 0.09191301465034485, - "learning_rate": 7.003387318217954e-05, - "loss": 0.010163726657629013, - "step": 4280 - }, - { - "epoch": 0.7306052855924978, - "grad_norm": 0.06292443722486496, - "learning_rate": 7.00211234049527e-05, - "loss": 0.010847686976194381, - "step": 4285 - }, - { - "epoch": 0.731457800511509, - "grad_norm": 0.08020442724227905, - "learning_rate": 7.000835844578365e-05, - "loss": 0.010198664665222169, - "step": 4290 - }, - { - "epoch": 0.73231031543052, - "grad_norm": 0.08008337765932083, - "learning_rate": 6.999557831063152e-05, - "loss": 0.010532062500715256, - "step": 4295 - }, - { - "epoch": 0.7331628303495311, - "grad_norm": 0.09209048002958298, - "learning_rate": 6.998278300546245e-05, - "loss": 0.012655872106552123, - "step": 4300 - }, - { - "epoch": 0.7340153452685422, - "grad_norm": 0.1040966734290123, - "learning_rate": 6.996997253624974e-05, - "loss": 0.009482499212026596, - "step": 4305 - }, - { - "epoch": 0.7348678601875532, - "grad_norm": 0.06724270433187485, - "learning_rate": 6.995714690897376e-05, - "loss": 0.008470554649829865, - "step": 4310 - }, - { - "epoch": 0.7357203751065644, - "grad_norm": 0.050487734377384186, - "learning_rate": 6.994430612962192e-05, - "loss": 0.009740649163722992, - "step": 4315 - }, - { - "epoch": 0.7365728900255755, - "grad_norm": 0.07633031159639359, - "learning_rate": 6.993145020418873e-05, - "loss": 0.009455478191375733, - "step": 4320 - }, - { - "epoch": 0.7374254049445865, - "grad_norm": 0.11053632944822311, - "learning_rate": 6.991857913867575e-05, - "loss": 0.0111383855342865, - "step": 4325 - }, - { - "epoch": 0.7382779198635976, - "grad_norm": 0.07932359725236893, - "learning_rate": 6.990569293909165e-05, - "loss": 0.010170862078666687, - "step": 4330 - }, - { - "epoch": 0.7391304347826086, - "grad_norm": 0.06205928325653076, - "learning_rate": 6.98927916114521e-05, - "loss": 0.009742221236228943, - "step": 4335 - }, - { - "epoch": 0.7399829497016198, - "grad_norm": 0.07431669533252716, - "learning_rate": 6.987987516177989e-05, - "loss": 0.009026934206485749, - "step": 4340 - }, - { - "epoch": 0.7408354646206309, - "grad_norm": 0.06495700776576996, - "learning_rate": 6.986694359610486e-05, - "loss": 0.010289526730775832, - "step": 4345 - }, - { - "epoch": 0.7416879795396419, - "grad_norm": 0.07561559230089188, - "learning_rate": 6.985399692046387e-05, - "loss": 0.012499828636646271, - "step": 4350 - }, - { - "epoch": 0.742540494458653, - "grad_norm": 0.07708913832902908, - "learning_rate": 6.984103514090087e-05, - "loss": 0.01143759787082672, - "step": 4355 - }, - { - "epoch": 0.7433930093776641, - "grad_norm": 0.1315995305776596, - "learning_rate": 6.982805826346687e-05, - "loss": 0.010377982258796692, - "step": 4360 - }, - { - "epoch": 0.7442455242966752, - "grad_norm": 0.08346904814243317, - "learning_rate": 6.981506629421986e-05, - "loss": 0.008995984494686127, - "step": 4365 - }, - { - "epoch": 0.7450980392156863, - "grad_norm": 0.0814853310585022, - "learning_rate": 6.980205923922497e-05, - "loss": 0.009719532728195191, - "step": 4370 - }, - { - "epoch": 0.7459505541346974, - "grad_norm": 0.06594623625278473, - "learning_rate": 6.978903710455431e-05, - "loss": 0.008998245745897294, - "step": 4375 - }, - { - "epoch": 0.7468030690537084, - "grad_norm": 0.09526190906763077, - "learning_rate": 6.977599989628704e-05, - "loss": 0.010040522366762162, - "step": 4380 - }, - { - "epoch": 0.7476555839727195, - "grad_norm": 0.0938214361667633, - "learning_rate": 6.976294762050935e-05, - "loss": 0.010504753142595292, - "step": 4385 - }, - { - "epoch": 0.7485080988917306, - "grad_norm": 0.09816118329763412, - "learning_rate": 6.97498802833145e-05, - "loss": 0.011645899713039398, - "step": 4390 - }, - { - "epoch": 0.7493606138107417, - "grad_norm": 0.0780767872929573, - "learning_rate": 6.973679789080276e-05, - "loss": 0.011689887195825577, - "step": 4395 - }, - { - "epoch": 0.7498721227621483, - "eval_loss": 0.03396161273121834, - "eval_runtime": 3.6324, - "eval_samples_per_second": 69.376, - "eval_steps_per_second": 1.101, - "step": 4398 - }, - { - "eval_cer_subset": 0.01302783070334001, - "eval_cer_subset_edit_distance": 800, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 4398 - }, - { - "epoch": 0.7502131287297528, - "grad_norm": 0.061287231743335724, - "learning_rate": 6.972370044908141e-05, - "loss": 0.011720015108585358, - "step": 4400 - }, - { - "epoch": 0.7510656436487638, - "grad_norm": 0.0668778270483017, - "learning_rate": 6.971058796426478e-05, - "loss": 0.012064819037914277, - "step": 4405 - }, - { - "epoch": 0.7519181585677749, - "grad_norm": 0.07897942513227463, - "learning_rate": 6.969746044247421e-05, - "loss": 0.010592888295650481, - "step": 4410 - }, - { - "epoch": 0.7527706734867861, - "grad_norm": 0.09090534597635269, - "learning_rate": 6.968431788983806e-05, - "loss": 0.011600010097026825, - "step": 4415 - }, - { - "epoch": 0.7536231884057971, - "grad_norm": 0.080506332218647, - "learning_rate": 6.967116031249172e-05, - "loss": 0.013006125390529633, - "step": 4420 - }, - { - "epoch": 0.7544757033248082, - "grad_norm": 0.04851040989160538, - "learning_rate": 6.965798771657758e-05, - "loss": 0.010165790468454361, - "step": 4425 - }, - { - "epoch": 0.7553282182438192, - "grad_norm": 0.06298603117465973, - "learning_rate": 6.964480010824505e-05, - "loss": 0.007800602912902832, - "step": 4430 - }, - { - "epoch": 0.7561807331628303, - "grad_norm": 0.09919694811105728, - "learning_rate": 6.963159749365053e-05, - "loss": 0.010877586901187897, - "step": 4435 - }, - { - "epoch": 0.7570332480818415, - "grad_norm": 0.083896704018116, - "learning_rate": 6.961837987895747e-05, - "loss": 0.011114003509283066, - "step": 4440 - }, - { - "epoch": 0.7578857630008525, - "grad_norm": 0.0452699176967144, - "learning_rate": 6.960514727033626e-05, - "loss": 0.008609171956777573, - "step": 4445 - }, - { - "epoch": 0.7587382779198636, - "grad_norm": 0.08951374143362045, - "learning_rate": 6.959189967396435e-05, - "loss": 0.01193360835313797, - "step": 4450 - }, - { - "epoch": 0.7595907928388747, - "grad_norm": 0.08749551326036453, - "learning_rate": 6.957863709602611e-05, - "loss": 0.009163837879896164, - "step": 4455 - }, - { - "epoch": 0.7604433077578857, - "grad_norm": 0.09304409474134445, - "learning_rate": 6.956535954271301e-05, - "loss": 0.01038273349404335, - "step": 4460 - }, - { - "epoch": 0.7612958226768969, - "grad_norm": 0.06662629544734955, - "learning_rate": 6.955206702022342e-05, - "loss": 0.010570932179689407, - "step": 4465 - }, - { - "epoch": 0.7621483375959079, - "grad_norm": 0.07736595720052719, - "learning_rate": 6.953875953476276e-05, - "loss": 0.009856238961219788, - "step": 4470 - }, - { - "epoch": 0.763000852514919, - "grad_norm": 0.04692552238702774, - "learning_rate": 6.952543709254338e-05, - "loss": 0.006967573612928391, - "step": 4475 - }, - { - "epoch": 0.7638533674339301, - "grad_norm": 0.06901179254055023, - "learning_rate": 6.951209969978464e-05, - "loss": 0.008588603138923645, - "step": 4480 - }, - { - "epoch": 0.7647058823529411, - "grad_norm": 0.07733304053544998, - "learning_rate": 6.949874736271289e-05, - "loss": 0.012579981982707978, - "step": 4485 - }, - { - "epoch": 0.7655583972719523, - "grad_norm": 0.0693301409482956, - "learning_rate": 6.948538008756144e-05, - "loss": 0.009747470915317535, - "step": 4490 - }, - { - "epoch": 0.7664109121909634, - "grad_norm": 0.07054253667593002, - "learning_rate": 6.947199788057059e-05, - "loss": 0.008387601375579834, - "step": 4495 - }, - { - "epoch": 0.7672634271099744, - "grad_norm": 0.06526053696870804, - "learning_rate": 6.945860074798757e-05, - "loss": 0.008389735966920853, - "step": 4500 - }, - { - "epoch": 0.7681159420289855, - "grad_norm": 0.06862380355596542, - "learning_rate": 6.944518869606662e-05, - "loss": 0.008736115694046021, - "step": 4505 - }, - { - "epoch": 0.7689684569479965, - "grad_norm": 0.06233246996998787, - "learning_rate": 6.943176173106897e-05, - "loss": 0.008519527316093446, - "step": 4510 - }, - { - "epoch": 0.7698209718670077, - "grad_norm": 0.07696249336004257, - "learning_rate": 6.941831985926273e-05, - "loss": 0.011381441354751587, - "step": 4515 - }, - { - "epoch": 0.7706734867860188, - "grad_norm": 0.11450641602277756, - "learning_rate": 6.940486308692302e-05, - "loss": 0.012895810604095458, - "step": 4520 - }, - { - "epoch": 0.7715260017050298, - "grad_norm": 0.09141158312559128, - "learning_rate": 6.939139142033191e-05, - "loss": 0.009508632868528367, - "step": 4525 - }, - { - "epoch": 0.7723785166240409, - "grad_norm": 0.09469986706972122, - "learning_rate": 6.937790486577844e-05, - "loss": 0.014812557399272919, - "step": 4530 - }, - { - "epoch": 0.773231031543052, - "grad_norm": 0.08494299650192261, - "learning_rate": 6.936440342955855e-05, - "loss": 0.01355334222316742, - "step": 4535 - }, - { - "epoch": 0.7740835464620631, - "grad_norm": 0.09328251332044601, - "learning_rate": 6.93508871179752e-05, - "loss": 0.011529977619647979, - "step": 4540 - }, - { - "epoch": 0.7749360613810742, - "grad_norm": 0.06696850806474686, - "learning_rate": 6.933735593733821e-05, - "loss": 0.010230815410614014, - "step": 4545 - }, - { - "epoch": 0.7757885763000852, - "grad_norm": 0.07968153059482574, - "learning_rate": 6.932380989396442e-05, - "loss": 0.012129776924848557, - "step": 4550 - }, - { - "epoch": 0.7766410912190963, - "grad_norm": 0.07916650176048279, - "learning_rate": 6.931024899417756e-05, - "loss": 0.009455519914627075, - "step": 4555 - }, - { - "epoch": 0.7774936061381074, - "grad_norm": 0.05229945108294487, - "learning_rate": 6.92966732443083e-05, - "loss": 0.008516684174537659, - "step": 4560 - }, - { - "epoch": 0.7783461210571185, - "grad_norm": 0.08314234018325806, - "learning_rate": 6.928308265069428e-05, - "loss": 0.010914114117622376, - "step": 4565 - }, - { - "epoch": 0.7791986359761296, - "grad_norm": 0.05075672268867493, - "learning_rate": 6.926947721968001e-05, - "loss": 0.008188208192586898, - "step": 4570 - }, - { - "epoch": 0.7800511508951407, - "grad_norm": 0.09465362876653671, - "learning_rate": 6.925585695761697e-05, - "loss": 0.009074468165636063, - "step": 4575 - }, - { - "epoch": 0.7809036658141517, - "grad_norm": 0.09024044126272202, - "learning_rate": 6.924222187086356e-05, - "loss": 0.006571034342050553, - "step": 4580 - }, - { - "epoch": 0.7817561807331628, - "grad_norm": 0.050575681030750275, - "learning_rate": 6.922857196578507e-05, - "loss": 0.008829852938652039, - "step": 4585 - }, - { - "epoch": 0.782608695652174, - "grad_norm": 0.09888230264186859, - "learning_rate": 6.921490724875376e-05, - "loss": 0.01053793728351593, - "step": 4590 - }, - { - "epoch": 0.783461210571185, - "grad_norm": 0.042567264288663864, - "learning_rate": 6.920122772614875e-05, - "loss": 0.009682109951972962, - "step": 4595 - }, - { - "epoch": 0.7843137254901961, - "grad_norm": 0.10021623969078064, - "learning_rate": 6.91875334043561e-05, - "loss": 0.012160807102918624, - "step": 4600 - }, - { - "epoch": 0.7851662404092071, - "grad_norm": 0.07016255706548691, - "learning_rate": 6.917382428976878e-05, - "loss": 0.008590599894523621, - "step": 4605 - }, - { - "epoch": 0.7860187553282183, - "grad_norm": 0.06567320227622986, - "learning_rate": 6.916010038878667e-05, - "loss": 0.00809207409620285, - "step": 4610 - }, - { - "epoch": 0.7868712702472294, - "grad_norm": 0.05032164603471756, - "learning_rate": 6.914636170781652e-05, - "loss": 0.009291460365056991, - "step": 4615 - }, - { - "epoch": 0.7877237851662404, - "grad_norm": 0.06303273886442184, - "learning_rate": 6.913260825327204e-05, - "loss": 0.00837131291627884, - "step": 4620 - }, - { - "epoch": 0.7885763000852515, - "grad_norm": 0.05837355926632881, - "learning_rate": 6.911884003157376e-05, - "loss": 0.007800968736410141, - "step": 4625 - }, - { - "epoch": 0.7894288150042625, - "grad_norm": 0.07052712142467499, - "learning_rate": 6.910505704914916e-05, - "loss": 0.010577390342950821, - "step": 4630 - }, - { - "epoch": 0.7902813299232737, - "grad_norm": 0.08550997078418732, - "learning_rate": 6.909125931243259e-05, - "loss": 0.012821859121322632, - "step": 4635 - }, - { - "epoch": 0.7911338448422848, - "grad_norm": 0.060406558215618134, - "learning_rate": 6.90774468278653e-05, - "loss": 0.0065113060176372525, - "step": 4640 - }, - { - "epoch": 0.7919863597612958, - "grad_norm": 0.13999445736408234, - "learning_rate": 6.906361960189542e-05, - "loss": 0.012699820101261139, - "step": 4645 - }, - { - "epoch": 0.7928388746803069, - "grad_norm": 0.07585978507995605, - "learning_rate": 6.904977764097797e-05, - "loss": 0.008435635268688202, - "step": 4650 - }, - { - "epoch": 0.793691389599318, - "grad_norm": 0.07488108426332474, - "learning_rate": 6.90359209515748e-05, - "loss": 0.011925875395536422, - "step": 4655 - }, - { - "epoch": 0.7945439045183291, - "grad_norm": 0.12476535886526108, - "learning_rate": 6.902204954015471e-05, - "loss": 0.009086847305297852, - "step": 4660 - }, - { - "epoch": 0.7953964194373402, - "grad_norm": 0.08779732137918472, - "learning_rate": 6.900816341319331e-05, - "loss": 0.00962812826037407, - "step": 4665 - }, - { - "epoch": 0.7962489343563512, - "grad_norm": 0.15791405737400055, - "learning_rate": 6.899426257717312e-05, - "loss": 0.011767397075891495, - "step": 4670 - }, - { - "epoch": 0.7971014492753623, - "grad_norm": 0.11228909343481064, - "learning_rate": 6.898034703858352e-05, - "loss": 0.008271434903144836, - "step": 4675 - }, - { - "epoch": 0.7979539641943734, - "grad_norm": 0.07288003712892532, - "learning_rate": 6.896641680392073e-05, - "loss": 0.009384474158287049, - "step": 4680 - }, - { - "epoch": 0.7988064791133845, - "grad_norm": 0.08285173773765564, - "learning_rate": 6.895247187968784e-05, - "loss": 0.012600034475326538, - "step": 4685 - }, - { - "epoch": 0.7996589940323956, - "grad_norm": 0.07812397927045822, - "learning_rate": 6.893851227239484e-05, - "loss": 0.008935874700546265, - "step": 4690 - }, - { - "epoch": 0.8005115089514067, - "grad_norm": 0.07500546425580978, - "learning_rate": 6.892453798855852e-05, - "loss": 0.010619471222162247, - "step": 4695 - }, - { - "epoch": 0.8013640238704177, - "grad_norm": 0.05211177095770836, - "learning_rate": 6.891054903470251e-05, - "loss": 0.008601508289575576, - "step": 4700 - }, - { - "epoch": 0.8022165387894288, - "grad_norm": 0.029616642743349075, - "learning_rate": 6.889654541735738e-05, - "loss": 0.007921247184276581, - "step": 4705 - }, - { - "epoch": 0.80306905370844, - "grad_norm": 0.06894131749868393, - "learning_rate": 6.888252714306044e-05, - "loss": 0.010935742408037186, - "step": 4710 - }, - { - "epoch": 0.803921568627451, - "grad_norm": 0.0766182467341423, - "learning_rate": 6.886849421835587e-05, - "loss": 0.010556706041097642, - "step": 4715 - }, - { - "epoch": 0.8047740835464621, - "grad_norm": 0.09164462238550186, - "learning_rate": 6.885444664979477e-05, - "loss": 0.010812586545944214, - "step": 4720 - }, - { - "epoch": 0.8056265984654731, - "grad_norm": 0.06463408470153809, - "learning_rate": 6.884038444393496e-05, - "loss": 0.009179002791643142, - "step": 4725 - }, - { - "epoch": 0.8064791133844842, - "grad_norm": 0.06639672070741653, - "learning_rate": 6.882630760734118e-05, - "loss": 0.012755092978477479, - "step": 4730 - }, - { - "epoch": 0.8073316283034954, - "grad_norm": 0.062024496495723724, - "learning_rate": 6.881221614658493e-05, - "loss": 0.009655499458312988, - "step": 4735 - }, - { - "epoch": 0.8081841432225064, - "grad_norm": 0.06751494854688644, - "learning_rate": 6.879811006824459e-05, - "loss": 0.010860173404216767, - "step": 4740 - }, - { - "epoch": 0.8090366581415175, - "grad_norm": 0.05535218119621277, - "learning_rate": 6.878398937890535e-05, - "loss": 0.011440058052539826, - "step": 4745 - }, - { - "epoch": 0.8098891730605285, - "grad_norm": 0.08365204185247421, - "learning_rate": 6.876985408515922e-05, - "loss": 0.011058357357978821, - "step": 4750 - }, - { - "epoch": 0.8107416879795396, - "grad_norm": 0.06450537592172623, - "learning_rate": 6.875570419360501e-05, - "loss": 0.01046149879693985, - "step": 4755 - }, - { - "epoch": 0.8115942028985508, - "grad_norm": 0.08542726188898087, - "learning_rate": 6.874153971084837e-05, - "loss": 0.009869573265314102, - "step": 4760 - }, - { - "epoch": 0.8124467178175618, - "grad_norm": 0.08184531331062317, - "learning_rate": 6.872736064350176e-05, - "loss": 0.01054040789604187, - "step": 4765 - }, - { - "epoch": 0.8132992327365729, - "grad_norm": 0.07068512588739395, - "learning_rate": 6.871316699818442e-05, - "loss": 0.009573462605476379, - "step": 4770 - }, - { - "epoch": 0.814151747655584, - "grad_norm": 0.08866564929485321, - "learning_rate": 6.869895878152244e-05, - "loss": 0.008078956604003906, - "step": 4775 - }, - { - "epoch": 0.815004262574595, - "grad_norm": 0.08215270191431046, - "learning_rate": 6.868473600014867e-05, - "loss": 0.010586659610271453, - "step": 4780 - }, - { - "epoch": 0.8158567774936062, - "grad_norm": 0.0449003241956234, - "learning_rate": 6.867049866070278e-05, - "loss": 0.008572281152009965, - "step": 4785 - }, - { - "epoch": 0.8167092924126172, - "grad_norm": 0.0766722783446312, - "learning_rate": 6.865624676983124e-05, - "loss": 0.009015947580337524, - "step": 4790 - }, - { - "epoch": 0.8175618073316283, - "grad_norm": 0.07404733449220657, - "learning_rate": 6.864198033418732e-05, - "loss": 0.014639028906822204, - "step": 4795 - }, - { - "epoch": 0.8184143222506394, - "grad_norm": 0.10437514632940292, - "learning_rate": 6.862769936043102e-05, - "loss": 0.009333716332912445, - "step": 4800 - }, - { - "epoch": 0.8192668371696504, - "grad_norm": 0.06732609868049622, - "learning_rate": 6.861340385522921e-05, - "loss": 0.007169592380523682, - "step": 4805 - }, - { - "epoch": 0.8201193520886616, - "grad_norm": 0.06016068905591965, - "learning_rate": 6.859909382525552e-05, - "loss": 0.009211564064025879, - "step": 4810 - }, - { - "epoch": 0.8209718670076727, - "grad_norm": 0.07302942126989365, - "learning_rate": 6.858476927719031e-05, - "loss": 0.009643231332302094, - "step": 4815 - }, - { - "epoch": 0.8218243819266837, - "grad_norm": 0.07511111348867416, - "learning_rate": 6.857043021772079e-05, - "loss": 0.010751830041408538, - "step": 4820 - }, - { - "epoch": 0.8226768968456948, - "grad_norm": 0.04791528359055519, - "learning_rate": 6.855607665354088e-05, - "loss": 0.008413314074277877, - "step": 4825 - }, - { - "epoch": 0.8235294117647058, - "grad_norm": 0.08279003202915192, - "learning_rate": 6.854170859135132e-05, - "loss": 0.009260118752717972, - "step": 4830 - }, - { - "epoch": 0.824381926683717, - "grad_norm": 0.06907783448696136, - "learning_rate": 6.85273260378596e-05, - "loss": 0.009681220352649688, - "step": 4835 - }, - { - "epoch": 0.8252344416027281, - "grad_norm": 0.09847953170537949, - "learning_rate": 6.851292899977997e-05, - "loss": 0.009847448766231537, - "step": 4840 - }, - { - "epoch": 0.8260869565217391, - "grad_norm": 0.0683966800570488, - "learning_rate": 6.849851748383343e-05, - "loss": 0.007601346075534821, - "step": 4845 - }, - { - "epoch": 0.8269394714407502, - "grad_norm": 0.0523662269115448, - "learning_rate": 6.848409149674779e-05, - "loss": 0.00900915488600731, - "step": 4850 - }, - { - "epoch": 0.8277919863597613, - "grad_norm": 0.07112257927656174, - "learning_rate": 6.846965104525757e-05, - "loss": 0.011230588704347611, - "step": 4855 - }, - { - "epoch": 0.8286445012787724, - "grad_norm": 0.09305348247289658, - "learning_rate": 6.845519613610402e-05, - "loss": 0.01087992861866951, - "step": 4860 - }, - { - "epoch": 0.8294970161977835, - "grad_norm": 0.062347084283828735, - "learning_rate": 6.84407267760352e-05, - "loss": 0.00806276947259903, - "step": 4865 - }, - { - "epoch": 0.8303495311167945, - "grad_norm": 0.09091926366090775, - "learning_rate": 6.84262429718059e-05, - "loss": 0.010562103241682053, - "step": 4870 - }, - { - "epoch": 0.8312020460358056, - "grad_norm": 0.05807443708181381, - "learning_rate": 6.841174473017762e-05, - "loss": 0.010788433253765106, - "step": 4875 - }, - { - "epoch": 0.8320545609548167, - "grad_norm": 0.06664252281188965, - "learning_rate": 6.839723205791863e-05, - "loss": 0.01030244082212448, - "step": 4880 - }, - { - "epoch": 0.8329070758738278, - "grad_norm": 0.07469561696052551, - "learning_rate": 6.838270496180392e-05, - "loss": 0.01250479370355606, - "step": 4885 - }, - { - "epoch": 0.8337595907928389, - "grad_norm": 0.0469096302986145, - "learning_rate": 6.836816344861523e-05, - "loss": 0.010546717792749405, - "step": 4890 - }, - { - "epoch": 0.83461210571185, - "grad_norm": 0.0646355077624321, - "learning_rate": 6.835360752514104e-05, - "loss": 0.008491561561822892, - "step": 4895 - }, - { - "epoch": 0.835464620630861, - "grad_norm": 0.06006006523966789, - "learning_rate": 6.83390371981765e-05, - "loss": 0.010175065696239471, - "step": 4900 - }, - { - "epoch": 0.8363171355498721, - "grad_norm": 0.0595518983900547, - "learning_rate": 6.832445247452355e-05, - "loss": 0.009471315890550613, - "step": 4905 - }, - { - "epoch": 0.8371696504688833, - "grad_norm": 0.0722845196723938, - "learning_rate": 6.830985336099081e-05, - "loss": 0.011656039208173753, - "step": 4910 - }, - { - "epoch": 0.8380221653878943, - "grad_norm": 0.09830670058727264, - "learning_rate": 6.829523986439366e-05, - "loss": 0.0106172576546669, - "step": 4915 - }, - { - "epoch": 0.8388746803069054, - "grad_norm": 0.0725899264216423, - "learning_rate": 6.828061199155413e-05, - "loss": 0.00935768336057663, - "step": 4920 - }, - { - "epoch": 0.8397271952259164, - "grad_norm": 0.06721889227628708, - "learning_rate": 6.826596974930101e-05, - "loss": 0.010951700061559677, - "step": 4925 - }, - { - "epoch": 0.8405797101449275, - "grad_norm": 0.09289079904556274, - "learning_rate": 6.82513131444698e-05, - "loss": 0.010057362169027329, - "step": 4930 - }, - { - "epoch": 0.8414322250639387, - "grad_norm": 0.07667957991361618, - "learning_rate": 6.823664218390267e-05, - "loss": 0.012943412363529205, - "step": 4935 - }, - { - "epoch": 0.8422847399829497, - "grad_norm": 0.041785743087530136, - "learning_rate": 6.822195687444853e-05, - "loss": 0.009845246374607087, - "step": 4940 - }, - { - "epoch": 0.8431372549019608, - "grad_norm": 0.062134817242622375, - "learning_rate": 6.820725722296295e-05, - "loss": 0.010506168007850647, - "step": 4945 - }, - { - "epoch": 0.8439897698209718, - "grad_norm": 0.0870882049202919, - "learning_rate": 6.819254323630825e-05, - "loss": 0.008953387290239334, - "step": 4950 - }, - { - "epoch": 0.8448422847399829, - "grad_norm": 0.06943989545106888, - "learning_rate": 6.817781492135337e-05, - "loss": 0.009975537657737732, - "step": 4955 - }, - { - "epoch": 0.8456947996589941, - "grad_norm": 0.0961625948548317, - "learning_rate": 6.8163072284974e-05, - "loss": 0.010299818217754364, - "step": 4960 - }, - { - "epoch": 0.8465473145780051, - "grad_norm": 0.0685473084449768, - "learning_rate": 6.814831533405249e-05, - "loss": 0.007892660051584243, - "step": 4965 - }, - { - "epoch": 0.8473998294970162, - "grad_norm": 0.06370922178030014, - "learning_rate": 6.813354407547787e-05, - "loss": 0.011673354357481004, - "step": 4970 - }, - { - "epoch": 0.8482523444160273, - "grad_norm": 0.1212867870926857, - "learning_rate": 6.811875851614586e-05, - "loss": 0.01049395203590393, - "step": 4975 - }, - { - "epoch": 0.8491048593350383, - "grad_norm": 0.07590476423501968, - "learning_rate": 6.810395866295885e-05, - "loss": 0.011261900514364242, - "step": 4980 - }, - { - "epoch": 0.8499573742540495, - "grad_norm": 0.06342966109514236, - "learning_rate": 6.808914452282592e-05, - "loss": 0.011414043605327606, - "step": 4985 - }, - { - "epoch": 0.8508098891730606, - "grad_norm": 0.08939556777477264, - "learning_rate": 6.807431610266278e-05, - "loss": 0.008719882369041443, - "step": 4990 - }, - { - "epoch": 0.8516624040920716, - "grad_norm": 0.09418119490146637, - "learning_rate": 6.805947340939183e-05, - "loss": 0.011406099796295166, - "step": 4995 - }, - { - "epoch": 0.8525149190110827, - "grad_norm": 0.09113836288452148, - "learning_rate": 6.804461644994213e-05, - "loss": 0.011070792376995087, - "step": 5000 - }, - { - "epoch": 0.8533674339300937, - "grad_norm": 0.06545080989599228, - "learning_rate": 6.802974523124941e-05, - "loss": 0.009602059423923493, - "step": 5005 - }, - { - "epoch": 0.8542199488491049, - "grad_norm": 0.07779792696237564, - "learning_rate": 6.801485976025607e-05, - "loss": 0.008207190036773681, - "step": 5010 - }, - { - "epoch": 0.855072463768116, - "grad_norm": 0.038606271147727966, - "learning_rate": 6.799996004391113e-05, - "loss": 0.008772189915180206, - "step": 5015 - }, - { - "epoch": 0.855924978687127, - "grad_norm": 0.07329479604959488, - "learning_rate": 6.798504608917025e-05, - "loss": 0.011226999759674072, - "step": 5020 - }, - { - "epoch": 0.8567774936061381, - "grad_norm": 0.08631903678178787, - "learning_rate": 6.797011790299579e-05, - "loss": 0.012361649423837662, - "step": 5025 - }, - { - "epoch": 0.8576300085251491, - "grad_norm": 0.09041957557201385, - "learning_rate": 6.79551754923567e-05, - "loss": 0.010576151311397552, - "step": 5030 - }, - { - "epoch": 0.8584825234441603, - "grad_norm": 0.07003892213106155, - "learning_rate": 6.794021886422861e-05, - "loss": 0.008748160302639007, - "step": 5035 - }, - { - "epoch": 0.8593350383631714, - "grad_norm": 0.06976106762886047, - "learning_rate": 6.792524802559378e-05, - "loss": 0.010539846867322922, - "step": 5040 - }, - { - "epoch": 0.8601875532821824, - "grad_norm": 0.05501266196370125, - "learning_rate": 6.791026298344107e-05, - "loss": 0.01045292615890503, - "step": 5045 - }, - { - "epoch": 0.8610400682011935, - "grad_norm": 0.051503781229257584, - "learning_rate": 6.789526374476602e-05, - "loss": 0.009410140663385391, - "step": 5050 - }, - { - "epoch": 0.8618925831202046, - "grad_norm": 0.05674072727560997, - "learning_rate": 6.788025031657076e-05, - "loss": 0.00899135023355484, - "step": 5055 - }, - { - "epoch": 0.8627450980392157, - "grad_norm": 0.09688259661197662, - "learning_rate": 6.786522270586406e-05, - "loss": 0.010228607058525085, - "step": 5060 - }, - { - "epoch": 0.8635976129582268, - "grad_norm": 0.08542542159557343, - "learning_rate": 6.785018091966131e-05, - "loss": 0.010819461196660995, - "step": 5065 - }, - { - "epoch": 0.8644501278772379, - "grad_norm": 0.05703757330775261, - "learning_rate": 6.783512496498452e-05, - "loss": 0.011345957219600678, - "step": 5070 - }, - { - "epoch": 0.8653026427962489, - "grad_norm": 0.08096349984407425, - "learning_rate": 6.782005484886231e-05, - "loss": 0.011220332235097885, - "step": 5075 - }, - { - "epoch": 0.86615515771526, - "grad_norm": 0.07224266976118088, - "learning_rate": 6.780497057832988e-05, - "loss": 0.012557309865951539, - "step": 5080 - }, - { - "epoch": 0.8670076726342711, - "grad_norm": 0.08718731999397278, - "learning_rate": 6.778987216042912e-05, - "loss": 0.009770408272743225, - "step": 5085 - }, - { - "epoch": 0.8678601875532822, - "grad_norm": 0.05731170251965523, - "learning_rate": 6.777475960220846e-05, - "loss": 0.008567098528146744, - "step": 5090 - }, - { - "epoch": 0.8687127024722933, - "grad_norm": 0.07938708364963531, - "learning_rate": 6.775963291072292e-05, - "loss": 0.010664292424917222, - "step": 5095 - }, - { - "epoch": 0.8695652173913043, - "grad_norm": 0.028421485796570778, - "learning_rate": 6.774449209303416e-05, - "loss": 0.010194088518619537, - "step": 5100 - }, - { - "epoch": 0.8704177323103154, - "grad_norm": 0.06094631180167198, - "learning_rate": 6.772933715621042e-05, - "loss": 0.009481045603752136, - "step": 5105 - }, - { - "epoch": 0.8712702472293266, - "grad_norm": 0.08887558430433273, - "learning_rate": 6.771416810732653e-05, - "loss": 0.012845474481582641, - "step": 5110 - }, - { - "epoch": 0.8721227621483376, - "grad_norm": 0.07431238144636154, - "learning_rate": 6.76989849534639e-05, - "loss": 0.00815560668706894, - "step": 5115 - }, - { - "epoch": 0.8729752770673487, - "grad_norm": 0.09380137175321579, - "learning_rate": 6.768378770171052e-05, - "loss": 0.009580246359109878, - "step": 5120 - }, - { - "epoch": 0.8738277919863597, - "grad_norm": 0.08001488447189331, - "learning_rate": 6.766857635916099e-05, - "loss": 0.00857289507985115, - "step": 5125 - }, - { - "epoch": 0.8746803069053708, - "grad_norm": 0.049355555325746536, - "learning_rate": 6.765335093291647e-05, - "loss": 0.009263276308774947, - "step": 5130 - }, - { - "epoch": 0.875532821824382, - "grad_norm": 0.05873994901776314, - "learning_rate": 6.763811143008469e-05, - "loss": 0.008233514428138734, - "step": 5135 - }, - { - "epoch": 0.876385336743393, - "grad_norm": 0.10190756618976593, - "learning_rate": 6.762285785777995e-05, - "loss": 0.01529676467180252, - "step": 5140 - }, - { - "epoch": 0.8772378516624041, - "grad_norm": 0.08395158499479294, - "learning_rate": 6.760759022312313e-05, - "loss": 0.00961325541138649, - "step": 5145 - }, - { - "epoch": 0.8780903665814151, - "grad_norm": 0.07476748526096344, - "learning_rate": 6.759230853324169e-05, - "loss": 0.010477164387702942, - "step": 5150 - }, - { - "epoch": 0.8789428815004262, - "grad_norm": 0.07773051410913467, - "learning_rate": 6.757701279526961e-05, - "loss": 0.010389962792396545, - "step": 5155 - }, - { - "epoch": 0.8797953964194374, - "grad_norm": 0.07345708459615707, - "learning_rate": 6.756170301634745e-05, - "loss": 0.009174319356679917, - "step": 5160 - }, - { - "epoch": 0.8806479113384484, - "grad_norm": 0.07917368412017822, - "learning_rate": 6.754637920362233e-05, - "loss": 0.012756256759166718, - "step": 5165 - }, - { - "epoch": 0.8815004262574595, - "grad_norm": 0.06016271933913231, - "learning_rate": 6.75310413642479e-05, - "loss": 0.011058077961206437, - "step": 5170 - }, - { - "epoch": 0.8823529411764706, - "grad_norm": 0.06637005507946014, - "learning_rate": 6.751568950538441e-05, - "loss": 0.013590328395366669, - "step": 5175 - }, - { - "epoch": 0.8832054560954816, - "grad_norm": 0.06486016511917114, - "learning_rate": 6.750032363419857e-05, - "loss": 0.010195261240005494, - "step": 5180 - }, - { - "epoch": 0.8840579710144928, - "grad_norm": 0.09800687432289124, - "learning_rate": 6.748494375786372e-05, - "loss": 0.011106249690055848, - "step": 5185 - }, - { - "epoch": 0.8849104859335039, - "grad_norm": 0.04665162041783333, - "learning_rate": 6.746954988355967e-05, - "loss": 0.009880972653627395, - "step": 5190 - }, - { - "epoch": 0.8857630008525149, - "grad_norm": 0.05554487928748131, - "learning_rate": 6.745414201847282e-05, - "loss": 0.009480565786361694, - "step": 5195 - }, - { - "epoch": 0.886615515771526, - "grad_norm": 0.059967316687107086, - "learning_rate": 6.743872016979605e-05, - "loss": 0.010072766989469528, - "step": 5200 - }, - { - "epoch": 0.887468030690537, - "grad_norm": 0.08275031298398972, - "learning_rate": 6.74232843447288e-05, - "loss": 0.010208947211503982, - "step": 5205 - }, - { - "epoch": 0.8883205456095482, - "grad_norm": 0.07091715931892395, - "learning_rate": 6.740783455047704e-05, - "loss": 0.011881709098815918, - "step": 5210 - }, - { - "epoch": 0.8891730605285593, - "grad_norm": 0.1465480923652649, - "learning_rate": 6.739237079425322e-05, - "loss": 0.010970161855220794, - "step": 5215 - }, - { - "epoch": 0.8900255754475703, - "grad_norm": 0.07571437954902649, - "learning_rate": 6.737689308327636e-05, - "loss": 0.010722124576568603, - "step": 5220 - }, - { - "epoch": 0.8908780903665814, - "grad_norm": 0.06671100109815598, - "learning_rate": 6.736140142477194e-05, - "loss": 0.010463282465934753, - "step": 5225 - }, - { - "epoch": 0.8917306052855924, - "grad_norm": 0.05676295980811119, - "learning_rate": 6.734589582597204e-05, - "loss": 0.00933043509721756, - "step": 5230 - }, - { - "epoch": 0.8925831202046036, - "grad_norm": 0.09708777070045471, - "learning_rate": 6.733037629411514e-05, - "loss": 0.011712780594825745, - "step": 5235 - }, - { - "epoch": 0.8934356351236147, - "grad_norm": 0.07073090970516205, - "learning_rate": 6.731484283644626e-05, - "loss": 0.008112293481826783, - "step": 5240 - }, - { - "epoch": 0.8942881500426257, - "grad_norm": 0.08821752667427063, - "learning_rate": 6.7299295460217e-05, - "loss": 0.01026250645518303, - "step": 5245 - }, - { - "epoch": 0.8951406649616368, - "grad_norm": 0.05165687948465347, - "learning_rate": 6.728373417268533e-05, - "loss": 0.008761890232563019, - "step": 5250 - }, - { - "epoch": 0.8959931798806479, - "grad_norm": 0.06872246414422989, - "learning_rate": 6.726815898111581e-05, - "loss": 0.012551462650299073, - "step": 5255 - }, - { - "epoch": 0.896845694799659, - "grad_norm": 0.07550673931837082, - "learning_rate": 6.725256989277944e-05, - "loss": 0.010165071487426758, - "step": 5260 - }, - { - "epoch": 0.8976982097186701, - "grad_norm": 0.05931933969259262, - "learning_rate": 6.723696691495373e-05, - "loss": 0.009457996487617493, - "step": 5265 - }, - { - "epoch": 0.8985507246376812, - "grad_norm": 0.10838861763477325, - "learning_rate": 6.722135005492268e-05, - "loss": 0.012053199112415314, - "step": 5270 - }, - { - "epoch": 0.8994032395566922, - "grad_norm": 0.062102265655994415, - "learning_rate": 6.720571931997676e-05, - "loss": 0.007582514733076096, - "step": 5275 - }, - { - "epoch": 0.9002557544757033, - "grad_norm": 0.05997858941555023, - "learning_rate": 6.719007471741289e-05, - "loss": 0.009348342567682267, - "step": 5280 - }, - { - "epoch": 0.9011082693947144, - "grad_norm": 0.05405129864811897, - "learning_rate": 6.717441625453451e-05, - "loss": 0.009910254925489425, - "step": 5285 - }, - { - "epoch": 0.9019607843137255, - "grad_norm": 0.08071329444646835, - "learning_rate": 6.715874393865152e-05, - "loss": 0.010672248899936676, - "step": 5290 - }, - { - "epoch": 0.9028132992327366, - "grad_norm": 0.06863300502300262, - "learning_rate": 6.714305777708027e-05, - "loss": 0.008570954948663712, - "step": 5295 - }, - { - "epoch": 0.9036658141517476, - "grad_norm": 0.07818135619163513, - "learning_rate": 6.712735777714357e-05, - "loss": 0.008647527545690536, - "step": 5300 - }, - { - "epoch": 0.9045183290707587, - "grad_norm": 0.14757376909255981, - "learning_rate": 6.711164394617072e-05, - "loss": 0.010825049877166749, - "step": 5305 - }, - { - "epoch": 0.9053708439897699, - "grad_norm": 0.07376445829868317, - "learning_rate": 6.709591629149746e-05, - "loss": 0.01299697458744049, - "step": 5310 - }, - { - "epoch": 0.9062233589087809, - "grad_norm": 0.05860469490289688, - "learning_rate": 6.708017482046597e-05, - "loss": 0.009549598395824432, - "step": 5315 - }, - { - "epoch": 0.907075873827792, - "grad_norm": 0.0799872875213623, - "learning_rate": 6.706441954042488e-05, - "loss": 0.009733843803405761, - "step": 5320 - }, - { - "epoch": 0.907928388746803, - "grad_norm": 0.05245954543352127, - "learning_rate": 6.704865045872932e-05, - "loss": 0.009799794852733612, - "step": 5325 - }, - { - "epoch": 0.9087809036658141, - "grad_norm": 0.05515241622924805, - "learning_rate": 6.703286758274079e-05, - "loss": 0.007391643524169922, - "step": 5330 - }, - { - "epoch": 0.9096334185848253, - "grad_norm": 0.05900256708264351, - "learning_rate": 6.701707091982726e-05, - "loss": 0.009107303619384766, - "step": 5335 - }, - { - "epoch": 0.9104859335038363, - "grad_norm": 0.09559495002031326, - "learning_rate": 6.700126047736317e-05, - "loss": 0.009052158147096635, - "step": 5340 - }, - { - "epoch": 0.9113384484228474, - "grad_norm": 0.11189334839582443, - "learning_rate": 6.698543626272932e-05, - "loss": 0.011292549222707749, - "step": 5345 - }, - { - "epoch": 0.9121909633418585, - "grad_norm": 0.07031659781932831, - "learning_rate": 6.6969598283313e-05, - "loss": 0.008589480072259903, - "step": 5350 - }, - { - "epoch": 0.9130434782608695, - "grad_norm": 0.1652907431125641, - "learning_rate": 6.69537465465079e-05, - "loss": 0.010865563899278641, - "step": 5355 - }, - { - "epoch": 0.9138959931798807, - "grad_norm": 0.06157436594367027, - "learning_rate": 6.693788105971413e-05, - "loss": 0.012611952424049378, - "step": 5360 - }, - { - "epoch": 0.9147485080988917, - "grad_norm": 0.03928734362125397, - "learning_rate": 6.692200183033826e-05, - "loss": 0.009115418046712875, - "step": 5365 - }, - { - "epoch": 0.9156010230179028, - "grad_norm": 0.06604880094528198, - "learning_rate": 6.690610886579321e-05, - "loss": 0.010015038400888443, - "step": 5370 - }, - { - "epoch": 0.9164535379369139, - "grad_norm": 0.07625336199998856, - "learning_rate": 6.689020217349835e-05, - "loss": 0.010416677594184876, - "step": 5375 - }, - { - "epoch": 0.9173060528559249, - "grad_norm": 0.07674526423215866, - "learning_rate": 6.687428176087946e-05, - "loss": 0.01016802191734314, - "step": 5380 - }, - { - "epoch": 0.9181585677749361, - "grad_norm": 0.08422617614269257, - "learning_rate": 6.685834763536872e-05, - "loss": 0.011127004027366638, - "step": 5385 - }, - { - "epoch": 0.9190110826939472, - "grad_norm": 0.057719554752111435, - "learning_rate": 6.684239980440472e-05, - "loss": 0.008915853500366212, - "step": 5390 - }, - { - "epoch": 0.9198635976129582, - "grad_norm": 0.056555263698101044, - "learning_rate": 6.682643827543241e-05, - "loss": 0.0095272496342659, - "step": 5395 - }, - { - "epoch": 0.9207161125319693, - "grad_norm": 0.07605638355016708, - "learning_rate": 6.681046305590317e-05, - "loss": 0.010731159895658492, - "step": 5400 - }, - { - "epoch": 0.9215686274509803, - "grad_norm": 0.07499220222234726, - "learning_rate": 6.679447415327479e-05, - "loss": 0.010919998586177825, - "step": 5405 - }, - { - "epoch": 0.9224211423699915, - "grad_norm": 0.052863143384456635, - "learning_rate": 6.677847157501137e-05, - "loss": 0.011300939321517944, - "step": 5410 - }, - { - "epoch": 0.9232736572890026, - "grad_norm": 0.09210597723722458, - "learning_rate": 6.676245532858351e-05, - "loss": 0.013997772336006164, - "step": 5415 - }, - { - "epoch": 0.9241261722080136, - "grad_norm": 0.0746840238571167, - "learning_rate": 6.674642542146807e-05, - "loss": 0.012542533874511718, - "step": 5420 - }, - { - "epoch": 0.9249786871270247, - "grad_norm": 0.06137506663799286, - "learning_rate": 6.67303818611484e-05, - "loss": 0.008029398322105408, - "step": 5425 - }, - { - "epoch": 0.9258312020460358, - "grad_norm": 0.03867131471633911, - "learning_rate": 6.671432465511411e-05, - "loss": 0.009305672347545623, - "step": 5430 - }, - { - "epoch": 0.9266837169650469, - "grad_norm": 0.08291540294885635, - "learning_rate": 6.669825381086128e-05, - "loss": 0.011059926450252533, - "step": 5435 - }, - { - "epoch": 0.927536231884058, - "grad_norm": 0.0689411610364914, - "learning_rate": 6.668216933589228e-05, - "loss": 0.008350597321987152, - "step": 5440 - }, - { - "epoch": 0.928388746803069, - "grad_norm": 0.051505669951438904, - "learning_rate": 6.666607123771591e-05, - "loss": 0.011102759093046189, - "step": 5445 - }, - { - "epoch": 0.9292412617220801, - "grad_norm": 0.08774327486753464, - "learning_rate": 6.664995952384729e-05, - "loss": 0.009498609602451325, - "step": 5450 - }, - { - "epoch": 0.9300937766410913, - "grad_norm": 0.060566093772649765, - "learning_rate": 6.663383420180789e-05, - "loss": 0.007811173051595688, - "step": 5455 - }, - { - "epoch": 0.9309462915601023, - "grad_norm": 0.09952156245708466, - "learning_rate": 6.661769527912555e-05, - "loss": 0.010514630377292633, - "step": 5460 - }, - { - "epoch": 0.9317988064791134, - "grad_norm": 0.04871741309762001, - "learning_rate": 6.660154276333446e-05, - "loss": 0.006630983203649521, - "step": 5465 - }, - { - "epoch": 0.9326513213981245, - "grad_norm": 0.06254981458187103, - "learning_rate": 6.658537666197517e-05, - "loss": 0.009895097464323044, - "step": 5470 - }, - { - "epoch": 0.9335038363171355, - "grad_norm": 0.08351470530033112, - "learning_rate": 6.656919698259452e-05, - "loss": 0.010659988969564438, - "step": 5475 - }, - { - "epoch": 0.9343563512361467, - "grad_norm": 0.07085305452346802, - "learning_rate": 6.655300373274575e-05, - "loss": 0.008971457183361054, - "step": 5480 - }, - { - "epoch": 0.9352088661551577, - "grad_norm": 0.06461923569440842, - "learning_rate": 6.653679691998839e-05, - "loss": 0.009138958156108856, - "step": 5485 - }, - { - "epoch": 0.9360613810741688, - "grad_norm": 0.11675399541854858, - "learning_rate": 6.652057655188832e-05, - "loss": 0.008388948440551759, - "step": 5490 - }, - { - "epoch": 0.9369138959931799, - "grad_norm": 0.09698229283094406, - "learning_rate": 6.650434263601777e-05, - "loss": 0.011885351687669753, - "step": 5495 - }, - { - "epoch": 0.9377664109121909, - "grad_norm": 0.06786464154720306, - "learning_rate": 6.648809517995524e-05, - "loss": 0.012351768463850022, - "step": 5500 - }, - { - "epoch": 0.9386189258312021, - "grad_norm": 0.09192351251840591, - "learning_rate": 6.647183419128561e-05, - "loss": 0.010940471291542053, - "step": 5505 - }, - { - "epoch": 0.9394714407502132, - "grad_norm": 0.05660499259829521, - "learning_rate": 6.645555967760003e-05, - "loss": 0.01160380095243454, - "step": 5510 - }, - { - "epoch": 0.9403239556692242, - "grad_norm": 0.0829106792807579, - "learning_rate": 6.6439271646496e-05, - "loss": 0.009475469589233398, - "step": 5515 - }, - { - "epoch": 0.9411764705882353, - "grad_norm": 0.08007021248340607, - "learning_rate": 6.642297010557733e-05, - "loss": 0.010524801164865493, - "step": 5520 - }, - { - "epoch": 0.9420289855072463, - "grad_norm": 0.07578855752944946, - "learning_rate": 6.640665506245406e-05, - "loss": 0.008864742517471314, - "step": 5525 - }, - { - "epoch": 0.9428815004262575, - "grad_norm": 0.064674511551857, - "learning_rate": 6.639032652474265e-05, - "loss": 0.010619612783193589, - "step": 5530 - }, - { - "epoch": 0.9437340153452686, - "grad_norm": 0.07176528871059418, - "learning_rate": 6.637398450006579e-05, - "loss": 0.011696039140224457, - "step": 5535 - }, - { - "epoch": 0.9445865302642796, - "grad_norm": 0.16578713059425354, - "learning_rate": 6.635762899605248e-05, - "loss": 0.009867334365844726, - "step": 5540 - }, - { - "epoch": 0.9454390451832907, - "grad_norm": 0.069394052028656, - "learning_rate": 6.634126002033802e-05, - "loss": 0.012951886653900147, - "step": 5545 - }, - { - "epoch": 0.9462915601023018, - "grad_norm": 0.07247213274240494, - "learning_rate": 6.632487758056397e-05, - "loss": 0.009127721190452576, - "step": 5550 - }, - { - "epoch": 0.9471440750213129, - "grad_norm": 0.11084317415952682, - "learning_rate": 6.630848168437822e-05, - "loss": 0.009197863936424255, - "step": 5555 - }, - { - "epoch": 0.947996589940324, - "grad_norm": 0.08719248324632645, - "learning_rate": 6.629207233943492e-05, - "loss": 0.010768509656190871, - "step": 5560 - }, - { - "epoch": 0.948849104859335, - "grad_norm": 0.0857851505279541, - "learning_rate": 6.62756495533945e-05, - "loss": 0.009163270145654679, - "step": 5565 - }, - { - "epoch": 0.9497016197783461, - "grad_norm": 0.09808778762817383, - "learning_rate": 6.625921333392362e-05, - "loss": 0.01005362868309021, - "step": 5570 - }, - { - "epoch": 0.9505541346973572, - "grad_norm": 0.08482059836387634, - "learning_rate": 6.624276368869532e-05, - "loss": 0.008303509652614593, - "step": 5575 - }, - { - "epoch": 0.9514066496163683, - "grad_norm": 0.07460886240005493, - "learning_rate": 6.62263006253888e-05, - "loss": 0.00857923850417137, - "step": 5580 - }, - { - "epoch": 0.9522591645353794, - "grad_norm": 0.09494256228208542, - "learning_rate": 6.620982415168956e-05, - "loss": 0.009573552012443542, - "step": 5585 - }, - { - "epoch": 0.9531116794543905, - "grad_norm": 0.08753519505262375, - "learning_rate": 6.61933342752894e-05, - "loss": 0.010430536419153213, - "step": 5590 - }, - { - "epoch": 0.9539641943734015, - "grad_norm": 0.05132949724793434, - "learning_rate": 6.617683100388632e-05, - "loss": 0.009080658107995987, - "step": 5595 - }, - { - "epoch": 0.9548167092924126, - "grad_norm": 0.07516856491565704, - "learning_rate": 6.61603143451846e-05, - "loss": 0.009718524664640427, - "step": 5600 - }, - { - "epoch": 0.9556692242114238, - "grad_norm": 0.12723733484745026, - "learning_rate": 6.614378430689477e-05, - "loss": 0.01136334240436554, - "step": 5605 - }, - { - "epoch": 0.9565217391304348, - "grad_norm": 0.0905863493680954, - "learning_rate": 6.612724089673359e-05, - "loss": 0.013209307193756103, - "step": 5610 - }, - { - "epoch": 0.9573742540494459, - "grad_norm": 0.06084009259939194, - "learning_rate": 6.611068412242409e-05, - "loss": 0.01001257449388504, - "step": 5615 - }, - { - "epoch": 0.9582267689684569, - "grad_norm": 0.11799532175064087, - "learning_rate": 6.60941139916955e-05, - "loss": 0.008610795438289642, - "step": 5620 - }, - { - "epoch": 0.959079283887468, - "grad_norm": 0.08627504110336304, - "learning_rate": 6.607753051228333e-05, - "loss": 0.009049218893051148, - "step": 5625 - }, - { - "epoch": 0.9599317988064792, - "grad_norm": 0.0910186693072319, - "learning_rate": 6.60609336919293e-05, - "loss": 0.01068672090768814, - "step": 5630 - }, - { - "epoch": 0.9607843137254902, - "grad_norm": 0.06503022462129593, - "learning_rate": 6.604432353838134e-05, - "loss": 0.010604655742645264, - "step": 5635 - }, - { - "epoch": 0.9616368286445013, - "grad_norm": 0.07681523263454437, - "learning_rate": 6.602770005939363e-05, - "loss": 0.010527564585208893, - "step": 5640 - }, - { - "epoch": 0.9624893435635123, - "grad_norm": 0.0680806040763855, - "learning_rate": 6.601106326272659e-05, - "loss": 0.009375665336847305, - "step": 5645 - }, - { - "epoch": 0.9633418584825234, - "grad_norm": 0.06601905822753906, - "learning_rate": 6.599441315614678e-05, - "loss": 0.009470004588365555, - "step": 5650 - }, - { - "epoch": 0.9641943734015346, - "grad_norm": 0.06291890889406204, - "learning_rate": 6.597774974742706e-05, - "loss": 0.012320800870656966, - "step": 5655 - }, - { - "epoch": 0.9650468883205456, - "grad_norm": 0.0956176221370697, - "learning_rate": 6.596107304434645e-05, - "loss": 0.01018187329173088, - "step": 5660 - }, - { - "epoch": 0.9658994032395567, - "grad_norm": 0.06642715632915497, - "learning_rate": 6.59443830546902e-05, - "loss": 0.010271859169006348, - "step": 5665 - }, - { - "epoch": 0.9667519181585678, - "grad_norm": 0.06783592700958252, - "learning_rate": 6.592767978624973e-05, - "loss": 0.00986798033118248, - "step": 5670 - }, - { - "epoch": 0.9676044330775788, - "grad_norm": 0.05877846106886864, - "learning_rate": 6.591096324682272e-05, - "loss": 0.009708859026432037, - "step": 5675 - }, - { - "epoch": 0.96845694799659, - "grad_norm": 0.049258604645729065, - "learning_rate": 6.589423344421297e-05, - "loss": 0.008615868538618088, - "step": 5680 - }, - { - "epoch": 0.969309462915601, - "grad_norm": 0.05622515454888344, - "learning_rate": 6.587749038623052e-05, - "loss": 0.010138686001300811, - "step": 5685 - }, - { - "epoch": 0.9701619778346121, - "grad_norm": 0.05719893425703049, - "learning_rate": 6.586073408069159e-05, - "loss": 0.00869678258895874, - "step": 5690 - }, - { - "epoch": 0.9710144927536232, - "grad_norm": 0.07675095647573471, - "learning_rate": 6.584396453541856e-05, - "loss": 0.010557885468006133, - "step": 5695 - }, - { - "epoch": 0.9718670076726342, - "grad_norm": 0.0999779924750328, - "learning_rate": 6.582718175824006e-05, - "loss": 0.010667790472507478, - "step": 5700 - }, - { - "epoch": 0.9727195225916454, - "grad_norm": 0.04620293527841568, - "learning_rate": 6.58103857569908e-05, - "loss": 0.0078192800283432, - "step": 5705 - }, - { - "epoch": 0.9735720375106565, - "grad_norm": 0.06725125759840012, - "learning_rate": 6.579357653951174e-05, - "loss": 0.010441574454307555, - "step": 5710 - }, - { - "epoch": 0.9744245524296675, - "grad_norm": 0.09062530100345612, - "learning_rate": 6.577675411364997e-05, - "loss": 0.011757946014404297, - "step": 5715 - }, - { - "epoch": 0.9752770673486786, - "grad_norm": 0.050651032477617264, - "learning_rate": 6.575991848725876e-05, - "loss": 0.009817829728126526, - "step": 5720 - }, - { - "epoch": 0.9761295822676896, - "grad_norm": 0.06951560825109482, - "learning_rate": 6.574306966819755e-05, - "loss": 0.008903174102306366, - "step": 5725 - }, - { - "epoch": 0.9769820971867008, - "grad_norm": 0.0733589306473732, - "learning_rate": 6.57262076643319e-05, - "loss": 0.009786784648895264, - "step": 5730 - }, - { - "epoch": 0.9778346121057119, - "grad_norm": 0.0736282467842102, - "learning_rate": 6.570933248353359e-05, - "loss": 0.012176553905010223, - "step": 5735 - }, - { - "epoch": 0.9786871270247229, - "grad_norm": 0.069704569876194, - "learning_rate": 6.56924441336805e-05, - "loss": 0.008654942363500595, - "step": 5740 - }, - { - "epoch": 0.979539641943734, - "grad_norm": 0.07497496902942657, - "learning_rate": 6.567554262265668e-05, - "loss": 0.010902392119169236, - "step": 5745 - }, - { - "epoch": 0.9803921568627451, - "grad_norm": 0.07559038698673248, - "learning_rate": 6.56586279583523e-05, - "loss": 0.00921270027756691, - "step": 5750 - }, - { - "epoch": 0.9812446717817562, - "grad_norm": 0.05894545465707779, - "learning_rate": 6.56417001486637e-05, - "loss": 0.009073206037282944, - "step": 5755 - }, - { - "epoch": 0.9820971867007673, - "grad_norm": 0.06555377691984177, - "learning_rate": 6.562475920149335e-05, - "loss": 0.010872729122638702, - "step": 5760 - }, - { - "epoch": 0.9829497016197783, - "grad_norm": 0.15036429464817047, - "learning_rate": 6.560780512474984e-05, - "loss": 0.009879975020885468, - "step": 5765 - }, - { - "epoch": 0.9838022165387894, - "grad_norm": 0.06842299550771713, - "learning_rate": 6.559083792634791e-05, - "loss": 0.00965554341673851, - "step": 5770 - }, - { - "epoch": 0.9846547314578005, - "grad_norm": 0.0486510805785656, - "learning_rate": 6.557385761420839e-05, - "loss": 0.00872802734375, - "step": 5775 - }, - { - "epoch": 0.9855072463768116, - "grad_norm": 0.059796739369630814, - "learning_rate": 6.555686419625826e-05, - "loss": 0.009720289707183838, - "step": 5780 - }, - { - "epoch": 0.9863597612958227, - "grad_norm": 0.07971934229135513, - "learning_rate": 6.553985768043062e-05, - "loss": 0.008043316006660462, - "step": 5785 - }, - { - "epoch": 0.9872122762148338, - "grad_norm": 0.09556971490383148, - "learning_rate": 6.552283807466468e-05, - "loss": 0.009030704945325851, - "step": 5790 - }, - { - "epoch": 0.9880647911338448, - "grad_norm": 0.04631726071238518, - "learning_rate": 6.550580538690577e-05, - "loss": 0.007388583570718765, - "step": 5795 - }, - { - "epoch": 0.9889173060528559, - "grad_norm": 0.08172665536403656, - "learning_rate": 6.548875962510528e-05, - "loss": 0.007863265275955201, - "step": 5800 - }, - { - "epoch": 0.989769820971867, - "grad_norm": 0.11129096895456314, - "learning_rate": 6.547170079722076e-05, - "loss": 0.012218999862670898, - "step": 5805 - }, - { - "epoch": 0.9906223358908781, - "grad_norm": 0.06619804352521896, - "learning_rate": 6.545462891121584e-05, - "loss": 0.007535400986671448, - "step": 5810 - }, - { - "epoch": 0.9914748508098892, - "grad_norm": 0.11495351046323776, - "learning_rate": 6.543754397506025e-05, - "loss": 0.0121284119784832, - "step": 5815 - }, - { - "epoch": 0.9923273657289002, - "grad_norm": 0.06017669290304184, - "learning_rate": 6.542044599672978e-05, - "loss": 0.008776353299617767, - "step": 5820 - }, - { - "epoch": 0.9931798806479113, - "grad_norm": 0.08049561828374863, - "learning_rate": 6.540333498420637e-05, - "loss": 0.010460492223501205, - "step": 5825 - }, - { - "epoch": 0.9940323955669225, - "grad_norm": 0.07041274011135101, - "learning_rate": 6.538621094547798e-05, - "loss": 0.008290639519691468, - "step": 5830 - }, - { - "epoch": 0.9948849104859335, - "grad_norm": 0.061981480568647385, - "learning_rate": 6.53690738885387e-05, - "loss": 0.007011125236749649, - "step": 5835 - }, - { - "epoch": 0.9957374254049446, - "grad_norm": 0.09022640436887741, - "learning_rate": 6.535192382138867e-05, - "loss": 0.012455084919929504, - "step": 5840 - }, - { - "epoch": 0.9965899403239556, - "grad_norm": 0.05652628839015961, - "learning_rate": 6.53347607520341e-05, - "loss": 0.011704784631729127, - "step": 5845 - }, - { - "epoch": 0.9974424552429667, - "grad_norm": 0.0717577114701271, - "learning_rate": 6.531758468848732e-05, - "loss": 0.007738448679447174, - "step": 5850 - }, - { - "epoch": 0.9982949701619779, - "grad_norm": 0.04797588661313057, - "learning_rate": 6.530039563876665e-05, - "loss": 0.00894927978515625, - "step": 5855 - }, - { - "epoch": 0.9991474850809889, - "grad_norm": 0.06541015207767487, - "learning_rate": 6.528319361089651e-05, - "loss": 0.00731588676571846, - "step": 5860 - }, - { - "epoch": 0.9998294970161978, - "eval_loss": 0.03369956836104393, - "eval_runtime": 3.5892, - "eval_samples_per_second": 70.21, - "eval_steps_per_second": 1.114, - "step": 5864 - }, - { - "eval_cer_subset": 0.014444607292328236, - "eval_cer_subset_edit_distance": 887, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 5864 - }, - { - "epoch": 1.0, - "grad_norm": 0.06960473209619522, - "learning_rate": 6.52659786129074e-05, - "loss": 0.009423434734344482, - "step": 5865 - }, - { - "epoch": 1.000852514919011, - "grad_norm": 0.08438396453857422, - "learning_rate": 6.524875065283587e-05, - "loss": 0.009560108184814453, - "step": 5870 - }, - { - "epoch": 1.0017050298380221, - "grad_norm": 0.06281089037656784, - "learning_rate": 6.523150973872446e-05, - "loss": 0.007503298670053482, - "step": 5875 - }, - { - "epoch": 1.0025575447570332, - "grad_norm": 0.11766793578863144, - "learning_rate": 6.52142558786218e-05, - "loss": 0.008890827000141144, - "step": 5880 - }, - { - "epoch": 1.0034100596760442, - "grad_norm": 0.058115314692258835, - "learning_rate": 6.519698908058262e-05, - "loss": 0.006190531700849533, - "step": 5885 - }, - { - "epoch": 1.0042625745950555, - "grad_norm": 0.06857501715421677, - "learning_rate": 6.51797093526676e-05, - "loss": 0.007162582129240036, - "step": 5890 - }, - { - "epoch": 1.0051150895140666, - "grad_norm": 0.03517467528581619, - "learning_rate": 6.51624167029435e-05, - "loss": 0.0060476396232843396, - "step": 5895 - }, - { - "epoch": 1.0059676044330776, - "grad_norm": 0.10047292709350586, - "learning_rate": 6.514511113948307e-05, - "loss": 0.006416718661785126, - "step": 5900 - }, - { - "epoch": 1.0068201193520887, - "grad_norm": 0.07266796380281448, - "learning_rate": 6.512779267036518e-05, - "loss": 0.005519292503595352, - "step": 5905 - }, - { - "epoch": 1.0076726342710998, - "grad_norm": 0.05385264754295349, - "learning_rate": 6.511046130367464e-05, - "loss": 0.006731215119361878, - "step": 5910 - }, - { - "epoch": 1.0085251491901108, - "grad_norm": 0.0927869975566864, - "learning_rate": 6.50931170475023e-05, - "loss": 0.0073065564036369325, - "step": 5915 - }, - { - "epoch": 1.0093776641091219, - "grad_norm": 0.08416371792554855, - "learning_rate": 6.507575990994504e-05, - "loss": 0.005843915045261383, - "step": 5920 - }, - { - "epoch": 1.010230179028133, - "grad_norm": 0.06585095822811127, - "learning_rate": 6.505838989910576e-05, - "loss": 0.006345044076442719, - "step": 5925 - }, - { - "epoch": 1.011082693947144, - "grad_norm": 0.06341785192489624, - "learning_rate": 6.504100702309336e-05, - "loss": 0.005391617119312286, - "step": 5930 - }, - { - "epoch": 1.011935208866155, - "grad_norm": 0.08260001242160797, - "learning_rate": 6.502361129002273e-05, - "loss": 0.008031262457370758, - "step": 5935 - }, - { - "epoch": 1.0127877237851663, - "grad_norm": 0.08805666118860245, - "learning_rate": 6.500620270801478e-05, - "loss": 0.006408621370792389, - "step": 5940 - }, - { - "epoch": 1.0136402387041774, - "grad_norm": 0.0704861581325531, - "learning_rate": 6.498878128519642e-05, - "loss": 0.006208440661430359, - "step": 5945 - }, - { - "epoch": 1.0144927536231885, - "grad_norm": 0.07539117336273193, - "learning_rate": 6.497134702970055e-05, - "loss": 0.005263582617044449, - "step": 5950 - }, - { - "epoch": 1.0153452685421995, - "grad_norm": 0.022507963702082634, - "learning_rate": 6.495389994966606e-05, - "loss": 0.005692056566476822, - "step": 5955 - }, - { - "epoch": 1.0161977834612106, - "grad_norm": 0.05641510710120201, - "learning_rate": 6.493644005323783e-05, - "loss": 0.007954449951648712, - "step": 5960 - }, - { - "epoch": 1.0170502983802217, - "grad_norm": 0.04853788763284683, - "learning_rate": 6.49189673485667e-05, - "loss": 0.006910678744316101, - "step": 5965 - }, - { - "epoch": 1.0179028132992327, - "grad_norm": 0.07868898659944534, - "learning_rate": 6.490148184380956e-05, - "loss": 0.007678037136793136, - "step": 5970 - }, - { - "epoch": 1.0187553282182438, - "grad_norm": 0.08481275290250778, - "learning_rate": 6.488398354712917e-05, - "loss": 0.0060794509947299956, - "step": 5975 - }, - { - "epoch": 1.0196078431372548, - "grad_norm": 0.05573422089219093, - "learning_rate": 6.486647246669435e-05, - "loss": 0.0050107244402170185, - "step": 5980 - }, - { - "epoch": 1.020460358056266, - "grad_norm": 0.10777781158685684, - "learning_rate": 6.484894861067983e-05, - "loss": 0.006611569225788117, - "step": 5985 - }, - { - "epoch": 1.0213128729752772, - "grad_norm": 0.041842151433229446, - "learning_rate": 6.483141198726635e-05, - "loss": 0.0060344856232404705, - "step": 5990 - }, - { - "epoch": 1.0221653878942882, - "grad_norm": 0.05765567347407341, - "learning_rate": 6.48138626046406e-05, - "loss": 0.005772604793310166, - "step": 5995 - }, - { - "epoch": 1.0230179028132993, - "grad_norm": 0.05987582355737686, - "learning_rate": 6.479630047099517e-05, - "loss": 0.006899695098400116, - "step": 6000 - }, - { - "epoch": 1.0238704177323104, - "grad_norm": 0.046085257083177567, - "learning_rate": 6.477872559452867e-05, - "loss": 0.006151453405618667, - "step": 6005 - }, - { - "epoch": 1.0247229326513214, - "grad_norm": 0.05994739755988121, - "learning_rate": 6.476113798344566e-05, - "loss": 0.007787984609603882, - "step": 6010 - }, - { - "epoch": 1.0255754475703325, - "grad_norm": 0.08866287767887115, - "learning_rate": 6.47435376459566e-05, - "loss": 0.007754974067211151, - "step": 6015 - }, - { - "epoch": 1.0264279624893435, - "grad_norm": 0.07492240518331528, - "learning_rate": 6.472592459027793e-05, - "loss": 0.005562775582075119, - "step": 6020 - }, - { - "epoch": 1.0272804774083546, - "grad_norm": 0.058771468698978424, - "learning_rate": 6.470829882463198e-05, - "loss": 0.008101420104503631, - "step": 6025 - }, - { - "epoch": 1.0281329923273657, - "grad_norm": 0.08099868148565292, - "learning_rate": 6.469066035724708e-05, - "loss": 0.007585109025239944, - "step": 6030 - }, - { - "epoch": 1.0289855072463767, - "grad_norm": 0.09368649870157242, - "learning_rate": 6.467300919635743e-05, - "loss": 0.007342393696308136, - "step": 6035 - }, - { - "epoch": 1.029838022165388, - "grad_norm": 0.07358572632074356, - "learning_rate": 6.465534535020317e-05, - "loss": 0.007179292291402817, - "step": 6040 - }, - { - "epoch": 1.030690537084399, - "grad_norm": 0.0542459636926651, - "learning_rate": 6.46376688270304e-05, - "loss": 0.007178785651922226, - "step": 6045 - }, - { - "epoch": 1.0315430520034101, - "grad_norm": 0.04534808546304703, - "learning_rate": 6.461997963509109e-05, - "loss": 0.005939013883471489, - "step": 6050 - }, - { - "epoch": 1.0323955669224212, - "grad_norm": 0.04498334974050522, - "learning_rate": 6.460227778264314e-05, - "loss": 0.007932021468877792, - "step": 6055 - }, - { - "epoch": 1.0332480818414322, - "grad_norm": 0.09503943473100662, - "learning_rate": 6.458456327795038e-05, - "loss": 0.006005316227674484, - "step": 6060 - }, - { - "epoch": 1.0341005967604433, - "grad_norm": 0.06634567677974701, - "learning_rate": 6.456683612928252e-05, - "loss": 0.00472346730530262, - "step": 6065 - }, - { - "epoch": 1.0349531116794544, - "grad_norm": 0.06090138852596283, - "learning_rate": 6.454909634491518e-05, - "loss": 0.0071956045925617215, - "step": 6070 - }, - { - "epoch": 1.0358056265984654, - "grad_norm": 0.09833965450525284, - "learning_rate": 6.453134393312988e-05, - "loss": 0.00738539919257164, - "step": 6075 - }, - { - "epoch": 1.0366581415174765, - "grad_norm": 0.07924133539199829, - "learning_rate": 6.451357890221406e-05, - "loss": 0.008464773744344711, - "step": 6080 - }, - { - "epoch": 1.0375106564364875, - "grad_norm": 0.04132373258471489, - "learning_rate": 6.4495801260461e-05, - "loss": 0.005705388635396958, - "step": 6085 - }, - { - "epoch": 1.0383631713554988, - "grad_norm": 0.08653424680233002, - "learning_rate": 6.44780110161699e-05, - "loss": 0.00777137503027916, - "step": 6090 - }, - { - "epoch": 1.0392156862745099, - "grad_norm": 0.08147025108337402, - "learning_rate": 6.446020817764583e-05, - "loss": 0.005003783106803894, - "step": 6095 - }, - { - "epoch": 1.040068201193521, - "grad_norm": 0.07091398537158966, - "learning_rate": 6.444239275319977e-05, - "loss": 0.005957254022359848, - "step": 6100 - }, - { - "epoch": 1.040920716112532, - "grad_norm": 0.06259306520223618, - "learning_rate": 6.442456475114855e-05, - "loss": 0.005096634104847908, - "step": 6105 - }, - { - "epoch": 1.041773231031543, - "grad_norm": 0.07044103741645813, - "learning_rate": 6.440672417981485e-05, - "loss": 0.00557241328060627, - "step": 6110 - }, - { - "epoch": 1.0426257459505541, - "grad_norm": 0.05029159039258957, - "learning_rate": 6.438887104752726e-05, - "loss": 0.0056043524295091626, - "step": 6115 - }, - { - "epoch": 1.0434782608695652, - "grad_norm": 0.04778699576854706, - "learning_rate": 6.437100536262022e-05, - "loss": 0.00855453684926033, - "step": 6120 - }, - { - "epoch": 1.0443307757885762, - "grad_norm": 0.07467184215784073, - "learning_rate": 6.435312713343401e-05, - "loss": 0.006690071523189544, - "step": 6125 - }, - { - "epoch": 1.0451832907075873, - "grad_norm": 0.07189153879880905, - "learning_rate": 6.433523636831481e-05, - "loss": 0.007009527087211609, - "step": 6130 - }, - { - "epoch": 1.0460358056265984, - "grad_norm": 0.08000020682811737, - "learning_rate": 6.431733307561459e-05, - "loss": 0.007411211729049683, - "step": 6135 - }, - { - "epoch": 1.0468883205456097, - "grad_norm": 0.06737730652093887, - "learning_rate": 6.429941726369124e-05, - "loss": 0.006843548268079758, - "step": 6140 - }, - { - "epoch": 1.0477408354646207, - "grad_norm": 0.09834714978933334, - "learning_rate": 6.428148894090841e-05, - "loss": 0.007167841494083405, - "step": 6145 - }, - { - "epoch": 1.0485933503836318, - "grad_norm": 0.06415695697069168, - "learning_rate": 6.426354811563567e-05, - "loss": 0.005131457373499871, - "step": 6150 - }, - { - "epoch": 1.0494458653026428, - "grad_norm": 0.07823871076107025, - "learning_rate": 6.424559479624839e-05, - "loss": 0.004797356575727463, - "step": 6155 - }, - { - "epoch": 1.050298380221654, - "grad_norm": 0.07165013998746872, - "learning_rate": 6.422762899112777e-05, - "loss": 0.006430945545434952, - "step": 6160 - }, - { - "epoch": 1.051150895140665, - "grad_norm": 0.10924427956342697, - "learning_rate": 6.420965070866086e-05, - "loss": 0.008151047676801682, - "step": 6165 - }, - { - "epoch": 1.052003410059676, - "grad_norm": 0.10381831228733063, - "learning_rate": 6.41916599572405e-05, - "loss": 0.009056917577981948, - "step": 6170 - }, - { - "epoch": 1.052855924978687, - "grad_norm": 0.05251248553395271, - "learning_rate": 6.417365674526539e-05, - "loss": 0.004240944981575012, - "step": 6175 - }, - { - "epoch": 1.0537084398976981, - "grad_norm": 0.0812104344367981, - "learning_rate": 6.415564108114001e-05, - "loss": 0.008805926889181137, - "step": 6180 - }, - { - "epoch": 1.0545609548167092, - "grad_norm": 0.05640942230820656, - "learning_rate": 6.413761297327469e-05, - "loss": 0.005727213248610497, - "step": 6185 - }, - { - "epoch": 1.0554134697357205, - "grad_norm": 0.10114334523677826, - "learning_rate": 6.411957243008552e-05, - "loss": 0.008660107105970382, - "step": 6190 - }, - { - "epoch": 1.0562659846547315, - "grad_norm": 0.06809760630130768, - "learning_rate": 6.410151945999447e-05, - "loss": 0.006786180287599563, - "step": 6195 - }, - { - "epoch": 1.0571184995737426, - "grad_norm": 0.08121974021196365, - "learning_rate": 6.408345407142924e-05, - "loss": 0.004730105027556419, - "step": 6200 - }, - { - "epoch": 1.0579710144927537, - "grad_norm": 0.0630379393696785, - "learning_rate": 6.406537627282336e-05, - "loss": 0.006532897800207138, - "step": 6205 - }, - { - "epoch": 1.0588235294117647, - "grad_norm": 0.09354323893785477, - "learning_rate": 6.404728607261612e-05, - "loss": 0.008165966719388962, - "step": 6210 - }, - { - "epoch": 1.0596760443307758, - "grad_norm": 0.0509798526763916, - "learning_rate": 6.402918347925267e-05, - "loss": 0.006781977415084839, - "step": 6215 - }, - { - "epoch": 1.0605285592497868, - "grad_norm": 0.09830603748559952, - "learning_rate": 6.401106850118389e-05, - "loss": 0.00675075501203537, - "step": 6220 - }, - { - "epoch": 1.061381074168798, - "grad_norm": 0.08417326211929321, - "learning_rate": 6.399294114686645e-05, - "loss": 0.005759935826063156, - "step": 6225 - }, - { - "epoch": 1.062233589087809, - "grad_norm": 0.04999511316418648, - "learning_rate": 6.39748014247628e-05, - "loss": 0.0059943776577711105, - "step": 6230 - }, - { - "epoch": 1.06308610400682, - "grad_norm": 0.0355304591357708, - "learning_rate": 6.395664934334116e-05, - "loss": 0.003978967294096946, - "step": 6235 - }, - { - "epoch": 1.0639386189258313, - "grad_norm": 0.09096778929233551, - "learning_rate": 6.393848491107554e-05, - "loss": 0.006428928673267364, - "step": 6240 - }, - { - "epoch": 1.0647911338448424, - "grad_norm": 0.09047707170248032, - "learning_rate": 6.392030813644569e-05, - "loss": 0.005584535002708435, - "step": 6245 - }, - { - "epoch": 1.0656436487638534, - "grad_norm": 0.07133036106824875, - "learning_rate": 6.390211902793714e-05, - "loss": 0.00610351674258709, - "step": 6250 - }, - { - "epoch": 1.0664961636828645, - "grad_norm": 0.1025620549917221, - "learning_rate": 6.388391759404117e-05, - "loss": 0.006316560506820679, - "step": 6255 - }, - { - "epoch": 1.0673486786018755, - "grad_norm": 0.0922650694847107, - "learning_rate": 6.386570384325482e-05, - "loss": 0.008717238903045654, - "step": 6260 - }, - { - "epoch": 1.0682011935208866, - "grad_norm": 0.094338558614254, - "learning_rate": 6.384747778408085e-05, - "loss": 0.0067199327051639555, - "step": 6265 - }, - { - "epoch": 1.0690537084398977, - "grad_norm": 0.07260075211524963, - "learning_rate": 6.382923942502782e-05, - "loss": 0.007249505072832107, - "step": 6270 - }, - { - "epoch": 1.0699062233589087, - "grad_norm": 0.06572386622428894, - "learning_rate": 6.381098877460999e-05, - "loss": 0.007879015803337098, - "step": 6275 - }, - { - "epoch": 1.0707587382779198, - "grad_norm": 0.11646077036857605, - "learning_rate": 6.379272584134737e-05, - "loss": 0.006477512419223785, - "step": 6280 - }, - { - "epoch": 1.0716112531969308, - "grad_norm": 0.14154180884361267, - "learning_rate": 6.37744506337657e-05, - "loss": 0.0069471016526222226, - "step": 6285 - }, - { - "epoch": 1.0724637681159421, - "grad_norm": 0.113606296479702, - "learning_rate": 6.375616316039647e-05, - "loss": 0.010210946947336198, - "step": 6290 - }, - { - "epoch": 1.0733162830349532, - "grad_norm": 0.07193166017532349, - "learning_rate": 6.373786342977687e-05, - "loss": 0.00820360854268074, - "step": 6295 - }, - { - "epoch": 1.0741687979539642, - "grad_norm": 0.06180251017212868, - "learning_rate": 6.371955145044983e-05, - "loss": 0.006048502773046494, - "step": 6300 - }, - { - "epoch": 1.0750213128729753, - "grad_norm": 0.06956778466701508, - "learning_rate": 6.370122723096398e-05, - "loss": 0.005345676839351654, - "step": 6305 - }, - { - "epoch": 1.0758738277919864, - "grad_norm": 0.09170625358819962, - "learning_rate": 6.368289077987368e-05, - "loss": 0.0068355493247509004, - "step": 6310 - }, - { - "epoch": 1.0767263427109974, - "grad_norm": 0.07023731619119644, - "learning_rate": 6.366454210573901e-05, - "loss": 0.004600600153207779, - "step": 6315 - }, - { - "epoch": 1.0775788576300085, - "grad_norm": 0.07429320365190506, - "learning_rate": 6.36461812171257e-05, - "loss": 0.006272794306278228, - "step": 6320 - }, - { - "epoch": 1.0784313725490196, - "grad_norm": 0.11356805264949799, - "learning_rate": 6.362780812260528e-05, - "loss": 0.0048342026770114895, - "step": 6325 - }, - { - "epoch": 1.0792838874680306, - "grad_norm": 0.11231013387441635, - "learning_rate": 6.360942283075489e-05, - "loss": 0.00653451681137085, - "step": 6330 - }, - { - "epoch": 1.0801364023870417, - "grad_norm": 0.09655431658029556, - "learning_rate": 6.359102535015739e-05, - "loss": 0.008280844241380692, - "step": 6335 - }, - { - "epoch": 1.080988917306053, - "grad_norm": 0.10172779113054276, - "learning_rate": 6.357261568940135e-05, - "loss": 0.007757744938135147, - "step": 6340 - }, - { - "epoch": 1.081841432225064, - "grad_norm": 0.06417235732078552, - "learning_rate": 6.3554193857081e-05, - "loss": 0.007309675216674805, - "step": 6345 - }, - { - "epoch": 1.082693947144075, - "grad_norm": 0.053178418427705765, - "learning_rate": 6.35357598617963e-05, - "loss": 0.007162143290042877, - "step": 6350 - }, - { - "epoch": 1.0835464620630861, - "grad_norm": 0.07408315688371658, - "learning_rate": 6.351731371215278e-05, - "loss": 0.008392173796892166, - "step": 6355 - }, - { - "epoch": 1.0843989769820972, - "grad_norm": 0.03643275052309036, - "learning_rate": 6.349885541676179e-05, - "loss": 0.00610513798892498, - "step": 6360 - }, - { - "epoch": 1.0852514919011083, - "grad_norm": 0.04701307415962219, - "learning_rate": 6.348038498424023e-05, - "loss": 0.00612705871462822, - "step": 6365 - }, - { - "epoch": 1.0861040068201193, - "grad_norm": 0.05050053820014, - "learning_rate": 6.346190242321075e-05, - "loss": 0.005640604719519615, - "step": 6370 - }, - { - "epoch": 1.0869565217391304, - "grad_norm": 0.05203640088438988, - "learning_rate": 6.344340774230159e-05, - "loss": 0.005340654775500298, - "step": 6375 - }, - { - "epoch": 1.0878090366581414, - "grad_norm": 0.07451866567134857, - "learning_rate": 6.342490095014669e-05, - "loss": 0.006459225714206695, - "step": 6380 - }, - { - "epoch": 1.0886615515771525, - "grad_norm": 0.09951499849557877, - "learning_rate": 6.340638205538566e-05, - "loss": 0.008529558777809143, - "step": 6385 - }, - { - "epoch": 1.0895140664961638, - "grad_norm": 0.06064416840672493, - "learning_rate": 6.33878510666637e-05, - "loss": 0.007885071635246276, - "step": 6390 - }, - { - "epoch": 1.0903665814151748, - "grad_norm": 0.09382321685552597, - "learning_rate": 6.33693079926317e-05, - "loss": 0.007992906123399734, - "step": 6395 - }, - { - "epoch": 1.091219096334186, - "grad_norm": 0.054066915065050125, - "learning_rate": 6.335075284194621e-05, - "loss": 0.007473263889551163, - "step": 6400 - }, - { - "epoch": 1.092071611253197, - "grad_norm": 0.06763065606355667, - "learning_rate": 6.333218562326937e-05, - "loss": 0.006374929845333099, - "step": 6405 - }, - { - "epoch": 1.092924126172208, - "grad_norm": 0.0656818076968193, - "learning_rate": 6.331360634526899e-05, - "loss": 0.006085469573736191, - "step": 6410 - }, - { - "epoch": 1.093776641091219, - "grad_norm": 0.060463279485702515, - "learning_rate": 6.329501501661848e-05, - "loss": 0.005605050176382065, - "step": 6415 - }, - { - "epoch": 1.0946291560102301, - "grad_norm": 0.05734890326857567, - "learning_rate": 6.32764116459969e-05, - "loss": 0.00563613623380661, - "step": 6420 - }, - { - "epoch": 1.0954816709292412, - "grad_norm": 0.0502542182803154, - "learning_rate": 6.32577962420889e-05, - "loss": 0.004675766825675965, - "step": 6425 - }, - { - "epoch": 1.0963341858482523, - "grad_norm": 0.06550677120685577, - "learning_rate": 6.32391688135848e-05, - "loss": 0.006265480071306229, - "step": 6430 - }, - { - "epoch": 1.0971867007672633, - "grad_norm": 0.1298699826002121, - "learning_rate": 6.322052936918048e-05, - "loss": 0.008352620899677277, - "step": 6435 - }, - { - "epoch": 1.0980392156862746, - "grad_norm": 0.08422241359949112, - "learning_rate": 6.320187791757748e-05, - "loss": 0.005868597701191902, - "step": 6440 - }, - { - "epoch": 1.0988917306052857, - "grad_norm": 0.07807652652263641, - "learning_rate": 6.318321446748291e-05, - "loss": 0.010353461652994157, - "step": 6445 - }, - { - "epoch": 1.0997442455242967, - "grad_norm": 0.0663999617099762, - "learning_rate": 6.316453902760946e-05, - "loss": 0.00667201578617096, - "step": 6450 - }, - { - "epoch": 1.1005967604433078, - "grad_norm": 0.06362646073102951, - "learning_rate": 6.314585160667547e-05, - "loss": 0.006539353728294372, - "step": 6455 - }, - { - "epoch": 1.1014492753623188, - "grad_norm": 0.04160219058394432, - "learning_rate": 6.312715221340485e-05, - "loss": 0.004082740843296051, - "step": 6460 - }, - { - "epoch": 1.10230179028133, - "grad_norm": 0.05449013039469719, - "learning_rate": 6.31084408565271e-05, - "loss": 0.007537595182657242, - "step": 6465 - }, - { - "epoch": 1.103154305200341, - "grad_norm": 0.06819169223308563, - "learning_rate": 6.308971754477729e-05, - "loss": 0.006866573542356491, - "step": 6470 - }, - { - "epoch": 1.104006820119352, - "grad_norm": 0.06622573733329773, - "learning_rate": 6.307098228689611e-05, - "loss": 0.009055091440677643, - "step": 6475 - }, - { - "epoch": 1.104859335038363, - "grad_norm": 0.05732693895697594, - "learning_rate": 6.305223509162978e-05, - "loss": 0.006077280640602112, - "step": 6480 - }, - { - "epoch": 1.1057118499573741, - "grad_norm": 0.06630431115627289, - "learning_rate": 6.303347596773012e-05, - "loss": 0.0064442440867424015, - "step": 6485 - }, - { - "epoch": 1.1065643648763854, - "grad_norm": 0.06782745569944382, - "learning_rate": 6.301470492395451e-05, - "loss": 0.005072608217597007, - "step": 6490 - }, - { - "epoch": 1.1074168797953965, - "grad_norm": 0.05796601250767708, - "learning_rate": 6.299592196906591e-05, - "loss": 0.0074319586157798765, - "step": 6495 - }, - { - "epoch": 1.1082693947144076, - "grad_norm": 0.04627149552106857, - "learning_rate": 6.297712711183282e-05, - "loss": 0.00512685589492321, - "step": 6500 - }, - { - "epoch": 1.1091219096334186, - "grad_norm": 0.08210720866918564, - "learning_rate": 6.295832036102929e-05, - "loss": 0.006917678564786911, - "step": 6505 - }, - { - "epoch": 1.1099744245524297, - "grad_norm": 0.08367052674293518, - "learning_rate": 6.293950172543496e-05, - "loss": 0.0054212499409914015, - "step": 6510 - }, - { - "epoch": 1.1108269394714407, - "grad_norm": 0.08192700892686844, - "learning_rate": 6.292067121383499e-05, - "loss": 0.00791442021727562, - "step": 6515 - }, - { - "epoch": 1.1116794543904518, - "grad_norm": 0.05766206234693527, - "learning_rate": 6.290182883502008e-05, - "loss": 0.006540960818529129, - "step": 6520 - }, - { - "epoch": 1.1125319693094629, - "grad_norm": 0.07752664387226105, - "learning_rate": 6.28829745977865e-05, - "loss": 0.009196925908327103, - "step": 6525 - }, - { - "epoch": 1.113384484228474, - "grad_norm": 0.07419038563966751, - "learning_rate": 6.2864108510936e-05, - "loss": 0.007524489611387253, - "step": 6530 - }, - { - "epoch": 1.1142369991474852, - "grad_norm": 0.04851066321134567, - "learning_rate": 6.284523058327593e-05, - "loss": 0.004060015082359314, - "step": 6535 - }, - { - "epoch": 1.1150895140664963, - "grad_norm": 0.0764140635728836, - "learning_rate": 6.282634082361911e-05, - "loss": 0.006797254830598831, - "step": 6540 - }, - { - "epoch": 1.1159420289855073, - "grad_norm": 0.06873292475938797, - "learning_rate": 6.280743924078392e-05, - "loss": 0.007637844234704971, - "step": 6545 - }, - { - "epoch": 1.1167945439045184, - "grad_norm": 0.047832686454057693, - "learning_rate": 6.278852584359425e-05, - "loss": 0.00542646199464798, - "step": 6550 - }, - { - "epoch": 1.1176470588235294, - "grad_norm": 0.10061443597078323, - "learning_rate": 6.27696006408795e-05, - "loss": 0.008591605722904206, - "step": 6555 - }, - { - "epoch": 1.1184995737425405, - "grad_norm": 0.09729041159152985, - "learning_rate": 6.27506636414746e-05, - "loss": 0.0064585842192173, - "step": 6560 - }, - { - "epoch": 1.1193520886615516, - "grad_norm": 0.04427873343229294, - "learning_rate": 6.273171485421992e-05, - "loss": 0.004846593365073204, - "step": 6565 - }, - { - "epoch": 1.1202046035805626, - "grad_norm": 0.07813888788223267, - "learning_rate": 6.271275428796146e-05, - "loss": 0.007345958054065705, - "step": 6570 - }, - { - "epoch": 1.1210571184995737, - "grad_norm": 0.12500733137130737, - "learning_rate": 6.269378195155058e-05, - "loss": 0.006376177072525024, - "step": 6575 - }, - { - "epoch": 1.1219096334185847, - "grad_norm": 0.09636004269123077, - "learning_rate": 6.267479785384422e-05, - "loss": 0.0069282323122024534, - "step": 6580 - }, - { - "epoch": 1.1227621483375958, - "grad_norm": 0.06236017122864723, - "learning_rate": 6.265580200370478e-05, - "loss": 0.0050656192004680635, - "step": 6585 - }, - { - "epoch": 1.123614663256607, - "grad_norm": 0.0596463568508625, - "learning_rate": 6.263679441000019e-05, - "loss": 0.006198804825544357, - "step": 6590 - }, - { - "epoch": 1.1244671781756181, - "grad_norm": 0.0846855491399765, - "learning_rate": 6.261777508160378e-05, - "loss": 0.0073812372982501985, - "step": 6595 - }, - { - "epoch": 1.1253196930946292, - "grad_norm": 0.05228402093052864, - "learning_rate": 6.259874402739442e-05, - "loss": 0.006196213513612747, - "step": 6600 - }, - { - "epoch": 1.1261722080136403, - "grad_norm": 0.08395595103502274, - "learning_rate": 6.257970125625647e-05, - "loss": 0.0060448311269283295, - "step": 6605 - }, - { - "epoch": 1.1270247229326513, - "grad_norm": 0.055274344980716705, - "learning_rate": 6.25606467770797e-05, - "loss": 0.006578336656093598, - "step": 6610 - }, - { - "epoch": 1.1278772378516624, - "grad_norm": 0.054609380662441254, - "learning_rate": 6.254158059875936e-05, - "loss": 0.008093905448913575, - "step": 6615 - }, - { - "epoch": 1.1287297527706734, - "grad_norm": 0.05168715491890907, - "learning_rate": 6.25225027301962e-05, - "loss": 0.006086795404553413, - "step": 6620 - }, - { - "epoch": 1.1295822676896845, - "grad_norm": 0.06260590255260468, - "learning_rate": 6.250341318029641e-05, - "loss": 0.007288631051778793, - "step": 6625 - }, - { - "epoch": 1.1304347826086956, - "grad_norm": 0.06585957854986191, - "learning_rate": 6.24843119579716e-05, - "loss": 0.005779954791069031, - "step": 6630 - }, - { - "epoch": 1.1312872975277068, - "grad_norm": 0.05828391760587692, - "learning_rate": 6.246519907213888e-05, - "loss": 0.006320308148860932, - "step": 6635 - }, - { - "epoch": 1.132139812446718, - "grad_norm": 0.08400154113769531, - "learning_rate": 6.244607453172078e-05, - "loss": 0.00452205128967762, - "step": 6640 - }, - { - "epoch": 1.132992327365729, - "grad_norm": 0.059920940548181534, - "learning_rate": 6.242693834564525e-05, - "loss": 0.00620727390050888, - "step": 6645 - }, - { - "epoch": 1.13384484228474, - "grad_norm": 0.1100456491112709, - "learning_rate": 6.240779052284571e-05, - "loss": 0.006768511235713958, - "step": 6650 - }, - { - "epoch": 1.134697357203751, - "grad_norm": 0.07722730189561844, - "learning_rate": 6.238863107226102e-05, - "loss": 0.008207496255636215, - "step": 6655 - }, - { - "epoch": 1.1355498721227621, - "grad_norm": 0.05468403548002243, - "learning_rate": 6.236946000283542e-05, - "loss": 0.005272969231009483, - "step": 6660 - }, - { - "epoch": 1.1364023870417732, - "grad_norm": 0.0685155913233757, - "learning_rate": 6.235027732351863e-05, - "loss": 0.008074409514665603, - "step": 6665 - }, - { - "epoch": 1.1372549019607843, - "grad_norm": 0.07667854428291321, - "learning_rate": 6.233108304326571e-05, - "loss": 0.00863628089427948, - "step": 6670 - }, - { - "epoch": 1.1381074168797953, - "grad_norm": 0.0727204978466034, - "learning_rate": 6.231187717103727e-05, - "loss": 0.004446333646774292, - "step": 6675 - }, - { - "epoch": 1.1389599317988064, - "grad_norm": 0.06465403735637665, - "learning_rate": 6.229265971579918e-05, - "loss": 0.007380707561969757, - "step": 6680 - }, - { - "epoch": 1.1398124467178175, - "grad_norm": 0.04102586954832077, - "learning_rate": 6.227343068652281e-05, - "loss": 0.006103607639670372, - "step": 6685 - }, - { - "epoch": 1.1406649616368287, - "grad_norm": 0.06988929212093353, - "learning_rate": 6.225419009218493e-05, - "loss": 0.007358456403017044, - "step": 6690 - }, - { - "epoch": 1.1415174765558398, - "grad_norm": 0.07802724838256836, - "learning_rate": 6.223493794176767e-05, - "loss": 0.007887010276317597, - "step": 6695 - }, - { - "epoch": 1.1423699914748509, - "grad_norm": 0.10777991265058517, - "learning_rate": 6.221567424425857e-05, - "loss": 0.007653985172510147, - "step": 6700 - }, - { - "epoch": 1.143222506393862, - "grad_norm": 0.0922352522611618, - "learning_rate": 6.219639900865058e-05, - "loss": 0.007459624856710434, - "step": 6705 - }, - { - "epoch": 1.144075021312873, - "grad_norm": 0.06321967393159866, - "learning_rate": 6.217711224394202e-05, - "loss": 0.00540911853313446, - "step": 6710 - }, - { - "epoch": 1.144927536231884, - "grad_norm": 0.09344825148582458, - "learning_rate": 6.215781395913656e-05, - "loss": 0.0053936421871185304, - "step": 6715 - }, - { - "epoch": 1.145780051150895, - "grad_norm": 0.03457584232091904, - "learning_rate": 6.213850416324333e-05, - "loss": 0.006388355046510696, - "step": 6720 - }, - { - "epoch": 1.1466325660699062, - "grad_norm": 0.06931985914707184, - "learning_rate": 6.211918286527676e-05, - "loss": 0.005831810832023621, - "step": 6725 - }, - { - "epoch": 1.1474850809889172, - "grad_norm": 0.05588890612125397, - "learning_rate": 6.209985007425668e-05, - "loss": 0.0041655078530311584, - "step": 6730 - }, - { - "epoch": 1.1483375959079285, - "grad_norm": 0.07582694292068481, - "learning_rate": 6.208050579920826e-05, - "loss": 0.006521198153495789, - "step": 6735 - }, - { - "epoch": 1.1491901108269396, - "grad_norm": 0.07055433094501495, - "learning_rate": 6.206115004916209e-05, - "loss": 0.0066129244863986966, - "step": 6740 - }, - { - "epoch": 1.1500426257459506, - "grad_norm": 0.07039172202348709, - "learning_rate": 6.204178283315405e-05, - "loss": 0.005633739382028579, - "step": 6745 - }, - { - "epoch": 1.1508951406649617, - "grad_norm": 0.07833350449800491, - "learning_rate": 6.202240416022541e-05, - "loss": 0.006761975586414337, - "step": 6750 - }, - { - "epoch": 1.1517476555839727, - "grad_norm": 0.05553733557462692, - "learning_rate": 6.200301403942278e-05, - "loss": 0.006545065343379975, - "step": 6755 - }, - { - "epoch": 1.1526001705029838, - "grad_norm": 0.07306832075119019, - "learning_rate": 6.198361247979809e-05, - "loss": 0.008323725312948227, - "step": 6760 - }, - { - "epoch": 1.1534526854219949, - "grad_norm": 0.04787914454936981, - "learning_rate": 6.196419949040867e-05, - "loss": 0.004425797611474991, - "step": 6765 - }, - { - "epoch": 1.154305200341006, - "grad_norm": 0.08021930605173111, - "learning_rate": 6.194477508031712e-05, - "loss": 0.005103312805294991, - "step": 6770 - }, - { - "epoch": 1.155157715260017, - "grad_norm": 0.0821428894996643, - "learning_rate": 6.192533925859144e-05, - "loss": 0.009274877607822418, - "step": 6775 - }, - { - "epoch": 1.156010230179028, - "grad_norm": 0.09880609810352325, - "learning_rate": 6.190589203430486e-05, - "loss": 0.007863005250692367, - "step": 6780 - }, - { - "epoch": 1.156862745098039, - "grad_norm": 0.08075276762247086, - "learning_rate": 6.188643341653604e-05, - "loss": 0.004675677418708802, - "step": 6785 - }, - { - "epoch": 1.1577152600170504, - "grad_norm": 0.0633573830127716, - "learning_rate": 6.186696341436889e-05, - "loss": 0.007359890639781952, - "step": 6790 - }, - { - "epoch": 1.1585677749360614, - "grad_norm": 0.03828895092010498, - "learning_rate": 6.184748203689265e-05, - "loss": 0.004494070634245872, - "step": 6795 - }, - { - "epoch": 1.1594202898550725, - "grad_norm": 0.07907325774431229, - "learning_rate": 6.18279892932019e-05, - "loss": 0.006256269663572312, - "step": 6800 - }, - { - "epoch": 1.1602728047740836, - "grad_norm": 0.055188342928886414, - "learning_rate": 6.180848519239647e-05, - "loss": 0.009548474848270417, - "step": 6805 - }, - { - "epoch": 1.1611253196930946, - "grad_norm": 0.05850991606712341, - "learning_rate": 6.178896974358154e-05, - "loss": 0.0056076571345329285, - "step": 6810 - }, - { - "epoch": 1.1619778346121057, - "grad_norm": 0.0626932755112648, - "learning_rate": 6.176944295586757e-05, - "loss": 0.005667714029550552, - "step": 6815 - }, - { - "epoch": 1.1628303495311167, - "grad_norm": 0.06506946682929993, - "learning_rate": 6.174990483837031e-05, - "loss": 0.006154880672693252, - "step": 6820 - }, - { - "epoch": 1.1636828644501278, - "grad_norm": 0.0535859651863575, - "learning_rate": 6.17303554002108e-05, - "loss": 0.0042555928230285645, - "step": 6825 - }, - { - "epoch": 1.1645353793691389, - "grad_norm": 0.05577898398041725, - "learning_rate": 6.171079465051538e-05, - "loss": 0.006060104072093964, - "step": 6830 - }, - { - "epoch": 1.1653878942881502, - "grad_norm": 0.05574663355946541, - "learning_rate": 6.169122259841566e-05, - "loss": 0.008667515218257904, - "step": 6835 - }, - { - "epoch": 1.1662404092071612, - "grad_norm": 0.09157130867242813, - "learning_rate": 6.16716392530485e-05, - "loss": 0.007259176671504974, - "step": 6840 - }, - { - "epoch": 1.1670929241261723, - "grad_norm": 0.06404415518045425, - "learning_rate": 6.165204462355608e-05, - "loss": 0.007140593230724334, - "step": 6845 - }, - { - "epoch": 1.1679454390451833, - "grad_norm": 0.0713329091668129, - "learning_rate": 6.163243871908581e-05, - "loss": 0.006118748337030411, - "step": 6850 - }, - { - "epoch": 1.1687979539641944, - "grad_norm": 0.04038231074810028, - "learning_rate": 6.16128215487904e-05, - "loss": 0.006028918176889419, - "step": 6855 - }, - { - "epoch": 1.1696504688832055, - "grad_norm": 0.07793593406677246, - "learning_rate": 6.159319312182777e-05, - "loss": 0.006851163506507873, - "step": 6860 - }, - { - "epoch": 1.1705029838022165, - "grad_norm": 0.07543511688709259, - "learning_rate": 6.157355344736114e-05, - "loss": 0.006878272444009781, - "step": 6865 - }, - { - "epoch": 1.1713554987212276, - "grad_norm": 0.06332696974277496, - "learning_rate": 6.155390253455897e-05, - "loss": 0.006324999034404755, - "step": 6870 - }, - { - "epoch": 1.1722080136402386, - "grad_norm": 0.06920734792947769, - "learning_rate": 6.153424039259495e-05, - "loss": 0.00536310225725174, - "step": 6875 - }, - { - "epoch": 1.1730605285592497, - "grad_norm": 0.09313163161277771, - "learning_rate": 6.151456703064802e-05, - "loss": 0.00795424059033394, - "step": 6880 - }, - { - "epoch": 1.1739130434782608, - "grad_norm": 0.08588451147079468, - "learning_rate": 6.149488245790234e-05, - "loss": 0.006889034807682037, - "step": 6885 - }, - { - "epoch": 1.174765558397272, - "grad_norm": 0.09814538061618805, - "learning_rate": 6.147518668354737e-05, - "loss": 0.007332245260477066, - "step": 6890 - }, - { - "epoch": 1.175618073316283, - "grad_norm": 0.05141104385256767, - "learning_rate": 6.145547971677772e-05, - "loss": 0.00333656407892704, - "step": 6895 - }, - { - "epoch": 1.1764705882352942, - "grad_norm": 0.05575519800186157, - "learning_rate": 6.143576156679327e-05, - "loss": 0.005542768910527229, - "step": 6900 - }, - { - "epoch": 1.1773231031543052, - "grad_norm": 0.04917008429765701, - "learning_rate": 6.14160322427991e-05, - "loss": 0.007007633149623871, - "step": 6905 - }, - { - "epoch": 1.1781756180733163, - "grad_norm": 0.06385336071252823, - "learning_rate": 6.139629175400552e-05, - "loss": 0.007495941221714019, - "step": 6910 - }, - { - "epoch": 1.1790281329923273, - "grad_norm": 0.08664151281118393, - "learning_rate": 6.137654010962805e-05, - "loss": 0.0075534448027610775, - "step": 6915 - }, - { - "epoch": 1.1798806479113384, - "grad_norm": 0.08881189674139023, - "learning_rate": 6.13567773188874e-05, - "loss": 0.0059935558587312695, - "step": 6920 - }, - { - "epoch": 1.1807331628303495, - "grad_norm": 0.07297934591770172, - "learning_rate": 6.133700339100952e-05, - "loss": 0.006142234057188034, - "step": 6925 - }, - { - "epoch": 1.1815856777493605, - "grad_norm": 0.053351663053035736, - "learning_rate": 6.131721833522552e-05, - "loss": 0.006038139387965202, - "step": 6930 - }, - { - "epoch": 1.1824381926683718, - "grad_norm": 0.12923622131347656, - "learning_rate": 6.129742216077172e-05, - "loss": 0.008645947277545928, - "step": 6935 - }, - { - "epoch": 1.1832907075873829, - "grad_norm": 0.095455601811409, - "learning_rate": 6.127761487688964e-05, - "loss": 0.004740688577294349, - "step": 6940 - }, - { - "epoch": 1.184143222506394, - "grad_norm": 0.11498606204986572, - "learning_rate": 6.125779649282599e-05, - "loss": 0.00805070549249649, - "step": 6945 - }, - { - "epoch": 1.184995737425405, - "grad_norm": 0.07489286363124847, - "learning_rate": 6.123796701783264e-05, - "loss": 0.0060746859759092334, - "step": 6950 - }, - { - "epoch": 1.185848252344416, - "grad_norm": 0.07027438282966614, - "learning_rate": 6.121812646116663e-05, - "loss": 0.006816025823354721, - "step": 6955 - }, - { - "epoch": 1.186700767263427, - "grad_norm": 0.08542973548173904, - "learning_rate": 6.119827483209024e-05, - "loss": 0.005315831303596497, - "step": 6960 - }, - { - "epoch": 1.1875532821824382, - "grad_norm": 0.08409032970666885, - "learning_rate": 6.117841213987082e-05, - "loss": 0.0061601437628269196, - "step": 6965 - }, - { - "epoch": 1.1884057971014492, - "grad_norm": 0.10387974232435226, - "learning_rate": 6.115853839378096e-05, - "loss": 0.0071022816002368925, - "step": 6970 - }, - { - "epoch": 1.1892583120204603, - "grad_norm": 0.056441329419612885, - "learning_rate": 6.113865360309838e-05, - "loss": 0.004539218544960022, - "step": 6975 - }, - { - "epoch": 1.1901108269394713, - "grad_norm": 0.10132234543561935, - "learning_rate": 6.111875777710598e-05, - "loss": 0.0060778014361858364, - "step": 6980 - }, - { - "epoch": 1.1909633418584824, - "grad_norm": 0.07129226624965668, - "learning_rate": 6.109885092509177e-05, - "loss": 0.007794113457202911, - "step": 6985 - }, - { - "epoch": 1.1918158567774937, - "grad_norm": 0.09267892688512802, - "learning_rate": 6.107893305634893e-05, - "loss": 0.006867295503616333, - "step": 6990 - }, - { - "epoch": 1.1926683716965047, - "grad_norm": 0.0739888921380043, - "learning_rate": 6.10590041801758e-05, - "loss": 0.006263263523578644, - "step": 6995 - }, - { - "epoch": 1.1935208866155158, - "grad_norm": 0.10201311856508255, - "learning_rate": 6.103906430587583e-05, - "loss": 0.006245525926351548, - "step": 7000 - }, - { - "epoch": 1.1943734015345269, - "grad_norm": 0.08561360090970993, - "learning_rate": 6.1019113442757636e-05, - "loss": 0.005739351361989975, - "step": 7005 - }, - { - "epoch": 1.195225916453538, - "grad_norm": 0.06410367786884308, - "learning_rate": 6.099915160013491e-05, - "loss": 0.00598936527967453, - "step": 7010 - }, - { - "epoch": 1.196078431372549, - "grad_norm": 0.11765716224908829, - "learning_rate": 6.0979178787326544e-05, - "loss": 0.010477253794670105, - "step": 7015 - }, - { - "epoch": 1.19693094629156, - "grad_norm": 0.06659694015979767, - "learning_rate": 6.095919501365648e-05, - "loss": 0.0072447523474693295, - "step": 7020 - }, - { - "epoch": 1.197783461210571, - "grad_norm": 0.05651358142495155, - "learning_rate": 6.093920028845381e-05, - "loss": 0.004644358158111572, - "step": 7025 - }, - { - "epoch": 1.1986359761295824, - "grad_norm": 0.07198809087276459, - "learning_rate": 6.0919194621052764e-05, - "loss": 0.00517328754067421, - "step": 7030 - }, - { - "epoch": 1.1994884910485935, - "grad_norm": 0.06188420578837395, - "learning_rate": 6.0899178020792614e-05, - "loss": 0.005182894691824913, - "step": 7035 - }, - { - "epoch": 1.2003410059676045, - "grad_norm": 0.07731341570615768, - "learning_rate": 6.087915049701783e-05, - "loss": 0.006863371282815933, - "step": 7040 - }, - { - "epoch": 1.2011935208866156, - "grad_norm": 0.07693833857774734, - "learning_rate": 6.0859112059077866e-05, - "loss": 0.008634812384843826, - "step": 7045 - }, - { - "epoch": 1.2020460358056266, - "grad_norm": 0.08118413388729095, - "learning_rate": 6.083906271632736e-05, - "loss": 0.008003174513578414, - "step": 7050 - }, - { - "epoch": 1.2028985507246377, - "grad_norm": 0.07794218510389328, - "learning_rate": 6.0819002478126016e-05, - "loss": 0.005899757146835327, - "step": 7055 - }, - { - "epoch": 1.2037510656436488, - "grad_norm": 0.08107218146324158, - "learning_rate": 6.079893135383861e-05, - "loss": 0.007581057399511338, - "step": 7060 - }, - { - "epoch": 1.2046035805626598, - "grad_norm": 0.06910198926925659, - "learning_rate": 6.077884935283502e-05, - "loss": 0.00794234573841095, - "step": 7065 - }, - { - "epoch": 1.2054560954816709, - "grad_norm": 0.08426421135663986, - "learning_rate": 6.0758756484490186e-05, - "loss": 0.0057635679841041565, - "step": 7070 - }, - { - "epoch": 1.206308610400682, - "grad_norm": 0.08670961856842041, - "learning_rate": 6.073865275818413e-05, - "loss": 0.006369538605213165, - "step": 7075 - }, - { - "epoch": 1.207161125319693, - "grad_norm": 0.04245399683713913, - "learning_rate": 6.071853818330193e-05, - "loss": 0.0067548036575317385, - "step": 7080 - }, - { - "epoch": 1.208013640238704, - "grad_norm": 0.10600235313177109, - "learning_rate": 6.069841276923376e-05, - "loss": 0.006923867762088776, - "step": 7085 - }, - { - "epoch": 1.2088661551577153, - "grad_norm": 0.07093790173530579, - "learning_rate": 6.0678276525374814e-05, - "loss": 0.005083417519927025, - "step": 7090 - }, - { - "epoch": 1.2097186700767264, - "grad_norm": 0.04997126758098602, - "learning_rate": 6.065812946112537e-05, - "loss": 0.006207586824893951, - "step": 7095 - }, - { - "epoch": 1.2105711849957375, - "grad_norm": 0.04425262287259102, - "learning_rate": 6.063797158589074e-05, - "loss": 0.0046977601945400235, - "step": 7100 - }, - { - "epoch": 1.2114236999147485, - "grad_norm": 0.07300136238336563, - "learning_rate": 6.0617802909081295e-05, - "loss": 0.005589437484741211, - "step": 7105 - }, - { - "epoch": 1.2122762148337596, - "grad_norm": 0.0878889262676239, - "learning_rate": 6.0597623440112445e-05, - "loss": 0.005844222381711006, - "step": 7110 - }, - { - "epoch": 1.2131287297527706, - "grad_norm": 0.09079992771148682, - "learning_rate": 6.0577433188404635e-05, - "loss": 0.007329034805297852, - "step": 7115 - }, - { - "epoch": 1.2139812446717817, - "grad_norm": 0.07165387272834778, - "learning_rate": 6.055723216338336e-05, - "loss": 0.006642927974462509, - "step": 7120 - }, - { - "epoch": 1.2148337595907928, - "grad_norm": 0.07113955169916153, - "learning_rate": 6.053702037447911e-05, - "loss": 0.006992670893669129, - "step": 7125 - }, - { - "epoch": 1.215686274509804, - "grad_norm": 0.08737215399742126, - "learning_rate": 6.0516797831127425e-05, - "loss": 0.006702055037021637, - "step": 7130 - }, - { - "epoch": 1.216538789428815, - "grad_norm": 0.07276564091444016, - "learning_rate": 6.049656454276887e-05, - "loss": 0.005692619457840919, - "step": 7135 - }, - { - "epoch": 1.2173913043478262, - "grad_norm": 0.09203831106424332, - "learning_rate": 6.0476320518849006e-05, - "loss": 0.006464710086584091, - "step": 7140 - }, - { - "epoch": 1.2182438192668372, - "grad_norm": 0.07749375700950623, - "learning_rate": 6.045606576881843e-05, - "loss": 0.008598372340202332, - "step": 7145 - }, - { - "epoch": 1.2190963341858483, - "grad_norm": 0.04338040575385094, - "learning_rate": 6.043580030213272e-05, - "loss": 0.006892016530036927, - "step": 7150 - }, - { - "epoch": 1.2199488491048593, - "grad_norm": 0.06691034138202667, - "learning_rate": 6.0415524128252474e-05, - "loss": 0.007622670382261276, - "step": 7155 - }, - { - "epoch": 1.2208013640238704, - "grad_norm": 0.07532396167516708, - "learning_rate": 6.039523725664329e-05, - "loss": 0.00698106437921524, - "step": 7160 - }, - { - "epoch": 1.2216538789428815, - "grad_norm": 0.0408058688044548, - "learning_rate": 6.037493969677575e-05, - "loss": 0.005919446796178817, - "step": 7165 - }, - { - "epoch": 1.2225063938618925, - "grad_norm": 0.07306578010320663, - "learning_rate": 6.0354631458125425e-05, - "loss": 0.008953345566987991, - "step": 7170 - }, - { - "epoch": 1.2233589087809036, - "grad_norm": 0.08269601315259933, - "learning_rate": 6.033431255017289e-05, - "loss": 0.007224951684474945, - "step": 7175 - }, - { - "epoch": 1.2242114236999146, - "grad_norm": 0.045140884816646576, - "learning_rate": 6.0313982982403676e-05, - "loss": 0.006175454705953598, - "step": 7180 - }, - { - "epoch": 1.2250639386189257, - "grad_norm": 0.0721440315246582, - "learning_rate": 6.0293642764308304e-05, - "loss": 0.007267911732196808, - "step": 7185 - }, - { - "epoch": 1.225916453537937, - "grad_norm": 0.081813283264637, - "learning_rate": 6.027329190538227e-05, - "loss": 0.006872846186161042, - "step": 7190 - }, - { - "epoch": 1.226768968456948, - "grad_norm": 0.05662613734602928, - "learning_rate": 6.025293041512602e-05, - "loss": 0.004837202653288841, - "step": 7195 - }, - { - "epoch": 1.227621483375959, - "grad_norm": 0.10023492574691772, - "learning_rate": 6.023255830304498e-05, - "loss": 0.0060194481164217, - "step": 7200 - }, - { - "epoch": 1.2284739982949702, - "grad_norm": 0.06398235261440277, - "learning_rate": 6.021217557864954e-05, - "loss": 0.007653398066759109, - "step": 7205 - }, - { - "epoch": 1.2293265132139812, - "grad_norm": 0.09494475275278091, - "learning_rate": 6.019178225145503e-05, - "loss": 0.007547302544116974, - "step": 7210 - }, - { - "epoch": 1.2301790281329923, - "grad_norm": 0.05356467142701149, - "learning_rate": 6.017137833098171e-05, - "loss": 0.007133310288190841, - "step": 7215 - }, - { - "epoch": 1.2310315430520034, - "grad_norm": 0.09225092083215714, - "learning_rate": 6.0150963826754836e-05, - "loss": 0.006320309638977051, - "step": 7220 - }, - { - "epoch": 1.2318840579710144, - "grad_norm": 0.07072161883115768, - "learning_rate": 6.013053874830458e-05, - "loss": 0.007313568145036697, - "step": 7225 - }, - { - "epoch": 1.2327365728900257, - "grad_norm": 0.07206818461418152, - "learning_rate": 6.0110103105166026e-05, - "loss": 0.0054031949490308765, - "step": 7230 - }, - { - "epoch": 1.2335890878090368, - "grad_norm": 0.08611681312322617, - "learning_rate": 6.008965690687922e-05, - "loss": 0.00670153945684433, - "step": 7235 - }, - { - "epoch": 1.2344416027280478, - "grad_norm": 0.07864221930503845, - "learning_rate": 6.0069200162989154e-05, - "loss": 0.0069690033793449405, - "step": 7240 - }, - { - "epoch": 1.2352941176470589, - "grad_norm": 0.06847227364778519, - "learning_rate": 6.0048732883045665e-05, - "loss": 0.006755173206329346, - "step": 7245 - }, - { - "epoch": 1.23614663256607, - "grad_norm": 0.06264699995517731, - "learning_rate": 6.0028255076603606e-05, - "loss": 0.00519348569214344, - "step": 7250 - }, - { - "epoch": 1.236999147485081, - "grad_norm": 0.04114431515336037, - "learning_rate": 6.0007766753222665e-05, - "loss": 0.006071234866976738, - "step": 7255 - }, - { - "epoch": 1.237851662404092, - "grad_norm": 0.03660140559077263, - "learning_rate": 5.998726792246751e-05, - "loss": 0.007517064362764359, - "step": 7260 - }, - { - "epoch": 1.2387041773231031, - "grad_norm": 0.10343052446842194, - "learning_rate": 5.9966758593907636e-05, - "loss": 0.0070131182670593265, - "step": 7265 - }, - { - "epoch": 1.2395566922421142, - "grad_norm": 0.08169959485530853, - "learning_rate": 5.994623877711751e-05, - "loss": 0.006279528886079788, - "step": 7270 - }, - { - "epoch": 1.2404092071611252, - "grad_norm": 0.06312677264213562, - "learning_rate": 5.992570848167645e-05, - "loss": 0.0041657909750938416, - "step": 7275 - }, - { - "epoch": 1.2412617220801363, - "grad_norm": 0.08725713193416595, - "learning_rate": 5.990516771716869e-05, - "loss": 0.007908149063587189, - "step": 7280 - }, - { - "epoch": 1.2421142369991476, - "grad_norm": 0.05857875198125839, - "learning_rate": 5.988461649318333e-05, - "loss": 0.005137740075588227, - "step": 7285 - }, - { - "epoch": 1.2429667519181586, - "grad_norm": 0.04836168512701988, - "learning_rate": 5.986405481931438e-05, - "loss": 0.005033157765865326, - "step": 7290 - }, - { - "epoch": 1.2438192668371697, - "grad_norm": 0.09514568001031876, - "learning_rate": 5.98434827051607e-05, - "loss": 0.007460397481918335, - "step": 7295 - }, - { - "epoch": 1.2446717817561808, - "grad_norm": 0.049415748566389084, - "learning_rate": 5.982290016032604e-05, - "loss": 0.0049881644546985624, - "step": 7300 - }, - { - "epoch": 1.2455242966751918, - "grad_norm": 0.1069302037358284, - "learning_rate": 5.980230719441903e-05, - "loss": 0.005356961116194725, - "step": 7305 - }, - { - "epoch": 1.2463768115942029, - "grad_norm": 0.1031380444765091, - "learning_rate": 5.9781703817053136e-05, - "loss": 0.0067513369023799895, - "step": 7310 - }, - { - "epoch": 1.247229326513214, - "grad_norm": 0.05909884348511696, - "learning_rate": 5.976109003784671e-05, - "loss": 0.005357486009597778, - "step": 7315 - }, - { - "epoch": 1.248081841432225, - "grad_norm": 0.09306607395410538, - "learning_rate": 5.974046586642295e-05, - "loss": 0.005747456848621368, - "step": 7320 - }, - { - "epoch": 1.248934356351236, - "grad_norm": 0.0688752606511116, - "learning_rate": 5.971983131240988e-05, - "loss": 0.0073902375996112825, - "step": 7325 - }, - { - "epoch": 1.2497868712702473, - "grad_norm": 0.06592141836881638, - "learning_rate": 5.969918638544044e-05, - "loss": 0.008268805593252182, - "step": 7330 - }, - { - "epoch": 1.2497868712702473, - "eval_loss": 0.037298671901226044, - "eval_runtime": 3.6917, - "eval_samples_per_second": 68.261, - "eval_steps_per_second": 1.084, - "step": 7330 - }, - { - "eval_cer_subset": 0.01283241324278991, - "eval_cer_subset_edit_distance": 788, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 7330 - }, - { - "epoch": 1.2506393861892584, - "grad_norm": 0.06094380095601082, - "learning_rate": 5.9678531095152326e-05, - "loss": 0.005528298765420913, - "step": 7335 - }, - { - "epoch": 1.2514919011082695, - "grad_norm": 0.1417030543088913, - "learning_rate": 5.965786545118815e-05, - "loss": 0.00984017476439476, - "step": 7340 - }, - { - "epoch": 1.2523444160272805, - "grad_norm": 0.08209668844938278, - "learning_rate": 5.963718946319529e-05, - "loss": 0.007516486942768097, - "step": 7345 - }, - { - "epoch": 1.2531969309462916, - "grad_norm": 0.06825494766235352, - "learning_rate": 5.9616503140826006e-05, - "loss": 0.005924524366855621, - "step": 7350 - }, - { - "epoch": 1.2540494458653026, - "grad_norm": 0.11229037493467331, - "learning_rate": 5.959580649373736e-05, - "loss": 0.006495627760887146, - "step": 7355 - }, - { - "epoch": 1.2549019607843137, - "grad_norm": 0.13235078752040863, - "learning_rate": 5.957509953159123e-05, - "loss": 0.00942063182592392, - "step": 7360 - }, - { - "epoch": 1.2557544757033248, - "grad_norm": 0.04514055699110031, - "learning_rate": 5.955438226405432e-05, - "loss": 0.006601292639970779, - "step": 7365 - }, - { - "epoch": 1.2566069906223358, - "grad_norm": 0.08192043751478195, - "learning_rate": 5.9533654700798126e-05, - "loss": 0.007403627783060074, - "step": 7370 - }, - { - "epoch": 1.257459505541347, - "grad_norm": 0.07101254910230637, - "learning_rate": 5.951291685149898e-05, - "loss": 0.006301522254943848, - "step": 7375 - }, - { - "epoch": 1.258312020460358, - "grad_norm": 0.05598035827279091, - "learning_rate": 5.949216872583799e-05, - "loss": 0.006812388449907303, - "step": 7380 - }, - { - "epoch": 1.259164535379369, - "grad_norm": 0.06444506347179413, - "learning_rate": 5.9471410333501085e-05, - "loss": 0.005891536176204681, - "step": 7385 - }, - { - "epoch": 1.2600170502983803, - "grad_norm": 0.04921717569231987, - "learning_rate": 5.945064168417895e-05, - "loss": 0.004649973660707474, - "step": 7390 - }, - { - "epoch": 1.2608695652173914, - "grad_norm": 0.09095602482557297, - "learning_rate": 5.94298627875671e-05, - "loss": 0.007515725493431091, - "step": 7395 - }, - { - "epoch": 1.2617220801364024, - "grad_norm": 0.09932803362607956, - "learning_rate": 5.9409073653365816e-05, - "loss": 0.006223166733980179, - "step": 7400 - }, - { - "epoch": 1.2625745950554135, - "grad_norm": 0.08616010844707489, - "learning_rate": 5.938827429128014e-05, - "loss": 0.006999516487121582, - "step": 7405 - }, - { - "epoch": 1.2634271099744245, - "grad_norm": 0.11979297548532486, - "learning_rate": 5.936746471101993e-05, - "loss": 0.00812242105603218, - "step": 7410 - }, - { - "epoch": 1.2642796248934356, - "grad_norm": 0.12872007489204407, - "learning_rate": 5.934664492229976e-05, - "loss": 0.006246988475322723, - "step": 7415 - }, - { - "epoch": 1.2651321398124467, - "grad_norm": 0.0831044539809227, - "learning_rate": 5.932581493483903e-05, - "loss": 0.00590248554944992, - "step": 7420 - }, - { - "epoch": 1.265984654731458, - "grad_norm": 0.09913221001625061, - "learning_rate": 5.9304974758361857e-05, - "loss": 0.007224322855472564, - "step": 7425 - }, - { - "epoch": 1.266837169650469, - "grad_norm": 0.08654595911502838, - "learning_rate": 5.928412440259713e-05, - "loss": 0.007056090980768204, - "step": 7430 - }, - { - "epoch": 1.26768968456948, - "grad_norm": 0.07882801443338394, - "learning_rate": 5.926326387727849e-05, - "loss": 0.00572751946747303, - "step": 7435 - }, - { - "epoch": 1.2685421994884911, - "grad_norm": 0.12886428833007812, - "learning_rate": 5.924239319214432e-05, - "loss": 0.0106881283223629, - "step": 7440 - }, - { - "epoch": 1.2693947144075022, - "grad_norm": 0.05597686767578125, - "learning_rate": 5.922151235693775e-05, - "loss": 0.005041084438562393, - "step": 7445 - }, - { - "epoch": 1.2702472293265132, - "grad_norm": 0.10719682276248932, - "learning_rate": 5.920062138140665e-05, - "loss": 0.007724158465862274, - "step": 7450 - }, - { - "epoch": 1.2710997442455243, - "grad_norm": 0.045485325157642365, - "learning_rate": 5.917972027530363e-05, - "loss": 0.003246675431728363, - "step": 7455 - }, - { - "epoch": 1.2719522591645354, - "grad_norm": 0.09602563083171844, - "learning_rate": 5.9158809048386017e-05, - "loss": 0.006592199206352234, - "step": 7460 - }, - { - "epoch": 1.2728047740835464, - "grad_norm": 0.0555407889187336, - "learning_rate": 5.913788771041586e-05, - "loss": 0.00537751168012619, - "step": 7465 - }, - { - "epoch": 1.2736572890025575, - "grad_norm": 0.15820109844207764, - "learning_rate": 5.911695627115994e-05, - "loss": 0.005968114733695984, - "step": 7470 - }, - { - "epoch": 1.2745098039215685, - "grad_norm": 0.05781199410557747, - "learning_rate": 5.9096014740389754e-05, - "loss": 0.00887204110622406, - "step": 7475 - }, - { - "epoch": 1.2753623188405796, - "grad_norm": 0.07927337288856506, - "learning_rate": 5.90750631278815e-05, - "loss": 0.006439142674207687, - "step": 7480 - }, - { - "epoch": 1.2762148337595907, - "grad_norm": 0.03843824937939644, - "learning_rate": 5.905410144341609e-05, - "loss": 0.007792883366346359, - "step": 7485 - }, - { - "epoch": 1.277067348678602, - "grad_norm": 0.0692640095949173, - "learning_rate": 5.903312969677914e-05, - "loss": 0.006274447590112686, - "step": 7490 - }, - { - "epoch": 1.277919863597613, - "grad_norm": 0.07501527667045593, - "learning_rate": 5.901214789776094e-05, - "loss": 0.007496471703052521, - "step": 7495 - }, - { - "epoch": 1.278772378516624, - "grad_norm": 0.10271260142326355, - "learning_rate": 5.8991156056156514e-05, - "loss": 0.008766942471265794, - "step": 7500 - }, - { - "epoch": 1.2796248934356351, - "grad_norm": 0.03995242714881897, - "learning_rate": 5.897015418176555e-05, - "loss": 0.0055749226361513134, - "step": 7505 - }, - { - "epoch": 1.2804774083546462, - "grad_norm": 0.09215585142374039, - "learning_rate": 5.8949142284392406e-05, - "loss": 0.005763960257172585, - "step": 7510 - }, - { - "epoch": 1.2813299232736572, - "grad_norm": 0.07763402909040451, - "learning_rate": 5.892812037384615e-05, - "loss": 0.006439389288425445, - "step": 7515 - }, - { - "epoch": 1.2821824381926683, - "grad_norm": 0.04945438355207443, - "learning_rate": 5.890708845994049e-05, - "loss": 0.006960665434598922, - "step": 7520 - }, - { - "epoch": 1.2830349531116796, - "grad_norm": 0.05348283797502518, - "learning_rate": 5.888604655249384e-05, - "loss": 0.0061422914266586305, - "step": 7525 - }, - { - "epoch": 1.2838874680306906, - "grad_norm": 0.10389877110719681, - "learning_rate": 5.886499466132926e-05, - "loss": 0.009247081726789475, - "step": 7530 - }, - { - "epoch": 1.2847399829497017, - "grad_norm": 0.07753872126340866, - "learning_rate": 5.884393279627448e-05, - "loss": 0.004902977123856544, - "step": 7535 - }, - { - "epoch": 1.2855924978687128, - "grad_norm": 0.10553103685379028, - "learning_rate": 5.8822860967161856e-05, - "loss": 0.004547145590186119, - "step": 7540 - }, - { - "epoch": 1.2864450127877238, - "grad_norm": 0.08235067129135132, - "learning_rate": 5.880177918382844e-05, - "loss": 0.005282455682754516, - "step": 7545 - }, - { - "epoch": 1.287297527706735, - "grad_norm": 0.08135014772415161, - "learning_rate": 5.878068745611591e-05, - "loss": 0.006127358600497246, - "step": 7550 - }, - { - "epoch": 1.288150042625746, - "grad_norm": 0.04027952626347542, - "learning_rate": 5.875958579387056e-05, - "loss": 0.008251778036355972, - "step": 7555 - }, - { - "epoch": 1.289002557544757, - "grad_norm": 0.1060953438282013, - "learning_rate": 5.8738474206943385e-05, - "loss": 0.008290941268205643, - "step": 7560 - }, - { - "epoch": 1.289855072463768, - "grad_norm": 0.06716421991586685, - "learning_rate": 5.871735270518995e-05, - "loss": 0.004932524263858795, - "step": 7565 - }, - { - "epoch": 1.2907075873827791, - "grad_norm": 0.07644582539796829, - "learning_rate": 5.869622129847048e-05, - "loss": 0.006172410026192665, - "step": 7570 - }, - { - "epoch": 1.2915601023017902, - "grad_norm": 0.06018557399511337, - "learning_rate": 5.867507999664983e-05, - "loss": 0.005532362312078476, - "step": 7575 - }, - { - "epoch": 1.2924126172208013, - "grad_norm": 0.06454342603683472, - "learning_rate": 5.865392880959745e-05, - "loss": 0.005053167790174484, - "step": 7580 - }, - { - "epoch": 1.2932651321398123, - "grad_norm": 0.07618142664432526, - "learning_rate": 5.863276774718742e-05, - "loss": 0.005658206716179848, - "step": 7585 - }, - { - "epoch": 1.2941176470588236, - "grad_norm": 0.05649973824620247, - "learning_rate": 5.8611596819298434e-05, - "loss": 0.007477214187383651, - "step": 7590 - }, - { - "epoch": 1.2949701619778347, - "grad_norm": 0.09222351759672165, - "learning_rate": 5.859041603581377e-05, - "loss": 0.006974493712186813, - "step": 7595 - }, - { - "epoch": 1.2958226768968457, - "grad_norm": 0.07462326437234879, - "learning_rate": 5.856922540662134e-05, - "loss": 0.008175718039274216, - "step": 7600 - }, - { - "epoch": 1.2966751918158568, - "grad_norm": 0.10593193024396896, - "learning_rate": 5.854802494161364e-05, - "loss": 0.006635700166225433, - "step": 7605 - }, - { - "epoch": 1.2975277067348678, - "grad_norm": 0.08673358708620071, - "learning_rate": 5.8526814650687724e-05, - "loss": 0.007347754389047623, - "step": 7610 - }, - { - "epoch": 1.298380221653879, - "grad_norm": 0.10450063645839691, - "learning_rate": 5.850559454374528e-05, - "loss": 0.008085139095783234, - "step": 7615 - }, - { - "epoch": 1.29923273657289, - "grad_norm": 0.04219435900449753, - "learning_rate": 5.848436463069257e-05, - "loss": 0.006296204030513763, - "step": 7620 - }, - { - "epoch": 1.3000852514919012, - "grad_norm": 0.08187524974346161, - "learning_rate": 5.84631249214404e-05, - "loss": 0.007680010050535202, - "step": 7625 - }, - { - "epoch": 1.3009377664109123, - "grad_norm": 0.21044164896011353, - "learning_rate": 5.844187542590418e-05, - "loss": 0.008709554374217988, - "step": 7630 - }, - { - "epoch": 1.3017902813299234, - "grad_norm": 0.09822215139865875, - "learning_rate": 5.842061615400389e-05, - "loss": 0.007412384450435639, - "step": 7635 - }, - { - "epoch": 1.3026427962489344, - "grad_norm": 0.05957398563623428, - "learning_rate": 5.8399347115664053e-05, - "loss": 0.0062717020511627196, - "step": 7640 - }, - { - "epoch": 1.3034953111679455, - "grad_norm": 0.07013436406850815, - "learning_rate": 5.837806832081378e-05, - "loss": 0.005471421033143997, - "step": 7645 - }, - { - "epoch": 1.3043478260869565, - "grad_norm": 0.09616916626691818, - "learning_rate": 5.835677977938671e-05, - "loss": 0.008985907584428788, - "step": 7650 - }, - { - "epoch": 1.3052003410059676, - "grad_norm": 0.07946161180734634, - "learning_rate": 5.833548150132105e-05, - "loss": 0.00563003197312355, - "step": 7655 - }, - { - "epoch": 1.3060528559249787, - "grad_norm": 0.0630686804652214, - "learning_rate": 5.831417349655953e-05, - "loss": 0.007591667026281357, - "step": 7660 - }, - { - "epoch": 1.3069053708439897, - "grad_norm": 0.08530164510011673, - "learning_rate": 5.829285577504944e-05, - "loss": 0.006751708686351776, - "step": 7665 - }, - { - "epoch": 1.3077578857630008, - "grad_norm": 0.045148320496082306, - "learning_rate": 5.8271528346742616e-05, - "loss": 0.0052963607013225555, - "step": 7670 - }, - { - "epoch": 1.3086104006820118, - "grad_norm": 0.07147885859012604, - "learning_rate": 5.82501912215954e-05, - "loss": 0.005282463133335113, - "step": 7675 - }, - { - "epoch": 1.309462915601023, - "grad_norm": 0.0933302789926529, - "learning_rate": 5.8228844409568654e-05, - "loss": 0.0073209434747695925, - "step": 7680 - }, - { - "epoch": 1.310315430520034, - "grad_norm": 0.07449645549058914, - "learning_rate": 5.820748792062781e-05, - "loss": 0.007801111787557602, - "step": 7685 - }, - { - "epoch": 1.3111679454390452, - "grad_norm": 0.04569214582443237, - "learning_rate": 5.8186121764742774e-05, - "loss": 0.006659354269504547, - "step": 7690 - }, - { - "epoch": 1.3120204603580563, - "grad_norm": 0.07046396285295486, - "learning_rate": 5.8164745951887995e-05, - "loss": 0.006448440253734589, - "step": 7695 - }, - { - "epoch": 1.3128729752770674, - "grad_norm": 0.09704319387674332, - "learning_rate": 5.814336049204239e-05, - "loss": 0.008210816234350205, - "step": 7700 - }, - { - "epoch": 1.3137254901960784, - "grad_norm": 0.06477776169776917, - "learning_rate": 5.81219653951894e-05, - "loss": 0.005369330942630768, - "step": 7705 - }, - { - "epoch": 1.3145780051150895, - "grad_norm": 0.11657397449016571, - "learning_rate": 5.810056067131698e-05, - "loss": 0.010190412402153015, - "step": 7710 - }, - { - "epoch": 1.3154305200341005, - "grad_norm": 0.06578268110752106, - "learning_rate": 5.8079146330417575e-05, - "loss": 0.006289052963256836, - "step": 7715 - }, - { - "epoch": 1.3162830349531116, - "grad_norm": 0.06296945363283157, - "learning_rate": 5.80577223824881e-05, - "loss": 0.008120459318161011, - "step": 7720 - }, - { - "epoch": 1.317135549872123, - "grad_norm": 0.08275634050369263, - "learning_rate": 5.803628883752996e-05, - "loss": 0.006926379352807999, - "step": 7725 - }, - { - "epoch": 1.317988064791134, - "grad_norm": 0.0693436712026596, - "learning_rate": 5.8014845705549086e-05, - "loss": 0.006521113961935043, - "step": 7730 - }, - { - "epoch": 1.318840579710145, - "grad_norm": 0.05845775827765465, - "learning_rate": 5.799339299655579e-05, - "loss": 0.00762510895729065, - "step": 7735 - }, - { - "epoch": 1.319693094629156, - "grad_norm": 0.08802217245101929, - "learning_rate": 5.7971930720564947e-05, - "loss": 0.008071760833263397, - "step": 7740 - }, - { - "epoch": 1.3205456095481671, - "grad_norm": 0.08866037428379059, - "learning_rate": 5.795045888759585e-05, - "loss": 0.006111105903983116, - "step": 7745 - }, - { - "epoch": 1.3213981244671782, - "grad_norm": 0.0844360888004303, - "learning_rate": 5.792897750767225e-05, - "loss": 0.005196729302406311, - "step": 7750 - }, - { - "epoch": 1.3222506393861893, - "grad_norm": 0.06763950735330582, - "learning_rate": 5.79074865908224e-05, - "loss": 0.006462454050779343, - "step": 7755 - }, - { - "epoch": 1.3231031543052003, - "grad_norm": 0.06333937495946884, - "learning_rate": 5.7885986147078946e-05, - "loss": 0.0068017512559890745, - "step": 7760 - }, - { - "epoch": 1.3239556692242114, - "grad_norm": 0.05730217695236206, - "learning_rate": 5.786447618647904e-05, - "loss": 0.0065845087170600895, - "step": 7765 - }, - { - "epoch": 1.3248081841432224, - "grad_norm": 0.06838720291852951, - "learning_rate": 5.784295671906422e-05, - "loss": 0.0059626404196023945, - "step": 7770 - }, - { - "epoch": 1.3256606990622335, - "grad_norm": 0.06693503260612488, - "learning_rate": 5.782142775488051e-05, - "loss": 0.008056168258190156, - "step": 7775 - }, - { - "epoch": 1.3265132139812446, - "grad_norm": 0.07886708527803421, - "learning_rate": 5.7799889303978324e-05, - "loss": 0.006670922040939331, - "step": 7780 - }, - { - "epoch": 1.3273657289002558, - "grad_norm": 0.06932322680950165, - "learning_rate": 5.777834137641255e-05, - "loss": 0.006734507530927658, - "step": 7785 - }, - { - "epoch": 1.328218243819267, - "grad_norm": 0.08057818561792374, - "learning_rate": 5.775678398224247e-05, - "loss": 0.005952415242791176, - "step": 7790 - }, - { - "epoch": 1.329070758738278, - "grad_norm": 0.06614059209823608, - "learning_rate": 5.7735217131531785e-05, - "loss": 0.007471600174903869, - "step": 7795 - }, - { - "epoch": 1.329923273657289, - "grad_norm": 0.06335467845201492, - "learning_rate": 5.771364083434862e-05, - "loss": 0.007279399782419205, - "step": 7800 - }, - { - "epoch": 1.3307757885763, - "grad_norm": 0.11745526641607285, - "learning_rate": 5.769205510076552e-05, - "loss": 0.006242561340332031, - "step": 7805 - }, - { - "epoch": 1.3316283034953111, - "grad_norm": 0.0590963289141655, - "learning_rate": 5.7670459940859414e-05, - "loss": 0.006263129413127899, - "step": 7810 - }, - { - "epoch": 1.3324808184143222, - "grad_norm": 0.05416800454258919, - "learning_rate": 5.764885536471164e-05, - "loss": 0.00531160868704319, - "step": 7815 - }, - { - "epoch": 1.3333333333333333, - "grad_norm": 0.05527244135737419, - "learning_rate": 5.7627241382407933e-05, - "loss": 0.005747637152671814, - "step": 7820 - }, - { - "epoch": 1.3341858482523445, - "grad_norm": 0.057753629982471466, - "learning_rate": 5.760561800403844e-05, - "loss": 0.004979781061410904, - "step": 7825 - }, - { - "epoch": 1.3350383631713556, - "grad_norm": 0.10882547497749329, - "learning_rate": 5.758398523969763e-05, - "loss": 0.00589316263794899, - "step": 7830 - }, - { - "epoch": 1.3358908780903667, - "grad_norm": 0.08053787797689438, - "learning_rate": 5.756234309948443e-05, - "loss": 0.004465704411268234, - "step": 7835 - }, - { - "epoch": 1.3367433930093777, - "grad_norm": 0.09168808907270432, - "learning_rate": 5.75406915935021e-05, - "loss": 0.004735191911458969, - "step": 7840 - }, - { - "epoch": 1.3375959079283888, - "grad_norm": 0.0956537052989006, - "learning_rate": 5.751903073185829e-05, - "loss": 0.005519610643386841, - "step": 7845 - }, - { - "epoch": 1.3384484228473998, - "grad_norm": 0.05775619298219681, - "learning_rate": 5.749736052466501e-05, - "loss": 0.005525605380535125, - "step": 7850 - }, - { - "epoch": 1.339300937766411, - "grad_norm": 0.08594895154237747, - "learning_rate": 5.7475680982038616e-05, - "loss": 0.005280618742108345, - "step": 7855 - }, - { - "epoch": 1.340153452685422, - "grad_norm": 0.10326153039932251, - "learning_rate": 5.745399211409987e-05, - "loss": 0.005818159133195877, - "step": 7860 - }, - { - "epoch": 1.341005967604433, - "grad_norm": 0.053448133170604706, - "learning_rate": 5.743229393097384e-05, - "loss": 0.008255011588335037, - "step": 7865 - }, - { - "epoch": 1.341858482523444, - "grad_norm": 0.05307561904191971, - "learning_rate": 5.741058644278995e-05, - "loss": 0.006851959228515625, - "step": 7870 - }, - { - "epoch": 1.3427109974424551, - "grad_norm": 0.050789013504981995, - "learning_rate": 5.738886965968199e-05, - "loss": 0.005396667867898941, - "step": 7875 - }, - { - "epoch": 1.3435635123614662, - "grad_norm": 0.06762190908193588, - "learning_rate": 5.736714359178808e-05, - "loss": 0.005661940947175026, - "step": 7880 - }, - { - "epoch": 1.3444160272804775, - "grad_norm": 0.06955094635486603, - "learning_rate": 5.734540824925066e-05, - "loss": 0.0065834902226924895, - "step": 7885 - }, - { - "epoch": 1.3452685421994885, - "grad_norm": 0.09844111651182175, - "learning_rate": 5.7323663642216525e-05, - "loss": 0.006687184423208236, - "step": 7890 - }, - { - "epoch": 1.3461210571184996, - "grad_norm": 0.05249316990375519, - "learning_rate": 5.7301909780836766e-05, - "loss": 0.00670531764626503, - "step": 7895 - }, - { - "epoch": 1.3469735720375107, - "grad_norm": 0.06578750163316727, - "learning_rate": 5.7280146675266815e-05, - "loss": 0.0063153237104415895, - "step": 7900 - }, - { - "epoch": 1.3478260869565217, - "grad_norm": 0.10460260510444641, - "learning_rate": 5.725837433566643e-05, - "loss": 0.008820119500160217, - "step": 7905 - }, - { - "epoch": 1.3486786018755328, - "grad_norm": 0.06620552390813828, - "learning_rate": 5.7236592772199624e-05, - "loss": 0.006502580642700195, - "step": 7910 - }, - { - "epoch": 1.3495311167945439, - "grad_norm": 0.1033373698592186, - "learning_rate": 5.72148019950348e-05, - "loss": 0.008503454178571701, - "step": 7915 - }, - { - "epoch": 1.350383631713555, - "grad_norm": 0.05790281295776367, - "learning_rate": 5.719300201434458e-05, - "loss": 0.006304294615983963, - "step": 7920 - }, - { - "epoch": 1.3512361466325662, - "grad_norm": 0.06094033271074295, - "learning_rate": 5.717119284030595e-05, - "loss": 0.006775079667568207, - "step": 7925 - }, - { - "epoch": 1.3520886615515773, - "grad_norm": 0.08011666685342789, - "learning_rate": 5.714937448310015e-05, - "loss": 0.0064566083252429966, - "step": 7930 - }, - { - "epoch": 1.3529411764705883, - "grad_norm": 0.06395548582077026, - "learning_rate": 5.7127546952912686e-05, - "loss": 0.009279583394527436, - "step": 7935 - }, - { - "epoch": 1.3537936913895994, - "grad_norm": 0.06697574257850647, - "learning_rate": 5.710571025993342e-05, - "loss": 0.005718713253736496, - "step": 7940 - }, - { - "epoch": 1.3546462063086104, - "grad_norm": 0.08821829408407211, - "learning_rate": 5.7083864414356414e-05, - "loss": 0.008157726377248764, - "step": 7945 - }, - { - "epoch": 1.3554987212276215, - "grad_norm": 0.07097669690847397, - "learning_rate": 5.706200942638006e-05, - "loss": 0.004782359302043915, - "step": 7950 - }, - { - "epoch": 1.3563512361466326, - "grad_norm": 0.05015713721513748, - "learning_rate": 5.7040145306206963e-05, - "loss": 0.004204710572957992, - "step": 7955 - }, - { - "epoch": 1.3572037510656436, - "grad_norm": 0.054049719125032425, - "learning_rate": 5.701827206404406e-05, - "loss": 0.00606432780623436, - "step": 7960 - }, - { - "epoch": 1.3580562659846547, - "grad_norm": 0.0878557488322258, - "learning_rate": 5.6996389710102474e-05, - "loss": 0.008037066459655762, - "step": 7965 - }, - { - "epoch": 1.3589087809036657, - "grad_norm": 0.10873926430940628, - "learning_rate": 5.697449825459762e-05, - "loss": 0.007864821702241898, - "step": 7970 - }, - { - "epoch": 1.3597612958226768, - "grad_norm": 0.05823246389627457, - "learning_rate": 5.695259770774919e-05, - "loss": 0.00715988278388977, - "step": 7975 - }, - { - "epoch": 1.3606138107416879, - "grad_norm": 0.06690117716789246, - "learning_rate": 5.693068807978106e-05, - "loss": 0.006888707727193832, - "step": 7980 - }, - { - "epoch": 1.3614663256606991, - "grad_norm": 0.07290884852409363, - "learning_rate": 5.6908769380921363e-05, - "loss": 0.005684115365147591, - "step": 7985 - }, - { - "epoch": 1.3623188405797102, - "grad_norm": 0.07930465042591095, - "learning_rate": 5.6886841621402504e-05, - "loss": 0.0077220767736434935, - "step": 7990 - }, - { - "epoch": 1.3631713554987213, - "grad_norm": 0.08893048763275146, - "learning_rate": 5.686490481146107e-05, - "loss": 0.007824088633060455, - "step": 7995 - }, - { - "epoch": 1.3640238704177323, - "grad_norm": 0.09335844218730927, - "learning_rate": 5.6842958961337905e-05, - "loss": 0.006522499769926071, - "step": 8000 - }, - { - "epoch": 1.3648763853367434, - "grad_norm": 0.07194571942090988, - "learning_rate": 5.682100408127806e-05, - "loss": 0.008011893928050995, - "step": 8005 - }, - { - "epoch": 1.3657289002557544, - "grad_norm": 0.053429413586854935, - "learning_rate": 5.6799040181530794e-05, - "loss": 0.006260050833225251, - "step": 8010 - }, - { - "epoch": 1.3665814151747655, - "grad_norm": 0.11974478513002396, - "learning_rate": 5.677706727234959e-05, - "loss": 0.006692723929882049, - "step": 8015 - }, - { - "epoch": 1.3674339300937766, - "grad_norm": 0.07810027152299881, - "learning_rate": 5.6755085363992155e-05, - "loss": 0.007429388910531997, - "step": 8020 - }, - { - "epoch": 1.3682864450127878, - "grad_norm": 0.10204190760850906, - "learning_rate": 5.673309446672034e-05, - "loss": 0.005550343170762062, - "step": 8025 - }, - { - "epoch": 1.369138959931799, - "grad_norm": 0.07640541344881058, - "learning_rate": 5.671109459080026e-05, - "loss": 0.006840181350708008, - "step": 8030 - }, - { - "epoch": 1.36999147485081, - "grad_norm": 0.06644181162118912, - "learning_rate": 5.668908574650216e-05, - "loss": 0.005395495146512985, - "step": 8035 - }, - { - "epoch": 1.370843989769821, - "grad_norm": 0.09630967676639557, - "learning_rate": 5.6667067944100526e-05, - "loss": 0.005423872545361519, - "step": 8040 - }, - { - "epoch": 1.371696504688832, - "grad_norm": 0.07114128023386002, - "learning_rate": 5.664504119387398e-05, - "loss": 0.007013414800167084, - "step": 8045 - }, - { - "epoch": 1.3725490196078431, - "grad_norm": 0.07324981689453125, - "learning_rate": 5.662300550610535e-05, - "loss": 0.008274464309215546, - "step": 8050 - }, - { - "epoch": 1.3734015345268542, - "grad_norm": 0.06012870743870735, - "learning_rate": 5.660096089108163e-05, - "loss": 0.00520169697701931, - "step": 8055 - }, - { - "epoch": 1.3742540494458653, - "grad_norm": 0.07458557933568954, - "learning_rate": 5.657890735909397e-05, - "loss": 0.006112886965274811, - "step": 8060 - }, - { - "epoch": 1.3751065643648763, - "grad_norm": 0.0470297709107399, - "learning_rate": 5.655684492043771e-05, - "loss": 0.004435106366872788, - "step": 8065 - }, - { - "epoch": 1.3759590792838874, - "grad_norm": 0.05244847387075424, - "learning_rate": 5.653477358541231e-05, - "loss": 0.006484140455722809, - "step": 8070 - }, - { - "epoch": 1.3768115942028984, - "grad_norm": 0.10809201747179031, - "learning_rate": 5.651269336432142e-05, - "loss": 0.006385499238967895, - "step": 8075 - }, - { - "epoch": 1.3776641091219095, - "grad_norm": 0.11761374026536942, - "learning_rate": 5.649060426747281e-05, - "loss": 0.0056259695440530775, - "step": 8080 - }, - { - "epoch": 1.3785166240409208, - "grad_norm": 0.06250949203968048, - "learning_rate": 5.646850630517842e-05, - "loss": 0.005127568915486336, - "step": 8085 - }, - { - "epoch": 1.3793691389599319, - "grad_norm": 0.07686682790517807, - "learning_rate": 5.6446399487754307e-05, - "loss": 0.006484859436750412, - "step": 8090 - }, - { - "epoch": 1.380221653878943, - "grad_norm": 0.10453952848911285, - "learning_rate": 5.6424283825520656e-05, - "loss": 0.007125881314277649, - "step": 8095 - }, - { - "epoch": 1.381074168797954, - "grad_norm": 0.08170976489782333, - "learning_rate": 5.640215932880181e-05, - "loss": 0.007152590900659561, - "step": 8100 - }, - { - "epoch": 1.381926683716965, - "grad_norm": 0.08639637380838394, - "learning_rate": 5.638002600792621e-05, - "loss": 0.006862475723028183, - "step": 8105 - }, - { - "epoch": 1.382779198635976, - "grad_norm": 0.061349738389253616, - "learning_rate": 5.635788387322642e-05, - "loss": 0.006520121544599533, - "step": 8110 - }, - { - "epoch": 1.3836317135549872, - "grad_norm": 0.09568873792886734, - "learning_rate": 5.633573293503915e-05, - "loss": 0.00690893828868866, - "step": 8115 - }, - { - "epoch": 1.3844842284739982, - "grad_norm": 0.05280910059809685, - "learning_rate": 5.631357320370518e-05, - "loss": 0.0068241022527217865, - "step": 8120 - }, - { - "epoch": 1.3853367433930095, - "grad_norm": 0.08307540416717529, - "learning_rate": 5.6291404689569406e-05, - "loss": 0.009796305000782013, - "step": 8125 - }, - { - "epoch": 1.3861892583120206, - "grad_norm": 0.06511564552783966, - "learning_rate": 5.6269227402980824e-05, - "loss": 0.00675605982542038, - "step": 8130 - }, - { - "epoch": 1.3870417732310316, - "grad_norm": 0.09521665424108505, - "learning_rate": 5.624704135429255e-05, - "loss": 0.00661565363407135, - "step": 8135 - }, - { - "epoch": 1.3878942881500427, - "grad_norm": 0.06467590481042862, - "learning_rate": 5.622484655386175e-05, - "loss": 0.007056808471679688, - "step": 8140 - }, - { - "epoch": 1.3887468030690537, - "grad_norm": 0.04240449517965317, - "learning_rate": 5.62026430120497e-05, - "loss": 0.005277678743004799, - "step": 8145 - }, - { - "epoch": 1.3895993179880648, - "grad_norm": 0.08462672680616379, - "learning_rate": 5.618043073922176e-05, - "loss": 0.005951377004384995, - "step": 8150 - }, - { - "epoch": 1.3904518329070759, - "grad_norm": 0.08304573595523834, - "learning_rate": 5.615820974574735e-05, - "loss": 0.006729351729154587, - "step": 8155 - }, - { - "epoch": 1.391304347826087, - "grad_norm": 0.04584382846951485, - "learning_rate": 5.6135980041999964e-05, - "loss": 0.00490913912653923, - "step": 8160 - }, - { - "epoch": 1.392156862745098, - "grad_norm": 0.06771710515022278, - "learning_rate": 5.6113741638357175e-05, - "loss": 0.007046511024236679, - "step": 8165 - }, - { - "epoch": 1.393009377664109, - "grad_norm": 0.06334209442138672, - "learning_rate": 5.609149454520062e-05, - "loss": 0.006314977258443833, - "step": 8170 - }, - { - "epoch": 1.39386189258312, - "grad_norm": 0.06783269345760345, - "learning_rate": 5.606923877291595e-05, - "loss": 0.006176649779081345, - "step": 8175 - }, - { - "epoch": 1.3947144075021312, - "grad_norm": 0.10245220363140106, - "learning_rate": 5.604697433189293e-05, - "loss": 0.006309907138347626, - "step": 8180 - }, - { - "epoch": 1.3955669224211424, - "grad_norm": 0.07151709496974945, - "learning_rate": 5.6024701232525325e-05, - "loss": 0.005038458108901978, - "step": 8185 - }, - { - "epoch": 1.3964194373401535, - "grad_norm": 0.08811933547258377, - "learning_rate": 5.600241948521099e-05, - "loss": 0.006065644696354866, - "step": 8190 - }, - { - "epoch": 1.3972719522591646, - "grad_norm": 0.07598903775215149, - "learning_rate": 5.5980129100351736e-05, - "loss": 0.006201237812638283, - "step": 8195 - }, - { - "epoch": 1.3981244671781756, - "grad_norm": 0.058092061430215836, - "learning_rate": 5.5957830088353475e-05, - "loss": 0.006383272260427475, - "step": 8200 - }, - { - "epoch": 1.3989769820971867, - "grad_norm": 0.18196560442447662, - "learning_rate": 5.593552245962616e-05, - "loss": 0.004768157005310058, - "step": 8205 - }, - { - "epoch": 1.3998294970161977, - "grad_norm": 0.09071574360132217, - "learning_rate": 5.591320622458369e-05, - "loss": 0.007671289891004562, - "step": 8210 - }, - { - "epoch": 1.4006820119352088, - "grad_norm": 0.09470858424901962, - "learning_rate": 5.589088139364405e-05, - "loss": 0.006691985577344894, - "step": 8215 - }, - { - "epoch": 1.40153452685422, - "grad_norm": 0.05345157906413078, - "learning_rate": 5.58685479772292e-05, - "loss": 0.005832263827323913, - "step": 8220 - }, - { - "epoch": 1.4023870417732311, - "grad_norm": 0.08154480904340744, - "learning_rate": 5.584620598576514e-05, - "loss": 0.00907905399799347, - "step": 8225 - }, - { - "epoch": 1.4032395566922422, - "grad_norm": 0.06621691584587097, - "learning_rate": 5.582385542968185e-05, - "loss": 0.005938088893890381, - "step": 8230 - }, - { - "epoch": 1.4040920716112533, - "grad_norm": 0.0557720884680748, - "learning_rate": 5.580149631941329e-05, - "loss": 0.005079039558768273, - "step": 8235 - }, - { - "epoch": 1.4049445865302643, - "grad_norm": 0.07839027792215347, - "learning_rate": 5.577912866539746e-05, - "loss": 0.006307472288608551, - "step": 8240 - }, - { - "epoch": 1.4057971014492754, - "grad_norm": 0.05926419049501419, - "learning_rate": 5.575675247807632e-05, - "loss": 0.0072102643549442295, - "step": 8245 - }, - { - "epoch": 1.4066496163682864, - "grad_norm": 0.0570182129740715, - "learning_rate": 5.5734367767895814e-05, - "loss": 0.0066485337913036345, - "step": 8250 - }, - { - "epoch": 1.4075021312872975, - "grad_norm": 0.0930657833814621, - "learning_rate": 5.571197454530588e-05, - "loss": 0.005854785442352295, - "step": 8255 - }, - { - "epoch": 1.4083546462063086, - "grad_norm": 0.06018427759408951, - "learning_rate": 5.568957282076041e-05, - "loss": 0.0049718767404556274, - "step": 8260 - }, - { - "epoch": 1.4092071611253196, - "grad_norm": 0.0889105498790741, - "learning_rate": 5.566716260471726e-05, - "loss": 0.005993577092885971, - "step": 8265 - }, - { - "epoch": 1.4100596760443307, - "grad_norm": 0.04429823160171509, - "learning_rate": 5.5644743907638294e-05, - "loss": 0.005357314646244049, - "step": 8270 - }, - { - "epoch": 1.4109121909633418, - "grad_norm": 0.054813142865896225, - "learning_rate": 5.5622316739989296e-05, - "loss": 0.005666692927479744, - "step": 8275 - }, - { - "epoch": 1.4117647058823528, - "grad_norm": 0.06909197568893433, - "learning_rate": 5.559988111224001e-05, - "loss": 0.005058525875210762, - "step": 8280 - }, - { - "epoch": 1.412617220801364, - "grad_norm": 0.10060004889965057, - "learning_rate": 5.557743703486413e-05, - "loss": 0.0070966087281703946, - "step": 8285 - }, - { - "epoch": 1.4134697357203752, - "grad_norm": 0.052008096128702164, - "learning_rate": 5.555498451833929e-05, - "loss": 0.006671085208654404, - "step": 8290 - }, - { - "epoch": 1.4143222506393862, - "grad_norm": 0.06272758543491364, - "learning_rate": 5.5532523573147094e-05, - "loss": 0.006071484088897705, - "step": 8295 - }, - { - "epoch": 1.4151747655583973, - "grad_norm": 0.08005380630493164, - "learning_rate": 5.551005420977304e-05, - "loss": 0.006429645419120789, - "step": 8300 - }, - { - "epoch": 1.4160272804774083, - "grad_norm": 0.08246695250272751, - "learning_rate": 5.548757643870659e-05, - "loss": 0.004599097743630409, - "step": 8305 - }, - { - "epoch": 1.4168797953964194, - "grad_norm": 0.1190599724650383, - "learning_rate": 5.54650902704411e-05, - "loss": 0.00652359127998352, - "step": 8310 - }, - { - "epoch": 1.4177323103154305, - "grad_norm": 0.042393747717142105, - "learning_rate": 5.5442595715473867e-05, - "loss": 0.004551848769187928, - "step": 8315 - }, - { - "epoch": 1.4185848252344417, - "grad_norm": 0.0809137374162674, - "learning_rate": 5.54200927843061e-05, - "loss": 0.0062880381941795346, - "step": 8320 - }, - { - "epoch": 1.4194373401534528, - "grad_norm": 0.09030820429325104, - "learning_rate": 5.5397581487442905e-05, - "loss": 0.007365265488624572, - "step": 8325 - }, - { - "epoch": 1.4202898550724639, - "grad_norm": 0.060766976326704025, - "learning_rate": 5.537506183539333e-05, - "loss": 0.0047208376228809355, - "step": 8330 - }, - { - "epoch": 1.421142369991475, - "grad_norm": 0.0763862356543541, - "learning_rate": 5.535253383867027e-05, - "loss": 0.006549081206321717, - "step": 8335 - }, - { - "epoch": 1.421994884910486, - "grad_norm": 0.13493886590003967, - "learning_rate": 5.532999750779056e-05, - "loss": 0.0075773999094963075, - "step": 8340 - }, - { - "epoch": 1.422847399829497, - "grad_norm": 0.07888541370630264, - "learning_rate": 5.53074528532749e-05, - "loss": 0.007893601059913635, - "step": 8345 - }, - { - "epoch": 1.423699914748508, - "grad_norm": 0.04488302394747734, - "learning_rate": 5.528489988564789e-05, - "loss": 0.006052879989147187, - "step": 8350 - }, - { - "epoch": 1.4245524296675192, - "grad_norm": 0.09534436464309692, - "learning_rate": 5.5262338615438e-05, - "loss": 0.006183170899748802, - "step": 8355 - }, - { - "epoch": 1.4254049445865302, - "grad_norm": 0.0796227753162384, - "learning_rate": 5.523976905317758e-05, - "loss": 0.006368820369243622, - "step": 8360 - }, - { - "epoch": 1.4262574595055413, - "grad_norm": 0.039230771362781525, - "learning_rate": 5.521719120940287e-05, - "loss": 0.005282421037554741, - "step": 8365 - }, - { - "epoch": 1.4271099744245523, - "grad_norm": 0.12020807713270187, - "learning_rate": 5.5194605094653935e-05, - "loss": 0.00718868374824524, - "step": 8370 - }, - { - "epoch": 1.4279624893435634, - "grad_norm": 0.07434894144535065, - "learning_rate": 5.5172010719474746e-05, - "loss": 0.007591472566127777, - "step": 8375 - }, - { - "epoch": 1.4288150042625745, - "grad_norm": 0.0722346156835556, - "learning_rate": 5.514940809441309e-05, - "loss": 0.005854631587862968, - "step": 8380 - }, - { - "epoch": 1.4296675191815857, - "grad_norm": 0.07834078371524811, - "learning_rate": 5.5126797230020634e-05, - "loss": 0.007415357977151871, - "step": 8385 - }, - { - "epoch": 1.4305200341005968, - "grad_norm": 0.08718696236610413, - "learning_rate": 5.5104178136852884e-05, - "loss": 0.007089633494615555, - "step": 8390 - }, - { - "epoch": 1.4313725490196079, - "grad_norm": 0.08823035657405853, - "learning_rate": 5.508155082546918e-05, - "loss": 0.007153714448213578, - "step": 8395 - }, - { - "epoch": 1.432225063938619, - "grad_norm": 0.07249119132757187, - "learning_rate": 5.505891530643269e-05, - "loss": 0.007651906460523605, - "step": 8400 - }, - { - "epoch": 1.43307757885763, - "grad_norm": 0.06284502893686295, - "learning_rate": 5.503627159031046e-05, - "loss": 0.007741397619247437, - "step": 8405 - }, - { - "epoch": 1.433930093776641, - "grad_norm": 0.06557357311248779, - "learning_rate": 5.501361968767331e-05, - "loss": 0.007656902819871902, - "step": 8410 - }, - { - "epoch": 1.434782608695652, - "grad_norm": 0.0775318294763565, - "learning_rate": 5.49909596090959e-05, - "loss": 0.006714560091495514, - "step": 8415 - }, - { - "epoch": 1.4356351236146634, - "grad_norm": 0.05347822234034538, - "learning_rate": 5.496829136515672e-05, - "loss": 0.0048537302762269975, - "step": 8420 - }, - { - "epoch": 1.4364876385336744, - "grad_norm": 0.07040467113256454, - "learning_rate": 5.4945614966438046e-05, - "loss": 0.005411979556083679, - "step": 8425 - }, - { - "epoch": 1.4373401534526855, - "grad_norm": 0.09473410993814468, - "learning_rate": 5.492293042352598e-05, - "loss": 0.008005911856889725, - "step": 8430 - }, - { - "epoch": 1.4381926683716966, - "grad_norm": 0.037446580827236176, - "learning_rate": 5.4900237747010426e-05, - "loss": 0.006237779557704925, - "step": 8435 - }, - { - "epoch": 1.4390451832907076, - "grad_norm": 0.11029476672410965, - "learning_rate": 5.4877536947485074e-05, - "loss": 0.008190502226352692, - "step": 8440 - }, - { - "epoch": 1.4398976982097187, - "grad_norm": 0.0514204315841198, - "learning_rate": 5.4854828035547424e-05, - "loss": 0.006500741839408875, - "step": 8445 - }, - { - "epoch": 1.4407502131287298, - "grad_norm": 0.08411483466625214, - "learning_rate": 5.483211102179873e-05, - "loss": 0.0053235463798046116, - "step": 8450 - }, - { - "epoch": 1.4416027280477408, - "grad_norm": 0.09279052913188934, - "learning_rate": 5.480938591684407e-05, - "loss": 0.006842000037431717, - "step": 8455 - }, - { - "epoch": 1.4424552429667519, - "grad_norm": 0.09881046414375305, - "learning_rate": 5.478665273129228e-05, - "loss": 0.007064050436019898, - "step": 8460 - }, - { - "epoch": 1.443307757885763, - "grad_norm": 0.09018172323703766, - "learning_rate": 5.476391147575595e-05, - "loss": 0.005222787708044052, - "step": 8465 - }, - { - "epoch": 1.444160272804774, - "grad_norm": 0.11489493399858475, - "learning_rate": 5.4741162160851455e-05, - "loss": 0.004823528230190277, - "step": 8470 - }, - { - "epoch": 1.445012787723785, - "grad_norm": 0.11010619252920151, - "learning_rate": 5.4718404797198955e-05, - "loss": 0.007554465532302856, - "step": 8475 - }, - { - "epoch": 1.4458653026427961, - "grad_norm": 0.10469060391187668, - "learning_rate": 5.469563939542233e-05, - "loss": 0.005817038565874099, - "step": 8480 - }, - { - "epoch": 1.4467178175618074, - "grad_norm": 0.06776002794504166, - "learning_rate": 5.467286596614922e-05, - "loss": 0.00899386927485466, - "step": 8485 - }, - { - "epoch": 1.4475703324808185, - "grad_norm": 0.08414942771196365, - "learning_rate": 5.4650084520011026e-05, - "loss": 0.00611347034573555, - "step": 8490 - }, - { - "epoch": 1.4484228473998295, - "grad_norm": 0.09625279158353806, - "learning_rate": 5.462729506764289e-05, - "loss": 0.005645812302827835, - "step": 8495 - }, - { - "epoch": 1.4492753623188406, - "grad_norm": 0.10020645707845688, - "learning_rate": 5.4604497619683674e-05, - "loss": 0.0058198563754558565, - "step": 8500 - }, - { - "epoch": 1.4501278772378516, - "grad_norm": 0.08466780185699463, - "learning_rate": 5.4581692186776e-05, - "loss": 0.005098164081573486, - "step": 8505 - }, - { - "epoch": 1.4509803921568627, - "grad_norm": 0.058955296874046326, - "learning_rate": 5.4558878779566194e-05, - "loss": 0.004072735831141472, - "step": 8510 - }, - { - "epoch": 1.4518329070758738, - "grad_norm": 0.14849397540092468, - "learning_rate": 5.4536057408704304e-05, - "loss": 0.011097650229930877, - "step": 8515 - }, - { - "epoch": 1.452685421994885, - "grad_norm": 0.08641809970140457, - "learning_rate": 5.451322808484413e-05, - "loss": 0.006210784614086151, - "step": 8520 - }, - { - "epoch": 1.453537936913896, - "grad_norm": 0.07506752014160156, - "learning_rate": 5.4490390818643136e-05, - "loss": 0.006071462482213974, - "step": 8525 - }, - { - "epoch": 1.4543904518329072, - "grad_norm": 0.10383405536413193, - "learning_rate": 5.4467545620762545e-05, - "loss": 0.008749781548976899, - "step": 8530 - }, - { - "epoch": 1.4552429667519182, - "grad_norm": 0.08180487155914307, - "learning_rate": 5.444469250186721e-05, - "loss": 0.00613279715180397, - "step": 8535 - }, - { - "epoch": 1.4560954816709293, - "grad_norm": 0.07797367125749588, - "learning_rate": 5.442183147262577e-05, - "loss": 0.005885690450668335, - "step": 8540 - }, - { - "epoch": 1.4569479965899403, - "grad_norm": 0.0780278891324997, - "learning_rate": 5.439896254371049e-05, - "loss": 0.007259850949048996, - "step": 8545 - }, - { - "epoch": 1.4578005115089514, - "grad_norm": 0.10005395114421844, - "learning_rate": 5.437608572579737e-05, - "loss": 0.0061523888260126116, - "step": 8550 - }, - { - "epoch": 1.4586530264279625, - "grad_norm": 0.10141763836145401, - "learning_rate": 5.435320102956604e-05, - "loss": 0.006501191109418869, - "step": 8555 - }, - { - "epoch": 1.4595055413469735, - "grad_norm": 0.0938732773065567, - "learning_rate": 5.4330308465699865e-05, - "loss": 0.008337517827749252, - "step": 8560 - }, - { - "epoch": 1.4603580562659846, - "grad_norm": 0.1085699051618576, - "learning_rate": 5.430740804488582e-05, - "loss": 0.005681714415550232, - "step": 8565 - }, - { - "epoch": 1.4612105711849956, - "grad_norm": 0.07967904955148697, - "learning_rate": 5.428449977781463e-05, - "loss": 0.006299185007810593, - "step": 8570 - }, - { - "epoch": 1.4620630861040067, - "grad_norm": 0.090158611536026, - "learning_rate": 5.426158367518061e-05, - "loss": 0.007821831852197647, - "step": 8575 - }, - { - "epoch": 1.4629156010230178, - "grad_norm": 0.12222256511449814, - "learning_rate": 5.4238659747681736e-05, - "loss": 0.0065193742513656614, - "step": 8580 - }, - { - "epoch": 1.463768115942029, - "grad_norm": 0.07724417746067047, - "learning_rate": 5.421572800601971e-05, - "loss": 0.00850745365023613, - "step": 8585 - }, - { - "epoch": 1.46462063086104, - "grad_norm": 0.07322543114423752, - "learning_rate": 5.4192788460899786e-05, - "loss": 0.006478501856327057, - "step": 8590 - }, - { - "epoch": 1.4654731457800512, - "grad_norm": 0.07086360454559326, - "learning_rate": 5.416984112303095e-05, - "loss": 0.007459370046854019, - "step": 8595 - }, - { - "epoch": 1.4663256606990622, - "grad_norm": 0.08460366725921631, - "learning_rate": 5.414688600312575e-05, - "loss": 0.006461035460233688, - "step": 8600 - }, - { - "epoch": 1.4671781756180733, - "grad_norm": 0.06856394559144974, - "learning_rate": 5.412392311190041e-05, - "loss": 0.007420676201581955, - "step": 8605 - }, - { - "epoch": 1.4680306905370843, - "grad_norm": 0.06801126897335052, - "learning_rate": 5.4100952460074766e-05, - "loss": 0.006456401199102402, - "step": 8610 - }, - { - "epoch": 1.4688832054560954, - "grad_norm": 0.06273184716701508, - "learning_rate": 5.4077974058372295e-05, - "loss": 0.00508052185177803, - "step": 8615 - }, - { - "epoch": 1.4697357203751067, - "grad_norm": 0.07751575112342834, - "learning_rate": 5.405498791752007e-05, - "loss": 0.006596812605857849, - "step": 8620 - }, - { - "epoch": 1.4705882352941178, - "grad_norm": 0.10850238054990768, - "learning_rate": 5.4031994048248776e-05, - "loss": 0.006385332345962525, - "step": 8625 - }, - { - "epoch": 1.4714407502131288, - "grad_norm": 0.07195930927991867, - "learning_rate": 5.4008992461292736e-05, - "loss": 0.007354143261909485, - "step": 8630 - }, - { - "epoch": 1.4722932651321399, - "grad_norm": 0.061606891453266144, - "learning_rate": 5.3985983167389846e-05, - "loss": 0.007285259664058685, - "step": 8635 - }, - { - "epoch": 1.473145780051151, - "grad_norm": 0.059549275785684586, - "learning_rate": 5.3962966177281616e-05, - "loss": 0.005211231112480163, - "step": 8640 - }, - { - "epoch": 1.473998294970162, - "grad_norm": 0.04548822343349457, - "learning_rate": 5.3939941501713146e-05, - "loss": 0.00805831179022789, - "step": 8645 - }, - { - "epoch": 1.474850809889173, - "grad_norm": 0.046682652086019516, - "learning_rate": 5.3916909151433096e-05, - "loss": 0.005787956342101097, - "step": 8650 - }, - { - "epoch": 1.4757033248081841, - "grad_norm": 0.06405246257781982, - "learning_rate": 5.3893869137193755e-05, - "loss": 0.005377359688282013, - "step": 8655 - }, - { - "epoch": 1.4765558397271952, - "grad_norm": 0.09410709887742996, - "learning_rate": 5.3870821469750964e-05, - "loss": 0.006961540877819061, - "step": 8660 - }, - { - "epoch": 1.4774083546462062, - "grad_norm": 0.0637243241071701, - "learning_rate": 5.384776615986414e-05, - "loss": 0.0060172989964485165, - "step": 8665 - }, - { - "epoch": 1.4782608695652173, - "grad_norm": 0.07082457840442657, - "learning_rate": 5.382470321829627e-05, - "loss": 0.005960140377283096, - "step": 8670 - }, - { - "epoch": 1.4791133844842284, - "grad_norm": 0.06502280384302139, - "learning_rate": 5.380163265581391e-05, - "loss": 0.005596417188644409, - "step": 8675 - }, - { - "epoch": 1.4799658994032396, - "grad_norm": 0.07504235208034515, - "learning_rate": 5.3778554483187134e-05, - "loss": 0.007427094876766205, - "step": 8680 - }, - { - "epoch": 1.4808184143222507, - "grad_norm": 0.08005198836326599, - "learning_rate": 5.375546871118964e-05, - "loss": 0.006888572126626968, - "step": 8685 - }, - { - "epoch": 1.4816709292412618, - "grad_norm": 0.1083201915025711, - "learning_rate": 5.373237535059861e-05, - "loss": 0.007253114879131317, - "step": 8690 - }, - { - "epoch": 1.4825234441602728, - "grad_norm": 0.060000013560056686, - "learning_rate": 5.37092744121948e-05, - "loss": 0.005570416525006294, - "step": 8695 - }, - { - "epoch": 1.4833759590792839, - "grad_norm": 0.04832584038376808, - "learning_rate": 5.3686165906762504e-05, - "loss": 0.005356843769550324, - "step": 8700 - }, - { - "epoch": 1.484228473998295, - "grad_norm": 0.061586812138557434, - "learning_rate": 5.3663049845089534e-05, - "loss": 0.005226074159145356, - "step": 8705 - }, - { - "epoch": 1.485080988917306, - "grad_norm": 0.08486256003379822, - "learning_rate": 5.363992623796724e-05, - "loss": 0.007083939760923386, - "step": 8710 - }, - { - "epoch": 1.485933503836317, - "grad_norm": 0.09085836261510849, - "learning_rate": 5.361679509619048e-05, - "loss": 0.005988218262791634, - "step": 8715 - }, - { - "epoch": 1.4867860187553283, - "grad_norm": 0.06301745027303696, - "learning_rate": 5.359365643055765e-05, - "loss": 0.00595020055770874, - "step": 8720 - }, - { - "epoch": 1.4876385336743394, - "grad_norm": 0.07939866930246353, - "learning_rate": 5.3570510251870646e-05, - "loss": 0.006101110950112343, - "step": 8725 - }, - { - "epoch": 1.4884910485933505, - "grad_norm": 0.10560661554336548, - "learning_rate": 5.354735657093487e-05, - "loss": 0.006781180202960968, - "step": 8730 - }, - { - "epoch": 1.4893435635123615, - "grad_norm": 0.10549639165401459, - "learning_rate": 5.352419539855925e-05, - "loss": 0.006455187499523163, - "step": 8735 - }, - { - "epoch": 1.4901960784313726, - "grad_norm": 0.06474289298057556, - "learning_rate": 5.3501026745556157e-05, - "loss": 0.0078111283481121065, - "step": 8740 - }, - { - "epoch": 1.4910485933503836, - "grad_norm": 0.11109986901283264, - "learning_rate": 5.3477850622741525e-05, - "loss": 0.00798504576086998, - "step": 8745 - }, - { - "epoch": 1.4919011082693947, - "grad_norm": 0.0787222608923912, - "learning_rate": 5.3454667040934715e-05, - "loss": 0.007222773879766465, - "step": 8750 - }, - { - "epoch": 1.4927536231884058, - "grad_norm": 0.06622221320867538, - "learning_rate": 5.3431476010958613e-05, - "loss": 0.0064462460577487946, - "step": 8755 - }, - { - "epoch": 1.4936061381074168, - "grad_norm": 0.07526405900716782, - "learning_rate": 5.340827754363955e-05, - "loss": 0.005344667285680771, - "step": 8760 - }, - { - "epoch": 1.4944586530264279, - "grad_norm": 0.08911366015672684, - "learning_rate": 5.338507164980734e-05, - "loss": 0.006722994893789291, - "step": 8765 - }, - { - "epoch": 1.495311167945439, - "grad_norm": 0.03749583289027214, - "learning_rate": 5.336185834029527e-05, - "loss": 0.006120331957936287, - "step": 8770 - }, - { - "epoch": 1.49616368286445, - "grad_norm": 0.08167645335197449, - "learning_rate": 5.333863762594008e-05, - "loss": 0.007496768981218338, - "step": 8775 - }, - { - "epoch": 1.4970161977834613, - "grad_norm": 0.09045904129743576, - "learning_rate": 5.3315409517581996e-05, - "loss": 0.007222528755664826, - "step": 8780 - }, - { - "epoch": 1.4978687127024723, - "grad_norm": 0.06064090132713318, - "learning_rate": 5.329217402606464e-05, - "loss": 0.0044986031949520115, - "step": 8785 - }, - { - "epoch": 1.4987212276214834, - "grad_norm": 0.07282263785600662, - "learning_rate": 5.3268931162235126e-05, - "loss": 0.005251912400126457, - "step": 8790 - }, - { - "epoch": 1.4995737425404945, - "grad_norm": 0.0674249604344368, - "learning_rate": 5.324568093694401e-05, - "loss": 0.006289477646350861, - "step": 8795 - }, - { - "epoch": 1.4997442455242966, - "eval_loss": 0.03760311380028725, - "eval_runtime": 3.668, - "eval_samples_per_second": 68.702, - "eval_steps_per_second": 1.091, - "step": 8796 - }, - { - "eval_cer_subset": 0.014184050678261437, - "eval_cer_subset_edit_distance": 871, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 8796 - }, - { - "epoch": 1.5004262574595055, - "grad_norm": 0.06114037334918976, - "learning_rate": 5.322242336104525e-05, - "loss": 0.005809751898050308, - "step": 8800 - }, - { - "epoch": 1.5012787723785166, - "grad_norm": 0.08830825984477997, - "learning_rate": 5.319915844539626e-05, - "loss": 0.006921032071113586, - "step": 8805 - }, - { - "epoch": 1.5021312872975279, - "grad_norm": 0.10813544690608978, - "learning_rate": 5.3175886200857873e-05, - "loss": 0.007966426759958267, - "step": 8810 - }, - { - "epoch": 1.502983802216539, - "grad_norm": 0.08357173204421997, - "learning_rate": 5.3152606638294355e-05, - "loss": 0.006943506002426147, - "step": 8815 - }, - { - "epoch": 1.50383631713555, - "grad_norm": 0.08059901744127274, - "learning_rate": 5.312931976857339e-05, - "loss": 0.0047626100480556485, - "step": 8820 - }, - { - "epoch": 1.504688832054561, - "grad_norm": 0.07412680238485336, - "learning_rate": 5.310602560256604e-05, - "loss": 0.00709492564201355, - "step": 8825 - }, - { - "epoch": 1.5055413469735721, - "grad_norm": 0.046478480100631714, - "learning_rate": 5.3082724151146814e-05, - "loss": 0.006465598940849304, - "step": 8830 - }, - { - "epoch": 1.5063938618925832, - "grad_norm": 0.11122216284275055, - "learning_rate": 5.30594154251936e-05, - "loss": 0.00888531506061554, - "step": 8835 - }, - { - "epoch": 1.5072463768115942, - "grad_norm": 0.06441432982683182, - "learning_rate": 5.3036099435587685e-05, - "loss": 0.005882937833666802, - "step": 8840 - }, - { - "epoch": 1.5080988917306053, - "grad_norm": 0.05722307041287422, - "learning_rate": 5.301277619321374e-05, - "loss": 0.0059202808886766435, - "step": 8845 - }, - { - "epoch": 1.5089514066496164, - "grad_norm": 0.06677310913801193, - "learning_rate": 5.2989445708959856e-05, - "loss": 0.0064939349889755246, - "step": 8850 - }, - { - "epoch": 1.5098039215686274, - "grad_norm": 0.08854222297668457, - "learning_rate": 5.296610799371745e-05, - "loss": 0.007034827768802643, - "step": 8855 - }, - { - "epoch": 1.5106564364876385, - "grad_norm": 0.059711627662181854, - "learning_rate": 5.2942763058381356e-05, - "loss": 0.007557753473520279, - "step": 8860 - }, - { - "epoch": 1.5115089514066495, - "grad_norm": 0.06355257332324982, - "learning_rate": 5.291941091384977e-05, - "loss": 0.006534597277641297, - "step": 8865 - }, - { - "epoch": 1.5123614663256606, - "grad_norm": 0.05741631239652634, - "learning_rate": 5.2896051571024255e-05, - "loss": 0.006453331559896469, - "step": 8870 - }, - { - "epoch": 1.5132139812446717, - "grad_norm": 0.05809224396944046, - "learning_rate": 5.287268504080972e-05, - "loss": 0.006065556779503822, - "step": 8875 - }, - { - "epoch": 1.5140664961636827, - "grad_norm": 0.04522582143545151, - "learning_rate": 5.284931133411443e-05, - "loss": 0.004097414761781692, - "step": 8880 - }, - { - "epoch": 1.514919011082694, - "grad_norm": 0.09349111467599869, - "learning_rate": 5.2825930461850014e-05, - "loss": 0.005707831308245659, - "step": 8885 - }, - { - "epoch": 1.515771526001705, - "grad_norm": 0.08951391279697418, - "learning_rate": 5.280254243493145e-05, - "loss": 0.00725678950548172, - "step": 8890 - }, - { - "epoch": 1.5166240409207161, - "grad_norm": 0.07826244086027145, - "learning_rate": 5.277914726427705e-05, - "loss": 0.008086606860160828, - "step": 8895 - }, - { - "epoch": 1.5174765558397272, - "grad_norm": 0.0619954876601696, - "learning_rate": 5.2755744960808446e-05, - "loss": 0.005462165176868439, - "step": 8900 - }, - { - "epoch": 1.5183290707587382, - "grad_norm": 0.04414132609963417, - "learning_rate": 5.273233553545062e-05, - "loss": 0.005678927898406983, - "step": 8905 - }, - { - "epoch": 1.5191815856777495, - "grad_norm": 0.07183931767940521, - "learning_rate": 5.2708918999131864e-05, - "loss": 0.007184042781591416, - "step": 8910 - }, - { - "epoch": 1.5200341005967606, - "grad_norm": 0.10447251796722412, - "learning_rate": 5.26854953627838e-05, - "loss": 0.009831231832504273, - "step": 8915 - }, - { - "epoch": 1.5208866155157716, - "grad_norm": 0.04392845183610916, - "learning_rate": 5.266206463734135e-05, - "loss": 0.006517301499843598, - "step": 8920 - }, - { - "epoch": 1.5217391304347827, - "grad_norm": 0.06292697787284851, - "learning_rate": 5.2638626833742776e-05, - "loss": 0.005328541249036789, - "step": 8925 - }, - { - "epoch": 1.5225916453537938, - "grad_norm": 0.06425110250711441, - "learning_rate": 5.2615181962929605e-05, - "loss": 0.006298693269491196, - "step": 8930 - }, - { - "epoch": 1.5234441602728048, - "grad_norm": 0.08059051632881165, - "learning_rate": 5.259173003584669e-05, - "loss": 0.008097793161869048, - "step": 8935 - }, - { - "epoch": 1.5242966751918159, - "grad_norm": 0.0625302791595459, - "learning_rate": 5.256827106344218e-05, - "loss": 0.006664089858531952, - "step": 8940 - }, - { - "epoch": 1.525149190110827, - "grad_norm": 0.06092630326747894, - "learning_rate": 5.254480505666749e-05, - "loss": 0.006084204837679863, - "step": 8945 - }, - { - "epoch": 1.526001705029838, - "grad_norm": 0.07297338545322418, - "learning_rate": 5.2521332026477344e-05, - "loss": 0.006405481696128845, - "step": 8950 - }, - { - "epoch": 1.526854219948849, - "grad_norm": 0.05876631662249565, - "learning_rate": 5.249785198382973e-05, - "loss": 0.006670171767473221, - "step": 8955 - }, - { - "epoch": 1.5277067348678601, - "grad_norm": 0.0633542388677597, - "learning_rate": 5.247436493968589e-05, - "loss": 0.004565924406051636, - "step": 8960 - }, - { - "epoch": 1.5285592497868712, - "grad_norm": 0.09164717048406601, - "learning_rate": 5.2450870905010395e-05, - "loss": 0.005662925541400909, - "step": 8965 - }, - { - "epoch": 1.5294117647058822, - "grad_norm": 0.06646572798490524, - "learning_rate": 5.2427369890771026e-05, - "loss": 0.006319984793663025, - "step": 8970 - }, - { - "epoch": 1.5302642796248933, - "grad_norm": 0.08518269658088684, - "learning_rate": 5.2403861907938826e-05, - "loss": 0.0066184550523757935, - "step": 8975 - }, - { - "epoch": 1.5311167945439044, - "grad_norm": 0.08369076251983643, - "learning_rate": 5.238034696748811e-05, - "loss": 0.005069610476493835, - "step": 8980 - }, - { - "epoch": 1.5319693094629157, - "grad_norm": 0.05607258528470993, - "learning_rate": 5.235682508039646e-05, - "loss": 0.007457223534584045, - "step": 8985 - }, - { - "epoch": 1.5328218243819267, - "grad_norm": 0.0828152522444725, - "learning_rate": 5.2333296257644646e-05, - "loss": 0.007727481424808502, - "step": 8990 - }, - { - "epoch": 1.5336743393009378, - "grad_norm": 0.09770844876766205, - "learning_rate": 5.230976051021671e-05, - "loss": 0.007591258734464645, - "step": 8995 - }, - { - "epoch": 1.5345268542199488, - "grad_norm": 0.05906900763511658, - "learning_rate": 5.2286217849099925e-05, - "loss": 0.008510296791791916, - "step": 9000 - }, - { - "epoch": 1.53537936913896, - "grad_norm": 0.07594765722751617, - "learning_rate": 5.2262668285284785e-05, - "loss": 0.005943647772073746, - "step": 9005 - }, - { - "epoch": 1.5362318840579712, - "grad_norm": 0.056658126413822174, - "learning_rate": 5.223911182976502e-05, - "loss": 0.004702667891979218, - "step": 9010 - }, - { - "epoch": 1.5370843989769822, - "grad_norm": 0.060573313385248184, - "learning_rate": 5.2215548493537556e-05, - "loss": 0.006530648469924927, - "step": 9015 - }, - { - "epoch": 1.5379369138959933, - "grad_norm": 0.06876473873853683, - "learning_rate": 5.219197828760254e-05, - "loss": 0.0070976391434669495, - "step": 9020 - }, - { - "epoch": 1.5387894288150044, - "grad_norm": 0.05402369797229767, - "learning_rate": 5.2168401222963354e-05, - "loss": 0.005997032299637795, - "step": 9025 - }, - { - "epoch": 1.5396419437340154, - "grad_norm": 0.0907805860042572, - "learning_rate": 5.214481731062652e-05, - "loss": 0.007357357442378998, - "step": 9030 - }, - { - "epoch": 1.5404944586530265, - "grad_norm": 0.07572564482688904, - "learning_rate": 5.212122656160182e-05, - "loss": 0.004879472404718399, - "step": 9035 - }, - { - "epoch": 1.5413469735720375, - "grad_norm": 0.05684768036007881, - "learning_rate": 5.209762898690218e-05, - "loss": 0.006248699128627777, - "step": 9040 - }, - { - "epoch": 1.5421994884910486, - "grad_norm": 0.070293128490448, - "learning_rate": 5.2074024597543745e-05, - "loss": 0.005055962502956391, - "step": 9045 - }, - { - "epoch": 1.5430520034100597, - "grad_norm": 0.06611300259828568, - "learning_rate": 5.2050413404545823e-05, - "loss": 0.0048581909388303755, - "step": 9050 - }, - { - "epoch": 1.5439045183290707, - "grad_norm": 0.06960003823041916, - "learning_rate": 5.202679541893092e-05, - "loss": 0.006258350610733032, - "step": 9055 - }, - { - "epoch": 1.5447570332480818, - "grad_norm": 0.059757016599178314, - "learning_rate": 5.2003170651724675e-05, - "loss": 0.006347355991601944, - "step": 9060 - }, - { - "epoch": 1.5456095481670928, - "grad_norm": 0.06531284749507904, - "learning_rate": 5.1979539113955936e-05, - "loss": 0.00543224960565567, - "step": 9065 - }, - { - "epoch": 1.546462063086104, - "grad_norm": 0.08068390935659409, - "learning_rate": 5.195590081665667e-05, - "loss": 0.004933612793684006, - "step": 9070 - }, - { - "epoch": 1.547314578005115, - "grad_norm": 0.06198716536164284, - "learning_rate": 5.193225577086203e-05, - "loss": 0.00523824393749237, - "step": 9075 - }, - { - "epoch": 1.548167092924126, - "grad_norm": 0.07734926789999008, - "learning_rate": 5.190860398761032e-05, - "loss": 0.005699950456619263, - "step": 9080 - }, - { - "epoch": 1.5490196078431373, - "grad_norm": 0.058083925396203995, - "learning_rate": 5.188494547794297e-05, - "loss": 0.006147466972470284, - "step": 9085 - }, - { - "epoch": 1.5498721227621484, - "grad_norm": 0.0675162672996521, - "learning_rate": 5.1861280252904546e-05, - "loss": 0.0059716224670410155, - "step": 9090 - }, - { - "epoch": 1.5507246376811594, - "grad_norm": 0.05415274575352669, - "learning_rate": 5.183760832354278e-05, - "loss": 0.0058246061205863954, - "step": 9095 - }, - { - "epoch": 1.5515771526001705, - "grad_norm": 0.05826190859079361, - "learning_rate": 5.1813929700908523e-05, - "loss": 0.005409573763608932, - "step": 9100 - }, - { - "epoch": 1.5524296675191815, - "grad_norm": 0.07188098877668381, - "learning_rate": 5.179024439605573e-05, - "loss": 0.00541839525103569, - "step": 9105 - }, - { - "epoch": 1.5532821824381928, - "grad_norm": 0.07955330610275269, - "learning_rate": 5.176655242004149e-05, - "loss": 0.007760365307331085, - "step": 9110 - }, - { - "epoch": 1.5541346973572039, - "grad_norm": 0.07923565059900284, - "learning_rate": 5.1742853783926e-05, - "loss": 0.00563618317246437, - "step": 9115 - }, - { - "epoch": 1.554987212276215, - "grad_norm": 0.08301008492708206, - "learning_rate": 5.171914849877258e-05, - "loss": 0.006948529183864594, - "step": 9120 - }, - { - "epoch": 1.555839727195226, - "grad_norm": 0.10905841737985611, - "learning_rate": 5.1695436575647655e-05, - "loss": 0.005861887335777282, - "step": 9125 - }, - { - "epoch": 1.556692242114237, - "grad_norm": 0.06157204881310463, - "learning_rate": 5.167171802562072e-05, - "loss": 0.005052468553185463, - "step": 9130 - }, - { - "epoch": 1.5575447570332481, - "grad_norm": 0.08309191465377808, - "learning_rate": 5.164799285976438e-05, - "loss": 0.006937308609485627, - "step": 9135 - }, - { - "epoch": 1.5583972719522592, - "grad_norm": 0.07454490661621094, - "learning_rate": 5.162426108915437e-05, - "loss": 0.00504121258854866, - "step": 9140 - }, - { - "epoch": 1.5592497868712702, - "grad_norm": 0.07217807322740555, - "learning_rate": 5.160052272486943e-05, - "loss": 0.004582167789340019, - "step": 9145 - }, - { - "epoch": 1.5601023017902813, - "grad_norm": 0.07113789767026901, - "learning_rate": 5.157677777799145e-05, - "loss": 0.0055323362350463865, - "step": 9150 - }, - { - "epoch": 1.5609548167092924, - "grad_norm": 0.10281748324632645, - "learning_rate": 5.1553026259605316e-05, - "loss": 0.006342601776123047, - "step": 9155 - }, - { - "epoch": 1.5618073316283034, - "grad_norm": 0.09731876850128174, - "learning_rate": 5.152926818079906e-05, - "loss": 0.0054936733096838, - "step": 9160 - }, - { - "epoch": 1.5626598465473145, - "grad_norm": 0.09631586819887161, - "learning_rate": 5.1505503552663734e-05, - "loss": 0.0064162641763687136, - "step": 9165 - }, - { - "epoch": 1.5635123614663256, - "grad_norm": 0.07588718831539154, - "learning_rate": 5.148173238629348e-05, - "loss": 0.0069232374429702755, - "step": 9170 - }, - { - "epoch": 1.5643648763853366, - "grad_norm": 0.10357257723808289, - "learning_rate": 5.145795469278544e-05, - "loss": 0.007076382637023926, - "step": 9175 - }, - { - "epoch": 1.5652173913043477, - "grad_norm": 0.07249122112989426, - "learning_rate": 5.1434170483239826e-05, - "loss": 0.005868781358003616, - "step": 9180 - }, - { - "epoch": 1.566069906223359, - "grad_norm": 0.06878417730331421, - "learning_rate": 5.1410379768759934e-05, - "loss": 0.006841042637825012, - "step": 9185 - }, - { - "epoch": 1.56692242114237, - "grad_norm": 0.1096004843711853, - "learning_rate": 5.138658256045203e-05, - "loss": 0.00807877779006958, - "step": 9190 - }, - { - "epoch": 1.567774936061381, - "grad_norm": 0.07194329053163528, - "learning_rate": 5.136277886942547e-05, - "loss": 0.005923056975007057, - "step": 9195 - }, - { - "epoch": 1.5686274509803921, - "grad_norm": 0.08904275298118591, - "learning_rate": 5.133896870679257e-05, - "loss": 0.006372517347335816, - "step": 9200 - }, - { - "epoch": 1.5694799658994032, - "grad_norm": 0.05133598670363426, - "learning_rate": 5.131515208366873e-05, - "loss": 0.00692460760474205, - "step": 9205 - }, - { - "epoch": 1.5703324808184145, - "grad_norm": 0.047151900827884674, - "learning_rate": 5.1291329011172345e-05, - "loss": 0.006545543670654297, - "step": 9210 - }, - { - "epoch": 1.5711849957374255, - "grad_norm": 0.07102219760417938, - "learning_rate": 5.126749950042482e-05, - "loss": 0.006531259417533875, - "step": 9215 - }, - { - "epoch": 1.5720375106564366, - "grad_norm": 0.09585709124803543, - "learning_rate": 5.124366356255056e-05, - "loss": 0.005086017400026321, - "step": 9220 - }, - { - "epoch": 1.5728900255754477, - "grad_norm": 0.06898393481969833, - "learning_rate": 5.121982120867695e-05, - "loss": 0.004247477650642395, - "step": 9225 - }, - { - "epoch": 1.5737425404944587, - "grad_norm": 0.10513560473918915, - "learning_rate": 5.119597244993443e-05, - "loss": 0.006501986831426621, - "step": 9230 - }, - { - "epoch": 1.5745950554134698, - "grad_norm": 0.06671630591154099, - "learning_rate": 5.1172117297456366e-05, - "loss": 0.007658005505800247, - "step": 9235 - }, - { - "epoch": 1.5754475703324808, - "grad_norm": 0.09480880945920944, - "learning_rate": 5.1148255762379156e-05, - "loss": 0.006366011500358581, - "step": 9240 - }, - { - "epoch": 1.576300085251492, - "grad_norm": 0.06769633293151855, - "learning_rate": 5.112438785584215e-05, - "loss": 0.00625738725066185, - "step": 9245 - }, - { - "epoch": 1.577152600170503, - "grad_norm": 0.03695152327418327, - "learning_rate": 5.1100513588987665e-05, - "loss": 0.006924654543399811, - "step": 9250 - }, - { - "epoch": 1.578005115089514, - "grad_norm": 0.05657009407877922, - "learning_rate": 5.107663297296104e-05, - "loss": 0.005848415940999985, - "step": 9255 - }, - { - "epoch": 1.578857630008525, - "grad_norm": 0.11228469014167786, - "learning_rate": 5.105274601891051e-05, - "loss": 0.005637861788272858, - "step": 9260 - }, - { - "epoch": 1.5797101449275361, - "grad_norm": 0.06454899162054062, - "learning_rate": 5.102885273798732e-05, - "loss": 0.0066472023725509645, - "step": 9265 - }, - { - "epoch": 1.5805626598465472, - "grad_norm": 0.05328953638672829, - "learning_rate": 5.1004953141345637e-05, - "loss": 0.008773463219404221, - "step": 9270 - }, - { - "epoch": 1.5814151747655583, - "grad_norm": 0.05827401205897331, - "learning_rate": 5.0981047240142576e-05, - "loss": 0.0075307883322238926, - "step": 9275 - }, - { - "epoch": 1.5822676896845693, - "grad_norm": 0.0719359889626503, - "learning_rate": 5.095713504553822e-05, - "loss": 0.007532978057861328, - "step": 9280 - }, - { - "epoch": 1.5831202046035806, - "grad_norm": 0.08982953429222107, - "learning_rate": 5.0933216568695596e-05, - "loss": 0.007915425300598144, - "step": 9285 - }, - { - "epoch": 1.5839727195225917, - "grad_norm": 0.0919221043586731, - "learning_rate": 5.090929182078061e-05, - "loss": 0.005685590207576752, - "step": 9290 - }, - { - "epoch": 1.5848252344416027, - "grad_norm": 0.0840388685464859, - "learning_rate": 5.088536081296215e-05, - "loss": 0.0070190995931625364, - "step": 9295 - }, - { - "epoch": 1.5856777493606138, - "grad_norm": 0.08340579271316528, - "learning_rate": 5.086142355641199e-05, - "loss": 0.005871276929974556, - "step": 9300 - }, - { - "epoch": 1.5865302642796248, - "grad_norm": 0.0840516984462738, - "learning_rate": 5.0837480062304865e-05, - "loss": 0.007803326845169068, - "step": 9305 - }, - { - "epoch": 1.5873827791986361, - "grad_norm": 0.08378542214632034, - "learning_rate": 5.0813530341818377e-05, - "loss": 0.005085055530071258, - "step": 9310 - }, - { - "epoch": 1.5882352941176472, - "grad_norm": 0.10764650255441666, - "learning_rate": 5.078957440613305e-05, - "loss": 0.007959616929292678, - "step": 9315 - }, - { - "epoch": 1.5890878090366582, - "grad_norm": 0.07483979314565659, - "learning_rate": 5.076561226643231e-05, - "loss": 0.004332176968455314, - "step": 9320 - }, - { - "epoch": 1.5899403239556693, - "grad_norm": 0.06658382713794708, - "learning_rate": 5.074164393390249e-05, - "loss": 0.006168607249855995, - "step": 9325 - }, - { - "epoch": 1.5907928388746804, - "grad_norm": 0.09388890862464905, - "learning_rate": 5.071766941973282e-05, - "loss": 0.006460639089345932, - "step": 9330 - }, - { - "epoch": 1.5916453537936914, - "grad_norm": 0.051856543868780136, - "learning_rate": 5.0693688735115364e-05, - "loss": 0.005657953023910522, - "step": 9335 - }, - { - "epoch": 1.5924978687127025, - "grad_norm": 0.0785013884305954, - "learning_rate": 5.066970189124513e-05, - "loss": 0.008378601819276809, - "step": 9340 - }, - { - "epoch": 1.5933503836317136, - "grad_norm": 0.0653534010052681, - "learning_rate": 5.0645708899319956e-05, - "loss": 0.006928309798240662, - "step": 9345 - }, - { - "epoch": 1.5942028985507246, - "grad_norm": 0.047050826251506805, - "learning_rate": 5.062170977054058e-05, - "loss": 0.005722399801015854, - "step": 9350 - }, - { - "epoch": 1.5950554134697357, - "grad_norm": 0.10868531465530396, - "learning_rate": 5.059770451611061e-05, - "loss": 0.009898315370082855, - "step": 9355 - }, - { - "epoch": 1.5959079283887467, - "grad_norm": 0.0615832693874836, - "learning_rate": 5.0573693147236465e-05, - "loss": 0.007755370438098907, - "step": 9360 - }, - { - "epoch": 1.5967604433077578, - "grad_norm": 0.10720556974411011, - "learning_rate": 5.054967567512747e-05, - "loss": 0.006318587809801102, - "step": 9365 - }, - { - "epoch": 1.5976129582267689, - "grad_norm": 0.06587128341197968, - "learning_rate": 5.052565211099578e-05, - "loss": 0.004849371314048767, - "step": 9370 - }, - { - "epoch": 1.59846547314578, - "grad_norm": 0.07305008918046951, - "learning_rate": 5.050162246605638e-05, - "loss": 0.005983927100896835, - "step": 9375 - }, - { - "epoch": 1.599317988064791, - "grad_norm": 0.06641892343759537, - "learning_rate": 5.0477586751527124e-05, - "loss": 0.007008136063814163, - "step": 9380 - }, - { - "epoch": 1.6001705029838023, - "grad_norm": 0.06871581077575684, - "learning_rate": 5.045354497862868e-05, - "loss": 0.0066993959248065945, - "step": 9385 - }, - { - "epoch": 1.6010230179028133, - "grad_norm": 0.07417753338813782, - "learning_rate": 5.042949715858453e-05, - "loss": 0.006360804289579391, - "step": 9390 - }, - { - "epoch": 1.6018755328218244, - "grad_norm": 0.09202401340007782, - "learning_rate": 5.040544330262102e-05, - "loss": 0.006207296252250671, - "step": 9395 - }, - { - "epoch": 1.6027280477408354, - "grad_norm": 0.06747353821992874, - "learning_rate": 5.0381383421967276e-05, - "loss": 0.006196716427803039, - "step": 9400 - }, - { - "epoch": 1.6035805626598465, - "grad_norm": 0.06609310954809189, - "learning_rate": 5.0357317527855266e-05, - "loss": 0.005642791092395782, - "step": 9405 - }, - { - "epoch": 1.6044330775788578, - "grad_norm": 0.039614174515008926, - "learning_rate": 5.0333245631519716e-05, - "loss": 0.005146804824471473, - "step": 9410 - }, - { - "epoch": 1.6052855924978688, - "grad_norm": 0.0902944952249527, - "learning_rate": 5.0309167744198234e-05, - "loss": 0.005218298360705376, - "step": 9415 - }, - { - "epoch": 1.60613810741688, - "grad_norm": 0.06527641415596008, - "learning_rate": 5.028508387713114e-05, - "loss": 0.006157718971371651, - "step": 9420 - }, - { - "epoch": 1.606990622335891, - "grad_norm": 0.10824134200811386, - "learning_rate": 5.026099404156161e-05, - "loss": 0.00577687993645668, - "step": 9425 - }, - { - "epoch": 1.607843137254902, - "grad_norm": 0.091335728764534, - "learning_rate": 5.023689824873556e-05, - "loss": 0.005114461481571198, - "step": 9430 - }, - { - "epoch": 1.608695652173913, - "grad_norm": 0.047340504825115204, - "learning_rate": 5.021279650990173e-05, - "loss": 0.005150845646858216, - "step": 9435 - }, - { - "epoch": 1.6095481670929241, - "grad_norm": 0.05847655236721039, - "learning_rate": 5.01886888363116e-05, - "loss": 0.006019642949104309, - "step": 9440 - }, - { - "epoch": 1.6104006820119352, - "grad_norm": 0.10413257032632828, - "learning_rate": 5.016457523921943e-05, - "loss": 0.0097243569791317, - "step": 9445 - }, - { - "epoch": 1.6112531969309463, - "grad_norm": 0.06559625267982483, - "learning_rate": 5.014045572988226e-05, - "loss": 0.006743426620960236, - "step": 9450 - }, - { - "epoch": 1.6121057118499573, - "grad_norm": 0.07541610300540924, - "learning_rate": 5.0116330319559865e-05, - "loss": 0.004393500834703445, - "step": 9455 - }, - { - "epoch": 1.6129582267689684, - "grad_norm": 0.04757530242204666, - "learning_rate": 5.00921990195148e-05, - "loss": 0.004641738906502724, - "step": 9460 - }, - { - "epoch": 1.6138107416879794, - "grad_norm": 0.10010012239217758, - "learning_rate": 5.0068061841012355e-05, - "loss": 0.005677872523665428, - "step": 9465 - }, - { - "epoch": 1.6146632566069905, - "grad_norm": 0.08248613774776459, - "learning_rate": 5.0043918795320576e-05, - "loss": 0.006557486951351166, - "step": 9470 - }, - { - "epoch": 1.6155157715260016, - "grad_norm": 0.06300318241119385, - "learning_rate": 5.001976989371023e-05, - "loss": 0.0052742622792720795, - "step": 9475 - }, - { - "epoch": 1.6163682864450126, - "grad_norm": 0.06455430388450623, - "learning_rate": 4.999561514745482e-05, - "loss": 0.0061374582350254055, - "step": 9480 - }, - { - "epoch": 1.617220801364024, - "grad_norm": 0.04623732715845108, - "learning_rate": 4.997145456783062e-05, - "loss": 0.007861848175525665, - "step": 9485 - }, - { - "epoch": 1.618073316283035, - "grad_norm": 0.05294455960392952, - "learning_rate": 4.994728816611655e-05, - "loss": 0.005468960478901863, - "step": 9490 - }, - { - "epoch": 1.618925831202046, - "grad_norm": 0.04539628326892853, - "learning_rate": 4.992311595359431e-05, - "loss": 0.005490221083164215, - "step": 9495 - }, - { - "epoch": 1.619778346121057, - "grad_norm": 0.04033574461936951, - "learning_rate": 4.98989379415483e-05, - "loss": 0.005296828970313072, - "step": 9500 - }, - { - "epoch": 1.6206308610400681, - "grad_norm": 0.10801003128290176, - "learning_rate": 4.98747541412656e-05, - "loss": 0.007847490906715392, - "step": 9505 - }, - { - "epoch": 1.6214833759590794, - "grad_norm": 0.05979831889271736, - "learning_rate": 4.985056456403603e-05, - "loss": 0.005352787673473358, - "step": 9510 - }, - { - "epoch": 1.6223358908780905, - "grad_norm": 0.07628990709781647, - "learning_rate": 4.9826369221152086e-05, - "loss": 0.005436672642827034, - "step": 9515 - }, - { - "epoch": 1.6231884057971016, - "grad_norm": 0.0654626339673996, - "learning_rate": 4.9802168123908955e-05, - "loss": 0.004777481406927108, - "step": 9520 - }, - { - "epoch": 1.6240409207161126, - "grad_norm": 0.08487557619810104, - "learning_rate": 4.97779612836045e-05, - "loss": 0.006834116578102112, - "step": 9525 - }, - { - "epoch": 1.6248934356351237, - "grad_norm": 0.09151525050401688, - "learning_rate": 4.9753748711539316e-05, - "loss": 0.006389729678630829, - "step": 9530 - }, - { - "epoch": 1.6257459505541347, - "grad_norm": 0.10458851605653763, - "learning_rate": 4.972953041901661e-05, - "loss": 0.005984527617692947, - "step": 9535 - }, - { - "epoch": 1.6265984654731458, - "grad_norm": 0.08780983090400696, - "learning_rate": 4.970530641734229e-05, - "loss": 0.0068392202258110045, - "step": 9540 - }, - { - "epoch": 1.6274509803921569, - "grad_norm": 0.04871044307947159, - "learning_rate": 4.968107671782493e-05, - "loss": 0.005444938316941261, - "step": 9545 - }, - { - "epoch": 1.628303495311168, - "grad_norm": 0.05514970421791077, - "learning_rate": 4.9656841331775745e-05, - "loss": 0.005353255197405815, - "step": 9550 - }, - { - "epoch": 1.629156010230179, - "grad_norm": 0.057791441679000854, - "learning_rate": 4.9632600270508655e-05, - "loss": 0.005117457732558251, - "step": 9555 - }, - { - "epoch": 1.63000852514919, - "grad_norm": 0.0816815048456192, - "learning_rate": 4.960835354534015e-05, - "loss": 0.005405401438474655, - "step": 9560 - }, - { - "epoch": 1.630861040068201, - "grad_norm": 0.087788425385952, - "learning_rate": 4.958410116758945e-05, - "loss": 0.006124432012438774, - "step": 9565 - }, - { - "epoch": 1.6317135549872122, - "grad_norm": 0.08500470966100693, - "learning_rate": 4.955984314857832e-05, - "loss": 0.00581449456512928, - "step": 9570 - }, - { - "epoch": 1.6325660699062232, - "grad_norm": 0.042804375290870667, - "learning_rate": 4.9535579499631264e-05, - "loss": 0.007793295383453369, - "step": 9575 - }, - { - "epoch": 1.6334185848252343, - "grad_norm": 0.08767658472061157, - "learning_rate": 4.951131023207533e-05, - "loss": 0.006432226300239563, - "step": 9580 - }, - { - "epoch": 1.6342710997442456, - "grad_norm": 0.0693424716591835, - "learning_rate": 4.948703535724023e-05, - "loss": 0.006517377495765686, - "step": 9585 - }, - { - "epoch": 1.6351236146632566, - "grad_norm": 0.08574991673231125, - "learning_rate": 4.9462754886458276e-05, - "loss": 0.009532185643911362, - "step": 9590 - }, - { - "epoch": 1.6359761295822677, - "grad_norm": 0.04135733097791672, - "learning_rate": 4.94384688310644e-05, - "loss": 0.005358002707362175, - "step": 9595 - }, - { - "epoch": 1.6368286445012787, - "grad_norm": 0.09947369992733002, - "learning_rate": 4.941417720239616e-05, - "loss": 0.005965238064527511, - "step": 9600 - }, - { - "epoch": 1.6376811594202898, - "grad_norm": 0.038376711308956146, - "learning_rate": 4.9389880011793665e-05, - "loss": 0.00521450936794281, - "step": 9605 - }, - { - "epoch": 1.638533674339301, - "grad_norm": 0.05022123083472252, - "learning_rate": 4.9365577270599675e-05, - "loss": 0.006678921729326248, - "step": 9610 - }, - { - "epoch": 1.6393861892583121, - "grad_norm": 0.06687050312757492, - "learning_rate": 4.93412689901595e-05, - "loss": 0.006315051764249802, - "step": 9615 - }, - { - "epoch": 1.6402387041773232, - "grad_norm": 0.08563709259033203, - "learning_rate": 4.931695518182107e-05, - "loss": 0.005977614223957062, - "step": 9620 - }, - { - "epoch": 1.6410912190963343, - "grad_norm": 0.07901418209075928, - "learning_rate": 4.929263585693486e-05, - "loss": 0.004367914795875549, - "step": 9625 - }, - { - "epoch": 1.6419437340153453, - "grad_norm": 0.05929172784090042, - "learning_rate": 4.9268311026853974e-05, - "loss": 0.00466451421380043, - "step": 9630 - }, - { - "epoch": 1.6427962489343564, - "grad_norm": 0.09167131781578064, - "learning_rate": 4.924398070293403e-05, - "loss": 0.0063233010470867155, - "step": 9635 - }, - { - "epoch": 1.6436487638533674, - "grad_norm": 0.053217221051454544, - "learning_rate": 4.921964489653321e-05, - "loss": 0.005829869210720063, - "step": 9640 - }, - { - "epoch": 1.6445012787723785, - "grad_norm": 0.05341719463467598, - "learning_rate": 4.919530361901232e-05, - "loss": 0.005165425688028335, - "step": 9645 - }, - { - "epoch": 1.6453537936913896, - "grad_norm": 0.0763968899846077, - "learning_rate": 4.917095688173466e-05, - "loss": 0.008034119009971618, - "step": 9650 - }, - { - "epoch": 1.6462063086104006, - "grad_norm": 0.07722017168998718, - "learning_rate": 4.9146604696066095e-05, - "loss": 0.008911440521478653, - "step": 9655 - }, - { - "epoch": 1.6470588235294117, - "grad_norm": 0.0639941543340683, - "learning_rate": 4.912224707337504e-05, - "loss": 0.0066375695168972015, - "step": 9660 - }, - { - "epoch": 1.6479113384484227, - "grad_norm": 0.05451088026165962, - "learning_rate": 4.9097884025032425e-05, - "loss": 0.004018183052539826, - "step": 9665 - }, - { - "epoch": 1.6487638533674338, - "grad_norm": 0.06928657740354538, - "learning_rate": 4.907351556241176e-05, - "loss": 0.0061560459434986115, - "step": 9670 - }, - { - "epoch": 1.6496163682864449, - "grad_norm": 0.0672740638256073, - "learning_rate": 4.904914169688903e-05, - "loss": 0.005010559782385826, - "step": 9675 - }, - { - "epoch": 1.6504688832054561, - "grad_norm": 0.05115605145692825, - "learning_rate": 4.902476243984279e-05, - "loss": 0.005690005421638489, - "step": 9680 - }, - { - "epoch": 1.6513213981244672, - "grad_norm": 0.08852645754814148, - "learning_rate": 4.9000377802654055e-05, - "loss": 0.0067652732133865355, - "step": 9685 - }, - { - "epoch": 1.6521739130434783, - "grad_norm": 0.08289605379104614, - "learning_rate": 4.897598779670643e-05, - "loss": 0.005946322903037071, - "step": 9690 - }, - { - "epoch": 1.6530264279624893, - "grad_norm": 0.08343428373336792, - "learning_rate": 4.895159243338594e-05, - "loss": 0.006231371313333511, - "step": 9695 - }, - { - "epoch": 1.6538789428815004, - "grad_norm": 0.08138900995254517, - "learning_rate": 4.892719172408117e-05, - "loss": 0.006785771995782852, - "step": 9700 - }, - { - "epoch": 1.6547314578005117, - "grad_norm": 0.07599585503339767, - "learning_rate": 4.890278568018318e-05, - "loss": 0.00609181635081768, - "step": 9705 - }, - { - "epoch": 1.6555839727195227, - "grad_norm": 0.07918383926153183, - "learning_rate": 4.887837431308552e-05, - "loss": 0.006991502642631531, - "step": 9710 - }, - { - "epoch": 1.6564364876385338, - "grad_norm": 0.048750922083854675, - "learning_rate": 4.8853957634184246e-05, - "loss": 0.00639684796333313, - "step": 9715 - }, - { - "epoch": 1.6572890025575449, - "grad_norm": 0.07931654155254364, - "learning_rate": 4.882953565487785e-05, - "loss": 0.004780232906341553, - "step": 9720 - }, - { - "epoch": 1.658141517476556, - "grad_norm": 0.07394375652074814, - "learning_rate": 4.8805108386567345e-05, - "loss": 0.005560039728879929, - "step": 9725 - }, - { - "epoch": 1.658994032395567, - "grad_norm": 0.07906223088502884, - "learning_rate": 4.8780675840656175e-05, - "loss": 0.006233107298612595, - "step": 9730 - }, - { - "epoch": 1.659846547314578, - "grad_norm": 0.05145291984081268, - "learning_rate": 4.875623802855027e-05, - "loss": 0.0049663417041301726, - "step": 9735 - }, - { - "epoch": 1.660699062233589, - "grad_norm": 0.06227492541074753, - "learning_rate": 4.873179496165802e-05, - "loss": 0.006139815598726272, - "step": 9740 - }, - { - "epoch": 1.6615515771526002, - "grad_norm": 0.08176816254854202, - "learning_rate": 4.870734665139028e-05, - "loss": 0.007625886052846908, - "step": 9745 - }, - { - "epoch": 1.6624040920716112, - "grad_norm": 0.06774444133043289, - "learning_rate": 4.868289310916029e-05, - "loss": 0.006510105729103088, - "step": 9750 - }, - { - "epoch": 1.6632566069906223, - "grad_norm": 0.07336006313562393, - "learning_rate": 4.8658434346383805e-05, - "loss": 0.0068834669888019565, - "step": 9755 - }, - { - "epoch": 1.6641091219096333, - "grad_norm": 0.07233051210641861, - "learning_rate": 4.863397037447899e-05, - "loss": 0.005505643784999847, - "step": 9760 - }, - { - "epoch": 1.6649616368286444, - "grad_norm": 0.037355873733758926, - "learning_rate": 4.860950120486643e-05, - "loss": 0.005151794478297234, - "step": 9765 - }, - { - "epoch": 1.6658141517476555, - "grad_norm": 0.10907282680273056, - "learning_rate": 4.8585026848969164e-05, - "loss": 0.007589263468980789, - "step": 9770 - }, - { - "epoch": 1.6666666666666665, - "grad_norm": 0.10357582569122314, - "learning_rate": 4.856054731821261e-05, - "loss": 0.006011854484677314, - "step": 9775 - }, - { - "epoch": 1.6675191815856778, - "grad_norm": 0.0713953971862793, - "learning_rate": 4.853606262402465e-05, - "loss": 0.006342334300279617, - "step": 9780 - }, - { - "epoch": 1.6683716965046889, - "grad_norm": 0.07772944122552872, - "learning_rate": 4.851157277783555e-05, - "loss": 0.005903373658657074, - "step": 9785 - }, - { - "epoch": 1.6692242114237, - "grad_norm": 0.1249493658542633, - "learning_rate": 4.848707779107797e-05, - "loss": 0.006542833894491196, - "step": 9790 - }, - { - "epoch": 1.670076726342711, - "grad_norm": 0.05137734115123749, - "learning_rate": 4.8462577675187e-05, - "loss": 0.004380676150321961, - "step": 9795 - }, - { - "epoch": 1.670929241261722, - "grad_norm": 0.09491576999425888, - "learning_rate": 4.8438072441600095e-05, - "loss": 0.005311820283532142, - "step": 9800 - }, - { - "epoch": 1.6717817561807333, - "grad_norm": 0.09257746487855911, - "learning_rate": 4.8413562101757134e-05, - "loss": 0.006033014133572578, - "step": 9805 - }, - { - "epoch": 1.6726342710997444, - "grad_norm": 0.045860812067985535, - "learning_rate": 4.838904666710034e-05, - "loss": 0.008368080109357834, - "step": 9810 - }, - { - "epoch": 1.6734867860187554, - "grad_norm": 0.033777810633182526, - "learning_rate": 4.836452614907435e-05, - "loss": 0.0045743979513645176, - "step": 9815 - }, - { - "epoch": 1.6743393009377665, - "grad_norm": 0.12888991832733154, - "learning_rate": 4.834000055912614e-05, - "loss": 0.005997149646282196, - "step": 9820 - }, - { - "epoch": 1.6751918158567776, - "grad_norm": 0.08622048050165176, - "learning_rate": 4.8315469908705074e-05, - "loss": 0.007002732157707215, - "step": 9825 - }, - { - "epoch": 1.6760443307757886, - "grad_norm": 0.04722774773836136, - "learning_rate": 4.82909342092629e-05, - "loss": 0.005374876409769058, - "step": 9830 - }, - { - "epoch": 1.6768968456947997, - "grad_norm": 0.08596520870923996, - "learning_rate": 4.826639347225366e-05, - "loss": 0.0066084228456020355, - "step": 9835 - }, - { - "epoch": 1.6777493606138107, - "grad_norm": 0.09831524640321732, - "learning_rate": 4.824184770913381e-05, - "loss": 0.004402932524681091, - "step": 9840 - }, - { - "epoch": 1.6786018755328218, - "grad_norm": 0.10586824268102646, - "learning_rate": 4.821729693136214e-05, - "loss": 0.006442143023014069, - "step": 9845 - }, - { - "epoch": 1.6794543904518329, - "grad_norm": 0.11845403164625168, - "learning_rate": 4.8192741150399735e-05, - "loss": 0.006300021708011627, - "step": 9850 - }, - { - "epoch": 1.680306905370844, - "grad_norm": 0.08749356120824814, - "learning_rate": 4.816818037771007e-05, - "loss": 0.0060168147087097164, - "step": 9855 - }, - { - "epoch": 1.681159420289855, - "grad_norm": 0.06483060121536255, - "learning_rate": 4.814361462475895e-05, - "loss": 0.00717247799038887, - "step": 9860 - }, - { - "epoch": 1.682011935208866, - "grad_norm": 0.09276239573955536, - "learning_rate": 4.811904390301444e-05, - "loss": 0.006788758933544159, - "step": 9865 - }, - { - "epoch": 1.682864450127877, - "grad_norm": 0.05662832781672478, - "learning_rate": 4.809446822394701e-05, - "loss": 0.0068000413477420805, - "step": 9870 - }, - { - "epoch": 1.6837169650468882, - "grad_norm": 0.07508451491594315, - "learning_rate": 4.80698875990294e-05, - "loss": 0.006339512765407562, - "step": 9875 - }, - { - "epoch": 1.6845694799658995, - "grad_norm": 0.06525320559740067, - "learning_rate": 4.804530203973664e-05, - "loss": 0.010082229971885681, - "step": 9880 - }, - { - "epoch": 1.6854219948849105, - "grad_norm": 0.07791458070278168, - "learning_rate": 4.8020711557546104e-05, - "loss": 0.006830710172653198, - "step": 9885 - }, - { - "epoch": 1.6862745098039216, - "grad_norm": 0.05997749790549278, - "learning_rate": 4.799611616393745e-05, - "loss": 0.00666801705956459, - "step": 9890 - }, - { - "epoch": 1.6871270247229326, - "grad_norm": 0.07050258666276932, - "learning_rate": 4.797151587039261e-05, - "loss": 0.0059244450181722644, - "step": 9895 - }, - { - "epoch": 1.6879795396419437, - "grad_norm": 0.06760186702013016, - "learning_rate": 4.794691068839585e-05, - "loss": 0.006415641307830811, - "step": 9900 - }, - { - "epoch": 1.688832054560955, - "grad_norm": 0.07285474240779877, - "learning_rate": 4.792230062943364e-05, - "loss": 0.004972729086875916, - "step": 9905 - }, - { - "epoch": 1.689684569479966, - "grad_norm": 0.02914854884147644, - "learning_rate": 4.789768570499481e-05, - "loss": 0.004819701239466667, - "step": 9910 - }, - { - "epoch": 1.690537084398977, - "grad_norm": 0.058768294751644135, - "learning_rate": 4.787306592657042e-05, - "loss": 0.00581958070397377, - "step": 9915 - }, - { - "epoch": 1.6913895993179882, - "grad_norm": 0.08694405853748322, - "learning_rate": 4.7848441305653804e-05, - "loss": 0.004998849332332611, - "step": 9920 - }, - { - "epoch": 1.6922421142369992, - "grad_norm": 0.10194200277328491, - "learning_rate": 4.782381185374054e-05, - "loss": 0.00809016153216362, - "step": 9925 - }, - { - "epoch": 1.6930946291560103, - "grad_norm": 0.04976386949419975, - "learning_rate": 4.779917758232849e-05, - "loss": 0.00392133817076683, - "step": 9930 - }, - { - "epoch": 1.6939471440750213, - "grad_norm": 0.04324428364634514, - "learning_rate": 4.777453850291774e-05, - "loss": 0.005488916113972664, - "step": 9935 - }, - { - "epoch": 1.6947996589940324, - "grad_norm": 0.128068745136261, - "learning_rate": 4.774989462701063e-05, - "loss": 0.008696570992469788, - "step": 9940 - }, - { - "epoch": 1.6956521739130435, - "grad_norm": 0.06357335299253464, - "learning_rate": 4.7725245966111764e-05, - "loss": 0.00657767504453659, - "step": 9945 - }, - { - "epoch": 1.6965046888320545, - "grad_norm": 0.09200388938188553, - "learning_rate": 4.770059253172793e-05, - "loss": 0.00511985532939434, - "step": 9950 - }, - { - "epoch": 1.6973572037510656, - "grad_norm": 0.0898200049996376, - "learning_rate": 4.767593433536819e-05, - "loss": 0.005805553123354912, - "step": 9955 - }, - { - "epoch": 1.6982097186700766, - "grad_norm": 0.06495708227157593, - "learning_rate": 4.765127138854379e-05, - "loss": 0.005122709274291992, - "step": 9960 - }, - { - "epoch": 1.6990622335890877, - "grad_norm": 0.06079862266778946, - "learning_rate": 4.762660370276824e-05, - "loss": 0.005829216912388802, - "step": 9965 - }, - { - "epoch": 1.6999147485080988, - "grad_norm": 0.07300638407468796, - "learning_rate": 4.760193128955721e-05, - "loss": 0.0057421475648880005, - "step": 9970 - }, - { - "epoch": 1.7007672634271098, - "grad_norm": 0.09826004505157471, - "learning_rate": 4.757725416042863e-05, - "loss": 0.007709302753210068, - "step": 9975 - }, - { - "epoch": 1.701619778346121, - "grad_norm": 0.08353756368160248, - "learning_rate": 4.755257232690258e-05, - "loss": 0.007458946853876114, - "step": 9980 - }, - { - "epoch": 1.7024722932651322, - "grad_norm": 0.057993657886981964, - "learning_rate": 4.752788580050137e-05, - "loss": 0.0048107530921697615, - "step": 9985 - }, - { - "epoch": 1.7033248081841432, - "grad_norm": 0.08480621874332428, - "learning_rate": 4.750319459274951e-05, - "loss": 0.007556724548339844, - "step": 9990 - }, - { - "epoch": 1.7041773231031543, - "grad_norm": 0.06563637405633926, - "learning_rate": 4.747849871517364e-05, - "loss": 0.00476250983774662, - "step": 9995 - }, - { - "epoch": 1.7050298380221653, - "grad_norm": 0.06217886507511139, - "learning_rate": 4.7453798179302656e-05, - "loss": 0.008565887063741683, - "step": 10000 - }, - { - "epoch": 1.7058823529411766, - "grad_norm": 0.07285669445991516, - "learning_rate": 4.742909299666756e-05, - "loss": 0.0062899492681026455, - "step": 10005 - }, - { - "epoch": 1.7067348678601877, - "grad_norm": 0.043275732547044754, - "learning_rate": 4.7404383178801564e-05, - "loss": 0.005467301979660988, - "step": 10010 - }, - { - "epoch": 1.7075873827791987, - "grad_norm": 0.09345486015081406, - "learning_rate": 4.7379668737240044e-05, - "loss": 0.007198603451251983, - "step": 10015 - }, - { - "epoch": 1.7084398976982098, - "grad_norm": 0.09792933613061905, - "learning_rate": 4.735494968352049e-05, - "loss": 0.009155672788619996, - "step": 10020 - }, - { - "epoch": 1.7092924126172209, - "grad_norm": 0.03888144716620445, - "learning_rate": 4.733022602918263e-05, - "loss": 0.00484597384929657, - "step": 10025 - }, - { - "epoch": 1.710144927536232, - "grad_norm": 0.050344232469797134, - "learning_rate": 4.7305497785768235e-05, - "loss": 0.00478862039744854, - "step": 10030 - }, - { - "epoch": 1.710997442455243, - "grad_norm": 0.0724092647433281, - "learning_rate": 4.728076496482131e-05, - "loss": 0.005028426647186279, - "step": 10035 - }, - { - "epoch": 1.711849957374254, - "grad_norm": 0.10781413316726685, - "learning_rate": 4.725602757788794e-05, - "loss": 0.00789962187409401, - "step": 10040 - }, - { - "epoch": 1.712702472293265, - "grad_norm": 0.0828569084405899, - "learning_rate": 4.723128563651637e-05, - "loss": 0.006212035566568375, - "step": 10045 - }, - { - "epoch": 1.7135549872122762, - "grad_norm": 0.06634854525327682, - "learning_rate": 4.720653915225695e-05, - "loss": 0.00550018809735775, - "step": 10050 - }, - { - "epoch": 1.7144075021312872, - "grad_norm": 0.07699137926101685, - "learning_rate": 4.718178813666217e-05, - "loss": 0.007427608966827393, - "step": 10055 - }, - { - "epoch": 1.7152600170502983, - "grad_norm": 0.08237455785274506, - "learning_rate": 4.715703260128663e-05, - "loss": 0.0049440376460552216, - "step": 10060 - }, - { - "epoch": 1.7161125319693094, - "grad_norm": 0.0423310324549675, - "learning_rate": 4.7132272557687034e-05, - "loss": 0.005643930658698082, - "step": 10065 - }, - { - "epoch": 1.7169650468883204, - "grad_norm": 0.08052363246679306, - "learning_rate": 4.71075080174222e-05, - "loss": 0.005594046413898468, - "step": 10070 - }, - { - "epoch": 1.7178175618073315, - "grad_norm": 0.05388827249407768, - "learning_rate": 4.7082738992053004e-05, - "loss": 0.005239073187112808, - "step": 10075 - }, - { - "epoch": 1.7186700767263428, - "grad_norm": 0.0699780210852623, - "learning_rate": 4.70579654931425e-05, - "loss": 0.004442551359534264, - "step": 10080 - }, - { - "epoch": 1.7195225916453538, - "grad_norm": 0.07259970158338547, - "learning_rate": 4.7033187532255765e-05, - "loss": 0.004775180667638779, - "step": 10085 - }, - { - "epoch": 1.7203751065643649, - "grad_norm": 0.10291304439306259, - "learning_rate": 4.700840512095995e-05, - "loss": 0.009148158878087998, - "step": 10090 - }, - { - "epoch": 1.721227621483376, - "grad_norm": 0.09639768302440643, - "learning_rate": 4.698361827082435e-05, - "loss": 0.008357913047075272, - "step": 10095 - }, - { - "epoch": 1.722080136402387, - "grad_norm": 0.08128193765878677, - "learning_rate": 4.695882699342026e-05, - "loss": 0.006467945128679276, - "step": 10100 - }, - { - "epoch": 1.7229326513213983, - "grad_norm": 0.0678371787071228, - "learning_rate": 4.6934031300321094e-05, - "loss": 0.005760467797517777, - "step": 10105 - }, - { - "epoch": 1.7237851662404093, - "grad_norm": 0.0766267478466034, - "learning_rate": 4.6909231203102285e-05, - "loss": 0.0068340465426445, - "step": 10110 - }, - { - "epoch": 1.7246376811594204, - "grad_norm": 0.04263419657945633, - "learning_rate": 4.6884426713341366e-05, - "loss": 0.005921339616179466, - "step": 10115 - }, - { - "epoch": 1.7254901960784315, - "grad_norm": 0.10168195515871048, - "learning_rate": 4.6859617842617874e-05, - "loss": 0.006926319003105164, - "step": 10120 - }, - { - "epoch": 1.7263427109974425, - "grad_norm": 0.07910803705453873, - "learning_rate": 4.683480460251343e-05, - "loss": 0.006997878849506378, - "step": 10125 - }, - { - "epoch": 1.7271952259164536, - "grad_norm": 0.045049965381622314, - "learning_rate": 4.680998700461169e-05, - "loss": 0.005594813078641891, - "step": 10130 - }, - { - "epoch": 1.7280477408354646, - "grad_norm": 0.07185275852680206, - "learning_rate": 4.678516506049832e-05, - "loss": 0.006092778965830803, - "step": 10135 - }, - { - "epoch": 1.7289002557544757, - "grad_norm": 0.07003147900104523, - "learning_rate": 4.676033878176102e-05, - "loss": 0.007595886290073395, - "step": 10140 - }, - { - "epoch": 1.7297527706734868, - "grad_norm": 0.06360077112913132, - "learning_rate": 4.6735508179989536e-05, - "loss": 0.00546439029276371, - "step": 10145 - }, - { - "epoch": 1.7306052855924978, - "grad_norm": 0.07347442954778671, - "learning_rate": 4.671067326677563e-05, - "loss": 0.004961185902357101, - "step": 10150 - }, - { - "epoch": 1.7314578005115089, - "grad_norm": 0.056153345853090286, - "learning_rate": 4.6685834053713035e-05, - "loss": 0.006820976734161377, - "step": 10155 - }, - { - "epoch": 1.73231031543052, - "grad_norm": 0.09868444502353668, - "learning_rate": 4.666099055239755e-05, - "loss": 0.004829689115285874, - "step": 10160 - }, - { - "epoch": 1.733162830349531, - "grad_norm": 0.07029838860034943, - "learning_rate": 4.663614277442694e-05, - "loss": 0.006708820164203644, - "step": 10165 - }, - { - "epoch": 1.734015345268542, - "grad_norm": 0.0785607323050499, - "learning_rate": 4.661129073140096e-05, - "loss": 0.0093411885201931, - "step": 10170 - }, - { - "epoch": 1.7348678601875531, - "grad_norm": 0.05867304652929306, - "learning_rate": 4.658643443492139e-05, - "loss": 0.004420546442270279, - "step": 10175 - }, - { - "epoch": 1.7357203751065644, - "grad_norm": 0.08736653625965118, - "learning_rate": 4.656157389659196e-05, - "loss": 0.0049125440418720245, - "step": 10180 - }, - { - "epoch": 1.7365728900255755, - "grad_norm": 0.10769468545913696, - "learning_rate": 4.653670912801842e-05, - "loss": 0.006663528829813003, - "step": 10185 - }, - { - "epoch": 1.7374254049445865, - "grad_norm": 0.054130490869283676, - "learning_rate": 4.651184014080843e-05, - "loss": 0.005649637803435326, - "step": 10190 - }, - { - "epoch": 1.7382779198635976, - "grad_norm": 0.0760764479637146, - "learning_rate": 4.648696694657171e-05, - "loss": 0.00803508386015892, - "step": 10195 - }, - { - "epoch": 1.7391304347826086, - "grad_norm": 0.08103618025779724, - "learning_rate": 4.646208955691987e-05, - "loss": 0.005645860359072686, - "step": 10200 - }, - { - "epoch": 1.73998294970162, - "grad_norm": 0.060226406902074814, - "learning_rate": 4.643720798346649e-05, - "loss": 0.005114502459764481, - "step": 10205 - }, - { - "epoch": 1.740835464620631, - "grad_norm": 0.08842508494853973, - "learning_rate": 4.641232223782713e-05, - "loss": 0.004128537327051163, - "step": 10210 - }, - { - "epoch": 1.741687979539642, - "grad_norm": 0.03715536370873451, - "learning_rate": 4.6387432331619284e-05, - "loss": 0.005640536174178123, - "step": 10215 - }, - { - "epoch": 1.742540494458653, - "grad_norm": 0.09130766242742538, - "learning_rate": 4.636253827646239e-05, - "loss": 0.0074319176375865935, - "step": 10220 - }, - { - "epoch": 1.7433930093776642, - "grad_norm": 0.08204436302185059, - "learning_rate": 4.6337640083977826e-05, - "loss": 0.006443107873201371, - "step": 10225 - }, - { - "epoch": 1.7442455242966752, - "grad_norm": 0.09834989905357361, - "learning_rate": 4.6312737765788883e-05, - "loss": 0.00825996845960617, - "step": 10230 - }, - { - "epoch": 1.7450980392156863, - "grad_norm": 0.07453756034374237, - "learning_rate": 4.628783133352078e-05, - "loss": 0.005153121426701546, - "step": 10235 - }, - { - "epoch": 1.7459505541346974, - "grad_norm": 0.0658891350030899, - "learning_rate": 4.626292079880071e-05, - "loss": 0.005568725615739822, - "step": 10240 - }, - { - "epoch": 1.7468030690537084, - "grad_norm": 0.08673261851072311, - "learning_rate": 4.623800617325772e-05, - "loss": 0.00687919333577156, - "step": 10245 - }, - { - "epoch": 1.7476555839727195, - "grad_norm": 0.08707419037818909, - "learning_rate": 4.621308746852276e-05, - "loss": 0.009814801812171935, - "step": 10250 - }, - { - "epoch": 1.7485080988917305, - "grad_norm": 0.07168986648321152, - "learning_rate": 4.618816469622874e-05, - "loss": 0.004722443222999573, - "step": 10255 - }, - { - "epoch": 1.7493606138107416, - "grad_norm": 0.07987508177757263, - "learning_rate": 4.616323786801042e-05, - "loss": 0.006749927252531052, - "step": 10260 - }, - { - "epoch": 1.7497016197783462, - "eval_loss": 0.03619376942515373, - "eval_runtime": 3.6854, - "eval_samples_per_second": 68.379, - "eval_steps_per_second": 1.085, - "step": 10262 - }, - { - "eval_cer_subset": 0.014314328985294836, - "eval_cer_subset_edit_distance": 879, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 10262 - }, - { - "epoch": 1.7502131287297527, - "grad_norm": 0.10899413377046585, - "learning_rate": 4.6138306995504495e-05, - "loss": 0.006938809901475907, - "step": 10265 - }, - { - "epoch": 1.7510656436487637, - "grad_norm": 0.10073213279247284, - "learning_rate": 4.6113372090349516e-05, - "loss": 0.00795048326253891, - "step": 10270 - }, - { - "epoch": 1.7519181585677748, - "grad_norm": 0.04800979420542717, - "learning_rate": 4.608843316418592e-05, - "loss": 0.007616385817527771, - "step": 10275 - }, - { - "epoch": 1.752770673486786, - "grad_norm": 0.09020161628723145, - "learning_rate": 4.6063490228656025e-05, - "loss": 0.005228221416473389, - "step": 10280 - }, - { - "epoch": 1.7536231884057971, - "grad_norm": 0.083438441157341, - "learning_rate": 4.603854329540403e-05, - "loss": 0.00726160854101181, - "step": 10285 - }, - { - "epoch": 1.7544757033248082, - "grad_norm": 0.07851024717092514, - "learning_rate": 4.6013592376076e-05, - "loss": 0.006890790909528733, - "step": 10290 - }, - { - "epoch": 1.7553282182438192, - "grad_norm": 0.09015098959207535, - "learning_rate": 4.598863748231985e-05, - "loss": 0.007083073258399963, - "step": 10295 - }, - { - "epoch": 1.7561807331628303, - "grad_norm": 0.04751535877585411, - "learning_rate": 4.596367862578534e-05, - "loss": 0.005376371741294861, - "step": 10300 - }, - { - "epoch": 1.7570332480818416, - "grad_norm": 0.07547739148139954, - "learning_rate": 4.5938715818124094e-05, - "loss": 0.008766484260559083, - "step": 10305 - }, - { - "epoch": 1.7578857630008526, - "grad_norm": 0.052052512764930725, - "learning_rate": 4.5913749070989616e-05, - "loss": 0.005375667661428452, - "step": 10310 - }, - { - "epoch": 1.7587382779198637, - "grad_norm": 0.11575129628181458, - "learning_rate": 4.5888778396037187e-05, - "loss": 0.006675881892442703, - "step": 10315 - }, - { - "epoch": 1.7595907928388748, - "grad_norm": 0.05995294824242592, - "learning_rate": 4.586380380492394e-05, - "loss": 0.007097356766462326, - "step": 10320 - }, - { - "epoch": 1.7604433077578858, - "grad_norm": 0.049236129969358444, - "learning_rate": 4.583882530930887e-05, - "loss": 0.004433324560523033, - "step": 10325 - }, - { - "epoch": 1.7612958226768969, - "grad_norm": 0.048296503722667694, - "learning_rate": 4.581384292085274e-05, - "loss": 0.0051886774599552155, - "step": 10330 - }, - { - "epoch": 1.762148337595908, - "grad_norm": 0.09939385205507278, - "learning_rate": 4.57888566512182e-05, - "loss": 0.006426715105772018, - "step": 10335 - }, - { - "epoch": 1.763000852514919, - "grad_norm": 0.08810277283191681, - "learning_rate": 4.5763866512069626e-05, - "loss": 0.00727301687002182, - "step": 10340 - }, - { - "epoch": 1.76385336743393, - "grad_norm": 0.05262129753828049, - "learning_rate": 4.573887251507328e-05, - "loss": 0.004860313236713409, - "step": 10345 - }, - { - "epoch": 1.7647058823529411, - "grad_norm": 0.09755868464708328, - "learning_rate": 4.571387467189718e-05, - "loss": 0.00684543177485466, - "step": 10350 - }, - { - "epoch": 1.7655583972719522, - "grad_norm": 0.08306272327899933, - "learning_rate": 4.568887299421115e-05, - "loss": 0.005363506823778152, - "step": 10355 - }, - { - "epoch": 1.7664109121909632, - "grad_norm": 0.06304962188005447, - "learning_rate": 4.566386749368681e-05, - "loss": 0.006262359023094177, - "step": 10360 - }, - { - "epoch": 1.7672634271099743, - "grad_norm": 0.099216029047966, - "learning_rate": 4.5638858181997544e-05, - "loss": 0.005263365060091019, - "step": 10365 - }, - { - "epoch": 1.7681159420289854, - "grad_norm": 0.06316341459751129, - "learning_rate": 4.5613845070818544e-05, - "loss": 0.0053974583745002745, - "step": 10370 - }, - { - "epoch": 1.7689684569479964, - "grad_norm": 0.08523725718259811, - "learning_rate": 4.5588828171826755e-05, - "loss": 0.006064000725746155, - "step": 10375 - }, - { - "epoch": 1.7698209718670077, - "grad_norm": 0.0663699060678482, - "learning_rate": 4.5563807496700925e-05, - "loss": 0.00665600374341011, - "step": 10380 - }, - { - "epoch": 1.7706734867860188, - "grad_norm": 0.10673311352729797, - "learning_rate": 4.55387830571215e-05, - "loss": 0.006540966033935547, - "step": 10385 - }, - { - "epoch": 1.7715260017050298, - "grad_norm": 0.08779574930667877, - "learning_rate": 4.551375486477074e-05, - "loss": 0.00547558106482029, - "step": 10390 - }, - { - "epoch": 1.772378516624041, - "grad_norm": 0.07451514899730682, - "learning_rate": 4.5488722931332625e-05, - "loss": 0.008499838411808014, - "step": 10395 - }, - { - "epoch": 1.773231031543052, - "grad_norm": 0.06014202535152435, - "learning_rate": 4.5463687268492904e-05, - "loss": 0.006278771907091141, - "step": 10400 - }, - { - "epoch": 1.7740835464620632, - "grad_norm": 0.039256151765584946, - "learning_rate": 4.543864788793907e-05, - "loss": 0.0037193533033132555, - "step": 10405 - }, - { - "epoch": 1.7749360613810743, - "grad_norm": 0.09449942409992218, - "learning_rate": 4.541360480136031e-05, - "loss": 0.006574592739343643, - "step": 10410 - }, - { - "epoch": 1.7757885763000854, - "grad_norm": 0.07616980373859406, - "learning_rate": 4.53885580204476e-05, - "loss": 0.006042734161019326, - "step": 10415 - }, - { - "epoch": 1.7766410912190964, - "grad_norm": 0.07019155472517014, - "learning_rate": 4.5363507556893574e-05, - "loss": 0.006044945493340492, - "step": 10420 - }, - { - "epoch": 1.7774936061381075, - "grad_norm": 0.0616939477622509, - "learning_rate": 4.533845342239266e-05, - "loss": 0.004315405339002609, - "step": 10425 - }, - { - "epoch": 1.7783461210571185, - "grad_norm": 0.09354502707719803, - "learning_rate": 4.5313395628640943e-05, - "loss": 0.005719271302223205, - "step": 10430 - }, - { - "epoch": 1.7791986359761296, - "grad_norm": 0.08747732639312744, - "learning_rate": 4.528833418733623e-05, - "loss": 0.00472431555390358, - "step": 10435 - }, - { - "epoch": 1.7800511508951407, - "grad_norm": 0.09513017535209656, - "learning_rate": 4.5263269110178034e-05, - "loss": 0.006968998908996582, - "step": 10440 - }, - { - "epoch": 1.7809036658141517, - "grad_norm": 0.09208676964044571, - "learning_rate": 4.523820040886759e-05, - "loss": 0.006609047204256058, - "step": 10445 - }, - { - "epoch": 1.7817561807331628, - "grad_norm": 0.09964144974946976, - "learning_rate": 4.521312809510778e-05, - "loss": 0.0056272163987159726, - "step": 10450 - }, - { - "epoch": 1.7826086956521738, - "grad_norm": 0.06850367784500122, - "learning_rate": 4.51880521806032e-05, - "loss": 0.005562498047947883, - "step": 10455 - }, - { - "epoch": 1.783461210571185, - "grad_norm": 0.0654430240392685, - "learning_rate": 4.5162972677060124e-05, - "loss": 0.0059367924928665165, - "step": 10460 - }, - { - "epoch": 1.784313725490196, - "grad_norm": 0.0449560284614563, - "learning_rate": 4.513788959618649e-05, - "loss": 0.005458919331431389, - "step": 10465 - }, - { - "epoch": 1.785166240409207, - "grad_norm": 0.14256814122200012, - "learning_rate": 4.511280294969192e-05, - "loss": 0.0066184431314468386, - "step": 10470 - }, - { - "epoch": 1.7860187553282183, - "grad_norm": 0.08284557610750198, - "learning_rate": 4.508771274928771e-05, - "loss": 0.007388219982385635, - "step": 10475 - }, - { - "epoch": 1.7868712702472294, - "grad_norm": 0.05675457417964935, - "learning_rate": 4.506261900668676e-05, - "loss": 0.005572458356618881, - "step": 10480 - }, - { - "epoch": 1.7877237851662404, - "grad_norm": 0.05767322704195976, - "learning_rate": 4.50375217336037e-05, - "loss": 0.0058133058249950405, - "step": 10485 - }, - { - "epoch": 1.7885763000852515, - "grad_norm": 0.03421638533473015, - "learning_rate": 4.501242094175476e-05, - "loss": 0.005268872529268265, - "step": 10490 - }, - { - "epoch": 1.7894288150042625, - "grad_norm": 0.07319685071706772, - "learning_rate": 4.4987316642857836e-05, - "loss": 0.008701664954423904, - "step": 10495 - }, - { - "epoch": 1.7902813299232738, - "grad_norm": 0.04271615296602249, - "learning_rate": 4.4962208848632426e-05, - "loss": 0.005680259317159653, - "step": 10500 - }, - { - "epoch": 1.7911338448422849, - "grad_norm": 0.05916997417807579, - "learning_rate": 4.493709757079971e-05, - "loss": 0.004779224097728729, - "step": 10505 - }, - { - "epoch": 1.791986359761296, - "grad_norm": 0.04994066804647446, - "learning_rate": 4.491198282108244e-05, - "loss": 0.00443916954100132, - "step": 10510 - }, - { - "epoch": 1.792838874680307, - "grad_norm": 0.09032617509365082, - "learning_rate": 4.488686461120504e-05, - "loss": 0.007850547134876252, - "step": 10515 - }, - { - "epoch": 1.793691389599318, - "grad_norm": 0.05055975914001465, - "learning_rate": 4.4861742952893525e-05, - "loss": 0.005925046652555466, - "step": 10520 - }, - { - "epoch": 1.7945439045183291, - "grad_norm": 0.07521310448646545, - "learning_rate": 4.48366178578755e-05, - "loss": 0.006785632669925689, - "step": 10525 - }, - { - "epoch": 1.7953964194373402, - "grad_norm": 0.06577371805906296, - "learning_rate": 4.4811489337880216e-05, - "loss": 0.005300462618470192, - "step": 10530 - }, - { - "epoch": 1.7962489343563512, - "grad_norm": 0.0451020710170269, - "learning_rate": 4.4786357404638485e-05, - "loss": 0.00612550750374794, - "step": 10535 - }, - { - "epoch": 1.7971014492753623, - "grad_norm": 0.08968023955821991, - "learning_rate": 4.4761222069882754e-05, - "loss": 0.00558510459959507, - "step": 10540 - }, - { - "epoch": 1.7979539641943734, - "grad_norm": 0.0945729911327362, - "learning_rate": 4.4736083345347015e-05, - "loss": 0.007513274252414703, - "step": 10545 - }, - { - "epoch": 1.7988064791133844, - "grad_norm": 0.10392102599143982, - "learning_rate": 4.4710941242766844e-05, - "loss": 0.006224355846643448, - "step": 10550 - }, - { - "epoch": 1.7996589940323955, - "grad_norm": 0.10485874116420746, - "learning_rate": 4.4685795773879446e-05, - "loss": 0.005821261927485466, - "step": 10555 - }, - { - "epoch": 1.8005115089514065, - "grad_norm": 0.0689731314778328, - "learning_rate": 4.466064695042355e-05, - "loss": 0.0062000565230846405, - "step": 10560 - }, - { - "epoch": 1.8013640238704176, - "grad_norm": 0.07008705288171768, - "learning_rate": 4.4635494784139463e-05, - "loss": 0.006286797672510147, - "step": 10565 - }, - { - "epoch": 1.8022165387894287, - "grad_norm": 0.07595150172710419, - "learning_rate": 4.461033928676904e-05, - "loss": 0.006704485416412354, - "step": 10570 - }, - { - "epoch": 1.80306905370844, - "grad_norm": 0.07564863562583923, - "learning_rate": 4.458518047005572e-05, - "loss": 0.005777762830257415, - "step": 10575 - }, - { - "epoch": 1.803921568627451, - "grad_norm": 0.07202555984258652, - "learning_rate": 4.4560018345744466e-05, - "loss": 0.00602865107357502, - "step": 10580 - }, - { - "epoch": 1.804774083546462, - "grad_norm": 0.10462740063667297, - "learning_rate": 4.453485292558179e-05, - "loss": 0.007622111588716507, - "step": 10585 - }, - { - "epoch": 1.8056265984654731, - "grad_norm": 0.05587150529026985, - "learning_rate": 4.450968422131578e-05, - "loss": 0.00641121193766594, - "step": 10590 - }, - { - "epoch": 1.8064791133844842, - "grad_norm": 0.0603446289896965, - "learning_rate": 4.448451224469598e-05, - "loss": 0.0073586970567703245, - "step": 10595 - }, - { - "epoch": 1.8073316283034955, - "grad_norm": 0.04228143393993378, - "learning_rate": 4.445933700747353e-05, - "loss": 0.005406339466571808, - "step": 10600 - }, - { - "epoch": 1.8081841432225065, - "grad_norm": 0.04840795323252678, - "learning_rate": 4.4434158521401065e-05, - "loss": 0.0041844088584184645, - "step": 10605 - }, - { - "epoch": 1.8090366581415176, - "grad_norm": 0.08334027975797653, - "learning_rate": 4.440897679823275e-05, - "loss": 0.008376862108707427, - "step": 10610 - }, - { - "epoch": 1.8098891730605287, - "grad_norm": 0.07879523187875748, - "learning_rate": 4.438379184972423e-05, - "loss": 0.0053595036268234255, - "step": 10615 - }, - { - "epoch": 1.8107416879795397, - "grad_norm": 0.0689932182431221, - "learning_rate": 4.435860368763269e-05, - "loss": 0.005961846932768822, - "step": 10620 - }, - { - "epoch": 1.8115942028985508, - "grad_norm": 0.07035796344280243, - "learning_rate": 4.43334123237168e-05, - "loss": 0.005833951756358147, - "step": 10625 - }, - { - "epoch": 1.8124467178175618, - "grad_norm": 0.06488184630870819, - "learning_rate": 4.4308217769736715e-05, - "loss": 0.006380685418844223, - "step": 10630 - }, - { - "epoch": 1.813299232736573, - "grad_norm": 0.1095893532037735, - "learning_rate": 4.428302003745412e-05, - "loss": 0.006500106304883957, - "step": 10635 - }, - { - "epoch": 1.814151747655584, - "grad_norm": 0.07402926683425903, - "learning_rate": 4.425781913863212e-05, - "loss": 0.010839180648326873, - "step": 10640 - }, - { - "epoch": 1.815004262574595, - "grad_norm": 0.07752810418605804, - "learning_rate": 4.4232615085035354e-05, - "loss": 0.0053322531282901766, - "step": 10645 - }, - { - "epoch": 1.815856777493606, - "grad_norm": 0.06572280824184418, - "learning_rate": 4.420740788842991e-05, - "loss": 0.0072415158152580265, - "step": 10650 - }, - { - "epoch": 1.8167092924126171, - "grad_norm": 0.07175682485103607, - "learning_rate": 4.418219756058335e-05, - "loss": 0.007061149924993515, - "step": 10655 - }, - { - "epoch": 1.8175618073316282, - "grad_norm": 0.0702451840043068, - "learning_rate": 4.4156984113264684e-05, - "loss": 0.0050024140626192095, - "step": 10660 - }, - { - "epoch": 1.8184143222506393, - "grad_norm": 0.05054900422692299, - "learning_rate": 4.4131767558244375e-05, - "loss": 0.004906433075666428, - "step": 10665 - }, - { - "epoch": 1.8192668371696503, - "grad_norm": 0.07256589829921722, - "learning_rate": 4.410654790729438e-05, - "loss": 0.006986310333013534, - "step": 10670 - }, - { - "epoch": 1.8201193520886616, - "grad_norm": 0.06617925316095352, - "learning_rate": 4.408132517218805e-05, - "loss": 0.007973263412714005, - "step": 10675 - }, - { - "epoch": 1.8209718670076727, - "grad_norm": 0.09039802104234695, - "learning_rate": 4.405609936470022e-05, - "loss": 0.007263268530368805, - "step": 10680 - }, - { - "epoch": 1.8218243819266837, - "grad_norm": 0.03763730078935623, - "learning_rate": 4.40308704966071e-05, - "loss": 0.005709199234843254, - "step": 10685 - }, - { - "epoch": 1.8226768968456948, - "grad_norm": 0.09264735877513885, - "learning_rate": 4.400563857968639e-05, - "loss": 0.006996266543865204, - "step": 10690 - }, - { - "epoch": 1.8235294117647058, - "grad_norm": 0.0882507711648941, - "learning_rate": 4.398040362571719e-05, - "loss": 0.007461686432361603, - "step": 10695 - }, - { - "epoch": 1.8243819266837171, - "grad_norm": 0.07662846893072128, - "learning_rate": 4.395516564648e-05, - "loss": 0.006977429986000061, - "step": 10700 - }, - { - "epoch": 1.8252344416027282, - "grad_norm": 0.07431378960609436, - "learning_rate": 4.392992465375676e-05, - "loss": 0.004957346618175507, - "step": 10705 - }, - { - "epoch": 1.8260869565217392, - "grad_norm": 0.06182624027132988, - "learning_rate": 4.39046806593308e-05, - "loss": 0.006677946448326111, - "step": 10710 - }, - { - "epoch": 1.8269394714407503, - "grad_norm": 0.06389910727739334, - "learning_rate": 4.3879433674986856e-05, - "loss": 0.006449097394943237, - "step": 10715 - }, - { - "epoch": 1.8277919863597614, - "grad_norm": 0.06772691756486893, - "learning_rate": 4.385418371251107e-05, - "loss": 0.004998251050710678, - "step": 10720 - }, - { - "epoch": 1.8286445012787724, - "grad_norm": 0.07048022747039795, - "learning_rate": 4.3828930783690955e-05, - "loss": 0.006418389827013015, - "step": 10725 - }, - { - "epoch": 1.8294970161977835, - "grad_norm": 0.09442687779664993, - "learning_rate": 4.3803674900315424e-05, - "loss": 0.006921603530645371, - "step": 10730 - }, - { - "epoch": 1.8303495311167945, - "grad_norm": 0.0578981414437294, - "learning_rate": 4.377841607417475e-05, - "loss": 0.007038000971078873, - "step": 10735 - }, - { - "epoch": 1.8312020460358056, - "grad_norm": 0.06990659236907959, - "learning_rate": 4.37531543170606e-05, - "loss": 0.005136258527636528, - "step": 10740 - }, - { - "epoch": 1.8320545609548167, - "grad_norm": 0.05566668137907982, - "learning_rate": 4.372788964076601e-05, - "loss": 0.005333118140697479, - "step": 10745 - }, - { - "epoch": 1.8329070758738277, - "grad_norm": 0.09198274463415146, - "learning_rate": 4.3702622057085376e-05, - "loss": 0.005783502757549286, - "step": 10750 - }, - { - "epoch": 1.8337595907928388, - "grad_norm": 0.12995415925979614, - "learning_rate": 4.3677351577814423e-05, - "loss": 0.005794361606240273, - "step": 10755 - }, - { - "epoch": 1.8346121057118498, - "grad_norm": 0.0827256515622139, - "learning_rate": 4.3652078214750264e-05, - "loss": 0.00593951866030693, - "step": 10760 - }, - { - "epoch": 1.835464620630861, - "grad_norm": 0.09131235629320145, - "learning_rate": 4.362680197969136e-05, - "loss": 0.006387272477149963, - "step": 10765 - }, - { - "epoch": 1.836317135549872, - "grad_norm": 0.06061462685465813, - "learning_rate": 4.360152288443748e-05, - "loss": 0.006085103005170822, - "step": 10770 - }, - { - "epoch": 1.8371696504688833, - "grad_norm": 0.05650132894515991, - "learning_rate": 4.357624094078976e-05, - "loss": 0.004817041009664536, - "step": 10775 - }, - { - "epoch": 1.8380221653878943, - "grad_norm": 0.09250559657812119, - "learning_rate": 4.355095616055063e-05, - "loss": 0.006116693839430809, - "step": 10780 - }, - { - "epoch": 1.8388746803069054, - "grad_norm": 0.06575264036655426, - "learning_rate": 4.352566855552389e-05, - "loss": 0.006027846410870552, - "step": 10785 - }, - { - "epoch": 1.8397271952259164, - "grad_norm": 0.07538174092769623, - "learning_rate": 4.350037813751462e-05, - "loss": 0.006624206900596619, - "step": 10790 - }, - { - "epoch": 1.8405797101449275, - "grad_norm": 0.06000296771526337, - "learning_rate": 4.347508491832924e-05, - "loss": 0.006386204063892365, - "step": 10795 - }, - { - "epoch": 1.8414322250639388, - "grad_norm": 0.058621276170015335, - "learning_rate": 4.3449788909775455e-05, - "loss": 0.006246517226099968, - "step": 10800 - }, - { - "epoch": 1.8422847399829498, - "grad_norm": 0.10082551836967468, - "learning_rate": 4.34244901236623e-05, - "loss": 0.006916524469852447, - "step": 10805 - }, - { - "epoch": 1.843137254901961, - "grad_norm": 0.07926804572343826, - "learning_rate": 4.3399188571800064e-05, - "loss": 0.006270130723714828, - "step": 10810 - }, - { - "epoch": 1.843989769820972, - "grad_norm": 0.14256511628627777, - "learning_rate": 4.3373884266000375e-05, - "loss": 0.008555002510547638, - "step": 10815 - }, - { - "epoch": 1.844842284739983, - "grad_norm": 0.0711030438542366, - "learning_rate": 4.334857721807612e-05, - "loss": 0.004097539931535721, - "step": 10820 - }, - { - "epoch": 1.845694799658994, - "grad_norm": 0.05918106436729431, - "learning_rate": 4.3323267439841464e-05, - "loss": 0.006263938546180725, - "step": 10825 - }, - { - "epoch": 1.8465473145780051, - "grad_norm": 0.06577462702989578, - "learning_rate": 4.329795494311186e-05, - "loss": 0.004532983154058456, - "step": 10830 - }, - { - "epoch": 1.8473998294970162, - "grad_norm": 0.07599867880344391, - "learning_rate": 4.327263973970401e-05, - "loss": 0.006951173394918441, - "step": 10835 - }, - { - "epoch": 1.8482523444160273, - "grad_norm": 0.055239275097846985, - "learning_rate": 4.324732184143592e-05, - "loss": 0.00514591783285141, - "step": 10840 - }, - { - "epoch": 1.8491048593350383, - "grad_norm": 0.10522980988025665, - "learning_rate": 4.322200126012681e-05, - "loss": 0.00747048556804657, - "step": 10845 - }, - { - "epoch": 1.8499573742540494, - "grad_norm": 0.08132579177618027, - "learning_rate": 4.319667800759716e-05, - "loss": 0.005432958528399467, - "step": 10850 - }, - { - "epoch": 1.8508098891730604, - "grad_norm": 0.04027591645717621, - "learning_rate": 4.3171352095668726e-05, - "loss": 0.004450181499123573, - "step": 10855 - }, - { - "epoch": 1.8516624040920715, - "grad_norm": 0.0873839259147644, - "learning_rate": 4.314602353616446e-05, - "loss": 0.006079509109258652, - "step": 10860 - }, - { - "epoch": 1.8525149190110826, - "grad_norm": 0.04989013075828552, - "learning_rate": 4.312069234090862e-05, - "loss": 0.003988634794950485, - "step": 10865 - }, - { - "epoch": 1.8533674339300936, - "grad_norm": 0.061433590948581696, - "learning_rate": 4.309535852172661e-05, - "loss": 0.0056050091981887816, - "step": 10870 - }, - { - "epoch": 1.854219948849105, - "grad_norm": 0.07007768750190735, - "learning_rate": 4.3070022090445114e-05, - "loss": 0.006938119232654571, - "step": 10875 - }, - { - "epoch": 1.855072463768116, - "grad_norm": 0.03557104617357254, - "learning_rate": 4.3044683058892024e-05, - "loss": 0.0061099715530872345, - "step": 10880 - }, - { - "epoch": 1.855924978687127, - "grad_norm": 0.07706935703754425, - "learning_rate": 4.3019341438896446e-05, - "loss": 0.0050103053450584415, - "step": 10885 - }, - { - "epoch": 1.856777493606138, - "grad_norm": 0.06719083338975906, - "learning_rate": 4.2993997242288686e-05, - "loss": 0.005047342553734779, - "step": 10890 - }, - { - "epoch": 1.8576300085251491, - "grad_norm": 0.05179615691304207, - "learning_rate": 4.296865048090024e-05, - "loss": 0.004692831635475158, - "step": 10895 - }, - { - "epoch": 1.8584825234441604, - "grad_norm": 0.08594074845314026, - "learning_rate": 4.294330116656385e-05, - "loss": 0.006039778143167496, - "step": 10900 - }, - { - "epoch": 1.8593350383631715, - "grad_norm": 0.11285590380430222, - "learning_rate": 4.291794931111339e-05, - "loss": 0.005857323482632637, - "step": 10905 - }, - { - "epoch": 1.8601875532821825, - "grad_norm": 0.056068334728479385, - "learning_rate": 4.289259492638399e-05, - "loss": 0.006339801102876663, - "step": 10910 - }, - { - "epoch": 1.8610400682011936, - "grad_norm": 0.1027015820145607, - "learning_rate": 4.2867238024211873e-05, - "loss": 0.007628202438354492, - "step": 10915 - }, - { - "epoch": 1.8618925831202047, - "grad_norm": 0.06938920170068741, - "learning_rate": 4.2841878616434516e-05, - "loss": 0.005421775206923485, - "step": 10920 - }, - { - "epoch": 1.8627450980392157, - "grad_norm": 0.06613876670598984, - "learning_rate": 4.2816516714890525e-05, - "loss": 0.00747835859656334, - "step": 10925 - }, - { - "epoch": 1.8635976129582268, - "grad_norm": 0.07735379040241241, - "learning_rate": 4.279115233141967e-05, - "loss": 0.006907149404287338, - "step": 10930 - }, - { - "epoch": 1.8644501278772379, - "grad_norm": 0.06285069137811661, - "learning_rate": 4.276578547786291e-05, - "loss": 0.005340246856212616, - "step": 10935 - }, - { - "epoch": 1.865302642796249, - "grad_norm": 0.0670136883854866, - "learning_rate": 4.274041616606232e-05, - "loss": 0.0067828245460987095, - "step": 10940 - }, - { - "epoch": 1.86615515771526, - "grad_norm": 0.07944425195455551, - "learning_rate": 4.2715044407861144e-05, - "loss": 0.006403806060552597, - "step": 10945 - }, - { - "epoch": 1.867007672634271, - "grad_norm": 0.07202634960412979, - "learning_rate": 4.268967021510375e-05, - "loss": 0.004467373341321945, - "step": 10950 - }, - { - "epoch": 1.867860187553282, - "grad_norm": 0.08753371983766556, - "learning_rate": 4.266429359963568e-05, - "loss": 0.006740668416023254, - "step": 10955 - }, - { - "epoch": 1.8687127024722932, - "grad_norm": 0.0920538380742073, - "learning_rate": 4.263891457330357e-05, - "loss": 0.007489701360464096, - "step": 10960 - }, - { - "epoch": 1.8695652173913042, - "grad_norm": 0.11196473985910416, - "learning_rate": 4.261353314795519e-05, - "loss": 0.007533909380435943, - "step": 10965 - }, - { - "epoch": 1.8704177323103153, - "grad_norm": 0.08394299447536469, - "learning_rate": 4.258814933543943e-05, - "loss": 0.005159291997551918, - "step": 10970 - }, - { - "epoch": 1.8712702472293266, - "grad_norm": 0.08024156838655472, - "learning_rate": 4.25627631476063e-05, - "loss": 0.00543709248304367, - "step": 10975 - }, - { - "epoch": 1.8721227621483376, - "grad_norm": 0.052640948444604874, - "learning_rate": 4.253737459630694e-05, - "loss": 0.004067758470773697, - "step": 10980 - }, - { - "epoch": 1.8729752770673487, - "grad_norm": 0.08472926914691925, - "learning_rate": 4.251198369339353e-05, - "loss": 0.0077335178852081295, - "step": 10985 - }, - { - "epoch": 1.8738277919863597, - "grad_norm": 0.09794485569000244, - "learning_rate": 4.248659045071942e-05, - "loss": 0.0055429480969905855, - "step": 10990 - }, - { - "epoch": 1.8746803069053708, - "grad_norm": 0.07767575234174728, - "learning_rate": 4.2461194880139016e-05, - "loss": 0.008025288581848145, - "step": 10995 - }, - { - "epoch": 1.875532821824382, - "grad_norm": 0.07427361607551575, - "learning_rate": 4.2435796993507794e-05, - "loss": 0.006437119096517563, - "step": 11000 - }, - { - "epoch": 1.8763853367433931, - "grad_norm": 0.07420040667057037, - "learning_rate": 4.241039680268237e-05, - "loss": 0.0051200386136770245, - "step": 11005 - }, - { - "epoch": 1.8772378516624042, - "grad_norm": 0.09004204720258713, - "learning_rate": 4.2384994319520355e-05, - "loss": 0.007488063722848892, - "step": 11010 - }, - { - "epoch": 1.8780903665814153, - "grad_norm": 0.060929473489522934, - "learning_rate": 4.235958955588049e-05, - "loss": 0.00483398288488388, - "step": 11015 - }, - { - "epoch": 1.8789428815004263, - "grad_norm": 0.08116185665130615, - "learning_rate": 4.2334182523622584e-05, - "loss": 0.007078002393245697, - "step": 11020 - }, - { - "epoch": 1.8797953964194374, - "grad_norm": 0.0908491238951683, - "learning_rate": 4.230877323460746e-05, - "loss": 0.007228228449821472, - "step": 11025 - }, - { - "epoch": 1.8806479113384484, - "grad_norm": 0.08618480712175369, - "learning_rate": 4.228336170069703e-05, - "loss": 0.005402455478906632, - "step": 11030 - }, - { - "epoch": 1.8815004262574595, - "grad_norm": 0.06869816035032272, - "learning_rate": 4.2257947933754236e-05, - "loss": 0.006180650368332863, - "step": 11035 - }, - { - "epoch": 1.8823529411764706, - "grad_norm": 0.0904744416475296, - "learning_rate": 4.223253194564309e-05, - "loss": 0.00636049136519432, - "step": 11040 - }, - { - "epoch": 1.8832054560954816, - "grad_norm": 0.04902644082903862, - "learning_rate": 4.220711374822859e-05, - "loss": 0.0062784947454929355, - "step": 11045 - }, - { - "epoch": 1.8840579710144927, - "grad_norm": 0.060081589967012405, - "learning_rate": 4.2181693353376817e-05, - "loss": 0.005494052171707153, - "step": 11050 - }, - { - "epoch": 1.8849104859335037, - "grad_norm": 0.058530837297439575, - "learning_rate": 4.215627077295485e-05, - "loss": 0.005457080900669098, - "step": 11055 - }, - { - "epoch": 1.8857630008525148, - "grad_norm": 0.15006953477859497, - "learning_rate": 4.2130846018830795e-05, - "loss": 0.0062985971570014955, - "step": 11060 - }, - { - "epoch": 1.8866155157715259, - "grad_norm": 0.04498155787587166, - "learning_rate": 4.210541910287377e-05, - "loss": 0.004242038726806641, - "step": 11065 - }, - { - "epoch": 1.887468030690537, - "grad_norm": 0.09093966335058212, - "learning_rate": 4.207999003695392e-05, - "loss": 0.00554364025592804, - "step": 11070 - }, - { - "epoch": 1.8883205456095482, - "grad_norm": 0.06531018018722534, - "learning_rate": 4.2054558832942365e-05, - "loss": 0.0063869751989841465, - "step": 11075 - }, - { - "epoch": 1.8891730605285593, - "grad_norm": 0.059213872998952866, - "learning_rate": 4.202912550271124e-05, - "loss": 0.004836349189281464, - "step": 11080 - }, - { - "epoch": 1.8900255754475703, - "grad_norm": 0.11074823886156082, - "learning_rate": 4.200369005813367e-05, - "loss": 0.00584055446088314, - "step": 11085 - }, - { - "epoch": 1.8908780903665814, - "grad_norm": 0.09352346509695053, - "learning_rate": 4.197825251108376e-05, - "loss": 0.006423837691545487, - "step": 11090 - }, - { - "epoch": 1.8917306052855924, - "grad_norm": 0.10930176079273224, - "learning_rate": 4.195281287343662e-05, - "loss": 0.007819923013448716, - "step": 11095 - }, - { - "epoch": 1.8925831202046037, - "grad_norm": 0.10935486853122711, - "learning_rate": 4.19273711570683e-05, - "loss": 0.008524692058563233, - "step": 11100 - }, - { - "epoch": 1.8934356351236148, - "grad_norm": 0.07407546788454056, - "learning_rate": 4.190192737385586e-05, - "loss": 0.006353407353162766, - "step": 11105 - }, - { - "epoch": 1.8942881500426259, - "grad_norm": 0.11030165106058121, - "learning_rate": 4.187648153567729e-05, - "loss": 0.007683426141738892, - "step": 11110 - }, - { - "epoch": 1.895140664961637, - "grad_norm": 0.09419413655996323, - "learning_rate": 4.185103365441155e-05, - "loss": 0.005654521286487579, - "step": 11115 - }, - { - "epoch": 1.895993179880648, - "grad_norm": 0.06284896284341812, - "learning_rate": 4.1825583741938576e-05, - "loss": 0.0048633765429258345, - "step": 11120 - }, - { - "epoch": 1.896845694799659, - "grad_norm": 0.06429705023765564, - "learning_rate": 4.180013181013921e-05, - "loss": 0.006907754391431808, - "step": 11125 - }, - { - "epoch": 1.89769820971867, - "grad_norm": 0.1234050914645195, - "learning_rate": 4.177467787089527e-05, - "loss": 0.008531783521175385, - "step": 11130 - }, - { - "epoch": 1.8985507246376812, - "grad_norm": 0.04056263715028763, - "learning_rate": 4.174922193608951e-05, - "loss": 0.006784418225288391, - "step": 11135 - }, - { - "epoch": 1.8994032395566922, - "grad_norm": 0.048422425985336304, - "learning_rate": 4.172376401760561e-05, - "loss": 0.006587067246437072, - "step": 11140 - }, - { - "epoch": 1.9002557544757033, - "grad_norm": 0.10680951178073883, - "learning_rate": 4.169830412732815e-05, - "loss": 0.005700337141752243, - "step": 11145 - }, - { - "epoch": 1.9011082693947143, - "grad_norm": 0.09418217837810516, - "learning_rate": 4.167284227714267e-05, - "loss": 0.0059782925993204115, - "step": 11150 - }, - { - "epoch": 1.9019607843137254, - "grad_norm": 0.12511073052883148, - "learning_rate": 4.1647378478935614e-05, - "loss": 0.006256800889968872, - "step": 11155 - }, - { - "epoch": 1.9028132992327365, - "grad_norm": 0.06957859545946121, - "learning_rate": 4.1621912744594316e-05, - "loss": 0.008690094202756881, - "step": 11160 - }, - { - "epoch": 1.9036658141517475, - "grad_norm": 0.10859719663858414, - "learning_rate": 4.159644508600704e-05, - "loss": 0.008262380957603455, - "step": 11165 - }, - { - "epoch": 1.9045183290707586, - "grad_norm": 0.08408714830875397, - "learning_rate": 4.157097551506292e-05, - "loss": 0.005347007513046264, - "step": 11170 - }, - { - "epoch": 1.9053708439897699, - "grad_norm": 0.05623621866106987, - "learning_rate": 4.1545504043652014e-05, - "loss": 0.005091758817434311, - "step": 11175 - }, - { - "epoch": 1.906223358908781, - "grad_norm": 0.06791777908802032, - "learning_rate": 4.1520030683665246e-05, - "loss": 0.006755101680755615, - "step": 11180 - }, - { - "epoch": 1.907075873827792, - "grad_norm": 0.039112675935029984, - "learning_rate": 4.149455544699444e-05, - "loss": 0.0063312210142612456, - "step": 11185 - }, - { - "epoch": 1.907928388746803, - "grad_norm": 0.05682097375392914, - "learning_rate": 4.146907834553227e-05, - "loss": 0.005028403550386429, - "step": 11190 - }, - { - "epoch": 1.908780903665814, - "grad_norm": 0.07670710980892181, - "learning_rate": 4.144359939117229e-05, - "loss": 0.006438900530338287, - "step": 11195 - }, - { - "epoch": 1.9096334185848254, - "grad_norm": 0.06266012787818909, - "learning_rate": 4.141811859580894e-05, - "loss": 0.006153284758329392, - "step": 11200 - }, - { - "epoch": 1.9104859335038364, - "grad_norm": 0.06892232596874237, - "learning_rate": 4.139263597133749e-05, - "loss": 0.0042446799576282505, - "step": 11205 - }, - { - "epoch": 1.9113384484228475, - "grad_norm": 0.08733050525188446, - "learning_rate": 4.136715152965409e-05, - "loss": 0.0048094093799591064, - "step": 11210 - }, - { - "epoch": 1.9121909633418586, - "grad_norm": 0.06578327715396881, - "learning_rate": 4.13416652826557e-05, - "loss": 0.0047289058566093445, - "step": 11215 - }, - { - "epoch": 1.9130434782608696, - "grad_norm": 0.06382749229669571, - "learning_rate": 4.1316177242240174e-05, - "loss": 0.004200926423072815, - "step": 11220 - }, - { - "epoch": 1.9138959931798807, - "grad_norm": 0.07368794828653336, - "learning_rate": 4.129068742030617e-05, - "loss": 0.0063028551638126375, - "step": 11225 - }, - { - "epoch": 1.9147485080988917, - "grad_norm": 0.09302657842636108, - "learning_rate": 4.1265195828753176e-05, - "loss": 0.008124063909053802, - "step": 11230 - }, - { - "epoch": 1.9156010230179028, - "grad_norm": 0.08030751347541809, - "learning_rate": 4.123970247948153e-05, - "loss": 0.009628574550151824, - "step": 11235 - }, - { - "epoch": 1.9164535379369139, - "grad_norm": 0.08395590633153915, - "learning_rate": 4.1214207384392356e-05, - "loss": 0.007773591578006745, - "step": 11240 - }, - { - "epoch": 1.917306052855925, - "grad_norm": 0.09472183138132095, - "learning_rate": 4.118871055538762e-05, - "loss": 0.005461954325437546, - "step": 11245 - }, - { - "epoch": 1.918158567774936, - "grad_norm": 0.095457524061203, - "learning_rate": 4.11632120043701e-05, - "loss": 0.005725187063217163, - "step": 11250 - }, - { - "epoch": 1.919011082693947, - "grad_norm": 0.10508730262517929, - "learning_rate": 4.113771174324336e-05, - "loss": 0.006902433931827545, - "step": 11255 - }, - { - "epoch": 1.919863597612958, - "grad_norm": 0.08675665408372879, - "learning_rate": 4.111220978391176e-05, - "loss": 0.007470531016588211, - "step": 11260 - }, - { - "epoch": 1.9207161125319692, - "grad_norm": 0.08215013146400452, - "learning_rate": 4.108670613828049e-05, - "loss": 0.005732448399066925, - "step": 11265 - }, - { - "epoch": 1.9215686274509802, - "grad_norm": 0.054156310856342316, - "learning_rate": 4.1061200818255476e-05, - "loss": 0.005808809399604797, - "step": 11270 - }, - { - "epoch": 1.9224211423699915, - "grad_norm": 0.09332830458879471, - "learning_rate": 4.103569383574346e-05, - "loss": 0.005646481737494468, - "step": 11275 - }, - { - "epoch": 1.9232736572890026, - "grad_norm": 0.05589313432574272, - "learning_rate": 4.101018520265195e-05, - "loss": 0.005581434443593025, - "step": 11280 - }, - { - "epoch": 1.9241261722080136, - "grad_norm": 0.0465618334710598, - "learning_rate": 4.098467493088922e-05, - "loss": 0.005028170347213745, - "step": 11285 - }, - { - "epoch": 1.9249786871270247, - "grad_norm": 0.07304909080266953, - "learning_rate": 4.095916303236431e-05, - "loss": 0.007494028657674789, - "step": 11290 - }, - { - "epoch": 1.9258312020460358, - "grad_norm": 0.09532103687524796, - "learning_rate": 4.0933649518987025e-05, - "loss": 0.006374432146549225, - "step": 11295 - }, - { - "epoch": 1.926683716965047, - "grad_norm": 0.07364784181118011, - "learning_rate": 4.090813440266794e-05, - "loss": 0.0053088821470737456, - "step": 11300 - }, - { - "epoch": 1.927536231884058, - "grad_norm": 0.0804903507232666, - "learning_rate": 4.088261769531834e-05, - "loss": 0.0069495439529418945, - "step": 11305 - }, - { - "epoch": 1.9283887468030692, - "grad_norm": 0.07125549763441086, - "learning_rate": 4.0857099408850264e-05, - "loss": 0.005846098065376282, - "step": 11310 - }, - { - "epoch": 1.9292412617220802, - "grad_norm": 0.017375558614730835, - "learning_rate": 4.083157955517653e-05, - "loss": 0.004308582097291946, - "step": 11315 - }, - { - "epoch": 1.9300937766410913, - "grad_norm": 0.07655836641788483, - "learning_rate": 4.080605814621063e-05, - "loss": 0.006030111759901047, - "step": 11320 - }, - { - "epoch": 1.9309462915601023, - "grad_norm": 0.05411117896437645, - "learning_rate": 4.078053519386681e-05, - "loss": 0.0069768443703651425, - "step": 11325 - }, - { - "epoch": 1.9317988064791134, - "grad_norm": 0.08431188017129898, - "learning_rate": 4.0755010710060035e-05, - "loss": 0.006973695755004883, - "step": 11330 - }, - { - "epoch": 1.9326513213981245, - "grad_norm": 0.08480583131313324, - "learning_rate": 4.072948470670598e-05, - "loss": 0.006525547057390213, - "step": 11335 - }, - { - "epoch": 1.9335038363171355, - "grad_norm": 0.073171466588974, - "learning_rate": 4.070395719572104e-05, - "loss": 0.0054599311202764515, - "step": 11340 - }, - { - "epoch": 1.9343563512361466, - "grad_norm": 0.06951522827148438, - "learning_rate": 4.0678428189022304e-05, - "loss": 0.008897364884614945, - "step": 11345 - }, - { - "epoch": 1.9352088661551576, - "grad_norm": 0.08654197305440903, - "learning_rate": 4.0652897698527557e-05, - "loss": 0.005458325147628784, - "step": 11350 - }, - { - "epoch": 1.9360613810741687, - "grad_norm": 0.07929553836584091, - "learning_rate": 4.0627365736155285e-05, - "loss": 0.00710543841123581, - "step": 11355 - }, - { - "epoch": 1.9369138959931798, - "grad_norm": 0.12434503436088562, - "learning_rate": 4.060183231382466e-05, - "loss": 0.0071723200380802155, - "step": 11360 - }, - { - "epoch": 1.9377664109121908, - "grad_norm": 0.06440022587776184, - "learning_rate": 4.057629744345551e-05, - "loss": 0.006010268628597259, - "step": 11365 - }, - { - "epoch": 1.938618925831202, - "grad_norm": 0.09477414190769196, - "learning_rate": 4.0550761136968404e-05, - "loss": 0.007152469456195831, - "step": 11370 - }, - { - "epoch": 1.9394714407502132, - "grad_norm": 0.06758873164653778, - "learning_rate": 4.0525223406284516e-05, - "loss": 0.004493400454521179, - "step": 11375 - }, - { - "epoch": 1.9403239556692242, - "grad_norm": 0.06823158264160156, - "learning_rate": 4.0499684263325695e-05, - "loss": 0.0058505676686763765, - "step": 11380 - }, - { - "epoch": 1.9411764705882353, - "grad_norm": 0.10731697082519531, - "learning_rate": 4.0474143720014485e-05, - "loss": 0.00592585802078247, - "step": 11385 - }, - { - "epoch": 1.9420289855072463, - "grad_norm": 0.09786538779735565, - "learning_rate": 4.044860178827405e-05, - "loss": 0.008860854804515839, - "step": 11390 - }, - { - "epoch": 1.9428815004262576, - "grad_norm": 0.08662491291761398, - "learning_rate": 4.042305848002822e-05, - "loss": 0.00579673945903778, - "step": 11395 - }, - { - "epoch": 1.9437340153452687, - "grad_norm": 0.08446741849184036, - "learning_rate": 4.039751380720145e-05, - "loss": 0.0067916139960289, - "step": 11400 - }, - { - "epoch": 1.9445865302642797, - "grad_norm": 0.08059567958116531, - "learning_rate": 4.037196778171885e-05, - "loss": 0.007273902744054794, - "step": 11405 - }, - { - "epoch": 1.9454390451832908, - "grad_norm": 0.067914679646492, - "learning_rate": 4.0346420415506156e-05, - "loss": 0.00854090303182602, - "step": 11410 - }, - { - "epoch": 1.9462915601023019, - "grad_norm": 0.06519316136837006, - "learning_rate": 4.032087172048973e-05, - "loss": 0.006127477809786797, - "step": 11415 - }, - { - "epoch": 1.947144075021313, - "grad_norm": 0.10216967016458511, - "learning_rate": 4.029532170859655e-05, - "loss": 0.007330343872308731, - "step": 11420 - }, - { - "epoch": 1.947996589940324, - "grad_norm": 0.07684756815433502, - "learning_rate": 4.02697703917542e-05, - "loss": 0.006121716648340225, - "step": 11425 - }, - { - "epoch": 1.948849104859335, - "grad_norm": 0.08026126027107239, - "learning_rate": 4.0244217781890906e-05, - "loss": 0.006386417150497437, - "step": 11430 - }, - { - "epoch": 1.949701619778346, - "grad_norm": 0.09047527611255646, - "learning_rate": 4.021866389093546e-05, - "loss": 0.004208286106586456, - "step": 11435 - }, - { - "epoch": 1.9505541346973572, - "grad_norm": 0.047482747584581375, - "learning_rate": 4.0193108730817284e-05, - "loss": 0.005754061415791512, - "step": 11440 - }, - { - "epoch": 1.9514066496163682, - "grad_norm": 0.054364416748285294, - "learning_rate": 4.0167552313466355e-05, - "loss": 0.004412830248475075, - "step": 11445 - }, - { - "epoch": 1.9522591645353793, - "grad_norm": 0.07640549540519714, - "learning_rate": 4.014199465081327e-05, - "loss": 0.005214530602097511, - "step": 11450 - }, - { - "epoch": 1.9531116794543903, - "grad_norm": 0.07241252809762955, - "learning_rate": 4.0116435754789206e-05, - "loss": 0.005129393562674523, - "step": 11455 - }, - { - "epoch": 1.9539641943734014, - "grad_norm": 0.048170432448387146, - "learning_rate": 4.009087563732589e-05, - "loss": 0.005180074647068977, - "step": 11460 - }, - { - "epoch": 1.9548167092924125, - "grad_norm": 0.07336216419935226, - "learning_rate": 4.006531431035566e-05, - "loss": 0.009098170697689057, - "step": 11465 - }, - { - "epoch": 1.9556692242114238, - "grad_norm": 0.04934614151716232, - "learning_rate": 4.0039751785811346e-05, - "loss": 0.005307629331946373, - "step": 11470 - }, - { - "epoch": 1.9565217391304348, - "grad_norm": 0.08941303193569183, - "learning_rate": 4.001418807562643e-05, - "loss": 0.0069742932915687565, - "step": 11475 - }, - { - "epoch": 1.9573742540494459, - "grad_norm": 0.05791569501161575, - "learning_rate": 3.998862319173488e-05, - "loss": 0.0050424404442310335, - "step": 11480 - }, - { - "epoch": 1.958226768968457, - "grad_norm": 0.04596787318587303, - "learning_rate": 3.996305714607125e-05, - "loss": 0.004805172979831696, - "step": 11485 - }, - { - "epoch": 1.959079283887468, - "grad_norm": 0.07698309421539307, - "learning_rate": 3.993748995057061e-05, - "loss": 0.006605527549982071, - "step": 11490 - }, - { - "epoch": 1.9599317988064793, - "grad_norm": 0.08400565385818481, - "learning_rate": 3.9911921617168565e-05, - "loss": 0.0085490882396698, - "step": 11495 - }, - { - "epoch": 1.9607843137254903, - "grad_norm": 0.1446380764245987, - "learning_rate": 3.9886352157801296e-05, - "loss": 0.005958027392625809, - "step": 11500 - }, - { - "epoch": 1.9616368286445014, - "grad_norm": 0.06108809635043144, - "learning_rate": 3.986078158440544e-05, - "loss": 0.0054461218416690825, - "step": 11505 - }, - { - "epoch": 1.9624893435635125, - "grad_norm": 0.1163720190525055, - "learning_rate": 3.983520990891823e-05, - "loss": 0.0065662160515785216, - "step": 11510 - }, - { - "epoch": 1.9633418584825235, - "grad_norm": 0.08339548110961914, - "learning_rate": 3.980963714327734e-05, - "loss": 0.007503192871809006, - "step": 11515 - }, - { - "epoch": 1.9641943734015346, - "grad_norm": 0.07774331420660019, - "learning_rate": 3.9784063299421e-05, - "loss": 0.005831217020750045, - "step": 11520 - }, - { - "epoch": 1.9650468883205456, - "grad_norm": 0.08897018432617188, - "learning_rate": 3.9758488389287936e-05, - "loss": 0.006972354650497436, - "step": 11525 - }, - { - "epoch": 1.9658994032395567, - "grad_norm": 0.07708834111690521, - "learning_rate": 3.9732912424817374e-05, - "loss": 0.0059847764670848845, - "step": 11530 - }, - { - "epoch": 1.9667519181585678, - "grad_norm": 0.133201003074646, - "learning_rate": 3.9707335417949015e-05, - "loss": 0.005828146636486053, - "step": 11535 - }, - { - "epoch": 1.9676044330775788, - "grad_norm": 0.05620214343070984, - "learning_rate": 3.968175738062303e-05, - "loss": 0.004607116058468819, - "step": 11540 - }, - { - "epoch": 1.9684569479965899, - "grad_norm": 0.05371567979454994, - "learning_rate": 3.965617832478015e-05, - "loss": 0.004455961659550667, - "step": 11545 - }, - { - "epoch": 1.969309462915601, - "grad_norm": 0.10317978262901306, - "learning_rate": 3.96305982623615e-05, - "loss": 0.004697806015610695, - "step": 11550 - }, - { - "epoch": 1.970161977834612, - "grad_norm": 0.08786958456039429, - "learning_rate": 3.96050172053087e-05, - "loss": 0.005183818191289902, - "step": 11555 - }, - { - "epoch": 1.971014492753623, - "grad_norm": 0.07750507444143295, - "learning_rate": 3.957943516556385e-05, - "loss": 0.005475999787449837, - "step": 11560 - }, - { - "epoch": 1.9718670076726341, - "grad_norm": 0.07066313922405243, - "learning_rate": 3.955385215506949e-05, - "loss": 0.005772550404071808, - "step": 11565 - }, - { - "epoch": 1.9727195225916454, - "grad_norm": 0.08183038979768753, - "learning_rate": 3.952826818576863e-05, - "loss": 0.005305550992488861, - "step": 11570 - }, - { - "epoch": 1.9735720375106565, - "grad_norm": 0.075381800532341, - "learning_rate": 3.95026832696047e-05, - "loss": 0.00803310126066208, - "step": 11575 - }, - { - "epoch": 1.9744245524296675, - "grad_norm": 0.09064166992902756, - "learning_rate": 3.9477097418521616e-05, - "loss": 0.006380292773246765, - "step": 11580 - }, - { - "epoch": 1.9752770673486786, - "grad_norm": 0.09140465408563614, - "learning_rate": 3.945151064446367e-05, - "loss": 0.00863645225763321, - "step": 11585 - }, - { - "epoch": 1.9761295822676896, - "grad_norm": 0.09985008090734482, - "learning_rate": 3.942592295937565e-05, - "loss": 0.005205995962023735, - "step": 11590 - }, - { - "epoch": 1.976982097186701, - "grad_norm": 0.07968702167272568, - "learning_rate": 3.940033437520273e-05, - "loss": 0.006467466801404953, - "step": 11595 - }, - { - "epoch": 1.977834612105712, - "grad_norm": 0.0925409123301506, - "learning_rate": 3.937474490389051e-05, - "loss": 0.006804432719945908, - "step": 11600 - }, - { - "epoch": 1.978687127024723, - "grad_norm": 0.053421750664711, - "learning_rate": 3.9349154557385e-05, - "loss": 0.0067677564918994905, - "step": 11605 - }, - { - "epoch": 1.979539641943734, - "grad_norm": 0.07791347801685333, - "learning_rate": 3.9323563347632624e-05, - "loss": 0.006826826930046081, - "step": 11610 - }, - { - "epoch": 1.9803921568627452, - "grad_norm": 0.08627293258905411, - "learning_rate": 3.929797128658024e-05, - "loss": 0.00804663747549057, - "step": 11615 - }, - { - "epoch": 1.9812446717817562, - "grad_norm": 0.06506595015525818, - "learning_rate": 3.927237838617503e-05, - "loss": 0.005456966534256935, - "step": 11620 - }, - { - "epoch": 1.9820971867007673, - "grad_norm": 0.09555826336145401, - "learning_rate": 3.924678465836465e-05, - "loss": 0.005365721881389618, - "step": 11625 - }, - { - "epoch": 1.9829497016197783, - "grad_norm": 0.09176401793956757, - "learning_rate": 3.922119011509706e-05, - "loss": 0.006210924685001373, - "step": 11630 - }, - { - "epoch": 1.9838022165387894, - "grad_norm": 0.05260130763053894, - "learning_rate": 3.919559476832069e-05, - "loss": 0.004408955946564675, - "step": 11635 - }, - { - "epoch": 1.9846547314578005, - "grad_norm": 0.0875319391489029, - "learning_rate": 3.916999862998427e-05, - "loss": 0.005069036781787872, - "step": 11640 - }, - { - "epoch": 1.9855072463768115, - "grad_norm": 0.10335614532232285, - "learning_rate": 3.9144401712036936e-05, - "loss": 0.007199827581644058, - "step": 11645 - }, - { - "epoch": 1.9863597612958226, - "grad_norm": 0.09518889337778091, - "learning_rate": 3.9118804026428194e-05, - "loss": 0.00541754923760891, - "step": 11650 - }, - { - "epoch": 1.9872122762148337, - "grad_norm": 0.06707368791103363, - "learning_rate": 3.9093205585107863e-05, - "loss": 0.00641927570104599, - "step": 11655 - }, - { - "epoch": 1.9880647911338447, - "grad_norm": 0.10102292895317078, - "learning_rate": 3.906760640002618e-05, - "loss": 0.007096148282289505, - "step": 11660 - }, - { - "epoch": 1.9889173060528558, - "grad_norm": 0.0690481886267662, - "learning_rate": 3.904200648313368e-05, - "loss": 0.0063364550471305845, - "step": 11665 - }, - { - "epoch": 1.989769820971867, - "grad_norm": 0.1051480695605278, - "learning_rate": 3.901640584638126e-05, - "loss": 0.009133437275886535, - "step": 11670 - }, - { - "epoch": 1.9906223358908781, - "grad_norm": 0.0857042595744133, - "learning_rate": 3.899080450172015e-05, - "loss": 0.007245839387178421, - "step": 11675 - }, - { - "epoch": 1.9914748508098892, - "grad_norm": 0.04038793221116066, - "learning_rate": 3.8965202461101904e-05, - "loss": 0.005575920641422272, - "step": 11680 - }, - { - "epoch": 1.9923273657289002, - "grad_norm": 0.06331093609333038, - "learning_rate": 3.893959973647842e-05, - "loss": 0.004866635054349899, - "step": 11685 - }, - { - "epoch": 1.9931798806479113, - "grad_norm": 0.08694019168615341, - "learning_rate": 3.891399633980188e-05, - "loss": 0.004249059408903122, - "step": 11690 - }, - { - "epoch": 1.9940323955669226, - "grad_norm": 0.06739087402820587, - "learning_rate": 3.888839228302482e-05, - "loss": 0.006520142406225204, - "step": 11695 - }, - { - "epoch": 1.9948849104859336, - "grad_norm": 0.09432726353406906, - "learning_rate": 3.886278757810005e-05, - "loss": 0.006377060711383819, - "step": 11700 - }, - { - "epoch": 1.9957374254049447, - "grad_norm": 0.040565814822912216, - "learning_rate": 3.883718223698071e-05, - "loss": 0.0062430910766124725, - "step": 11705 - }, - { - "epoch": 1.9965899403239558, - "grad_norm": 0.09249477833509445, - "learning_rate": 3.881157627162022e-05, - "loss": 0.005447167158126831, - "step": 11710 - }, - { - "epoch": 1.9974424552429668, - "grad_norm": 0.08561582118272781, - "learning_rate": 3.87859696939723e-05, - "loss": 0.0067646786570549015, - "step": 11715 - }, - { - "epoch": 1.9982949701619779, - "grad_norm": 0.0771077573299408, - "learning_rate": 3.876036251599094e-05, - "loss": 0.006473222374916076, - "step": 11720 - }, - { - "epoch": 1.999147485080989, - "grad_norm": 0.047942496836185455, - "learning_rate": 3.873475474963044e-05, - "loss": 0.004876254498958588, - "step": 11725 - }, - { - "epoch": 1.9996589940323957, - "eval_loss": 0.03507082909345627, - "eval_runtime": 3.6311, - "eval_samples_per_second": 69.4, - "eval_steps_per_second": 1.102, - "step": 11728 - }, - { - "eval_cer_subset": 0.01172504763300601, - "eval_cer_subset_edit_distance": 720, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 11728 - }, - { - "epoch": 2.0, - "grad_norm": 0.09595198184251785, - "learning_rate": 3.8709146406845345e-05, - "loss": 0.005297505855560302, - "step": 11730 - }, - { - "epoch": 2.000852514919011, - "grad_norm": 0.0500548854470253, - "learning_rate": 3.8683537499590486e-05, - "loss": 0.0029773740097880363, - "step": 11735 - }, - { - "epoch": 2.001705029838022, - "grad_norm": 0.043145813047885895, - "learning_rate": 3.865792803982097e-05, - "loss": 0.0026956576853990554, - "step": 11740 - }, - { - "epoch": 2.002557544757033, - "grad_norm": 0.06828423589468002, - "learning_rate": 3.86323180394921e-05, - "loss": 0.0032785605639219285, - "step": 11745 - }, - { - "epoch": 2.0034100596760442, - "grad_norm": 0.05070719122886658, - "learning_rate": 3.8606707510559514e-05, - "loss": 0.0025875838473439217, - "step": 11750 - }, - { - "epoch": 2.0042625745950553, - "grad_norm": 0.03793288394808769, - "learning_rate": 3.8581096464979046e-05, - "loss": 0.003196726739406586, - "step": 11755 - }, - { - "epoch": 2.0051150895140664, - "grad_norm": 0.058611899614334106, - "learning_rate": 3.8555484914706783e-05, - "loss": 0.0025842227041721344, - "step": 11760 - }, - { - "epoch": 2.0059676044330774, - "grad_norm": 0.05239633843302727, - "learning_rate": 3.8529872871699064e-05, - "loss": 0.0033856891095638275, - "step": 11765 - }, - { - "epoch": 2.0068201193520885, - "grad_norm": 0.0694168210029602, - "learning_rate": 3.8504260347912414e-05, - "loss": 0.0036750052124261854, - "step": 11770 - }, - { - "epoch": 2.0076726342710995, - "grad_norm": 0.05401293560862541, - "learning_rate": 3.847864735530364e-05, - "loss": 0.0020644858479499815, - "step": 11775 - }, - { - "epoch": 2.008525149190111, - "grad_norm": 0.024288944900035858, - "learning_rate": 3.8453033905829715e-05, - "loss": 0.0030498920008540154, - "step": 11780 - }, - { - "epoch": 2.009377664109122, - "grad_norm": 0.07617825269699097, - "learning_rate": 3.842742001144787e-05, - "loss": 0.002012002095580101, - "step": 11785 - }, - { - "epoch": 2.010230179028133, - "grad_norm": 0.05394979938864708, - "learning_rate": 3.8401805684115514e-05, - "loss": 0.0013803424313664435, - "step": 11790 - }, - { - "epoch": 2.0110826939471442, - "grad_norm": 0.04705117642879486, - "learning_rate": 3.837619093579025e-05, - "loss": 0.0019010987132787704, - "step": 11795 - }, - { - "epoch": 2.0119352088661553, - "grad_norm": 0.04174034297466278, - "learning_rate": 3.835057577842993e-05, - "loss": 0.00339580737054348, - "step": 11800 - }, - { - "epoch": 2.0127877237851663, - "grad_norm": 0.05027524381875992, - "learning_rate": 3.832496022399254e-05, - "loss": 0.003779648244380951, - "step": 11805 - }, - { - "epoch": 2.0136402387041774, - "grad_norm": 0.06344325840473175, - "learning_rate": 3.829934428443627e-05, - "loss": 0.003120606765151024, - "step": 11810 - }, - { - "epoch": 2.0144927536231885, - "grad_norm": 0.04142893850803375, - "learning_rate": 3.827372797171949e-05, - "loss": 0.001541936956346035, - "step": 11815 - }, - { - "epoch": 2.0153452685421995, - "grad_norm": 0.05739682540297508, - "learning_rate": 3.8248111297800766e-05, - "loss": 0.002022813446819782, - "step": 11820 - }, - { - "epoch": 2.0161977834612106, - "grad_norm": 0.05701421573758125, - "learning_rate": 3.82224942746388e-05, - "loss": 0.0032159242779016494, - "step": 11825 - }, - { - "epoch": 2.0170502983802217, - "grad_norm": 0.05839217081665993, - "learning_rate": 3.8196876914192476e-05, - "loss": 0.0019759060814976692, - "step": 11830 - }, - { - "epoch": 2.0179028132992327, - "grad_norm": 0.04104325920343399, - "learning_rate": 3.8171259228420824e-05, - "loss": 0.0030811641365289686, - "step": 11835 - }, - { - "epoch": 2.0187553282182438, - "grad_norm": 0.05367572233080864, - "learning_rate": 3.814564122928303e-05, - "loss": 0.0024660680443048476, - "step": 11840 - }, - { - "epoch": 2.019607843137255, - "grad_norm": 0.07062884420156479, - "learning_rate": 3.8120022928738444e-05, - "loss": 0.0028638459742069246, - "step": 11845 - }, - { - "epoch": 2.020460358056266, - "grad_norm": 0.1098889485001564, - "learning_rate": 3.809440433874652e-05, - "loss": 0.002245648391544819, - "step": 11850 - }, - { - "epoch": 2.021312872975277, - "grad_norm": 0.11214791238307953, - "learning_rate": 3.806878547126689e-05, - "loss": 0.0025152696296572687, - "step": 11855 - }, - { - "epoch": 2.022165387894288, - "grad_norm": 0.0809246301651001, - "learning_rate": 3.804316633825926e-05, - "loss": 0.0030847292393445967, - "step": 11860 - }, - { - "epoch": 2.023017902813299, - "grad_norm": 0.05590866506099701, - "learning_rate": 3.801754695168353e-05, - "loss": 0.002259066328406334, - "step": 11865 - }, - { - "epoch": 2.02387041773231, - "grad_norm": 0.061719413846731186, - "learning_rate": 3.799192732349967e-05, - "loss": 0.003117647022008896, - "step": 11870 - }, - { - "epoch": 2.024722932651321, - "grad_norm": 0.05439555272459984, - "learning_rate": 3.796630746566777e-05, - "loss": 0.00280950702726841, - "step": 11875 - }, - { - "epoch": 2.0255754475703327, - "grad_norm": 0.07110737264156342, - "learning_rate": 3.794068739014802e-05, - "loss": 0.0022924147546291352, - "step": 11880 - }, - { - "epoch": 2.0264279624893438, - "grad_norm": 0.035215508192777634, - "learning_rate": 3.791506710890075e-05, - "loss": 0.0014670810662209988, - "step": 11885 - }, - { - "epoch": 2.027280477408355, - "grad_norm": 0.04196110740303993, - "learning_rate": 3.7889446633886345e-05, - "loss": 0.002128283493220806, - "step": 11890 - }, - { - "epoch": 2.028132992327366, - "grad_norm": 0.02117479033768177, - "learning_rate": 3.7863825977065295e-05, - "loss": 0.002085634134709835, - "step": 11895 - }, - { - "epoch": 2.028985507246377, - "grad_norm": 0.137400820851326, - "learning_rate": 3.783820515039818e-05, - "loss": 0.003697726130485535, - "step": 11900 - }, - { - "epoch": 2.029838022165388, - "grad_norm": 0.05773406848311424, - "learning_rate": 3.781258416584565e-05, - "loss": 0.0020811671391129495, - "step": 11905 - }, - { - "epoch": 2.030690537084399, - "grad_norm": 0.02017928846180439, - "learning_rate": 3.7786963035368425e-05, - "loss": 0.002070310711860657, - "step": 11910 - }, - { - "epoch": 2.03154305200341, - "grad_norm": 0.023989839479327202, - "learning_rate": 3.7761341770927314e-05, - "loss": 0.0035201530903577805, - "step": 11915 - }, - { - "epoch": 2.032395566922421, - "grad_norm": 0.05773681029677391, - "learning_rate": 3.7735720384483176e-05, - "loss": 0.002326706610620022, - "step": 11920 - }, - { - "epoch": 2.0332480818414322, - "grad_norm": 0.06733391433954239, - "learning_rate": 3.771009888799692e-05, - "loss": 0.0019989268854260444, - "step": 11925 - }, - { - "epoch": 2.0341005967604433, - "grad_norm": 0.09590540081262589, - "learning_rate": 3.76844772934295e-05, - "loss": 0.0024355117231607435, - "step": 11930 - }, - { - "epoch": 2.0349531116794544, - "grad_norm": 0.027501709759235382, - "learning_rate": 3.765885561274196e-05, - "loss": 0.0011640249751508236, - "step": 11935 - }, - { - "epoch": 2.0358056265984654, - "grad_norm": 0.031739719212055206, - "learning_rate": 3.7633233857895326e-05, - "loss": 0.0022482817992568018, - "step": 11940 - }, - { - "epoch": 2.0366581415174765, - "grad_norm": 0.027232196182012558, - "learning_rate": 3.760761204085071e-05, - "loss": 0.0018043683841824532, - "step": 11945 - }, - { - "epoch": 2.0375106564364875, - "grad_norm": 0.08634094148874283, - "learning_rate": 3.75819901735692e-05, - "loss": 0.0024119339883327483, - "step": 11950 - }, - { - "epoch": 2.0383631713554986, - "grad_norm": 0.05877285450696945, - "learning_rate": 3.755636826801194e-05, - "loss": 0.0009346712380647659, - "step": 11955 - }, - { - "epoch": 2.0392156862745097, - "grad_norm": 0.034714680165052414, - "learning_rate": 3.7530746336140115e-05, - "loss": 0.0021316718310117723, - "step": 11960 - }, - { - "epoch": 2.0400682011935207, - "grad_norm": 0.05897806957364082, - "learning_rate": 3.750512438991487e-05, - "loss": 0.0029691245406866075, - "step": 11965 - }, - { - "epoch": 2.040920716112532, - "grad_norm": 0.07302019745111465, - "learning_rate": 3.747950244129739e-05, - "loss": 0.0023437861353158953, - "step": 11970 - }, - { - "epoch": 2.041773231031543, - "grad_norm": 0.07187193632125854, - "learning_rate": 3.745388050224885e-05, - "loss": 0.0016971008852124214, - "step": 11975 - }, - { - "epoch": 2.0426257459505544, - "grad_norm": 0.05619340017437935, - "learning_rate": 3.742825858473044e-05, - "loss": 0.0021343151107430457, - "step": 11980 - }, - { - "epoch": 2.0434782608695654, - "grad_norm": 0.08814098685979843, - "learning_rate": 3.7402636700703295e-05, - "loss": 0.0026463912799954414, - "step": 11985 - }, - { - "epoch": 2.0443307757885765, - "grad_norm": 0.10130181908607483, - "learning_rate": 3.737701486212859e-05, - "loss": 0.0020437544211745263, - "step": 11990 - }, - { - "epoch": 2.0451832907075875, - "grad_norm": 0.06105076149106026, - "learning_rate": 3.7351393080967416e-05, - "loss": 0.002344959042966366, - "step": 11995 - }, - { - "epoch": 2.0460358056265986, - "grad_norm": 0.052822742611169815, - "learning_rate": 3.732577136918091e-05, - "loss": 0.0020566854625940325, - "step": 12000 - }, - { - "epoch": 2.0468883205456097, - "grad_norm": 0.10074899345636368, - "learning_rate": 3.730014973873013e-05, - "loss": 0.0026124339550733567, - "step": 12005 - }, - { - "epoch": 2.0477408354646207, - "grad_norm": 0.025597436353564262, - "learning_rate": 3.7274528201576095e-05, - "loss": 0.001905813068151474, - "step": 12010 - }, - { - "epoch": 2.0485933503836318, - "grad_norm": 0.05437781289219856, - "learning_rate": 3.7248906769679776e-05, - "loss": 0.0025912046432495115, - "step": 12015 - }, - { - "epoch": 2.049445865302643, - "grad_norm": 0.07095912098884583, - "learning_rate": 3.722328545500215e-05, - "loss": 0.002769463881850243, - "step": 12020 - }, - { - "epoch": 2.050298380221654, - "grad_norm": 0.14383850991725922, - "learning_rate": 3.719766426950408e-05, - "loss": 0.0038499854505062102, - "step": 12025 - }, - { - "epoch": 2.051150895140665, - "grad_norm": 0.06089269369840622, - "learning_rate": 3.7172043225146386e-05, - "loss": 0.002288899011909962, - "step": 12030 - }, - { - "epoch": 2.052003410059676, - "grad_norm": 0.05808301270008087, - "learning_rate": 3.7146422333889824e-05, - "loss": 0.0028305932879447936, - "step": 12035 - }, - { - "epoch": 2.052855924978687, - "grad_norm": 0.13334520161151886, - "learning_rate": 3.712080160769506e-05, - "loss": 0.00331525094807148, - "step": 12040 - }, - { - "epoch": 2.053708439897698, - "grad_norm": 0.03266080096364021, - "learning_rate": 3.709518105852273e-05, - "loss": 0.0020869884639978407, - "step": 12045 - }, - { - "epoch": 2.054560954816709, - "grad_norm": 0.07307332009077072, - "learning_rate": 3.706956069833336e-05, - "loss": 0.0023028414696455004, - "step": 12050 - }, - { - "epoch": 2.0554134697357203, - "grad_norm": 0.06093568354845047, - "learning_rate": 3.7043940539087366e-05, - "loss": 0.0022027945145964623, - "step": 12055 - }, - { - "epoch": 2.0562659846547313, - "grad_norm": 0.04207700863480568, - "learning_rate": 3.70183205927451e-05, - "loss": 0.0016738155856728554, - "step": 12060 - }, - { - "epoch": 2.0571184995737424, - "grad_norm": 0.046319641172885895, - "learning_rate": 3.699270087126679e-05, - "loss": 0.002480871044099331, - "step": 12065 - }, - { - "epoch": 2.0579710144927534, - "grad_norm": 0.042888909578323364, - "learning_rate": 3.69670813866126e-05, - "loss": 0.0020912257954478265, - "step": 12070 - }, - { - "epoch": 2.0588235294117645, - "grad_norm": 0.05530136078596115, - "learning_rate": 3.694146215074256e-05, - "loss": 0.0021427463740110396, - "step": 12075 - }, - { - "epoch": 2.059676044330776, - "grad_norm": 0.04992877319455147, - "learning_rate": 3.6915843175616555e-05, - "loss": 0.001970967650413513, - "step": 12080 - }, - { - "epoch": 2.060528559249787, - "grad_norm": 0.07341081649065018, - "learning_rate": 3.6890224473194373e-05, - "loss": 0.003120069019496441, - "step": 12085 - }, - { - "epoch": 2.061381074168798, - "grad_norm": 0.05361134931445122, - "learning_rate": 3.686460605543571e-05, - "loss": 0.0030833475291728975, - "step": 12090 - }, - { - "epoch": 2.062233589087809, - "grad_norm": 0.0904894769191742, - "learning_rate": 3.683898793430008e-05, - "loss": 0.0020733945071697234, - "step": 12095 - }, - { - "epoch": 2.0630861040068202, - "grad_norm": 0.03312591835856438, - "learning_rate": 3.681337012174686e-05, - "loss": 0.002308916300535202, - "step": 12100 - }, - { - "epoch": 2.0639386189258313, - "grad_norm": 0.05372268706560135, - "learning_rate": 3.6787752629735314e-05, - "loss": 0.0024915780872106553, - "step": 12105 - }, - { - "epoch": 2.0647911338448424, - "grad_norm": 0.08257800340652466, - "learning_rate": 3.676213547022452e-05, - "loss": 0.001413002610206604, - "step": 12110 - }, - { - "epoch": 2.0656436487638534, - "grad_norm": 0.037859030067920685, - "learning_rate": 3.673651865517344e-05, - "loss": 0.002315748296678066, - "step": 12115 - }, - { - "epoch": 2.0664961636828645, - "grad_norm": 0.04125140607357025, - "learning_rate": 3.6710902196540856e-05, - "loss": 0.0022393757477402686, - "step": 12120 - }, - { - "epoch": 2.0673486786018755, - "grad_norm": 0.09325335919857025, - "learning_rate": 3.668528610628538e-05, - "loss": 0.003246062248945236, - "step": 12125 - }, - { - "epoch": 2.0682011935208866, - "grad_norm": 0.09278098493814468, - "learning_rate": 3.665967039636543e-05, - "loss": 0.0027722738683223723, - "step": 12130 - }, - { - "epoch": 2.0690537084398977, - "grad_norm": 0.07906672358512878, - "learning_rate": 3.663405507873931e-05, - "loss": 0.0035691894590854645, - "step": 12135 - }, - { - "epoch": 2.0699062233589087, - "grad_norm": 0.04077119752764702, - "learning_rate": 3.660844016536507e-05, - "loss": 0.0018417894840240478, - "step": 12140 - }, - { - "epoch": 2.07075873827792, - "grad_norm": 0.08916836231946945, - "learning_rate": 3.6582825668200636e-05, - "loss": 0.0019499020650982856, - "step": 12145 - }, - { - "epoch": 2.071611253196931, - "grad_norm": 0.017643144354224205, - "learning_rate": 3.655721159920368e-05, - "loss": 0.0018016694113612175, - "step": 12150 - }, - { - "epoch": 2.072463768115942, - "grad_norm": 0.046675924211740494, - "learning_rate": 3.6531597970331704e-05, - "loss": 0.0023558875545859337, - "step": 12155 - }, - { - "epoch": 2.073316283034953, - "grad_norm": 0.06159510463476181, - "learning_rate": 3.650598479354202e-05, - "loss": 0.003485919535160065, - "step": 12160 - }, - { - "epoch": 2.074168797953964, - "grad_norm": 0.10620608925819397, - "learning_rate": 3.64803720807917e-05, - "loss": 0.0021355047821998594, - "step": 12165 - }, - { - "epoch": 2.075021312872975, - "grad_norm": 0.03321434184908867, - "learning_rate": 3.645475984403761e-05, - "loss": 0.0027330033481121063, - "step": 12170 - }, - { - "epoch": 2.075873827791986, - "grad_norm": 0.05574263632297516, - "learning_rate": 3.642914809523639e-05, - "loss": 0.0017123395577073098, - "step": 12175 - }, - { - "epoch": 2.0767263427109977, - "grad_norm": 0.045334603637456894, - "learning_rate": 3.640353684634446e-05, - "loss": 0.001525832526385784, - "step": 12180 - }, - { - "epoch": 2.0775788576300087, - "grad_norm": 0.05117806792259216, - "learning_rate": 3.6377926109318005e-05, - "loss": 0.0022421007975935935, - "step": 12185 - }, - { - "epoch": 2.0784313725490198, - "grad_norm": 0.02836792916059494, - "learning_rate": 3.635231589611297e-05, - "loss": 0.003241851553320885, - "step": 12190 - }, - { - "epoch": 2.079283887468031, - "grad_norm": 0.13245631754398346, - "learning_rate": 3.632670621868506e-05, - "loss": 0.0028171174228191374, - "step": 12195 - }, - { - "epoch": 2.080136402387042, - "grad_norm": 0.04175787791609764, - "learning_rate": 3.63010970889897e-05, - "loss": 0.0026013338938355446, - "step": 12200 - }, - { - "epoch": 2.080988917306053, - "grad_norm": 0.022211721166968346, - "learning_rate": 3.6275488518982104e-05, - "loss": 0.0029422508552670477, - "step": 12205 - }, - { - "epoch": 2.081841432225064, - "grad_norm": 0.0889682024717331, - "learning_rate": 3.6249880520617205e-05, - "loss": 0.002521348185837269, - "step": 12210 - }, - { - "epoch": 2.082693947144075, - "grad_norm": 0.022678803652524948, - "learning_rate": 3.622427310584967e-05, - "loss": 0.0010427280329167842, - "step": 12215 - }, - { - "epoch": 2.083546462063086, - "grad_norm": 0.07812847197055817, - "learning_rate": 3.6198666286633886e-05, - "loss": 0.002325686253607273, - "step": 12220 - }, - { - "epoch": 2.084398976982097, - "grad_norm": 0.06912051141262054, - "learning_rate": 3.6173060074923945e-05, - "loss": 0.0022675972431898117, - "step": 12225 - }, - { - "epoch": 2.0852514919011083, - "grad_norm": 0.02951810136437416, - "learning_rate": 3.6147454482673715e-05, - "loss": 0.00159697774797678, - "step": 12230 - }, - { - "epoch": 2.0861040068201193, - "grad_norm": 0.11821833997964859, - "learning_rate": 3.6121849521836735e-05, - "loss": 0.002206057496368885, - "step": 12235 - }, - { - "epoch": 2.0869565217391304, - "grad_norm": 0.05461777001619339, - "learning_rate": 3.609624520436624e-05, - "loss": 0.0012241648510098457, - "step": 12240 - }, - { - "epoch": 2.0878090366581414, - "grad_norm": 0.05038715526461601, - "learning_rate": 3.607064154221516e-05, - "loss": 0.002225806750357151, - "step": 12245 - }, - { - "epoch": 2.0886615515771525, - "grad_norm": 0.03050738200545311, - "learning_rate": 3.604503854733617e-05, - "loss": 0.0020998189225792884, - "step": 12250 - }, - { - "epoch": 2.0895140664961636, - "grad_norm": 0.07000287622213364, - "learning_rate": 3.6019436231681585e-05, - "loss": 0.0022106122225522993, - "step": 12255 - }, - { - "epoch": 2.0903665814151746, - "grad_norm": 0.0332137756049633, - "learning_rate": 3.5993834607203416e-05, - "loss": 0.0020401908084750177, - "step": 12260 - }, - { - "epoch": 2.0912190963341857, - "grad_norm": 0.0996270552277565, - "learning_rate": 3.596823368585336e-05, - "loss": 0.002487153559923172, - "step": 12265 - }, - { - "epoch": 2.0920716112531967, - "grad_norm": 0.1305847465991974, - "learning_rate": 3.594263347958276e-05, - "loss": 0.0028627485036849974, - "step": 12270 - }, - { - "epoch": 2.092924126172208, - "grad_norm": 0.0762234702706337, - "learning_rate": 3.5917034000342664e-05, - "loss": 0.0020220713689923287, - "step": 12275 - }, - { - "epoch": 2.0937766410912193, - "grad_norm": 0.015480007976293564, - "learning_rate": 3.589143526008376e-05, - "loss": 0.00215108972042799, - "step": 12280 - }, - { - "epoch": 2.0946291560102304, - "grad_norm": 0.0862250104546547, - "learning_rate": 3.5865837270756385e-05, - "loss": 0.0020705640316009523, - "step": 12285 - }, - { - "epoch": 2.0954816709292414, - "grad_norm": 0.03390849754214287, - "learning_rate": 3.584024004431052e-05, - "loss": 0.002040168456733227, - "step": 12290 - }, - { - "epoch": 2.0963341858482525, - "grad_norm": 0.07754851132631302, - "learning_rate": 3.581464359269582e-05, - "loss": 0.0029265256598591805, - "step": 12295 - }, - { - "epoch": 2.0971867007672635, - "grad_norm": 0.0625162348151207, - "learning_rate": 3.578904792786155e-05, - "loss": 0.0020755715668201447, - "step": 12300 - }, - { - "epoch": 2.0980392156862746, - "grad_norm": 0.10999561101198196, - "learning_rate": 3.576345306175663e-05, - "loss": 0.0027062267065048216, - "step": 12305 - }, - { - "epoch": 2.0988917306052857, - "grad_norm": 0.03573682904243469, - "learning_rate": 3.573785900632959e-05, - "loss": 0.00178314708173275, - "step": 12310 - }, - { - "epoch": 2.0997442455242967, - "grad_norm": 0.07235981523990631, - "learning_rate": 3.5712265773528564e-05, - "loss": 0.00233871191740036, - "step": 12315 - }, - { - "epoch": 2.100596760443308, - "grad_norm": 0.054438747465610504, - "learning_rate": 3.568667337530135e-05, - "loss": 0.0031350374221801756, - "step": 12320 - }, - { - "epoch": 2.101449275362319, - "grad_norm": 0.07696446031332016, - "learning_rate": 3.566108182359533e-05, - "loss": 0.0019116310402750968, - "step": 12325 - }, - { - "epoch": 2.10230179028133, - "grad_norm": 0.0676850974559784, - "learning_rate": 3.563549113035749e-05, - "loss": 0.0011704936623573303, - "step": 12330 - }, - { - "epoch": 2.103154305200341, - "grad_norm": 0.07241418212652206, - "learning_rate": 3.5609901307534416e-05, - "loss": 0.002332131937146187, - "step": 12335 - }, - { - "epoch": 2.104006820119352, - "grad_norm": 0.0832296758890152, - "learning_rate": 3.558431236707227e-05, - "loss": 0.002539648115634918, - "step": 12340 - }, - { - "epoch": 2.104859335038363, - "grad_norm": 0.046911224722862244, - "learning_rate": 3.555872432091684e-05, - "loss": 0.0015112090855836867, - "step": 12345 - }, - { - "epoch": 2.105711849957374, - "grad_norm": 0.09462827444076538, - "learning_rate": 3.553313718101348e-05, - "loss": 0.0024237846955657005, - "step": 12350 - }, - { - "epoch": 2.106564364876385, - "grad_norm": 0.06934045255184174, - "learning_rate": 3.550755095930711e-05, - "loss": 0.0014186175540089607, - "step": 12355 - }, - { - "epoch": 2.1074168797953963, - "grad_norm": 0.05409622564911842, - "learning_rate": 3.5481965667742216e-05, - "loss": 0.0016573246568441391, - "step": 12360 - }, - { - "epoch": 2.1082693947144073, - "grad_norm": 0.05712766572833061, - "learning_rate": 3.545638131826289e-05, - "loss": 0.0029039720073342325, - "step": 12365 - }, - { - "epoch": 2.1091219096334184, - "grad_norm": 0.05685155466198921, - "learning_rate": 3.543079792281274e-05, - "loss": 0.0016390934586524963, - "step": 12370 - }, - { - "epoch": 2.10997442455243, - "grad_norm": 0.06140974909067154, - "learning_rate": 3.5405215493334966e-05, - "loss": 0.0038812048733234406, - "step": 12375 - }, - { - "epoch": 2.110826939471441, - "grad_norm": 0.0662747323513031, - "learning_rate": 3.537963404177227e-05, - "loss": 0.0029465768486261366, - "step": 12380 - }, - { - "epoch": 2.111679454390452, - "grad_norm": 0.05666056647896767, - "learning_rate": 3.535405358006694e-05, - "loss": 0.0028562054038047792, - "step": 12385 - }, - { - "epoch": 2.112531969309463, - "grad_norm": 0.02187039703130722, - "learning_rate": 3.532847412016077e-05, - "loss": 0.0017194624990224839, - "step": 12390 - }, - { - "epoch": 2.113384484228474, - "grad_norm": 0.040781840682029724, - "learning_rate": 3.530289567399513e-05, - "loss": 0.0026536308228969573, - "step": 12395 - }, - { - "epoch": 2.114236999147485, - "grad_norm": 0.05844609811902046, - "learning_rate": 3.527731825351088e-05, - "loss": 0.0018391696736216544, - "step": 12400 - }, - { - "epoch": 2.1150895140664963, - "grad_norm": 0.08661946654319763, - "learning_rate": 3.52517418706484e-05, - "loss": 0.0028108954429626465, - "step": 12405 - }, - { - "epoch": 2.1159420289855073, - "grad_norm": 0.05540858209133148, - "learning_rate": 3.52261665373476e-05, - "loss": 0.001869852840900421, - "step": 12410 - }, - { - "epoch": 2.1167945439045184, - "grad_norm": 0.05183592066168785, - "learning_rate": 3.520059226554789e-05, - "loss": 0.0038085319101810455, - "step": 12415 - }, - { - "epoch": 2.1176470588235294, - "grad_norm": 0.09019337594509125, - "learning_rate": 3.517501906718822e-05, - "loss": 0.0025485800579190254, - "step": 12420 - }, - { - "epoch": 2.1184995737425405, - "grad_norm": 0.05994381010532379, - "learning_rate": 3.514944695420698e-05, - "loss": 0.0023555709049105644, - "step": 12425 - }, - { - "epoch": 2.1193520886615516, - "grad_norm": 0.07013200968503952, - "learning_rate": 3.512387593854208e-05, - "loss": 0.0023415835574269296, - "step": 12430 - }, - { - "epoch": 2.1202046035805626, - "grad_norm": 0.0558604821562767, - "learning_rate": 3.509830603213094e-05, - "loss": 0.002999695762991905, - "step": 12435 - }, - { - "epoch": 2.1210571184995737, - "grad_norm": 0.054457131773233414, - "learning_rate": 3.507273724691045e-05, - "loss": 0.0022147590294480323, - "step": 12440 - }, - { - "epoch": 2.1219096334185847, - "grad_norm": 0.052365075796842575, - "learning_rate": 3.5047169594816955e-05, - "loss": 0.0023635342717170716, - "step": 12445 - }, - { - "epoch": 2.122762148337596, - "grad_norm": 0.047059565782547, - "learning_rate": 3.502160308778627e-05, - "loss": 0.0015694497153162957, - "step": 12450 - }, - { - "epoch": 2.123614663256607, - "grad_norm": 0.03100336343050003, - "learning_rate": 3.499603773775371e-05, - "loss": 0.0020049646496772765, - "step": 12455 - }, - { - "epoch": 2.124467178175618, - "grad_norm": 0.07436710596084595, - "learning_rate": 3.4970473556654027e-05, - "loss": 0.004277446493506432, - "step": 12460 - }, - { - "epoch": 2.125319693094629, - "grad_norm": 0.044698864221572876, - "learning_rate": 3.4944910556421444e-05, - "loss": 0.0032587334513664245, - "step": 12465 - }, - { - "epoch": 2.12617220801364, - "grad_norm": 0.04725298285484314, - "learning_rate": 3.491934874898961e-05, - "loss": 0.0018061451613903047, - "step": 12470 - }, - { - "epoch": 2.127024722932651, - "grad_norm": 0.04054245352745056, - "learning_rate": 3.4893788146291604e-05, - "loss": 0.0017766639590263366, - "step": 12475 - }, - { - "epoch": 2.1278772378516626, - "grad_norm": 0.06061461195349693, - "learning_rate": 3.486822876025999e-05, - "loss": 0.0025131702423095703, - "step": 12480 - }, - { - "epoch": 2.1287297527706737, - "grad_norm": 0.058438993990421295, - "learning_rate": 3.4842670602826744e-05, - "loss": 0.002218405343592167, - "step": 12485 - }, - { - "epoch": 2.1295822676896847, - "grad_norm": 0.057440634816884995, - "learning_rate": 3.481711368592327e-05, - "loss": 0.0015549706295132637, - "step": 12490 - }, - { - "epoch": 2.130434782608696, - "grad_norm": 0.06638845056295395, - "learning_rate": 3.4791558021480355e-05, - "loss": 0.002662469446659088, - "step": 12495 - }, - { - "epoch": 2.131287297527707, - "grad_norm": 0.06725790351629257, - "learning_rate": 3.476600362142824e-05, - "loss": 0.0024463947862386703, - "step": 12500 - }, - { - "epoch": 2.132139812446718, - "grad_norm": 0.07708985358476639, - "learning_rate": 3.474045049769659e-05, - "loss": 0.0034916583448648454, - "step": 12505 - }, - { - "epoch": 2.132992327365729, - "grad_norm": 0.06412148475646973, - "learning_rate": 3.4714898662214454e-05, - "loss": 0.002831364795565605, - "step": 12510 - }, - { - "epoch": 2.13384484228474, - "grad_norm": 0.04649505391716957, - "learning_rate": 3.468934812691027e-05, - "loss": 0.002048753574490547, - "step": 12515 - }, - { - "epoch": 2.134697357203751, - "grad_norm": 0.04807932674884796, - "learning_rate": 3.4663798903711865e-05, - "loss": 0.0018209950998425485, - "step": 12520 - }, - { - "epoch": 2.135549872122762, - "grad_norm": 0.043283116072416306, - "learning_rate": 3.4638251004546476e-05, - "loss": 0.001797056198120117, - "step": 12525 - }, - { - "epoch": 2.136402387041773, - "grad_norm": 0.015419692732393742, - "learning_rate": 3.4612704441340716e-05, - "loss": 0.002100854739546776, - "step": 12530 - }, - { - "epoch": 2.1372549019607843, - "grad_norm": 0.05244193226099014, - "learning_rate": 3.458715922602057e-05, - "loss": 0.002430478297173977, - "step": 12535 - }, - { - "epoch": 2.1381074168797953, - "grad_norm": 0.08995307981967926, - "learning_rate": 3.4561615370511394e-05, - "loss": 0.0023157089948654176, - "step": 12540 - }, - { - "epoch": 2.1389599317988064, - "grad_norm": 0.06513174623250961, - "learning_rate": 3.4536072886737894e-05, - "loss": 0.002109107933938503, - "step": 12545 - }, - { - "epoch": 2.1398124467178175, - "grad_norm": 0.12199243903160095, - "learning_rate": 3.4510531786624176e-05, - "loss": 0.0016247857362031936, - "step": 12550 - }, - { - "epoch": 2.1406649616368285, - "grad_norm": 0.06062543764710426, - "learning_rate": 3.4484992082093665e-05, - "loss": 0.0033494606614112854, - "step": 12555 - }, - { - "epoch": 2.1415174765558396, - "grad_norm": 0.08636222034692764, - "learning_rate": 3.445945378506915e-05, - "loss": 0.0037529505789279938, - "step": 12560 - }, - { - "epoch": 2.1423699914748506, - "grad_norm": 0.027961688116192818, - "learning_rate": 3.443391690747274e-05, - "loss": 0.0016466494649648666, - "step": 12565 - }, - { - "epoch": 2.1432225063938617, - "grad_norm": 0.033621031790971756, - "learning_rate": 3.440838146122591e-05, - "loss": 0.002477872557938099, - "step": 12570 - }, - { - "epoch": 2.144075021312873, - "grad_norm": 0.08104594051837921, - "learning_rate": 3.4382847458249453e-05, - "loss": 0.0031348835676908494, - "step": 12575 - }, - { - "epoch": 2.1449275362318843, - "grad_norm": 0.07412353157997131, - "learning_rate": 3.4357314910463506e-05, - "loss": 0.002509618178009987, - "step": 12580 - }, - { - "epoch": 2.1457800511508953, - "grad_norm": 0.04261288791894913, - "learning_rate": 3.43317838297875e-05, - "loss": 0.0021477997303009032, - "step": 12585 - }, - { - "epoch": 2.1466325660699064, - "grad_norm": 0.15133292973041534, - "learning_rate": 3.430625422814018e-05, - "loss": 0.0033604972064495086, - "step": 12590 - }, - { - "epoch": 2.1474850809889174, - "grad_norm": 0.08455967903137207, - "learning_rate": 3.428072611743962e-05, - "loss": 0.0035134248435497286, - "step": 12595 - }, - { - "epoch": 2.1483375959079285, - "grad_norm": 0.10830427706241608, - "learning_rate": 3.425519950960321e-05, - "loss": 0.003783620521426201, - "step": 12600 - }, - { - "epoch": 2.1491901108269396, - "grad_norm": 0.05701782926917076, - "learning_rate": 3.422967441654761e-05, - "loss": 0.0017763100564479827, - "step": 12605 - }, - { - "epoch": 2.1500426257459506, - "grad_norm": 0.058323513716459274, - "learning_rate": 3.420415085018878e-05, - "loss": 0.003765106201171875, - "step": 12610 - }, - { - "epoch": 2.1508951406649617, - "grad_norm": 0.08780697733163834, - "learning_rate": 3.417862882244195e-05, - "loss": 0.0021065909415483473, - "step": 12615 - }, - { - "epoch": 2.1517476555839727, - "grad_norm": 0.08741293847560883, - "learning_rate": 3.415310834522168e-05, - "loss": 0.0022673629224300384, - "step": 12620 - }, - { - "epoch": 2.152600170502984, - "grad_norm": 0.08681067824363708, - "learning_rate": 3.412758943044177e-05, - "loss": 0.0029561318457126617, - "step": 12625 - }, - { - "epoch": 2.153452685421995, - "grad_norm": 0.05104825645685196, - "learning_rate": 3.4102072090015306e-05, - "loss": 0.0028430519625544546, - "step": 12630 - }, - { - "epoch": 2.154305200341006, - "grad_norm": 0.05437494069337845, - "learning_rate": 3.4076556335854606e-05, - "loss": 0.0026259947568178176, - "step": 12635 - }, - { - "epoch": 2.155157715260017, - "grad_norm": 0.016572406515479088, - "learning_rate": 3.4051042179871286e-05, - "loss": 0.00198390893638134, - "step": 12640 - }, - { - "epoch": 2.156010230179028, - "grad_norm": 0.04134957864880562, - "learning_rate": 3.4025529633976216e-05, - "loss": 0.0017651205882430077, - "step": 12645 - }, - { - "epoch": 2.156862745098039, - "grad_norm": 0.04091856628656387, - "learning_rate": 3.400001871007949e-05, - "loss": 0.002631684020161629, - "step": 12650 - }, - { - "epoch": 2.15771526001705, - "grad_norm": 0.08851557224988937, - "learning_rate": 3.397450942009046e-05, - "loss": 0.004056418687105179, - "step": 12655 - }, - { - "epoch": 2.1585677749360612, - "grad_norm": 0.09870146960020065, - "learning_rate": 3.3949001775917686e-05, - "loss": 0.0017272619530558585, - "step": 12660 - }, - { - "epoch": 2.1594202898550723, - "grad_norm": 0.059828147292137146, - "learning_rate": 3.3923495789469016e-05, - "loss": 0.0018833462148904801, - "step": 12665 - }, - { - "epoch": 2.1602728047740833, - "grad_norm": 0.04078202694654465, - "learning_rate": 3.3897991472651495e-05, - "loss": 0.0015183920040726662, - "step": 12670 - }, - { - "epoch": 2.1611253196930944, - "grad_norm": 0.09713901579380035, - "learning_rate": 3.387248883737137e-05, - "loss": 0.002313835546374321, - "step": 12675 - }, - { - "epoch": 2.161977834612106, - "grad_norm": 0.13590694963932037, - "learning_rate": 3.3846987895534116e-05, - "loss": 0.002948279120028019, - "step": 12680 - }, - { - "epoch": 2.162830349531117, - "grad_norm": 0.05830051749944687, - "learning_rate": 3.3821488659044435e-05, - "loss": 0.002866750955581665, - "step": 12685 - }, - { - "epoch": 2.163682864450128, - "grad_norm": 0.08523424714803696, - "learning_rate": 3.3795991139806205e-05, - "loss": 0.001992848888039589, - "step": 12690 - }, - { - "epoch": 2.164535379369139, - "grad_norm": 0.07573958486318588, - "learning_rate": 3.3770495349722534e-05, - "loss": 0.003222312778234482, - "step": 12695 - }, - { - "epoch": 2.16538789428815, - "grad_norm": 0.1144784539937973, - "learning_rate": 3.374500130069569e-05, - "loss": 0.0023121457546949387, - "step": 12700 - }, - { - "epoch": 2.166240409207161, - "grad_norm": 0.037679724395275116, - "learning_rate": 3.371950900462716e-05, - "loss": 0.0022720521315932273, - "step": 12705 - }, - { - "epoch": 2.1670929241261723, - "grad_norm": 0.09523876011371613, - "learning_rate": 3.369401847341756e-05, - "loss": 0.0026744550094008447, - "step": 12710 - }, - { - "epoch": 2.1679454390451833, - "grad_norm": 0.08904188126325607, - "learning_rate": 3.3668529718966753e-05, - "loss": 0.0025367535650730132, - "step": 12715 - }, - { - "epoch": 2.1687979539641944, - "grad_norm": 0.065862737596035, - "learning_rate": 3.364304275317373e-05, - "loss": 0.0017513807862997055, - "step": 12720 - }, - { - "epoch": 2.1696504688832055, - "grad_norm": 0.03308388963341713, - "learning_rate": 3.361755758793665e-05, - "loss": 0.001534645166248083, - "step": 12725 - }, - { - "epoch": 2.1705029838022165, - "grad_norm": 0.11249089986085892, - "learning_rate": 3.359207423515283e-05, - "loss": 0.0012927086092531681, - "step": 12730 - }, - { - "epoch": 2.1713554987212276, - "grad_norm": 0.09918250143527985, - "learning_rate": 3.356659270671875e-05, - "loss": 0.0035567093640565873, - "step": 12735 - }, - { - "epoch": 2.1722080136402386, - "grad_norm": 0.008674295619130135, - "learning_rate": 3.354111301453005e-05, - "loss": 0.0013304737396538258, - "step": 12740 - }, - { - "epoch": 2.1730605285592497, - "grad_norm": 0.09038940817117691, - "learning_rate": 3.351563517048149e-05, - "loss": 0.0022449616342782976, - "step": 12745 - }, - { - "epoch": 2.1739130434782608, - "grad_norm": 0.11863812804222107, - "learning_rate": 3.349015918646695e-05, - "loss": 0.0029456689953804016, - "step": 12750 - }, - { - "epoch": 2.174765558397272, - "grad_norm": 0.055224135518074036, - "learning_rate": 3.34646850743795e-05, - "loss": 0.0021983785554766655, - "step": 12755 - }, - { - "epoch": 2.175618073316283, - "grad_norm": 0.05251838266849518, - "learning_rate": 3.34392128461113e-05, - "loss": 0.0018048876896500587, - "step": 12760 - }, - { - "epoch": 2.176470588235294, - "grad_norm": 0.07146445661783218, - "learning_rate": 3.341374251355361e-05, - "loss": 0.0030932359397411345, - "step": 12765 - }, - { - "epoch": 2.177323103154305, - "grad_norm": 0.03640792518854141, - "learning_rate": 3.338827408859686e-05, - "loss": 0.0016893571242690085, - "step": 12770 - }, - { - "epoch": 2.1781756180733165, - "grad_norm": 0.0680721327662468, - "learning_rate": 3.336280758313052e-05, - "loss": 0.0037735387682914733, - "step": 12775 - }, - { - "epoch": 2.1790281329923276, - "grad_norm": 0.047598470002412796, - "learning_rate": 3.333734300904322e-05, - "loss": 0.002026566304266453, - "step": 12780 - }, - { - "epoch": 2.1798806479113386, - "grad_norm": 0.08361580222845078, - "learning_rate": 3.3311880378222695e-05, - "loss": 0.002865005284547806, - "step": 12785 - }, - { - "epoch": 2.1807331628303497, - "grad_norm": 0.04869835823774338, - "learning_rate": 3.328641970255572e-05, - "loss": 0.0018146531656384468, - "step": 12790 - }, - { - "epoch": 2.1815856777493607, - "grad_norm": 0.06970708072185516, - "learning_rate": 3.326096099392819e-05, - "loss": 0.0022316936403512953, - "step": 12795 - }, - { - "epoch": 2.182438192668372, - "grad_norm": 0.07073621451854706, - "learning_rate": 3.323550426422508e-05, - "loss": 0.0021546846255660057, - "step": 12800 - }, - { - "epoch": 2.183290707587383, - "grad_norm": 0.0552116334438324, - "learning_rate": 3.3210049525330426e-05, - "loss": 0.0022750692442059517, - "step": 12805 - }, - { - "epoch": 2.184143222506394, - "grad_norm": 0.08244488388299942, - "learning_rate": 3.318459678912737e-05, - "loss": 0.0027180306613445284, - "step": 12810 - }, - { - "epoch": 2.184995737425405, - "grad_norm": 0.07275483757257462, - "learning_rate": 3.315914606749808e-05, - "loss": 0.002150987088680267, - "step": 12815 - }, - { - "epoch": 2.185848252344416, - "grad_norm": 0.06152818351984024, - "learning_rate": 3.3133697372323804e-05, - "loss": 0.002709987387061119, - "step": 12820 - }, - { - "epoch": 2.186700767263427, - "grad_norm": 0.07358045876026154, - "learning_rate": 3.310825071548483e-05, - "loss": 0.0029207577928900717, - "step": 12825 - }, - { - "epoch": 2.187553282182438, - "grad_norm": 0.07633842527866364, - "learning_rate": 3.3082806108860516e-05, - "loss": 0.0028854381293058396, - "step": 12830 - }, - { - "epoch": 2.1884057971014492, - "grad_norm": 0.0533052496612072, - "learning_rate": 3.305736356432926e-05, - "loss": 0.0023338528349995612, - "step": 12835 - }, - { - "epoch": 2.1892583120204603, - "grad_norm": 0.09400077164173126, - "learning_rate": 3.303192309376846e-05, - "loss": 0.00362023301422596, - "step": 12840 - }, - { - "epoch": 2.1901108269394713, - "grad_norm": 0.09847433120012283, - "learning_rate": 3.300648470905459e-05, - "loss": 0.003238249197602272, - "step": 12845 - }, - { - "epoch": 2.1909633418584824, - "grad_norm": 0.09695439040660858, - "learning_rate": 3.298104842206314e-05, - "loss": 0.002254056558012962, - "step": 12850 - }, - { - "epoch": 2.1918158567774935, - "grad_norm": 0.07510244101285934, - "learning_rate": 3.295561424466861e-05, - "loss": 0.002555438503623009, - "step": 12855 - }, - { - "epoch": 2.1926683716965045, - "grad_norm": 0.07085850089788437, - "learning_rate": 3.2930182188744524e-05, - "loss": 0.0029295925050973892, - "step": 12860 - }, - { - "epoch": 2.1935208866155156, - "grad_norm": 0.12662498652935028, - "learning_rate": 3.290475226616339e-05, - "loss": 0.0019443847239017486, - "step": 12865 - }, - { - "epoch": 2.1943734015345266, - "grad_norm": 0.08738470077514648, - "learning_rate": 3.2879324488796755e-05, - "loss": 0.002229847013950348, - "step": 12870 - }, - { - "epoch": 2.1952259164535377, - "grad_norm": 0.04957102984189987, - "learning_rate": 3.285389886851517e-05, - "loss": 0.0017434298992156983, - "step": 12875 - }, - { - "epoch": 2.196078431372549, - "grad_norm": 0.057968392968177795, - "learning_rate": 3.282847541718814e-05, - "loss": 0.003453432023525238, - "step": 12880 - }, - { - "epoch": 2.1969309462915603, - "grad_norm": 0.1128922700881958, - "learning_rate": 3.280305414668419e-05, - "loss": 0.0025962982326745987, - "step": 12885 - }, - { - "epoch": 2.1977834612105713, - "grad_norm": 0.0661446675658226, - "learning_rate": 3.2777635068870784e-05, - "loss": 0.002279244549572468, - "step": 12890 - }, - { - "epoch": 2.1986359761295824, - "grad_norm": 0.09260411560535431, - "learning_rate": 3.275221819561443e-05, - "loss": 0.002637815475463867, - "step": 12895 - }, - { - "epoch": 2.1994884910485935, - "grad_norm": 0.08168021589517593, - "learning_rate": 3.272680353878056e-05, - "loss": 0.0029386602342128753, - "step": 12900 - }, - { - "epoch": 2.2003410059676045, - "grad_norm": 0.06187237799167633, - "learning_rate": 3.270139111023358e-05, - "loss": 0.0018257955089211464, - "step": 12905 - }, - { - "epoch": 2.2011935208866156, - "grad_norm": 0.09450117498636246, - "learning_rate": 3.267598092183684e-05, - "loss": 0.0023655250668525698, - "step": 12910 - }, - { - "epoch": 2.2020460358056266, - "grad_norm": 0.060870688408613205, - "learning_rate": 3.2650572985452685e-05, - "loss": 0.001705418713390827, - "step": 12915 - }, - { - "epoch": 2.2028985507246377, - "grad_norm": 0.06867264956235886, - "learning_rate": 3.262516731294237e-05, - "loss": 0.00248488187789917, - "step": 12920 - }, - { - "epoch": 2.2037510656436488, - "grad_norm": 0.07654258608818054, - "learning_rate": 3.259976391616612e-05, - "loss": 0.002200855314731598, - "step": 12925 - }, - { - "epoch": 2.20460358056266, - "grad_norm": 0.06781245768070221, - "learning_rate": 3.257436280698308e-05, - "loss": 0.002006441354751587, - "step": 12930 - }, - { - "epoch": 2.205456095481671, - "grad_norm": 0.045858342200517654, - "learning_rate": 3.254896399725132e-05, - "loss": 0.0020667938515543938, - "step": 12935 - }, - { - "epoch": 2.206308610400682, - "grad_norm": 0.06805605441331863, - "learning_rate": 3.2523567498827865e-05, - "loss": 0.002215307205915451, - "step": 12940 - }, - { - "epoch": 2.207161125319693, - "grad_norm": 0.07554472237825394, - "learning_rate": 3.2498173323568645e-05, - "loss": 0.0021156981587409974, - "step": 12945 - }, - { - "epoch": 2.208013640238704, - "grad_norm": 0.049611154943704605, - "learning_rate": 3.2472781483328506e-05, - "loss": 0.0037985272705554963, - "step": 12950 - }, - { - "epoch": 2.208866155157715, - "grad_norm": 0.04867832362651825, - "learning_rate": 3.24473919899612e-05, - "loss": 0.0011579260230064393, - "step": 12955 - }, - { - "epoch": 2.209718670076726, - "grad_norm": 0.04439609497785568, - "learning_rate": 3.2422004855319376e-05, - "loss": 0.0033864513039588927, - "step": 12960 - }, - { - "epoch": 2.2105711849957372, - "grad_norm": 0.054114069789648056, - "learning_rate": 3.23966200912546e-05, - "loss": 0.0017186013981699943, - "step": 12965 - }, - { - "epoch": 2.2114236999147483, - "grad_norm": 0.03286417946219444, - "learning_rate": 3.237123770961735e-05, - "loss": 0.0013779066503047943, - "step": 12970 - }, - { - "epoch": 2.21227621483376, - "grad_norm": 0.05740232393145561, - "learning_rate": 3.234585772225694e-05, - "loss": 0.00376686155796051, - "step": 12975 - }, - { - "epoch": 2.213128729752771, - "grad_norm": 0.11821190267801285, - "learning_rate": 3.232048014102158e-05, - "loss": 0.003515421971678734, - "step": 12980 - }, - { - "epoch": 2.213981244671782, - "grad_norm": 0.06561318039894104, - "learning_rate": 3.229510497775838e-05, - "loss": 0.0034034676849842072, - "step": 12985 - }, - { - "epoch": 2.214833759590793, - "grad_norm": 0.06076068431138992, - "learning_rate": 3.226973224431333e-05, - "loss": 0.0018323207274079322, - "step": 12990 - }, - { - "epoch": 2.215686274509804, - "grad_norm": 0.05743642896413803, - "learning_rate": 3.2244361952531266e-05, - "loss": 0.002844391018152237, - "step": 12995 - }, - { - "epoch": 2.216538789428815, - "grad_norm": 0.0632607489824295, - "learning_rate": 3.221899411425586e-05, - "loss": 0.003329380601644516, - "step": 13000 - }, - { - "epoch": 2.217391304347826, - "grad_norm": 0.06082088127732277, - "learning_rate": 3.219362874132966e-05, - "loss": 0.0026398774236440657, - "step": 13005 - }, - { - "epoch": 2.2182438192668372, - "grad_norm": 0.07731121778488159, - "learning_rate": 3.2168265845594075e-05, - "loss": 0.00193992517888546, - "step": 13010 - }, - { - "epoch": 2.2190963341858483, - "grad_norm": 0.08783961087465286, - "learning_rate": 3.214290543888938e-05, - "loss": 0.0019096124917268753, - "step": 13015 - }, - { - "epoch": 2.2199488491048593, - "grad_norm": 0.07576426863670349, - "learning_rate": 3.211754753305461e-05, - "loss": 0.002824045717716217, - "step": 13020 - }, - { - "epoch": 2.2208013640238704, - "grad_norm": 0.0671941265463829, - "learning_rate": 3.20921921399277e-05, - "loss": 0.0025903450325131415, - "step": 13025 - }, - { - "epoch": 2.2216538789428815, - "grad_norm": 0.025313038378953934, - "learning_rate": 3.206683927134538e-05, - "loss": 0.001357127632945776, - "step": 13030 - }, - { - "epoch": 2.2225063938618925, - "grad_norm": 0.0281735397875309, - "learning_rate": 3.204148893914323e-05, - "loss": 0.0018472330644726752, - "step": 13035 - }, - { - "epoch": 2.2233589087809036, - "grad_norm": 0.027222834527492523, - "learning_rate": 3.2016141155155625e-05, - "loss": 0.0018411261960864067, - "step": 13040 - }, - { - "epoch": 2.2242114236999146, - "grad_norm": 0.04794001951813698, - "learning_rate": 3.199079593121574e-05, - "loss": 0.0015307093039155007, - "step": 13045 - }, - { - "epoch": 2.2250639386189257, - "grad_norm": 0.05856316536664963, - "learning_rate": 3.196545327915558e-05, - "loss": 0.001051103323698044, - "step": 13050 - }, - { - "epoch": 2.2259164535379368, - "grad_norm": 0.037851642817258835, - "learning_rate": 3.194011321080592e-05, - "loss": 0.0020413145422935484, - "step": 13055 - }, - { - "epoch": 2.226768968456948, - "grad_norm": 0.04197809472680092, - "learning_rate": 3.191477573799638e-05, - "loss": 0.0025324104353785515, - "step": 13060 - }, - { - "epoch": 2.227621483375959, - "grad_norm": 0.04126058518886566, - "learning_rate": 3.188944087255531e-05, - "loss": 0.001765124499797821, - "step": 13065 - }, - { - "epoch": 2.2284739982949704, - "grad_norm": 0.13436861336231232, - "learning_rate": 3.186410862630988e-05, - "loss": 0.003620542213320732, - "step": 13070 - }, - { - "epoch": 2.229326513213981, - "grad_norm": 0.05177616328001022, - "learning_rate": 3.183877901108601e-05, - "loss": 0.001679854467511177, - "step": 13075 - }, - { - "epoch": 2.2301790281329925, - "grad_norm": 0.03360729292035103, - "learning_rate": 3.1813452038708415e-05, - "loss": 0.002009689994156361, - "step": 13080 - }, - { - "epoch": 2.2310315430520036, - "grad_norm": 0.102437824010849, - "learning_rate": 3.178812772100058e-05, - "loss": 0.002533908933401108, - "step": 13085 - }, - { - "epoch": 2.2318840579710146, - "grad_norm": 0.045174695551395416, - "learning_rate": 3.176280606978473e-05, - "loss": 0.0023472383618354797, - "step": 13090 - }, - { - "epoch": 2.2327365728900257, - "grad_norm": 0.0679149329662323, - "learning_rate": 3.173748709688184e-05, - "loss": 0.00249241441488266, - "step": 13095 - }, - { - "epoch": 2.2335890878090368, - "grad_norm": 0.1367262750864029, - "learning_rate": 3.171217081411166e-05, - "loss": 0.002387053519487381, - "step": 13100 - }, - { - "epoch": 2.234441602728048, - "grad_norm": 0.06661707162857056, - "learning_rate": 3.168685723329269e-05, - "loss": 0.002376999333500862, - "step": 13105 - }, - { - "epoch": 2.235294117647059, - "grad_norm": 0.08916410058736801, - "learning_rate": 3.166154636624214e-05, - "loss": 0.0027421964332461357, - "step": 13110 - }, - { - "epoch": 2.23614663256607, - "grad_norm": 0.058119386434555054, - "learning_rate": 3.163623822477595e-05, - "loss": 0.0018962904810905456, - "step": 13115 - }, - { - "epoch": 2.236999147485081, - "grad_norm": 0.06457269936800003, - "learning_rate": 3.161093282070882e-05, - "loss": 0.001441392581909895, - "step": 13120 - }, - { - "epoch": 2.237851662404092, - "grad_norm": 0.1250019371509552, - "learning_rate": 3.158563016585412e-05, - "loss": 0.002274188958108425, - "step": 13125 - }, - { - "epoch": 2.238704177323103, - "grad_norm": 0.03324245661497116, - "learning_rate": 3.156033027202403e-05, - "loss": 0.002002820558845997, - "step": 13130 - }, - { - "epoch": 2.239556692242114, - "grad_norm": 0.01897227205336094, - "learning_rate": 3.153503315102934e-05, - "loss": 0.0016582176089286805, - "step": 13135 - }, - { - "epoch": 2.2404092071611252, - "grad_norm": 0.07142049074172974, - "learning_rate": 3.15097388146796e-05, - "loss": 0.002489439025521278, - "step": 13140 - }, - { - "epoch": 2.2412617220801363, - "grad_norm": 0.05619347095489502, - "learning_rate": 3.148444727478303e-05, - "loss": 0.0021767957136034966, - "step": 13145 - }, - { - "epoch": 2.2421142369991474, - "grad_norm": 0.0950259119272232, - "learning_rate": 3.14591585431466e-05, - "loss": 0.001732981950044632, - "step": 13150 - }, - { - "epoch": 2.2429667519181584, - "grad_norm": 0.06186724454164505, - "learning_rate": 3.143387263157591e-05, - "loss": 0.001604793407022953, - "step": 13155 - }, - { - "epoch": 2.2438192668371695, - "grad_norm": 0.0921434834599495, - "learning_rate": 3.1408589551875256e-05, - "loss": 0.001957142725586891, - "step": 13160 - }, - { - "epoch": 2.2446717817561805, - "grad_norm": 0.05556231364607811, - "learning_rate": 3.138330931584763e-05, - "loss": 0.002686610072851181, - "step": 13165 - }, - { - "epoch": 2.2455242966751916, - "grad_norm": 0.10184850543737411, - "learning_rate": 3.1358031935294666e-05, - "loss": 0.0019098062068223954, - "step": 13170 - }, - { - "epoch": 2.246376811594203, - "grad_norm": 0.08860436826944351, - "learning_rate": 3.133275742201673e-05, - "loss": 0.002402664348483086, - "step": 13175 - }, - { - "epoch": 2.247229326513214, - "grad_norm": 0.06324724107980728, - "learning_rate": 3.130748578781278e-05, - "loss": 0.0018930312246084214, - "step": 13180 - }, - { - "epoch": 2.2480818414322252, - "grad_norm": 0.07382629811763763, - "learning_rate": 3.128221704448045e-05, - "loss": 0.0026824956759810446, - "step": 13185 - }, - { - "epoch": 2.2489343563512363, - "grad_norm": 0.1002819687128067, - "learning_rate": 3.125695120381603e-05, - "loss": 0.0030449360609054567, - "step": 13190 - }, - { - "epoch": 2.249616368286445, - "eval_loss": 0.046705588698387146, - "eval_runtime": 3.7196, - "eval_samples_per_second": 67.748, - "eval_steps_per_second": 1.075, - "step": 13194 - }, - { - "eval_cer_subset": 0.013842070122298761, - "eval_cer_subset_edit_distance": 850, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 13194 - }, - { - "epoch": 2.2497868712702473, - "grad_norm": 0.030392520129680634, - "learning_rate": 3.123168827761447e-05, - "loss": 0.0015232504345476627, - "step": 13195 - }, - { - "epoch": 2.2506393861892584, - "grad_norm": 0.04160630702972412, - "learning_rate": 3.1206428277669336e-05, - "loss": 0.0026638204231858253, - "step": 13200 - }, - { - "epoch": 2.2514919011082695, - "grad_norm": 0.06140404939651489, - "learning_rate": 3.118117121577284e-05, - "loss": 0.003001154027879238, - "step": 13205 - }, - { - "epoch": 2.2523444160272805, - "grad_norm": 0.06974830478429794, - "learning_rate": 3.115591710371581e-05, - "loss": 0.0032261811196804047, - "step": 13210 - }, - { - "epoch": 2.2531969309462916, - "grad_norm": 0.09120716899633408, - "learning_rate": 3.1130665953287695e-05, - "loss": 0.001386938989162445, - "step": 13215 - }, - { - "epoch": 2.2540494458653026, - "grad_norm": 0.06130429729819298, - "learning_rate": 3.110541777627661e-05, - "loss": 0.0014743787236511708, - "step": 13220 - }, - { - "epoch": 2.2549019607843137, - "grad_norm": 0.07033205777406693, - "learning_rate": 3.108017258446921e-05, - "loss": 0.003749256581068039, - "step": 13225 - }, - { - "epoch": 2.2557544757033248, - "grad_norm": 0.08650046586990356, - "learning_rate": 3.1054930389650804e-05, - "loss": 0.0023554276674985887, - "step": 13230 - }, - { - "epoch": 2.256606990622336, - "grad_norm": 0.06045643612742424, - "learning_rate": 3.102969120360529e-05, - "loss": 0.0019686706364154816, - "step": 13235 - }, - { - "epoch": 2.257459505541347, - "grad_norm": 0.1004268005490303, - "learning_rate": 3.100445503811514e-05, - "loss": 0.003136196732521057, - "step": 13240 - }, - { - "epoch": 2.258312020460358, - "grad_norm": 0.08810209482908249, - "learning_rate": 3.097922190496146e-05, - "loss": 0.002239716053009033, - "step": 13245 - }, - { - "epoch": 2.259164535379369, - "grad_norm": 0.10518727451562881, - "learning_rate": 3.095399181592392e-05, - "loss": 0.002447150461375713, - "step": 13250 - }, - { - "epoch": 2.26001705029838, - "grad_norm": 0.049536559730768204, - "learning_rate": 3.092876478278074e-05, - "loss": 0.0023296492174267767, - "step": 13255 - }, - { - "epoch": 2.260869565217391, - "grad_norm": 0.057701822370290756, - "learning_rate": 3.0903540817308734e-05, - "loss": 0.0018970953300595284, - "step": 13260 - }, - { - "epoch": 2.261722080136402, - "grad_norm": 0.04391616955399513, - "learning_rate": 3.087831993128333e-05, - "loss": 0.0026229951530694962, - "step": 13265 - }, - { - "epoch": 2.2625745950554137, - "grad_norm": 0.048150911927223206, - "learning_rate": 3.0853102136478444e-05, - "loss": 0.0015288691036403179, - "step": 13270 - }, - { - "epoch": 2.2634271099744243, - "grad_norm": 0.12074416130781174, - "learning_rate": 3.082788744466659e-05, - "loss": 0.0025329213589429856, - "step": 13275 - }, - { - "epoch": 2.264279624893436, - "grad_norm": 0.05400107055902481, - "learning_rate": 3.080267586761881e-05, - "loss": 0.0017294475808739662, - "step": 13280 - }, - { - "epoch": 2.265132139812447, - "grad_norm": 0.07027488201856613, - "learning_rate": 3.0777467417104717e-05, - "loss": 0.0026237966492772104, - "step": 13285 - }, - { - "epoch": 2.265984654731458, - "grad_norm": 0.06868001073598862, - "learning_rate": 3.075226210489247e-05, - "loss": 0.0021411897614598274, - "step": 13290 - }, - { - "epoch": 2.266837169650469, - "grad_norm": 0.07447243481874466, - "learning_rate": 3.072705994274874e-05, - "loss": 0.002808676287531853, - "step": 13295 - }, - { - "epoch": 2.26768968456948, - "grad_norm": 0.04292432591319084, - "learning_rate": 3.070186094243872e-05, - "loss": 0.001994679495692253, - "step": 13300 - }, - { - "epoch": 2.268542199488491, - "grad_norm": 0.06083334609866142, - "learning_rate": 3.067666511572614e-05, - "loss": 0.001621294766664505, - "step": 13305 - }, - { - "epoch": 2.269394714407502, - "grad_norm": 0.04339296743273735, - "learning_rate": 3.065147247437327e-05, - "loss": 0.002122482657432556, - "step": 13310 - }, - { - "epoch": 2.2702472293265132, - "grad_norm": 0.07901404052972794, - "learning_rate": 3.062628303014087e-05, - "loss": 0.0030757525935769083, - "step": 13315 - }, - { - "epoch": 2.2710997442455243, - "grad_norm": 0.046554502099752426, - "learning_rate": 3.060109679478821e-05, - "loss": 0.0022816451266407965, - "step": 13320 - }, - { - "epoch": 2.2719522591645354, - "grad_norm": 0.03428821638226509, - "learning_rate": 3.0575913780073036e-05, - "loss": 0.002047870494425297, - "step": 13325 - }, - { - "epoch": 2.2728047740835464, - "grad_norm": 0.09298217296600342, - "learning_rate": 3.0550733997751634e-05, - "loss": 0.002046193927526474, - "step": 13330 - }, - { - "epoch": 2.2736572890025575, - "grad_norm": 0.08458553999662399, - "learning_rate": 3.0525557459578786e-05, - "loss": 0.002566727437078953, - "step": 13335 - }, - { - "epoch": 2.2745098039215685, - "grad_norm": 0.10309132188558578, - "learning_rate": 3.050038417730772e-05, - "loss": 0.00358976349234581, - "step": 13340 - }, - { - "epoch": 2.2753623188405796, - "grad_norm": 0.08116701990365982, - "learning_rate": 3.0475214162690144e-05, - "loss": 0.003372102603316307, - "step": 13345 - }, - { - "epoch": 2.2762148337595907, - "grad_norm": 0.09258918464183807, - "learning_rate": 3.0450047427476292e-05, - "loss": 0.0037133049219846724, - "step": 13350 - }, - { - "epoch": 2.2770673486786017, - "grad_norm": 0.09618882834911346, - "learning_rate": 3.0424883983414797e-05, - "loss": 0.0024330444633960725, - "step": 13355 - }, - { - "epoch": 2.277919863597613, - "grad_norm": 0.04637463390827179, - "learning_rate": 3.039972384225282e-05, - "loss": 0.0011583495885133742, - "step": 13360 - }, - { - "epoch": 2.2787723785166243, - "grad_norm": 0.04919019341468811, - "learning_rate": 3.0374567015735953e-05, - "loss": 0.0017433254048228263, - "step": 13365 - }, - { - "epoch": 2.279624893435635, - "grad_norm": 0.07092445343732834, - "learning_rate": 3.0349413515608213e-05, - "loss": 0.0010275598615407944, - "step": 13370 - }, - { - "epoch": 2.2804774083546464, - "grad_norm": 0.06819095462560654, - "learning_rate": 3.03242633536121e-05, - "loss": 0.0018655678257346153, - "step": 13375 - }, - { - "epoch": 2.2813299232736575, - "grad_norm": 0.13397860527038574, - "learning_rate": 3.029911654148857e-05, - "loss": 0.0029754094779491425, - "step": 13380 - }, - { - "epoch": 2.2821824381926685, - "grad_norm": 0.09142930805683136, - "learning_rate": 3.0273973090976974e-05, - "loss": 0.0027707524597644804, - "step": 13385 - }, - { - "epoch": 2.2830349531116796, - "grad_norm": 0.06282728165388107, - "learning_rate": 3.0248833013815112e-05, - "loss": 0.0018412042409181595, - "step": 13390 - }, - { - "epoch": 2.2838874680306906, - "grad_norm": 0.05533494055271149, - "learning_rate": 3.0223696321739196e-05, - "loss": 0.0025158364325761793, - "step": 13395 - }, - { - "epoch": 2.2847399829497017, - "grad_norm": 0.08349598199129105, - "learning_rate": 3.0198563026483876e-05, - "loss": 0.002777436375617981, - "step": 13400 - }, - { - "epoch": 2.2855924978687128, - "grad_norm": 0.07469198107719421, - "learning_rate": 3.0173433139782227e-05, - "loss": 0.001951916702091694, - "step": 13405 - }, - { - "epoch": 2.286445012787724, - "grad_norm": 0.07126526534557343, - "learning_rate": 3.0148306673365708e-05, - "loss": 0.0031182590872049333, - "step": 13410 - }, - { - "epoch": 2.287297527706735, - "grad_norm": 0.06499479711055756, - "learning_rate": 3.0123183638964183e-05, - "loss": 0.001717902161180973, - "step": 13415 - }, - { - "epoch": 2.288150042625746, - "grad_norm": 0.03133346140384674, - "learning_rate": 3.0098064048305917e-05, - "loss": 0.0015830917283892632, - "step": 13420 - }, - { - "epoch": 2.289002557544757, - "grad_norm": 0.06725561618804932, - "learning_rate": 3.0072947913117573e-05, - "loss": 0.004541714489459991, - "step": 13425 - }, - { - "epoch": 2.289855072463768, - "grad_norm": 0.13644525408744812, - "learning_rate": 3.0047835245124216e-05, - "loss": 0.0027179479598999023, - "step": 13430 - }, - { - "epoch": 2.290707587382779, - "grad_norm": 0.06966832280158997, - "learning_rate": 3.0022726056049262e-05, - "loss": 0.0026542846113443374, - "step": 13435 - }, - { - "epoch": 2.29156010230179, - "grad_norm": 0.04449222609400749, - "learning_rate": 2.999762035761451e-05, - "loss": 0.0014596210792660713, - "step": 13440 - }, - { - "epoch": 2.2924126172208013, - "grad_norm": 0.05453059822320938, - "learning_rate": 2.9972518161540124e-05, - "loss": 0.0024629242718219755, - "step": 13445 - }, - { - "epoch": 2.2932651321398123, - "grad_norm": 0.06370346248149872, - "learning_rate": 2.9947419479544677e-05, - "loss": 0.0018157381564378738, - "step": 13450 - }, - { - "epoch": 2.2941176470588234, - "grad_norm": 0.149154931306839, - "learning_rate": 2.992232432334505e-05, - "loss": 0.0038953136652708055, - "step": 13455 - }, - { - "epoch": 2.2949701619778344, - "grad_norm": 0.08758609741926193, - "learning_rate": 2.9897232704656494e-05, - "loss": 0.00197781715542078, - "step": 13460 - }, - { - "epoch": 2.2958226768968455, - "grad_norm": 0.04877983406186104, - "learning_rate": 2.9872144635192625e-05, - "loss": 0.0018029011785984038, - "step": 13465 - }, - { - "epoch": 2.296675191815857, - "grad_norm": 0.029492873698472977, - "learning_rate": 2.984706012666536e-05, - "loss": 0.00226336307823658, - "step": 13470 - }, - { - "epoch": 2.2975277067348676, - "grad_norm": 0.09038830548524857, - "learning_rate": 2.982197919078502e-05, - "loss": 0.0024063091725111006, - "step": 13475 - }, - { - "epoch": 2.298380221653879, - "grad_norm": 0.08629653602838516, - "learning_rate": 2.97969018392602e-05, - "loss": 0.0019390033558011055, - "step": 13480 - }, - { - "epoch": 2.29923273657289, - "grad_norm": 0.08667116612195969, - "learning_rate": 2.9771828083797832e-05, - "loss": 0.003171199932694435, - "step": 13485 - }, - { - "epoch": 2.3000852514919012, - "grad_norm": 0.07069036364555359, - "learning_rate": 2.974675793610318e-05, - "loss": 0.002098524570465088, - "step": 13490 - }, - { - "epoch": 2.3009377664109123, - "grad_norm": 0.0887150913476944, - "learning_rate": 2.972169140787985e-05, - "loss": 0.001710166409611702, - "step": 13495 - }, - { - "epoch": 2.3017902813299234, - "grad_norm": 0.08873872458934784, - "learning_rate": 2.969662851082972e-05, - "loss": 0.002029442973434925, - "step": 13500 - }, - { - "epoch": 2.3026427962489344, - "grad_norm": 0.09199293702840805, - "learning_rate": 2.9671569256652976e-05, - "loss": 0.0015904868021607399, - "step": 13505 - }, - { - "epoch": 2.3034953111679455, - "grad_norm": 0.07347019016742706, - "learning_rate": 2.9646513657048106e-05, - "loss": 0.002239963971078396, - "step": 13510 - }, - { - "epoch": 2.3043478260869565, - "grad_norm": 0.056011516600847244, - "learning_rate": 2.9621461723711897e-05, - "loss": 0.003089374490082264, - "step": 13515 - }, - { - "epoch": 2.3052003410059676, - "grad_norm": 0.05805368721485138, - "learning_rate": 2.9596413468339447e-05, - "loss": 0.0011475264094769956, - "step": 13520 - }, - { - "epoch": 2.3060528559249787, - "grad_norm": 0.08263146877288818, - "learning_rate": 2.95713689026241e-05, - "loss": 0.0027705669403076173, - "step": 13525 - }, - { - "epoch": 2.3069053708439897, - "grad_norm": 0.10079067945480347, - "learning_rate": 2.954632803825749e-05, - "loss": 0.0038317229598760607, - "step": 13530 - }, - { - "epoch": 2.307757885763001, - "grad_norm": 0.07248156517744064, - "learning_rate": 2.9521290886929514e-05, - "loss": 0.0017008930444717407, - "step": 13535 - }, - { - "epoch": 2.308610400682012, - "grad_norm": 0.09252380579710007, - "learning_rate": 2.949625746032838e-05, - "loss": 0.0021895600482821466, - "step": 13540 - }, - { - "epoch": 2.309462915601023, - "grad_norm": 0.03231853246688843, - "learning_rate": 2.947122777014051e-05, - "loss": 0.002471560053527355, - "step": 13545 - }, - { - "epoch": 2.310315430520034, - "grad_norm": 0.09625072032213211, - "learning_rate": 2.944620182805059e-05, - "loss": 0.002643503434956074, - "step": 13550 - }, - { - "epoch": 2.311167945439045, - "grad_norm": 0.11135435849428177, - "learning_rate": 2.9421179645741552e-05, - "loss": 0.0015677452087402345, - "step": 13555 - }, - { - "epoch": 2.312020460358056, - "grad_norm": 0.07239774614572525, - "learning_rate": 2.939616123489459e-05, - "loss": 0.0020940851420164107, - "step": 13560 - }, - { - "epoch": 2.3128729752770676, - "grad_norm": 0.0686500295996666, - "learning_rate": 2.937114660718915e-05, - "loss": 0.004896241426467896, - "step": 13565 - }, - { - "epoch": 2.313725490196078, - "grad_norm": 0.04634196311235428, - "learning_rate": 2.934613577430288e-05, - "loss": 0.0017542928457260133, - "step": 13570 - }, - { - "epoch": 2.3145780051150897, - "grad_norm": 0.08693452924489975, - "learning_rate": 2.9321128747911657e-05, - "loss": 0.003124900534749031, - "step": 13575 - }, - { - "epoch": 2.3154305200341008, - "grad_norm": 0.053911175578832626, - "learning_rate": 2.9296125539689615e-05, - "loss": 0.001699080690741539, - "step": 13580 - }, - { - "epoch": 2.316283034953112, - "grad_norm": 0.07346964627504349, - "learning_rate": 2.9271126161309052e-05, - "loss": 0.0027174966409802435, - "step": 13585 - }, - { - "epoch": 2.317135549872123, - "grad_norm": 0.07157005369663239, - "learning_rate": 2.9246130624440546e-05, - "loss": 0.0026199813932180406, - "step": 13590 - }, - { - "epoch": 2.317988064791134, - "grad_norm": 0.0852048397064209, - "learning_rate": 2.922113894075282e-05, - "loss": 0.002349478751420975, - "step": 13595 - }, - { - "epoch": 2.318840579710145, - "grad_norm": 0.069539375603199, - "learning_rate": 2.9196151121912828e-05, - "loss": 0.002428753860294819, - "step": 13600 - }, - { - "epoch": 2.319693094629156, - "grad_norm": 0.06993792951107025, - "learning_rate": 2.9171167179585712e-05, - "loss": 0.0025543162599205972, - "step": 13605 - }, - { - "epoch": 2.320545609548167, - "grad_norm": 0.09210001677274704, - "learning_rate": 2.9146187125434826e-05, - "loss": 0.004642657563090324, - "step": 13610 - }, - { - "epoch": 2.321398124467178, - "grad_norm": 0.03652270883321762, - "learning_rate": 2.9121210971121674e-05, - "loss": 0.0019740790128707887, - "step": 13615 - }, - { - "epoch": 2.3222506393861893, - "grad_norm": 0.032051410526037216, - "learning_rate": 2.9096238728305957e-05, - "loss": 0.0020309314131736755, - "step": 13620 - }, - { - "epoch": 2.3231031543052003, - "grad_norm": 0.08898582309484482, - "learning_rate": 2.907127040864556e-05, - "loss": 0.0012645654380321503, - "step": 13625 - }, - { - "epoch": 2.3239556692242114, - "grad_norm": 0.21863117814064026, - "learning_rate": 2.9046306023796493e-05, - "loss": 0.0025009674951434135, - "step": 13630 - }, - { - "epoch": 2.3248081841432224, - "grad_norm": 0.06401807814836502, - "learning_rate": 2.9021345585413004e-05, - "loss": 0.002794540859758854, - "step": 13635 - }, - { - "epoch": 2.3256606990622335, - "grad_norm": 0.049468256533145905, - "learning_rate": 2.8996389105147437e-05, - "loss": 0.0024725871160626413, - "step": 13640 - }, - { - "epoch": 2.3265132139812446, - "grad_norm": 0.0904751718044281, - "learning_rate": 2.8971436594650292e-05, - "loss": 0.0033982183784246446, - "step": 13645 - }, - { - "epoch": 2.3273657289002556, - "grad_norm": 0.11576029658317566, - "learning_rate": 2.8946488065570242e-05, - "loss": 0.004228492826223373, - "step": 13650 - }, - { - "epoch": 2.3282182438192667, - "grad_norm": 0.08191253244876862, - "learning_rate": 2.892154352955411e-05, - "loss": 0.0015400771982967854, - "step": 13655 - }, - { - "epoch": 2.3290707587382777, - "grad_norm": 0.03641185909509659, - "learning_rate": 2.8896602998246817e-05, - "loss": 0.002032958157360554, - "step": 13660 - }, - { - "epoch": 2.329923273657289, - "grad_norm": 0.09123575687408447, - "learning_rate": 2.8871666483291433e-05, - "loss": 0.00326089970767498, - "step": 13665 - }, - { - "epoch": 2.3307757885763003, - "grad_norm": 0.07897967845201492, - "learning_rate": 2.8846733996329148e-05, - "loss": 0.0022133901715278626, - "step": 13670 - }, - { - "epoch": 2.3316283034953114, - "grad_norm": 0.0802898034453392, - "learning_rate": 2.8821805548999275e-05, - "loss": 0.002646717242896557, - "step": 13675 - }, - { - "epoch": 2.3324808184143224, - "grad_norm": 0.05337275192141533, - "learning_rate": 2.879688115293926e-05, - "loss": 0.0022310430184006693, - "step": 13680 - }, - { - "epoch": 2.3333333333333335, - "grad_norm": 0.026133684441447258, - "learning_rate": 2.8771960819784635e-05, - "loss": 0.0013902435079216958, - "step": 13685 - }, - { - "epoch": 2.3341858482523445, - "grad_norm": 0.0701865404844284, - "learning_rate": 2.8747044561169026e-05, - "loss": 0.0030527923256158827, - "step": 13690 - }, - { - "epoch": 2.3350383631713556, - "grad_norm": 0.023815227672457695, - "learning_rate": 2.8722132388724187e-05, - "loss": 0.001688534766435623, - "step": 13695 - }, - { - "epoch": 2.3358908780903667, - "grad_norm": 0.0819278210401535, - "learning_rate": 2.8697224314079928e-05, - "loss": 0.0028546562418341635, - "step": 13700 - }, - { - "epoch": 2.3367433930093777, - "grad_norm": 0.03683038055896759, - "learning_rate": 2.86723203488642e-05, - "loss": 0.0024238623678684234, - "step": 13705 - }, - { - "epoch": 2.337595907928389, - "grad_norm": 0.050080958753824234, - "learning_rate": 2.8647420504702977e-05, - "loss": 0.001459009852260351, - "step": 13710 - }, - { - "epoch": 2.3384484228474, - "grad_norm": 0.04246260225772858, - "learning_rate": 2.8622524793220336e-05, - "loss": 0.0024909645318984984, - "step": 13715 - }, - { - "epoch": 2.339300937766411, - "grad_norm": 0.04298778250813484, - "learning_rate": 2.8597633226038422e-05, - "loss": 0.0017042815685272216, - "step": 13720 - }, - { - "epoch": 2.340153452685422, - "grad_norm": 0.08792980760335922, - "learning_rate": 2.857274581477747e-05, - "loss": 0.0021930102258920668, - "step": 13725 - }, - { - "epoch": 2.341005967604433, - "grad_norm": 0.030293628573417664, - "learning_rate": 2.854786257105573e-05, - "loss": 0.002472694218158722, - "step": 13730 - }, - { - "epoch": 2.341858482523444, - "grad_norm": 0.100398488342762, - "learning_rate": 2.852298350648953e-05, - "loss": 0.0016385417431592942, - "step": 13735 - }, - { - "epoch": 2.342710997442455, - "grad_norm": 0.056936830282211304, - "learning_rate": 2.849810863269325e-05, - "loss": 0.0014652124606072902, - "step": 13740 - }, - { - "epoch": 2.343563512361466, - "grad_norm": 0.04332558810710907, - "learning_rate": 2.8473237961279293e-05, - "loss": 0.0029267419129610063, - "step": 13745 - }, - { - "epoch": 2.3444160272804773, - "grad_norm": 0.051982469856739044, - "learning_rate": 2.8448371503858143e-05, - "loss": 0.001836571842432022, - "step": 13750 - }, - { - "epoch": 2.3452685421994883, - "grad_norm": 0.1215415671467781, - "learning_rate": 2.8423509272038276e-05, - "loss": 0.002749188058078289, - "step": 13755 - }, - { - "epoch": 2.3461210571184994, - "grad_norm": 0.044508881866931915, - "learning_rate": 2.8398651277426203e-05, - "loss": 0.0023854803293943405, - "step": 13760 - }, - { - "epoch": 2.346973572037511, - "grad_norm": 0.09419308602809906, - "learning_rate": 2.837379753162647e-05, - "loss": 0.00259498693048954, - "step": 13765 - }, - { - "epoch": 2.3478260869565215, - "grad_norm": 0.0996370017528534, - "learning_rate": 2.8348948046241616e-05, - "loss": 0.003275657445192337, - "step": 13770 - }, - { - "epoch": 2.348678601875533, - "grad_norm": 0.0585092268884182, - "learning_rate": 2.8324102832872238e-05, - "loss": 0.0023032236844301225, - "step": 13775 - }, - { - "epoch": 2.349531116794544, - "grad_norm": 0.06259947270154953, - "learning_rate": 2.829926190311689e-05, - "loss": 0.0022853843867778776, - "step": 13780 - }, - { - "epoch": 2.350383631713555, - "grad_norm": 0.1343093067407608, - "learning_rate": 2.827442526857214e-05, - "loss": 0.0019558047875761985, - "step": 13785 - }, - { - "epoch": 2.351236146632566, - "grad_norm": 0.03901712968945503, - "learning_rate": 2.8249592940832552e-05, - "loss": 0.0019383212551474572, - "step": 13790 - }, - { - "epoch": 2.3520886615515773, - "grad_norm": 0.08933644741773605, - "learning_rate": 2.8224764931490707e-05, - "loss": 0.0019501563161611556, - "step": 13795 - }, - { - "epoch": 2.3529411764705883, - "grad_norm": 0.06790988147258759, - "learning_rate": 2.819994125213713e-05, - "loss": 0.0018905265256762504, - "step": 13800 - }, - { - "epoch": 2.3537936913895994, - "grad_norm": 0.10576235502958298, - "learning_rate": 2.817512191436033e-05, - "loss": 0.0017807571217417716, - "step": 13805 - }, - { - "epoch": 2.3546462063086104, - "grad_norm": 0.07914351671934128, - "learning_rate": 2.8150306929746826e-05, - "loss": 0.002854841575026512, - "step": 13810 - }, - { - "epoch": 2.3554987212276215, - "grad_norm": 0.10912367701530457, - "learning_rate": 2.812549630988104e-05, - "loss": 0.0028494328260421755, - "step": 13815 - }, - { - "epoch": 2.3563512361466326, - "grad_norm": 0.07309834659099579, - "learning_rate": 2.8100690066345434e-05, - "loss": 0.001808878593146801, - "step": 13820 - }, - { - "epoch": 2.3572037510656436, - "grad_norm": 0.07053545117378235, - "learning_rate": 2.807588821072037e-05, - "loss": 0.0024722769856452944, - "step": 13825 - }, - { - "epoch": 2.3580562659846547, - "grad_norm": 0.06512318551540375, - "learning_rate": 2.8051090754584176e-05, - "loss": 0.0025828687474131586, - "step": 13830 - }, - { - "epoch": 2.3589087809036657, - "grad_norm": 0.06797149777412415, - "learning_rate": 2.8026297709513125e-05, - "loss": 0.0021874068304896356, - "step": 13835 - }, - { - "epoch": 2.359761295822677, - "grad_norm": 0.12261441349983215, - "learning_rate": 2.800150908708145e-05, - "loss": 0.00291924811899662, - "step": 13840 - }, - { - "epoch": 2.360613810741688, - "grad_norm": 0.05696386098861694, - "learning_rate": 2.797672489886131e-05, - "loss": 0.003488580882549286, - "step": 13845 - }, - { - "epoch": 2.361466325660699, - "grad_norm": 0.3340120315551758, - "learning_rate": 2.795194515642276e-05, - "loss": 0.0033275336027145386, - "step": 13850 - }, - { - "epoch": 2.36231884057971, - "grad_norm": 0.08209964632987976, - "learning_rate": 2.7927169871333836e-05, - "loss": 0.0020242417231202126, - "step": 13855 - }, - { - "epoch": 2.363171355498721, - "grad_norm": 0.04942183569073677, - "learning_rate": 2.7902399055160435e-05, - "loss": 0.0015470117330551147, - "step": 13860 - }, - { - "epoch": 2.364023870417732, - "grad_norm": 0.07711990922689438, - "learning_rate": 2.7877632719466438e-05, - "loss": 0.002402086555957794, - "step": 13865 - }, - { - "epoch": 2.3648763853367436, - "grad_norm": 0.06835886090993881, - "learning_rate": 2.7852870875813572e-05, - "loss": 0.002709807641804218, - "step": 13870 - }, - { - "epoch": 2.3657289002557547, - "grad_norm": 0.01572684571146965, - "learning_rate": 2.7828113535761476e-05, - "loss": 0.0037427868694067, - "step": 13875 - }, - { - "epoch": 2.3665814151747657, - "grad_norm": 0.03897464647889137, - "learning_rate": 2.7803360710867728e-05, - "loss": 0.0029004696756601334, - "step": 13880 - }, - { - "epoch": 2.367433930093777, - "grad_norm": 0.1281740814447403, - "learning_rate": 2.777861241268774e-05, - "loss": 0.0021549168974161147, - "step": 13885 - }, - { - "epoch": 2.368286445012788, - "grad_norm": 0.04390920698642731, - "learning_rate": 2.7753868652774873e-05, - "loss": 0.0019567809998989106, - "step": 13890 - }, - { - "epoch": 2.369138959931799, - "grad_norm": 0.09526315331459045, - "learning_rate": 2.7729129442680314e-05, - "loss": 0.001414876524358988, - "step": 13895 - }, - { - "epoch": 2.36999147485081, - "grad_norm": 0.041541386395692825, - "learning_rate": 2.7704394793953162e-05, - "loss": 0.0023986730724573136, - "step": 13900 - }, - { - "epoch": 2.370843989769821, - "grad_norm": 0.056684307754039764, - "learning_rate": 2.7679664718140354e-05, - "loss": 0.0023011576384305956, - "step": 13905 - }, - { - "epoch": 2.371696504688832, - "grad_norm": 0.04548821225762367, - "learning_rate": 2.765493922678674e-05, - "loss": 0.002776668407022953, - "step": 13910 - }, - { - "epoch": 2.372549019607843, - "grad_norm": 0.05635173246264458, - "learning_rate": 2.763021833143499e-05, - "loss": 0.0021549917757511137, - "step": 13915 - }, - { - "epoch": 2.373401534526854, - "grad_norm": 0.06744635850191116, - "learning_rate": 2.7605502043625636e-05, - "loss": 0.0014210479333996774, - "step": 13920 - }, - { - "epoch": 2.3742540494458653, - "grad_norm": 0.03131572902202606, - "learning_rate": 2.758079037489707e-05, - "loss": 0.002670668438076973, - "step": 13925 - }, - { - "epoch": 2.3751065643648763, - "grad_norm": 0.1132262721657753, - "learning_rate": 2.75560833367855e-05, - "loss": 0.004025829955935478, - "step": 13930 - }, - { - "epoch": 2.3759590792838874, - "grad_norm": 0.08719862997531891, - "learning_rate": 2.753138094082502e-05, - "loss": 0.0026264961808919905, - "step": 13935 - }, - { - "epoch": 2.3768115942028984, - "grad_norm": 0.045282550156116486, - "learning_rate": 2.7506683198547527e-05, - "loss": 0.0016890913248062134, - "step": 13940 - }, - { - "epoch": 2.3776641091219095, - "grad_norm": 0.03815371170639992, - "learning_rate": 2.7481990121482737e-05, - "loss": 0.0017980627715587616, - "step": 13945 - }, - { - "epoch": 2.3785166240409206, - "grad_norm": 0.05136419087648392, - "learning_rate": 2.745730172115819e-05, - "loss": 0.0017518583685159684, - "step": 13950 - }, - { - "epoch": 2.3793691389599316, - "grad_norm": 0.076651431620121, - "learning_rate": 2.743261800909929e-05, - "loss": 0.0021933792158961296, - "step": 13955 - }, - { - "epoch": 2.3802216538789427, - "grad_norm": 0.04328504204750061, - "learning_rate": 2.740793899682919e-05, - "loss": 0.0015049883164465427, - "step": 13960 - }, - { - "epoch": 2.381074168797954, - "grad_norm": 0.029004819691181183, - "learning_rate": 2.7383264695868863e-05, - "loss": 0.0023387337103486063, - "step": 13965 - }, - { - "epoch": 2.381926683716965, - "grad_norm": 0.11483976989984512, - "learning_rate": 2.7358595117737118e-05, - "loss": 0.00246519148349762, - "step": 13970 - }, - { - "epoch": 2.3827791986359763, - "grad_norm": 0.09073470532894135, - "learning_rate": 2.733393027395051e-05, - "loss": 0.0031791247427463533, - "step": 13975 - }, - { - "epoch": 2.3836317135549874, - "grad_norm": 0.12094864249229431, - "learning_rate": 2.7309270176023436e-05, - "loss": 0.0025795340538024903, - "step": 13980 - }, - { - "epoch": 2.3844842284739984, - "grad_norm": 0.13568098843097687, - "learning_rate": 2.7284614835468035e-05, - "loss": 0.0057578980922698975, - "step": 13985 - }, - { - "epoch": 2.3853367433930095, - "grad_norm": 0.06415567547082901, - "learning_rate": 2.725996426379423e-05, - "loss": 0.0024575673043727873, - "step": 13990 - }, - { - "epoch": 2.3861892583120206, - "grad_norm": 0.05898221582174301, - "learning_rate": 2.723531847250975e-05, - "loss": 0.0013358716852962971, - "step": 13995 - }, - { - "epoch": 2.3870417732310316, - "grad_norm": 0.019117049872875214, - "learning_rate": 2.721067747312004e-05, - "loss": 0.0016026780009269713, - "step": 14000 - }, - { - "epoch": 2.3878942881500427, - "grad_norm": 0.028591491281986237, - "learning_rate": 2.7186041277128383e-05, - "loss": 0.001663113385438919, - "step": 14005 - }, - { - "epoch": 2.3887468030690537, - "grad_norm": 0.03701665997505188, - "learning_rate": 2.7161409896035733e-05, - "loss": 0.0012899260967969895, - "step": 14010 - }, - { - "epoch": 2.389599317988065, - "grad_norm": 0.05777057632803917, - "learning_rate": 2.7136783341340862e-05, - "loss": 0.0018556809052824974, - "step": 14015 - }, - { - "epoch": 2.390451832907076, - "grad_norm": 0.04922354966402054, - "learning_rate": 2.711216162454024e-05, - "loss": 0.002131880074739456, - "step": 14020 - }, - { - "epoch": 2.391304347826087, - "grad_norm": 0.045851659029722214, - "learning_rate": 2.708754475712814e-05, - "loss": 0.001147150807082653, - "step": 14025 - }, - { - "epoch": 2.392156862745098, - "grad_norm": 0.11482678353786469, - "learning_rate": 2.7062932750596514e-05, - "loss": 0.0027298804372549055, - "step": 14030 - }, - { - "epoch": 2.393009377664109, - "grad_norm": 0.054821670055389404, - "learning_rate": 2.7038325616435058e-05, - "loss": 0.0018268844112753868, - "step": 14035 - }, - { - "epoch": 2.39386189258312, - "grad_norm": 0.09821441024541855, - "learning_rate": 2.701372336613122e-05, - "loss": 0.002109052799642086, - "step": 14040 - }, - { - "epoch": 2.394714407502131, - "grad_norm": 0.04923141747713089, - "learning_rate": 2.6989126011170115e-05, - "loss": 0.0021799976006150247, - "step": 14045 - }, - { - "epoch": 2.395566922421142, - "grad_norm": 0.1223372220993042, - "learning_rate": 2.6964533563034648e-05, - "loss": 0.00261150524020195, - "step": 14050 - }, - { - "epoch": 2.3964194373401533, - "grad_norm": 0.04964495450258255, - "learning_rate": 2.6939946033205374e-05, - "loss": 0.001747405156493187, - "step": 14055 - }, - { - "epoch": 2.397271952259165, - "grad_norm": 0.05354087054729462, - "learning_rate": 2.6915363433160562e-05, - "loss": 0.0017880409955978393, - "step": 14060 - }, - { - "epoch": 2.3981244671781754, - "grad_norm": 0.0796194076538086, - "learning_rate": 2.6890785774376188e-05, - "loss": 0.002280256152153015, - "step": 14065 - }, - { - "epoch": 2.398976982097187, - "grad_norm": 0.048979468643665314, - "learning_rate": 2.6866213068325942e-05, - "loss": 0.0034266695380210876, - "step": 14070 - }, - { - "epoch": 2.399829497016198, - "grad_norm": 0.11115774512290955, - "learning_rate": 2.6841645326481166e-05, - "loss": 0.0014098694548010827, - "step": 14075 - }, - { - "epoch": 2.400682011935209, - "grad_norm": 0.14144426584243774, - "learning_rate": 2.681708256031089e-05, - "loss": 0.0017399771139025688, - "step": 14080 - }, - { - "epoch": 2.40153452685422, - "grad_norm": 0.060562510043382645, - "learning_rate": 2.6792524781281846e-05, - "loss": 0.0031288094818592072, - "step": 14085 - }, - { - "epoch": 2.402387041773231, - "grad_norm": 0.08271291851997375, - "learning_rate": 2.6767972000858402e-05, - "loss": 0.002268883027136326, - "step": 14090 - }, - { - "epoch": 2.403239556692242, - "grad_norm": 0.08203598111867905, - "learning_rate": 2.674342423050264e-05, - "loss": 0.0017265897244215012, - "step": 14095 - }, - { - "epoch": 2.4040920716112533, - "grad_norm": 0.07809042930603027, - "learning_rate": 2.6718881481674265e-05, - "loss": 0.0032232727855443953, - "step": 14100 - }, - { - "epoch": 2.4049445865302643, - "grad_norm": 0.043053366243839264, - "learning_rate": 2.6694343765830633e-05, - "loss": 0.0014350255951285362, - "step": 14105 - }, - { - "epoch": 2.4057971014492754, - "grad_norm": 0.2139715999364853, - "learning_rate": 2.666981109442679e-05, - "loss": 0.002208554185926914, - "step": 14110 - }, - { - "epoch": 2.4066496163682864, - "grad_norm": 0.028433851897716522, - "learning_rate": 2.6645283478915373e-05, - "loss": 0.0033426061272621155, - "step": 14115 - }, - { - "epoch": 2.4075021312872975, - "grad_norm": 0.03152618184685707, - "learning_rate": 2.6620760930746726e-05, - "loss": 0.0017683111131191255, - "step": 14120 - }, - { - "epoch": 2.4083546462063086, - "grad_norm": 0.11559031158685684, - "learning_rate": 2.6596243461368762e-05, - "loss": 0.0027762461453676225, - "step": 14125 - }, - { - "epoch": 2.4092071611253196, - "grad_norm": 0.08188942819833755, - "learning_rate": 2.6571731082227068e-05, - "loss": 0.0029629599303007126, - "step": 14130 - }, - { - "epoch": 2.4100596760443307, - "grad_norm": 0.03179270401597023, - "learning_rate": 2.654722380476482e-05, - "loss": 0.001593652181327343, - "step": 14135 - }, - { - "epoch": 2.4109121909633418, - "grad_norm": 0.03763008117675781, - "learning_rate": 2.652272164042285e-05, - "loss": 0.003974568471312523, - "step": 14140 - }, - { - "epoch": 2.411764705882353, - "grad_norm": 0.06221388280391693, - "learning_rate": 2.649822460063958e-05, - "loss": 0.0021382227540016176, - "step": 14145 - }, - { - "epoch": 2.412617220801364, - "grad_norm": 0.13541199266910553, - "learning_rate": 2.6473732696851025e-05, - "loss": 0.0030446551740169526, - "step": 14150 - }, - { - "epoch": 2.413469735720375, - "grad_norm": 0.07515605539083481, - "learning_rate": 2.6449245940490843e-05, - "loss": 0.0023170780390501023, - "step": 14155 - }, - { - "epoch": 2.414322250639386, - "grad_norm": 0.029287993907928467, - "learning_rate": 2.6424764342990247e-05, - "loss": 0.002732834219932556, - "step": 14160 - }, - { - "epoch": 2.4151747655583975, - "grad_norm": 0.056158751249313354, - "learning_rate": 2.6400287915778073e-05, - "loss": 0.0026283055543899537, - "step": 14165 - }, - { - "epoch": 2.416027280477408, - "grad_norm": 0.05005735903978348, - "learning_rate": 2.6375816670280742e-05, - "loss": 0.0021377095952630045, - "step": 14170 - }, - { - "epoch": 2.4168797953964196, - "grad_norm": 0.039338257163763046, - "learning_rate": 2.6351350617922217e-05, - "loss": 0.0010171877220273018, - "step": 14175 - }, - { - "epoch": 2.4177323103154307, - "grad_norm": 0.054605189710855484, - "learning_rate": 2.6326889770124074e-05, - "loss": 0.0015358464792370797, - "step": 14180 - }, - { - "epoch": 2.4185848252344417, - "grad_norm": 0.05107913911342621, - "learning_rate": 2.630243413830547e-05, - "loss": 0.0014638695865869522, - "step": 14185 - }, - { - "epoch": 2.419437340153453, - "grad_norm": 0.14121516048908234, - "learning_rate": 2.62779837338831e-05, - "loss": 0.0018762655556201935, - "step": 14190 - }, - { - "epoch": 2.420289855072464, - "grad_norm": 0.13554073870182037, - "learning_rate": 2.625353856827121e-05, - "loss": 0.002315247431397438, - "step": 14195 - }, - { - "epoch": 2.421142369991475, - "grad_norm": 0.07378100603818893, - "learning_rate": 2.6229098652881636e-05, - "loss": 0.0017681105062365531, - "step": 14200 - }, - { - "epoch": 2.421994884910486, - "grad_norm": 0.0729142278432846, - "learning_rate": 2.6204663999123712e-05, - "loss": 0.0013508319854736329, - "step": 14205 - }, - { - "epoch": 2.422847399829497, - "grad_norm": 0.09028290957212448, - "learning_rate": 2.6180234618404393e-05, - "loss": 0.0023917261511087417, - "step": 14210 - }, - { - "epoch": 2.423699914748508, - "grad_norm": 0.042102012783288956, - "learning_rate": 2.6155810522128105e-05, - "loss": 0.001337253674864769, - "step": 14215 - }, - { - "epoch": 2.424552429667519, - "grad_norm": 0.126102477312088, - "learning_rate": 2.6131391721696812e-05, - "loss": 0.0030670080333948134, - "step": 14220 - }, - { - "epoch": 2.42540494458653, - "grad_norm": 0.08583983033895493, - "learning_rate": 2.6106978228510047e-05, - "loss": 0.0025723014026880265, - "step": 14225 - }, - { - "epoch": 2.4262574595055413, - "grad_norm": 0.0516071692109108, - "learning_rate": 2.608257005396482e-05, - "loss": 0.0020857708528637885, - "step": 14230 - }, - { - "epoch": 2.4271099744245523, - "grad_norm": 0.08321108669042587, - "learning_rate": 2.6058167209455697e-05, - "loss": 0.0023237552493810655, - "step": 14235 - }, - { - "epoch": 2.4279624893435634, - "grad_norm": 0.04344337806105614, - "learning_rate": 2.6033769706374727e-05, - "loss": 0.0016502588987350464, - "step": 14240 - }, - { - "epoch": 2.4288150042625745, - "grad_norm": 0.10716593265533447, - "learning_rate": 2.6009377556111488e-05, - "loss": 0.002213199995458126, - "step": 14245 - }, - { - "epoch": 2.4296675191815855, - "grad_norm": 0.08346270024776459, - "learning_rate": 2.598499077005302e-05, - "loss": 0.0023431163281202316, - "step": 14250 - }, - { - "epoch": 2.4305200341005966, - "grad_norm": 0.032770343124866486, - "learning_rate": 2.596060935958392e-05, - "loss": 0.0011562082916498184, - "step": 14255 - }, - { - "epoch": 2.431372549019608, - "grad_norm": 0.09246552735567093, - "learning_rate": 2.593623333608623e-05, - "loss": 0.002459176816046238, - "step": 14260 - }, - { - "epoch": 2.4322250639386187, - "grad_norm": 0.05482151731848717, - "learning_rate": 2.5911862710939474e-05, - "loss": 0.0019333874806761742, - "step": 14265 - }, - { - "epoch": 2.43307757885763, - "grad_norm": 0.03243163228034973, - "learning_rate": 2.588749749552069e-05, - "loss": 0.0017584215849637986, - "step": 14270 - }, - { - "epoch": 2.4339300937766413, - "grad_norm": 0.07286939024925232, - "learning_rate": 2.586313770120434e-05, - "loss": 0.002444162592291832, - "step": 14275 - }, - { - "epoch": 2.4347826086956523, - "grad_norm": 0.05575154721736908, - "learning_rate": 2.583878333936243e-05, - "loss": 0.0024999476969242095, - "step": 14280 - }, - { - "epoch": 2.4356351236146634, - "grad_norm": 0.10262400656938553, - "learning_rate": 2.5814434421364354e-05, - "loss": 0.0018360136076807977, - "step": 14285 - }, - { - "epoch": 2.4364876385336744, - "grad_norm": 0.023329658433794975, - "learning_rate": 2.5790090958577017e-05, - "loss": 0.002157992497086525, - "step": 14290 - }, - { - "epoch": 2.4373401534526855, - "grad_norm": 0.11155838519334793, - "learning_rate": 2.576575296236473e-05, - "loss": 0.002236923947930336, - "step": 14295 - }, - { - "epoch": 2.4381926683716966, - "grad_norm": 0.015751022845506668, - "learning_rate": 2.5741420444089317e-05, - "loss": 0.0023830370977520944, - "step": 14300 - }, - { - "epoch": 2.4390451832907076, - "grad_norm": 0.06451129168272018, - "learning_rate": 2.5717093415109982e-05, - "loss": 0.0012244164943695068, - "step": 14305 - }, - { - "epoch": 2.4398976982097187, - "grad_norm": 0.05141889676451683, - "learning_rate": 2.569277188678339e-05, - "loss": 0.0008386586792767048, - "step": 14310 - }, - { - "epoch": 2.4407502131287298, - "grad_norm": 0.07528503239154816, - "learning_rate": 2.5668455870463654e-05, - "loss": 0.0027780460193753244, - "step": 14315 - }, - { - "epoch": 2.441602728047741, - "grad_norm": 0.0676177367568016, - "learning_rate": 2.5644145377502277e-05, - "loss": 0.002171286940574646, - "step": 14320 - }, - { - "epoch": 2.442455242966752, - "grad_norm": 0.03209437057375908, - "learning_rate": 2.5619840419248228e-05, - "loss": 0.0011549444869160652, - "step": 14325 - }, - { - "epoch": 2.443307757885763, - "grad_norm": 0.0711345300078392, - "learning_rate": 2.559554100704787e-05, - "loss": 0.0029217278584837913, - "step": 14330 - }, - { - "epoch": 2.444160272804774, - "grad_norm": 0.07314640283584595, - "learning_rate": 2.5571247152244955e-05, - "loss": 0.0019763100892305372, - "step": 14335 - }, - { - "epoch": 2.445012787723785, - "grad_norm": 0.058573171496391296, - "learning_rate": 2.5546958866180686e-05, - "loss": 0.0023175042122602465, - "step": 14340 - }, - { - "epoch": 2.445865302642796, - "grad_norm": 0.06780791282653809, - "learning_rate": 2.552267616019362e-05, - "loss": 0.0022560084238648415, - "step": 14345 - }, - { - "epoch": 2.446717817561807, - "grad_norm": 0.0834873840212822, - "learning_rate": 2.5498399045619755e-05, - "loss": 0.0015980398282408714, - "step": 14350 - }, - { - "epoch": 2.4475703324808182, - "grad_norm": 0.06677491962909698, - "learning_rate": 2.5474127533792443e-05, - "loss": 0.002242721430957317, - "step": 14355 - }, - { - "epoch": 2.4484228473998293, - "grad_norm": 0.11220566183328629, - "learning_rate": 2.5449861636042443e-05, - "loss": 0.001862034946680069, - "step": 14360 - }, - { - "epoch": 2.449275362318841, - "grad_norm": 0.05493709817528725, - "learning_rate": 2.542560136369786e-05, - "loss": 0.0020324042066931724, - "step": 14365 - }, - { - "epoch": 2.4501278772378514, - "grad_norm": 0.09586431086063385, - "learning_rate": 2.5401346728084225e-05, - "loss": 0.001961209811270237, - "step": 14370 - }, - { - "epoch": 2.450980392156863, - "grad_norm": 0.06384766101837158, - "learning_rate": 2.5377097740524402e-05, - "loss": 0.002969523146748543, - "step": 14375 - }, - { - "epoch": 2.451832907075874, - "grad_norm": 0.11584383249282837, - "learning_rate": 2.5352854412338607e-05, - "loss": 0.0037360407412052156, - "step": 14380 - }, - { - "epoch": 2.452685421994885, - "grad_norm": 0.05268854275345802, - "learning_rate": 2.5328616754844447e-05, - "loss": 0.0024207277223467828, - "step": 14385 - }, - { - "epoch": 2.453537936913896, - "grad_norm": 0.10550973564386368, - "learning_rate": 2.5304384779356855e-05, - "loss": 0.002147519588470459, - "step": 14390 - }, - { - "epoch": 2.454390451832907, - "grad_norm": 0.11402281373739243, - "learning_rate": 2.5280158497188144e-05, - "loss": 0.0030479192733764648, - "step": 14395 - }, - { - "epoch": 2.455242966751918, - "grad_norm": 0.042928412556648254, - "learning_rate": 2.5255937919647928e-05, - "loss": 0.0009582490660250187, - "step": 14400 - }, - { - "epoch": 2.4560954816709293, - "grad_norm": 0.09466255456209183, - "learning_rate": 2.52317230580432e-05, - "loss": 0.0028877202421426773, - "step": 14405 - }, - { - "epoch": 2.4569479965899403, - "grad_norm": 0.0167491864413023, - "learning_rate": 2.5207513923678246e-05, - "loss": 0.002237674966454506, - "step": 14410 - }, - { - "epoch": 2.4578005115089514, - "grad_norm": 0.11767696589231491, - "learning_rate": 2.518331052785468e-05, - "loss": 0.00270021203905344, - "step": 14415 - }, - { - "epoch": 2.4586530264279625, - "grad_norm": 0.13400165736675262, - "learning_rate": 2.5159112881871494e-05, - "loss": 0.0025584336370229723, - "step": 14420 - }, - { - "epoch": 2.4595055413469735, - "grad_norm": 0.051460813730955124, - "learning_rate": 2.5134920997024915e-05, - "loss": 0.001182288955897093, - "step": 14425 - }, - { - "epoch": 2.4603580562659846, - "grad_norm": 0.05078651383519173, - "learning_rate": 2.511073488460855e-05, - "loss": 0.001340255793184042, - "step": 14430 - }, - { - "epoch": 2.4612105711849956, - "grad_norm": 0.06714113801717758, - "learning_rate": 2.5086554555913245e-05, - "loss": 0.0019190860912203789, - "step": 14435 - }, - { - "epoch": 2.4620630861040067, - "grad_norm": 0.05757109820842743, - "learning_rate": 2.5062380022227226e-05, - "loss": 0.0016031917184591293, - "step": 14440 - }, - { - "epoch": 2.4629156010230178, - "grad_norm": 0.045739807188510895, - "learning_rate": 2.5038211294835944e-05, - "loss": 0.0020723894238471987, - "step": 14445 - }, - { - "epoch": 2.463768115942029, - "grad_norm": 0.06381653994321823, - "learning_rate": 2.5014048385022156e-05, - "loss": 0.002237732522189617, - "step": 14450 - }, - { - "epoch": 2.46462063086104, - "grad_norm": 0.08096056431531906, - "learning_rate": 2.498989130406594e-05, - "loss": 0.0017275322228670121, - "step": 14455 - }, - { - "epoch": 2.4654731457800514, - "grad_norm": 0.04627775028347969, - "learning_rate": 2.4965740063244582e-05, - "loss": 0.0028135737404227255, - "step": 14460 - }, - { - "epoch": 2.466325660699062, - "grad_norm": 0.07789458334445953, - "learning_rate": 2.4941594673832737e-05, - "loss": 0.0017165482044219972, - "step": 14465 - }, - { - "epoch": 2.4671781756180735, - "grad_norm": 0.03633275255560875, - "learning_rate": 2.491745514710224e-05, - "loss": 0.003017013892531395, - "step": 14470 - }, - { - "epoch": 2.4680306905370846, - "grad_norm": 0.07425010204315186, - "learning_rate": 2.489332149432224e-05, - "loss": 0.002849162742495537, - "step": 14475 - }, - { - "epoch": 2.4688832054560956, - "grad_norm": 0.08738066256046295, - "learning_rate": 2.486919372675911e-05, - "loss": 0.003103286027908325, - "step": 14480 - }, - { - "epoch": 2.4697357203751067, - "grad_norm": 0.059462107717990875, - "learning_rate": 2.4845071855676526e-05, - "loss": 0.003129242733120918, - "step": 14485 - }, - { - "epoch": 2.4705882352941178, - "grad_norm": 0.12157633155584335, - "learning_rate": 2.4820955892335358e-05, - "loss": 0.00188961960375309, - "step": 14490 - }, - { - "epoch": 2.471440750213129, - "grad_norm": 0.04780135303735733, - "learning_rate": 2.4796845847993743e-05, - "loss": 0.001777658425271511, - "step": 14495 - }, - { - "epoch": 2.47229326513214, - "grad_norm": 0.08734847605228424, - "learning_rate": 2.477274173390706e-05, - "loss": 0.0025872459635138513, - "step": 14500 - }, - { - "epoch": 2.473145780051151, - "grad_norm": 0.08637238293886185, - "learning_rate": 2.4748643561327887e-05, - "loss": 0.0034623559564352035, - "step": 14505 - }, - { - "epoch": 2.473998294970162, - "grad_norm": 0.1351020187139511, - "learning_rate": 2.4724551341506083e-05, - "loss": 0.0025932226330041886, - "step": 14510 - }, - { - "epoch": 2.474850809889173, - "grad_norm": 0.0965266153216362, - "learning_rate": 2.4700465085688678e-05, - "loss": 0.0021650340408086778, - "step": 14515 - }, - { - "epoch": 2.475703324808184, - "grad_norm": 0.06353217363357544, - "learning_rate": 2.4676384805119954e-05, - "loss": 0.0017436511814594268, - "step": 14520 - }, - { - "epoch": 2.476555839727195, - "grad_norm": 0.09694099426269531, - "learning_rate": 2.4652310511041376e-05, - "loss": 0.002511733956634998, - "step": 14525 - }, - { - "epoch": 2.4774083546462062, - "grad_norm": 0.13362912833690643, - "learning_rate": 2.4628242214691614e-05, - "loss": 0.0020636413246393204, - "step": 14530 - }, - { - "epoch": 2.4782608695652173, - "grad_norm": 0.05283635854721069, - "learning_rate": 2.4604179927306575e-05, - "loss": 0.002218991331756115, - "step": 14535 - }, - { - "epoch": 2.4791133844842284, - "grad_norm": 0.062003809958696365, - "learning_rate": 2.4580123660119317e-05, - "loss": 0.0021969690918922425, - "step": 14540 - }, - { - "epoch": 2.4799658994032394, - "grad_norm": 0.1058121919631958, - "learning_rate": 2.4556073424360115e-05, - "loss": 0.002514044567942619, - "step": 14545 - }, - { - "epoch": 2.4808184143222505, - "grad_norm": 0.06746378540992737, - "learning_rate": 2.4532029231256397e-05, - "loss": 0.001485797483474016, - "step": 14550 - }, - { - "epoch": 2.4816709292412615, - "grad_norm": 0.043892405927181244, - "learning_rate": 2.4507991092032832e-05, - "loss": 0.0021189235150814055, - "step": 14555 - }, - { - "epoch": 2.4825234441602726, - "grad_norm": 0.04537670686841011, - "learning_rate": 2.4483959017911195e-05, - "loss": 0.0018616810441017151, - "step": 14560 - }, - { - "epoch": 2.483375959079284, - "grad_norm": 0.04895998165011406, - "learning_rate": 2.445993302011046e-05, - "loss": 0.0016737811267375946, - "step": 14565 - }, - { - "epoch": 2.484228473998295, - "grad_norm": 0.07096420228481293, - "learning_rate": 2.4435913109846773e-05, - "loss": 0.0032933827489614485, - "step": 14570 - }, - { - "epoch": 2.485080988917306, - "grad_norm": 0.07391496002674103, - "learning_rate": 2.4411899298333403e-05, - "loss": 0.0021815944463014604, - "step": 14575 - }, - { - "epoch": 2.4859335038363173, - "grad_norm": 0.12835897505283356, - "learning_rate": 2.438789159678083e-05, - "loss": 0.0032001670449972154, - "step": 14580 - }, - { - "epoch": 2.4867860187553283, - "grad_norm": 0.0947527140378952, - "learning_rate": 2.436389001639662e-05, - "loss": 0.002512381225824356, - "step": 14585 - }, - { - "epoch": 2.4876385336743394, - "grad_norm": 0.06699662655591965, - "learning_rate": 2.4339894568385526e-05, - "loss": 0.0014906782656908036, - "step": 14590 - }, - { - "epoch": 2.4884910485933505, - "grad_norm": 0.042523179203271866, - "learning_rate": 2.4315905263949404e-05, - "loss": 0.0012685291469097138, - "step": 14595 - }, - { - "epoch": 2.4893435635123615, - "grad_norm": 0.03687009960412979, - "learning_rate": 2.4291922114287286e-05, - "loss": 0.0016289660707116127, - "step": 14600 - }, - { - "epoch": 2.4901960784313726, - "grad_norm": 0.07698170840740204, - "learning_rate": 2.4267945130595287e-05, - "loss": 0.002090749144554138, - "step": 14605 - }, - { - "epoch": 2.4910485933503836, - "grad_norm": 0.08533983677625656, - "learning_rate": 2.4243974324066653e-05, - "loss": 0.002234157919883728, - "step": 14610 - }, - { - "epoch": 2.4919011082693947, - "grad_norm": 0.10050603002309799, - "learning_rate": 2.422000970589177e-05, - "loss": 0.002818283811211586, - "step": 14615 - }, - { - "epoch": 2.4927536231884058, - "grad_norm": 0.057129960507154465, - "learning_rate": 2.4196051287258095e-05, - "loss": 0.004226747527718544, - "step": 14620 - }, - { - "epoch": 2.493606138107417, - "grad_norm": 0.08218846470117569, - "learning_rate": 2.4172099079350256e-05, - "loss": 0.0016387354582548142, - "step": 14625 - }, - { - "epoch": 2.494458653026428, - "grad_norm": 0.07963220775127411, - "learning_rate": 2.4148153093349894e-05, - "loss": 0.002778450772166252, - "step": 14630 - }, - { - "epoch": 2.495311167945439, - "grad_norm": 0.058049995452165604, - "learning_rate": 2.4124213340435834e-05, - "loss": 0.0024016743525862696, - "step": 14635 - }, - { - "epoch": 2.49616368286445, - "grad_norm": 0.13127438724040985, - "learning_rate": 2.410027983178392e-05, - "loss": 0.0038317706435918807, - "step": 14640 - }, - { - "epoch": 2.497016197783461, - "grad_norm": 0.048698920756578445, - "learning_rate": 2.407635257856711e-05, - "loss": 0.00152621790766716, - "step": 14645 - }, - { - "epoch": 2.497868712702472, - "grad_norm": 0.02338201180100441, - "learning_rate": 2.405243159195546e-05, - "loss": 0.0027417311444878577, - "step": 14650 - }, - { - "epoch": 2.498721227621483, - "grad_norm": 0.07108049094676971, - "learning_rate": 2.402851688311607e-05, - "loss": 0.001716497913002968, - "step": 14655 - }, - { - "epoch": 2.4995737425404947, - "grad_norm": 0.028342491015791893, - "learning_rate": 2.4004608463213126e-05, - "loss": 0.0013954185880720616, - "step": 14660 - }, - { - "epoch": 2.4995737425404947, - "eval_loss": 0.04806143045425415, - "eval_runtime": 3.6619, - "eval_samples_per_second": 68.816, - "eval_steps_per_second": 1.092, - "step": 14660 - }, - { - "eval_cer_subset": 0.01446089208070741, - "eval_cer_subset_edit_distance": 888, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 14660 - }, - { - "epoch": 2.5004262574595053, - "grad_norm": 0.0640476793050766, - "learning_rate": 2.398070634340786e-05, - "loss": 0.002191193774342537, - "step": 14665 - }, - { - "epoch": 2.501278772378517, - "grad_norm": 0.034168582409620285, - "learning_rate": 2.3956810534858607e-05, - "loss": 0.0013371256180107593, - "step": 14670 - }, - { - "epoch": 2.502131287297528, - "grad_norm": 0.07181207835674286, - "learning_rate": 2.3932921048720698e-05, - "loss": 0.0019236713647842406, - "step": 14675 - }, - { - "epoch": 2.502983802216539, - "grad_norm": 0.0469810888171196, - "learning_rate": 2.3909037896146552e-05, - "loss": 0.0018267782405018806, - "step": 14680 - }, - { - "epoch": 2.50383631713555, - "grad_norm": 0.028826232999563217, - "learning_rate": 2.3885161088285617e-05, - "loss": 0.0010010387748479843, - "step": 14685 - }, - { - "epoch": 2.504688832054561, - "grad_norm": 0.10193654894828796, - "learning_rate": 2.386129063628437e-05, - "loss": 0.0024211697280406954, - "step": 14690 - }, - { - "epoch": 2.505541346973572, - "grad_norm": 0.14754284918308258, - "learning_rate": 2.3837426551286357e-05, - "loss": 0.0020253278315067293, - "step": 14695 - }, - { - "epoch": 2.506393861892583, - "grad_norm": 0.12155842036008835, - "learning_rate": 2.3813568844432103e-05, - "loss": 0.002546634152531624, - "step": 14700 - }, - { - "epoch": 2.5072463768115942, - "grad_norm": 0.07209643721580505, - "learning_rate": 2.378971752685919e-05, - "loss": 0.002657034806907177, - "step": 14705 - }, - { - "epoch": 2.5080988917306053, - "grad_norm": 0.1210934966802597, - "learning_rate": 2.3765872609702192e-05, - "loss": 0.002788655459880829, - "step": 14710 - }, - { - "epoch": 2.5089514066496164, - "grad_norm": 0.05647290125489235, - "learning_rate": 2.374203410409274e-05, - "loss": 0.0022313324734568596, - "step": 14715 - }, - { - "epoch": 2.5098039215686274, - "grad_norm": 0.18282383680343628, - "learning_rate": 2.371820202115942e-05, - "loss": 0.0019404273480176926, - "step": 14720 - }, - { - "epoch": 2.5106564364876385, - "grad_norm": 0.022936735302209854, - "learning_rate": 2.369437637202784e-05, - "loss": 0.0015677658841013909, - "step": 14725 - }, - { - "epoch": 2.5115089514066495, - "grad_norm": 0.023840973153710365, - "learning_rate": 2.3670557167820614e-05, - "loss": 0.0017029233276844025, - "step": 14730 - }, - { - "epoch": 2.5123614663256606, - "grad_norm": 0.01897628791630268, - "learning_rate": 2.3646744419657323e-05, - "loss": 0.001359708234667778, - "step": 14735 - }, - { - "epoch": 2.5132139812446717, - "grad_norm": 0.03326602280139923, - "learning_rate": 2.3622938138654575e-05, - "loss": 0.0006220267619937659, - "step": 14740 - }, - { - "epoch": 2.5140664961636827, - "grad_norm": 0.0619979090988636, - "learning_rate": 2.3599138335925914e-05, - "loss": 0.002030659094452858, - "step": 14745 - }, - { - "epoch": 2.514919011082694, - "grad_norm": 0.03451136127114296, - "learning_rate": 2.3575345022581896e-05, - "loss": 0.0016797656193375588, - "step": 14750 - }, - { - "epoch": 2.5157715260017053, - "grad_norm": 0.0554860420525074, - "learning_rate": 2.3551558209730018e-05, - "loss": 0.0016403241083025933, - "step": 14755 - }, - { - "epoch": 2.516624040920716, - "grad_norm": 0.08686158061027527, - "learning_rate": 2.3527777908474744e-05, - "loss": 0.003415975719690323, - "step": 14760 - }, - { - "epoch": 2.5174765558397274, - "grad_norm": 0.06636729836463928, - "learning_rate": 2.3504004129917542e-05, - "loss": 0.0018416630104184152, - "step": 14765 - }, - { - "epoch": 2.518329070758738, - "grad_norm": 0.08038193732500076, - "learning_rate": 2.3480236885156776e-05, - "loss": 0.002185085415840149, - "step": 14770 - }, - { - "epoch": 2.5191815856777495, - "grad_norm": 0.06370148062705994, - "learning_rate": 2.3456476185287802e-05, - "loss": 0.001394746359437704, - "step": 14775 - }, - { - "epoch": 2.5200341005967606, - "grad_norm": 0.05585980415344238, - "learning_rate": 2.3432722041402886e-05, - "loss": 0.003035778924822807, - "step": 14780 - }, - { - "epoch": 2.5208866155157716, - "grad_norm": 0.08676521480083466, - "learning_rate": 2.340897446459128e-05, - "loss": 0.002279702201485634, - "step": 14785 - }, - { - "epoch": 2.5217391304347827, - "grad_norm": 0.0421539731323719, - "learning_rate": 2.3385233465939124e-05, - "loss": 0.0015795350074768067, - "step": 14790 - }, - { - "epoch": 2.5225916453537938, - "grad_norm": 0.09380512684583664, - "learning_rate": 2.3361499056529516e-05, - "loss": 0.0024957180023193358, - "step": 14795 - }, - { - "epoch": 2.523444160272805, - "grad_norm": 0.05541060492396355, - "learning_rate": 2.3337771247442457e-05, - "loss": 0.0022170023992657663, - "step": 14800 - }, - { - "epoch": 2.524296675191816, - "grad_norm": 0.030795352533459663, - "learning_rate": 2.3314050049754872e-05, - "loss": 0.0015011204406619072, - "step": 14805 - }, - { - "epoch": 2.525149190110827, - "grad_norm": 0.040677715092897415, - "learning_rate": 2.329033547454063e-05, - "loss": 0.0023216739296913146, - "step": 14810 - }, - { - "epoch": 2.526001705029838, - "grad_norm": 0.036884501576423645, - "learning_rate": 2.3266627532870462e-05, - "loss": 0.0025786716490983964, - "step": 14815 - }, - { - "epoch": 2.526854219948849, - "grad_norm": 0.02398660033941269, - "learning_rate": 2.324292623581204e-05, - "loss": 0.0017933860421180726, - "step": 14820 - }, - { - "epoch": 2.52770673486786, - "grad_norm": 0.06601176410913467, - "learning_rate": 2.321923159442989e-05, - "loss": 0.002885073609650135, - "step": 14825 - }, - { - "epoch": 2.528559249786871, - "grad_norm": 0.08684834837913513, - "learning_rate": 2.3195543619785496e-05, - "loss": 0.0026486974209547045, - "step": 14830 - }, - { - "epoch": 2.5294117647058822, - "grad_norm": 0.04674920067191124, - "learning_rate": 2.3171862322937173e-05, - "loss": 0.0025192024186253548, - "step": 14835 - }, - { - "epoch": 2.5302642796248933, - "grad_norm": 0.059271425008773804, - "learning_rate": 2.314818771494013e-05, - "loss": 0.001517033763229847, - "step": 14840 - }, - { - "epoch": 2.5311167945439044, - "grad_norm": 0.03094577044248581, - "learning_rate": 2.312451980684648e-05, - "loss": 0.001731237769126892, - "step": 14845 - }, - { - "epoch": 2.531969309462916, - "grad_norm": 0.04043465852737427, - "learning_rate": 2.3100858609705167e-05, - "loss": 0.002348044328391552, - "step": 14850 - }, - { - "epoch": 2.5328218243819265, - "grad_norm": 0.05144953727722168, - "learning_rate": 2.3077204134562054e-05, - "loss": 0.0019841600209474564, - "step": 14855 - }, - { - "epoch": 2.533674339300938, - "grad_norm": 0.07220125198364258, - "learning_rate": 2.3053556392459813e-05, - "loss": 0.002818341739475727, - "step": 14860 - }, - { - "epoch": 2.5345268542199486, - "grad_norm": 0.08199959248304367, - "learning_rate": 2.302991539443801e-05, - "loss": 0.0024914808571338655, - "step": 14865 - }, - { - "epoch": 2.53537936913896, - "grad_norm": 0.07761549204587936, - "learning_rate": 2.3006281151533047e-05, - "loss": 0.003526497259736061, - "step": 14870 - }, - { - "epoch": 2.536231884057971, - "grad_norm": 0.1002337783575058, - "learning_rate": 2.298265367477816e-05, - "loss": 0.0022296305745840073, - "step": 14875 - }, - { - "epoch": 2.5370843989769822, - "grad_norm": 0.05918731540441513, - "learning_rate": 2.295903297520346e-05, - "loss": 0.0012512234039604663, - "step": 14880 - }, - { - "epoch": 2.5379369138959933, - "grad_norm": 0.053112782537937164, - "learning_rate": 2.2935419063835868e-05, - "loss": 0.0017477553337812424, - "step": 14885 - }, - { - "epoch": 2.5387894288150044, - "grad_norm": 0.061820488423109055, - "learning_rate": 2.2911811951699155e-05, - "loss": 0.0022626927122473715, - "step": 14890 - }, - { - "epoch": 2.5396419437340154, - "grad_norm": 0.11703728139400482, - "learning_rate": 2.288821164981387e-05, - "loss": 0.0025926090776920317, - "step": 14895 - }, - { - "epoch": 2.5404944586530265, - "grad_norm": 0.04635873809456825, - "learning_rate": 2.2864618169197468e-05, - "loss": 0.0017809070646762847, - "step": 14900 - }, - { - "epoch": 2.5413469735720375, - "grad_norm": 0.11287315934896469, - "learning_rate": 2.2841031520864136e-05, - "loss": 0.00238190982490778, - "step": 14905 - }, - { - "epoch": 2.5421994884910486, - "grad_norm": 0.026871057227253914, - "learning_rate": 2.2817451715824924e-05, - "loss": 0.0015336395241320132, - "step": 14910 - }, - { - "epoch": 2.5430520034100597, - "grad_norm": 0.06438672542572021, - "learning_rate": 2.279387876508766e-05, - "loss": 0.001995333656668663, - "step": 14915 - }, - { - "epoch": 2.5439045183290707, - "grad_norm": 0.06547024846076965, - "learning_rate": 2.277031267965697e-05, - "loss": 0.002060149237513542, - "step": 14920 - }, - { - "epoch": 2.544757033248082, - "grad_norm": 0.07563283294439316, - "learning_rate": 2.2746753470534322e-05, - "loss": 0.0017446789890527726, - "step": 14925 - }, - { - "epoch": 2.545609548167093, - "grad_norm": 0.028652798384428024, - "learning_rate": 2.27232011487179e-05, - "loss": 0.0022552181035280228, - "step": 14930 - }, - { - "epoch": 2.546462063086104, - "grad_norm": 0.0893654152750969, - "learning_rate": 2.269965572520274e-05, - "loss": 0.0029813185334205627, - "step": 14935 - }, - { - "epoch": 2.547314578005115, - "grad_norm": 0.04628995433449745, - "learning_rate": 2.26761172109806e-05, - "loss": 0.0025255372747778893, - "step": 14940 - }, - { - "epoch": 2.548167092924126, - "grad_norm": 0.09175702929496765, - "learning_rate": 2.2652585617040076e-05, - "loss": 0.004577648639678955, - "step": 14945 - }, - { - "epoch": 2.549019607843137, - "grad_norm": 0.041957542300224304, - "learning_rate": 2.262906095436648e-05, - "loss": 0.002292825095355511, - "step": 14950 - }, - { - "epoch": 2.5498721227621486, - "grad_norm": 0.061231136322021484, - "learning_rate": 2.2605543233941904e-05, - "loss": 0.002193107083439827, - "step": 14955 - }, - { - "epoch": 2.550724637681159, - "grad_norm": 0.08939548581838608, - "learning_rate": 2.2582032466745206e-05, - "loss": 0.0013388695195317268, - "step": 14960 - }, - { - "epoch": 2.5515771526001707, - "grad_norm": 0.10106560587882996, - "learning_rate": 2.255852866375199e-05, - "loss": 0.004271790385246277, - "step": 14965 - }, - { - "epoch": 2.5524296675191813, - "grad_norm": 0.04756918177008629, - "learning_rate": 2.253503183593463e-05, - "loss": 0.002253059670329094, - "step": 14970 - }, - { - "epoch": 2.553282182438193, - "grad_norm": 0.06336323171854019, - "learning_rate": 2.2511541994262203e-05, - "loss": 0.0019065763801336289, - "step": 14975 - }, - { - "epoch": 2.554134697357204, - "grad_norm": 0.021801188588142395, - "learning_rate": 2.2488059149700568e-05, - "loss": 0.001671123132109642, - "step": 14980 - }, - { - "epoch": 2.554987212276215, - "grad_norm": 0.07580327987670898, - "learning_rate": 2.2464583313212294e-05, - "loss": 0.0031477130949497225, - "step": 14985 - }, - { - "epoch": 2.555839727195226, - "grad_norm": 0.07757267355918884, - "learning_rate": 2.244111449575666e-05, - "loss": 0.0026445770636200905, - "step": 14990 - }, - { - "epoch": 2.556692242114237, - "grad_norm": 0.043020669370889664, - "learning_rate": 2.2417652708289726e-05, - "loss": 0.002764601819217205, - "step": 14995 - }, - { - "epoch": 2.557544757033248, - "grad_norm": 0.03915635868906975, - "learning_rate": 2.2394197961764212e-05, - "loss": 0.002291044779121876, - "step": 15000 - }, - { - "epoch": 2.558397271952259, - "grad_norm": 0.0665091872215271, - "learning_rate": 2.2370750267129586e-05, - "loss": 0.0017822932451963425, - "step": 15005 - }, - { - "epoch": 2.5592497868712702, - "grad_norm": 0.08525653183460236, - "learning_rate": 2.234730963533199e-05, - "loss": 0.0018473496660590173, - "step": 15010 - }, - { - "epoch": 2.5601023017902813, - "grad_norm": 0.05346886068582535, - "learning_rate": 2.2323876077314327e-05, - "loss": 0.002567983791232109, - "step": 15015 - }, - { - "epoch": 2.5609548167092924, - "grad_norm": 0.04240184277296066, - "learning_rate": 2.2300449604016123e-05, - "loss": 0.0021606752648949622, - "step": 15020 - }, - { - "epoch": 2.5618073316283034, - "grad_norm": 0.08507288247346878, - "learning_rate": 2.2277030226373667e-05, - "loss": 0.0023022485896945, - "step": 15025 - }, - { - "epoch": 2.5626598465473145, - "grad_norm": 0.07468844205141068, - "learning_rate": 2.225361795531989e-05, - "loss": 0.0030104584991931917, - "step": 15030 - }, - { - "epoch": 2.5635123614663256, - "grad_norm": 0.03731158375740051, - "learning_rate": 2.22302128017844e-05, - "loss": 0.0019535191357135774, - "step": 15035 - }, - { - "epoch": 2.5643648763853366, - "grad_norm": 0.09111307561397552, - "learning_rate": 2.2206814776693536e-05, - "loss": 0.0016553621739149094, - "step": 15040 - }, - { - "epoch": 2.5652173913043477, - "grad_norm": 0.04197632521390915, - "learning_rate": 2.2183423890970255e-05, - "loss": 0.0018846508115530013, - "step": 15045 - }, - { - "epoch": 2.566069906223359, - "grad_norm": 0.09259206801652908, - "learning_rate": 2.2160040155534206e-05, - "loss": 0.0028481241315603256, - "step": 15050 - }, - { - "epoch": 2.56692242114237, - "grad_norm": 0.07880257815122604, - "learning_rate": 2.2136663581301696e-05, - "loss": 0.002117951214313507, - "step": 15055 - }, - { - "epoch": 2.5677749360613813, - "grad_norm": 0.0969267189502716, - "learning_rate": 2.2113294179185667e-05, - "loss": 0.00240680705755949, - "step": 15060 - }, - { - "epoch": 2.568627450980392, - "grad_norm": 0.06295698881149292, - "learning_rate": 2.2089931960095754e-05, - "loss": 0.0012395468540489674, - "step": 15065 - }, - { - "epoch": 2.5694799658994034, - "grad_norm": 0.0716724842786789, - "learning_rate": 2.2066576934938224e-05, - "loss": 0.004721567407250405, - "step": 15070 - }, - { - "epoch": 2.5703324808184145, - "grad_norm": 0.04790467768907547, - "learning_rate": 2.2043229114615967e-05, - "loss": 0.0016566522419452668, - "step": 15075 - }, - { - "epoch": 2.5711849957374255, - "grad_norm": 0.014919254928827286, - "learning_rate": 2.2019888510028515e-05, - "loss": 0.00200834795832634, - "step": 15080 - }, - { - "epoch": 2.5720375106564366, - "grad_norm": 0.07281307876110077, - "learning_rate": 2.1996555132072063e-05, - "loss": 0.0021370718255639075, - "step": 15085 - }, - { - "epoch": 2.5728900255754477, - "grad_norm": 0.04918764904141426, - "learning_rate": 2.197322899163938e-05, - "loss": 0.002188747748732567, - "step": 15090 - }, - { - "epoch": 2.5737425404944587, - "grad_norm": 0.05246208980679512, - "learning_rate": 2.1949910099619913e-05, - "loss": 0.002106213942170143, - "step": 15095 - }, - { - "epoch": 2.57459505541347, - "grad_norm": 0.07900833338499069, - "learning_rate": 2.1926598466899674e-05, - "loss": 0.0014828240498900413, - "step": 15100 - }, - { - "epoch": 2.575447570332481, - "grad_norm": 0.1235758364200592, - "learning_rate": 2.19032941043613e-05, - "loss": 0.0033482640981674196, - "step": 15105 - }, - { - "epoch": 2.576300085251492, - "grad_norm": 0.06170985475182533, - "learning_rate": 2.187999702288408e-05, - "loss": 0.0019921788945794104, - "step": 15110 - }, - { - "epoch": 2.577152600170503, - "grad_norm": 0.1210661381483078, - "learning_rate": 2.185670723334384e-05, - "loss": 0.0019077232107520103, - "step": 15115 - }, - { - "epoch": 2.578005115089514, - "grad_norm": 0.06942020356655121, - "learning_rate": 2.1833424746613026e-05, - "loss": 0.0019503291696310043, - "step": 15120 - }, - { - "epoch": 2.578857630008525, - "grad_norm": 0.09329917281866074, - "learning_rate": 2.1810149573560693e-05, - "loss": 0.0026118636131286623, - "step": 15125 - }, - { - "epoch": 2.579710144927536, - "grad_norm": 0.1026659607887268, - "learning_rate": 2.1786881725052445e-05, - "loss": 0.002567945420742035, - "step": 15130 - }, - { - "epoch": 2.580562659846547, - "grad_norm": 0.06306809186935425, - "learning_rate": 2.1763621211950517e-05, - "loss": 0.001768135279417038, - "step": 15135 - }, - { - "epoch": 2.5814151747655583, - "grad_norm": 0.07647090405225754, - "learning_rate": 2.174036804511367e-05, - "loss": 0.0015752470120787621, - "step": 15140 - }, - { - "epoch": 2.5822676896845693, - "grad_norm": 0.045121923089027405, - "learning_rate": 2.171712223539726e-05, - "loss": 0.0025726621970534325, - "step": 15145 - }, - { - "epoch": 2.5831202046035804, - "grad_norm": 0.040667545050382614, - "learning_rate": 2.1693883793653188e-05, - "loss": 0.002222199738025665, - "step": 15150 - }, - { - "epoch": 2.583972719522592, - "grad_norm": 0.08505896478891373, - "learning_rate": 2.1670652730729968e-05, - "loss": 0.0030935727059841155, - "step": 15155 - }, - { - "epoch": 2.5848252344416025, - "grad_norm": 0.05064573138952255, - "learning_rate": 2.164742905747261e-05, - "loss": 0.002387380041182041, - "step": 15160 - }, - { - "epoch": 2.585677749360614, - "grad_norm": 0.0372583344578743, - "learning_rate": 2.1624212784722684e-05, - "loss": 0.0026363788172602655, - "step": 15165 - }, - { - "epoch": 2.5865302642796246, - "grad_norm": 0.06209828332066536, - "learning_rate": 2.1601003923318344e-05, - "loss": 0.0029974017292261125, - "step": 15170 - }, - { - "epoch": 2.587382779198636, - "grad_norm": 0.049798715859651566, - "learning_rate": 2.157780248409424e-05, - "loss": 0.0016345694661140443, - "step": 15175 - }, - { - "epoch": 2.588235294117647, - "grad_norm": 0.06752602010965347, - "learning_rate": 2.1554608477881597e-05, - "loss": 0.0025367341935634614, - "step": 15180 - }, - { - "epoch": 2.5890878090366582, - "grad_norm": 0.10456907004117966, - "learning_rate": 2.1531421915508137e-05, - "loss": 0.002495551109313965, - "step": 15185 - }, - { - "epoch": 2.5899403239556693, - "grad_norm": 0.0790029838681221, - "learning_rate": 2.1508242807798114e-05, - "loss": 0.0025735165923833846, - "step": 15190 - }, - { - "epoch": 2.5907928388746804, - "grad_norm": 0.030237069353461266, - "learning_rate": 2.1485071165572298e-05, - "loss": 0.0018124323338270187, - "step": 15195 - }, - { - "epoch": 2.5916453537936914, - "grad_norm": 0.06030745431780815, - "learning_rate": 2.1461906999648008e-05, - "loss": 0.002845403365790844, - "step": 15200 - }, - { - "epoch": 2.5924978687127025, - "grad_norm": 0.10071806609630585, - "learning_rate": 2.1438750320839037e-05, - "loss": 0.002326494827866554, - "step": 15205 - }, - { - "epoch": 2.5933503836317136, - "grad_norm": 0.050379570573568344, - "learning_rate": 2.1415601139955686e-05, - "loss": 0.0019888151437044144, - "step": 15210 - }, - { - "epoch": 2.5942028985507246, - "grad_norm": 0.09101511538028717, - "learning_rate": 2.1392459467804753e-05, - "loss": 0.003049125336110592, - "step": 15215 - }, - { - "epoch": 2.5950554134697357, - "grad_norm": 0.03804527968168259, - "learning_rate": 2.1369325315189553e-05, - "loss": 0.0016767382621765137, - "step": 15220 - }, - { - "epoch": 2.5959079283887467, - "grad_norm": 0.0779503807425499, - "learning_rate": 2.1346198692909895e-05, - "loss": 0.001964661478996277, - "step": 15225 - }, - { - "epoch": 2.596760443307758, - "grad_norm": 0.07922998070716858, - "learning_rate": 2.1323079611762033e-05, - "loss": 0.001821339875459671, - "step": 15230 - }, - { - "epoch": 2.597612958226769, - "grad_norm": 0.045152947306632996, - "learning_rate": 2.1299968082538734e-05, - "loss": 0.0011449499055743218, - "step": 15235 - }, - { - "epoch": 2.59846547314578, - "grad_norm": 0.026626303791999817, - "learning_rate": 2.1276864116029207e-05, - "loss": 0.0016753975301980971, - "step": 15240 - }, - { - "epoch": 2.599317988064791, - "grad_norm": 0.10935933142900467, - "learning_rate": 2.1253767723019188e-05, - "loss": 0.0026281427592039107, - "step": 15245 - }, - { - "epoch": 2.6001705029838025, - "grad_norm": 0.08133106678724289, - "learning_rate": 2.123067891429082e-05, - "loss": 0.001925770938396454, - "step": 15250 - }, - { - "epoch": 2.601023017902813, - "grad_norm": 0.04865674301981926, - "learning_rate": 2.1207597700622728e-05, - "loss": 0.0019936567172408105, - "step": 15255 - }, - { - "epoch": 2.6018755328218246, - "grad_norm": 0.11841622740030289, - "learning_rate": 2.1184524092789982e-05, - "loss": 0.00298205092549324, - "step": 15260 - }, - { - "epoch": 2.602728047740835, - "grad_norm": 0.04416264593601227, - "learning_rate": 2.1161458101564115e-05, - "loss": 0.0036853265017271044, - "step": 15265 - }, - { - "epoch": 2.6035805626598467, - "grad_norm": 0.08603575825691223, - "learning_rate": 2.1138399737713118e-05, - "loss": 0.004533383995294571, - "step": 15270 - }, - { - "epoch": 2.604433077578858, - "grad_norm": 0.0626961886882782, - "learning_rate": 2.1115349012001388e-05, - "loss": 0.0017330382019281388, - "step": 15275 - }, - { - "epoch": 2.605285592497869, - "grad_norm": 0.12894456088542938, - "learning_rate": 2.1092305935189773e-05, - "loss": 0.0037327542901039123, - "step": 15280 - }, - { - "epoch": 2.60613810741688, - "grad_norm": 0.10542263090610504, - "learning_rate": 2.106927051803554e-05, - "loss": 0.0026806583628058434, - "step": 15285 - }, - { - "epoch": 2.606990622335891, - "grad_norm": 0.05068397521972656, - "learning_rate": 2.1046242771292386e-05, - "loss": 0.0014822190627455712, - "step": 15290 - }, - { - "epoch": 2.607843137254902, - "grad_norm": 0.08927716314792633, - "learning_rate": 2.102322270571045e-05, - "loss": 0.003242380917072296, - "step": 15295 - }, - { - "epoch": 2.608695652173913, - "grad_norm": 0.05792883411049843, - "learning_rate": 2.1000210332036248e-05, - "loss": 0.0017583563923835755, - "step": 15300 - }, - { - "epoch": 2.609548167092924, - "grad_norm": 0.0648881196975708, - "learning_rate": 2.09772056610127e-05, - "loss": 0.002197427675127983, - "step": 15305 - }, - { - "epoch": 2.610400682011935, - "grad_norm": 0.060977645218372345, - "learning_rate": 2.095420870337919e-05, - "loss": 0.002055848389863968, - "step": 15310 - }, - { - "epoch": 2.6112531969309463, - "grad_norm": 0.04654461517930031, - "learning_rate": 2.093121946987146e-05, - "loss": 0.002073242887854576, - "step": 15315 - }, - { - "epoch": 2.6121057118499573, - "grad_norm": 0.04738753288984299, - "learning_rate": 2.0908237971221634e-05, - "loss": 0.0017290839925408364, - "step": 15320 - }, - { - "epoch": 2.6129582267689684, - "grad_norm": 0.07519782334566116, - "learning_rate": 2.0885264218158248e-05, - "loss": 0.0012821624055504798, - "step": 15325 - }, - { - "epoch": 2.6138107416879794, - "grad_norm": 0.06078832224011421, - "learning_rate": 2.0862298221406206e-05, - "loss": 0.0019888199865818025, - "step": 15330 - }, - { - "epoch": 2.6146632566069905, - "grad_norm": 0.04823920503258705, - "learning_rate": 2.083933999168679e-05, - "loss": 0.0015226650051772595, - "step": 15335 - }, - { - "epoch": 2.6155157715260016, - "grad_norm": 0.04050251096487045, - "learning_rate": 2.0816389539717694e-05, - "loss": 0.0024490740150213243, - "step": 15340 - }, - { - "epoch": 2.6163682864450126, - "grad_norm": 0.08443193882703781, - "learning_rate": 2.0793446876212937e-05, - "loss": 0.0027990926057100294, - "step": 15345 - }, - { - "epoch": 2.6172208013640237, - "grad_norm": 0.03322751075029373, - "learning_rate": 2.07705120118829e-05, - "loss": 0.0011861051432788372, - "step": 15350 - }, - { - "epoch": 2.618073316283035, - "grad_norm": 0.06874673068523407, - "learning_rate": 2.0747584957434375e-05, - "loss": 0.0018939610570669174, - "step": 15355 - }, - { - "epoch": 2.618925831202046, - "grad_norm": 0.04990018159151077, - "learning_rate": 2.0724665723570437e-05, - "loss": 0.0013890796341001987, - "step": 15360 - }, - { - "epoch": 2.6197783461210573, - "grad_norm": 0.06342940032482147, - "learning_rate": 2.0701754320990586e-05, - "loss": 0.0019270982593297958, - "step": 15365 - }, - { - "epoch": 2.620630861040068, - "grad_norm": 0.05647345632314682, - "learning_rate": 2.0678850760390607e-05, - "loss": 0.0019773844629526137, - "step": 15370 - }, - { - "epoch": 2.6214833759590794, - "grad_norm": 0.09767530113458633, - "learning_rate": 2.0655955052462643e-05, - "loss": 0.0025425378233194353, - "step": 15375 - }, - { - "epoch": 2.6223358908780905, - "grad_norm": 0.05659051984548569, - "learning_rate": 2.063306720789516e-05, - "loss": 0.0016861587762832641, - "step": 15380 - }, - { - "epoch": 2.6231884057971016, - "grad_norm": 0.07679109275341034, - "learning_rate": 2.061018723737299e-05, - "loss": 0.0012974600307643414, - "step": 15385 - }, - { - "epoch": 2.6240409207161126, - "grad_norm": 0.032084014266729355, - "learning_rate": 2.0587315151577257e-05, - "loss": 0.0009737671352922916, - "step": 15390 - }, - { - "epoch": 2.6248934356351237, - "grad_norm": 0.07588861882686615, - "learning_rate": 2.056445096118539e-05, - "loss": 0.002771071344614029, - "step": 15395 - }, - { - "epoch": 2.6257459505541347, - "grad_norm": 0.07706267386674881, - "learning_rate": 2.0541594676871188e-05, - "loss": 0.002124561369419098, - "step": 15400 - }, - { - "epoch": 2.626598465473146, - "grad_norm": 0.05501805245876312, - "learning_rate": 2.051874630930469e-05, - "loss": 0.001449206192046404, - "step": 15405 - }, - { - "epoch": 2.627450980392157, - "grad_norm": 0.07360731810331345, - "learning_rate": 2.0495905869152303e-05, - "loss": 0.0014007428660988807, - "step": 15410 - }, - { - "epoch": 2.628303495311168, - "grad_norm": 0.03651239722967148, - "learning_rate": 2.04730733670767e-05, - "loss": 0.0013091465458273887, - "step": 15415 - }, - { - "epoch": 2.629156010230179, - "grad_norm": 0.05154712125658989, - "learning_rate": 2.0450248813736842e-05, - "loss": 0.0017904775217175485, - "step": 15420 - }, - { - "epoch": 2.63000852514919, - "grad_norm": 0.03202452138066292, - "learning_rate": 2.0427432219787978e-05, - "loss": 0.002919047139585018, - "step": 15425 - }, - { - "epoch": 2.630861040068201, - "grad_norm": 0.08954522758722305, - "learning_rate": 2.040462359588169e-05, - "loss": 0.0031249357387423517, - "step": 15430 - }, - { - "epoch": 2.631713554987212, - "grad_norm": 0.09551462531089783, - "learning_rate": 2.038182295266577e-05, - "loss": 0.0016073914244771003, - "step": 15435 - }, - { - "epoch": 2.632566069906223, - "grad_norm": 0.0576794371008873, - "learning_rate": 2.035903030078432e-05, - "loss": 0.0028427325189113615, - "step": 15440 - }, - { - "epoch": 2.6334185848252343, - "grad_norm": 0.0879262238740921, - "learning_rate": 2.0336245650877728e-05, - "loss": 0.0012862576171755792, - "step": 15445 - }, - { - "epoch": 2.634271099744246, - "grad_norm": 0.1022641509771347, - "learning_rate": 2.03134690135826e-05, - "loss": 0.002296357229351997, - "step": 15450 - }, - { - "epoch": 2.6351236146632564, - "grad_norm": 0.07090801000595093, - "learning_rate": 2.029070039953186e-05, - "loss": 0.0032129865139722824, - "step": 15455 - }, - { - "epoch": 2.635976129582268, - "grad_norm": 0.06394338607788086, - "learning_rate": 2.026793981935463e-05, - "loss": 0.0022887293249368667, - "step": 15460 - }, - { - "epoch": 2.6368286445012785, - "grad_norm": 0.03828660771250725, - "learning_rate": 2.0245187283676316e-05, - "loss": 0.0023141488432884215, - "step": 15465 - }, - { - "epoch": 2.63768115942029, - "grad_norm": 0.0748148262500763, - "learning_rate": 2.0222442803118537e-05, - "loss": 0.002477791905403137, - "step": 15470 - }, - { - "epoch": 2.638533674339301, - "grad_norm": 0.02352295070886612, - "learning_rate": 2.019970638829921e-05, - "loss": 0.0021653104573488235, - "step": 15475 - }, - { - "epoch": 2.639386189258312, - "grad_norm": 0.019303878769278526, - "learning_rate": 2.017697804983243e-05, - "loss": 0.0014067382551729679, - "step": 15480 - }, - { - "epoch": 2.640238704177323, - "grad_norm": 0.036747269332408905, - "learning_rate": 2.015425779832854e-05, - "loss": 0.002145359478890896, - "step": 15485 - }, - { - "epoch": 2.6410912190963343, - "grad_norm": 0.04195109382271767, - "learning_rate": 2.0131545644394096e-05, - "loss": 0.0014138499274849892, - "step": 15490 - }, - { - "epoch": 2.6419437340153453, - "grad_norm": 0.07388610392808914, - "learning_rate": 2.0108841598631904e-05, - "loss": 0.0025294892489910125, - "step": 15495 - }, - { - "epoch": 2.6427962489343564, - "grad_norm": 0.0890735536813736, - "learning_rate": 2.0086145671640973e-05, - "loss": 0.0026762137189507485, - "step": 15500 - }, - { - "epoch": 2.6436487638533674, - "grad_norm": 0.07587535679340363, - "learning_rate": 2.006345787401652e-05, - "loss": 0.0031544029712677, - "step": 15505 - }, - { - "epoch": 2.6445012787723785, - "grad_norm": 0.10948733240365982, - "learning_rate": 2.004077821634995e-05, - "loss": 0.0023899499326944353, - "step": 15510 - }, - { - "epoch": 2.6453537936913896, - "grad_norm": 0.07914752513170242, - "learning_rate": 2.0018106709228886e-05, - "loss": 0.004097612574696541, - "step": 15515 - }, - { - "epoch": 2.6462063086104006, - "grad_norm": 0.07947845757007599, - "learning_rate": 1.9995443363237126e-05, - "loss": 0.0022834014147520066, - "step": 15520 - }, - { - "epoch": 2.6470588235294117, - "grad_norm": 0.05973362177610397, - "learning_rate": 1.9972788188954704e-05, - "loss": 0.001445610448718071, - "step": 15525 - }, - { - "epoch": 2.6479113384484227, - "grad_norm": 0.07292830944061279, - "learning_rate": 1.9950141196957792e-05, - "loss": 0.0023502418771386147, - "step": 15530 - }, - { - "epoch": 2.648763853367434, - "grad_norm": 0.09226574003696442, - "learning_rate": 1.9927502397818745e-05, - "loss": 0.002285385876893997, - "step": 15535 - }, - { - "epoch": 2.649616368286445, - "grad_norm": 0.08981240540742874, - "learning_rate": 1.9904871802106124e-05, - "loss": 0.0023617954924702645, - "step": 15540 - }, - { - "epoch": 2.6504688832054564, - "grad_norm": 0.07505398988723755, - "learning_rate": 1.988224942038466e-05, - "loss": 0.0016136666759848594, - "step": 15545 - }, - { - "epoch": 2.651321398124467, - "grad_norm": 0.06795456260442734, - "learning_rate": 1.9859635263215215e-05, - "loss": 0.0014020048081874847, - "step": 15550 - }, - { - "epoch": 2.6521739130434785, - "grad_norm": 0.07863990217447281, - "learning_rate": 1.983702934115483e-05, - "loss": 0.0016099724918603898, - "step": 15555 - }, - { - "epoch": 2.653026427962489, - "grad_norm": 0.15475937724113464, - "learning_rate": 1.9814431664756705e-05, - "loss": 0.0028660917654633523, - "step": 15560 - }, - { - "epoch": 2.6538789428815006, - "grad_norm": 0.09072619676589966, - "learning_rate": 1.979184224457017e-05, - "loss": 0.0038232788443565368, - "step": 15565 - }, - { - "epoch": 2.6547314578005117, - "grad_norm": 0.04944036900997162, - "learning_rate": 1.9769261091140746e-05, - "loss": 0.002762124501168728, - "step": 15570 - }, - { - "epoch": 2.6555839727195227, - "grad_norm": 0.07315114885568619, - "learning_rate": 1.974668821501005e-05, - "loss": 0.0018053753301501274, - "step": 15575 - }, - { - "epoch": 2.656436487638534, - "grad_norm": 0.03133604675531387, - "learning_rate": 1.972412362671584e-05, - "loss": 0.0012923687696456908, - "step": 15580 - }, - { - "epoch": 2.657289002557545, - "grad_norm": 0.07396573573350906, - "learning_rate": 1.9701567336792037e-05, - "loss": 0.004405549541115761, - "step": 15585 - }, - { - "epoch": 2.658141517476556, - "grad_norm": 0.05702332779765129, - "learning_rate": 1.967901935576867e-05, - "loss": 0.001864958368241787, - "step": 15590 - }, - { - "epoch": 2.658994032395567, - "grad_norm": 0.06003536656498909, - "learning_rate": 1.9656479694171882e-05, - "loss": 0.0025712646543979644, - "step": 15595 - }, - { - "epoch": 2.659846547314578, - "grad_norm": 0.06424745172262192, - "learning_rate": 1.963394836252393e-05, - "loss": 0.002156762033700943, - "step": 15600 - }, - { - "epoch": 2.660699062233589, - "grad_norm": 0.0703018307685852, - "learning_rate": 1.9611425371343193e-05, - "loss": 0.0034677576273679732, - "step": 15605 - }, - { - "epoch": 2.6615515771526, - "grad_norm": 0.06616941094398499, - "learning_rate": 1.958891073114414e-05, - "loss": 0.002393544837832451, - "step": 15610 - }, - { - "epoch": 2.662404092071611, - "grad_norm": 0.04988931491971016, - "learning_rate": 1.9566404452437374e-05, - "loss": 0.00223421361297369, - "step": 15615 - }, - { - "epoch": 2.6632566069906223, - "grad_norm": 0.04869920015335083, - "learning_rate": 1.9543906545729573e-05, - "loss": 0.0013234581798315047, - "step": 15620 - }, - { - "epoch": 2.6641091219096333, - "grad_norm": 0.06417152285575867, - "learning_rate": 1.9521417021523482e-05, - "loss": 0.002075556293129921, - "step": 15625 - }, - { - "epoch": 2.6649616368286444, - "grad_norm": 0.07077648490667343, - "learning_rate": 1.949893589031799e-05, - "loss": 0.002136031910777092, - "step": 15630 - }, - { - "epoch": 2.6658141517476555, - "grad_norm": 0.060446277260780334, - "learning_rate": 1.9476463162608016e-05, - "loss": 0.0025891490280628205, - "step": 15635 - }, - { - "epoch": 2.6666666666666665, - "grad_norm": 0.0396479107439518, - "learning_rate": 1.94539988488846e-05, - "loss": 0.0018554994836449622, - "step": 15640 - }, - { - "epoch": 2.6675191815856776, - "grad_norm": 0.06787905842065811, - "learning_rate": 1.9431542959634817e-05, - "loss": 0.0017763305455446242, - "step": 15645 - }, - { - "epoch": 2.668371696504689, - "grad_norm": 0.08236365765333176, - "learning_rate": 1.940909550534182e-05, - "loss": 0.0018430478870868684, - "step": 15650 - }, - { - "epoch": 2.6692242114236997, - "grad_norm": 0.10013429820537567, - "learning_rate": 1.9386656496484816e-05, - "loss": 0.0021154627203941343, - "step": 15655 - }, - { - "epoch": 2.670076726342711, - "grad_norm": 0.046891309320926666, - "learning_rate": 1.936422594353911e-05, - "loss": 0.0018745694309473039, - "step": 15660 - }, - { - "epoch": 2.670929241261722, - "grad_norm": 0.03863799199461937, - "learning_rate": 1.9341803856976016e-05, - "loss": 0.002184972912073135, - "step": 15665 - }, - { - "epoch": 2.6717817561807333, - "grad_norm": 0.05828642472624779, - "learning_rate": 1.9319390247262896e-05, - "loss": 0.0022127529606223106, - "step": 15670 - }, - { - "epoch": 2.6726342710997444, - "grad_norm": 0.08675269782543182, - "learning_rate": 1.9296985124863194e-05, - "loss": 0.002008732967078686, - "step": 15675 - }, - { - "epoch": 2.6734867860187554, - "grad_norm": 0.0700579434633255, - "learning_rate": 1.9274588500236345e-05, - "loss": 0.0024785833433270455, - "step": 15680 - }, - { - "epoch": 2.6743393009377665, - "grad_norm": 0.10120563209056854, - "learning_rate": 1.9252200383837854e-05, - "loss": 0.002890965715050697, - "step": 15685 - }, - { - "epoch": 2.6751918158567776, - "grad_norm": 0.10622604191303253, - "learning_rate": 1.9229820786119235e-05, - "loss": 0.002458018809556961, - "step": 15690 - }, - { - "epoch": 2.6760443307757886, - "grad_norm": 0.07292070984840393, - "learning_rate": 1.920744971752803e-05, - "loss": 0.0030672624707221985, - "step": 15695 - }, - { - "epoch": 2.6768968456947997, - "grad_norm": 0.030893906950950623, - "learning_rate": 1.918508718850779e-05, - "loss": 0.002947884239256382, - "step": 15700 - }, - { - "epoch": 2.6777493606138107, - "grad_norm": 0.07428406924009323, - "learning_rate": 1.9162733209498077e-05, - "loss": 0.003342418372631073, - "step": 15705 - }, - { - "epoch": 2.678601875532822, - "grad_norm": 0.014073869213461876, - "learning_rate": 1.9140387790934502e-05, - "loss": 0.0020058237016201018, - "step": 15710 - }, - { - "epoch": 2.679454390451833, - "grad_norm": 0.08076811581850052, - "learning_rate": 1.911805094324863e-05, - "loss": 0.0020218659192323685, - "step": 15715 - }, - { - "epoch": 2.680306905370844, - "grad_norm": 0.059959858655929565, - "learning_rate": 1.909572267686804e-05, - "loss": 0.0012911208905279636, - "step": 15720 - }, - { - "epoch": 2.681159420289855, - "grad_norm": 0.08688201755285263, - "learning_rate": 1.9073403002216313e-05, - "loss": 0.001681494526565075, - "step": 15725 - }, - { - "epoch": 2.682011935208866, - "grad_norm": 0.08083862066268921, - "learning_rate": 1.905109192971304e-05, - "loss": 0.002467956393957138, - "step": 15730 - }, - { - "epoch": 2.682864450127877, - "grad_norm": 0.06145070865750313, - "learning_rate": 1.902878946977375e-05, - "loss": 0.004355132207274437, - "step": 15735 - }, - { - "epoch": 2.683716965046888, - "grad_norm": 0.07410819083452225, - "learning_rate": 1.900649563280997e-05, - "loss": 0.0029904641211032867, - "step": 15740 - }, - { - "epoch": 2.6845694799658997, - "grad_norm": 0.03833797574043274, - "learning_rate": 1.8984210429229217e-05, - "loss": 0.0012984732165932655, - "step": 15745 - }, - { - "epoch": 2.6854219948849103, - "grad_norm": 0.023088248446583748, - "learning_rate": 1.896193386943494e-05, - "loss": 0.001397434249520302, - "step": 15750 - }, - { - "epoch": 2.686274509803922, - "grad_norm": 0.06918703764677048, - "learning_rate": 1.8939665963826616e-05, - "loss": 0.0015222997404634952, - "step": 15755 - }, - { - "epoch": 2.6871270247229324, - "grad_norm": 0.0286374781280756, - "learning_rate": 1.891740672279962e-05, - "loss": 0.0015881337225437165, - "step": 15760 - }, - { - "epoch": 2.687979539641944, - "grad_norm": 0.05485616624355316, - "learning_rate": 1.88951561567453e-05, - "loss": 0.0034288309514522554, - "step": 15765 - }, - { - "epoch": 2.688832054560955, - "grad_norm": 0.05021583288908005, - "learning_rate": 1.887291427605097e-05, - "loss": 0.0013944344595074654, - "step": 15770 - }, - { - "epoch": 2.689684569479966, - "grad_norm": 0.06752395629882812, - "learning_rate": 1.8850681091099895e-05, - "loss": 0.002590004727244377, - "step": 15775 - }, - { - "epoch": 2.690537084398977, - "grad_norm": 0.04273150861263275, - "learning_rate": 1.8828456612271255e-05, - "loss": 0.0019359454512596131, - "step": 15780 - }, - { - "epoch": 2.691389599317988, - "grad_norm": 0.0928453654050827, - "learning_rate": 1.8806240849940167e-05, - "loss": 0.003046049177646637, - "step": 15785 - }, - { - "epoch": 2.692242114236999, - "grad_norm": 0.025754287838935852, - "learning_rate": 1.8784033814477692e-05, - "loss": 0.0018295232206583024, - "step": 15790 - }, - { - "epoch": 2.6930946291560103, - "grad_norm": 0.07345419377088547, - "learning_rate": 1.8761835516250806e-05, - "loss": 0.0018985627219080925, - "step": 15795 - }, - { - "epoch": 2.6939471440750213, - "grad_norm": 0.08317514508962631, - "learning_rate": 1.873964596562243e-05, - "loss": 0.0030419353395700456, - "step": 15800 - }, - { - "epoch": 2.6947996589940324, - "grad_norm": 0.07300770282745361, - "learning_rate": 1.8717465172951377e-05, - "loss": 0.002040323428809643, - "step": 15805 - }, - { - "epoch": 2.6956521739130435, - "grad_norm": 0.07284363359212875, - "learning_rate": 1.8695293148592362e-05, - "loss": 0.001639954373240471, - "step": 15810 - }, - { - "epoch": 2.6965046888320545, - "grad_norm": 0.05817059800028801, - "learning_rate": 1.867312990289606e-05, - "loss": 0.0015234597958624363, - "step": 15815 - }, - { - "epoch": 2.6973572037510656, - "grad_norm": 0.11319714039564133, - "learning_rate": 1.865097544620897e-05, - "loss": 0.0018295228481292724, - "step": 15820 - }, - { - "epoch": 2.6982097186700766, - "grad_norm": 0.10493957251310349, - "learning_rate": 1.8628829788873567e-05, - "loss": 0.0025029994547367098, - "step": 15825 - }, - { - "epoch": 2.6990622335890877, - "grad_norm": 0.03161423280835152, - "learning_rate": 1.860669294122816e-05, - "loss": 0.0014271627180278302, - "step": 15830 - }, - { - "epoch": 2.6999147485080988, - "grad_norm": 0.03267689794301987, - "learning_rate": 1.858456491360697e-05, - "loss": 0.0012216478586196899, - "step": 15835 - }, - { - "epoch": 2.70076726342711, - "grad_norm": 0.07986247539520264, - "learning_rate": 1.856244571634008e-05, - "loss": 0.0018704459071159363, - "step": 15840 - }, - { - "epoch": 2.701619778346121, - "grad_norm": 0.10120461881160736, - "learning_rate": 1.85403353597535e-05, - "loss": 0.0020706810057163237, - "step": 15845 - }, - { - "epoch": 2.7024722932651324, - "grad_norm": 0.05339881405234337, - "learning_rate": 1.8518233854169056e-05, - "loss": 0.0017986055463552475, - "step": 15850 - }, - { - "epoch": 2.703324808184143, - "grad_norm": 0.11433786898851395, - "learning_rate": 1.8496141209904464e-05, - "loss": 0.0034054510295391084, - "step": 15855 - }, - { - "epoch": 2.7041773231031545, - "grad_norm": 0.061081189662218094, - "learning_rate": 1.8474057437273328e-05, - "loss": 0.002348882704973221, - "step": 15860 - }, - { - "epoch": 2.705029838022165, - "grad_norm": 0.055195316672325134, - "learning_rate": 1.8451982546585055e-05, - "loss": 0.0015221487730741501, - "step": 15865 - }, - { - "epoch": 2.7058823529411766, - "grad_norm": 0.06800514459609985, - "learning_rate": 1.8429916548144973e-05, - "loss": 0.0023088542744517325, - "step": 15870 - }, - { - "epoch": 2.7067348678601877, - "grad_norm": 0.05646739527583122, - "learning_rate": 1.8407859452254206e-05, - "loss": 0.0024141166359186172, - "step": 15875 - }, - { - "epoch": 2.7075873827791987, - "grad_norm": 0.10886628180742264, - "learning_rate": 1.8385811269209743e-05, - "loss": 0.0019476715475320815, - "step": 15880 - }, - { - "epoch": 2.70843989769821, - "grad_norm": 0.04279763624072075, - "learning_rate": 1.8363772009304395e-05, - "loss": 0.002021237276494503, - "step": 15885 - }, - { - "epoch": 2.709292412617221, - "grad_norm": 0.09583209455013275, - "learning_rate": 1.8341741682826852e-05, - "loss": 0.002025018632411957, - "step": 15890 - }, - { - "epoch": 2.710144927536232, - "grad_norm": 0.06695323437452316, - "learning_rate": 1.8319720300061582e-05, - "loss": 0.0026269391179084777, - "step": 15895 - }, - { - "epoch": 2.710997442455243, - "grad_norm": 0.07438764721155167, - "learning_rate": 1.829770787128889e-05, - "loss": 0.0014647828415036202, - "step": 15900 - }, - { - "epoch": 2.711849957374254, - "grad_norm": 0.05395448952913284, - "learning_rate": 1.8275704406784933e-05, - "loss": 0.0024559808894991874, - "step": 15905 - }, - { - "epoch": 2.712702472293265, - "grad_norm": 0.03163938969373703, - "learning_rate": 1.825370991682164e-05, - "loss": 0.0022430509328842164, - "step": 15910 - }, - { - "epoch": 2.713554987212276, - "grad_norm": 0.104282446205616, - "learning_rate": 1.8231724411666794e-05, - "loss": 0.001472956594079733, - "step": 15915 - }, - { - "epoch": 2.7144075021312872, - "grad_norm": 0.07355596870183945, - "learning_rate": 1.8209747901583944e-05, - "loss": 0.0023859225213527678, - "step": 15920 - }, - { - "epoch": 2.7152600170502983, - "grad_norm": 0.06525922566652298, - "learning_rate": 1.8187780396832463e-05, - "loss": 0.00265895314514637, - "step": 15925 - }, - { - "epoch": 2.7161125319693094, - "grad_norm": 0.09379115700721741, - "learning_rate": 1.8165821907667505e-05, - "loss": 0.002496413141489029, - "step": 15930 - }, - { - "epoch": 2.7169650468883204, - "grad_norm": 0.05254679545760155, - "learning_rate": 1.8143872444340017e-05, - "loss": 0.0022162407636642455, - "step": 15935 - }, - { - "epoch": 2.7178175618073315, - "grad_norm": 0.06203889846801758, - "learning_rate": 1.8121932017096758e-05, - "loss": 0.0016900423914194107, - "step": 15940 - }, - { - "epoch": 2.718670076726343, - "grad_norm": 0.08532653003931046, - "learning_rate": 1.810000063618023e-05, - "loss": 0.0028453752398490905, - "step": 15945 - }, - { - "epoch": 2.7195225916453536, - "grad_norm": 0.08361469209194183, - "learning_rate": 1.807807831182875e-05, - "loss": 0.0029737703502178193, - "step": 15950 - }, - { - "epoch": 2.720375106564365, - "grad_norm": 0.06439653784036636, - "learning_rate": 1.805616505427637e-05, - "loss": 0.002233676239848137, - "step": 15955 - }, - { - "epoch": 2.7212276214833757, - "grad_norm": 0.09197837114334106, - "learning_rate": 1.803426087375295e-05, - "loss": 0.0020749013870954513, - "step": 15960 - }, - { - "epoch": 2.722080136402387, - "grad_norm": 0.055145513266325, - "learning_rate": 1.8012365780484074e-05, - "loss": 0.0013141044415533542, - "step": 15965 - }, - { - "epoch": 2.7229326513213983, - "grad_norm": 0.06788767874240875, - "learning_rate": 1.7990479784691105e-05, - "loss": 0.0023008717224001886, - "step": 15970 - }, - { - "epoch": 2.7237851662404093, - "grad_norm": 0.10216987133026123, - "learning_rate": 1.7968602896591152e-05, - "loss": 0.002799564599990845, - "step": 15975 - }, - { - "epoch": 2.7246376811594204, - "grad_norm": 0.0995464101433754, - "learning_rate": 1.7946735126397056e-05, - "loss": 0.0023927824571728707, - "step": 15980 - }, - { - "epoch": 2.7254901960784315, - "grad_norm": 0.05936437472701073, - "learning_rate": 1.7924876484317453e-05, - "loss": 0.001253789383918047, - "step": 15985 - }, - { - "epoch": 2.7263427109974425, - "grad_norm": 0.06160435080528259, - "learning_rate": 1.7903026980556672e-05, - "loss": 0.00238239299505949, - "step": 15990 - }, - { - "epoch": 2.7271952259164536, - "grad_norm": 0.05691118910908699, - "learning_rate": 1.788118662531477e-05, - "loss": 0.0015995081514120102, - "step": 15995 - }, - { - "epoch": 2.7280477408354646, - "grad_norm": 0.0878402590751648, - "learning_rate": 1.7859355428787564e-05, - "loss": 0.002066444233059883, - "step": 16000 - }, - { - "epoch": 2.7289002557544757, - "grad_norm": 0.04065166413784027, - "learning_rate": 1.7837533401166598e-05, - "loss": 0.0022698283195495606, - "step": 16005 - }, - { - "epoch": 2.7297527706734868, - "grad_norm": 0.08980758488178253, - "learning_rate": 1.7815720552639105e-05, - "loss": 0.0016043156385421753, - "step": 16010 - }, - { - "epoch": 2.730605285592498, - "grad_norm": 0.05619784817099571, - "learning_rate": 1.7793916893388055e-05, - "loss": 0.0025583259761333466, - "step": 16015 - }, - { - "epoch": 2.731457800511509, - "grad_norm": 0.09853291511535645, - "learning_rate": 1.7772122433592116e-05, - "loss": 0.0025311170145869257, - "step": 16020 - }, - { - "epoch": 2.73231031543052, - "grad_norm": 0.044340990483760834, - "learning_rate": 1.7750337183425652e-05, - "loss": 0.0020809115841984747, - "step": 16025 - }, - { - "epoch": 2.733162830349531, - "grad_norm": 0.024994025006890297, - "learning_rate": 1.772856115305877e-05, - "loss": 0.001932576857507229, - "step": 16030 - }, - { - "epoch": 2.734015345268542, - "grad_norm": 0.07059352099895477, - "learning_rate": 1.770679435265724e-05, - "loss": 0.002347341552376747, - "step": 16035 - }, - { - "epoch": 2.734867860187553, - "grad_norm": 0.08121193200349808, - "learning_rate": 1.7685036792382506e-05, - "loss": 0.0015123223885893822, - "step": 16040 - }, - { - "epoch": 2.735720375106564, - "grad_norm": 0.07900503277778625, - "learning_rate": 1.766328848239175e-05, - "loss": 0.0028667191043496134, - "step": 16045 - }, - { - "epoch": 2.7365728900255757, - "grad_norm": 0.08574212342500687, - "learning_rate": 1.7641549432837778e-05, - "loss": 0.002038617432117462, - "step": 16050 - }, - { - "epoch": 2.7374254049445863, - "grad_norm": 0.06154071167111397, - "learning_rate": 1.7619819653869132e-05, - "loss": 0.0017743892967700958, - "step": 16055 - }, - { - "epoch": 2.738277919863598, - "grad_norm": 0.06745338439941406, - "learning_rate": 1.7598099155629982e-05, - "loss": 0.0018204674124717712, - "step": 16060 - }, - { - "epoch": 2.7391304347826084, - "grad_norm": 0.029756128787994385, - "learning_rate": 1.7576387948260175e-05, - "loss": 0.0020426372066140175, - "step": 16065 - }, - { - "epoch": 2.73998294970162, - "grad_norm": 0.13447973132133484, - "learning_rate": 1.7554686041895217e-05, - "loss": 0.0023698143661022185, - "step": 16070 - }, - { - "epoch": 2.740835464620631, - "grad_norm": 0.09888533502817154, - "learning_rate": 1.7532993446666298e-05, - "loss": 0.0024117348715662957, - "step": 16075 - }, - { - "epoch": 2.741687979539642, - "grad_norm": 0.05919703096151352, - "learning_rate": 1.751131017270024e-05, - "loss": 0.0027751058340072634, - "step": 16080 - }, - { - "epoch": 2.742540494458653, - "grad_norm": 0.04920949414372444, - "learning_rate": 1.74896362301195e-05, - "loss": 0.0022046850994229318, - "step": 16085 - }, - { - "epoch": 2.743393009377664, - "grad_norm": 0.028095668181777, - "learning_rate": 1.746797162904222e-05, - "loss": 0.001455264538526535, - "step": 16090 - }, - { - "epoch": 2.7442455242966752, - "grad_norm": 0.03558868542313576, - "learning_rate": 1.7446316379582125e-05, - "loss": 0.0023241037502884864, - "step": 16095 - }, - { - "epoch": 2.7450980392156863, - "grad_norm": 0.07124538719654083, - "learning_rate": 1.742467049184864e-05, - "loss": 0.0014614716172218322, - "step": 16100 - }, - { - "epoch": 2.7459505541346974, - "grad_norm": 0.07355284690856934, - "learning_rate": 1.7403033975946774e-05, - "loss": 0.0018932107836008073, - "step": 16105 - }, - { - "epoch": 2.7468030690537084, - "grad_norm": 0.06485545635223389, - "learning_rate": 1.738140684197717e-05, - "loss": 0.0021881703287363052, - "step": 16110 - }, - { - "epoch": 2.7476555839727195, - "grad_norm": 0.05748758837580681, - "learning_rate": 1.735978910003607e-05, - "loss": 0.0019190840423107148, - "step": 16115 - }, - { - "epoch": 2.7485080988917305, - "grad_norm": 0.04986255615949631, - "learning_rate": 1.7338180760215395e-05, - "loss": 0.001525167189538479, - "step": 16120 - }, - { - "epoch": 2.7493606138107416, - "grad_norm": 0.06383983045816422, - "learning_rate": 1.731658183260262e-05, - "loss": 0.0026792695745825766, - "step": 16125 - }, - { - "epoch": 2.749531116794544, - "eval_loss": 0.047858335077762604, - "eval_runtime": 3.7263, - "eval_samples_per_second": 67.627, - "eval_steps_per_second": 1.073, - "step": 16126 - }, - { - "eval_cer_subset": 0.01459117038774081, - "eval_cer_subset_edit_distance": 896, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 16126 - }, - { - "epoch": 2.7502131287297527, - "grad_norm": 0.10121899098157883, - "learning_rate": 1.7294992327280826e-05, - "loss": 0.0027876641601324082, - "step": 16130 - }, - { - "epoch": 2.7510656436487637, - "grad_norm": 0.029004251584410667, - "learning_rate": 1.7273412254328743e-05, - "loss": 0.0015729216858744622, - "step": 16135 - }, - { - "epoch": 2.7519181585677748, - "grad_norm": 0.06613736599683762, - "learning_rate": 1.7251841623820638e-05, - "loss": 0.0020701587200164795, - "step": 16140 - }, - { - "epoch": 2.7527706734867863, - "grad_norm": 0.09212189167737961, - "learning_rate": 1.7230280445826422e-05, - "loss": 0.0026726944372057913, - "step": 16145 - }, - { - "epoch": 2.753623188405797, - "grad_norm": 0.12724192440509796, - "learning_rate": 1.720872873041157e-05, - "loss": 0.002590762265026569, - "step": 16150 - }, - { - "epoch": 2.7544757033248084, - "grad_norm": 0.043855708092451096, - "learning_rate": 1.7187186487637124e-05, - "loss": 0.001780974492430687, - "step": 16155 - }, - { - "epoch": 2.755328218243819, - "grad_norm": 0.10562611371278763, - "learning_rate": 1.7165653727559725e-05, - "loss": 0.002336742728948593, - "step": 16160 - }, - { - "epoch": 2.7561807331628305, - "grad_norm": 0.05162282660603523, - "learning_rate": 1.7144130460231574e-05, - "loss": 0.0018106916919350623, - "step": 16165 - }, - { - "epoch": 2.7570332480818416, - "grad_norm": 0.020110471174120903, - "learning_rate": 1.7122616695700467e-05, - "loss": 0.0014431983232498168, - "step": 16170 - }, - { - "epoch": 2.7578857630008526, - "grad_norm": 0.15154017508029938, - "learning_rate": 1.7101112444009725e-05, - "loss": 0.0019074320793151856, - "step": 16175 - }, - { - "epoch": 2.7587382779198637, - "grad_norm": 0.03481750935316086, - "learning_rate": 1.7079617715198264e-05, - "loss": 0.0037923645228147506, - "step": 16180 - }, - { - "epoch": 2.7595907928388748, - "grad_norm": 0.024081731215119362, - "learning_rate": 1.7058132519300524e-05, - "loss": 0.002791491337120533, - "step": 16185 - }, - { - "epoch": 2.760443307757886, - "grad_norm": 0.07880852371454239, - "learning_rate": 1.703665686634653e-05, - "loss": 0.0028480572625994684, - "step": 16190 - }, - { - "epoch": 2.761295822676897, - "grad_norm": 0.06910362094640732, - "learning_rate": 1.701519076636182e-05, - "loss": 0.0018049828708171845, - "step": 16195 - }, - { - "epoch": 2.762148337595908, - "grad_norm": 0.09321995079517365, - "learning_rate": 1.699373422936748e-05, - "loss": 0.001952703855931759, - "step": 16200 - }, - { - "epoch": 2.763000852514919, - "grad_norm": 0.05871212109923363, - "learning_rate": 1.6972287265380137e-05, - "loss": 0.00121518075466156, - "step": 16205 - }, - { - "epoch": 2.76385336743393, - "grad_norm": 0.10542161762714386, - "learning_rate": 1.6950849884411936e-05, - "loss": 0.0024038642644882203, - "step": 16210 - }, - { - "epoch": 2.764705882352941, - "grad_norm": 0.0580933652818203, - "learning_rate": 1.6929422096470582e-05, - "loss": 0.0021081961691379546, - "step": 16215 - }, - { - "epoch": 2.765558397271952, - "grad_norm": 0.024878472089767456, - "learning_rate": 1.6908003911559256e-05, - "loss": 0.0022545790299773215, - "step": 16220 - }, - { - "epoch": 2.7664109121909632, - "grad_norm": 0.060553766787052155, - "learning_rate": 1.6886595339676703e-05, - "loss": 0.0015277018770575523, - "step": 16225 - }, - { - "epoch": 2.7672634271099743, - "grad_norm": 0.06857582181692123, - "learning_rate": 1.6865196390817137e-05, - "loss": 0.001996198855340481, - "step": 16230 - }, - { - "epoch": 2.7681159420289854, - "grad_norm": 0.06866193562746048, - "learning_rate": 1.6843807074970316e-05, - "loss": 0.0014093054458498954, - "step": 16235 - }, - { - "epoch": 2.7689684569479964, - "grad_norm": 0.12889426946640015, - "learning_rate": 1.6822427402121476e-05, - "loss": 0.0029415406286716463, - "step": 16240 - }, - { - "epoch": 2.7698209718670075, - "grad_norm": 0.05907638370990753, - "learning_rate": 1.6801057382251363e-05, - "loss": 0.0020356021821498873, - "step": 16245 - }, - { - "epoch": 2.770673486786019, - "grad_norm": 0.05899703502655029, - "learning_rate": 1.6779697025336205e-05, - "loss": 0.0010949989780783653, - "step": 16250 - }, - { - "epoch": 2.7715260017050296, - "grad_norm": 0.048360541462898254, - "learning_rate": 1.6758346341347716e-05, - "loss": 0.002375531755387783, - "step": 16255 - }, - { - "epoch": 2.772378516624041, - "grad_norm": 0.06712590157985687, - "learning_rate": 1.6737005340253134e-05, - "loss": 0.0016120089218020438, - "step": 16260 - }, - { - "epoch": 2.7732310315430517, - "grad_norm": 0.04694962501525879, - "learning_rate": 1.6715674032015137e-05, - "loss": 0.0010866542346775533, - "step": 16265 - }, - { - "epoch": 2.7740835464620632, - "grad_norm": 0.06813527643680573, - "learning_rate": 1.6694352426591873e-05, - "loss": 0.001494432892650366, - "step": 16270 - }, - { - "epoch": 2.7749360613810743, - "grad_norm": 0.12899814546108246, - "learning_rate": 1.6673040533937004e-05, - "loss": 0.003590015694499016, - "step": 16275 - }, - { - "epoch": 2.7757885763000854, - "grad_norm": 0.013963109813630581, - "learning_rate": 1.6651738363999604e-05, - "loss": 0.0019298167899250984, - "step": 16280 - }, - { - "epoch": 2.7766410912190964, - "grad_norm": 0.03605286777019501, - "learning_rate": 1.6630445926724262e-05, - "loss": 0.0031480703502893446, - "step": 16285 - }, - { - "epoch": 2.7774936061381075, - "grad_norm": 0.10986622422933578, - "learning_rate": 1.660916323205098e-05, - "loss": 0.002917572297155857, - "step": 16290 - }, - { - "epoch": 2.7783461210571185, - "grad_norm": 0.057930365204811096, - "learning_rate": 1.658789028991523e-05, - "loss": 0.0026299282908439636, - "step": 16295 - }, - { - "epoch": 2.7791986359761296, - "grad_norm": 0.029447276145219803, - "learning_rate": 1.6566627110247917e-05, - "loss": 0.0022400498390197756, - "step": 16300 - }, - { - "epoch": 2.7800511508951407, - "grad_norm": 0.045625604689121246, - "learning_rate": 1.6545373702975423e-05, - "loss": 0.0010993774980306626, - "step": 16305 - }, - { - "epoch": 2.7809036658141517, - "grad_norm": 0.03276116028428078, - "learning_rate": 1.6524130078019536e-05, - "loss": 0.0017030857503414153, - "step": 16310 - }, - { - "epoch": 2.7817561807331628, - "grad_norm": 0.07950260490179062, - "learning_rate": 1.650289624529747e-05, - "loss": 0.0029186248779296876, - "step": 16315 - }, - { - "epoch": 2.782608695652174, - "grad_norm": 0.03196907788515091, - "learning_rate": 1.6481672214721915e-05, - "loss": 0.0021285150200128556, - "step": 16320 - }, - { - "epoch": 2.783461210571185, - "grad_norm": 0.08347548544406891, - "learning_rate": 1.6460457996200926e-05, - "loss": 0.0018467068672180175, - "step": 16325 - }, - { - "epoch": 2.784313725490196, - "grad_norm": 0.062316033989191055, - "learning_rate": 1.643925359963803e-05, - "loss": 0.002080459892749786, - "step": 16330 - }, - { - "epoch": 2.785166240409207, - "grad_norm": 0.06067380681633949, - "learning_rate": 1.641805903493214e-05, - "loss": 0.0014378841035068036, - "step": 16335 - }, - { - "epoch": 2.786018755328218, - "grad_norm": 0.19668881595134735, - "learning_rate": 1.6396874311977574e-05, - "loss": 0.0018663834780454636, - "step": 16340 - }, - { - "epoch": 2.7868712702472296, - "grad_norm": 0.03857511281967163, - "learning_rate": 1.637569944066407e-05, - "loss": 0.0017508219927549363, - "step": 16345 - }, - { - "epoch": 2.78772378516624, - "grad_norm": 0.06684751063585281, - "learning_rate": 1.6354534430876746e-05, - "loss": 0.0021339647471904756, - "step": 16350 - }, - { - "epoch": 2.7885763000852517, - "grad_norm": 0.0722980946302414, - "learning_rate": 1.633337929249616e-05, - "loss": 0.002456018142402172, - "step": 16355 - }, - { - "epoch": 2.7894288150042623, - "grad_norm": 0.013861587271094322, - "learning_rate": 1.6312234035398214e-05, - "loss": 0.0013738014735281468, - "step": 16360 - }, - { - "epoch": 2.790281329923274, - "grad_norm": 0.05103524401783943, - "learning_rate": 1.6291098669454237e-05, - "loss": 0.0012777662836015225, - "step": 16365 - }, - { - "epoch": 2.791133844842285, - "grad_norm": 0.08019815385341644, - "learning_rate": 1.6269973204530896e-05, - "loss": 0.0021779144182801245, - "step": 16370 - }, - { - "epoch": 2.791986359761296, - "grad_norm": 0.11674029380083084, - "learning_rate": 1.6248857650490287e-05, - "loss": 0.003945905342698097, - "step": 16375 - }, - { - "epoch": 2.792838874680307, - "grad_norm": 0.10289142280817032, - "learning_rate": 1.622775201718984e-05, - "loss": 0.0033991221338510514, - "step": 16380 - }, - { - "epoch": 2.793691389599318, - "grad_norm": 0.08295715600252151, - "learning_rate": 1.6206656314482372e-05, - "loss": 0.0025476697832345963, - "step": 16385 - }, - { - "epoch": 2.794543904518329, - "grad_norm": 0.09820916503667831, - "learning_rate": 1.618557055221605e-05, - "loss": 0.002469751611351967, - "step": 16390 - }, - { - "epoch": 2.79539641943734, - "grad_norm": 0.04547690227627754, - "learning_rate": 1.61644947402344e-05, - "loss": 0.0017419423907995223, - "step": 16395 - }, - { - "epoch": 2.7962489343563512, - "grad_norm": 0.09098807722330093, - "learning_rate": 1.6143428888376336e-05, - "loss": 0.0025540072470903396, - "step": 16400 - }, - { - "epoch": 2.7971014492753623, - "grad_norm": 0.06253538280725479, - "learning_rate": 1.6122373006476078e-05, - "loss": 0.00161474347114563, - "step": 16405 - }, - { - "epoch": 2.7979539641943734, - "grad_norm": 0.10068398714065552, - "learning_rate": 1.6101327104363236e-05, - "loss": 0.0030464882031083105, - "step": 16410 - }, - { - "epoch": 2.7988064791133844, - "grad_norm": 0.04052518680691719, - "learning_rate": 1.6080291191862708e-05, - "loss": 0.001292982418090105, - "step": 16415 - }, - { - "epoch": 2.7996589940323955, - "grad_norm": 0.09480784833431244, - "learning_rate": 1.605926527879478e-05, - "loss": 0.002949811331927776, - "step": 16420 - }, - { - "epoch": 2.8005115089514065, - "grad_norm": 0.08064186573028564, - "learning_rate": 1.603824937497505e-05, - "loss": 0.001863202080130577, - "step": 16425 - }, - { - "epoch": 2.8013640238704176, - "grad_norm": 0.03577118366956711, - "learning_rate": 1.601724349021443e-05, - "loss": 0.0015472200699150561, - "step": 16430 - }, - { - "epoch": 2.8022165387894287, - "grad_norm": 0.04698857292532921, - "learning_rate": 1.5996247634319162e-05, - "loss": 0.002168430760502815, - "step": 16435 - }, - { - "epoch": 2.80306905370844, - "grad_norm": 0.09031593799591064, - "learning_rate": 1.5975261817090803e-05, - "loss": 0.0017798427492380143, - "step": 16440 - }, - { - "epoch": 2.803921568627451, - "grad_norm": 0.24021683633327484, - "learning_rate": 1.5954286048326258e-05, - "loss": 0.0024022582918405535, - "step": 16445 - }, - { - "epoch": 2.8047740835464623, - "grad_norm": 0.07379221171140671, - "learning_rate": 1.5933320337817685e-05, - "loss": 0.0016447069123387338, - "step": 16450 - }, - { - "epoch": 2.805626598465473, - "grad_norm": 0.07145442813634872, - "learning_rate": 1.59123646953526e-05, - "loss": 0.002100140042603016, - "step": 16455 - }, - { - "epoch": 2.8064791133844844, - "grad_norm": 0.06444204598665237, - "learning_rate": 1.5891419130713783e-05, - "loss": 0.0022544978186488152, - "step": 16460 - }, - { - "epoch": 2.8073316283034955, - "grad_norm": 0.07764707505702972, - "learning_rate": 1.5870483653679307e-05, - "loss": 0.002028309740126133, - "step": 16465 - }, - { - "epoch": 2.8081841432225065, - "grad_norm": 0.13890637457370758, - "learning_rate": 1.584955827402257e-05, - "loss": 0.001833663322031498, - "step": 16470 - }, - { - "epoch": 2.8090366581415176, - "grad_norm": 0.06412612646818161, - "learning_rate": 1.5828643001512236e-05, - "loss": 0.0017296869307756424, - "step": 16475 - }, - { - "epoch": 2.8098891730605287, - "grad_norm": 0.05978688597679138, - "learning_rate": 1.5807737845912234e-05, - "loss": 0.001933468133211136, - "step": 16480 - }, - { - "epoch": 2.8107416879795397, - "grad_norm": 0.1131395548582077, - "learning_rate": 1.5786842816981778e-05, - "loss": 0.003291580080986023, - "step": 16485 - }, - { - "epoch": 2.8115942028985508, - "grad_norm": 0.0549713559448719, - "learning_rate": 1.5765957924475394e-05, - "loss": 0.0019789932295680044, - "step": 16490 - }, - { - "epoch": 2.812446717817562, - "grad_norm": 0.08038460463285446, - "learning_rate": 1.5745083178142833e-05, - "loss": 0.002347235009074211, - "step": 16495 - }, - { - "epoch": 2.813299232736573, - "grad_norm": 0.05014783889055252, - "learning_rate": 1.5724218587729098e-05, - "loss": 0.0016623528674244881, - "step": 16500 - }, - { - "epoch": 2.814151747655584, - "grad_norm": 0.05042316019535065, - "learning_rate": 1.5703364162974503e-05, - "loss": 0.0018199939280748368, - "step": 16505 - }, - { - "epoch": 2.815004262574595, - "grad_norm": 0.056051138788461685, - "learning_rate": 1.5682519913614565e-05, - "loss": 0.0016215803101658822, - "step": 16510 - }, - { - "epoch": 2.815856777493606, - "grad_norm": 0.04295732453465462, - "learning_rate": 1.5661685849380098e-05, - "loss": 0.0020044256001710893, - "step": 16515 - }, - { - "epoch": 2.816709292412617, - "grad_norm": 0.02020161598920822, - "learning_rate": 1.564086197999712e-05, - "loss": 0.0018876813352108, - "step": 16520 - }, - { - "epoch": 2.817561807331628, - "grad_norm": 0.09220346808433533, - "learning_rate": 1.562004831518691e-05, - "loss": 0.0015535833314061164, - "step": 16525 - }, - { - "epoch": 2.8184143222506393, - "grad_norm": 0.09728234261274338, - "learning_rate": 1.5599244864665966e-05, - "loss": 0.0015536649152636528, - "step": 16530 - }, - { - "epoch": 2.8192668371696503, - "grad_norm": 0.17288024723529816, - "learning_rate": 1.5578451638146053e-05, - "loss": 0.0021170184016227724, - "step": 16535 - }, - { - "epoch": 2.8201193520886614, - "grad_norm": 0.056582558900117874, - "learning_rate": 1.5557668645334132e-05, - "loss": 0.0030540911480784415, - "step": 16540 - }, - { - "epoch": 2.820971867007673, - "grad_norm": 0.17674417793750763, - "learning_rate": 1.553689589593238e-05, - "loss": 0.001543693896383047, - "step": 16545 - }, - { - "epoch": 2.8218243819266835, - "grad_norm": 0.06186344474554062, - "learning_rate": 1.551613339963823e-05, - "loss": 0.001764528639614582, - "step": 16550 - }, - { - "epoch": 2.822676896845695, - "grad_norm": 0.13224560022354126, - "learning_rate": 1.5495381166144288e-05, - "loss": 0.004735496640205383, - "step": 16555 - }, - { - "epoch": 2.8235294117647056, - "grad_norm": 0.1427813619375229, - "learning_rate": 1.5474639205138406e-05, - "loss": 0.003041662834584713, - "step": 16560 - }, - { - "epoch": 2.824381926683717, - "grad_norm": 0.09970462322235107, - "learning_rate": 1.5453907526303614e-05, - "loss": 0.0025150768458843233, - "step": 16565 - }, - { - "epoch": 2.825234441602728, - "grad_norm": 0.02305634692311287, - "learning_rate": 1.5433186139318144e-05, - "loss": 0.001219399645924568, - "step": 16570 - }, - { - "epoch": 2.8260869565217392, - "grad_norm": 0.04805911332368851, - "learning_rate": 1.541247505385543e-05, - "loss": 0.0012801218777894973, - "step": 16575 - }, - { - "epoch": 2.8269394714407503, - "grad_norm": 0.08059800416231155, - "learning_rate": 1.539177427958408e-05, - "loss": 0.0031003907322883608, - "step": 16580 - }, - { - "epoch": 2.8277919863597614, - "grad_norm": 0.05763188377022743, - "learning_rate": 1.537108382616794e-05, - "loss": 0.002337191253900528, - "step": 16585 - }, - { - "epoch": 2.8286445012787724, - "grad_norm": 0.06821907311677933, - "learning_rate": 1.535040370326597e-05, - "loss": 0.0030008716508746146, - "step": 16590 - }, - { - "epoch": 2.8294970161977835, - "grad_norm": 0.12901924550533295, - "learning_rate": 1.5329733920532358e-05, - "loss": 0.0035179533064365388, - "step": 16595 - }, - { - "epoch": 2.8303495311167945, - "grad_norm": 0.040896832942962646, - "learning_rate": 1.5309074487616435e-05, - "loss": 0.0020170003175735475, - "step": 16600 - }, - { - "epoch": 2.8312020460358056, - "grad_norm": 0.06776095926761627, - "learning_rate": 1.5288425414162725e-05, - "loss": 0.0017662534490227699, - "step": 16605 - }, - { - "epoch": 2.8320545609548167, - "grad_norm": 0.08130808174610138, - "learning_rate": 1.5267786709810897e-05, - "loss": 0.0018257603049278259, - "step": 16610 - }, - { - "epoch": 2.8329070758738277, - "grad_norm": 0.05846976861357689, - "learning_rate": 1.5247158384195778e-05, - "loss": 0.0013240544125437737, - "step": 16615 - }, - { - "epoch": 2.833759590792839, - "grad_norm": 0.113974429666996, - "learning_rate": 1.522654044694736e-05, - "loss": 0.002671768143773079, - "step": 16620 - }, - { - "epoch": 2.83461210571185, - "grad_norm": 0.03519630804657936, - "learning_rate": 1.5205932907690771e-05, - "loss": 0.001667863130569458, - "step": 16625 - }, - { - "epoch": 2.835464620630861, - "grad_norm": 0.014673003926873207, - "learning_rate": 1.5185335776046322e-05, - "loss": 0.002035524509847164, - "step": 16630 - }, - { - "epoch": 2.836317135549872, - "grad_norm": 0.05683857575058937, - "learning_rate": 1.5164749061629407e-05, - "loss": 0.0021878147497773172, - "step": 16635 - }, - { - "epoch": 2.8371696504688835, - "grad_norm": 0.08671200275421143, - "learning_rate": 1.5144172774050623e-05, - "loss": 0.002064511738717556, - "step": 16640 - }, - { - "epoch": 2.838022165387894, - "grad_norm": 0.041581057012081146, - "learning_rate": 1.512360692291563e-05, - "loss": 0.0019536083564162254, - "step": 16645 - }, - { - "epoch": 2.8388746803069056, - "grad_norm": 0.10846979171037674, - "learning_rate": 1.5103051517825288e-05, - "loss": 0.0026564691215753555, - "step": 16650 - }, - { - "epoch": 2.839727195225916, - "grad_norm": 0.026884516701102257, - "learning_rate": 1.5082506568375526e-05, - "loss": 0.0026851309463381766, - "step": 16655 - }, - { - "epoch": 2.8405797101449277, - "grad_norm": 0.0613347552716732, - "learning_rate": 1.506197208415741e-05, - "loss": 0.0014739801175892354, - "step": 16660 - }, - { - "epoch": 2.8414322250639388, - "grad_norm": 0.06315013766288757, - "learning_rate": 1.504144807475712e-05, - "loss": 0.0026756677776575088, - "step": 16665 - }, - { - "epoch": 2.84228473998295, - "grad_norm": 0.04869166761636734, - "learning_rate": 1.5020934549755933e-05, - "loss": 0.0020816361531615256, - "step": 16670 - }, - { - "epoch": 2.843137254901961, - "grad_norm": 0.07282520830631256, - "learning_rate": 1.5000431518730273e-05, - "loss": 0.0008225045166909695, - "step": 16675 - }, - { - "epoch": 2.843989769820972, - "grad_norm": 0.051693812012672424, - "learning_rate": 1.4979938991251607e-05, - "loss": 0.002745438739657402, - "step": 16680 - }, - { - "epoch": 2.844842284739983, - "grad_norm": 0.1495431363582611, - "learning_rate": 1.4959456976886558e-05, - "loss": 0.001805400662124157, - "step": 16685 - }, - { - "epoch": 2.845694799658994, - "grad_norm": 0.05393834039568901, - "learning_rate": 1.4938985485196799e-05, - "loss": 0.0017135551199316979, - "step": 16690 - }, - { - "epoch": 2.846547314578005, - "grad_norm": 0.06205644831061363, - "learning_rate": 1.4918524525739088e-05, - "loss": 0.002358596958220005, - "step": 16695 - }, - { - "epoch": 2.847399829497016, - "grad_norm": 0.1177382543683052, - "learning_rate": 1.4898074108065306e-05, - "loss": 0.00382155142724514, - "step": 16700 - }, - { - "epoch": 2.8482523444160273, - "grad_norm": 0.06532850116491318, - "learning_rate": 1.487763424172238e-05, - "loss": 0.002384480834007263, - "step": 16705 - }, - { - "epoch": 2.8491048593350383, - "grad_norm": 0.05195530876517296, - "learning_rate": 1.4857204936252313e-05, - "loss": 0.0030395207926630975, - "step": 16710 - }, - { - "epoch": 2.8499573742540494, - "grad_norm": 0.06609994173049927, - "learning_rate": 1.4836786201192182e-05, - "loss": 0.002476612851023674, - "step": 16715 - }, - { - "epoch": 2.8508098891730604, - "grad_norm": 0.07928726077079773, - "learning_rate": 1.4816378046074146e-05, - "loss": 0.001881701312959194, - "step": 16720 - }, - { - "epoch": 2.8516624040920715, - "grad_norm": 0.08206343650817871, - "learning_rate": 1.4795980480425392e-05, - "loss": 0.0017553886398673057, - "step": 16725 - }, - { - "epoch": 2.8525149190110826, - "grad_norm": 0.08301947265863419, - "learning_rate": 1.4775593513768202e-05, - "loss": 0.0031315773725509644, - "step": 16730 - }, - { - "epoch": 2.8533674339300936, - "grad_norm": 0.034867819398641586, - "learning_rate": 1.4755217155619887e-05, - "loss": 0.0016052091494202613, - "step": 16735 - }, - { - "epoch": 2.8542199488491047, - "grad_norm": 0.03188352286815643, - "learning_rate": 1.4734851415492789e-05, - "loss": 0.002192831225693226, - "step": 16740 - }, - { - "epoch": 2.855072463768116, - "grad_norm": 0.07953578233718872, - "learning_rate": 1.4714496302894339e-05, - "loss": 0.002898801490664482, - "step": 16745 - }, - { - "epoch": 2.855924978687127, - "grad_norm": 0.06410107016563416, - "learning_rate": 1.4694151827326966e-05, - "loss": 0.0023399315774440765, - "step": 16750 - }, - { - "epoch": 2.8567774936061383, - "grad_norm": 0.09000501781702042, - "learning_rate": 1.4673817998288152e-05, - "loss": 0.003346502408385277, - "step": 16755 - }, - { - "epoch": 2.857630008525149, - "grad_norm": 0.07080121338367462, - "learning_rate": 1.465349482527039e-05, - "loss": 0.002062254026532173, - "step": 16760 - }, - { - "epoch": 2.8584825234441604, - "grad_norm": 0.03813991695642471, - "learning_rate": 1.4633182317761244e-05, - "loss": 0.0037174589931964876, - "step": 16765 - }, - { - "epoch": 2.8593350383631715, - "grad_norm": 0.035782843828201294, - "learning_rate": 1.4612880485243246e-05, - "loss": 0.0017096459865570067, - "step": 16770 - }, - { - "epoch": 2.8601875532821825, - "grad_norm": 0.058607637882232666, - "learning_rate": 1.4592589337193962e-05, - "loss": 0.0013915538787841798, - "step": 16775 - }, - { - "epoch": 2.8610400682011936, - "grad_norm": 0.06763444095849991, - "learning_rate": 1.4572308883085995e-05, - "loss": 0.0025088803842663763, - "step": 16780 - }, - { - "epoch": 2.8618925831202047, - "grad_norm": 0.08015233278274536, - "learning_rate": 1.4552039132386913e-05, - "loss": 0.001922524720430374, - "step": 16785 - }, - { - "epoch": 2.8627450980392157, - "grad_norm": 0.07501938939094543, - "learning_rate": 1.4531780094559332e-05, - "loss": 0.0023180417716503142, - "step": 16790 - }, - { - "epoch": 2.863597612958227, - "grad_norm": 0.1105467900633812, - "learning_rate": 1.4511531779060838e-05, - "loss": 0.0017500972375273705, - "step": 16795 - }, - { - "epoch": 2.864450127877238, - "grad_norm": 0.016127226874232292, - "learning_rate": 1.4491294195344016e-05, - "loss": 0.0029237957671284674, - "step": 16800 - }, - { - "epoch": 2.865302642796249, - "grad_norm": 0.06432373076677322, - "learning_rate": 1.447106735285644e-05, - "loss": 0.002439063973724842, - "step": 16805 - }, - { - "epoch": 2.86615515771526, - "grad_norm": 0.07629001885652542, - "learning_rate": 1.4450851261040664e-05, - "loss": 0.0021009005606174467, - "step": 16810 - }, - { - "epoch": 2.867007672634271, - "grad_norm": 0.05186440795660019, - "learning_rate": 1.4430645929334253e-05, - "loss": 0.0010275249369442463, - "step": 16815 - }, - { - "epoch": 2.867860187553282, - "grad_norm": 0.06517529487609863, - "learning_rate": 1.4410451367169705e-05, - "loss": 0.0022583767771720887, - "step": 16820 - }, - { - "epoch": 2.868712702472293, - "grad_norm": 0.03262385353446007, - "learning_rate": 1.4390267583974544e-05, - "loss": 0.002132249251008034, - "step": 16825 - }, - { - "epoch": 2.869565217391304, - "grad_norm": 0.04578368365764618, - "learning_rate": 1.4370094589171199e-05, - "loss": 0.0015474225394427777, - "step": 16830 - }, - { - "epoch": 2.8704177323103153, - "grad_norm": 0.11160826683044434, - "learning_rate": 1.4349932392177122e-05, - "loss": 0.001869696006178856, - "step": 16835 - }, - { - "epoch": 2.8712702472293268, - "grad_norm": 0.07949322462081909, - "learning_rate": 1.4329781002404687e-05, - "loss": 0.002716188505291939, - "step": 16840 - }, - { - "epoch": 2.8721227621483374, - "grad_norm": 0.12685050070285797, - "learning_rate": 1.430964042926123e-05, - "loss": 0.0026786208152770998, - "step": 16845 - }, - { - "epoch": 2.872975277067349, - "grad_norm": 0.03826960548758507, - "learning_rate": 1.428951068214904e-05, - "loss": 0.0015644762665033341, - "step": 16850 - }, - { - "epoch": 2.8738277919863595, - "grad_norm": 0.0909774899482727, - "learning_rate": 1.4269391770465346e-05, - "loss": 0.0020492007955908776, - "step": 16855 - }, - { - "epoch": 2.874680306905371, - "grad_norm": 0.09891391545534134, - "learning_rate": 1.4249283703602345e-05, - "loss": 0.0028120437636971474, - "step": 16860 - }, - { - "epoch": 2.875532821824382, - "grad_norm": 0.06281251460313797, - "learning_rate": 1.4229186490947126e-05, - "loss": 0.001888560503721237, - "step": 16865 - }, - { - "epoch": 2.876385336743393, - "grad_norm": 0.0330815464258194, - "learning_rate": 1.4209100141881763e-05, - "loss": 0.002112870290875435, - "step": 16870 - }, - { - "epoch": 2.877237851662404, - "grad_norm": 0.053650904446840286, - "learning_rate": 1.4189024665783207e-05, - "loss": 0.0012864695861935615, - "step": 16875 - }, - { - "epoch": 2.8780903665814153, - "grad_norm": 0.035941146314144135, - "learning_rate": 1.4168960072023384e-05, - "loss": 0.0028607305139303207, - "step": 16880 - }, - { - "epoch": 2.8789428815004263, - "grad_norm": 0.025085339322686195, - "learning_rate": 1.41489063699691e-05, - "loss": 0.001800362393260002, - "step": 16885 - }, - { - "epoch": 2.8797953964194374, - "grad_norm": 0.08627615869045258, - "learning_rate": 1.4128863568982088e-05, - "loss": 0.0023837506771087645, - "step": 16890 - }, - { - "epoch": 2.8806479113384484, - "grad_norm": 0.11542297154664993, - "learning_rate": 1.4108831678419e-05, - "loss": 0.003114992380142212, - "step": 16895 - }, - { - "epoch": 2.8815004262574595, - "grad_norm": 0.04762958735227585, - "learning_rate": 1.4088810707631375e-05, - "loss": 0.0020215384662151336, - "step": 16900 - }, - { - "epoch": 2.8823529411764706, - "grad_norm": 0.08232380449771881, - "learning_rate": 1.4068800665965687e-05, - "loss": 0.002120315283536911, - "step": 16905 - }, - { - "epoch": 2.8832054560954816, - "grad_norm": 0.04248562082648277, - "learning_rate": 1.4048801562763272e-05, - "loss": 0.001563185639679432, - "step": 16910 - }, - { - "epoch": 2.8840579710144927, - "grad_norm": 0.058416519314050674, - "learning_rate": 1.4028813407360393e-05, - "loss": 0.0017185319215059281, - "step": 16915 - }, - { - "epoch": 2.8849104859335037, - "grad_norm": 0.03542419150471687, - "learning_rate": 1.4008836209088185e-05, - "loss": 0.0017645543441176415, - "step": 16920 - }, - { - "epoch": 2.885763000852515, - "grad_norm": 0.055227622389793396, - "learning_rate": 1.3988869977272645e-05, - "loss": 0.002331301011145115, - "step": 16925 - }, - { - "epoch": 2.886615515771526, - "grad_norm": 0.02851465903222561, - "learning_rate": 1.3968914721234703e-05, - "loss": 0.00188722126185894, - "step": 16930 - }, - { - "epoch": 2.887468030690537, - "grad_norm": 0.10346336662769318, - "learning_rate": 1.3948970450290129e-05, - "loss": 0.003334081172943115, - "step": 16935 - }, - { - "epoch": 2.888320545609548, - "grad_norm": 0.040114935487508774, - "learning_rate": 1.3929037173749564e-05, - "loss": 0.002542957104742527, - "step": 16940 - }, - { - "epoch": 2.8891730605285595, - "grad_norm": 0.06734409183263779, - "learning_rate": 1.3909114900918517e-05, - "loss": 0.002022533863782883, - "step": 16945 - }, - { - "epoch": 2.89002557544757, - "grad_norm": 0.03672570362687111, - "learning_rate": 1.3889203641097392e-05, - "loss": 0.0017688646912574768, - "step": 16950 - }, - { - "epoch": 2.8908780903665816, - "grad_norm": 0.016099590808153152, - "learning_rate": 1.3869303403581397e-05, - "loss": 0.002179678343236446, - "step": 16955 - }, - { - "epoch": 2.8917306052855922, - "grad_norm": 0.0655573159456253, - "learning_rate": 1.384941419766066e-05, - "loss": 0.0020285720005631448, - "step": 16960 - }, - { - "epoch": 2.8925831202046037, - "grad_norm": 0.03573548421263695, - "learning_rate": 1.3829536032620105e-05, - "loss": 0.002248694933950901, - "step": 16965 - }, - { - "epoch": 2.893435635123615, - "grad_norm": 0.06182318180799484, - "learning_rate": 1.3809668917739507e-05, - "loss": 0.002159777097404003, - "step": 16970 - }, - { - "epoch": 2.894288150042626, - "grad_norm": 0.09492490440607071, - "learning_rate": 1.3789812862293527e-05, - "loss": 0.0027505803853273393, - "step": 16975 - }, - { - "epoch": 2.895140664961637, - "grad_norm": 0.043292637914419174, - "learning_rate": 1.3769967875551613e-05, - "loss": 0.0018307223916053772, - "step": 16980 - }, - { - "epoch": 2.895993179880648, - "grad_norm": 0.08455146849155426, - "learning_rate": 1.375013396677807e-05, - "loss": 0.0019843194633722304, - "step": 16985 - }, - { - "epoch": 2.896845694799659, - "grad_norm": 0.06926032900810242, - "learning_rate": 1.3730311145232023e-05, - "loss": 0.0024761717766523363, - "step": 16990 - }, - { - "epoch": 2.89769820971867, - "grad_norm": 0.0860179215669632, - "learning_rate": 1.3710499420167413e-05, - "loss": 0.002175389975309372, - "step": 16995 - }, - { - "epoch": 2.898550724637681, - "grad_norm": 0.10651890188455582, - "learning_rate": 1.3690698800833026e-05, - "loss": 0.0033860310912132265, - "step": 17000 - }, - { - "epoch": 2.899403239556692, - "grad_norm": 0.09691976010799408, - "learning_rate": 1.3670909296472464e-05, - "loss": 0.0021878845989704134, - "step": 17005 - }, - { - "epoch": 2.9002557544757033, - "grad_norm": 0.11704960465431213, - "learning_rate": 1.3651130916324107e-05, - "loss": 0.00286871287971735, - "step": 17010 - }, - { - "epoch": 2.9011082693947143, - "grad_norm": 0.09645909816026688, - "learning_rate": 1.3631363669621153e-05, - "loss": 0.001873398572206497, - "step": 17015 - }, - { - "epoch": 2.9019607843137254, - "grad_norm": 0.13174127042293549, - "learning_rate": 1.3611607565591639e-05, - "loss": 0.00285712368786335, - "step": 17020 - }, - { - "epoch": 2.9028132992327365, - "grad_norm": 0.07539260387420654, - "learning_rate": 1.359186261345835e-05, - "loss": 0.0027119526639580727, - "step": 17025 - }, - { - "epoch": 2.9036658141517475, - "grad_norm": 0.06165684387087822, - "learning_rate": 1.3572128822438892e-05, - "loss": 0.0018354985862970353, - "step": 17030 - }, - { - "epoch": 2.9045183290707586, - "grad_norm": 0.06021244078874588, - "learning_rate": 1.3552406201745654e-05, - "loss": 0.0016940701752901077, - "step": 17035 - }, - { - "epoch": 2.90537084398977, - "grad_norm": 0.09488464146852493, - "learning_rate": 1.3532694760585795e-05, - "loss": 0.0019129924476146698, - "step": 17040 - }, - { - "epoch": 2.9062233589087807, - "grad_norm": 0.04894041642546654, - "learning_rate": 1.3512994508161307e-05, - "loss": 0.002598444186151028, - "step": 17045 - }, - { - "epoch": 2.907075873827792, - "grad_norm": 0.045589860528707504, - "learning_rate": 1.349330545366889e-05, - "loss": 0.0018267668783664703, - "step": 17050 - }, - { - "epoch": 2.907928388746803, - "grad_norm": 0.04273771867156029, - "learning_rate": 1.3473627606300071e-05, - "loss": 0.0013479530811309815, - "step": 17055 - }, - { - "epoch": 2.9087809036658143, - "grad_norm": 0.050675440579652786, - "learning_rate": 1.345396097524111e-05, - "loss": 0.001664750650525093, - "step": 17060 - }, - { - "epoch": 2.9096334185848254, - "grad_norm": 0.07637523114681244, - "learning_rate": 1.3434305569673059e-05, - "loss": 0.001719363033771515, - "step": 17065 - }, - { - "epoch": 2.9104859335038364, - "grad_norm": 0.03540422394871712, - "learning_rate": 1.3414661398771711e-05, - "loss": 0.002338713780045509, - "step": 17070 - }, - { - "epoch": 2.9113384484228475, - "grad_norm": 0.09252000600099564, - "learning_rate": 1.3395028471707613e-05, - "loss": 0.0018722079694271088, - "step": 17075 - }, - { - "epoch": 2.9121909633418586, - "grad_norm": 0.08759574592113495, - "learning_rate": 1.3375406797646068e-05, - "loss": 0.003211042284965515, - "step": 17080 - }, - { - "epoch": 2.9130434782608696, - "grad_norm": 0.07291707396507263, - "learning_rate": 1.3355796385747121e-05, - "loss": 0.002141663059592247, - "step": 17085 - }, - { - "epoch": 2.9138959931798807, - "grad_norm": 0.03608965128660202, - "learning_rate": 1.3336197245165578e-05, - "loss": 0.0015133512206375599, - "step": 17090 - }, - { - "epoch": 2.9147485080988917, - "grad_norm": 0.0686686635017395, - "learning_rate": 1.3316609385050954e-05, - "loss": 0.0015084316954016685, - "step": 17095 - }, - { - "epoch": 2.915601023017903, - "grad_norm": 0.052468664944171906, - "learning_rate": 1.3297032814547539e-05, - "loss": 0.00120701240375638, - "step": 17100 - }, - { - "epoch": 2.916453537936914, - "grad_norm": 0.06129363924264908, - "learning_rate": 1.3277467542794304e-05, - "loss": 0.002575872652232647, - "step": 17105 - }, - { - "epoch": 2.917306052855925, - "grad_norm": 0.06045043095946312, - "learning_rate": 1.3257913578924969e-05, - "loss": 0.0022510627284646036, - "step": 17110 - }, - { - "epoch": 2.918158567774936, - "grad_norm": 0.09090365469455719, - "learning_rate": 1.3238370932067996e-05, - "loss": 0.002203880250453949, - "step": 17115 - }, - { - "epoch": 2.919011082693947, - "grad_norm": 0.03382663428783417, - "learning_rate": 1.3218839611346522e-05, - "loss": 0.0009420939721167087, - "step": 17120 - }, - { - "epoch": 2.919863597612958, - "grad_norm": 0.06900735199451447, - "learning_rate": 1.3199319625878431e-05, - "loss": 0.0021647622808814047, - "step": 17125 - }, - { - "epoch": 2.920716112531969, - "grad_norm": 0.04494655504822731, - "learning_rate": 1.3179810984776277e-05, - "loss": 0.0027208495885133743, - "step": 17130 - }, - { - "epoch": 2.9215686274509802, - "grad_norm": 0.05262625217437744, - "learning_rate": 1.3160313697147373e-05, - "loss": 0.0015311154536902904, - "step": 17135 - }, - { - "epoch": 2.9224211423699913, - "grad_norm": 0.025083297863602638, - "learning_rate": 1.314082777209368e-05, - "loss": 0.00193443913012743, - "step": 17140 - }, - { - "epoch": 2.923273657289003, - "grad_norm": 0.08246373385190964, - "learning_rate": 1.3121353218711892e-05, - "loss": 0.0019143052399158479, - "step": 17145 - }, - { - "epoch": 2.9241261722080134, - "grad_norm": 0.1049862802028656, - "learning_rate": 1.3101890046093376e-05, - "loss": 0.002230258658528328, - "step": 17150 - }, - { - "epoch": 2.924978687127025, - "grad_norm": 0.042054325342178345, - "learning_rate": 1.3082438263324169e-05, - "loss": 0.0011081861332058907, - "step": 17155 - }, - { - "epoch": 2.9258312020460355, - "grad_norm": 0.0713399276137352, - "learning_rate": 1.3062997879485033e-05, - "loss": 0.0015817128121852874, - "step": 17160 - }, - { - "epoch": 2.926683716965047, - "grad_norm": 0.07212921977043152, - "learning_rate": 1.3043568903651381e-05, - "loss": 0.002985073998570442, - "step": 17165 - }, - { - "epoch": 2.927536231884058, - "grad_norm": 0.14285585284233093, - "learning_rate": 1.3024151344893299e-05, - "loss": 0.0019961275160312653, - "step": 17170 - }, - { - "epoch": 2.928388746803069, - "grad_norm": 0.06164155155420303, - "learning_rate": 1.3004745212275543e-05, - "loss": 0.0017055023461580276, - "step": 17175 - }, - { - "epoch": 2.92924126172208, - "grad_norm": 0.02376371994614601, - "learning_rate": 1.298535051485756e-05, - "loss": 0.0013552471064031124, - "step": 17180 - }, - { - "epoch": 2.9300937766410913, - "grad_norm": 0.07454569637775421, - "learning_rate": 1.296596726169342e-05, - "loss": 0.002513031102716923, - "step": 17185 - }, - { - "epoch": 2.9309462915601023, - "grad_norm": 0.0765121579170227, - "learning_rate": 1.2946595461831892e-05, - "loss": 0.0019039563834667207, - "step": 17190 - }, - { - "epoch": 2.9317988064791134, - "grad_norm": 0.07360806316137314, - "learning_rate": 1.2927235124316362e-05, - "loss": 0.001339799538254738, - "step": 17195 - }, - { - "epoch": 2.9326513213981245, - "grad_norm": 0.18903285264968872, - "learning_rate": 1.2907886258184876e-05, - "loss": 0.003720489144325256, - "step": 17200 - }, - { - "epoch": 2.9335038363171355, - "grad_norm": 0.07760016620159149, - "learning_rate": 1.2888548872470143e-05, - "loss": 0.0015237806364893913, - "step": 17205 - }, - { - "epoch": 2.9343563512361466, - "grad_norm": 0.055864643305540085, - "learning_rate": 1.286922297619949e-05, - "loss": 0.0014091457240283489, - "step": 17210 - }, - { - "epoch": 2.9352088661551576, - "grad_norm": 0.08161517977714539, - "learning_rate": 1.2849908578394888e-05, - "loss": 0.002047298289835453, - "step": 17215 - }, - { - "epoch": 2.9360613810741687, - "grad_norm": 0.11219590902328491, - "learning_rate": 1.283060568807294e-05, - "loss": 0.0023268122225999833, - "step": 17220 - }, - { - "epoch": 2.9369138959931798, - "grad_norm": 0.10008323192596436, - "learning_rate": 1.2811314314244867e-05, - "loss": 0.002319963276386261, - "step": 17225 - }, - { - "epoch": 2.937766410912191, - "grad_norm": 0.077080138027668, - "learning_rate": 1.2792034465916536e-05, - "loss": 0.0020459359511733055, - "step": 17230 - }, - { - "epoch": 2.938618925831202, - "grad_norm": 0.09049349278211594, - "learning_rate": 1.2772766152088431e-05, - "loss": 0.0038630947470664977, - "step": 17235 - }, - { - "epoch": 2.9394714407502134, - "grad_norm": 0.09306768327951431, - "learning_rate": 1.275350938175563e-05, - "loss": 0.0017305316403508186, - "step": 17240 - }, - { - "epoch": 2.940323955669224, - "grad_norm": 0.061699800193309784, - "learning_rate": 1.2734264163907824e-05, - "loss": 0.00341113954782486, - "step": 17245 - }, - { - "epoch": 2.9411764705882355, - "grad_norm": 0.11029893159866333, - "learning_rate": 1.2715030507529347e-05, - "loss": 0.0023353056982159614, - "step": 17250 - }, - { - "epoch": 2.942028985507246, - "grad_norm": 0.06272252649068832, - "learning_rate": 1.2695808421599087e-05, - "loss": 0.0012727061286568642, - "step": 17255 - }, - { - "epoch": 2.9428815004262576, - "grad_norm": 0.02106044627726078, - "learning_rate": 1.2676597915090567e-05, - "loss": 0.0020675512030720712, - "step": 17260 - }, - { - "epoch": 2.9437340153452687, - "grad_norm": 0.08245997875928879, - "learning_rate": 1.2657398996971883e-05, - "loss": 0.002128716930747032, - "step": 17265 - }, - { - "epoch": 2.9445865302642797, - "grad_norm": 0.10804266482591629, - "learning_rate": 1.2638211676205718e-05, - "loss": 0.0012407343834638595, - "step": 17270 - }, - { - "epoch": 2.945439045183291, - "grad_norm": 0.0485721081495285, - "learning_rate": 1.2619035961749375e-05, - "loss": 0.0019056517630815506, - "step": 17275 - }, - { - "epoch": 2.946291560102302, - "grad_norm": 0.04094598814845085, - "learning_rate": 1.2599871862554694e-05, - "loss": 0.0014778503216803073, - "step": 17280 - }, - { - "epoch": 2.947144075021313, - "grad_norm": 0.08831547200679779, - "learning_rate": 1.2580719387568133e-05, - "loss": 0.002304557338356972, - "step": 17285 - }, - { - "epoch": 2.947996589940324, - "grad_norm": 0.02547610178589821, - "learning_rate": 1.2561578545730685e-05, - "loss": 0.0010631450451910496, - "step": 17290 - }, - { - "epoch": 2.948849104859335, - "grad_norm": 0.09562932699918747, - "learning_rate": 1.2542449345977952e-05, - "loss": 0.0021377883851528166, - "step": 17295 - }, - { - "epoch": 2.949701619778346, - "grad_norm": 0.02090577222406864, - "learning_rate": 1.2523331797240072e-05, - "loss": 0.001333952508866787, - "step": 17300 - }, - { - "epoch": 2.950554134697357, - "grad_norm": 0.12461904436349869, - "learning_rate": 1.2504225908441751e-05, - "loss": 0.0025647601112723352, - "step": 17305 - }, - { - "epoch": 2.9514066496163682, - "grad_norm": 0.047791410237550735, - "learning_rate": 1.2485131688502254e-05, - "loss": 0.0014650242403149605, - "step": 17310 - }, - { - "epoch": 2.9522591645353793, - "grad_norm": 0.055085547268390656, - "learning_rate": 1.2466049146335387e-05, - "loss": 0.002520528435707092, - "step": 17315 - }, - { - "epoch": 2.9531116794543903, - "grad_norm": 0.09370748698711395, - "learning_rate": 1.2446978290849538e-05, - "loss": 0.002327192947268486, - "step": 17320 - }, - { - "epoch": 2.9539641943734014, - "grad_norm": 0.06663045287132263, - "learning_rate": 1.242791913094759e-05, - "loss": 0.0025285203009843826, - "step": 17325 - }, - { - "epoch": 2.9548167092924125, - "grad_norm": 0.06620613485574722, - "learning_rate": 1.2408871675527022e-05, - "loss": 0.001520772185176611, - "step": 17330 - }, - { - "epoch": 2.955669224211424, - "grad_norm": 0.08397935330867767, - "learning_rate": 1.2389835933479805e-05, - "loss": 0.001917354017496109, - "step": 17335 - }, - { - "epoch": 2.9565217391304346, - "grad_norm": 0.037347212433815, - "learning_rate": 1.2370811913692447e-05, - "loss": 0.001991302520036697, - "step": 17340 - }, - { - "epoch": 2.957374254049446, - "grad_norm": 0.09309769421815872, - "learning_rate": 1.2351799625046013e-05, - "loss": 0.0028038494288921355, - "step": 17345 - }, - { - "epoch": 2.9582267689684567, - "grad_norm": 0.03684366121888161, - "learning_rate": 1.2332799076416064e-05, - "loss": 0.0017773956060409546, - "step": 17350 - }, - { - "epoch": 2.959079283887468, - "grad_norm": 0.05473257228732109, - "learning_rate": 1.2313810276672687e-05, - "loss": 0.0012853020802140237, - "step": 17355 - }, - { - "epoch": 2.9599317988064793, - "grad_norm": 0.042117465287446976, - "learning_rate": 1.2294833234680473e-05, - "loss": 0.001919369027018547, - "step": 17360 - }, - { - "epoch": 2.9607843137254903, - "grad_norm": 0.05097515508532524, - "learning_rate": 1.2275867959298559e-05, - "loss": 0.001891462691128254, - "step": 17365 - }, - { - "epoch": 2.9616368286445014, - "grad_norm": 0.09409259259700775, - "learning_rate": 1.2256914459380544e-05, - "loss": 0.0014902386814355851, - "step": 17370 - }, - { - "epoch": 2.9624893435635125, - "grad_norm": 0.09465356171131134, - "learning_rate": 1.2237972743774576e-05, - "loss": 0.002463678829371929, - "step": 17375 - }, - { - "epoch": 2.9633418584825235, - "grad_norm": 0.02534087561070919, - "learning_rate": 1.221904282132327e-05, - "loss": 0.0023292653262615205, - "step": 17380 - }, - { - "epoch": 2.9641943734015346, - "grad_norm": 0.1058032363653183, - "learning_rate": 1.2200124700863723e-05, - "loss": 0.002900855429470539, - "step": 17385 - }, - { - "epoch": 2.9650468883205456, - "grad_norm": 0.07726191729307175, - "learning_rate": 1.218121839122757e-05, - "loss": 0.0014870663173496724, - "step": 17390 - }, - { - "epoch": 2.9658994032395567, - "grad_norm": 0.0792614072561264, - "learning_rate": 1.21623239012409e-05, - "loss": 0.001744781993329525, - "step": 17395 - }, - { - "epoch": 2.9667519181585678, - "grad_norm": 0.07266564667224884, - "learning_rate": 1.214344123972428e-05, - "loss": 0.002622047811746597, - "step": 17400 - }, - { - "epoch": 2.967604433077579, - "grad_norm": 0.06203412637114525, - "learning_rate": 1.2124570415492758e-05, - "loss": 0.002504969388246536, - "step": 17405 - }, - { - "epoch": 2.96845694799659, - "grad_norm": 0.07259709388017654, - "learning_rate": 1.2105711437355884e-05, - "loss": 0.0018782744184136391, - "step": 17410 - }, - { - "epoch": 2.969309462915601, - "grad_norm": 0.05496470257639885, - "learning_rate": 1.2086864314117633e-05, - "loss": 0.0018179532140493392, - "step": 17415 - }, - { - "epoch": 2.970161977834612, - "grad_norm": 0.0235351100564003, - "learning_rate": 1.2068029054576496e-05, - "loss": 0.0015613840892910956, - "step": 17420 - }, - { - "epoch": 2.971014492753623, - "grad_norm": 0.046441882848739624, - "learning_rate": 1.2049205667525383e-05, - "loss": 0.0014228712767362594, - "step": 17425 - }, - { - "epoch": 2.971867007672634, - "grad_norm": 0.06290153414011002, - "learning_rate": 1.2030394161751664e-05, - "loss": 0.0011624433100223541, - "step": 17430 - }, - { - "epoch": 2.972719522591645, - "grad_norm": 0.0662989467382431, - "learning_rate": 1.2011594546037205e-05, - "loss": 0.002170179411768913, - "step": 17435 - }, - { - "epoch": 2.9735720375106567, - "grad_norm": 0.06470426172018051, - "learning_rate": 1.1992806829158275e-05, - "loss": 0.0010997526347637176, - "step": 17440 - }, - { - "epoch": 2.9744245524296673, - "grad_norm": 0.039091553539037704, - "learning_rate": 1.1974031019885612e-05, - "loss": 0.0014238604344427586, - "step": 17445 - }, - { - "epoch": 2.975277067348679, - "grad_norm": 0.03796529024839401, - "learning_rate": 1.1955267126984376e-05, - "loss": 0.002270728349685669, - "step": 17450 - }, - { - "epoch": 2.9761295822676894, - "grad_norm": 0.09608127176761627, - "learning_rate": 1.1936515159214177e-05, - "loss": 0.0030095497146248817, - "step": 17455 - }, - { - "epoch": 2.976982097186701, - "grad_norm": 0.09011568874120712, - "learning_rate": 1.1917775125329063e-05, - "loss": 0.0031840000301599503, - "step": 17460 - }, - { - "epoch": 2.977834612105712, - "grad_norm": 0.057273294776678085, - "learning_rate": 1.1899047034077522e-05, - "loss": 0.0011888986453413963, - "step": 17465 - }, - { - "epoch": 2.978687127024723, - "grad_norm": 0.14515799283981323, - "learning_rate": 1.1880330894202432e-05, - "loss": 0.001710682176053524, - "step": 17470 - }, - { - "epoch": 2.979539641943734, - "grad_norm": 0.09522838145494461, - "learning_rate": 1.1861626714441096e-05, - "loss": 0.002519896999001503, - "step": 17475 - }, - { - "epoch": 2.980392156862745, - "grad_norm": 0.08164853602647781, - "learning_rate": 1.1842934503525282e-05, - "loss": 0.002578527852892876, - "step": 17480 - }, - { - "epoch": 2.9812446717817562, - "grad_norm": 0.08428774774074554, - "learning_rate": 1.1824254270181112e-05, - "loss": 0.0012953916564583778, - "step": 17485 - }, - { - "epoch": 2.9820971867007673, - "grad_norm": 0.07469037175178528, - "learning_rate": 1.180558602312915e-05, - "loss": 0.0037867244333028792, - "step": 17490 - }, - { - "epoch": 2.9829497016197783, - "grad_norm": 0.08371725678443909, - "learning_rate": 1.1786929771084346e-05, - "loss": 0.002791520766913891, - "step": 17495 - }, - { - "epoch": 2.9838022165387894, - "grad_norm": 0.014852025546133518, - "learning_rate": 1.1768285522756056e-05, - "loss": 0.0014176778495311737, - "step": 17500 - }, - { - "epoch": 2.9846547314578005, - "grad_norm": 0.04576858505606651, - "learning_rate": 1.174965328684804e-05, - "loss": 0.002578184753656387, - "step": 17505 - }, - { - "epoch": 2.9855072463768115, - "grad_norm": 0.05726059526205063, - "learning_rate": 1.1731033072058464e-05, - "loss": 0.0016687212511897088, - "step": 17510 - }, - { - "epoch": 2.9863597612958226, - "grad_norm": 0.0770409107208252, - "learning_rate": 1.171242488707984e-05, - "loss": 0.0013428821228444576, - "step": 17515 - }, - { - "epoch": 2.9872122762148337, - "grad_norm": 0.10322020947933197, - "learning_rate": 1.1693828740599093e-05, - "loss": 0.0019340002909302711, - "step": 17520 - }, - { - "epoch": 2.9880647911338447, - "grad_norm": 0.08900497853755951, - "learning_rate": 1.1675244641297531e-05, - "loss": 0.002262430638074875, - "step": 17525 - }, - { - "epoch": 2.9889173060528558, - "grad_norm": 0.06439421325922012, - "learning_rate": 1.1656672597850828e-05, - "loss": 0.003663495182991028, - "step": 17530 - }, - { - "epoch": 2.9897698209718673, - "grad_norm": 0.032524604350328445, - "learning_rate": 1.1638112618929023e-05, - "loss": 0.00146266371011734, - "step": 17535 - }, - { - "epoch": 2.990622335890878, - "grad_norm": 0.09089723974466324, - "learning_rate": 1.1619564713196542e-05, - "loss": 0.002597668394446373, - "step": 17540 - }, - { - "epoch": 2.9914748508098894, - "grad_norm": 0.11931595206260681, - "learning_rate": 1.1601028889312144e-05, - "loss": 0.0025284418836236, - "step": 17545 - }, - { - "epoch": 2.9923273657289, - "grad_norm": 0.05474149063229561, - "learning_rate": 1.1582505155928994e-05, - "loss": 0.002077813073992729, - "step": 17550 - }, - { - "epoch": 2.9931798806479115, - "grad_norm": 0.060414139181375504, - "learning_rate": 1.1563993521694564e-05, - "loss": 0.0014027852565050125, - "step": 17555 - }, - { - "epoch": 2.9940323955669226, - "grad_norm": 0.03036579303443432, - "learning_rate": 1.1545493995250727e-05, - "loss": 0.0008949190378189087, - "step": 17560 - }, - { - "epoch": 2.9948849104859336, - "grad_norm": 0.030154328793287277, - "learning_rate": 1.1527006585233662e-05, - "loss": 0.002073490060865879, - "step": 17565 - }, - { - "epoch": 2.9957374254049447, - "grad_norm": 0.04413657262921333, - "learning_rate": 1.1508531300273893e-05, - "loss": 0.0018356587737798692, - "step": 17570 - }, - { - "epoch": 2.9965899403239558, - "grad_norm": 0.022916359826922417, - "learning_rate": 1.1490068148996329e-05, - "loss": 0.0018058544024825095, - "step": 17575 - }, - { - "epoch": 2.997442455242967, - "grad_norm": 0.059595149010419846, - "learning_rate": 1.1471617140020162e-05, - "loss": 0.0019177049398422241, - "step": 17580 - }, - { - "epoch": 2.998294970161978, - "grad_norm": 0.038439393043518066, - "learning_rate": 1.1453178281958944e-05, - "loss": 0.002159320004284382, - "step": 17585 - }, - { - "epoch": 2.999147485080989, - "grad_norm": 0.021210921928286552, - "learning_rate": 1.1434751583420536e-05, - "loss": 0.0014576959423720838, - "step": 17590 - }, - { - "epoch": 2.9994884910485933, - "eval_loss": 0.04721549153327942, - "eval_runtime": 3.7007, - "eval_samples_per_second": 68.095, - "eval_steps_per_second": 1.081, - "step": 17592 - }, - { - "eval_cer_subset": 0.014346898562053186, - "eval_cer_subset_edit_distance": 881, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 17592 - }, - { - "epoch": 3.0, - "grad_norm": 0.12229876965284348, - "learning_rate": 1.1416337053007148e-05, - "loss": 0.0023294053971767426, - "step": 17595 - }, - { - "epoch": 3.000852514919011, - "grad_norm": 0.00884711928665638, - "learning_rate": 1.1397934699315283e-05, - "loss": 0.0007601963356137275, - "step": 17600 - }, - { - "epoch": 3.001705029838022, - "grad_norm": 0.006744864396750927, - "learning_rate": 1.1379544530935788e-05, - "loss": 0.0009616459719836712, - "step": 17605 - }, - { - "epoch": 3.002557544757033, - "grad_norm": 0.015270856209099293, - "learning_rate": 1.1361166556453794e-05, - "loss": 0.0008831757120788097, - "step": 17610 - }, - { - "epoch": 3.0034100596760442, - "grad_norm": 0.03363358601927757, - "learning_rate": 1.1342800784448747e-05, - "loss": 0.000755470572039485, - "step": 17615 - }, - { - "epoch": 3.0042625745950553, - "grad_norm": 0.013082304038107395, - "learning_rate": 1.1324447223494415e-05, - "loss": 0.0005882583092898131, - "step": 17620 - }, - { - "epoch": 3.0051150895140664, - "grad_norm": 0.02833358384668827, - "learning_rate": 1.1306105882158842e-05, - "loss": 0.0011209994554519653, - "step": 17625 - }, - { - "epoch": 3.0059676044330774, - "grad_norm": 0.018923277035355568, - "learning_rate": 1.1287776769004374e-05, - "loss": 0.0006263695657253265, - "step": 17630 - }, - { - "epoch": 3.0068201193520885, - "grad_norm": 0.06693795323371887, - "learning_rate": 1.1269459892587659e-05, - "loss": 0.0004380833823233843, - "step": 17635 - }, - { - "epoch": 3.0076726342710995, - "grad_norm": 0.046298157423734665, - "learning_rate": 1.1251155261459601e-05, - "loss": 0.0008744291961193084, - "step": 17640 - }, - { - "epoch": 3.008525149190111, - "grad_norm": 0.01699778623878956, - "learning_rate": 1.1232862884165428e-05, - "loss": 0.0010823222808539867, - "step": 17645 - }, - { - "epoch": 3.009377664109122, - "grad_norm": 0.03985033556818962, - "learning_rate": 1.1214582769244643e-05, - "loss": 0.0010692596435546875, - "step": 17650 - }, - { - "epoch": 3.010230179028133, - "grad_norm": 0.03855466470122337, - "learning_rate": 1.1196314925231002e-05, - "loss": 0.0005569665227085352, - "step": 17655 - }, - { - "epoch": 3.0110826939471442, - "grad_norm": 0.04734903946518898, - "learning_rate": 1.1178059360652523e-05, - "loss": 0.0006342111155390739, - "step": 17660 - }, - { - "epoch": 3.0119352088661553, - "grad_norm": 0.016796378418803215, - "learning_rate": 1.115981608403154e-05, - "loss": 0.0004393692128360271, - "step": 17665 - }, - { - "epoch": 3.0127877237851663, - "grad_norm": 0.052275605499744415, - "learning_rate": 1.1141585103884607e-05, - "loss": 0.0007596808485686779, - "step": 17670 - }, - { - "epoch": 3.0136402387041774, - "grad_norm": 0.01877668686211109, - "learning_rate": 1.1123366428722558e-05, - "loss": 0.000605479022487998, - "step": 17675 - }, - { - "epoch": 3.0144927536231885, - "grad_norm": 0.03435613960027695, - "learning_rate": 1.1105160067050468e-05, - "loss": 0.0006241496652364731, - "step": 17680 - }, - { - "epoch": 3.0153452685421995, - "grad_norm": 0.017725376412272453, - "learning_rate": 1.1086966027367666e-05, - "loss": 0.0004620179533958435, - "step": 17685 - }, - { - "epoch": 3.0161977834612106, - "grad_norm": 0.08081609010696411, - "learning_rate": 1.1068784318167741e-05, - "loss": 0.0007450764998793602, - "step": 17690 - }, - { - "epoch": 3.0170502983802217, - "grad_norm": 0.01702817529439926, - "learning_rate": 1.105061494793854e-05, - "loss": 0.0004339885897934437, - "step": 17695 - }, - { - "epoch": 3.0179028132992327, - "grad_norm": 0.01536079403012991, - "learning_rate": 1.1032457925162112e-05, - "loss": 0.00040107620880007743, - "step": 17700 - }, - { - "epoch": 3.0187553282182438, - "grad_norm": 0.005985455587506294, - "learning_rate": 1.101431325831475e-05, - "loss": 0.0005834253039211035, - "step": 17705 - }, - { - "epoch": 3.019607843137255, - "grad_norm": 0.06888663023710251, - "learning_rate": 1.099618095586701e-05, - "loss": 0.00041420520283281804, - "step": 17710 - }, - { - "epoch": 3.020460358056266, - "grad_norm": 0.017603853717446327, - "learning_rate": 1.097806102628364e-05, - "loss": 0.001230797078460455, - "step": 17715 - }, - { - "epoch": 3.021312872975277, - "grad_norm": 0.08015599846839905, - "learning_rate": 1.0959953478023628e-05, - "loss": 0.0010655376128852368, - "step": 17720 - }, - { - "epoch": 3.022165387894288, - "grad_norm": 0.03606700897216797, - "learning_rate": 1.0941858319540184e-05, - "loss": 0.0005988342221826315, - "step": 17725 - }, - { - "epoch": 3.023017902813299, - "grad_norm": 0.01536708977073431, - "learning_rate": 1.0923775559280712e-05, - "loss": 0.0006389651913195849, - "step": 17730 - }, - { - "epoch": 3.02387041773231, - "grad_norm": 0.007655138149857521, - "learning_rate": 1.090570520568686e-05, - "loss": 0.00042916592210531237, - "step": 17735 - }, - { - "epoch": 3.024722932651321, - "grad_norm": 0.029535889625549316, - "learning_rate": 1.0887647267194479e-05, - "loss": 0.0005783494096249342, - "step": 17740 - }, - { - "epoch": 3.0255754475703327, - "grad_norm": 0.04330015555024147, - "learning_rate": 1.0869601752233612e-05, - "loss": 0.000689673563465476, - "step": 17745 - }, - { - "epoch": 3.0264279624893438, - "grad_norm": 0.010324080474674702, - "learning_rate": 1.08515686692285e-05, - "loss": 0.00033456801902502774, - "step": 17750 - }, - { - "epoch": 3.027280477408355, - "grad_norm": 0.021205585449934006, - "learning_rate": 1.0833548026597568e-05, - "loss": 0.0005186548456549645, - "step": 17755 - }, - { - "epoch": 3.028132992327366, - "grad_norm": 0.01829897239804268, - "learning_rate": 1.081553983275349e-05, - "loss": 0.0010600530542433262, - "step": 17760 - }, - { - "epoch": 3.028985507246377, - "grad_norm": 0.014599860645830631, - "learning_rate": 1.0797544096103066e-05, - "loss": 0.0007877435535192489, - "step": 17765 - }, - { - "epoch": 3.029838022165388, - "grad_norm": 0.04433593526482582, - "learning_rate": 1.0779560825047306e-05, - "loss": 0.0006474100053310394, - "step": 17770 - }, - { - "epoch": 3.030690537084399, - "grad_norm": 0.037262968719005585, - "learning_rate": 1.0761590027981393e-05, - "loss": 0.0009919026866555213, - "step": 17775 - }, - { - "epoch": 3.03154305200341, - "grad_norm": 0.009379384107887745, - "learning_rate": 1.0743631713294696e-05, - "loss": 0.0008003567345440388, - "step": 17780 - }, - { - "epoch": 3.032395566922421, - "grad_norm": 0.05301728472113609, - "learning_rate": 1.0725685889370778e-05, - "loss": 0.0005272284150123596, - "step": 17785 - }, - { - "epoch": 3.0332480818414322, - "grad_norm": 0.014656663872301579, - "learning_rate": 1.0707752564587322e-05, - "loss": 0.0008352659642696381, - "step": 17790 - }, - { - "epoch": 3.0341005967604433, - "grad_norm": 0.022964198142290115, - "learning_rate": 1.0689831747316206e-05, - "loss": 0.0004729554522782564, - "step": 17795 - }, - { - "epoch": 3.0349531116794544, - "grad_norm": 0.00882013700902462, - "learning_rate": 1.0671923445923454e-05, - "loss": 0.0004315647296607494, - "step": 17800 - }, - { - "epoch": 3.0358056265984654, - "grad_norm": 0.03546347841620445, - "learning_rate": 1.0654027668769282e-05, - "loss": 0.0006825461052358151, - "step": 17805 - }, - { - "epoch": 3.0366581415174765, - "grad_norm": 0.009973641484975815, - "learning_rate": 1.063614442420801e-05, - "loss": 0.00043485076166689397, - "step": 17810 - }, - { - "epoch": 3.0375106564364875, - "grad_norm": 0.04359155148267746, - "learning_rate": 1.0618273720588144e-05, - "loss": 0.0005858796648681164, - "step": 17815 - }, - { - "epoch": 3.0383631713554986, - "grad_norm": 0.03281440958380699, - "learning_rate": 1.0600415566252307e-05, - "loss": 0.0004939477425068617, - "step": 17820 - }, - { - "epoch": 3.0392156862745097, - "grad_norm": 0.008886588737368584, - "learning_rate": 1.0582569969537304e-05, - "loss": 0.00031390462536364796, - "step": 17825 - }, - { - "epoch": 3.0400682011935207, - "grad_norm": 0.03827208653092384, - "learning_rate": 1.0564736938774028e-05, - "loss": 0.0004257161170244217, - "step": 17830 - }, - { - "epoch": 3.040920716112532, - "grad_norm": 0.01186210848391056, - "learning_rate": 1.0546916482287554e-05, - "loss": 0.0004740235395729542, - "step": 17835 - }, - { - "epoch": 3.041773231031543, - "grad_norm": 0.03290088102221489, - "learning_rate": 1.0529108608397058e-05, - "loss": 0.0005131486803293228, - "step": 17840 - }, - { - "epoch": 3.0426257459505544, - "grad_norm": 0.0059613995254039764, - "learning_rate": 1.0511313325415826e-05, - "loss": 0.0003371193306520581, - "step": 17845 - }, - { - "epoch": 3.0434782608695654, - "grad_norm": 0.008181710727512836, - "learning_rate": 1.049353064165132e-05, - "loss": 0.0003253704868257046, - "step": 17850 - }, - { - "epoch": 3.0443307757885765, - "grad_norm": 0.0913059189915657, - "learning_rate": 1.0475760565405071e-05, - "loss": 0.0008136253803968429, - "step": 17855 - }, - { - "epoch": 3.0451832907075875, - "grad_norm": 0.022350724786520004, - "learning_rate": 1.0458003104972746e-05, - "loss": 0.0003261453006416559, - "step": 17860 - }, - { - "epoch": 3.0460358056265986, - "grad_norm": 0.006000332068651915, - "learning_rate": 1.0440258268644106e-05, - "loss": 0.0004110721405595541, - "step": 17865 - }, - { - "epoch": 3.0468883205456097, - "grad_norm": 0.029682369902729988, - "learning_rate": 1.0422526064703051e-05, - "loss": 0.0003804177977144718, - "step": 17870 - }, - { - "epoch": 3.0477408354646207, - "grad_norm": 0.027596490457654, - "learning_rate": 1.0404806501427545e-05, - "loss": 0.00084029920399189, - "step": 17875 - }, - { - "epoch": 3.0485933503836318, - "grad_norm": 0.024016134440898895, - "learning_rate": 1.0387099587089688e-05, - "loss": 0.0004024073481559753, - "step": 17880 - }, - { - "epoch": 3.049445865302643, - "grad_norm": 0.03249691426753998, - "learning_rate": 1.0369405329955648e-05, - "loss": 0.000899493508040905, - "step": 17885 - }, - { - "epoch": 3.050298380221654, - "grad_norm": 0.06273671239614487, - "learning_rate": 1.035172373828568e-05, - "loss": 0.0005969330668449402, - "step": 17890 - }, - { - "epoch": 3.051150895140665, - "grad_norm": 0.02702365443110466, - "learning_rate": 1.0334054820334163e-05, - "loss": 0.0006494319997727871, - "step": 17895 - }, - { - "epoch": 3.052003410059676, - "grad_norm": 0.026773499324917793, - "learning_rate": 1.0316398584349527e-05, - "loss": 0.000355540681630373, - "step": 17900 - }, - { - "epoch": 3.052855924978687, - "grad_norm": 0.008991194888949394, - "learning_rate": 1.0298755038574284e-05, - "loss": 0.00036041475832462313, - "step": 17905 - }, - { - "epoch": 3.053708439897698, - "grad_norm": 0.0076339710503816605, - "learning_rate": 1.0281124191245031e-05, - "loss": 0.0003042724449187517, - "step": 17910 - }, - { - "epoch": 3.054560954816709, - "grad_norm": 0.026531491428613663, - "learning_rate": 1.0263506050592423e-05, - "loss": 0.0005534607917070389, - "step": 17915 - }, - { - "epoch": 3.0554134697357203, - "grad_norm": 0.009419003501534462, - "learning_rate": 1.0245900624841207e-05, - "loss": 0.0007199038751423359, - "step": 17920 - }, - { - "epoch": 3.0562659846547313, - "grad_norm": 0.02511359192430973, - "learning_rate": 1.0228307922210192e-05, - "loss": 0.0004511539824306965, - "step": 17925 - }, - { - "epoch": 3.0571184995737424, - "grad_norm": 0.007004071492701769, - "learning_rate": 1.0210727950912223e-05, - "loss": 0.0005472676362842322, - "step": 17930 - }, - { - "epoch": 3.0579710144927534, - "grad_norm": 0.005720047280192375, - "learning_rate": 1.0193160719154206e-05, - "loss": 0.0003349650418385863, - "step": 17935 - }, - { - "epoch": 3.0588235294117645, - "grad_norm": 0.00688981031998992, - "learning_rate": 1.017560623513713e-05, - "loss": 0.00028961682692170144, - "step": 17940 - }, - { - "epoch": 3.059676044330776, - "grad_norm": 0.034819502383470535, - "learning_rate": 1.0158064507056004e-05, - "loss": 0.0006485281512141228, - "step": 17945 - }, - { - "epoch": 3.060528559249787, - "grad_norm": 0.016207491979002953, - "learning_rate": 1.0140535543099885e-05, - "loss": 0.0006803269498050213, - "step": 17950 - }, - { - "epoch": 3.061381074168798, - "grad_norm": 0.013904010877013206, - "learning_rate": 1.0123019351451886e-05, - "loss": 0.001280614733695984, - "step": 17955 - }, - { - "epoch": 3.062233589087809, - "grad_norm": 0.006732371635735035, - "learning_rate": 1.0105515940289128e-05, - "loss": 0.0004976587370038033, - "step": 17960 - }, - { - "epoch": 3.0630861040068202, - "grad_norm": 0.004747320897877216, - "learning_rate": 1.0088025317782798e-05, - "loss": 0.00041887001134455204, - "step": 17965 - }, - { - "epoch": 3.0639386189258313, - "grad_norm": 0.024853700771927834, - "learning_rate": 1.0070547492098114e-05, - "loss": 0.0002716945484280586, - "step": 17970 - }, - { - "epoch": 3.0647911338448424, - "grad_norm": 0.014338959008455276, - "learning_rate": 1.0053082471394292e-05, - "loss": 0.00021331470925360917, - "step": 17975 - }, - { - "epoch": 3.0656436487638534, - "grad_norm": 0.09530264884233475, - "learning_rate": 1.003563026382459e-05, - "loss": 0.0005737710744142532, - "step": 17980 - }, - { - "epoch": 3.0664961636828645, - "grad_norm": 0.055152688175439835, - "learning_rate": 1.0018190877536263e-05, - "loss": 0.0005966671742498875, - "step": 17985 - }, - { - "epoch": 3.0673486786018755, - "grad_norm": 0.014675126411020756, - "learning_rate": 1.0000764320670622e-05, - "loss": 0.00021265523973852397, - "step": 17990 - }, - { - "epoch": 3.0682011935208866, - "grad_norm": 0.03896075114607811, - "learning_rate": 9.983350601362952e-06, - "loss": 0.00042183417826890945, - "step": 17995 - }, - { - "epoch": 3.0690537084398977, - "grad_norm": 0.026887232437729836, - "learning_rate": 9.965949727742554e-06, - "loss": 0.0004932911600917578, - "step": 18000 - }, - { - "epoch": 3.0699062233589087, - "grad_norm": 0.031420499086380005, - "learning_rate": 9.948561707932722e-06, - "loss": 0.0003844423685222864, - "step": 18005 - }, - { - "epoch": 3.07075873827792, - "grad_norm": 0.036880481988191605, - "learning_rate": 9.931186550050781e-06, - "loss": 0.0004236038308590651, - "step": 18010 - }, - { - "epoch": 3.071611253196931, - "grad_norm": 0.09423381090164185, - "learning_rate": 9.913824262208035e-06, - "loss": 0.0002767757046967745, - "step": 18015 - }, - { - "epoch": 3.072463768115942, - "grad_norm": 0.02230706810951233, - "learning_rate": 9.896474852509774e-06, - "loss": 0.00040940651670098307, - "step": 18020 - }, - { - "epoch": 3.073316283034953, - "grad_norm": 0.016502562910318375, - "learning_rate": 9.879138329055277e-06, - "loss": 0.0003771143034100533, - "step": 18025 - }, - { - "epoch": 3.074168797953964, - "grad_norm": 0.11179275810718536, - "learning_rate": 9.861814699937794e-06, - "loss": 0.0011194558814167977, - "step": 18030 - }, - { - "epoch": 3.075021312872975, - "grad_norm": 0.0195760540664196, - "learning_rate": 9.844503973244599e-06, - "loss": 0.00031050120014697313, - "step": 18035 - }, - { - "epoch": 3.075873827791986, - "grad_norm": 0.022706160321831703, - "learning_rate": 9.827206157056901e-06, - "loss": 0.0006133354268968106, - "step": 18040 - }, - { - "epoch": 3.0767263427109977, - "grad_norm": 0.04711553826928139, - "learning_rate": 9.809921259449896e-06, - "loss": 0.0005805216729640961, - "step": 18045 - }, - { - "epoch": 3.0775788576300087, - "grad_norm": 0.013767831958830357, - "learning_rate": 9.792649288492741e-06, - "loss": 0.0018730144947767258, - "step": 18050 - }, - { - "epoch": 3.0784313725490198, - "grad_norm": 0.03198297694325447, - "learning_rate": 9.775390252248584e-06, - "loss": 0.0008407266810536385, - "step": 18055 - }, - { - "epoch": 3.079283887468031, - "grad_norm": 0.004666489083319902, - "learning_rate": 9.758144158774502e-06, - "loss": 0.0006300830282270908, - "step": 18060 - }, - { - "epoch": 3.080136402387042, - "grad_norm": 0.046730559319257736, - "learning_rate": 9.740911016121561e-06, - "loss": 0.0010341707617044448, - "step": 18065 - }, - { - "epoch": 3.080988917306053, - "grad_norm": 0.028454085811972618, - "learning_rate": 9.72369083233476e-06, - "loss": 0.0006070803385227919, - "step": 18070 - }, - { - "epoch": 3.081841432225064, - "grad_norm": 0.03427174314856529, - "learning_rate": 9.706483615453036e-06, - "loss": 0.0005169651005417109, - "step": 18075 - }, - { - "epoch": 3.082693947144075, - "grad_norm": 0.06954972445964813, - "learning_rate": 9.689289373509316e-06, - "loss": 0.0006448618602007628, - "step": 18080 - }, - { - "epoch": 3.083546462063086, - "grad_norm": 0.06108829006552696, - "learning_rate": 9.672108114530434e-06, - "loss": 0.000641945656388998, - "step": 18085 - }, - { - "epoch": 3.084398976982097, - "grad_norm": 0.06737220287322998, - "learning_rate": 9.65493984653717e-06, - "loss": 0.0002690809080377221, - "step": 18090 - }, - { - "epoch": 3.0852514919011083, - "grad_norm": 0.016048768535256386, - "learning_rate": 9.637784577544234e-06, - "loss": 0.00035306806676089765, - "step": 18095 - }, - { - "epoch": 3.0861040068201193, - "grad_norm": 0.0573379211127758, - "learning_rate": 9.620642315560295e-06, - "loss": 0.0006647071335464716, - "step": 18100 - }, - { - "epoch": 3.0869565217391304, - "grad_norm": 0.006947176530957222, - "learning_rate": 9.603513068587913e-06, - "loss": 0.00039295474998652936, - "step": 18105 - }, - { - "epoch": 3.0878090366581414, - "grad_norm": 0.012169529683887959, - "learning_rate": 9.586396844623612e-06, - "loss": 0.0002381766214966774, - "step": 18110 - }, - { - "epoch": 3.0886615515771525, - "grad_norm": 0.007689214311540127, - "learning_rate": 9.569293651657802e-06, - "loss": 0.00043741161935031416, - "step": 18115 - }, - { - "epoch": 3.0895140664961636, - "grad_norm": 0.007992210797965527, - "learning_rate": 9.552203497674813e-06, - "loss": 0.00020460875239223242, - "step": 18120 - }, - { - "epoch": 3.0903665814151746, - "grad_norm": 0.0637383833527565, - "learning_rate": 9.535126390652917e-06, - "loss": 0.0009160020388662815, - "step": 18125 - }, - { - "epoch": 3.0912190963341857, - "grad_norm": 0.027393560856580734, - "learning_rate": 9.518062338564269e-06, - "loss": 0.00038999966345727445, - "step": 18130 - }, - { - "epoch": 3.0920716112531967, - "grad_norm": 0.07132820785045624, - "learning_rate": 9.501011349374927e-06, - "loss": 0.0006502022966742516, - "step": 18135 - }, - { - "epoch": 3.092924126172208, - "grad_norm": 0.03045077994465828, - "learning_rate": 9.48397343104486e-06, - "loss": 0.00084984190762043, - "step": 18140 - }, - { - "epoch": 3.0937766410912193, - "grad_norm": 0.009866426698863506, - "learning_rate": 9.466948591527953e-06, - "loss": 0.0005647209007292985, - "step": 18145 - }, - { - "epoch": 3.0946291560102304, - "grad_norm": 0.11743370443582535, - "learning_rate": 9.449936838771943e-06, - "loss": 0.0014441744424402714, - "step": 18150 - }, - { - "epoch": 3.0954816709292414, - "grad_norm": 0.03290848433971405, - "learning_rate": 9.432938180718506e-06, - "loss": 0.00036750044673681257, - "step": 18155 - }, - { - "epoch": 3.0963341858482525, - "grad_norm": 0.011831770651042461, - "learning_rate": 9.415952625303169e-06, - "loss": 0.0004473600536584854, - "step": 18160 - }, - { - "epoch": 3.0971867007672635, - "grad_norm": 0.08015407621860504, - "learning_rate": 9.398980180455355e-06, - "loss": 0.0006069076247513294, - "step": 18165 - }, - { - "epoch": 3.0980392156862746, - "grad_norm": 0.029129406437277794, - "learning_rate": 9.382020854098356e-06, - "loss": 0.0007575173862278461, - "step": 18170 - }, - { - "epoch": 3.0988917306052857, - "grad_norm": 0.0051441071555018425, - "learning_rate": 9.365074654149368e-06, - "loss": 0.00029567121528089045, - "step": 18175 - }, - { - "epoch": 3.0997442455242967, - "grad_norm": 0.055952105671167374, - "learning_rate": 9.348141588519435e-06, - "loss": 0.0005467975046485662, - "step": 18180 - }, - { - "epoch": 3.100596760443308, - "grad_norm": 0.009275187738239765, - "learning_rate": 9.331221665113471e-06, - "loss": 0.0003922369331121445, - "step": 18185 - }, - { - "epoch": 3.101449275362319, - "grad_norm": 0.052929461002349854, - "learning_rate": 9.314314891830251e-06, - "loss": 0.0005707596894353629, - "step": 18190 - }, - { - "epoch": 3.10230179028133, - "grad_norm": 0.011049921624362469, - "learning_rate": 9.297421276562426e-06, - "loss": 0.0002663507591933012, - "step": 18195 - }, - { - "epoch": 3.103154305200341, - "grad_norm": 0.04371742531657219, - "learning_rate": 9.280540827196516e-06, - "loss": 0.00039334925822913646, - "step": 18200 - }, - { - "epoch": 3.104006820119352, - "grad_norm": 0.04068119451403618, - "learning_rate": 9.263673551612858e-06, - "loss": 0.00039259335026144984, - "step": 18205 - }, - { - "epoch": 3.104859335038363, - "grad_norm": 0.020368283614516258, - "learning_rate": 9.246819457685662e-06, - "loss": 0.00041896156035363673, - "step": 18210 - }, - { - "epoch": 3.105711849957374, - "grad_norm": 0.03870120272040367, - "learning_rate": 9.229978553282968e-06, - "loss": 0.00047820848412811757, - "step": 18215 - }, - { - "epoch": 3.106564364876385, - "grad_norm": 0.013779827393591404, - "learning_rate": 9.213150846266686e-06, - "loss": 0.0003055138513445854, - "step": 18220 - }, - { - "epoch": 3.1074168797953963, - "grad_norm": 0.013860267587006092, - "learning_rate": 9.19633634449255e-06, - "loss": 0.00031585688702762127, - "step": 18225 - }, - { - "epoch": 3.1082693947144073, - "grad_norm": 0.05094626918435097, - "learning_rate": 9.179535055810118e-06, - "loss": 0.002102493681013584, - "step": 18230 - }, - { - "epoch": 3.1091219096334184, - "grad_norm": 0.007574997376650572, - "learning_rate": 9.162746988062783e-06, - "loss": 0.00019260718254372479, - "step": 18235 - }, - { - "epoch": 3.10997442455243, - "grad_norm": 0.04581240937113762, - "learning_rate": 9.145972149087787e-06, - "loss": 0.0008758898824453354, - "step": 18240 - }, - { - "epoch": 3.110826939471441, - "grad_norm": 0.02130374312400818, - "learning_rate": 9.129210546716194e-06, - "loss": 0.0003915982786566019, - "step": 18245 - }, - { - "epoch": 3.111679454390452, - "grad_norm": 0.022723006084561348, - "learning_rate": 9.112462188772862e-06, - "loss": 0.0007300690747797489, - "step": 18250 - }, - { - "epoch": 3.112531969309463, - "grad_norm": 0.08266850560903549, - "learning_rate": 9.095727083076484e-06, - "loss": 0.0005315537564456463, - "step": 18255 - }, - { - "epoch": 3.113384484228474, - "grad_norm": 0.06052851676940918, - "learning_rate": 9.079005237439557e-06, - "loss": 0.000990215130150318, - "step": 18260 - }, - { - "epoch": 3.114236999147485, - "grad_norm": 0.005194041877985001, - "learning_rate": 9.062296659668411e-06, - "loss": 0.00022260420955717565, - "step": 18265 - }, - { - "epoch": 3.1150895140664963, - "grad_norm": 0.06306616961956024, - "learning_rate": 9.045601357563156e-06, - "loss": 0.00041153267957270143, - "step": 18270 - }, - { - "epoch": 3.1159420289855073, - "grad_norm": 0.010921395383775234, - "learning_rate": 9.028919338917712e-06, - "loss": 0.0006207648664712905, - "step": 18275 - }, - { - "epoch": 3.1167945439045184, - "grad_norm": 0.04868682101368904, - "learning_rate": 9.012250611519802e-06, - "loss": 0.0006932941731065511, - "step": 18280 - }, - { - "epoch": 3.1176470588235294, - "grad_norm": 0.08955781161785126, - "learning_rate": 8.99559518315094e-06, - "loss": 0.0011549662798643113, - "step": 18285 - }, - { - "epoch": 3.1184995737425405, - "grad_norm": 0.013239394873380661, - "learning_rate": 8.978953061586447e-06, - "loss": 0.0007932853884994983, - "step": 18290 - }, - { - "epoch": 3.1193520886615516, - "grad_norm": 0.018361147493124008, - "learning_rate": 8.962324254595406e-06, - "loss": 0.0008410025388002395, - "step": 18295 - }, - { - "epoch": 3.1202046035805626, - "grad_norm": 0.01051001250743866, - "learning_rate": 8.94570876994071e-06, - "loss": 0.000252532446756959, - "step": 18300 - }, - { - "epoch": 3.1210571184995737, - "grad_norm": 0.03304922580718994, - "learning_rate": 8.929106615378996e-06, - "loss": 0.00035131536424160004, - "step": 18305 - }, - { - "epoch": 3.1219096334185847, - "grad_norm": 0.04879309609532356, - "learning_rate": 8.912517798660728e-06, - "loss": 0.000421084463596344, - "step": 18310 - }, - { - "epoch": 3.122762148337596, - "grad_norm": 0.010428894311189651, - "learning_rate": 8.89594232753011e-06, - "loss": 0.0004888340365141631, - "step": 18315 - }, - { - "epoch": 3.123614663256607, - "grad_norm": 0.03332305699586868, - "learning_rate": 8.879380209725114e-06, - "loss": 0.0005710380151867867, - "step": 18320 - }, - { - "epoch": 3.124467178175618, - "grad_norm": 0.11709732562303543, - "learning_rate": 8.862831452977486e-06, - "loss": 0.0007624867372214794, - "step": 18325 - }, - { - "epoch": 3.125319693094629, - "grad_norm": 0.010226447135210037, - "learning_rate": 8.846296065012758e-06, - "loss": 0.0002884409856051207, - "step": 18330 - }, - { - "epoch": 3.12617220801364, - "grad_norm": 0.06316721439361572, - "learning_rate": 8.829774053550167e-06, - "loss": 0.0005763838067650795, - "step": 18335 - }, - { - "epoch": 3.127024722932651, - "grad_norm": 0.009583157487213612, - "learning_rate": 8.813265426302772e-06, - "loss": 0.000740795349702239, - "step": 18340 - }, - { - "epoch": 3.1278772378516626, - "grad_norm": 0.09729946404695511, - "learning_rate": 8.796770190977332e-06, - "loss": 0.0009914403781294823, - "step": 18345 - }, - { - "epoch": 3.1287297527706737, - "grad_norm": 0.06878595054149628, - "learning_rate": 8.78028835527436e-06, - "loss": 0.000869260635226965, - "step": 18350 - }, - { - "epoch": 3.1295822676896847, - "grad_norm": 0.07358408719301224, - "learning_rate": 8.763819926888147e-06, - "loss": 0.0003912035841494799, - "step": 18355 - }, - { - "epoch": 3.130434782608696, - "grad_norm": 0.023181110620498657, - "learning_rate": 8.747364913506694e-06, - "loss": 0.0006454653572291136, - "step": 18360 - }, - { - "epoch": 3.131287297527707, - "grad_norm": 0.014834016561508179, - "learning_rate": 8.730923322811748e-06, - "loss": 0.0004834470339119434, - "step": 18365 - }, - { - "epoch": 3.132139812446718, - "grad_norm": 0.013688327744603157, - "learning_rate": 8.714495162478786e-06, - "loss": 0.00042675542645156386, - "step": 18370 - }, - { - "epoch": 3.132992327365729, - "grad_norm": 0.09365646541118622, - "learning_rate": 8.69808044017703e-06, - "loss": 0.000931151770055294, - "step": 18375 - }, - { - "epoch": 3.13384484228474, - "grad_norm": 0.026040131226181984, - "learning_rate": 8.681679163569399e-06, - "loss": 0.0008362406864762306, - "step": 18380 - }, - { - "epoch": 3.134697357203751, - "grad_norm": 0.026200976222753525, - "learning_rate": 8.665291340312585e-06, - "loss": 0.00024140358436852694, - "step": 18385 - }, - { - "epoch": 3.135549872122762, - "grad_norm": 0.06846249848604202, - "learning_rate": 8.648916978056948e-06, - "loss": 0.00040455334819853304, - "step": 18390 - }, - { - "epoch": 3.136402387041773, - "grad_norm": 0.005816053133457899, - "learning_rate": 8.632556084446594e-06, - "loss": 0.00026596912648528813, - "step": 18395 - }, - { - "epoch": 3.1372549019607843, - "grad_norm": 0.04414185881614685, - "learning_rate": 8.616208667119315e-06, - "loss": 0.0012877457775175571, - "step": 18400 - }, - { - "epoch": 3.1381074168797953, - "grad_norm": 0.03767494857311249, - "learning_rate": 8.59987473370665e-06, - "loss": 0.0010142676532268525, - "step": 18405 - }, - { - "epoch": 3.1389599317988064, - "grad_norm": 0.04332097992300987, - "learning_rate": 8.583554291833817e-06, - "loss": 0.0006124789826571942, - "step": 18410 - }, - { - "epoch": 3.1398124467178175, - "grad_norm": 0.007427348289638758, - "learning_rate": 8.567247349119739e-06, - "loss": 0.0007707455195486545, - "step": 18415 - }, - { - "epoch": 3.1406649616368285, - "grad_norm": 0.04704085737466812, - "learning_rate": 8.550953913177026e-06, - "loss": 0.0003297704039141536, - "step": 18420 - }, - { - "epoch": 3.1415174765558396, - "grad_norm": 0.015286453999578953, - "learning_rate": 8.534673991612011e-06, - "loss": 0.0007448584772646427, - "step": 18425 - }, - { - "epoch": 3.1423699914748506, - "grad_norm": 0.06501411646604538, - "learning_rate": 8.518407592024712e-06, - "loss": 0.0009272911585867405, - "step": 18430 - }, - { - "epoch": 3.1432225063938617, - "grad_norm": 0.07314588874578476, - "learning_rate": 8.50215472200881e-06, - "loss": 0.000575255136936903, - "step": 18435 - }, - { - "epoch": 3.144075021312873, - "grad_norm": 0.01809004321694374, - "learning_rate": 8.485915389151694e-06, - "loss": 0.000459101889282465, - "step": 18440 - }, - { - "epoch": 3.1449275362318843, - "grad_norm": 0.0908508151769638, - "learning_rate": 8.469689601034406e-06, - "loss": 0.00044624172151088717, - "step": 18445 - }, - { - "epoch": 3.1457800511508953, - "grad_norm": 0.029488559812307358, - "learning_rate": 8.45347736523171e-06, - "loss": 0.0003597501665353775, - "step": 18450 - }, - { - "epoch": 3.1466325660699064, - "grad_norm": 0.015282983891665936, - "learning_rate": 8.437278689312007e-06, - "loss": 0.0005331444554030895, - "step": 18455 - }, - { - "epoch": 3.1474850809889174, - "grad_norm": 0.0475476048886776, - "learning_rate": 8.421093580837374e-06, - "loss": 0.0010153815150260926, - "step": 18460 - }, - { - "epoch": 3.1483375959079285, - "grad_norm": 0.09270385652780533, - "learning_rate": 8.404922047363548e-06, - "loss": 0.0007084616459906101, - "step": 18465 - }, - { - "epoch": 3.1491901108269396, - "grad_norm": 0.025847190991044044, - "learning_rate": 8.388764096439953e-06, - "loss": 0.00023725461214780809, - "step": 18470 - }, - { - "epoch": 3.1500426257459506, - "grad_norm": 0.023858604952692986, - "learning_rate": 8.372619735609662e-06, - "loss": 0.0003485321067273617, - "step": 18475 - }, - { - "epoch": 3.1508951406649617, - "grad_norm": 0.005237930454313755, - "learning_rate": 8.356488972409398e-06, - "loss": 0.0005028464831411839, - "step": 18480 - }, - { - "epoch": 3.1517476555839727, - "grad_norm": 0.04220377653837204, - "learning_rate": 8.340371814369532e-06, - "loss": 0.0009449001401662827, - "step": 18485 - }, - { - "epoch": 3.152600170502984, - "grad_norm": 0.0714297816157341, - "learning_rate": 8.324268269014078e-06, - "loss": 0.0004925032146275044, - "step": 18490 - }, - { - "epoch": 3.153452685421995, - "grad_norm": 0.023220403119921684, - "learning_rate": 8.308178343860729e-06, - "loss": 0.0006748316343873739, - "step": 18495 - }, - { - "epoch": 3.154305200341006, - "grad_norm": 0.05557497963309288, - "learning_rate": 8.292102046420787e-06, - "loss": 0.0007373414933681488, - "step": 18500 - }, - { - "epoch": 3.155157715260017, - "grad_norm": 0.017496848478913307, - "learning_rate": 8.276039384199203e-06, - "loss": 0.001099762413650751, - "step": 18505 - }, - { - "epoch": 3.156010230179028, - "grad_norm": 0.006222693715244532, - "learning_rate": 8.259990364694557e-06, - "loss": 0.00030525855254381895, - "step": 18510 - }, - { - "epoch": 3.156862745098039, - "grad_norm": 0.016474226489663124, - "learning_rate": 8.243954995399062e-06, - "loss": 0.0003330275183543563, - "step": 18515 - }, - { - "epoch": 3.15771526001705, - "grad_norm": 0.011799500323832035, - "learning_rate": 8.227933283798587e-06, - "loss": 0.0008484587073326111, - "step": 18520 - }, - { - "epoch": 3.1585677749360612, - "grad_norm": 0.10201061517000198, - "learning_rate": 8.211925237372581e-06, - "loss": 0.0006466713268309832, - "step": 18525 - }, - { - "epoch": 3.1594202898550723, - "grad_norm": 0.04381557181477547, - "learning_rate": 8.195930863594131e-06, - "loss": 0.0004293074831366539, - "step": 18530 - }, - { - "epoch": 3.1602728047740833, - "grad_norm": 0.019579825922846794, - "learning_rate": 8.17995016992994e-06, - "loss": 0.000977578666061163, - "step": 18535 - }, - { - "epoch": 3.1611253196930944, - "grad_norm": 0.043777912855148315, - "learning_rate": 8.163983163840338e-06, - "loss": 0.0004046197980642319, - "step": 18540 - }, - { - "epoch": 3.161977834612106, - "grad_norm": 0.006554318591952324, - "learning_rate": 8.148029852779258e-06, - "loss": 0.0013218319974839688, - "step": 18545 - }, - { - "epoch": 3.162830349531117, - "grad_norm": 0.005029724910855293, - "learning_rate": 8.13209024419422e-06, - "loss": 0.0006320577114820481, - "step": 18550 - }, - { - "epoch": 3.163682864450128, - "grad_norm": 0.04088412597775459, - "learning_rate": 8.11616434552637e-06, - "loss": 0.001373323891311884, - "step": 18555 - }, - { - "epoch": 3.164535379369139, - "grad_norm": 0.04089086875319481, - "learning_rate": 8.100252164210444e-06, - "loss": 0.0009590038098394871, - "step": 18560 - }, - { - "epoch": 3.16538789428815, - "grad_norm": 0.059163108468055725, - "learning_rate": 8.084353707674792e-06, - "loss": 0.0007160831708461046, - "step": 18565 - }, - { - "epoch": 3.166240409207161, - "grad_norm": 0.040127795189619064, - "learning_rate": 8.068468983341338e-06, - "loss": 0.000580929359421134, - "step": 18570 - }, - { - "epoch": 3.1670929241261723, - "grad_norm": 0.11662314087152481, - "learning_rate": 8.052597998625588e-06, - "loss": 0.0005010033026337623, - "step": 18575 - }, - { - "epoch": 3.1679454390451833, - "grad_norm": 0.0162198469042778, - "learning_rate": 8.036740760936647e-06, - "loss": 0.0003753812052309513, - "step": 18580 - }, - { - "epoch": 3.1687979539641944, - "grad_norm": 0.12012816220521927, - "learning_rate": 8.020897277677215e-06, - "loss": 0.0009693917818367481, - "step": 18585 - }, - { - "epoch": 3.1696504688832055, - "grad_norm": 0.03412945196032524, - "learning_rate": 8.00506755624355e-06, - "loss": 0.0001745267305523157, - "step": 18590 - }, - { - "epoch": 3.1705029838022165, - "grad_norm": 0.005339973606169224, - "learning_rate": 7.989251604025489e-06, - "loss": 0.0009339713491499424, - "step": 18595 - }, - { - "epoch": 3.1713554987212276, - "grad_norm": 0.021463308483362198, - "learning_rate": 7.973449428406439e-06, - "loss": 0.0006150984205305577, - "step": 18600 - }, - { - "epoch": 3.1722080136402386, - "grad_norm": 0.011554487980902195, - "learning_rate": 7.957661036763397e-06, - "loss": 0.00038701703306287527, - "step": 18605 - }, - { - "epoch": 3.1730605285592497, - "grad_norm": 0.021070247516036034, - "learning_rate": 7.941886436466888e-06, - "loss": 0.0007086104713380336, - "step": 18610 - }, - { - "epoch": 3.1739130434782608, - "grad_norm": 0.07222088426351547, - "learning_rate": 7.926125634881047e-06, - "loss": 0.0005243740510195493, - "step": 18615 - }, - { - "epoch": 3.174765558397272, - "grad_norm": 0.09254760295152664, - "learning_rate": 7.910378639363528e-06, - "loss": 0.0007765952497720719, - "step": 18620 - }, - { - "epoch": 3.175618073316283, - "grad_norm": 0.037701316177845, - "learning_rate": 7.89464545726555e-06, - "loss": 0.00039138970896601677, - "step": 18625 - }, - { - "epoch": 3.176470588235294, - "grad_norm": 0.015249347314238548, - "learning_rate": 7.878926095931876e-06, - "loss": 0.0003943302668631077, - "step": 18630 - }, - { - "epoch": 3.177323103154305, - "grad_norm": 0.016091618686914444, - "learning_rate": 7.863220562700847e-06, - "loss": 0.000575948553159833, - "step": 18635 - }, - { - "epoch": 3.1781756180733165, - "grad_norm": 0.05504714697599411, - "learning_rate": 7.847528864904322e-06, - "loss": 0.0012753555551171304, - "step": 18640 - }, - { - "epoch": 3.1790281329923276, - "grad_norm": 0.01844659261405468, - "learning_rate": 7.831851009867693e-06, - "loss": 0.00032608325127512214, - "step": 18645 - }, - { - "epoch": 3.1798806479113386, - "grad_norm": 0.02932833693921566, - "learning_rate": 7.816187004909927e-06, - "loss": 0.0002993215108290315, - "step": 18650 - }, - { - "epoch": 3.1807331628303497, - "grad_norm": 0.03746391460299492, - "learning_rate": 7.800536857343479e-06, - "loss": 0.000614574272185564, - "step": 18655 - }, - { - "epoch": 3.1815856777493607, - "grad_norm": 0.04635264351963997, - "learning_rate": 7.784900574474383e-06, - "loss": 0.0005155592691153288, - "step": 18660 - }, - { - "epoch": 3.182438192668372, - "grad_norm": 0.024929136037826538, - "learning_rate": 7.769278163602164e-06, - "loss": 0.0003661647439002991, - "step": 18665 - }, - { - "epoch": 3.183290707587383, - "grad_norm": 0.005773736163973808, - "learning_rate": 7.753669632019881e-06, - "loss": 0.0003662605304270983, - "step": 18670 - }, - { - "epoch": 3.184143222506394, - "grad_norm": 0.14380963146686554, - "learning_rate": 7.738074987014107e-06, - "loss": 0.000703729223459959, - "step": 18675 - }, - { - "epoch": 3.184995737425405, - "grad_norm": 0.03039398603141308, - "learning_rate": 7.722494235864967e-06, - "loss": 0.00028703445568680765, - "step": 18680 - }, - { - "epoch": 3.185848252344416, - "grad_norm": 0.022264502942562103, - "learning_rate": 7.706927385846053e-06, - "loss": 0.0004953373223543167, - "step": 18685 - }, - { - "epoch": 3.186700767263427, - "grad_norm": 0.022176261991262436, - "learning_rate": 7.691374444224497e-06, - "loss": 0.0006184632889926434, - "step": 18690 - }, - { - "epoch": 3.187553282182438, - "grad_norm": 0.030033515766263008, - "learning_rate": 7.675835418260915e-06, - "loss": 0.0006910198833793401, - "step": 18695 - }, - { - "epoch": 3.1884057971014492, - "grad_norm": 0.12117313593626022, - "learning_rate": 7.660310315209455e-06, - "loss": 0.0012623773887753486, - "step": 18700 - }, - { - "epoch": 3.1892583120204603, - "grad_norm": 0.013550493866205215, - "learning_rate": 7.644799142317753e-06, - "loss": 0.0007082201074808836, - "step": 18705 - }, - { - "epoch": 3.1901108269394713, - "grad_norm": 0.0489371083676815, - "learning_rate": 7.629301906826945e-06, - "loss": 0.0007669483777135611, - "step": 18710 - }, - { - "epoch": 3.1909633418584824, - "grad_norm": 0.028650205582380295, - "learning_rate": 7.6138186159716435e-06, - "loss": 0.0009685775265097618, - "step": 18715 - }, - { - "epoch": 3.1918158567774935, - "grad_norm": 0.06777958571910858, - "learning_rate": 7.598349276979958e-06, - "loss": 0.0003870198968797922, - "step": 18720 - }, - { - "epoch": 3.1926683716965045, - "grad_norm": 0.027635935693979263, - "learning_rate": 7.582893897073514e-06, - "loss": 0.00038398322649300096, - "step": 18725 - }, - { - "epoch": 3.1935208866155156, - "grad_norm": 0.0027559841983020306, - "learning_rate": 7.567452483467381e-06, - "loss": 0.0003620174713432789, - "step": 18730 - }, - { - "epoch": 3.1943734015345266, - "grad_norm": 0.010767337866127491, - "learning_rate": 7.552025043370125e-06, - "loss": 0.00037704890128225087, - "step": 18735 - }, - { - "epoch": 3.1952259164535377, - "grad_norm": 0.0657992735505104, - "learning_rate": 7.5366115839837815e-06, - "loss": 0.0004264485090970993, - "step": 18740 - }, - { - "epoch": 3.196078431372549, - "grad_norm": 0.029620325192809105, - "learning_rate": 7.5212121125038796e-06, - "loss": 0.0002805137075483799, - "step": 18745 - }, - { - "epoch": 3.1969309462915603, - "grad_norm": 0.04062730073928833, - "learning_rate": 7.505826636119407e-06, - "loss": 0.00017744075739756227, - "step": 18750 - }, - { - "epoch": 3.1977834612105713, - "grad_norm": 0.0391278937458992, - "learning_rate": 7.490455162012808e-06, - "loss": 0.001126928348094225, - "step": 18755 - }, - { - "epoch": 3.1986359761295824, - "grad_norm": 0.0766754299402237, - "learning_rate": 7.4750976973599986e-06, - "loss": 0.0006041087210178376, - "step": 18760 - }, - { - "epoch": 3.1994884910485935, - "grad_norm": 0.01741630584001541, - "learning_rate": 7.459754249330347e-06, - "loss": 0.0007178018335253, - "step": 18765 - }, - { - "epoch": 3.2003410059676045, - "grad_norm": 0.052834443747997284, - "learning_rate": 7.444424825086698e-06, - "loss": 0.0006523634772747755, - "step": 18770 - }, - { - "epoch": 3.2011935208866156, - "grad_norm": 0.051064226776361465, - "learning_rate": 7.4291094317853365e-06, - "loss": 0.0004721490200608969, - "step": 18775 - }, - { - "epoch": 3.2020460358056266, - "grad_norm": 0.01717698574066162, - "learning_rate": 7.4138080765759885e-06, - "loss": 0.0010264517739415168, - "step": 18780 - }, - { - "epoch": 3.2028985507246377, - "grad_norm": 0.0630933865904808, - "learning_rate": 7.398520766601833e-06, - "loss": 0.000731096789240837, - "step": 18785 - }, - { - "epoch": 3.2037510656436488, - "grad_norm": 0.021110277622938156, - "learning_rate": 7.383247508999501e-06, - "loss": 0.00034617548808455465, - "step": 18790 - }, - { - "epoch": 3.20460358056266, - "grad_norm": 0.03756425157189369, - "learning_rate": 7.367988310899066e-06, - "loss": 0.0005103135481476784, - "step": 18795 - }, - { - "epoch": 3.205456095481671, - "grad_norm": 0.009691229090094566, - "learning_rate": 7.352743179424024e-06, - "loss": 0.0007526874542236328, - "step": 18800 - }, - { - "epoch": 3.206308610400682, - "grad_norm": 0.007688464596867561, - "learning_rate": 7.337512121691304e-06, - "loss": 0.0008044790476560593, - "step": 18805 - }, - { - "epoch": 3.207161125319693, - "grad_norm": 0.010440339334309101, - "learning_rate": 7.322295144811276e-06, - "loss": 0.00020275618880987166, - "step": 18810 - }, - { - "epoch": 3.208013640238704, - "grad_norm": 0.01417286042124033, - "learning_rate": 7.307092255887711e-06, - "loss": 0.0007631714455783367, - "step": 18815 - }, - { - "epoch": 3.208866155157715, - "grad_norm": 0.0317782461643219, - "learning_rate": 7.291903462017859e-06, - "loss": 0.0003871546592563391, - "step": 18820 - }, - { - "epoch": 3.209718670076726, - "grad_norm": 0.017280934378504753, - "learning_rate": 7.27672877029233e-06, - "loss": 0.002473811246454716, - "step": 18825 - }, - { - "epoch": 3.2105711849957372, - "grad_norm": 0.015073291026055813, - "learning_rate": 7.261568187795169e-06, - "loss": 0.00022768331691622735, - "step": 18830 - }, - { - "epoch": 3.2114236999147483, - "grad_norm": 0.10031914710998535, - "learning_rate": 7.246421721603867e-06, - "loss": 0.0005759174935519695, - "step": 18835 - }, - { - "epoch": 3.21227621483376, - "grad_norm": 0.012291891500353813, - "learning_rate": 7.2312893787892695e-06, - "loss": 0.000749863451346755, - "step": 18840 - }, - { - "epoch": 3.213128729752771, - "grad_norm": 0.048827286809682846, - "learning_rate": 7.216171166415677e-06, - "loss": 0.00025824215263128283, - "step": 18845 - }, - { - "epoch": 3.213981244671782, - "grad_norm": 0.02683771587908268, - "learning_rate": 7.201067091540773e-06, - "loss": 0.0006501530762761832, - "step": 18850 - }, - { - "epoch": 3.214833759590793, - "grad_norm": 0.019678298383951187, - "learning_rate": 7.185977161215633e-06, - "loss": 0.0006075311917811632, - "step": 18855 - }, - { - "epoch": 3.215686274509804, - "grad_norm": 0.03252946212887764, - "learning_rate": 7.170901382484737e-06, - "loss": 0.0005334909074008465, - "step": 18860 - }, - { - "epoch": 3.216538789428815, - "grad_norm": 0.02117627114057541, - "learning_rate": 7.155839762385973e-06, - "loss": 0.0012689195573329926, - "step": 18865 - }, - { - "epoch": 3.217391304347826, - "grad_norm": 0.057649675756692886, - "learning_rate": 7.140792307950598e-06, - "loss": 0.0006012415513396263, - "step": 18870 - }, - { - "epoch": 3.2182438192668372, - "grad_norm": 0.01991843432188034, - "learning_rate": 7.125759026203254e-06, - "loss": 0.0006623437628149986, - "step": 18875 - }, - { - "epoch": 3.2190963341858483, - "grad_norm": 0.020812440663576126, - "learning_rate": 7.110739924161998e-06, - "loss": 0.00042641563341021537, - "step": 18880 - }, - { - "epoch": 3.2199488491048593, - "grad_norm": 0.001917969319038093, - "learning_rate": 7.095735008838227e-06, - "loss": 0.00040855356492102145, - "step": 18885 - }, - { - "epoch": 3.2208013640238704, - "grad_norm": 0.02301851660013199, - "learning_rate": 7.080744287236753e-06, - "loss": 0.000662582740187645, - "step": 18890 - }, - { - "epoch": 3.2216538789428815, - "grad_norm": 0.045610178261995316, - "learning_rate": 7.065767766355733e-06, - "loss": 0.0003238538280129433, - "step": 18895 - }, - { - "epoch": 3.2225063938618925, - "grad_norm": 0.02546820044517517, - "learning_rate": 7.050805453186707e-06, - "loss": 0.0005394276697188616, - "step": 18900 - }, - { - "epoch": 3.2233589087809036, - "grad_norm": 0.01680169068276882, - "learning_rate": 7.035857354714572e-06, - "loss": 0.0001849454827606678, - "step": 18905 - }, - { - "epoch": 3.2242114236999146, - "grad_norm": 0.07080511003732681, - "learning_rate": 7.020923477917616e-06, - "loss": 0.0009049614891409874, - "step": 18910 - }, - { - "epoch": 3.2250639386189257, - "grad_norm": 0.015143739990890026, - "learning_rate": 7.006003829767458e-06, - "loss": 0.0003394487779587507, - "step": 18915 - }, - { - "epoch": 3.2259164535379368, - "grad_norm": 0.02981925569474697, - "learning_rate": 6.991098417229077e-06, - "loss": 0.0008880021050572395, - "step": 18920 - }, - { - "epoch": 3.226768968456948, - "grad_norm": 0.011556530371308327, - "learning_rate": 6.976207247260836e-06, - "loss": 0.0005170104093849659, - "step": 18925 - }, - { - "epoch": 3.227621483375959, - "grad_norm": 0.06276580691337585, - "learning_rate": 6.961330326814407e-06, - "loss": 0.0013013094663619994, - "step": 18930 - }, - { - "epoch": 3.2284739982949704, - "grad_norm": 0.015998052433133125, - "learning_rate": 6.946467662834852e-06, - "loss": 0.0005359035450965167, - "step": 18935 - }, - { - "epoch": 3.229326513213981, - "grad_norm": 0.04979191720485687, - "learning_rate": 6.931619262260546e-06, - "loss": 0.0007673433981835842, - "step": 18940 - }, - { - "epoch": 3.2301790281329925, - "grad_norm": 0.12047834694385529, - "learning_rate": 6.9167851320232225e-06, - "loss": 0.0017763294279575347, - "step": 18945 - }, - { - "epoch": 3.2310315430520036, - "grad_norm": 0.010395308956503868, - "learning_rate": 6.901965279047926e-06, - "loss": 0.000369875249452889, - "step": 18950 - }, - { - "epoch": 3.2318840579710146, - "grad_norm": 0.07600873708724976, - "learning_rate": 6.887159710253089e-06, - "loss": 0.0005032925866544247, - "step": 18955 - }, - { - "epoch": 3.2327365728900257, - "grad_norm": 0.011709989979863167, - "learning_rate": 6.8723684325504235e-06, - "loss": 0.0009861321188509465, - "step": 18960 - }, - { - "epoch": 3.2335890878090368, - "grad_norm": 0.024761514738202095, - "learning_rate": 6.857591452844994e-06, - "loss": 0.00034510630648583175, - "step": 18965 - }, - { - "epoch": 3.234441602728048, - "grad_norm": 0.04486176744103432, - "learning_rate": 6.8428287780351755e-06, - "loss": 0.00017987118335440754, - "step": 18970 - }, - { - "epoch": 3.235294117647059, - "grad_norm": 0.020876318216323853, - "learning_rate": 6.828080415012691e-06, - "loss": 0.00046160193160176276, - "step": 18975 - }, - { - "epoch": 3.23614663256607, - "grad_norm": 0.005108790006488562, - "learning_rate": 6.813346370662566e-06, - "loss": 0.0002624133136123419, - "step": 18980 - }, - { - "epoch": 3.236999147485081, - "grad_norm": 0.023575518280267715, - "learning_rate": 6.798626651863142e-06, - "loss": 0.0004043182358145714, - "step": 18985 - }, - { - "epoch": 3.237851662404092, - "grad_norm": 0.03796171769499779, - "learning_rate": 6.78392126548607e-06, - "loss": 0.000300011713989079, - "step": 18990 - }, - { - "epoch": 3.238704177323103, - "grad_norm": 0.010747452266514301, - "learning_rate": 6.769230218396302e-06, - "loss": 0.0008313095197081566, - "step": 18995 - }, - { - "epoch": 3.239556692242114, - "grad_norm": 0.037278912961483, - "learning_rate": 6.75455351745213e-06, - "loss": 0.0004106287844479084, - "step": 19000 - }, - { - "epoch": 3.2404092071611252, - "grad_norm": 0.06086887791752815, - "learning_rate": 6.7398911695051155e-06, - "loss": 0.0005375253967940807, - "step": 19005 - }, - { - "epoch": 3.2412617220801363, - "grad_norm": 0.008513805456459522, - "learning_rate": 6.725243181400129e-06, - "loss": 0.0002401076490059495, - "step": 19010 - }, - { - "epoch": 3.2421142369991474, - "grad_norm": 0.11595302820205688, - "learning_rate": 6.71060955997533e-06, - "loss": 0.000469267088919878, - "step": 19015 - }, - { - "epoch": 3.2429667519181584, - "grad_norm": 0.032653845846652985, - "learning_rate": 6.695990312062191e-06, - "loss": 0.0005158457439392805, - "step": 19020 - }, - { - "epoch": 3.2438192668371695, - "grad_norm": 0.0623047836124897, - "learning_rate": 6.6813854444854695e-06, - "loss": 0.0005772956646978855, - "step": 19025 - }, - { - "epoch": 3.2446717817561805, - "grad_norm": 0.003954550251364708, - "learning_rate": 6.666794964063195e-06, - "loss": 0.0011268424801528453, - "step": 19030 - }, - { - "epoch": 3.2455242966751916, - "grad_norm": 0.02917463518679142, - "learning_rate": 6.6522188776066935e-06, - "loss": 0.0007552789058536292, - "step": 19035 - }, - { - "epoch": 3.246376811594203, - "grad_norm": 0.0267369132488966, - "learning_rate": 6.637657191920561e-06, - "loss": 0.00021620083134621382, - "step": 19040 - }, - { - "epoch": 3.247229326513214, - "grad_norm": 0.05929157882928848, - "learning_rate": 6.62310991380267e-06, - "loss": 0.0005157306790351867, - "step": 19045 - }, - { - "epoch": 3.2480818414322252, - "grad_norm": 0.01827944628894329, - "learning_rate": 6.608577050044193e-06, - "loss": 0.0003319120965898037, - "step": 19050 - }, - { - "epoch": 3.2489343563512363, - "grad_norm": 0.013090296648442745, - "learning_rate": 6.594058607429542e-06, - "loss": 0.0005971027072519064, - "step": 19055 - }, - { - "epoch": 3.2494458653026426, - "eval_loss": 0.06181741878390312, - "eval_runtime": 3.7049, - "eval_samples_per_second": 68.017, - "eval_steps_per_second": 1.08, - "step": 19058 - }, - { - "eval_cer_subset": 0.013418665624440211, - "eval_cer_subset_edit_distance": 824, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 19058 - }, - { - "epoch": 3.2497868712702473, - "grad_norm": 0.11219825595617294, - "learning_rate": 6.579554592736402e-06, - "loss": 0.0005919500254094601, - "step": 19060 - }, - { - "epoch": 3.2506393861892584, - "grad_norm": 0.010144476778805256, - "learning_rate": 6.565065012735742e-06, - "loss": 0.00045172283425927163, - "step": 19065 - }, - { - "epoch": 3.2514919011082695, - "grad_norm": 0.013089130632579327, - "learning_rate": 6.550589874191782e-06, - "loss": 0.00025117534678429363, - "step": 19070 - }, - { - "epoch": 3.2523444160272805, - "grad_norm": 0.047521159052848816, - "learning_rate": 6.536129183861994e-06, - "loss": 0.0008897949941456318, - "step": 19075 - }, - { - "epoch": 3.2531969309462916, - "grad_norm": 0.04106447473168373, - "learning_rate": 6.5216829484971085e-06, - "loss": 0.000283010583370924, - "step": 19080 - }, - { - "epoch": 3.2540494458653026, - "grad_norm": 0.04407098516821861, - "learning_rate": 6.507251174841109e-06, - "loss": 0.0006978865712881088, - "step": 19085 - }, - { - "epoch": 3.2549019607843137, - "grad_norm": 0.01719486154615879, - "learning_rate": 6.492833869631217e-06, - "loss": 0.00031038771849125626, - "step": 19090 - }, - { - "epoch": 3.2557544757033248, - "grad_norm": 0.007759191561490297, - "learning_rate": 6.478431039597928e-06, - "loss": 0.0005179021041840315, - "step": 19095 - }, - { - "epoch": 3.256606990622336, - "grad_norm": 0.13484114408493042, - "learning_rate": 6.464042691464956e-06, - "loss": 0.0006286890245974063, - "step": 19100 - }, - { - "epoch": 3.257459505541347, - "grad_norm": 0.015848470851778984, - "learning_rate": 6.449668831949248e-06, - "loss": 0.0005874604452401399, - "step": 19105 - }, - { - "epoch": 3.258312020460358, - "grad_norm": 0.010055625811219215, - "learning_rate": 6.43530946776102e-06, - "loss": 0.00036783108953386545, - "step": 19110 - }, - { - "epoch": 3.259164535379369, - "grad_norm": 0.010497787036001682, - "learning_rate": 6.420964605603681e-06, - "loss": 0.00043828897178173066, - "step": 19115 - }, - { - "epoch": 3.26001705029838, - "grad_norm": 0.024217624217271805, - "learning_rate": 6.40663425217391e-06, - "loss": 0.00042369402945041656, - "step": 19120 - }, - { - "epoch": 3.260869565217391, - "grad_norm": 0.022596243768930435, - "learning_rate": 6.392318414161583e-06, - "loss": 0.00037041325122118, - "step": 19125 - }, - { - "epoch": 3.261722080136402, - "grad_norm": 0.02588561922311783, - "learning_rate": 6.378017098249812e-06, - "loss": 0.00029896264895796777, - "step": 19130 - }, - { - "epoch": 3.2625745950554137, - "grad_norm": 0.01108810119330883, - "learning_rate": 6.363730311114913e-06, - "loss": 0.0006667471025139093, - "step": 19135 - }, - { - "epoch": 3.2634271099744243, - "grad_norm": 0.009552333503961563, - "learning_rate": 6.349458059426453e-06, - "loss": 0.0004497227258980274, - "step": 19140 - }, - { - "epoch": 3.264279624893436, - "grad_norm": 0.01460598036646843, - "learning_rate": 6.335200349847185e-06, - "loss": 0.0002194883767515421, - "step": 19145 - }, - { - "epoch": 3.265132139812447, - "grad_norm": 0.03732374310493469, - "learning_rate": 6.320957189033071e-06, - "loss": 0.0002586513292044401, - "step": 19150 - }, - { - "epoch": 3.265984654731458, - "grad_norm": 0.016899019479751587, - "learning_rate": 6.306728583633319e-06, - "loss": 0.0009543164633214474, - "step": 19155 - }, - { - "epoch": 3.266837169650469, - "grad_norm": 0.01586720161139965, - "learning_rate": 6.292514540290286e-06, - "loss": 0.0008085070177912713, - "step": 19160 - }, - { - "epoch": 3.26768968456948, - "grad_norm": 0.009034652262926102, - "learning_rate": 6.278315065639588e-06, - "loss": 0.000653286511078477, - "step": 19165 - }, - { - "epoch": 3.268542199488491, - "grad_norm": 0.003070715581998229, - "learning_rate": 6.264130166309996e-06, - "loss": 0.00026131083723157644, - "step": 19170 - }, - { - "epoch": 3.269394714407502, - "grad_norm": 0.032700877636671066, - "learning_rate": 6.249959848923497e-06, - "loss": 0.0004788469523191452, - "step": 19175 - }, - { - "epoch": 3.2702472293265132, - "grad_norm": 0.03986335173249245, - "learning_rate": 6.235804120095252e-06, - "loss": 0.0005488947499543428, - "step": 19180 - }, - { - "epoch": 3.2710997442455243, - "grad_norm": 0.02649238146841526, - "learning_rate": 6.221662986433652e-06, - "loss": 0.0008479308336973191, - "step": 19185 - }, - { - "epoch": 3.2719522591645354, - "grad_norm": 0.04019145667552948, - "learning_rate": 6.207536454540235e-06, - "loss": 0.0008174203336238861, - "step": 19190 - }, - { - "epoch": 3.2728047740835464, - "grad_norm": 0.013805892318487167, - "learning_rate": 6.193424531009733e-06, - "loss": 0.0010017482563853264, - "step": 19195 - }, - { - "epoch": 3.2736572890025575, - "grad_norm": 0.022588396444916725, - "learning_rate": 6.17932722243006e-06, - "loss": 0.0004558952059596777, - "step": 19200 - }, - { - "epoch": 3.2745098039215685, - "grad_norm": 0.009211315773427486, - "learning_rate": 6.1652445353823136e-06, - "loss": 0.0002530797151848674, - "step": 19205 - }, - { - "epoch": 3.2753623188405796, - "grad_norm": 0.03019166924059391, - "learning_rate": 6.151176476440768e-06, - "loss": 0.0010970567353069782, - "step": 19210 - }, - { - "epoch": 3.2762148337595907, - "grad_norm": 0.10982025414705276, - "learning_rate": 6.137123052172854e-06, - "loss": 0.00046633705496788023, - "step": 19215 - }, - { - "epoch": 3.2770673486786017, - "grad_norm": 0.09192784875631332, - "learning_rate": 6.123084269139178e-06, - "loss": 0.0013196432963013649, - "step": 19220 - }, - { - "epoch": 3.277919863597613, - "grad_norm": 0.02934069000184536, - "learning_rate": 6.109060133893501e-06, - "loss": 0.0005467353854328394, - "step": 19225 - }, - { - "epoch": 3.2787723785166243, - "grad_norm": 0.03443235158920288, - "learning_rate": 6.095050652982773e-06, - "loss": 0.0004425105173140764, - "step": 19230 - }, - { - "epoch": 3.279624893435635, - "grad_norm": 0.04472684487700462, - "learning_rate": 6.081055832947077e-06, - "loss": 0.0006654649972915649, - "step": 19235 - }, - { - "epoch": 3.2804774083546464, - "grad_norm": 0.04871739074587822, - "learning_rate": 6.067075680319663e-06, - "loss": 0.0011335751041769981, - "step": 19240 - }, - { - "epoch": 3.2813299232736575, - "grad_norm": 0.17364081740379333, - "learning_rate": 6.053110201626918e-06, - "loss": 0.0007838122546672821, - "step": 19245 - }, - { - "epoch": 3.2821824381926685, - "grad_norm": 0.10362228006124496, - "learning_rate": 6.0391594033884035e-06, - "loss": 0.0004850291647017002, - "step": 19250 - }, - { - "epoch": 3.2830349531116796, - "grad_norm": 0.033360131084918976, - "learning_rate": 6.025223292116828e-06, - "loss": 0.0003192754928022623, - "step": 19255 - }, - { - "epoch": 3.2838874680306906, - "grad_norm": 0.019219927489757538, - "learning_rate": 6.0113018743180195e-06, - "loss": 0.0004580964334309101, - "step": 19260 - }, - { - "epoch": 3.2847399829497017, - "grad_norm": 0.019038213416934013, - "learning_rate": 5.997395156490956e-06, - "loss": 0.000247283186763525, - "step": 19265 - }, - { - "epoch": 3.2855924978687128, - "grad_norm": 0.07672185450792313, - "learning_rate": 5.983503145127763e-06, - "loss": 0.00036474117077887056, - "step": 19270 - }, - { - "epoch": 3.286445012787724, - "grad_norm": 0.006674405187368393, - "learning_rate": 5.96962584671368e-06, - "loss": 0.000742178363725543, - "step": 19275 - }, - { - "epoch": 3.287297527706735, - "grad_norm": 0.0242395531386137, - "learning_rate": 5.9557632677271105e-06, - "loss": 0.0003610172076150775, - "step": 19280 - }, - { - "epoch": 3.288150042625746, - "grad_norm": 0.006741875316947699, - "learning_rate": 5.941915414639559e-06, - "loss": 0.0006255440413951874, - "step": 19285 - }, - { - "epoch": 3.289002557544757, - "grad_norm": 0.015137423761188984, - "learning_rate": 5.928082293915652e-06, - "loss": 0.0003517616540193558, - "step": 19290 - }, - { - "epoch": 3.289855072463768, - "grad_norm": 0.006232273764908314, - "learning_rate": 5.9142639120131636e-06, - "loss": 0.0002735992660745978, - "step": 19295 - }, - { - "epoch": 3.290707587382779, - "grad_norm": 0.025010747835040092, - "learning_rate": 5.900460275382981e-06, - "loss": 0.0004658872727304697, - "step": 19300 - }, - { - "epoch": 3.29156010230179, - "grad_norm": 0.00881986878812313, - "learning_rate": 5.88667139046909e-06, - "loss": 0.0007451147306710481, - "step": 19305 - }, - { - "epoch": 3.2924126172208013, - "grad_norm": 0.02796418033540249, - "learning_rate": 5.872897263708607e-06, - "loss": 0.0008796761743724346, - "step": 19310 - }, - { - "epoch": 3.2932651321398123, - "grad_norm": 0.027454577386379242, - "learning_rate": 5.859137901531745e-06, - "loss": 0.00026941425167024133, - "step": 19315 - }, - { - "epoch": 3.2941176470588234, - "grad_norm": 0.0395982526242733, - "learning_rate": 5.84539331036183e-06, - "loss": 0.00041040563955903054, - "step": 19320 - }, - { - "epoch": 3.2949701619778344, - "grad_norm": 0.042481981217861176, - "learning_rate": 5.831663496615304e-06, - "loss": 0.0003256106050685048, - "step": 19325 - }, - { - "epoch": 3.2958226768968455, - "grad_norm": 0.0194789320230484, - "learning_rate": 5.817948466701703e-06, - "loss": 0.00048703285865485667, - "step": 19330 - }, - { - "epoch": 3.296675191815857, - "grad_norm": 0.03693777322769165, - "learning_rate": 5.804248227023639e-06, - "loss": 0.0004573033656924963, - "step": 19335 - }, - { - "epoch": 3.2975277067348676, - "grad_norm": 0.010155921801924706, - "learning_rate": 5.790562783976857e-06, - "loss": 0.00022799526341259478, - "step": 19340 - }, - { - "epoch": 3.298380221653879, - "grad_norm": 0.014926153235137463, - "learning_rate": 5.776892143950181e-06, - "loss": 0.0002296717371791601, - "step": 19345 - }, - { - "epoch": 3.29923273657289, - "grad_norm": 0.025415342301130295, - "learning_rate": 5.763236313325513e-06, - "loss": 0.0001236582640558481, - "step": 19350 - }, - { - "epoch": 3.3000852514919012, - "grad_norm": 0.0994359701871872, - "learning_rate": 5.749595298477851e-06, - "loss": 0.0009945498779416085, - "step": 19355 - }, - { - "epoch": 3.3009377664109123, - "grad_norm": 0.015362569130957127, - "learning_rate": 5.7359691057752705e-06, - "loss": 0.0005355034954845905, - "step": 19360 - }, - { - "epoch": 3.3017902813299234, - "grad_norm": 0.07377626746892929, - "learning_rate": 5.722357741578925e-06, - "loss": 0.0009824702516198157, - "step": 19365 - }, - { - "epoch": 3.3026427962489344, - "grad_norm": 0.01109279878437519, - "learning_rate": 5.708761212243067e-06, - "loss": 0.00042829746380448344, - "step": 19370 - }, - { - "epoch": 3.3034953111679455, - "grad_norm": 0.012984010390937328, - "learning_rate": 5.695179524115008e-06, - "loss": 0.0005177812185138464, - "step": 19375 - }, - { - "epoch": 3.3043478260869565, - "grad_norm": 0.01330599281936884, - "learning_rate": 5.681612683535111e-06, - "loss": 0.00047001498751342297, - "step": 19380 - }, - { - "epoch": 3.3052003410059676, - "grad_norm": 0.027421219274401665, - "learning_rate": 5.66806069683686e-06, - "loss": 0.0005254631396383047, - "step": 19385 - }, - { - "epoch": 3.3060528559249787, - "grad_norm": 0.014183313585817814, - "learning_rate": 5.6545235703467435e-06, - "loss": 0.000249856011942029, - "step": 19390 - }, - { - "epoch": 3.3069053708439897, - "grad_norm": 0.09523740410804749, - "learning_rate": 5.641001310384365e-06, - "loss": 0.000620997790247202, - "step": 19395 - }, - { - "epoch": 3.307757885763001, - "grad_norm": 0.08386892080307007, - "learning_rate": 5.627493923262354e-06, - "loss": 0.0012673554010689259, - "step": 19400 - }, - { - "epoch": 3.308610400682012, - "grad_norm": 0.03303903713822365, - "learning_rate": 5.614001415286412e-06, - "loss": 0.0011139905080199241, - "step": 19405 - }, - { - "epoch": 3.309462915601023, - "grad_norm": 0.03811914473772049, - "learning_rate": 5.6005237927552805e-06, - "loss": 0.0006227992475032806, - "step": 19410 - }, - { - "epoch": 3.310315430520034, - "grad_norm": 0.016570856794714928, - "learning_rate": 5.5870610619607805e-06, - "loss": 0.0005445381160825491, - "step": 19415 - }, - { - "epoch": 3.311167945439045, - "grad_norm": 0.013608088716864586, - "learning_rate": 5.573613229187751e-06, - "loss": 0.0004142835270613432, - "step": 19420 - }, - { - "epoch": 3.312020460358056, - "grad_norm": 0.053280171006917953, - "learning_rate": 5.560180300714079e-06, - "loss": 0.0003944558557122946, - "step": 19425 - }, - { - "epoch": 3.3128729752770676, - "grad_norm": 0.04067116975784302, - "learning_rate": 5.5467622828107225e-06, - "loss": 0.0008278630673885345, - "step": 19430 - }, - { - "epoch": 3.313725490196078, - "grad_norm": 0.0459442101418972, - "learning_rate": 5.533359181741638e-06, - "loss": 0.00037522357888519764, - "step": 19435 - }, - { - "epoch": 3.3145780051150897, - "grad_norm": 0.07973090559244156, - "learning_rate": 5.519971003763862e-06, - "loss": 0.0006369464099407196, - "step": 19440 - }, - { - "epoch": 3.3154305200341008, - "grad_norm": 0.0318770669400692, - "learning_rate": 5.506597755127425e-06, - "loss": 0.0006823249161243439, - "step": 19445 - }, - { - "epoch": 3.316283034953112, - "grad_norm": 0.04201148822903633, - "learning_rate": 5.49323944207541e-06, - "loss": 0.000304691749624908, - "step": 19450 - }, - { - "epoch": 3.317135549872123, - "grad_norm": 0.011656812392175198, - "learning_rate": 5.479896070843919e-06, - "loss": 0.00038321034517139194, - "step": 19455 - }, - { - "epoch": 3.317988064791134, - "grad_norm": 0.03550105541944504, - "learning_rate": 5.466567647662075e-06, - "loss": 0.0007771219592541456, - "step": 19460 - }, - { - "epoch": 3.318840579710145, - "grad_norm": 0.010552220977842808, - "learning_rate": 5.453254178752044e-06, - "loss": 0.0008830759674310684, - "step": 19465 - }, - { - "epoch": 3.319693094629156, - "grad_norm": 0.0077703725546598434, - "learning_rate": 5.439955670328987e-06, - "loss": 0.0003484194632619619, - "step": 19470 - }, - { - "epoch": 3.320545609548167, - "grad_norm": 0.013720662333071232, - "learning_rate": 5.426672128601088e-06, - "loss": 0.0005347099620848894, - "step": 19475 - }, - { - "epoch": 3.321398124467178, - "grad_norm": 0.005950555205345154, - "learning_rate": 5.413403559769549e-06, - "loss": 0.00040374435484409333, - "step": 19480 - }, - { - "epoch": 3.3222506393861893, - "grad_norm": 0.005855921655893326, - "learning_rate": 5.400149970028587e-06, - "loss": 0.00011817219201475382, - "step": 19485 - }, - { - "epoch": 3.3231031543052003, - "grad_norm": 0.05193415656685829, - "learning_rate": 5.3869113655654145e-06, - "loss": 0.000558258919045329, - "step": 19490 - }, - { - "epoch": 3.3239556692242114, - "grad_norm": 0.002798686036840081, - "learning_rate": 5.37368775256025e-06, - "loss": 0.0002853567479178309, - "step": 19495 - }, - { - "epoch": 3.3248081841432224, - "grad_norm": 0.0037216702476143837, - "learning_rate": 5.360479137186315e-06, - "loss": 0.0003375500673428178, - "step": 19500 - }, - { - "epoch": 3.3256606990622335, - "grad_norm": 0.06180913746356964, - "learning_rate": 5.347285525609821e-06, - "loss": 0.00018238723278045653, - "step": 19505 - }, - { - "epoch": 3.3265132139812446, - "grad_norm": 0.034047432243824005, - "learning_rate": 5.334106923990009e-06, - "loss": 0.0006082602776587009, - "step": 19510 - }, - { - "epoch": 3.3273657289002556, - "grad_norm": 0.00867203064262867, - "learning_rate": 5.32094333847907e-06, - "loss": 0.0003369096200913191, - "step": 19515 - }, - { - "epoch": 3.3282182438192667, - "grad_norm": 0.04999540373682976, - "learning_rate": 5.3077947752222e-06, - "loss": 0.00042240540497004984, - "step": 19520 - }, - { - "epoch": 3.3290707587382777, - "grad_norm": 0.10174256563186646, - "learning_rate": 5.294661240357599e-06, - "loss": 0.0012334841303527355, - "step": 19525 - }, - { - "epoch": 3.329923273657289, - "grad_norm": 0.019731154665350914, - "learning_rate": 5.2815427400164365e-06, - "loss": 0.0002502906369045377, - "step": 19530 - }, - { - "epoch": 3.3307757885763003, - "grad_norm": 0.040488291531801224, - "learning_rate": 5.268439280322864e-06, - "loss": 0.0006264269817620516, - "step": 19535 - }, - { - "epoch": 3.3316283034953114, - "grad_norm": 0.027734950184822083, - "learning_rate": 5.2553508673940095e-06, - "loss": 0.0006609380245208741, - "step": 19540 - }, - { - "epoch": 3.3324808184143224, - "grad_norm": 0.033032696694135666, - "learning_rate": 5.24227750733998e-06, - "loss": 0.00046310769394040106, - "step": 19545 - }, - { - "epoch": 3.3333333333333335, - "grad_norm": 0.009758932515978813, - "learning_rate": 5.2292192062638485e-06, - "loss": 0.0003860333003103733, - "step": 19550 - }, - { - "epoch": 3.3341858482523445, - "grad_norm": 0.010039775632321835, - "learning_rate": 5.2161759702616764e-06, - "loss": 0.0005642361007630825, - "step": 19555 - }, - { - "epoch": 3.3350383631713556, - "grad_norm": 0.018729569390416145, - "learning_rate": 5.203147805422476e-06, - "loss": 0.0002538987435400486, - "step": 19560 - }, - { - "epoch": 3.3358908780903667, - "grad_norm": 0.07940587401390076, - "learning_rate": 5.190134717828216e-06, - "loss": 0.0003814149182289839, - "step": 19565 - }, - { - "epoch": 3.3367433930093777, - "grad_norm": 0.002807241166010499, - "learning_rate": 5.1771367135538575e-06, - "loss": 0.0005854971241205931, - "step": 19570 - }, - { - "epoch": 3.337595907928389, - "grad_norm": 0.029841719195246696, - "learning_rate": 5.164153798667284e-06, - "loss": 0.00021142382174730301, - "step": 19575 - }, - { - "epoch": 3.3384484228474, - "grad_norm": 0.017503969371318817, - "learning_rate": 5.151185979229372e-06, - "loss": 0.0005035904701799154, - "step": 19580 - }, - { - "epoch": 3.339300937766411, - "grad_norm": 0.033913351595401764, - "learning_rate": 5.138233261293917e-06, - "loss": 0.00033289811108261347, - "step": 19585 - }, - { - "epoch": 3.340153452685422, - "grad_norm": 0.027594633400440216, - "learning_rate": 5.125295650907682e-06, - "loss": 0.0006479782052338124, - "step": 19590 - }, - { - "epoch": 3.341005967604433, - "grad_norm": 0.017926139757037163, - "learning_rate": 5.112373154110365e-06, - "loss": 0.0009788990020751954, - "step": 19595 - }, - { - "epoch": 3.341858482523444, - "grad_norm": 0.012236343696713448, - "learning_rate": 5.099465776934636e-06, - "loss": 0.0009869396686553954, - "step": 19600 - }, - { - "epoch": 3.342710997442455, - "grad_norm": 0.1368396282196045, - "learning_rate": 5.086573525406075e-06, - "loss": 0.0009202501736581325, - "step": 19605 - }, - { - "epoch": 3.343563512361466, - "grad_norm": 0.04556318372488022, - "learning_rate": 5.07369640554321e-06, - "loss": 0.0002738154027611017, - "step": 19610 - }, - { - "epoch": 3.3444160272804773, - "grad_norm": 0.10456430912017822, - "learning_rate": 5.060834423357522e-06, - "loss": 0.0005405619740486145, - "step": 19615 - }, - { - "epoch": 3.3452685421994883, - "grad_norm": 0.009618780575692654, - "learning_rate": 5.047987584853398e-06, - "loss": 0.0003645260352641344, - "step": 19620 - }, - { - "epoch": 3.3461210571184994, - "grad_norm": 0.0008346811519004405, - "learning_rate": 5.035155896028186e-06, - "loss": 0.0009771523997187614, - "step": 19625 - }, - { - "epoch": 3.346973572037511, - "grad_norm": 0.07436166703701019, - "learning_rate": 5.022339362872134e-06, - "loss": 0.0005307651124894619, - "step": 19630 - }, - { - "epoch": 3.3478260869565215, - "grad_norm": 0.047390375286340714, - "learning_rate": 5.0095379913684326e-06, - "loss": 0.00032626844476908446, - "step": 19635 - }, - { - "epoch": 3.348678601875533, - "grad_norm": 0.11302479356527328, - "learning_rate": 4.996751787493172e-06, - "loss": 0.0006419796496629715, - "step": 19640 - }, - { - "epoch": 3.349531116794544, - "grad_norm": 0.04735693335533142, - "learning_rate": 4.983980757215398e-06, - "loss": 0.0004858987871557474, - "step": 19645 - }, - { - "epoch": 3.350383631713555, - "grad_norm": 0.05677567049860954, - "learning_rate": 4.971224906497043e-06, - "loss": 0.0009346410632133483, - "step": 19650 - }, - { - "epoch": 3.351236146632566, - "grad_norm": 0.01608835905790329, - "learning_rate": 4.958484241292954e-06, - "loss": 0.000258720014244318, - "step": 19655 - }, - { - "epoch": 3.3520886615515773, - "grad_norm": 0.023287836462259293, - "learning_rate": 4.9457587675509155e-06, - "loss": 0.0007150916382670403, - "step": 19660 - }, - { - "epoch": 3.3529411764705883, - "grad_norm": 0.02774999849498272, - "learning_rate": 4.9330484912115845e-06, - "loss": 0.000649323221296072, - "step": 19665 - }, - { - "epoch": 3.3537936913895994, - "grad_norm": 0.012109563685953617, - "learning_rate": 4.920353418208556e-06, - "loss": 0.00036820617970079184, - "step": 19670 - }, - { - "epoch": 3.3546462063086104, - "grad_norm": 0.03116477094590664, - "learning_rate": 4.907673554468305e-06, - "loss": 0.0009199230931699276, - "step": 19675 - }, - { - "epoch": 3.3554987212276215, - "grad_norm": 0.037316855043172836, - "learning_rate": 4.895008905910219e-06, - "loss": 0.0005375304259359837, - "step": 19680 - }, - { - "epoch": 3.3563512361466326, - "grad_norm": 0.02448320761322975, - "learning_rate": 4.882359478446568e-06, - "loss": 0.0007062189746648073, - "step": 19685 - }, - { - "epoch": 3.3572037510656436, - "grad_norm": 0.013858492486178875, - "learning_rate": 4.8697252779825195e-06, - "loss": 0.0003158868057653308, - "step": 19690 - }, - { - "epoch": 3.3580562659846547, - "grad_norm": 0.007077233865857124, - "learning_rate": 4.857106310416161e-06, - "loss": 0.00016839986201375723, - "step": 19695 - }, - { - "epoch": 3.3589087809036657, - "grad_norm": 0.00671799760311842, - "learning_rate": 4.844502581638424e-06, - "loss": 0.0013290375471115112, - "step": 19700 - }, - { - "epoch": 3.359761295822677, - "grad_norm": 0.11925818771123886, - "learning_rate": 4.83191409753317e-06, - "loss": 0.0008001517504453659, - "step": 19705 - }, - { - "epoch": 3.360613810741688, - "grad_norm": 0.012915749102830887, - "learning_rate": 4.819340863977098e-06, - "loss": 0.0003090864047408104, - "step": 19710 - }, - { - "epoch": 3.361466325660699, - "grad_norm": 0.0198194682598114, - "learning_rate": 4.806782886839833e-06, - "loss": 0.00015502141322940589, - "step": 19715 - }, - { - "epoch": 3.36231884057971, - "grad_norm": 0.02647668495774269, - "learning_rate": 4.794240171983848e-06, - "loss": 0.00032354283612221477, - "step": 19720 - }, - { - "epoch": 3.363171355498721, - "grad_norm": 0.03167302906513214, - "learning_rate": 4.781712725264503e-06, - "loss": 0.0008794944733381271, - "step": 19725 - }, - { - "epoch": 3.364023870417732, - "grad_norm": 0.03751087561249733, - "learning_rate": 4.769200552530017e-06, - "loss": 0.0017323100939393044, - "step": 19730 - }, - { - "epoch": 3.3648763853367436, - "grad_norm": 0.08725135773420334, - "learning_rate": 4.75670365962149e-06, - "loss": 0.0009663975797593594, - "step": 19735 - }, - { - "epoch": 3.3657289002557547, - "grad_norm": 0.014557529240846634, - "learning_rate": 4.7442220523729005e-06, - "loss": 0.0004029064439237118, - "step": 19740 - }, - { - "epoch": 3.3665814151747657, - "grad_norm": 0.05396854132413864, - "learning_rate": 4.731755736611068e-06, - "loss": 0.0011473988182842732, - "step": 19745 - }, - { - "epoch": 3.367433930093777, - "grad_norm": 0.06434670090675354, - "learning_rate": 4.7193047181556764e-06, - "loss": 0.00039711645804345607, - "step": 19750 - }, - { - "epoch": 3.368286445012788, - "grad_norm": 0.01898345723748207, - "learning_rate": 4.706869002819287e-06, - "loss": 0.0002789617981761694, - "step": 19755 - }, - { - "epoch": 3.369138959931799, - "grad_norm": 0.021839376538991928, - "learning_rate": 4.6944485964073085e-06, - "loss": 0.001008017361164093, - "step": 19760 - }, - { - "epoch": 3.36999147485081, - "grad_norm": 0.013436227105557919, - "learning_rate": 4.682043504717991e-06, - "loss": 0.0002914538374170661, - "step": 19765 - }, - { - "epoch": 3.370843989769821, - "grad_norm": 0.04120805487036705, - "learning_rate": 4.6696537335424485e-06, - "loss": 0.00099704097956419, - "step": 19770 - }, - { - "epoch": 3.371696504688832, - "grad_norm": 0.015087714418768883, - "learning_rate": 4.6572792886646326e-06, - "loss": 0.00031175173353403807, - "step": 19775 - }, - { - "epoch": 3.372549019607843, - "grad_norm": 0.09424779564142227, - "learning_rate": 4.644920175861347e-06, - "loss": 0.0008490364067256451, - "step": 19780 - }, - { - "epoch": 3.373401534526854, - "grad_norm": 0.15744835138320923, - "learning_rate": 4.632576400902244e-06, - "loss": 0.0011794422753155231, - "step": 19785 - }, - { - "epoch": 3.3742540494458653, - "grad_norm": 0.07353512197732925, - "learning_rate": 4.620247969549801e-06, - "loss": 0.0005946496035903692, - "step": 19790 - }, - { - "epoch": 3.3751065643648763, - "grad_norm": 0.05715373530983925, - "learning_rate": 4.607934887559335e-06, - "loss": 0.0005888998974114656, - "step": 19795 - }, - { - "epoch": 3.3759590792838874, - "grad_norm": 0.09267253428697586, - "learning_rate": 4.5956371606790195e-06, - "loss": 0.0007545445580035449, - "step": 19800 - }, - { - "epoch": 3.3768115942028984, - "grad_norm": 0.041159722954034805, - "learning_rate": 4.5833547946498235e-06, - "loss": 0.0006760005839169025, - "step": 19805 - }, - { - "epoch": 3.3776641091219095, - "grad_norm": 0.1095680296421051, - "learning_rate": 4.571087795205583e-06, - "loss": 0.0010204846039414406, - "step": 19810 - }, - { - "epoch": 3.3785166240409206, - "grad_norm": 0.006533615291118622, - "learning_rate": 4.558836168072928e-06, - "loss": 0.00032924620900303124, - "step": 19815 - }, - { - "epoch": 3.3793691389599316, - "grad_norm": 0.09690971672534943, - "learning_rate": 4.5465999189713305e-06, - "loss": 0.0006089920178055763, - "step": 19820 - }, - { - "epoch": 3.3802216538789427, - "grad_norm": 0.03703468665480614, - "learning_rate": 4.53437905361307e-06, - "loss": 0.00024356732610613107, - "step": 19825 - }, - { - "epoch": 3.381074168797954, - "grad_norm": 0.03449544310569763, - "learning_rate": 4.522173577703267e-06, - "loss": 0.0004322177264839411, - "step": 19830 - }, - { - "epoch": 3.381926683716965, - "grad_norm": 0.022056737914681435, - "learning_rate": 4.509983496939834e-06, - "loss": 0.00032165104057639836, - "step": 19835 - }, - { - "epoch": 3.3827791986359763, - "grad_norm": 0.06808804720640182, - "learning_rate": 4.4978088170135064e-06, - "loss": 0.0004901651758700609, - "step": 19840 - }, - { - "epoch": 3.3836317135549874, - "grad_norm": 0.035225335508584976, - "learning_rate": 4.485649543607835e-06, - "loss": 0.000494948634877801, - "step": 19845 - }, - { - "epoch": 3.3844842284739984, - "grad_norm": 0.005756362807005644, - "learning_rate": 4.473505682399165e-06, - "loss": 0.00037348996847867965, - "step": 19850 - }, - { - "epoch": 3.3853367433930095, - "grad_norm": 0.015896733850240707, - "learning_rate": 4.461377239056669e-06, - "loss": 0.001073040347546339, - "step": 19855 - }, - { - "epoch": 3.3861892583120206, - "grad_norm": 0.005726287607103586, - "learning_rate": 4.449264219242296e-06, - "loss": 0.00016913213767111301, - "step": 19860 - }, - { - "epoch": 3.3870417732310316, - "grad_norm": 0.01894184947013855, - "learning_rate": 4.4371666286108125e-06, - "loss": 0.0001936727436259389, - "step": 19865 - }, - { - "epoch": 3.3878942881500427, - "grad_norm": 0.0019047146197408438, - "learning_rate": 4.425084472809763e-06, - "loss": 0.00023375547025352716, - "step": 19870 - }, - { - "epoch": 3.3887468030690537, - "grad_norm": 0.004392183385789394, - "learning_rate": 4.41301775747952e-06, - "loss": 0.0006707040593028069, - "step": 19875 - }, - { - "epoch": 3.389599317988065, - "grad_norm": 0.024085786193609238, - "learning_rate": 4.400966488253218e-06, - "loss": 0.0002247063210234046, - "step": 19880 - }, - { - "epoch": 3.390451832907076, - "grad_norm": 0.07025684416294098, - "learning_rate": 4.388930670756779e-06, - "loss": 0.0007792794145643711, - "step": 19885 - }, - { - "epoch": 3.391304347826087, - "grad_norm": 0.06971945613622665, - "learning_rate": 4.3769103106089454e-06, - "loss": 0.0019492624327540399, - "step": 19890 - }, - { - "epoch": 3.392156862745098, - "grad_norm": 0.065009705722332, - "learning_rate": 4.364905413421204e-06, - "loss": 0.0009217139333486557, - "step": 19895 - }, - { - "epoch": 3.393009377664109, - "grad_norm": 0.050812624394893646, - "learning_rate": 4.352915984797849e-06, - "loss": 0.0007668033242225647, - "step": 19900 - }, - { - "epoch": 3.39386189258312, - "grad_norm": 0.0837833359837532, - "learning_rate": 4.340942030335942e-06, - "loss": 0.0005806859582662583, - "step": 19905 - }, - { - "epoch": 3.394714407502131, - "grad_norm": 0.03263656422495842, - "learning_rate": 4.3289835556253205e-06, - "loss": 0.0011843616142868997, - "step": 19910 - }, - { - "epoch": 3.395566922421142, - "grad_norm": 0.01964580826461315, - "learning_rate": 4.317040566248605e-06, - "loss": 0.0004248973447829485, - "step": 19915 - }, - { - "epoch": 3.3964194373401533, - "grad_norm": 0.05140439420938492, - "learning_rate": 4.305113067781167e-06, - "loss": 0.0004183043260127306, - "step": 19920 - }, - { - "epoch": 3.397271952259165, - "grad_norm": 0.015379955060780048, - "learning_rate": 4.293201065791172e-06, - "loss": 0.000815888587385416, - "step": 19925 - }, - { - "epoch": 3.3981244671781754, - "grad_norm": 0.0026071579195559025, - "learning_rate": 4.281304565839533e-06, - "loss": 0.0003499687649309635, - "step": 19930 - }, - { - "epoch": 3.398976982097187, - "grad_norm": 0.01917382702231407, - "learning_rate": 4.269423573479938e-06, - "loss": 0.0005561482626944781, - "step": 19935 - }, - { - "epoch": 3.399829497016198, - "grad_norm": 0.02250206656754017, - "learning_rate": 4.257558094258817e-06, - "loss": 0.0003818372031673789, - "step": 19940 - }, - { - "epoch": 3.400682011935209, - "grad_norm": 0.10248809307813644, - "learning_rate": 4.245708133715389e-06, - "loss": 0.0005628989078104496, - "step": 19945 - }, - { - "epoch": 3.40153452685422, - "grad_norm": 0.017903871834278107, - "learning_rate": 4.233873697381596e-06, - "loss": 0.000215845531783998, - "step": 19950 - }, - { - "epoch": 3.402387041773231, - "grad_norm": 0.061437349766492844, - "learning_rate": 4.222054790782155e-06, - "loss": 0.0007492574863135814, - "step": 19955 - }, - { - "epoch": 3.403239556692242, - "grad_norm": 0.04522673040628433, - "learning_rate": 4.210251419434515e-06, - "loss": 0.001055066753178835, - "step": 19960 - }, - { - "epoch": 3.4040920716112533, - "grad_norm": 0.005560046993196011, - "learning_rate": 4.198463588848883e-06, - "loss": 0.00024275691248476505, - "step": 19965 - }, - { - "epoch": 3.4049445865302643, - "grad_norm": 0.025880778208374977, - "learning_rate": 4.186691304528221e-06, - "loss": 0.00034111484419554474, - "step": 19970 - }, - { - "epoch": 3.4057971014492754, - "grad_norm": 0.02516460418701172, - "learning_rate": 4.174934571968218e-06, - "loss": 0.000534482765942812, - "step": 19975 - }, - { - "epoch": 3.4066496163682864, - "grad_norm": 0.027490204200148582, - "learning_rate": 4.1631933966572954e-06, - "loss": 0.001637015864253044, - "step": 19980 - }, - { - "epoch": 3.4075021312872975, - "grad_norm": 0.030315211042761803, - "learning_rate": 4.1514677840766395e-06, - "loss": 0.00029935024213045835, - "step": 19985 - }, - { - "epoch": 3.4083546462063086, - "grad_norm": 0.06448766589164734, - "learning_rate": 4.139757739700156e-06, - "loss": 0.0004935414995998144, - "step": 19990 - }, - { - "epoch": 3.4092071611253196, - "grad_norm": 0.007854131981730461, - "learning_rate": 4.128063268994479e-06, - "loss": 0.00030187955126166345, - "step": 19995 - }, - { - "epoch": 3.4100596760443307, - "grad_norm": 0.029494259506464005, - "learning_rate": 4.116384377418979e-06, - "loss": 0.0003482209984213114, - "step": 20000 - }, - { - "epoch": 3.4109121909633418, - "grad_norm": 0.030381083488464355, - "learning_rate": 4.104721070425751e-06, - "loss": 0.0002413678914308548, - "step": 20005 - }, - { - "epoch": 3.411764705882353, - "grad_norm": 0.006045108195394278, - "learning_rate": 4.093073353459604e-06, - "loss": 0.0004130109678953886, - "step": 20010 - }, - { - "epoch": 3.412617220801364, - "grad_norm": 0.0822497308254242, - "learning_rate": 4.081441231958094e-06, - "loss": 0.0007556038908660412, - "step": 20015 - }, - { - "epoch": 3.413469735720375, - "grad_norm": 0.02197144739329815, - "learning_rate": 4.069824711351475e-06, - "loss": 0.00042886766605079176, - "step": 20020 - }, - { - "epoch": 3.414322250639386, - "grad_norm": 0.01506667211651802, - "learning_rate": 4.0582237970627204e-06, - "loss": 0.0004569370299577713, - "step": 20025 - }, - { - "epoch": 3.4151747655583975, - "grad_norm": 0.0023130911868065596, - "learning_rate": 4.046638494507538e-06, - "loss": 0.0007974251173436641, - "step": 20030 - }, - { - "epoch": 3.416027280477408, - "grad_norm": 0.08822524547576904, - "learning_rate": 4.035068809094319e-06, - "loss": 0.0006814738735556602, - "step": 20035 - }, - { - "epoch": 3.4168797953964196, - "grad_norm": 0.026947883889079094, - "learning_rate": 4.023514746224184e-06, - "loss": 0.0002705232938751578, - "step": 20040 - }, - { - "epoch": 3.4177323103154307, - "grad_norm": 0.02061464823782444, - "learning_rate": 4.011976311290956e-06, - "loss": 0.0008053860627114772, - "step": 20045 - }, - { - "epoch": 3.4185848252344417, - "grad_norm": 0.01110768411308527, - "learning_rate": 4.000453509681155e-06, - "loss": 0.0005702998489141465, - "step": 20050 - }, - { - "epoch": 3.419437340153453, - "grad_norm": 0.06727463006973267, - "learning_rate": 3.9889463467739995e-06, - "loss": 0.00048296600580215453, - "step": 20055 - }, - { - "epoch": 3.420289855072464, - "grad_norm": 0.01981664076447487, - "learning_rate": 3.977454827941438e-06, - "loss": 0.0007956895977258682, - "step": 20060 - }, - { - "epoch": 3.421142369991475, - "grad_norm": 0.010179187171161175, - "learning_rate": 3.965978958548076e-06, - "loss": 0.001441807672381401, - "step": 20065 - }, - { - "epoch": 3.421994884910486, - "grad_norm": 0.028387323021888733, - "learning_rate": 3.954518743951235e-06, - "loss": 0.000527799129486084, - "step": 20070 - }, - { - "epoch": 3.422847399829497, - "grad_norm": 0.011368883773684502, - "learning_rate": 3.9430741895009275e-06, - "loss": 0.00046253204345703125, - "step": 20075 - }, - { - "epoch": 3.423699914748508, - "grad_norm": 0.012177668511867523, - "learning_rate": 3.931645300539847e-06, - "loss": 0.00043948981910943983, - "step": 20080 - }, - { - "epoch": 3.424552429667519, - "grad_norm": 0.07201547920703888, - "learning_rate": 3.920232082403392e-06, - "loss": 0.0005337335169315338, - "step": 20085 - }, - { - "epoch": 3.42540494458653, - "grad_norm": 0.12001162767410278, - "learning_rate": 3.908834540419621e-06, - "loss": 0.0008155249059200286, - "step": 20090 - }, - { - "epoch": 3.4262574595055413, - "grad_norm": 0.0132389971986413, - "learning_rate": 3.897452679909287e-06, - "loss": 0.000174278998747468, - "step": 20095 - }, - { - "epoch": 3.4271099744245523, - "grad_norm": 0.0051491111516952515, - "learning_rate": 3.886086506185822e-06, - "loss": 0.0006445198785513639, - "step": 20100 - }, - { - "epoch": 3.4279624893435634, - "grad_norm": 0.039136942476034164, - "learning_rate": 3.874736024555328e-06, - "loss": 0.0005972872488200665, - "step": 20105 - }, - { - "epoch": 3.4288150042625745, - "grad_norm": 0.00988066103309393, - "learning_rate": 3.863401240316599e-06, - "loss": 0.00036287889815866947, - "step": 20110 - }, - { - "epoch": 3.4296675191815855, - "grad_norm": 0.012278315611183643, - "learning_rate": 3.852082158761074e-06, - "loss": 0.0004206370562314987, - "step": 20115 - }, - { - "epoch": 3.4305200341005966, - "grad_norm": 0.08950433880090714, - "learning_rate": 3.840778785172897e-06, - "loss": 0.0007027041632682085, - "step": 20120 - }, - { - "epoch": 3.431372549019608, - "grad_norm": 0.015487313270568848, - "learning_rate": 3.829491124828843e-06, - "loss": 0.00030801878310739996, - "step": 20125 - }, - { - "epoch": 3.4322250639386187, - "grad_norm": 0.012695780955255032, - "learning_rate": 3.818219182998379e-06, - "loss": 0.00033567410428076984, - "step": 20130 - }, - { - "epoch": 3.43307757885763, - "grad_norm": 0.013385625556111336, - "learning_rate": 3.8069629649436134e-06, - "loss": 0.00033784976694732903, - "step": 20135 - }, - { - "epoch": 3.4339300937766413, - "grad_norm": 0.02653471939265728, - "learning_rate": 3.7957224759193258e-06, - "loss": 0.00037979823537170887, - "step": 20140 - }, - { - "epoch": 3.4347826086956523, - "grad_norm": 0.045122601091861725, - "learning_rate": 3.7844977211729523e-06, - "loss": 0.0003743718843907118, - "step": 20145 - }, - { - "epoch": 3.4356351236146634, - "grad_norm": 0.004004189744591713, - "learning_rate": 3.7732887059445717e-06, - "loss": 0.00024804847780615093, - "step": 20150 - }, - { - "epoch": 3.4364876385336744, - "grad_norm": 0.09962640702724457, - "learning_rate": 3.7620954354669443e-06, - "loss": 0.0007725684903562069, - "step": 20155 - }, - { - "epoch": 3.4373401534526855, - "grad_norm": 0.026793939992785454, - "learning_rate": 3.75091791496544e-06, - "loss": 0.0003023044904693961, - "step": 20160 - }, - { - "epoch": 3.4381926683716966, - "grad_norm": 0.06049729883670807, - "learning_rate": 3.7397561496581143e-06, - "loss": 0.00038756374269723894, - "step": 20165 - }, - { - "epoch": 3.4390451832907076, - "grad_norm": 0.10067807883024216, - "learning_rate": 3.7286101447556365e-06, - "loss": 0.00040011387318372726, - "step": 20170 - }, - { - "epoch": 3.4398976982097187, - "grad_norm": 0.025836393237113953, - "learning_rate": 3.7174799054613444e-06, - "loss": 0.0009764598682522774, - "step": 20175 - }, - { - "epoch": 3.4407502131287298, - "grad_norm": 0.03506815806031227, - "learning_rate": 3.7063654369712022e-06, - "loss": 0.0005544353742152452, - "step": 20180 - }, - { - "epoch": 3.441602728047741, - "grad_norm": 0.013711848296225071, - "learning_rate": 3.69526674447381e-06, - "loss": 0.0002796804532408714, - "step": 20185 - }, - { - "epoch": 3.442455242966752, - "grad_norm": 0.014671099372208118, - "learning_rate": 3.684183833150406e-06, - "loss": 0.0007412171456962824, - "step": 20190 - }, - { - "epoch": 3.443307757885763, - "grad_norm": 0.09581290930509567, - "learning_rate": 3.67311670817486e-06, - "loss": 0.0009363777935504913, - "step": 20195 - }, - { - "epoch": 3.444160272804774, - "grad_norm": 0.012721995823085308, - "learning_rate": 3.662065374713681e-06, - "loss": 0.0003047358011826873, - "step": 20200 - }, - { - "epoch": 3.445012787723785, - "grad_norm": 0.08440826833248138, - "learning_rate": 3.6510298379259883e-06, - "loss": 0.0015849992632865906, - "step": 20205 - }, - { - "epoch": 3.445865302642796, - "grad_norm": 0.016568806022405624, - "learning_rate": 3.6400101029635515e-06, - "loss": 0.0003544453531503677, - "step": 20210 - }, - { - "epoch": 3.446717817561807, - "grad_norm": 0.012676285579800606, - "learning_rate": 3.62900617497074e-06, - "loss": 0.00013037940952926875, - "step": 20215 - }, - { - "epoch": 3.4475703324808182, - "grad_norm": 0.028908727690577507, - "learning_rate": 3.618018059084553e-06, - "loss": 0.0004815624561160803, - "step": 20220 - }, - { - "epoch": 3.4484228473998293, - "grad_norm": 0.010771363973617554, - "learning_rate": 3.6070457604346155e-06, - "loss": 0.0008128033950924873, - "step": 20225 - }, - { - "epoch": 3.449275362318841, - "grad_norm": 0.05307495594024658, - "learning_rate": 3.5960892841431556e-06, - "loss": 0.0004443288315087557, - "step": 20230 - }, - { - "epoch": 3.4501278772378514, - "grad_norm": 0.04856376722455025, - "learning_rate": 3.5851486353250274e-06, - "loss": 0.0008865063078701496, - "step": 20235 - }, - { - "epoch": 3.450980392156863, - "grad_norm": 0.04320789873600006, - "learning_rate": 3.5742238190876752e-06, - "loss": 0.00030287024565041066, - "step": 20240 - }, - { - "epoch": 3.451832907075874, - "grad_norm": 0.026490481570363045, - "learning_rate": 3.563314840531181e-06, - "loss": 0.0003270474262535572, - "step": 20245 - }, - { - "epoch": 3.452685421994885, - "grad_norm": 0.01927161030471325, - "learning_rate": 3.5524217047482177e-06, - "loss": 0.0007327934727072716, - "step": 20250 - }, - { - "epoch": 3.453537936913896, - "grad_norm": 0.01646221987903118, - "learning_rate": 3.5415444168240547e-06, - "loss": 0.00015120231546461582, - "step": 20255 - }, - { - "epoch": 3.454390451832907, - "grad_norm": 0.07438748329877853, - "learning_rate": 3.53068298183658e-06, - "loss": 0.0008919765241444111, - "step": 20260 - }, - { - "epoch": 3.455242966751918, - "grad_norm": 0.060105398297309875, - "learning_rate": 3.519837404856263e-06, - "loss": 0.0005192287266254425, - "step": 20265 - }, - { - "epoch": 3.4560954816709293, - "grad_norm": 0.014563803561031818, - "learning_rate": 3.5090076909461946e-06, - "loss": 0.0004962874110788107, - "step": 20270 - }, - { - "epoch": 3.4569479965899403, - "grad_norm": 0.06408075243234634, - "learning_rate": 3.4981938451620393e-06, - "loss": 0.0005642884410917759, - "step": 20275 - }, - { - "epoch": 3.4578005115089514, - "grad_norm": 0.018654122948646545, - "learning_rate": 3.4873958725520555e-06, - "loss": 0.00020208589266985656, - "step": 20280 - }, - { - "epoch": 3.4586530264279625, - "grad_norm": 0.029568253085017204, - "learning_rate": 3.4766137781570934e-06, - "loss": 0.000255450839176774, - "step": 20285 - }, - { - "epoch": 3.4595055413469735, - "grad_norm": 0.007675605826079845, - "learning_rate": 3.465847567010606e-06, - "loss": 0.0007365974131971597, - "step": 20290 - }, - { - "epoch": 3.4603580562659846, - "grad_norm": 0.025231147184967995, - "learning_rate": 3.4550972441386105e-06, - "loss": 0.00035758940503001214, - "step": 20295 - }, - { - "epoch": 3.4612105711849956, - "grad_norm": 0.04905321076512337, - "learning_rate": 3.444362814559709e-06, - "loss": 0.0014069808647036552, - "step": 20300 - }, - { - "epoch": 3.4620630861040067, - "grad_norm": 0.03318062052130699, - "learning_rate": 3.4336442832851056e-06, - "loss": 0.0009246711619198322, - "step": 20305 - }, - { - "epoch": 3.4629156010230178, - "grad_norm": 0.006136562675237656, - "learning_rate": 3.422941655318552e-06, - "loss": 0.0006952826399356127, - "step": 20310 - }, - { - "epoch": 3.463768115942029, - "grad_norm": 0.025494717061519623, - "learning_rate": 3.4122549356564057e-06, - "loss": 0.0005774838849902153, - "step": 20315 - }, - { - "epoch": 3.46462063086104, - "grad_norm": 0.07548290491104126, - "learning_rate": 3.4015841292875754e-06, - "loss": 0.0007774532772600651, - "step": 20320 - }, - { - "epoch": 3.4654731457800514, - "grad_norm": 0.05289645493030548, - "learning_rate": 3.3909292411935475e-06, - "loss": 0.0011253023520112037, - "step": 20325 - }, - { - "epoch": 3.466325660699062, - "grad_norm": 0.018192177638411522, - "learning_rate": 3.380290276348377e-06, - "loss": 0.00043428516946733, - "step": 20330 - }, - { - "epoch": 3.4671781756180735, - "grad_norm": 0.04587262123823166, - "learning_rate": 3.3696672397186862e-06, - "loss": 0.00036711143329739573, - "step": 20335 - }, - { - "epoch": 3.4680306905370846, - "grad_norm": 0.04300279915332794, - "learning_rate": 3.3590601362636707e-06, - "loss": 0.0004922755528241396, - "step": 20340 - }, - { - "epoch": 3.4688832054560956, - "grad_norm": 0.051384493708610535, - "learning_rate": 3.3484689709350614e-06, - "loss": 0.000578513415530324, - "step": 20345 - }, - { - "epoch": 3.4697357203751067, - "grad_norm": 0.041927583515644073, - "learning_rate": 3.337893748677191e-06, - "loss": 0.00029339513275772333, - "step": 20350 - }, - { - "epoch": 3.4705882352941178, - "grad_norm": 0.059719622135162354, - "learning_rate": 3.3273344744269014e-06, - "loss": 0.000449614180251956, - "step": 20355 - }, - { - "epoch": 3.471440750213129, - "grad_norm": 0.04662923142313957, - "learning_rate": 3.3167911531136334e-06, - "loss": 0.0005207772832363844, - "step": 20360 - }, - { - "epoch": 3.47229326513214, - "grad_norm": 0.0316859669983387, - "learning_rate": 3.3062637896593498e-06, - "loss": 0.00032441234216094015, - "step": 20365 - }, - { - "epoch": 3.473145780051151, - "grad_norm": 0.05493699386715889, - "learning_rate": 3.2957523889785733e-06, - "loss": 0.0006051870062947273, - "step": 20370 - }, - { - "epoch": 3.473998294970162, - "grad_norm": 0.09825102239847183, - "learning_rate": 3.2852569559783785e-06, - "loss": 0.0013698142021894455, - "step": 20375 - }, - { - "epoch": 3.474850809889173, - "grad_norm": 0.03209096938371658, - "learning_rate": 3.2747774955583757e-06, - "loss": 0.0005756544414907694, - "step": 20380 - }, - { - "epoch": 3.475703324808184, - "grad_norm": 0.07800310105085373, - "learning_rate": 3.2643140126107343e-06, - "loss": 0.001114057283848524, - "step": 20385 - }, - { - "epoch": 3.476555839727195, - "grad_norm": 0.00817018747329712, - "learning_rate": 3.253866512020148e-06, - "loss": 0.00051291324198246, - "step": 20390 - }, - { - "epoch": 3.4774083546462062, - "grad_norm": 0.024451689794659615, - "learning_rate": 3.2434349986638687e-06, - "loss": 0.00032486242707818747, - "step": 20395 - }, - { - "epoch": 3.4782608695652173, - "grad_norm": 0.0076407743617892265, - "learning_rate": 3.2330194774116636e-06, - "loss": 0.00043834159150719644, - "step": 20400 - }, - { - "epoch": 3.4791133844842284, - "grad_norm": 0.012483174912631512, - "learning_rate": 3.222619953125852e-06, - "loss": 0.00016895943554118276, - "step": 20405 - }, - { - "epoch": 3.4799658994032394, - "grad_norm": 0.013760508969426155, - "learning_rate": 3.2122364306612745e-06, - "loss": 0.0003591555170714855, - "step": 20410 - }, - { - "epoch": 3.4808184143222505, - "grad_norm": 0.06936871260404587, - "learning_rate": 3.201868914865309e-06, - "loss": 0.0007365354336798191, - "step": 20415 - }, - { - "epoch": 3.4816709292412615, - "grad_norm": 0.026890093460679054, - "learning_rate": 3.19151741057785e-06, - "loss": 0.0003096622182056308, - "step": 20420 - }, - { - "epoch": 3.4825234441602726, - "grad_norm": 0.1273396909236908, - "learning_rate": 3.181181922631319e-06, - "loss": 0.0018214803189039231, - "step": 20425 - }, - { - "epoch": 3.483375959079284, - "grad_norm": 0.07851844280958176, - "learning_rate": 3.1708624558506784e-06, - "loss": 0.00047972016036510465, - "step": 20430 - }, - { - "epoch": 3.484228473998295, - "grad_norm": 0.10473177582025528, - "learning_rate": 3.1605590150533863e-06, - "loss": 0.0003519801888614893, - "step": 20435 - }, - { - "epoch": 3.485080988917306, - "grad_norm": 0.011826570145785809, - "learning_rate": 3.1502716050494493e-06, - "loss": 0.00012582357740029693, - "step": 20440 - }, - { - "epoch": 3.4859335038363173, - "grad_norm": 0.09120000153779984, - "learning_rate": 3.1400002306413596e-06, - "loss": 0.0011743055656552315, - "step": 20445 - }, - { - "epoch": 3.4867860187553283, - "grad_norm": 0.03551065921783447, - "learning_rate": 3.1297448966241312e-06, - "loss": 0.0003235040698200464, - "step": 20450 - }, - { - "epoch": 3.4876385336743394, - "grad_norm": 0.022862901911139488, - "learning_rate": 3.1195056077853093e-06, - "loss": 0.00019952079746872188, - "step": 20455 - }, - { - "epoch": 3.4884910485933505, - "grad_norm": 0.012301230803132057, - "learning_rate": 3.1092823689049294e-06, - "loss": 0.0005085674580186606, - "step": 20460 - }, - { - "epoch": 3.4893435635123615, - "grad_norm": 0.012983572669327259, - "learning_rate": 3.0990751847555355e-06, - "loss": 0.00026952670887112615, - "step": 20465 - }, - { - "epoch": 3.4901960784313726, - "grad_norm": 0.03648987412452698, - "learning_rate": 3.0888840601021784e-06, - "loss": 0.0006700227968394756, - "step": 20470 - }, - { - "epoch": 3.4910485933503836, - "grad_norm": 0.03276946395635605, - "learning_rate": 3.078708999702424e-06, - "loss": 0.0003196124453097582, - "step": 20475 - }, - { - "epoch": 3.4919011082693947, - "grad_norm": 0.05831300839781761, - "learning_rate": 3.068550008306318e-06, - "loss": 0.0005575232207775116, - "step": 20480 - }, - { - "epoch": 3.4927536231884058, - "grad_norm": 0.007617017719894648, - "learning_rate": 3.0584070906564297e-06, - "loss": 0.0005659013520926237, - "step": 20485 - }, - { - "epoch": 3.493606138107417, - "grad_norm": 0.01129902619868517, - "learning_rate": 3.0482802514878e-06, - "loss": 0.0005468820687383414, - "step": 20490 - }, - { - "epoch": 3.494458653026428, - "grad_norm": 0.021842598915100098, - "learning_rate": 3.0381694955279687e-06, - "loss": 0.00030360198579728606, - "step": 20495 - }, - { - "epoch": 3.495311167945439, - "grad_norm": 0.008945580571889877, - "learning_rate": 3.0280748274969887e-06, - "loss": 0.00019925013184547425, - "step": 20500 - }, - { - "epoch": 3.49616368286445, - "grad_norm": 0.024309689179062843, - "learning_rate": 3.0179962521073823e-06, - "loss": 0.0004822061397135258, - "step": 20505 - }, - { - "epoch": 3.497016197783461, - "grad_norm": 0.1111924946308136, - "learning_rate": 3.007933774064157e-06, - "loss": 0.0009571518748998642, - "step": 20510 - }, - { - "epoch": 3.497868712702472, - "grad_norm": 0.07870755344629288, - "learning_rate": 2.997887398064809e-06, - "loss": 0.00046168952248990534, - "step": 20515 - }, - { - "epoch": 3.498721227621483, - "grad_norm": 0.05273010954260826, - "learning_rate": 2.987857128799333e-06, - "loss": 0.0002907732035964727, - "step": 20520 - }, - { - "epoch": 3.4994032395566923, - "eval_loss": 0.0633777305483818, - "eval_runtime": 3.7174, - "eval_samples_per_second": 67.79, - "eval_steps_per_second": 1.076, - "step": 20524 - }, - { - "eval_cer_subset": 0.01387463969905711, - "eval_cer_subset_edit_distance": 852, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 20524 - } - ], - "logging_steps": 5, - "max_steps": 23460, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 2932, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 6.918689457253859e+18, - "train_batch_size": 32, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/training_args.bin deleted file mode 100644 index f049a0b30d4abb921310cf007655d399147294f8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-20524/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6adec376d54028ef57ef3dc856a5cba12bab9c0d580369637fa983b6072064f7 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/adapter_model.safetensors deleted file mode 100644 index 9c5e62badc97c7bd5603b0a94c88df208769a8ed..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ec5e15b419b88b556f172661495d05db5612e44c5f5e491069a0834105412c13 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/optimizer.pt deleted file mode 100644 index 1f59fb2c8e2ad0ea9d883a951c225ce4f13021b4..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0201b217b6318169c41b80cf856170c5dce4cab09c31c16e4febadcf621d33c5 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/rng_state.pth deleted file mode 100644 index 48d3b0862789a8279291084fafa92eaa06b9c1fd..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:827270dbeb2e04c837d1c35958cd7372837c48e711d15c2963ea3917cc5c7b4a -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/scheduler.pt deleted file mode 100644 index 4fd1cfcf40d34b30794f31ce83effda015a7305d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:018241c2f6214dfcf645e2e92db9ca7ae590dc70f41f5123b6979b3d5694c51a -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/trainer_state.json deleted file mode 100644 index ab82c48b84116ada59576df27a4138de63acfdce..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/trainer_state.json +++ /dev/null @@ -1,3203 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.4979536152796726, - "eval_steps": 366, - "global_step": 2196, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 2.297930050038989e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2196/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/adapter_config.json deleted file mode 100644 index dcdb6b17c195950cd12709b48b32f4a0c173c74e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "down_proj", - "v_proj", - "gate_proj", - "up_proj", - "o_proj", - "q_proj", - "k_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/adapter_model.safetensors deleted file mode 100644 index 620bb05353c976b06b382ac8eab57afe56846f82..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6bdcb9de449bcd859d514261e9967c8d548b2bf2e4b62316bd2cd026ed3fbf60 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/optimizer.pt deleted file mode 100644 index 24a47baeaddf664e3a76c4c95c97e8535491c6bc..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a90baab2183f9c6ebbda598d1693cb12f2f67c0ab964268418b887185c34e09 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/rng_state.pth deleted file mode 100644 index 4669d52b4df192bc91f2676391b1233756a07f32..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a2df993afae23610a148cbb90d0d2fcfd8b5e755873f80c30cdbe027d950c3c -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/scheduler.pt deleted file mode 100644 index 8429b80e88480674a208d83cc0adc0ad68dd7dc3..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4059bef0b2408bf94fa666a438021596e124a648d80d56c2446e470aa3285b11 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/trainer_state.json deleted file mode 100644 index 1ce78a0ba4c6540649f9b810f69a5d7605eeb796..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/trainer_state.json +++ /dev/null @@ -1,33127 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 3.9993179880647913, - "eval_steps": 1466, - "global_step": 23456, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0008525149190110827, - "grad_norm": 1.6653118133544922, - "learning_rate": 6.382978723404255e-07, - "loss": 0.6878558158874511, - "step": 5 - }, - { - "epoch": 0.0017050298380221654, - "grad_norm": 1.6072094440460205, - "learning_rate": 1.4361702127659573e-06, - "loss": 0.7222867965698242, - "step": 10 - }, - { - "epoch": 0.0025575447570332483, - "grad_norm": 1.8327608108520508, - "learning_rate": 2.2340425531914894e-06, - "loss": 0.6977188110351562, - "step": 15 - }, - { - "epoch": 0.0034100596760443308, - "grad_norm": 1.7263766527175903, - "learning_rate": 3.0319148936170214e-06, - "loss": 0.6928215026855469, - "step": 20 - }, - { - "epoch": 0.004262574595055414, - "grad_norm": 1.8482989072799683, - "learning_rate": 3.829787234042553e-06, - "loss": 0.6887872695922852, - "step": 25 - }, - { - "epoch": 0.005115089514066497, - "grad_norm": 2.0272440910339355, - "learning_rate": 4.627659574468085e-06, - "loss": 0.6714544296264648, - "step": 30 - }, - { - "epoch": 0.005967604433077579, - "grad_norm": 2.4286959171295166, - "learning_rate": 5.425531914893616e-06, - "loss": 0.6025971412658692, - "step": 35 - }, - { - "epoch": 0.0068201193520886615, - "grad_norm": 2.26863694190979, - "learning_rate": 6.223404255319148e-06, - "loss": 0.5200423240661621, - "step": 40 - }, - { - "epoch": 0.0076726342710997444, - "grad_norm": 2.0572476387023926, - "learning_rate": 7.02127659574468e-06, - "loss": 0.3741515874862671, - "step": 45 - }, - { - "epoch": 0.008525149190110827, - "grad_norm": 0.9048103094100952, - "learning_rate": 7.819148936170211e-06, - "loss": 0.25923154354095457, - "step": 50 - }, - { - "epoch": 0.00937766410912191, - "grad_norm": 0.5942133069038391, - "learning_rate": 8.617021276595744e-06, - "loss": 0.18589640855789186, - "step": 55 - }, - { - "epoch": 0.010230179028132993, - "grad_norm": 0.40469691157341003, - "learning_rate": 9.414893617021275e-06, - "loss": 0.14508966207504273, - "step": 60 - }, - { - "epoch": 0.011082693947144074, - "grad_norm": 0.319180428981781, - "learning_rate": 1.0212765957446808e-05, - "loss": 0.1287898302078247, - "step": 65 - }, - { - "epoch": 0.011935208866155157, - "grad_norm": 0.2620982229709625, - "learning_rate": 1.101063829787234e-05, - "loss": 0.10825083255767823, - "step": 70 - }, - { - "epoch": 0.01278772378516624, - "grad_norm": 0.2702063024044037, - "learning_rate": 1.1808510638297872e-05, - "loss": 0.09140915870666504, - "step": 75 - }, - { - "epoch": 0.013640238704177323, - "grad_norm": 0.21019567549228668, - "learning_rate": 1.2606382978723403e-05, - "loss": 0.07847741842269898, - "step": 80 - }, - { - "epoch": 0.014492753623188406, - "grad_norm": 0.27865487337112427, - "learning_rate": 1.3404255319148936e-05, - "loss": 0.0681579053401947, - "step": 85 - }, - { - "epoch": 0.015345268542199489, - "grad_norm": 0.2561706006526947, - "learning_rate": 1.4202127659574466e-05, - "loss": 0.06208043098449707, - "step": 90 - }, - { - "epoch": 0.01619778346121057, - "grad_norm": 0.33415308594703674, - "learning_rate": 1.4999999999999999e-05, - "loss": 0.05436077117919922, - "step": 95 - }, - { - "epoch": 0.017050298380221655, - "grad_norm": 0.20705723762512207, - "learning_rate": 1.579787234042553e-05, - "loss": 0.061427068710327146, - "step": 100 - }, - { - "epoch": 0.017902813299232736, - "grad_norm": 0.26269420981407166, - "learning_rate": 1.659574468085106e-05, - "loss": 0.04944279193878174, - "step": 105 - }, - { - "epoch": 0.01875532821824382, - "grad_norm": 0.256533682346344, - "learning_rate": 1.7393617021276596e-05, - "loss": 0.0463131994009018, - "step": 110 - }, - { - "epoch": 0.0196078431372549, - "grad_norm": 0.25077348947525024, - "learning_rate": 1.8191489361702127e-05, - "loss": 0.036723154783248904, - "step": 115 - }, - { - "epoch": 0.020460358056265986, - "grad_norm": 0.2801378071308136, - "learning_rate": 1.8989361702127655e-05, - "loss": 0.04308137893676758, - "step": 120 - }, - { - "epoch": 0.021312872975277068, - "grad_norm": 0.23140908777713776, - "learning_rate": 1.978723404255319e-05, - "loss": 0.0391487717628479, - "step": 125 - }, - { - "epoch": 0.02216538789428815, - "grad_norm": 0.4250975251197815, - "learning_rate": 2.0585106382978724e-05, - "loss": 0.03745899498462677, - "step": 130 - }, - { - "epoch": 0.023017902813299233, - "grad_norm": 0.24533668160438538, - "learning_rate": 2.1382978723404255e-05, - "loss": 0.03649275898933411, - "step": 135 - }, - { - "epoch": 0.023870417732310314, - "grad_norm": 0.2309182733297348, - "learning_rate": 2.2180851063829783e-05, - "loss": 0.036090463399887085, - "step": 140 - }, - { - "epoch": 0.0247229326513214, - "grad_norm": 0.28087928891181946, - "learning_rate": 2.2978723404255317e-05, - "loss": 0.03179733753204346, - "step": 145 - }, - { - "epoch": 0.02557544757033248, - "grad_norm": 0.2785134017467499, - "learning_rate": 2.377659574468085e-05, - "loss": 0.02794267237186432, - "step": 150 - }, - { - "epoch": 0.026427962489343565, - "grad_norm": 0.20468176901340485, - "learning_rate": 2.457446808510638e-05, - "loss": 0.025854837894439698, - "step": 155 - }, - { - "epoch": 0.027280477408354646, - "grad_norm": 0.4069538116455078, - "learning_rate": 2.537234042553191e-05, - "loss": 0.03182780146598816, - "step": 160 - }, - { - "epoch": 0.028132992327365727, - "grad_norm": 0.3025060296058655, - "learning_rate": 2.6170212765957446e-05, - "loss": 0.027975103259086607, - "step": 165 - }, - { - "epoch": 0.028985507246376812, - "grad_norm": 0.22387781739234924, - "learning_rate": 2.6968085106382977e-05, - "loss": 0.025766396522521974, - "step": 170 - }, - { - "epoch": 0.029838022165387893, - "grad_norm": 0.15060213208198547, - "learning_rate": 2.7765957446808508e-05, - "loss": 0.025661858916282653, - "step": 175 - }, - { - "epoch": 0.030690537084398978, - "grad_norm": 0.2912445366382599, - "learning_rate": 2.856382978723404e-05, - "loss": 0.02397846579551697, - "step": 180 - }, - { - "epoch": 0.03154305200341006, - "grad_norm": 0.19390830397605896, - "learning_rate": 2.9361702127659574e-05, - "loss": 0.023744897544384004, - "step": 185 - }, - { - "epoch": 0.03239556692242114, - "grad_norm": 0.24036464095115662, - "learning_rate": 3.0159574468085105e-05, - "loss": 0.0238590270280838, - "step": 190 - }, - { - "epoch": 0.03324808184143223, - "grad_norm": 0.2793026566505432, - "learning_rate": 3.0957446808510636e-05, - "loss": 0.025464403629302978, - "step": 195 - }, - { - "epoch": 0.03410059676044331, - "grad_norm": 0.21164429187774658, - "learning_rate": 3.175531914893617e-05, - "loss": 0.020692554116249085, - "step": 200 - }, - { - "epoch": 0.03495311167945439, - "grad_norm": 0.3742266297340393, - "learning_rate": 3.25531914893617e-05, - "loss": 0.02315486818552017, - "step": 205 - }, - { - "epoch": 0.03580562659846547, - "grad_norm": 0.2690248191356659, - "learning_rate": 3.3351063829787226e-05, - "loss": 0.0174571692943573, - "step": 210 - }, - { - "epoch": 0.03665814151747655, - "grad_norm": 0.3727855980396271, - "learning_rate": 3.414893617021276e-05, - "loss": 0.021705827116966246, - "step": 215 - }, - { - "epoch": 0.03751065643648764, - "grad_norm": 0.27461397647857666, - "learning_rate": 3.4946808510638296e-05, - "loss": 0.028134092688560486, - "step": 220 - }, - { - "epoch": 0.03836317135549872, - "grad_norm": 0.1723722219467163, - "learning_rate": 3.574468085106383e-05, - "loss": 0.02093944847583771, - "step": 225 - }, - { - "epoch": 0.0392156862745098, - "grad_norm": 0.17161321640014648, - "learning_rate": 3.654255319148936e-05, - "loss": 0.021984823048114777, - "step": 230 - }, - { - "epoch": 0.040068201193520885, - "grad_norm": 0.203886479139328, - "learning_rate": 3.734042553191489e-05, - "loss": 0.02234097272157669, - "step": 235 - }, - { - "epoch": 0.04092071611253197, - "grad_norm": 0.2021923065185547, - "learning_rate": 3.813829787234042e-05, - "loss": 0.02671273946762085, - "step": 240 - }, - { - "epoch": 0.041773231031543054, - "grad_norm": 0.24510027468204498, - "learning_rate": 3.8936170212765955e-05, - "loss": 0.016925157606601716, - "step": 245 - }, - { - "epoch": 0.042625745950554135, - "grad_norm": 0.191350057721138, - "learning_rate": 3.973404255319149e-05, - "loss": 0.02067229151725769, - "step": 250 - }, - { - "epoch": 0.043478260869565216, - "grad_norm": 0.20248189568519592, - "learning_rate": 4.053191489361702e-05, - "loss": 0.01805101931095123, - "step": 255 - }, - { - "epoch": 0.0443307757885763, - "grad_norm": 0.22623269259929657, - "learning_rate": 4.1329787234042545e-05, - "loss": 0.019811727106571198, - "step": 260 - }, - { - "epoch": 0.045183290707587385, - "grad_norm": 0.2050849199295044, - "learning_rate": 4.2127659574468086e-05, - "loss": 0.017767781019210817, - "step": 265 - }, - { - "epoch": 0.04603580562659847, - "grad_norm": 0.24867363274097443, - "learning_rate": 4.2925531914893614e-05, - "loss": 0.02127009928226471, - "step": 270 - }, - { - "epoch": 0.04688832054560955, - "grad_norm": 0.20721694827079773, - "learning_rate": 4.372340425531914e-05, - "loss": 0.022770658135414124, - "step": 275 - }, - { - "epoch": 0.04774083546462063, - "grad_norm": 0.19511370360851288, - "learning_rate": 4.452127659574468e-05, - "loss": 0.02165296971797943, - "step": 280 - }, - { - "epoch": 0.04859335038363171, - "grad_norm": 0.19066740572452545, - "learning_rate": 4.5319148936170204e-05, - "loss": 0.020857495069503785, - "step": 285 - }, - { - "epoch": 0.0494458653026428, - "grad_norm": 0.14217901229858398, - "learning_rate": 4.6117021276595746e-05, - "loss": 0.016413891315460206, - "step": 290 - }, - { - "epoch": 0.05029838022165388, - "grad_norm": 0.24177870154380798, - "learning_rate": 4.6914893617021274e-05, - "loss": 0.01902117133140564, - "step": 295 - }, - { - "epoch": 0.05115089514066496, - "grad_norm": 0.2742858827114105, - "learning_rate": 4.77127659574468e-05, - "loss": 0.016554126143455507, - "step": 300 - }, - { - "epoch": 0.05200341005967604, - "grad_norm": 0.24519385397434235, - "learning_rate": 4.851063829787234e-05, - "loss": 0.022036357223987578, - "step": 305 - }, - { - "epoch": 0.05285592497868713, - "grad_norm": 0.296572208404541, - "learning_rate": 4.930851063829787e-05, - "loss": 0.0196581095457077, - "step": 310 - }, - { - "epoch": 0.05370843989769821, - "grad_norm": 0.17092008888721466, - "learning_rate": 5.01063829787234e-05, - "loss": 0.015453743934631347, - "step": 315 - }, - { - "epoch": 0.05456095481670929, - "grad_norm": 0.17074067890644073, - "learning_rate": 5.090425531914893e-05, - "loss": 0.013983796536922454, - "step": 320 - }, - { - "epoch": 0.05541346973572037, - "grad_norm": 0.27302470803260803, - "learning_rate": 5.170212765957446e-05, - "loss": 0.015387402474880218, - "step": 325 - }, - { - "epoch": 0.056265984654731455, - "grad_norm": 0.2603592872619629, - "learning_rate": 5.2499999999999995e-05, - "loss": 0.013557825982570649, - "step": 330 - }, - { - "epoch": 0.05711849957374254, - "grad_norm": 0.23079948127269745, - "learning_rate": 5.329787234042553e-05, - "loss": 0.014809322357177735, - "step": 335 - }, - { - "epoch": 0.057971014492753624, - "grad_norm": 0.15029627084732056, - "learning_rate": 5.409574468085106e-05, - "loss": 0.018204084038734435, - "step": 340 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 0.2930934429168701, - "learning_rate": 5.4893617021276586e-05, - "loss": 0.02035558819770813, - "step": 345 - }, - { - "epoch": 0.059676044330775786, - "grad_norm": 0.19710905849933624, - "learning_rate": 5.569148936170213e-05, - "loss": 0.015025021135807037, - "step": 350 - }, - { - "epoch": 0.060528559249786874, - "grad_norm": 0.13316969573497772, - "learning_rate": 5.6489361702127655e-05, - "loss": 0.01258893460035324, - "step": 355 - }, - { - "epoch": 0.061381074168797956, - "grad_norm": 0.30591729283332825, - "learning_rate": 5.728723404255319e-05, - "loss": 0.0187319278717041, - "step": 360 - }, - { - "epoch": 0.06223358908780904, - "grad_norm": 0.22696663439273834, - "learning_rate": 5.808510638297872e-05, - "loss": 0.015805599093437196, - "step": 365 - }, - { - "epoch": 0.06308610400682012, - "grad_norm": 0.2800130546092987, - "learning_rate": 5.8882978723404245e-05, - "loss": 0.01719983071088791, - "step": 370 - }, - { - "epoch": 0.0639386189258312, - "grad_norm": 0.20671235024929047, - "learning_rate": 5.9680851063829786e-05, - "loss": 0.014449423551559449, - "step": 375 - }, - { - "epoch": 0.06479113384484228, - "grad_norm": 0.1630883365869522, - "learning_rate": 6.0478723404255314e-05, - "loss": 0.017153808474540712, - "step": 380 - }, - { - "epoch": 0.06564364876385337, - "grad_norm": 0.18699344992637634, - "learning_rate": 6.127659574468084e-05, - "loss": 0.015558701753616334, - "step": 385 - }, - { - "epoch": 0.06649616368286446, - "grad_norm": 0.15257929265499115, - "learning_rate": 6.207446808510638e-05, - "loss": 0.018240168690681458, - "step": 390 - }, - { - "epoch": 0.06734867860187553, - "grad_norm": 0.19658304750919342, - "learning_rate": 6.287234042553191e-05, - "loss": 0.015907022356987, - "step": 395 - }, - { - "epoch": 0.06820119352088662, - "grad_norm": 0.3743384778499603, - "learning_rate": 6.367021276595743e-05, - "loss": 0.014921861886978149, - "step": 400 - }, - { - "epoch": 0.06905370843989769, - "grad_norm": 0.254724383354187, - "learning_rate": 6.446808510638298e-05, - "loss": 0.018739913403987885, - "step": 405 - }, - { - "epoch": 0.06990622335890878, - "grad_norm": 0.19365151226520538, - "learning_rate": 6.52659574468085e-05, - "loss": 0.018984711170196532, - "step": 410 - }, - { - "epoch": 0.07075873827791987, - "grad_norm": 0.20728597044944763, - "learning_rate": 6.606382978723404e-05, - "loss": 0.01524859368801117, - "step": 415 - }, - { - "epoch": 0.07161125319693094, - "grad_norm": 0.1415344476699829, - "learning_rate": 6.686170212765957e-05, - "loss": 0.014004582166671753, - "step": 420 - }, - { - "epoch": 0.07246376811594203, - "grad_norm": 0.14169375598430634, - "learning_rate": 6.765957446808509e-05, - "loss": 0.013503608107566834, - "step": 425 - }, - { - "epoch": 0.0733162830349531, - "grad_norm": 0.22518369555473328, - "learning_rate": 6.845744680851064e-05, - "loss": 0.017587104439735414, - "step": 430 - }, - { - "epoch": 0.0741687979539642, - "grad_norm": 0.1091267541050911, - "learning_rate": 6.925531914893616e-05, - "loss": 0.013890406489372254, - "step": 435 - }, - { - "epoch": 0.07502131287297528, - "grad_norm": 0.24982722103595734, - "learning_rate": 7.00531914893617e-05, - "loss": 0.01599075198173523, - "step": 440 - }, - { - "epoch": 0.07587382779198636, - "grad_norm": 0.22311007976531982, - "learning_rate": 7.085106382978723e-05, - "loss": 0.014870230853557587, - "step": 445 - }, - { - "epoch": 0.07672634271099744, - "grad_norm": 0.27064985036849976, - "learning_rate": 7.164893617021276e-05, - "loss": 0.019213163852691652, - "step": 450 - }, - { - "epoch": 0.07757885763000852, - "grad_norm": 0.16876882314682007, - "learning_rate": 7.244680851063829e-05, - "loss": 0.014935044944286347, - "step": 455 - }, - { - "epoch": 0.0784313725490196, - "grad_norm": 0.18644963204860687, - "learning_rate": 7.324468085106382e-05, - "loss": 0.013823372125625611, - "step": 460 - }, - { - "epoch": 0.0792838874680307, - "grad_norm": 0.13067972660064697, - "learning_rate": 7.404255319148935e-05, - "loss": 0.013839980959892273, - "step": 465 - }, - { - "epoch": 0.08013640238704177, - "grad_norm": 0.0976850613951683, - "learning_rate": 7.484042553191489e-05, - "loss": 0.012782379984855652, - "step": 470 - }, - { - "epoch": 0.08098891730605286, - "grad_norm": 0.2523496150970459, - "learning_rate": 7.499999439800074e-05, - "loss": 0.013075053691864014, - "step": 475 - }, - { - "epoch": 0.08184143222506395, - "grad_norm": 0.18349343538284302, - "learning_rate": 7.499997163988164e-05, - "loss": 0.01753528565168381, - "step": 480 - }, - { - "epoch": 0.08269394714407502, - "grad_norm": 0.16528834402561188, - "learning_rate": 7.499993137552834e-05, - "loss": 0.012987489998340606, - "step": 485 - }, - { - "epoch": 0.08354646206308611, - "grad_norm": 0.1918938159942627, - "learning_rate": 7.499987360495964e-05, - "loss": 0.018496687710285186, - "step": 490 - }, - { - "epoch": 0.08439897698209718, - "grad_norm": 0.11452502012252808, - "learning_rate": 7.499979832820255e-05, - "loss": 0.015578755736351013, - "step": 495 - }, - { - "epoch": 0.08525149190110827, - "grad_norm": 0.2084985226392746, - "learning_rate": 7.499970554529216e-05, - "loss": 0.014264023303985596, - "step": 500 - }, - { - "epoch": 0.08610400682011936, - "grad_norm": 0.13777600228786469, - "learning_rate": 7.49995952562718e-05, - "loss": 0.014527246356010437, - "step": 505 - }, - { - "epoch": 0.08695652173913043, - "grad_norm": 0.2515336275100708, - "learning_rate": 7.499946746119296e-05, - "loss": 0.019042417407035828, - "step": 510 - }, - { - "epoch": 0.08780903665814152, - "grad_norm": 0.12859591841697693, - "learning_rate": 7.499932216011531e-05, - "loss": 0.01340993344783783, - "step": 515 - }, - { - "epoch": 0.0886615515771526, - "grad_norm": 0.15603038668632507, - "learning_rate": 7.499915935310667e-05, - "loss": 0.01645509898662567, - "step": 520 - }, - { - "epoch": 0.08951406649616368, - "grad_norm": 0.1493784785270691, - "learning_rate": 7.499897904024303e-05, - "loss": 0.016503919661045075, - "step": 525 - }, - { - "epoch": 0.09036658141517477, - "grad_norm": 0.14551150798797607, - "learning_rate": 7.499878122160858e-05, - "loss": 0.013891169428825378, - "step": 530 - }, - { - "epoch": 0.09121909633418585, - "grad_norm": 0.12197990715503693, - "learning_rate": 7.499856589729569e-05, - "loss": 0.01531532108783722, - "step": 535 - }, - { - "epoch": 0.09207161125319693, - "grad_norm": 0.24787496030330658, - "learning_rate": 7.499833306740485e-05, - "loss": 0.016374409198760986, - "step": 540 - }, - { - "epoch": 0.09292412617220801, - "grad_norm": 0.10902808606624603, - "learning_rate": 7.499808273204476e-05, - "loss": 0.013505718111991883, - "step": 545 - }, - { - "epoch": 0.0937766410912191, - "grad_norm": 0.16540755331516266, - "learning_rate": 7.499781489133228e-05, - "loss": 0.016257329285144805, - "step": 550 - }, - { - "epoch": 0.09462915601023018, - "grad_norm": 0.1228717565536499, - "learning_rate": 7.499752954539245e-05, - "loss": 0.011345921456813813, - "step": 555 - }, - { - "epoch": 0.09548167092924126, - "grad_norm": 0.12704797089099884, - "learning_rate": 7.49972266943585e-05, - "loss": 0.014449910819530487, - "step": 560 - }, - { - "epoch": 0.09633418584825235, - "grad_norm": 0.2014201581478119, - "learning_rate": 7.499690633837178e-05, - "loss": 0.01456935852766037, - "step": 565 - }, - { - "epoch": 0.09718670076726342, - "grad_norm": 0.17480657994747162, - "learning_rate": 7.499656847758187e-05, - "loss": 0.014413098990917205, - "step": 570 - }, - { - "epoch": 0.09803921568627451, - "grad_norm": 0.18946893513202667, - "learning_rate": 7.499621311214646e-05, - "loss": 0.0139508917927742, - "step": 575 - }, - { - "epoch": 0.0988917306052856, - "grad_norm": 0.15367820858955383, - "learning_rate": 7.499584024223149e-05, - "loss": 0.014091435074806213, - "step": 580 - }, - { - "epoch": 0.09974424552429667, - "grad_norm": 0.15018688142299652, - "learning_rate": 7.499544986801099e-05, - "loss": 0.016737687587738036, - "step": 585 - }, - { - "epoch": 0.10059676044330776, - "grad_norm": 0.17522579431533813, - "learning_rate": 7.499504198966722e-05, - "loss": 0.015822765231132508, - "step": 590 - }, - { - "epoch": 0.10144927536231885, - "grad_norm": 0.12634818255901337, - "learning_rate": 7.499461660739059e-05, - "loss": 0.015363042056560517, - "step": 595 - }, - { - "epoch": 0.10230179028132992, - "grad_norm": 0.12466495484113693, - "learning_rate": 7.499417372137968e-05, - "loss": 0.013208043575286866, - "step": 600 - }, - { - "epoch": 0.10315430520034101, - "grad_norm": 0.18877951800823212, - "learning_rate": 7.499371333184125e-05, - "loss": 0.016261917352676392, - "step": 605 - }, - { - "epoch": 0.10400682011935208, - "grad_norm": 0.21521399915218353, - "learning_rate": 7.49932354389902e-05, - "loss": 0.013861049711704255, - "step": 610 - }, - { - "epoch": 0.10485933503836317, - "grad_norm": 0.1375613659620285, - "learning_rate": 7.499274004304964e-05, - "loss": 0.015597744286060334, - "step": 615 - }, - { - "epoch": 0.10571184995737426, - "grad_norm": 0.16078200936317444, - "learning_rate": 7.499222714425087e-05, - "loss": 0.018308353424072266, - "step": 620 - }, - { - "epoch": 0.10656436487638533, - "grad_norm": 0.15485632419586182, - "learning_rate": 7.499169674283328e-05, - "loss": 0.015836401283740996, - "step": 625 - }, - { - "epoch": 0.10741687979539642, - "grad_norm": 0.14019452035427094, - "learning_rate": 7.499114883904451e-05, - "loss": 0.014591602981090546, - "step": 630 - }, - { - "epoch": 0.1082693947144075, - "grad_norm": 0.2042212039232254, - "learning_rate": 7.499058343314031e-05, - "loss": 0.01718664914369583, - "step": 635 - }, - { - "epoch": 0.10912190963341858, - "grad_norm": 0.13801656663417816, - "learning_rate": 7.499000052538467e-05, - "loss": 0.015579727292060853, - "step": 640 - }, - { - "epoch": 0.10997442455242967, - "grad_norm": 0.12787523865699768, - "learning_rate": 7.498940011604968e-05, - "loss": 0.012525486946105956, - "step": 645 - }, - { - "epoch": 0.11082693947144075, - "grad_norm": 0.1752539724111557, - "learning_rate": 7.498878220541564e-05, - "loss": 0.014286568760871888, - "step": 650 - }, - { - "epoch": 0.11167945439045183, - "grad_norm": 0.12485212087631226, - "learning_rate": 7.498814679377101e-05, - "loss": 0.01482405811548233, - "step": 655 - }, - { - "epoch": 0.11253196930946291, - "grad_norm": 0.10980220139026642, - "learning_rate": 7.498749388141243e-05, - "loss": 0.01597980558872223, - "step": 660 - }, - { - "epoch": 0.113384484228474, - "grad_norm": 0.09144977480173111, - "learning_rate": 7.498682346864469e-05, - "loss": 0.011583130061626434, - "step": 665 - }, - { - "epoch": 0.11423699914748509, - "grad_norm": 0.11955336481332779, - "learning_rate": 7.498613555578076e-05, - "loss": 0.013009116053581238, - "step": 670 - }, - { - "epoch": 0.11508951406649616, - "grad_norm": 0.14505070447921753, - "learning_rate": 7.49854301431418e-05, - "loss": 0.013878281414508819, - "step": 675 - }, - { - "epoch": 0.11594202898550725, - "grad_norm": 0.15674598515033722, - "learning_rate": 7.49847072310571e-05, - "loss": 0.014717698097229004, - "step": 680 - }, - { - "epoch": 0.11679454390451834, - "grad_norm": 0.16684003174304962, - "learning_rate": 7.498396681986413e-05, - "loss": 0.015567027032375336, - "step": 685 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 0.13314439356327057, - "learning_rate": 7.498320890990857e-05, - "loss": 0.017679129540920258, - "step": 690 - }, - { - "epoch": 0.1184995737425405, - "grad_norm": 0.1099700927734375, - "learning_rate": 7.498243350154423e-05, - "loss": 0.015660777688026428, - "step": 695 - }, - { - "epoch": 0.11935208866155157, - "grad_norm": 0.09305288642644882, - "learning_rate": 7.498164059513307e-05, - "loss": 0.011864218115806579, - "step": 700 - }, - { - "epoch": 0.12020460358056266, - "grad_norm": 0.1284978687763214, - "learning_rate": 7.498083019104527e-05, - "loss": 0.013128276169300079, - "step": 705 - }, - { - "epoch": 0.12105711849957375, - "grad_norm": 0.19205476343631744, - "learning_rate": 7.498000228965913e-05, - "loss": 0.014518238604068756, - "step": 710 - }, - { - "epoch": 0.12190963341858482, - "grad_norm": 0.25064221024513245, - "learning_rate": 7.497915689136119e-05, - "loss": 0.014235696196556092, - "step": 715 - }, - { - "epoch": 0.12276214833759591, - "grad_norm": 0.11866044998168945, - "learning_rate": 7.497829399654607e-05, - "loss": 0.013622967898845673, - "step": 720 - }, - { - "epoch": 0.12361466325660699, - "grad_norm": 0.1366252452135086, - "learning_rate": 7.49774136056166e-05, - "loss": 0.013592693209648132, - "step": 725 - }, - { - "epoch": 0.12446717817561807, - "grad_norm": 0.12722773849964142, - "learning_rate": 7.497651571898379e-05, - "loss": 0.02055167257785797, - "step": 730 - }, - { - "epoch": 0.12531969309462915, - "grad_norm": 0.07723517715930939, - "learning_rate": 7.49756003370668e-05, - "loss": 0.012377069890499115, - "step": 735 - }, - { - "epoch": 0.12617220801364024, - "grad_norm": 0.11081967502832413, - "learning_rate": 7.497466746029293e-05, - "loss": 0.013797640800476074, - "step": 740 - }, - { - "epoch": 0.12702472293265132, - "grad_norm": 0.1117677390575409, - "learning_rate": 7.497371708909771e-05, - "loss": 0.01338990479707718, - "step": 745 - }, - { - "epoch": 0.1278772378516624, - "grad_norm": 0.24659112095832825, - "learning_rate": 7.497274922392483e-05, - "loss": 0.012844511866569519, - "step": 750 - }, - { - "epoch": 0.1287297527706735, - "grad_norm": 0.24900244176387787, - "learning_rate": 7.497176386522606e-05, - "loss": 0.015656352043151855, - "step": 755 - }, - { - "epoch": 0.12958226768968456, - "grad_norm": 0.1331390142440796, - "learning_rate": 7.497076101346144e-05, - "loss": 0.013722085952758789, - "step": 760 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 0.1224697008728981, - "learning_rate": 7.496974066909913e-05, - "loss": 0.011186304688453674, - "step": 765 - }, - { - "epoch": 0.13128729752770674, - "grad_norm": 0.11053820699453354, - "learning_rate": 7.496870283261546e-05, - "loss": 0.012894454598426818, - "step": 770 - }, - { - "epoch": 0.13213981244671782, - "grad_norm": 0.09663277864456177, - "learning_rate": 7.49676475044949e-05, - "loss": 0.015331995487213135, - "step": 775 - }, - { - "epoch": 0.1329923273657289, - "grad_norm": 0.1483219712972641, - "learning_rate": 7.496657468523014e-05, - "loss": 0.015070399641990662, - "step": 780 - }, - { - "epoch": 0.13384484228473997, - "grad_norm": 0.12375539541244507, - "learning_rate": 7.496548437532202e-05, - "loss": 0.013722005486488342, - "step": 785 - }, - { - "epoch": 0.13469735720375106, - "grad_norm": 0.14662130177021027, - "learning_rate": 7.496437657527949e-05, - "loss": 0.012852996587753296, - "step": 790 - }, - { - "epoch": 0.13554987212276215, - "grad_norm": 0.1740300953388214, - "learning_rate": 7.496325128561975e-05, - "loss": 0.014796209335327149, - "step": 795 - }, - { - "epoch": 0.13640238704177324, - "grad_norm": 0.10670243203639984, - "learning_rate": 7.496210850686809e-05, - "loss": 0.013983005285263061, - "step": 800 - }, - { - "epoch": 0.13725490196078433, - "grad_norm": 0.1362515240907669, - "learning_rate": 7.496094823955801e-05, - "loss": 0.014472618699073792, - "step": 805 - }, - { - "epoch": 0.13810741687979539, - "grad_norm": 0.11403689533472061, - "learning_rate": 7.495977048423117e-05, - "loss": 0.01294848918914795, - "step": 810 - }, - { - "epoch": 0.13895993179880647, - "grad_norm": 0.11532565206289291, - "learning_rate": 7.495857524143738e-05, - "loss": 0.011346425861120224, - "step": 815 - }, - { - "epoch": 0.13981244671781756, - "grad_norm": 0.1506761759519577, - "learning_rate": 7.495736251173463e-05, - "loss": 0.016532811522483825, - "step": 820 - }, - { - "epoch": 0.14066496163682865, - "grad_norm": 0.10915899276733398, - "learning_rate": 7.495613229568903e-05, - "loss": 0.01278679072856903, - "step": 825 - }, - { - "epoch": 0.14151747655583974, - "grad_norm": 0.10394130647182465, - "learning_rate": 7.49548845938749e-05, - "loss": 0.011449626088142395, - "step": 830 - }, - { - "epoch": 0.1423699914748508, - "grad_norm": 0.08730677515268326, - "learning_rate": 7.495361940687475e-05, - "loss": 0.011916645616292954, - "step": 835 - }, - { - "epoch": 0.1432225063938619, - "grad_norm": 0.08257348835468292, - "learning_rate": 7.495233673527914e-05, - "loss": 0.013689276576042176, - "step": 840 - }, - { - "epoch": 0.14407502131287298, - "grad_norm": 0.182078555226326, - "learning_rate": 7.495103657968692e-05, - "loss": 0.019141729176044463, - "step": 845 - }, - { - "epoch": 0.14492753623188406, - "grad_norm": 0.105568528175354, - "learning_rate": 7.494971894070501e-05, - "loss": 0.015522226691246033, - "step": 850 - }, - { - "epoch": 0.14578005115089515, - "grad_norm": 0.09030122309923172, - "learning_rate": 7.494838381894856e-05, - "loss": 0.012900182604789734, - "step": 855 - }, - { - "epoch": 0.1466325660699062, - "grad_norm": 0.11199921369552612, - "learning_rate": 7.494703121504082e-05, - "loss": 0.013011230528354645, - "step": 860 - }, - { - "epoch": 0.1474850809889173, - "grad_norm": 0.11342430859804153, - "learning_rate": 7.494566112961325e-05, - "loss": 0.015477502346038818, - "step": 865 - }, - { - "epoch": 0.1483375959079284, - "grad_norm": 0.07956613600254059, - "learning_rate": 7.494427356330544e-05, - "loss": 0.011270551383495331, - "step": 870 - }, - { - "epoch": 0.14919011082693948, - "grad_norm": 0.13356897234916687, - "learning_rate": 7.494286851676515e-05, - "loss": 0.012481572479009629, - "step": 875 - }, - { - "epoch": 0.15004262574595056, - "grad_norm": 0.14493827521800995, - "learning_rate": 7.494144599064833e-05, - "loss": 0.015318951010704041, - "step": 880 - }, - { - "epoch": 0.15089514066496162, - "grad_norm": 0.13254614174365997, - "learning_rate": 7.494000598561902e-05, - "loss": 0.012697519361972808, - "step": 885 - }, - { - "epoch": 0.1517476555839727, - "grad_norm": 0.12164720892906189, - "learning_rate": 7.49385485023495e-05, - "loss": 0.011361779272556305, - "step": 890 - }, - { - "epoch": 0.1526001705029838, - "grad_norm": 0.10743863880634308, - "learning_rate": 7.493707354152015e-05, - "loss": 0.012824115157127381, - "step": 895 - }, - { - "epoch": 0.1534526854219949, - "grad_norm": 0.0933203473687172, - "learning_rate": 7.493558110381954e-05, - "loss": 0.012234293669462205, - "step": 900 - }, - { - "epoch": 0.15430520034100598, - "grad_norm": 0.10252948850393295, - "learning_rate": 7.493407118994437e-05, - "loss": 0.01874733567237854, - "step": 905 - }, - { - "epoch": 0.15515771526001704, - "grad_norm": 0.1184248998761177, - "learning_rate": 7.493254380059954e-05, - "loss": 0.012014241516590118, - "step": 910 - }, - { - "epoch": 0.15601023017902813, - "grad_norm": 0.10140799731016159, - "learning_rate": 7.49309989364981e-05, - "loss": 0.013288506865501403, - "step": 915 - }, - { - "epoch": 0.1568627450980392, - "grad_norm": 0.15704941749572754, - "learning_rate": 7.492943659836121e-05, - "loss": 0.012907981872558594, - "step": 920 - }, - { - "epoch": 0.1577152600170503, - "grad_norm": 0.10368761420249939, - "learning_rate": 7.492785678691822e-05, - "loss": 0.01384827345609665, - "step": 925 - }, - { - "epoch": 0.1585677749360614, - "grad_norm": 0.13517284393310547, - "learning_rate": 7.492625950290668e-05, - "loss": 0.014644764363765717, - "step": 930 - }, - { - "epoch": 0.15942028985507245, - "grad_norm": 0.08189263194799423, - "learning_rate": 7.492464474707222e-05, - "loss": 0.011974713951349258, - "step": 935 - }, - { - "epoch": 0.16027280477408354, - "grad_norm": 0.11737149208784103, - "learning_rate": 7.492301252016867e-05, - "loss": 0.014023615419864655, - "step": 940 - }, - { - "epoch": 0.16112531969309463, - "grad_norm": 0.15778031945228577, - "learning_rate": 7.492136282295801e-05, - "loss": 0.01533726304769516, - "step": 945 - }, - { - "epoch": 0.16197783461210571, - "grad_norm": 0.14194118976593018, - "learning_rate": 7.491969565621036e-05, - "loss": 0.01569782793521881, - "step": 950 - }, - { - "epoch": 0.1628303495311168, - "grad_norm": 0.12579558789730072, - "learning_rate": 7.491801102070403e-05, - "loss": 0.012844063341617584, - "step": 955 - }, - { - "epoch": 0.1636828644501279, - "grad_norm": 0.10578597337007523, - "learning_rate": 7.491630891722547e-05, - "loss": 0.011186198890209198, - "step": 960 - }, - { - "epoch": 0.16453537936913895, - "grad_norm": 0.12398207187652588, - "learning_rate": 7.491458934656925e-05, - "loss": 0.014328254759311676, - "step": 965 - }, - { - "epoch": 0.16538789428815004, - "grad_norm": 0.09390626102685928, - "learning_rate": 7.491285230953814e-05, - "loss": 0.013881708681583404, - "step": 970 - }, - { - "epoch": 0.16624040920716113, - "grad_norm": 0.17196106910705566, - "learning_rate": 7.491109780694303e-05, - "loss": 0.014424234628677368, - "step": 975 - }, - { - "epoch": 0.16709292412617222, - "grad_norm": 0.06994624435901642, - "learning_rate": 7.490932583960302e-05, - "loss": 0.010434426367282867, - "step": 980 - }, - { - "epoch": 0.1679454390451833, - "grad_norm": 0.09414499998092651, - "learning_rate": 7.490753640834527e-05, - "loss": 0.010483803600072861, - "step": 985 - }, - { - "epoch": 0.16879795396419436, - "grad_norm": 0.11002875864505768, - "learning_rate": 7.490572951400518e-05, - "loss": 0.009266073256731034, - "step": 990 - }, - { - "epoch": 0.16965046888320545, - "grad_norm": 0.14956022799015045, - "learning_rate": 7.490390515742626e-05, - "loss": 0.015529079735279084, - "step": 995 - }, - { - "epoch": 0.17050298380221654, - "grad_norm": 0.13828666508197784, - "learning_rate": 7.490206333946019e-05, - "loss": 0.011511271446943283, - "step": 1000 - }, - { - "epoch": 0.17135549872122763, - "grad_norm": 0.1265997439622879, - "learning_rate": 7.490020406096678e-05, - "loss": 0.010465707629919052, - "step": 1005 - }, - { - "epoch": 0.17220801364023872, - "grad_norm": 0.07772056013345718, - "learning_rate": 7.489832732281401e-05, - "loss": 0.013828210532665253, - "step": 1010 - }, - { - "epoch": 0.17306052855924978, - "grad_norm": 0.08055376261472702, - "learning_rate": 7.489643312587799e-05, - "loss": 0.013010218739509583, - "step": 1015 - }, - { - "epoch": 0.17391304347826086, - "grad_norm": 0.09059367328882217, - "learning_rate": 7.489452147104301e-05, - "loss": 0.013864235579967498, - "step": 1020 - }, - { - "epoch": 0.17476555839727195, - "grad_norm": 0.15273553133010864, - "learning_rate": 7.489259235920149e-05, - "loss": 0.013432112336158753, - "step": 1025 - }, - { - "epoch": 0.17561807331628304, - "grad_norm": 0.11343793570995331, - "learning_rate": 7.489064579125399e-05, - "loss": 0.01213686615228653, - "step": 1030 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 0.11880069226026535, - "learning_rate": 7.488868176810926e-05, - "loss": 0.012188264727592468, - "step": 1035 - }, - { - "epoch": 0.1773231031543052, - "grad_norm": 0.16270127892494202, - "learning_rate": 7.488670029068415e-05, - "loss": 0.015294317901134492, - "step": 1040 - }, - { - "epoch": 0.17817561807331628, - "grad_norm": 0.11542797088623047, - "learning_rate": 7.488470135990369e-05, - "loss": 0.013500772416591644, - "step": 1045 - }, - { - "epoch": 0.17902813299232737, - "grad_norm": 0.09579882025718689, - "learning_rate": 7.488268497670103e-05, - "loss": 0.013453315198421478, - "step": 1050 - }, - { - "epoch": 0.17988064791133845, - "grad_norm": 0.08847666531801224, - "learning_rate": 7.488065114201752e-05, - "loss": 0.0153861403465271, - "step": 1055 - }, - { - "epoch": 0.18073316283034954, - "grad_norm": 0.11167823523283005, - "learning_rate": 7.487859985680257e-05, - "loss": 0.011502107977867127, - "step": 1060 - }, - { - "epoch": 0.1815856777493606, - "grad_norm": 0.11255413293838501, - "learning_rate": 7.487653112201385e-05, - "loss": 0.012194579839706421, - "step": 1065 - }, - { - "epoch": 0.1824381926683717, - "grad_norm": 0.10929680615663528, - "learning_rate": 7.487444493861705e-05, - "loss": 0.015874122083187104, - "step": 1070 - }, - { - "epoch": 0.18329070758738278, - "grad_norm": 0.10753592848777771, - "learning_rate": 7.487234130758613e-05, - "loss": 0.009445396810770034, - "step": 1075 - }, - { - "epoch": 0.18414322250639387, - "grad_norm": 0.11368737369775772, - "learning_rate": 7.487022022990309e-05, - "loss": 0.011674505472183228, - "step": 1080 - }, - { - "epoch": 0.18499573742540495, - "grad_norm": 0.12944836914539337, - "learning_rate": 7.486808170655813e-05, - "loss": 0.011856313049793243, - "step": 1085 - }, - { - "epoch": 0.18584825234441602, - "grad_norm": 0.0833214819431305, - "learning_rate": 7.48659257385496e-05, - "loss": 0.014119544625282287, - "step": 1090 - }, - { - "epoch": 0.1867007672634271, - "grad_norm": 0.11955790221691132, - "learning_rate": 7.486375232688397e-05, - "loss": 0.012977911531925202, - "step": 1095 - }, - { - "epoch": 0.1875532821824382, - "grad_norm": 0.1452455222606659, - "learning_rate": 7.486156147257584e-05, - "loss": 0.015410827100276947, - "step": 1100 - }, - { - "epoch": 0.18840579710144928, - "grad_norm": 0.09630092978477478, - "learning_rate": 7.485935317664801e-05, - "loss": 0.013698874413967133, - "step": 1105 - }, - { - "epoch": 0.18925831202046037, - "grad_norm": 0.11687257140874863, - "learning_rate": 7.485712744013137e-05, - "loss": 0.013196484744548797, - "step": 1110 - }, - { - "epoch": 0.19011082693947143, - "grad_norm": 0.08894632011651993, - "learning_rate": 7.485488426406495e-05, - "loss": 0.01540881097316742, - "step": 1115 - }, - { - "epoch": 0.19096334185848252, - "grad_norm": 0.09196458756923676, - "learning_rate": 7.485262364949597e-05, - "loss": 0.012018527090549468, - "step": 1120 - }, - { - "epoch": 0.1918158567774936, - "grad_norm": 0.12328553944826126, - "learning_rate": 7.485034559747974e-05, - "loss": 0.014925773441791534, - "step": 1125 - }, - { - "epoch": 0.1926683716965047, - "grad_norm": 0.12566202878952026, - "learning_rate": 7.484805010907975e-05, - "loss": 0.01104198843240738, - "step": 1130 - }, - { - "epoch": 0.19352088661551578, - "grad_norm": 0.1022765263915062, - "learning_rate": 7.484573718536758e-05, - "loss": 0.0118367500603199, - "step": 1135 - }, - { - "epoch": 0.19437340153452684, - "grad_norm": 0.09682224690914154, - "learning_rate": 7.4843406827423e-05, - "loss": 0.011423001438379288, - "step": 1140 - }, - { - "epoch": 0.19522591645353793, - "grad_norm": 0.15166254341602325, - "learning_rate": 7.484105903633388e-05, - "loss": 0.014048118889331818, - "step": 1145 - }, - { - "epoch": 0.19607843137254902, - "grad_norm": 0.09285032004117966, - "learning_rate": 7.483869381319627e-05, - "loss": 0.012882034480571746, - "step": 1150 - }, - { - "epoch": 0.1969309462915601, - "grad_norm": 0.09011214971542358, - "learning_rate": 7.483631115911434e-05, - "loss": 0.01419239491224289, - "step": 1155 - }, - { - "epoch": 0.1977834612105712, - "grad_norm": 0.14540383219718933, - "learning_rate": 7.483391107520037e-05, - "loss": 0.014623096585273743, - "step": 1160 - }, - { - "epoch": 0.19863597612958228, - "grad_norm": 0.12326448410749435, - "learning_rate": 7.483149356257479e-05, - "loss": 0.013109591603279114, - "step": 1165 - }, - { - "epoch": 0.19948849104859334, - "grad_norm": 0.07067535817623138, - "learning_rate": 7.482905862236622e-05, - "loss": 0.013740380108356477, - "step": 1170 - }, - { - "epoch": 0.20034100596760443, - "grad_norm": 0.10170529782772064, - "learning_rate": 7.482660625571134e-05, - "loss": 0.011151721328496933, - "step": 1175 - }, - { - "epoch": 0.20119352088661552, - "grad_norm": 0.10074683278799057, - "learning_rate": 7.482413646375498e-05, - "loss": 0.01180294007062912, - "step": 1180 - }, - { - "epoch": 0.2020460358056266, - "grad_norm": 0.047992780804634094, - "learning_rate": 7.482164924765016e-05, - "loss": 0.01065710186958313, - "step": 1185 - }, - { - "epoch": 0.2028985507246377, - "grad_norm": 0.1279197484254837, - "learning_rate": 7.481914460855796e-05, - "loss": 0.012219739705324173, - "step": 1190 - }, - { - "epoch": 0.20375106564364875, - "grad_norm": 0.11339649558067322, - "learning_rate": 7.481662254764765e-05, - "loss": 0.012664712965488434, - "step": 1195 - }, - { - "epoch": 0.20460358056265984, - "grad_norm": 0.10576959699392319, - "learning_rate": 7.481408306609662e-05, - "loss": 0.010009048134088516, - "step": 1200 - }, - { - "epoch": 0.20545609548167093, - "grad_norm": 0.08073283731937408, - "learning_rate": 7.481152616509037e-05, - "loss": 0.011126396805047989, - "step": 1205 - }, - { - "epoch": 0.20630861040068202, - "grad_norm": 0.09888558834791183, - "learning_rate": 7.480895184582253e-05, - "loss": 0.013096305727958679, - "step": 1210 - }, - { - "epoch": 0.2071611253196931, - "grad_norm": 0.09703118354082108, - "learning_rate": 7.48063601094949e-05, - "loss": 0.010653502494096755, - "step": 1215 - }, - { - "epoch": 0.20801364023870417, - "grad_norm": 0.10693266987800598, - "learning_rate": 7.48037509573174e-05, - "loss": 0.012283077836036682, - "step": 1220 - }, - { - "epoch": 0.20886615515771526, - "grad_norm": 0.1024889275431633, - "learning_rate": 7.480112439050804e-05, - "loss": 0.012971282005310059, - "step": 1225 - }, - { - "epoch": 0.20971867007672634, - "grad_norm": 0.10043203830718994, - "learning_rate": 7.4798480410293e-05, - "loss": 0.011451539397239686, - "step": 1230 - }, - { - "epoch": 0.21057118499573743, - "grad_norm": 0.11248672753572464, - "learning_rate": 7.47958190179066e-05, - "loss": 0.012805460393428803, - "step": 1235 - }, - { - "epoch": 0.21142369991474852, - "grad_norm": 0.08651525527238846, - "learning_rate": 7.479314021459123e-05, - "loss": 0.013016811013221741, - "step": 1240 - }, - { - "epoch": 0.21227621483375958, - "grad_norm": 0.06062095984816551, - "learning_rate": 7.479044400159746e-05, - "loss": 0.01299230456352234, - "step": 1245 - }, - { - "epoch": 0.21312872975277067, - "grad_norm": 0.1589215248823166, - "learning_rate": 7.478773038018397e-05, - "loss": 0.012607543170452118, - "step": 1250 - }, - { - "epoch": 0.21398124467178176, - "grad_norm": 0.12105076014995575, - "learning_rate": 7.478499935161758e-05, - "loss": 0.012772174179553985, - "step": 1255 - }, - { - "epoch": 0.21483375959079284, - "grad_norm": 0.0846070721745491, - "learning_rate": 7.478225091717323e-05, - "loss": 0.009387130290269852, - "step": 1260 - }, - { - "epoch": 0.21568627450980393, - "grad_norm": 0.054560381919145584, - "learning_rate": 7.477948507813396e-05, - "loss": 0.013299009203910828, - "step": 1265 - }, - { - "epoch": 0.216538789428815, - "grad_norm": 0.10125566273927689, - "learning_rate": 7.477670183579094e-05, - "loss": 0.013010343909263611, - "step": 1270 - }, - { - "epoch": 0.21739130434782608, - "grad_norm": 0.10493458807468414, - "learning_rate": 7.477390119144353e-05, - "loss": 0.013531042635440827, - "step": 1275 - }, - { - "epoch": 0.21824381926683717, - "grad_norm": 0.07453935593366623, - "learning_rate": 7.477108314639913e-05, - "loss": 0.01330820918083191, - "step": 1280 - }, - { - "epoch": 0.21909633418584826, - "grad_norm": 0.14430643618106842, - "learning_rate": 7.47682477019733e-05, - "loss": 0.015061555802822113, - "step": 1285 - }, - { - "epoch": 0.21994884910485935, - "grad_norm": 0.1073407456278801, - "learning_rate": 7.476539485948973e-05, - "loss": 0.011137319356203079, - "step": 1290 - }, - { - "epoch": 0.2208013640238704, - "grad_norm": 0.16794899106025696, - "learning_rate": 7.476252462028021e-05, - "loss": 0.013223762810230254, - "step": 1295 - }, - { - "epoch": 0.2216538789428815, - "grad_norm": 0.11509175598621368, - "learning_rate": 7.475963698568468e-05, - "loss": 0.012790821492671967, - "step": 1300 - }, - { - "epoch": 0.22250639386189258, - "grad_norm": 0.08615118265151978, - "learning_rate": 7.475673195705116e-05, - "loss": 0.011050455272197723, - "step": 1305 - }, - { - "epoch": 0.22335890878090367, - "grad_norm": 0.1186874732375145, - "learning_rate": 7.475380953573583e-05, - "loss": 0.011253353953361512, - "step": 1310 - }, - { - "epoch": 0.22421142369991476, - "grad_norm": 0.08680208027362823, - "learning_rate": 7.475086972310297e-05, - "loss": 0.010736887156963349, - "step": 1315 - }, - { - "epoch": 0.22506393861892582, - "grad_norm": 0.08204677700996399, - "learning_rate": 7.474791252052498e-05, - "loss": 0.011695361882448196, - "step": 1320 - }, - { - "epoch": 0.2259164535379369, - "grad_norm": 0.08945680409669876, - "learning_rate": 7.47449379293824e-05, - "loss": 0.01684057414531708, - "step": 1325 - }, - { - "epoch": 0.226768968456948, - "grad_norm": 0.12350911647081375, - "learning_rate": 7.474194595106384e-05, - "loss": 0.012560060620307923, - "step": 1330 - }, - { - "epoch": 0.22762148337595908, - "grad_norm": 0.09201455116271973, - "learning_rate": 7.473893658696605e-05, - "loss": 0.01128845140337944, - "step": 1335 - }, - { - "epoch": 0.22847399829497017, - "grad_norm": 0.12938448786735535, - "learning_rate": 7.473590983849396e-05, - "loss": 0.011510471999645232, - "step": 1340 - }, - { - "epoch": 0.22932651321398123, - "grad_norm": 0.0837291032075882, - "learning_rate": 7.473286570706047e-05, - "loss": 0.011677465587854385, - "step": 1345 - }, - { - "epoch": 0.23017902813299232, - "grad_norm": 0.12514546513557434, - "learning_rate": 7.472980419408675e-05, - "loss": 0.01308433711528778, - "step": 1350 - }, - { - "epoch": 0.2310315430520034, - "grad_norm": 0.05483829975128174, - "learning_rate": 7.472672530100199e-05, - "loss": 0.007203156501054764, - "step": 1355 - }, - { - "epoch": 0.2318840579710145, - "grad_norm": 0.13903425633907318, - "learning_rate": 7.472362902924352e-05, - "loss": 0.013231572508811951, - "step": 1360 - }, - { - "epoch": 0.23273657289002558, - "grad_norm": 0.09622041881084442, - "learning_rate": 7.472051538025678e-05, - "loss": 0.013325902819633483, - "step": 1365 - }, - { - "epoch": 0.23358908780903667, - "grad_norm": 0.0792275071144104, - "learning_rate": 7.471738435549533e-05, - "loss": 0.011098403483629227, - "step": 1370 - }, - { - "epoch": 0.23444160272804773, - "grad_norm": 0.11833506077528, - "learning_rate": 7.471423595642084e-05, - "loss": 0.014845246076583862, - "step": 1375 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 0.07531571388244629, - "learning_rate": 7.471107018450309e-05, - "loss": 0.011498528718948364, - "step": 1380 - }, - { - "epoch": 0.2361466325660699, - "grad_norm": 0.08739249408245087, - "learning_rate": 7.470788704121995e-05, - "loss": 0.013241058588027954, - "step": 1385 - }, - { - "epoch": 0.236999147485081, - "grad_norm": 0.07113732397556305, - "learning_rate": 7.470468652805743e-05, - "loss": 0.009632241725921632, - "step": 1390 - }, - { - "epoch": 0.23785166240409208, - "grad_norm": 0.07838430255651474, - "learning_rate": 7.470146864650965e-05, - "loss": 0.009344339370727539, - "step": 1395 - }, - { - "epoch": 0.23870417732310314, - "grad_norm": 0.1086372509598732, - "learning_rate": 7.46982333980788e-05, - "loss": 0.014708395302295684, - "step": 1400 - }, - { - "epoch": 0.23955669224211423, - "grad_norm": 0.06628046184778214, - "learning_rate": 7.469498078427519e-05, - "loss": 0.011472882330417633, - "step": 1405 - }, - { - "epoch": 0.24040920716112532, - "grad_norm": 0.13003386557102203, - "learning_rate": 7.46917108066173e-05, - "loss": 0.011933800578117371, - "step": 1410 - }, - { - "epoch": 0.2412617220801364, - "grad_norm": 0.07365831732749939, - "learning_rate": 7.468842346663162e-05, - "loss": 0.0102902352809906, - "step": 1415 - }, - { - "epoch": 0.2421142369991475, - "grad_norm": 0.11540158838033676, - "learning_rate": 7.468511876585279e-05, - "loss": 0.012016136944293977, - "step": 1420 - }, - { - "epoch": 0.24296675191815856, - "grad_norm": 0.11687944084405899, - "learning_rate": 7.468179670582359e-05, - "loss": 0.01773642897605896, - "step": 1425 - }, - { - "epoch": 0.24381926683716965, - "grad_norm": 0.10464506596326828, - "learning_rate": 7.467845728809483e-05, - "loss": 0.009476778656244278, - "step": 1430 - }, - { - "epoch": 0.24467178175618073, - "grad_norm": 0.11479093879461288, - "learning_rate": 7.46751005142255e-05, - "loss": 0.014100676774978638, - "step": 1435 - }, - { - "epoch": 0.24552429667519182, - "grad_norm": 0.1388610154390335, - "learning_rate": 7.46717263857826e-05, - "loss": 0.012735754251480103, - "step": 1440 - }, - { - "epoch": 0.2463768115942029, - "grad_norm": 0.08022485673427582, - "learning_rate": 7.466833490434132e-05, - "loss": 0.014391189813613892, - "step": 1445 - }, - { - "epoch": 0.24722932651321397, - "grad_norm": 0.12174725532531738, - "learning_rate": 7.466492607148492e-05, - "loss": 0.013387931883335114, - "step": 1450 - }, - { - "epoch": 0.24808184143222506, - "grad_norm": 0.1102161779999733, - "learning_rate": 7.466149988880474e-05, - "loss": 0.01143224760890007, - "step": 1455 - }, - { - "epoch": 0.24893435635123615, - "grad_norm": 0.14878468215465546, - "learning_rate": 7.465805635790024e-05, - "loss": 0.010428830236196517, - "step": 1460 - }, - { - "epoch": 0.24978687127024723, - "grad_norm": 0.3046579658985138, - "learning_rate": 7.4654595480379e-05, - "loss": 0.012648795545101166, - "step": 1465 - }, - { - "epoch": 0.24995737425404946, - "eval_loss": 0.036103956401348114, - "eval_runtime": 3.6524, - "eval_samples_per_second": 68.995, - "eval_steps_per_second": 1.095, - "step": 1466 - }, - { - "eval_cer_subset": 0.01393977885257381, - "eval_cer_subset_edit_distance": 856, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 1466 - }, - { - "epoch": 0.2506393861892583, - "grad_norm": 0.10660448670387268, - "learning_rate": 7.465111725785664e-05, - "loss": 0.013486798107624053, - "step": 1470 - }, - { - "epoch": 0.2514919011082694, - "grad_norm": 0.13888458907604218, - "learning_rate": 7.464762169195693e-05, - "loss": 0.015365575253963471, - "step": 1475 - }, - { - "epoch": 0.25234441602728047, - "grad_norm": 0.14945067465305328, - "learning_rate": 7.464410878431169e-05, - "loss": 0.01226709708571434, - "step": 1480 - }, - { - "epoch": 0.2531969309462916, - "grad_norm": 0.09638198465108871, - "learning_rate": 7.464057853656089e-05, - "loss": 0.012688608467578888, - "step": 1485 - }, - { - "epoch": 0.25404944586530265, - "grad_norm": 0.05725576728582382, - "learning_rate": 7.463703095035256e-05, - "loss": 0.011445847153663636, - "step": 1490 - }, - { - "epoch": 0.2549019607843137, - "grad_norm": 0.08474720269441605, - "learning_rate": 7.463346602734283e-05, - "loss": 0.01112249493598938, - "step": 1495 - }, - { - "epoch": 0.2557544757033248, - "grad_norm": 0.08283067494630814, - "learning_rate": 7.462988376919592e-05, - "loss": 0.01144670695066452, - "step": 1500 - }, - { - "epoch": 0.2566069906223359, - "grad_norm": 0.13687758147716522, - "learning_rate": 7.462628417758415e-05, - "loss": 0.012893360853195191, - "step": 1505 - }, - { - "epoch": 0.257459505541347, - "grad_norm": 0.16319195926189423, - "learning_rate": 7.462266725418793e-05, - "loss": 0.014364737272262573, - "step": 1510 - }, - { - "epoch": 0.25831202046035806, - "grad_norm": 0.0693240761756897, - "learning_rate": 7.461903300069576e-05, - "loss": 0.011550360918045044, - "step": 1515 - }, - { - "epoch": 0.2591645353793691, - "grad_norm": 0.0994478389620781, - "learning_rate": 7.461538141880423e-05, - "loss": 0.011711706221103669, - "step": 1520 - }, - { - "epoch": 0.26001705029838024, - "grad_norm": 0.20310325920581818, - "learning_rate": 7.461171251021802e-05, - "loss": 0.013178233802318574, - "step": 1525 - }, - { - "epoch": 0.2608695652173913, - "grad_norm": 0.07798318564891815, - "learning_rate": 7.460802627664991e-05, - "loss": 0.011273499578237534, - "step": 1530 - }, - { - "epoch": 0.2617220801364024, - "grad_norm": 0.1308072805404663, - "learning_rate": 7.460432271982073e-05, - "loss": 0.008084958046674728, - "step": 1535 - }, - { - "epoch": 0.2625745950554135, - "grad_norm": 0.08926808834075928, - "learning_rate": 7.460060184145944e-05, - "loss": 0.011974562704563142, - "step": 1540 - }, - { - "epoch": 0.26342710997442453, - "grad_norm": 0.07462260872125626, - "learning_rate": 7.459686364330307e-05, - "loss": 0.007739155739545822, - "step": 1545 - }, - { - "epoch": 0.26427962489343565, - "grad_norm": 0.10904734581708908, - "learning_rate": 7.459310812709675e-05, - "loss": 0.012024204432964324, - "step": 1550 - }, - { - "epoch": 0.2651321398124467, - "grad_norm": 0.11935116350650787, - "learning_rate": 7.458933529459364e-05, - "loss": 0.012462839484214783, - "step": 1555 - }, - { - "epoch": 0.2659846547314578, - "grad_norm": 0.08920887112617493, - "learning_rate": 7.458554514755506e-05, - "loss": 0.01472131609916687, - "step": 1560 - }, - { - "epoch": 0.2668371696504689, - "grad_norm": 0.12231490015983582, - "learning_rate": 7.458173768775036e-05, - "loss": 0.014967297017574311, - "step": 1565 - }, - { - "epoch": 0.26768968456947995, - "grad_norm": 0.10691904276609421, - "learning_rate": 7.4577912916957e-05, - "loss": 0.013200350105762482, - "step": 1570 - }, - { - "epoch": 0.26854219948849106, - "grad_norm": 0.06267247349023819, - "learning_rate": 7.457407083696049e-05, - "loss": 0.011946959048509597, - "step": 1575 - }, - { - "epoch": 0.2693947144075021, - "grad_norm": 0.10732340067625046, - "learning_rate": 7.457021144955448e-05, - "loss": 0.012722471356391906, - "step": 1580 - }, - { - "epoch": 0.27024722932651324, - "grad_norm": 0.08628841489553452, - "learning_rate": 7.456633475654061e-05, - "loss": 0.010444843024015427, - "step": 1585 - }, - { - "epoch": 0.2710997442455243, - "grad_norm": 0.1017296314239502, - "learning_rate": 7.456244075972866e-05, - "loss": 0.017299896478652953, - "step": 1590 - }, - { - "epoch": 0.27195225916453536, - "grad_norm": 0.07065381854772568, - "learning_rate": 7.455852946093652e-05, - "loss": 0.01379164457321167, - "step": 1595 - }, - { - "epoch": 0.2728047740835465, - "grad_norm": 0.08550920337438583, - "learning_rate": 7.455460086199008e-05, - "loss": 0.011976235359907151, - "step": 1600 - }, - { - "epoch": 0.27365728900255754, - "grad_norm": 0.08075132966041565, - "learning_rate": 7.455065496472335e-05, - "loss": 0.012481977045536042, - "step": 1605 - }, - { - "epoch": 0.27450980392156865, - "grad_norm": 0.08838896453380585, - "learning_rate": 7.454669177097839e-05, - "loss": 0.011825743317604064, - "step": 1610 - }, - { - "epoch": 0.2753623188405797, - "grad_norm": 0.06823412328958511, - "learning_rate": 7.454271128260537e-05, - "loss": 0.014278222620487214, - "step": 1615 - }, - { - "epoch": 0.27621483375959077, - "grad_norm": 0.09612765908241272, - "learning_rate": 7.45387135014625e-05, - "loss": 0.009220580756664275, - "step": 1620 - }, - { - "epoch": 0.2770673486786019, - "grad_norm": 0.08564051240682602, - "learning_rate": 7.45346984294161e-05, - "loss": 0.015146958827972411, - "step": 1625 - }, - { - "epoch": 0.27791986359761295, - "grad_norm": 0.0729006826877594, - "learning_rate": 7.453066606834052e-05, - "loss": 0.012136349081993103, - "step": 1630 - }, - { - "epoch": 0.27877237851662406, - "grad_norm": 0.10457300394773483, - "learning_rate": 7.452661642011818e-05, - "loss": 0.014803081750869751, - "step": 1635 - }, - { - "epoch": 0.2796248934356351, - "grad_norm": 0.09881619364023209, - "learning_rate": 7.452254948663964e-05, - "loss": 0.012653107941150665, - "step": 1640 - }, - { - "epoch": 0.2804774083546462, - "grad_norm": 0.12094103544950485, - "learning_rate": 7.451846526980343e-05, - "loss": 0.011742380261421204, - "step": 1645 - }, - { - "epoch": 0.2813299232736573, - "grad_norm": 0.06668030470609665, - "learning_rate": 7.451436377151624e-05, - "loss": 0.01095641851425171, - "step": 1650 - }, - { - "epoch": 0.28218243819266836, - "grad_norm": 0.06907116621732712, - "learning_rate": 7.451024499369278e-05, - "loss": 0.01093050017952919, - "step": 1655 - }, - { - "epoch": 0.2830349531116795, - "grad_norm": 0.13372033834457397, - "learning_rate": 7.45061089382558e-05, - "loss": 0.012350015342235565, - "step": 1660 - }, - { - "epoch": 0.28388746803069054, - "grad_norm": 0.06432037055492401, - "learning_rate": 7.450195560713617e-05, - "loss": 0.010150979459285735, - "step": 1665 - }, - { - "epoch": 0.2847399829497016, - "grad_norm": 0.10098759829998016, - "learning_rate": 7.449778500227281e-05, - "loss": 0.01070861890912056, - "step": 1670 - }, - { - "epoch": 0.2855924978687127, - "grad_norm": 0.1708894968032837, - "learning_rate": 7.449359712561269e-05, - "loss": 0.01218695342540741, - "step": 1675 - }, - { - "epoch": 0.2864450127877238, - "grad_norm": 0.15045367181301117, - "learning_rate": 7.448939197911084e-05, - "loss": 0.012416082620620727, - "step": 1680 - }, - { - "epoch": 0.2872975277067349, - "grad_norm": 0.08867572993040085, - "learning_rate": 7.44851695647304e-05, - "loss": 0.011927373707294464, - "step": 1685 - }, - { - "epoch": 0.28815004262574595, - "grad_norm": 0.1402040272951126, - "learning_rate": 7.448092988444247e-05, - "loss": 0.011733450740575791, - "step": 1690 - }, - { - "epoch": 0.289002557544757, - "grad_norm": 0.10436082631349564, - "learning_rate": 7.447667294022631e-05, - "loss": 0.013171072304248809, - "step": 1695 - }, - { - "epoch": 0.2898550724637681, - "grad_norm": 0.10628762096166611, - "learning_rate": 7.447239873406923e-05, - "loss": 0.012366896122694015, - "step": 1700 - }, - { - "epoch": 0.2907075873827792, - "grad_norm": 0.09782184660434723, - "learning_rate": 7.446810726796653e-05, - "loss": 0.011275313794612885, - "step": 1705 - }, - { - "epoch": 0.2915601023017903, - "grad_norm": 0.08403825014829636, - "learning_rate": 7.446379854392162e-05, - "loss": 0.010051032900810242, - "step": 1710 - }, - { - "epoch": 0.29241261722080136, - "grad_norm": 0.07938918471336365, - "learning_rate": 7.445947256394596e-05, - "loss": 0.00972949042916298, - "step": 1715 - }, - { - "epoch": 0.2932651321398124, - "grad_norm": 0.09250234067440033, - "learning_rate": 7.445512933005906e-05, - "loss": 0.009316288679838181, - "step": 1720 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 0.08939237147569656, - "learning_rate": 7.445076884428848e-05, - "loss": 0.007942373305559159, - "step": 1725 - }, - { - "epoch": 0.2949701619778346, - "grad_norm": 0.06440749019384384, - "learning_rate": 7.444639110866985e-05, - "loss": 0.008772502094507218, - "step": 1730 - }, - { - "epoch": 0.2958226768968457, - "grad_norm": 0.0980759784579277, - "learning_rate": 7.444199612524684e-05, - "loss": 0.0127939835190773, - "step": 1735 - }, - { - "epoch": 0.2966751918158568, - "grad_norm": 0.133849635720253, - "learning_rate": 7.443758389607117e-05, - "loss": 0.011026865988969802, - "step": 1740 - }, - { - "epoch": 0.29752770673486784, - "grad_norm": 0.08664857596158981, - "learning_rate": 7.443315442320263e-05, - "loss": 0.010273561626672745, - "step": 1745 - }, - { - "epoch": 0.29838022165387895, - "grad_norm": 0.11462656408548355, - "learning_rate": 7.442870770870902e-05, - "loss": 0.012825533747673035, - "step": 1750 - }, - { - "epoch": 0.29923273657289, - "grad_norm": 0.12586012482643127, - "learning_rate": 7.442424375466624e-05, - "loss": 0.01315489411354065, - "step": 1755 - }, - { - "epoch": 0.30008525149190113, - "grad_norm": 0.07139981538057327, - "learning_rate": 7.441976256315819e-05, - "loss": 0.010728174448013305, - "step": 1760 - }, - { - "epoch": 0.3009377664109122, - "grad_norm": 0.06837856024503708, - "learning_rate": 7.441526413627685e-05, - "loss": 0.012408022582530976, - "step": 1765 - }, - { - "epoch": 0.30179028132992325, - "grad_norm": 0.05851417034864426, - "learning_rate": 7.441074847612224e-05, - "loss": 0.009401807188987732, - "step": 1770 - }, - { - "epoch": 0.30264279624893436, - "grad_norm": 0.09595180302858353, - "learning_rate": 7.44062155848024e-05, - "loss": 0.010888323932886124, - "step": 1775 - }, - { - "epoch": 0.3034953111679454, - "grad_norm": 0.0811101421713829, - "learning_rate": 7.440166546443347e-05, - "loss": 0.00998341292142868, - "step": 1780 - }, - { - "epoch": 0.30434782608695654, - "grad_norm": 0.13257169723510742, - "learning_rate": 7.439709811713958e-05, - "loss": 0.014603719115257263, - "step": 1785 - }, - { - "epoch": 0.3052003410059676, - "grad_norm": 0.1428811252117157, - "learning_rate": 7.439251354505289e-05, - "loss": 0.01388871967792511, - "step": 1790 - }, - { - "epoch": 0.30605285592497866, - "grad_norm": 0.08253402262926102, - "learning_rate": 7.438791175031367e-05, - "loss": 0.010171836614608765, - "step": 1795 - }, - { - "epoch": 0.3069053708439898, - "grad_norm": 0.05991052836179733, - "learning_rate": 7.438329273507019e-05, - "loss": 0.01470649391412735, - "step": 1800 - }, - { - "epoch": 0.30775788576300084, - "grad_norm": 0.10785503685474396, - "learning_rate": 7.437865650147873e-05, - "loss": 0.012740308046340942, - "step": 1805 - }, - { - "epoch": 0.30861040068201195, - "grad_norm": 0.093068428337574, - "learning_rate": 7.437400305170367e-05, - "loss": 0.01183861643075943, - "step": 1810 - }, - { - "epoch": 0.309462915601023, - "grad_norm": 0.08662707358598709, - "learning_rate": 7.436933238791737e-05, - "loss": 0.011762722581624984, - "step": 1815 - }, - { - "epoch": 0.3103154305200341, - "grad_norm": 0.07826617360115051, - "learning_rate": 7.436464451230027e-05, - "loss": 0.009368828684091567, - "step": 1820 - }, - { - "epoch": 0.3111679454390452, - "grad_norm": 0.1295643448829651, - "learning_rate": 7.435993942704082e-05, - "loss": 0.010699732601642609, - "step": 1825 - }, - { - "epoch": 0.31202046035805625, - "grad_norm": 0.1412370651960373, - "learning_rate": 7.43552171343355e-05, - "loss": 0.0124404676258564, - "step": 1830 - }, - { - "epoch": 0.31287297527706737, - "grad_norm": 0.07793306559324265, - "learning_rate": 7.435047763638885e-05, - "loss": 0.010793016105890275, - "step": 1835 - }, - { - "epoch": 0.3137254901960784, - "grad_norm": 0.1273961067199707, - "learning_rate": 7.434572093541341e-05, - "loss": 0.012959575653076172, - "step": 1840 - }, - { - "epoch": 0.3145780051150895, - "grad_norm": 0.10340052098035812, - "learning_rate": 7.434094703362978e-05, - "loss": 0.011804693937301635, - "step": 1845 - }, - { - "epoch": 0.3154305200341006, - "grad_norm": 0.07878883183002472, - "learning_rate": 7.433615593326657e-05, - "loss": 0.011087532341480254, - "step": 1850 - }, - { - "epoch": 0.31628303495311166, - "grad_norm": 0.08166638761758804, - "learning_rate": 7.433134763656042e-05, - "loss": 0.010111966729164123, - "step": 1855 - }, - { - "epoch": 0.3171355498721228, - "grad_norm": 0.12048157304525375, - "learning_rate": 7.432652214575603e-05, - "loss": 0.013003784418106078, - "step": 1860 - }, - { - "epoch": 0.31798806479113384, - "grad_norm": 0.08009333908557892, - "learning_rate": 7.432167946310605e-05, - "loss": 0.01212536245584488, - "step": 1865 - }, - { - "epoch": 0.3188405797101449, - "grad_norm": 0.07344945520162582, - "learning_rate": 7.431681959087126e-05, - "loss": 0.011613032221794129, - "step": 1870 - }, - { - "epoch": 0.319693094629156, - "grad_norm": 0.09358638525009155, - "learning_rate": 7.431194253132037e-05, - "loss": 0.011946377158164979, - "step": 1875 - }, - { - "epoch": 0.3205456095481671, - "grad_norm": 0.14091502130031586, - "learning_rate": 7.430704828673016e-05, - "loss": 0.012845572829246522, - "step": 1880 - }, - { - "epoch": 0.3213981244671782, - "grad_norm": 0.0754130631685257, - "learning_rate": 7.430213685938543e-05, - "loss": 0.011171463876962662, - "step": 1885 - }, - { - "epoch": 0.32225063938618925, - "grad_norm": 0.10210556536912918, - "learning_rate": 7.429720825157901e-05, - "loss": 0.010276605188846589, - "step": 1890 - }, - { - "epoch": 0.32310315430520037, - "grad_norm": 0.10094697028398514, - "learning_rate": 7.429226246561173e-05, - "loss": 0.01233583763241768, - "step": 1895 - }, - { - "epoch": 0.32395566922421143, - "grad_norm": 0.0673881471157074, - "learning_rate": 7.428729950379244e-05, - "loss": 0.008631937205791473, - "step": 1900 - }, - { - "epoch": 0.3248081841432225, - "grad_norm": 0.11807650327682495, - "learning_rate": 7.428231936843803e-05, - "loss": 0.012879209220409393, - "step": 1905 - }, - { - "epoch": 0.3256606990622336, - "grad_norm": 0.0627446100115776, - "learning_rate": 7.427732206187338e-05, - "loss": 0.011548225581645966, - "step": 1910 - }, - { - "epoch": 0.32651321398124467, - "grad_norm": 0.09312627464532852, - "learning_rate": 7.427230758643139e-05, - "loss": 0.012763653695583344, - "step": 1915 - }, - { - "epoch": 0.3273657289002558, - "grad_norm": 0.12694048881530762, - "learning_rate": 7.426727594445302e-05, - "loss": 0.014219759404659272, - "step": 1920 - }, - { - "epoch": 0.32821824381926684, - "grad_norm": 0.09415233880281448, - "learning_rate": 7.426222713828717e-05, - "loss": 0.01088135689496994, - "step": 1925 - }, - { - "epoch": 0.3290707587382779, - "grad_norm": 0.1079363226890564, - "learning_rate": 7.425716117029082e-05, - "loss": 0.013090427219867706, - "step": 1930 - }, - { - "epoch": 0.329923273657289, - "grad_norm": 0.10847736895084381, - "learning_rate": 7.42520780428289e-05, - "loss": 0.011184506118297577, - "step": 1935 - }, - { - "epoch": 0.3307757885763001, - "grad_norm": 0.12416253983974457, - "learning_rate": 7.424697775827442e-05, - "loss": 0.012871085107326508, - "step": 1940 - }, - { - "epoch": 0.3316283034953112, - "grad_norm": 0.08419755846261978, - "learning_rate": 7.424186031900833e-05, - "loss": 0.01026538610458374, - "step": 1945 - }, - { - "epoch": 0.33248081841432225, - "grad_norm": 0.06923236697912216, - "learning_rate": 7.423672572741965e-05, - "loss": 0.012079264223575591, - "step": 1950 - }, - { - "epoch": 0.3333333333333333, - "grad_norm": 0.08516070991754532, - "learning_rate": 7.423157398590534e-05, - "loss": 0.011150284111499787, - "step": 1955 - }, - { - "epoch": 0.33418584825234443, - "grad_norm": 0.054969049990177155, - "learning_rate": 7.422640509687045e-05, - "loss": 0.008261225372552871, - "step": 1960 - }, - { - "epoch": 0.3350383631713555, - "grad_norm": 0.09037495404481888, - "learning_rate": 7.422121906272795e-05, - "loss": 0.015576986968517304, - "step": 1965 - }, - { - "epoch": 0.3358908780903666, - "grad_norm": 0.08676491677761078, - "learning_rate": 7.421601588589889e-05, - "loss": 0.01942193806171417, - "step": 1970 - }, - { - "epoch": 0.33674339300937767, - "grad_norm": 0.09090764820575714, - "learning_rate": 7.421079556881224e-05, - "loss": 0.012568703293800354, - "step": 1975 - }, - { - "epoch": 0.3375959079283887, - "grad_norm": 0.07859542965888977, - "learning_rate": 7.420555811390505e-05, - "loss": 0.011662108451128006, - "step": 1980 - }, - { - "epoch": 0.33844842284739984, - "grad_norm": 0.06368016451597214, - "learning_rate": 7.420030352362235e-05, - "loss": 0.010762494802474976, - "step": 1985 - }, - { - "epoch": 0.3393009377664109, - "grad_norm": 0.10950745642185211, - "learning_rate": 7.419503180041712e-05, - "loss": 0.012577894330024719, - "step": 1990 - }, - { - "epoch": 0.340153452685422, - "grad_norm": 0.07888182997703552, - "learning_rate": 7.41897429467504e-05, - "loss": 0.009134671837091445, - "step": 1995 - }, - { - "epoch": 0.3410059676044331, - "grad_norm": 0.08978903293609619, - "learning_rate": 7.41844369650912e-05, - "loss": 0.011774566024541855, - "step": 2000 - }, - { - "epoch": 0.34185848252344414, - "grad_norm": 0.07103633135557175, - "learning_rate": 7.417911385791653e-05, - "loss": 0.011116493493318558, - "step": 2005 - }, - { - "epoch": 0.34271099744245526, - "grad_norm": 0.07445147633552551, - "learning_rate": 7.417377362771138e-05, - "loss": 0.012135914713144302, - "step": 2010 - }, - { - "epoch": 0.3435635123614663, - "grad_norm": 0.09372841566801071, - "learning_rate": 7.416841627696876e-05, - "loss": 0.014101208746433258, - "step": 2015 - }, - { - "epoch": 0.34441602728047743, - "grad_norm": 0.10181085020303726, - "learning_rate": 7.416304180818966e-05, - "loss": 0.010917666554450988, - "step": 2020 - }, - { - "epoch": 0.3452685421994885, - "grad_norm": 0.08702226728200912, - "learning_rate": 7.415765022388305e-05, - "loss": 0.012508213520050049, - "step": 2025 - }, - { - "epoch": 0.34612105711849955, - "grad_norm": 0.11725348234176636, - "learning_rate": 7.415224152656591e-05, - "loss": 0.012123394012451171, - "step": 2030 - }, - { - "epoch": 0.34697357203751067, - "grad_norm": 0.10797812044620514, - "learning_rate": 7.414681571876321e-05, - "loss": 0.011308898031711579, - "step": 2035 - }, - { - "epoch": 0.34782608695652173, - "grad_norm": 0.07944193482398987, - "learning_rate": 7.414137280300787e-05, - "loss": 0.008828282356262207, - "step": 2040 - }, - { - "epoch": 0.34867860187553285, - "grad_norm": 0.09413408488035202, - "learning_rate": 7.413591278184086e-05, - "loss": 0.010974615067243575, - "step": 2045 - }, - { - "epoch": 0.3495311167945439, - "grad_norm": 0.13984905183315277, - "learning_rate": 7.413043565781107e-05, - "loss": 0.013567428290843963, - "step": 2050 - }, - { - "epoch": 0.35038363171355497, - "grad_norm": 0.09445049613714218, - "learning_rate": 7.41249414334754e-05, - "loss": 0.011122822761535645, - "step": 2055 - }, - { - "epoch": 0.3512361466325661, - "grad_norm": 0.07995510846376419, - "learning_rate": 7.411943011139877e-05, - "loss": 0.009908045828342437, - "step": 2060 - }, - { - "epoch": 0.35208866155157714, - "grad_norm": 0.1185273677110672, - "learning_rate": 7.411390169415402e-05, - "loss": 0.012709785997867585, - "step": 2065 - }, - { - "epoch": 0.35294117647058826, - "grad_norm": 0.11713512986898422, - "learning_rate": 7.4108356184322e-05, - "loss": 0.009765231609344482, - "step": 2070 - }, - { - "epoch": 0.3537936913895993, - "grad_norm": 0.06523539125919342, - "learning_rate": 7.410279358449155e-05, - "loss": 0.0113253653049469, - "step": 2075 - }, - { - "epoch": 0.3546462063086104, - "grad_norm": 0.07587762176990509, - "learning_rate": 7.409721389725948e-05, - "loss": 0.009385265409946442, - "step": 2080 - }, - { - "epoch": 0.3554987212276215, - "grad_norm": 0.05211614444851875, - "learning_rate": 7.409161712523056e-05, - "loss": 0.012498895078897477, - "step": 2085 - }, - { - "epoch": 0.35635123614663256, - "grad_norm": 0.12545894086360931, - "learning_rate": 7.408600327101755e-05, - "loss": 0.012212803959846497, - "step": 2090 - }, - { - "epoch": 0.35720375106564367, - "grad_norm": 0.10047369450330734, - "learning_rate": 7.40803723372412e-05, - "loss": 0.012341489642858505, - "step": 2095 - }, - { - "epoch": 0.35805626598465473, - "grad_norm": 0.13728737831115723, - "learning_rate": 7.40747243265302e-05, - "loss": 0.011351624131202697, - "step": 2100 - }, - { - "epoch": 0.3589087809036658, - "grad_norm": 0.1251213699579239, - "learning_rate": 7.406905924152125e-05, - "loss": 0.013545188307762145, - "step": 2105 - }, - { - "epoch": 0.3597612958226769, - "grad_norm": 0.07805601507425308, - "learning_rate": 7.406337708485897e-05, - "loss": 0.010711775720119476, - "step": 2110 - }, - { - "epoch": 0.36061381074168797, - "grad_norm": 0.08311845362186432, - "learning_rate": 7.405767785919598e-05, - "loss": 0.01128876730799675, - "step": 2115 - }, - { - "epoch": 0.3614663256606991, - "grad_norm": 0.09670841693878174, - "learning_rate": 7.405196156719291e-05, - "loss": 0.013085599243640899, - "step": 2120 - }, - { - "epoch": 0.36231884057971014, - "grad_norm": 0.10827390104532242, - "learning_rate": 7.404622821151829e-05, - "loss": 0.011315967142581939, - "step": 2125 - }, - { - "epoch": 0.3631713554987212, - "grad_norm": 0.08578862994909286, - "learning_rate": 7.404047779484862e-05, - "loss": 0.01172153502702713, - "step": 2130 - }, - { - "epoch": 0.3640238704177323, - "grad_norm": 0.08786064386367798, - "learning_rate": 7.403471031986841e-05, - "loss": 0.010834509134292602, - "step": 2135 - }, - { - "epoch": 0.3648763853367434, - "grad_norm": 0.07956185191869736, - "learning_rate": 7.402892578927012e-05, - "loss": 0.01222250759601593, - "step": 2140 - }, - { - "epoch": 0.3657289002557545, - "grad_norm": 0.10179547220468521, - "learning_rate": 7.402312420575414e-05, - "loss": 0.010667824000120164, - "step": 2145 - }, - { - "epoch": 0.36658141517476556, - "grad_norm": 0.10311263799667358, - "learning_rate": 7.401730557202884e-05, - "loss": 0.014463961124420166, - "step": 2150 - }, - { - "epoch": 0.3674339300937766, - "grad_norm": 0.0935206189751625, - "learning_rate": 7.401146989081058e-05, - "loss": 0.010451390594244003, - "step": 2155 - }, - { - "epoch": 0.36828644501278773, - "grad_norm": 0.08164738863706589, - "learning_rate": 7.400561716482362e-05, - "loss": 0.013009518384933472, - "step": 2160 - }, - { - "epoch": 0.3691389599317988, - "grad_norm": 0.0638088807463646, - "learning_rate": 7.399974739680022e-05, - "loss": 0.0109320767223835, - "step": 2165 - }, - { - "epoch": 0.3699914748508099, - "grad_norm": 0.09591665863990784, - "learning_rate": 7.399386058948057e-05, - "loss": 0.01293652206659317, - "step": 2170 - }, - { - "epoch": 0.37084398976982097, - "grad_norm": 0.08929681777954102, - "learning_rate": 7.398795674561285e-05, - "loss": 0.011034403741359711, - "step": 2175 - }, - { - "epoch": 0.37169650468883203, - "grad_norm": 0.07356081902980804, - "learning_rate": 7.398203586795315e-05, - "loss": 0.010477699339389801, - "step": 2180 - }, - { - "epoch": 0.37254901960784315, - "grad_norm": 0.1117938682436943, - "learning_rate": 7.397609795926555e-05, - "loss": 0.008920109272003174, - "step": 2185 - }, - { - "epoch": 0.3734015345268542, - "grad_norm": 0.10849595069885254, - "learning_rate": 7.397014302232204e-05, - "loss": 0.01170756369829178, - "step": 2190 - }, - { - "epoch": 0.3742540494458653, - "grad_norm": 0.08509895205497742, - "learning_rate": 7.396417105990261e-05, - "loss": 0.010042114555835724, - "step": 2195 - }, - { - "epoch": 0.3751065643648764, - "grad_norm": 0.10500915348529816, - "learning_rate": 7.395818207479515e-05, - "loss": 0.011792914569377899, - "step": 2200 - }, - { - "epoch": 0.37595907928388744, - "grad_norm": 0.06618086993694305, - "learning_rate": 7.395217606979553e-05, - "loss": 0.011084456741809846, - "step": 2205 - }, - { - "epoch": 0.37681159420289856, - "grad_norm": 0.08622384816408157, - "learning_rate": 7.394615304770756e-05, - "loss": 0.010955430567264557, - "step": 2210 - }, - { - "epoch": 0.3776641091219096, - "grad_norm": 0.10002898424863815, - "learning_rate": 7.394011301134296e-05, - "loss": 0.011026810109615325, - "step": 2215 - }, - { - "epoch": 0.37851662404092073, - "grad_norm": 0.16406390070915222, - "learning_rate": 7.393405596352144e-05, - "loss": 0.010511884093284607, - "step": 2220 - }, - { - "epoch": 0.3793691389599318, - "grad_norm": 0.077234648168087, - "learning_rate": 7.392798190707062e-05, - "loss": 0.011723069101572036, - "step": 2225 - }, - { - "epoch": 0.38022165387894286, - "grad_norm": 0.09080372750759125, - "learning_rate": 7.392189084482609e-05, - "loss": 0.010011065006256103, - "step": 2230 - }, - { - "epoch": 0.38107416879795397, - "grad_norm": 0.08161097019910812, - "learning_rate": 7.391578277963134e-05, - "loss": 0.012426529079675674, - "step": 2235 - }, - { - "epoch": 0.38192668371696503, - "grad_norm": 0.09220891445875168, - "learning_rate": 7.390965771433783e-05, - "loss": 0.011983324587345124, - "step": 2240 - }, - { - "epoch": 0.38277919863597615, - "grad_norm": 0.10752015560865402, - "learning_rate": 7.390351565180495e-05, - "loss": 0.014156198501586914, - "step": 2245 - }, - { - "epoch": 0.3836317135549872, - "grad_norm": 0.05059373378753662, - "learning_rate": 7.38973565949e-05, - "loss": 0.00998034030199051, - "step": 2250 - }, - { - "epoch": 0.38448422847399827, - "grad_norm": 0.11214456707239151, - "learning_rate": 7.389118054649824e-05, - "loss": 0.01075390875339508, - "step": 2255 - }, - { - "epoch": 0.3853367433930094, - "grad_norm": 0.07631754130125046, - "learning_rate": 7.388498750948286e-05, - "loss": 0.014638753235340118, - "step": 2260 - }, - { - "epoch": 0.38618925831202044, - "grad_norm": 0.07249671965837479, - "learning_rate": 7.387877748674499e-05, - "loss": 0.011368723213672638, - "step": 2265 - }, - { - "epoch": 0.38704177323103156, - "grad_norm": 0.11984748393297195, - "learning_rate": 7.387255048118364e-05, - "loss": 0.011021500825881958, - "step": 2270 - }, - { - "epoch": 0.3878942881500426, - "grad_norm": 0.08478229492902756, - "learning_rate": 7.386630649570581e-05, - "loss": 0.009952519088983536, - "step": 2275 - }, - { - "epoch": 0.3887468030690537, - "grad_norm": 0.11780049651861191, - "learning_rate": 7.386004553322639e-05, - "loss": 0.009453963488340378, - "step": 2280 - }, - { - "epoch": 0.3895993179880648, - "grad_norm": 0.06949981302022934, - "learning_rate": 7.38537675966682e-05, - "loss": 0.009042493999004364, - "step": 2285 - }, - { - "epoch": 0.39045183290707586, - "grad_norm": 0.11411654949188232, - "learning_rate": 7.3847472688962e-05, - "loss": 0.013985235989093781, - "step": 2290 - }, - { - "epoch": 0.391304347826087, - "grad_norm": 0.11030828952789307, - "learning_rate": 7.384116081304647e-05, - "loss": 0.01135389506816864, - "step": 2295 - }, - { - "epoch": 0.39215686274509803, - "grad_norm": 0.0808996856212616, - "learning_rate": 7.38348319718682e-05, - "loss": 0.011089587211608886, - "step": 2300 - }, - { - "epoch": 0.39300937766410915, - "grad_norm": 0.11319196224212646, - "learning_rate": 7.382848616838167e-05, - "loss": 0.01407056450843811, - "step": 2305 - }, - { - "epoch": 0.3938618925831202, - "grad_norm": 0.09301812201738358, - "learning_rate": 7.382212340554937e-05, - "loss": 0.012283433228731155, - "step": 2310 - }, - { - "epoch": 0.39471440750213127, - "grad_norm": 0.08611076325178146, - "learning_rate": 7.381574368634159e-05, - "loss": 0.012206315249204635, - "step": 2315 - }, - { - "epoch": 0.3955669224211424, - "grad_norm": 0.08104816824197769, - "learning_rate": 7.380934701373665e-05, - "loss": 0.01059889942407608, - "step": 2320 - }, - { - "epoch": 0.39641943734015345, - "grad_norm": 0.09607693552970886, - "learning_rate": 7.380293339072067e-05, - "loss": 0.010189100354909896, - "step": 2325 - }, - { - "epoch": 0.39727195225916456, - "grad_norm": 0.08985438197851181, - "learning_rate": 7.37965028202878e-05, - "loss": 0.01145355924963951, - "step": 2330 - }, - { - "epoch": 0.3981244671781756, - "grad_norm": 0.0767461284995079, - "learning_rate": 7.379005530544e-05, - "loss": 0.012533161044120788, - "step": 2335 - }, - { - "epoch": 0.3989769820971867, - "grad_norm": 0.17541736364364624, - "learning_rate": 7.378359084918724e-05, - "loss": 0.011619434505701066, - "step": 2340 - }, - { - "epoch": 0.3998294970161978, - "grad_norm": 0.07870234549045563, - "learning_rate": 7.377710945454728e-05, - "loss": 0.013362208008766174, - "step": 2345 - }, - { - "epoch": 0.40068201193520886, - "grad_norm": 0.08661636710166931, - "learning_rate": 7.377061112454589e-05, - "loss": 0.011086350679397583, - "step": 2350 - }, - { - "epoch": 0.40153452685422, - "grad_norm": 0.08467904478311539, - "learning_rate": 7.376409586221668e-05, - "loss": 0.008972878754138946, - "step": 2355 - }, - { - "epoch": 0.40238704177323104, - "grad_norm": 0.09345834702253342, - "learning_rate": 7.375756367060121e-05, - "loss": 0.01281469464302063, - "step": 2360 - }, - { - "epoch": 0.4032395566922421, - "grad_norm": 0.10789518058300018, - "learning_rate": 7.375101455274893e-05, - "loss": 0.012343473732471466, - "step": 2365 - }, - { - "epoch": 0.4040920716112532, - "grad_norm": 0.06546701490879059, - "learning_rate": 7.374444851171716e-05, - "loss": 0.012971158325672149, - "step": 2370 - }, - { - "epoch": 0.40494458653026427, - "grad_norm": 0.08968871831893921, - "learning_rate": 7.373786555057117e-05, - "loss": 0.012170027941465378, - "step": 2375 - }, - { - "epoch": 0.4057971014492754, - "grad_norm": 0.058557040989398956, - "learning_rate": 7.373126567238412e-05, - "loss": 0.009915658086538316, - "step": 2380 - }, - { - "epoch": 0.40664961636828645, - "grad_norm": 0.08734243363142014, - "learning_rate": 7.3724648880237e-05, - "loss": 0.009043127298355103, - "step": 2385 - }, - { - "epoch": 0.4075021312872975, - "grad_norm": 0.09249505400657654, - "learning_rate": 7.371801517721879e-05, - "loss": 0.008064758032560349, - "step": 2390 - }, - { - "epoch": 0.4083546462063086, - "grad_norm": 0.09015105664730072, - "learning_rate": 7.371136456642631e-05, - "loss": 0.007721304893493652, - "step": 2395 - }, - { - "epoch": 0.4092071611253197, - "grad_norm": 0.08557724207639694, - "learning_rate": 7.37046970509643e-05, - "loss": 0.010766822844743729, - "step": 2400 - }, - { - "epoch": 0.4100596760443308, - "grad_norm": 0.08001160621643066, - "learning_rate": 7.369801263394536e-05, - "loss": 0.00953015759587288, - "step": 2405 - }, - { - "epoch": 0.41091219096334186, - "grad_norm": 0.08470463752746582, - "learning_rate": 7.369131131849e-05, - "loss": 0.010154610127210617, - "step": 2410 - }, - { - "epoch": 0.4117647058823529, - "grad_norm": 0.07110592722892761, - "learning_rate": 7.368459310772664e-05, - "loss": 0.010146965831518173, - "step": 2415 - }, - { - "epoch": 0.41261722080136404, - "grad_norm": 0.06808072328567505, - "learning_rate": 7.367785800479152e-05, - "loss": 0.01043560653924942, - "step": 2420 - }, - { - "epoch": 0.4134697357203751, - "grad_norm": 0.09226541966199875, - "learning_rate": 7.367110601282884e-05, - "loss": 0.011138775944709777, - "step": 2425 - }, - { - "epoch": 0.4143222506393862, - "grad_norm": 0.08650510013103485, - "learning_rate": 7.366433713499067e-05, - "loss": 0.011451859772205353, - "step": 2430 - }, - { - "epoch": 0.4151747655583973, - "grad_norm": 0.11477349698543549, - "learning_rate": 7.365755137443691e-05, - "loss": 0.013105396926403046, - "step": 2435 - }, - { - "epoch": 0.41602728047740833, - "grad_norm": 0.1117088794708252, - "learning_rate": 7.365074873433541e-05, - "loss": 0.01190647780895233, - "step": 2440 - }, - { - "epoch": 0.41687979539641945, - "grad_norm": 0.058514054864645004, - "learning_rate": 7.364392921786185e-05, - "loss": 0.011006749421358108, - "step": 2445 - }, - { - "epoch": 0.4177323103154305, - "grad_norm": 0.0925084576010704, - "learning_rate": 7.363709282819981e-05, - "loss": 0.011449025571346283, - "step": 2450 - }, - { - "epoch": 0.4185848252344416, - "grad_norm": 0.10087555646896362, - "learning_rate": 7.363023956854074e-05, - "loss": 0.011715477705001831, - "step": 2455 - }, - { - "epoch": 0.4194373401534527, - "grad_norm": 0.08760760724544525, - "learning_rate": 7.362336944208399e-05, - "loss": 0.011089532822370528, - "step": 2460 - }, - { - "epoch": 0.42028985507246375, - "grad_norm": 0.09802501648664474, - "learning_rate": 7.361648245203674e-05, - "loss": 0.012181267142295837, - "step": 2465 - }, - { - "epoch": 0.42114236999147486, - "grad_norm": 0.06908553838729858, - "learning_rate": 7.36095786016141e-05, - "loss": 0.010319410264492035, - "step": 2470 - }, - { - "epoch": 0.4219948849104859, - "grad_norm": 0.07190519571304321, - "learning_rate": 7.360265789403896e-05, - "loss": 0.013445201516151428, - "step": 2475 - }, - { - "epoch": 0.42284739982949704, - "grad_norm": 0.06683836877346039, - "learning_rate": 7.359572033254219e-05, - "loss": 0.008111725002527237, - "step": 2480 - }, - { - "epoch": 0.4236999147485081, - "grad_norm": 0.07094739377498627, - "learning_rate": 7.358876592036245e-05, - "loss": 0.012130254507064819, - "step": 2485 - }, - { - "epoch": 0.42455242966751916, - "grad_norm": 0.11974254250526428, - "learning_rate": 7.358179466074629e-05, - "loss": 0.011426160484552384, - "step": 2490 - }, - { - "epoch": 0.4254049445865303, - "grad_norm": 0.07710634917020798, - "learning_rate": 7.357480655694814e-05, - "loss": 0.010044369101524352, - "step": 2495 - }, - { - "epoch": 0.42625745950554134, - "grad_norm": 0.08417962491512299, - "learning_rate": 7.356780161223026e-05, - "loss": 0.010821688175201415, - "step": 2500 - }, - { - "epoch": 0.42710997442455245, - "grad_norm": 0.11058598011732101, - "learning_rate": 7.35607798298628e-05, - "loss": 0.012949730455875396, - "step": 2505 - }, - { - "epoch": 0.4279624893435635, - "grad_norm": 0.08686384558677673, - "learning_rate": 7.355374121312377e-05, - "loss": 0.009096769988536835, - "step": 2510 - }, - { - "epoch": 0.4288150042625746, - "grad_norm": 0.11153281480073929, - "learning_rate": 7.354668576529903e-05, - "loss": 0.010433172434568405, - "step": 2515 - }, - { - "epoch": 0.4296675191815857, - "grad_norm": 0.08490245044231415, - "learning_rate": 7.353961348968229e-05, - "loss": 0.008478586375713349, - "step": 2520 - }, - { - "epoch": 0.43052003410059675, - "grad_norm": 0.06651579588651657, - "learning_rate": 7.353252438957511e-05, - "loss": 0.012342555820941925, - "step": 2525 - }, - { - "epoch": 0.43137254901960786, - "grad_norm": 0.08961665630340576, - "learning_rate": 7.352541846828694e-05, - "loss": 0.010387994349002838, - "step": 2530 - }, - { - "epoch": 0.4322250639386189, - "grad_norm": 0.08726584166288376, - "learning_rate": 7.351829572913505e-05, - "loss": 0.009760166704654693, - "step": 2535 - }, - { - "epoch": 0.43307757885763, - "grad_norm": 0.06280151754617691, - "learning_rate": 7.351115617544459e-05, - "loss": 0.01087048500776291, - "step": 2540 - }, - { - "epoch": 0.4339300937766411, - "grad_norm": 0.09519831836223602, - "learning_rate": 7.350399981054851e-05, - "loss": 0.011516393721103668, - "step": 2545 - }, - { - "epoch": 0.43478260869565216, - "grad_norm": 0.09179427474737167, - "learning_rate": 7.349682663778766e-05, - "loss": 0.013757939636707305, - "step": 2550 - }, - { - "epoch": 0.4356351236146633, - "grad_norm": 0.10378465801477432, - "learning_rate": 7.34896366605107e-05, - "loss": 0.011337973177433014, - "step": 2555 - }, - { - "epoch": 0.43648763853367434, - "grad_norm": 0.14043129980564117, - "learning_rate": 7.348242988207418e-05, - "loss": 0.01203509122133255, - "step": 2560 - }, - { - "epoch": 0.4373401534526854, - "grad_norm": 0.06442756950855255, - "learning_rate": 7.347520630584243e-05, - "loss": 0.007210708409547806, - "step": 2565 - }, - { - "epoch": 0.4381926683716965, - "grad_norm": 0.05981998145580292, - "learning_rate": 7.346796593518768e-05, - "loss": 0.009825873374938964, - "step": 2570 - }, - { - "epoch": 0.4390451832907076, - "grad_norm": 0.10198855400085449, - "learning_rate": 7.346070877348996e-05, - "loss": 0.013066151738166809, - "step": 2575 - }, - { - "epoch": 0.4398976982097187, - "grad_norm": 0.12545716762542725, - "learning_rate": 7.345343482413716e-05, - "loss": 0.008229418843984603, - "step": 2580 - }, - { - "epoch": 0.44075021312872975, - "grad_norm": 0.1352240890264511, - "learning_rate": 7.344614409052501e-05, - "loss": 0.013183671236038207, - "step": 2585 - }, - { - "epoch": 0.4416027280477408, - "grad_norm": 0.07198570668697357, - "learning_rate": 7.343883657605704e-05, - "loss": 0.010311058908700942, - "step": 2590 - }, - { - "epoch": 0.4424552429667519, - "grad_norm": 0.08454001694917679, - "learning_rate": 7.343151228414469e-05, - "loss": 0.009928110986948013, - "step": 2595 - }, - { - "epoch": 0.443307757885763, - "grad_norm": 0.07289708405733109, - "learning_rate": 7.342417121820714e-05, - "loss": 0.011071844398975373, - "step": 2600 - }, - { - "epoch": 0.4441602728047741, - "grad_norm": 0.12291301786899567, - "learning_rate": 7.341681338167145e-05, - "loss": 0.011248499900102616, - "step": 2605 - }, - { - "epoch": 0.44501278772378516, - "grad_norm": 0.14277565479278564, - "learning_rate": 7.340943877797252e-05, - "loss": 0.010025183856487273, - "step": 2610 - }, - { - "epoch": 0.4458653026427962, - "grad_norm": 0.07569251209497452, - "learning_rate": 7.340204741055304e-05, - "loss": 0.009996208548545837, - "step": 2615 - }, - { - "epoch": 0.44671781756180734, - "grad_norm": 0.10494589060544968, - "learning_rate": 7.339463928286357e-05, - "loss": 0.01392391324043274, - "step": 2620 - }, - { - "epoch": 0.4475703324808184, - "grad_norm": 0.14377856254577637, - "learning_rate": 7.338721439836245e-05, - "loss": 0.012823046743869781, - "step": 2625 - }, - { - "epoch": 0.4484228473998295, - "grad_norm": 0.06943785399198532, - "learning_rate": 7.337977276051586e-05, - "loss": 0.009452010691165923, - "step": 2630 - }, - { - "epoch": 0.4492753623188406, - "grad_norm": 0.09933419525623322, - "learning_rate": 7.337231437279783e-05, - "loss": 0.008945996314287186, - "step": 2635 - }, - { - "epoch": 0.45012787723785164, - "grad_norm": 0.09861225634813309, - "learning_rate": 7.336483923869016e-05, - "loss": 0.010671885311603546, - "step": 2640 - }, - { - "epoch": 0.45098039215686275, - "grad_norm": 0.08303772658109665, - "learning_rate": 7.335734736168249e-05, - "loss": 0.009589634835720062, - "step": 2645 - }, - { - "epoch": 0.4518329070758738, - "grad_norm": 0.08657588064670563, - "learning_rate": 7.334983874527231e-05, - "loss": 0.008064036071300507, - "step": 2650 - }, - { - "epoch": 0.45268542199488493, - "grad_norm": 0.10513710975646973, - "learning_rate": 7.334231339296485e-05, - "loss": 0.01647743284702301, - "step": 2655 - }, - { - "epoch": 0.453537936913896, - "grad_norm": 0.10341943055391312, - "learning_rate": 7.333477130827322e-05, - "loss": 0.009101226180791854, - "step": 2660 - }, - { - "epoch": 0.45439045183290705, - "grad_norm": 0.09740681946277618, - "learning_rate": 7.33272124947183e-05, - "loss": 0.011460770666599274, - "step": 2665 - }, - { - "epoch": 0.45524296675191817, - "grad_norm": 0.06477998197078705, - "learning_rate": 7.331963695582881e-05, - "loss": 0.011711791157722473, - "step": 2670 - }, - { - "epoch": 0.4560954816709292, - "grad_norm": 0.0881948322057724, - "learning_rate": 7.331204469514127e-05, - "loss": 0.009621420502662658, - "step": 2675 - }, - { - "epoch": 0.45694799658994034, - "grad_norm": 0.09553391486406326, - "learning_rate": 7.330443571619998e-05, - "loss": 0.011725078523159026, - "step": 2680 - }, - { - "epoch": 0.4578005115089514, - "grad_norm": 0.10480209439992905, - "learning_rate": 7.329681002255706e-05, - "loss": 0.012353558838367463, - "step": 2685 - }, - { - "epoch": 0.45865302642796246, - "grad_norm": 0.08409439772367477, - "learning_rate": 7.328916761777247e-05, - "loss": 0.01114615797996521, - "step": 2690 - }, - { - "epoch": 0.4595055413469736, - "grad_norm": 0.07166923582553864, - "learning_rate": 7.32815085054139e-05, - "loss": 0.008672221004962921, - "step": 2695 - }, - { - "epoch": 0.46035805626598464, - "grad_norm": 0.07308658212423325, - "learning_rate": 7.327383268905691e-05, - "loss": 0.012448658794164657, - "step": 2700 - }, - { - "epoch": 0.46121057118499575, - "grad_norm": 0.14019793272018433, - "learning_rate": 7.32661401722848e-05, - "loss": 0.013477186858654022, - "step": 2705 - }, - { - "epoch": 0.4620630861040068, - "grad_norm": 0.0753963515162468, - "learning_rate": 7.325843095868872e-05, - "loss": 0.011373884975910187, - "step": 2710 - }, - { - "epoch": 0.4629156010230179, - "grad_norm": 0.07312130182981491, - "learning_rate": 7.325070505186756e-05, - "loss": 0.012329152971506118, - "step": 2715 - }, - { - "epoch": 0.463768115942029, - "grad_norm": 0.06200556829571724, - "learning_rate": 7.324296245542806e-05, - "loss": 0.008847354352474213, - "step": 2720 - }, - { - "epoch": 0.46462063086104005, - "grad_norm": 0.11015846580266953, - "learning_rate": 7.32352031729847e-05, - "loss": 0.013304698467254638, - "step": 2725 - }, - { - "epoch": 0.46547314578005117, - "grad_norm": 0.05926821380853653, - "learning_rate": 7.322742720815978e-05, - "loss": 0.011919337511062621, - "step": 2730 - }, - { - "epoch": 0.4663256606990622, - "grad_norm": 0.102846160531044, - "learning_rate": 7.321963456458337e-05, - "loss": 0.010952814668416976, - "step": 2735 - }, - { - "epoch": 0.46717817561807334, - "grad_norm": 0.10767021775245667, - "learning_rate": 7.321182524589334e-05, - "loss": 0.012438956648111343, - "step": 2740 - }, - { - "epoch": 0.4680306905370844, - "grad_norm": 0.08611919730901718, - "learning_rate": 7.320399925573534e-05, - "loss": 0.008686845004558564, - "step": 2745 - }, - { - "epoch": 0.46888320545609546, - "grad_norm": 0.07483147829771042, - "learning_rate": 7.31961565977628e-05, - "loss": 0.011065713316202163, - "step": 2750 - }, - { - "epoch": 0.4697357203751066, - "grad_norm": 0.08029857277870178, - "learning_rate": 7.318829727563696e-05, - "loss": 0.012208929657936097, - "step": 2755 - }, - { - "epoch": 0.47058823529411764, - "grad_norm": 0.09076030552387238, - "learning_rate": 7.318042129302676e-05, - "loss": 0.010283030569553375, - "step": 2760 - }, - { - "epoch": 0.47144075021312876, - "grad_norm": 0.07009804993867874, - "learning_rate": 7.317252865360902e-05, - "loss": 0.010625988245010376, - "step": 2765 - }, - { - "epoch": 0.4722932651321398, - "grad_norm": 0.07213665544986725, - "learning_rate": 7.316461936106826e-05, - "loss": 0.010299822688102723, - "step": 2770 - }, - { - "epoch": 0.4731457800511509, - "grad_norm": 0.08464398980140686, - "learning_rate": 7.315669341909679e-05, - "loss": 0.010440715402364732, - "step": 2775 - }, - { - "epoch": 0.473998294970162, - "grad_norm": 0.08878160268068314, - "learning_rate": 7.314875083139475e-05, - "loss": 0.01015128344297409, - "step": 2780 - }, - { - "epoch": 0.47485080988917305, - "grad_norm": 0.05885029211640358, - "learning_rate": 7.314079160166996e-05, - "loss": 0.00943310335278511, - "step": 2785 - }, - { - "epoch": 0.47570332480818417, - "grad_norm": 0.07288813591003418, - "learning_rate": 7.313281573363809e-05, - "loss": 0.009116576611995697, - "step": 2790 - }, - { - "epoch": 0.47655583972719523, - "grad_norm": 0.09088344126939774, - "learning_rate": 7.31248232310225e-05, - "loss": 0.010344403237104416, - "step": 2795 - }, - { - "epoch": 0.4774083546462063, - "grad_norm": 0.08182916790246964, - "learning_rate": 7.311681409755437e-05, - "loss": 0.010874876379966735, - "step": 2800 - }, - { - "epoch": 0.4782608695652174, - "grad_norm": 0.08280645310878754, - "learning_rate": 7.310878833697264e-05, - "loss": 0.007568147033452988, - "step": 2805 - }, - { - "epoch": 0.47911338448422847, - "grad_norm": 0.10462478548288345, - "learning_rate": 7.3100745953024e-05, - "loss": 0.011740683764219283, - "step": 2810 - }, - { - "epoch": 0.4799658994032396, - "grad_norm": 0.07685881853103638, - "learning_rate": 7.30926869494629e-05, - "loss": 0.009284010529518128, - "step": 2815 - }, - { - "epoch": 0.48081841432225064, - "grad_norm": 0.05211766064167023, - "learning_rate": 7.308461133005156e-05, - "loss": 0.009633362293243408, - "step": 2820 - }, - { - "epoch": 0.4816709292412617, - "grad_norm": 0.07862114161252975, - "learning_rate": 7.307651909855993e-05, - "loss": 0.012355846166610718, - "step": 2825 - }, - { - "epoch": 0.4825234441602728, - "grad_norm": 0.09950421750545502, - "learning_rate": 7.306841025876573e-05, - "loss": 0.010842062532901764, - "step": 2830 - }, - { - "epoch": 0.4833759590792839, - "grad_norm": 0.08446205407381058, - "learning_rate": 7.306028481445446e-05, - "loss": 0.008424797654151916, - "step": 2835 - }, - { - "epoch": 0.484228473998295, - "grad_norm": 0.1424778699874878, - "learning_rate": 7.305214276941934e-05, - "loss": 0.01177324503660202, - "step": 2840 - }, - { - "epoch": 0.48508098891730606, - "grad_norm": 0.07312945276498795, - "learning_rate": 7.304398412746134e-05, - "loss": 0.010038022696971894, - "step": 2845 - }, - { - "epoch": 0.4859335038363171, - "grad_norm": 0.07043888419866562, - "learning_rate": 7.303580889238917e-05, - "loss": 0.008848214149475097, - "step": 2850 - }, - { - "epoch": 0.48678601875532823, - "grad_norm": 0.09851706773042679, - "learning_rate": 7.302761706801934e-05, - "loss": 0.011452250182628632, - "step": 2855 - }, - { - "epoch": 0.4876385336743393, - "grad_norm": 0.07379815727472305, - "learning_rate": 7.301940865817604e-05, - "loss": 0.010087071359157563, - "step": 2860 - }, - { - "epoch": 0.4884910485933504, - "grad_norm": 0.12832187116146088, - "learning_rate": 7.301118366669123e-05, - "loss": 0.013372799754142762, - "step": 2865 - }, - { - "epoch": 0.48934356351236147, - "grad_norm": 0.06776788830757141, - "learning_rate": 7.300294209740462e-05, - "loss": 0.010031795501708985, - "step": 2870 - }, - { - "epoch": 0.49019607843137253, - "grad_norm": 0.06495808809995651, - "learning_rate": 7.299468395416364e-05, - "loss": 0.011152566224336625, - "step": 2875 - }, - { - "epoch": 0.49104859335038364, - "grad_norm": 0.06433792412281036, - "learning_rate": 7.298640924082346e-05, - "loss": 0.012774203717708588, - "step": 2880 - }, - { - "epoch": 0.4919011082693947, - "grad_norm": 0.066926009953022, - "learning_rate": 7.2978117961247e-05, - "loss": 0.011111211776733399, - "step": 2885 - }, - { - "epoch": 0.4927536231884058, - "grad_norm": 0.08211687207221985, - "learning_rate": 7.296981011930493e-05, - "loss": 0.009508269280195237, - "step": 2890 - }, - { - "epoch": 0.4936061381074169, - "grad_norm": 0.09815993160009384, - "learning_rate": 7.296148571887558e-05, - "loss": 0.0117066890001297, - "step": 2895 - }, - { - "epoch": 0.49445865302642794, - "grad_norm": 0.07543535530567169, - "learning_rate": 7.295314476384508e-05, - "loss": 0.008867967873811722, - "step": 2900 - }, - { - "epoch": 0.49531116794543906, - "grad_norm": 0.07558202743530273, - "learning_rate": 7.294478725810728e-05, - "loss": 0.01093400940299034, - "step": 2905 - }, - { - "epoch": 0.4961636828644501, - "grad_norm": 0.06642191112041473, - "learning_rate": 7.293641320556371e-05, - "loss": 0.008366364240646362, - "step": 2910 - }, - { - "epoch": 0.49701619778346123, - "grad_norm": 0.07226760685443878, - "learning_rate": 7.292802261012368e-05, - "loss": 0.012197307497262954, - "step": 2915 - }, - { - "epoch": 0.4978687127024723, - "grad_norm": 0.08546584844589233, - "learning_rate": 7.29196154757042e-05, - "loss": 0.010272269695997238, - "step": 2920 - }, - { - "epoch": 0.49872122762148335, - "grad_norm": 0.0559270940721035, - "learning_rate": 7.291119180622998e-05, - "loss": 0.009690707921981812, - "step": 2925 - }, - { - "epoch": 0.49957374254049447, - "grad_norm": 0.11211635917425156, - "learning_rate": 7.290275160563349e-05, - "loss": 0.01505405604839325, - "step": 2930 - }, - { - "epoch": 0.4999147485080989, - "eval_loss": 0.035044603049755096, - "eval_runtime": 3.5861, - "eval_samples_per_second": 70.272, - "eval_steps_per_second": 1.115, - "step": 2932 - }, - { - "eval_cer_subset": 0.01374436139202371, - "eval_cer_subset_edit_distance": 844, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 2932 - }, - { - "epoch": 0.5004262574595055, - "grad_norm": 0.08485773205757141, - "learning_rate": 7.289429487785488e-05, - "loss": 0.01260426789522171, - "step": 2935 - }, - { - "epoch": 0.5012787723785166, - "grad_norm": 0.08039058744907379, - "learning_rate": 7.288582162684203e-05, - "loss": 0.012322144955396653, - "step": 2940 - }, - { - "epoch": 0.5021312872975278, - "grad_norm": 0.16017615795135498, - "learning_rate": 7.287733185655057e-05, - "loss": 0.009620364010334014, - "step": 2945 - }, - { - "epoch": 0.5029838022165388, - "grad_norm": 0.06721053272485733, - "learning_rate": 7.286882557094376e-05, - "loss": 0.009893904626369476, - "step": 2950 - }, - { - "epoch": 0.5038363171355499, - "grad_norm": 0.08132930099964142, - "learning_rate": 7.286030277399264e-05, - "loss": 0.012833705544471741, - "step": 2955 - }, - { - "epoch": 0.5046888320545609, - "grad_norm": 0.09076893329620361, - "learning_rate": 7.285176346967595e-05, - "loss": 0.011492121219635009, - "step": 2960 - }, - { - "epoch": 0.505541346973572, - "grad_norm": 0.1023377999663353, - "learning_rate": 7.284320766198008e-05, - "loss": 0.01216188371181488, - "step": 2965 - }, - { - "epoch": 0.5063938618925832, - "grad_norm": 0.07568195462226868, - "learning_rate": 7.283463535489921e-05, - "loss": 0.014794780313968659, - "step": 2970 - }, - { - "epoch": 0.5072463768115942, - "grad_norm": 0.11283870786428452, - "learning_rate": 7.282604655243515e-05, - "loss": 0.012774300575256348, - "step": 2975 - }, - { - "epoch": 0.5080988917306053, - "grad_norm": 0.07101167738437653, - "learning_rate": 7.281744125859746e-05, - "loss": 0.010759322345256806, - "step": 2980 - }, - { - "epoch": 0.5089514066496164, - "grad_norm": 0.07677409052848816, - "learning_rate": 7.280881947740336e-05, - "loss": 0.010482230037450791, - "step": 2985 - }, - { - "epoch": 0.5098039215686274, - "grad_norm": 0.08568017184734344, - "learning_rate": 7.280018121287777e-05, - "loss": 0.012674462795257569, - "step": 2990 - }, - { - "epoch": 0.5106564364876386, - "grad_norm": 0.07830876111984253, - "learning_rate": 7.279152646905336e-05, - "loss": 0.009349775314331055, - "step": 2995 - }, - { - "epoch": 0.5115089514066496, - "grad_norm": 0.07408280670642853, - "learning_rate": 7.278285524997044e-05, - "loss": 0.010303238779306412, - "step": 3000 - }, - { - "epoch": 0.5123614663256607, - "grad_norm": 0.09053376317024231, - "learning_rate": 7.277416755967698e-05, - "loss": 0.012187518179416656, - "step": 3005 - }, - { - "epoch": 0.5132139812446718, - "grad_norm": 0.07432437688112259, - "learning_rate": 7.276546340222875e-05, - "loss": 0.009504207968711853, - "step": 3010 - }, - { - "epoch": 0.5140664961636828, - "grad_norm": 0.09075863659381866, - "learning_rate": 7.275674278168908e-05, - "loss": 0.010764679312705994, - "step": 3015 - }, - { - "epoch": 0.514919011082694, - "grad_norm": 0.08363319933414459, - "learning_rate": 7.274800570212909e-05, - "loss": 0.011034657061100007, - "step": 3020 - }, - { - "epoch": 0.5157715260017051, - "grad_norm": 0.08179081231355667, - "learning_rate": 7.273925216762753e-05, - "loss": 0.012276624888181686, - "step": 3025 - }, - { - "epoch": 0.5166240409207161, - "grad_norm": 0.10797501355409622, - "learning_rate": 7.273048218227083e-05, - "loss": 0.008887678384780884, - "step": 3030 - }, - { - "epoch": 0.5174765558397272, - "grad_norm": 0.08237873017787933, - "learning_rate": 7.27216957501531e-05, - "loss": 0.010879174619913102, - "step": 3035 - }, - { - "epoch": 0.5183290707587382, - "grad_norm": 0.10010047256946564, - "learning_rate": 7.271289287537616e-05, - "loss": 0.0103249654173851, - "step": 3040 - }, - { - "epoch": 0.5191815856777494, - "grad_norm": 0.06411991268396378, - "learning_rate": 7.270407356204948e-05, - "loss": 0.006414853036403656, - "step": 3045 - }, - { - "epoch": 0.5200341005967605, - "grad_norm": 0.09925824403762817, - "learning_rate": 7.26952378142902e-05, - "loss": 0.010811964422464371, - "step": 3050 - }, - { - "epoch": 0.5208866155157715, - "grad_norm": 0.07986702769994736, - "learning_rate": 7.268638563622317e-05, - "loss": 0.011965467780828475, - "step": 3055 - }, - { - "epoch": 0.5217391304347826, - "grad_norm": 0.07426656037569046, - "learning_rate": 7.267751703198082e-05, - "loss": 0.0093523807823658, - "step": 3060 - }, - { - "epoch": 0.5225916453537937, - "grad_norm": 0.11460934579372406, - "learning_rate": 7.266863200570338e-05, - "loss": 0.01224176660180092, - "step": 3065 - }, - { - "epoch": 0.5234441602728048, - "grad_norm": 0.10174648463726044, - "learning_rate": 7.265973056153864e-05, - "loss": 0.011203842610120774, - "step": 3070 - }, - { - "epoch": 0.5242966751918159, - "grad_norm": 0.06445316970348358, - "learning_rate": 7.265081270364209e-05, - "loss": 0.010346656292676925, - "step": 3075 - }, - { - "epoch": 0.525149190110827, - "grad_norm": 0.08397547155618668, - "learning_rate": 7.264187843617688e-05, - "loss": 0.011372068524360656, - "step": 3080 - }, - { - "epoch": 0.526001705029838, - "grad_norm": 0.07325135916471481, - "learning_rate": 7.263292776331384e-05, - "loss": 0.01116851419210434, - "step": 3085 - }, - { - "epoch": 0.5268542199488491, - "grad_norm": 0.1034390926361084, - "learning_rate": 7.262396068923144e-05, - "loss": 0.011953853815793992, - "step": 3090 - }, - { - "epoch": 0.5277067348678602, - "grad_norm": 0.08395690470933914, - "learning_rate": 7.26149772181158e-05, - "loss": 0.011437299847602844, - "step": 3095 - }, - { - "epoch": 0.5285592497868713, - "grad_norm": 0.09495387226343155, - "learning_rate": 7.260597735416068e-05, - "loss": 0.009634804725646973, - "step": 3100 - }, - { - "epoch": 0.5294117647058824, - "grad_norm": 0.07444775849580765, - "learning_rate": 7.259696110156756e-05, - "loss": 0.009771790355443954, - "step": 3105 - }, - { - "epoch": 0.5302642796248934, - "grad_norm": 0.061964571475982666, - "learning_rate": 7.258792846454551e-05, - "loss": 0.007979755848646164, - "step": 3110 - }, - { - "epoch": 0.5311167945439045, - "grad_norm": 0.11025935411453247, - "learning_rate": 7.257887944731125e-05, - "loss": 0.012162110209465027, - "step": 3115 - }, - { - "epoch": 0.5319693094629157, - "grad_norm": 0.07793140411376953, - "learning_rate": 7.256981405408918e-05, - "loss": 0.00897146388888359, - "step": 3120 - }, - { - "epoch": 0.5328218243819267, - "grad_norm": 0.0773436427116394, - "learning_rate": 7.256073228911132e-05, - "loss": 0.009621264040470123, - "step": 3125 - }, - { - "epoch": 0.5336743393009378, - "grad_norm": 0.07340693473815918, - "learning_rate": 7.255163415661735e-05, - "loss": 0.01072111278772354, - "step": 3130 - }, - { - "epoch": 0.5345268542199488, - "grad_norm": 0.0971943810582161, - "learning_rate": 7.254251966085455e-05, - "loss": 0.009457825869321822, - "step": 3135 - }, - { - "epoch": 0.5353793691389599, - "grad_norm": 0.08840794116258621, - "learning_rate": 7.25333888060779e-05, - "loss": 0.015866565704345702, - "step": 3140 - }, - { - "epoch": 0.5362318840579711, - "grad_norm": 0.07126007229089737, - "learning_rate": 7.252424159654999e-05, - "loss": 0.012925322353839874, - "step": 3145 - }, - { - "epoch": 0.5370843989769821, - "grad_norm": 0.05989958345890045, - "learning_rate": 7.251507803654103e-05, - "loss": 0.007374878972768784, - "step": 3150 - }, - { - "epoch": 0.5379369138959932, - "grad_norm": 0.0661931037902832, - "learning_rate": 7.250589813032885e-05, - "loss": 0.009713394194841385, - "step": 3155 - }, - { - "epoch": 0.5387894288150042, - "grad_norm": 0.0813523456454277, - "learning_rate": 7.2496701882199e-05, - "loss": 0.007980254292488099, - "step": 3160 - }, - { - "epoch": 0.5396419437340153, - "grad_norm": 0.0565156452357769, - "learning_rate": 7.248748929644453e-05, - "loss": 0.010806798934936523, - "step": 3165 - }, - { - "epoch": 0.5404944586530265, - "grad_norm": 0.045107364654541016, - "learning_rate": 7.247826037736621e-05, - "loss": 0.013011330366134643, - "step": 3170 - }, - { - "epoch": 0.5413469735720375, - "grad_norm": 0.0623495988547802, - "learning_rate": 7.246901512927241e-05, - "loss": 0.012109772115945817, - "step": 3175 - }, - { - "epoch": 0.5421994884910486, - "grad_norm": 0.09943851083517075, - "learning_rate": 7.24597535564791e-05, - "loss": 0.011384092271327972, - "step": 3180 - }, - { - "epoch": 0.5430520034100597, - "grad_norm": 0.12090208381414413, - "learning_rate": 7.245047566330991e-05, - "loss": 0.011156149953603745, - "step": 3185 - }, - { - "epoch": 0.5439045183290707, - "grad_norm": 0.10226333141326904, - "learning_rate": 7.244118145409607e-05, - "loss": 0.01164291426539421, - "step": 3190 - }, - { - "epoch": 0.5447570332480819, - "grad_norm": 0.09011051058769226, - "learning_rate": 7.24318709331764e-05, - "loss": 0.009608177840709687, - "step": 3195 - }, - { - "epoch": 0.545609548167093, - "grad_norm": 0.08180241286754608, - "learning_rate": 7.24225441048974e-05, - "loss": 0.010098953545093537, - "step": 3200 - }, - { - "epoch": 0.546462063086104, - "grad_norm": 0.08325407654047012, - "learning_rate": 7.241320097361312e-05, - "loss": 0.012687146663665771, - "step": 3205 - }, - { - "epoch": 0.5473145780051151, - "grad_norm": 0.11662351340055466, - "learning_rate": 7.240384154368523e-05, - "loss": 0.012003959715366363, - "step": 3210 - }, - { - "epoch": 0.5481670929241261, - "grad_norm": 0.05904731899499893, - "learning_rate": 7.239446581948306e-05, - "loss": 0.012311330437660218, - "step": 3215 - }, - { - "epoch": 0.5490196078431373, - "grad_norm": 0.12498651444911957, - "learning_rate": 7.238507380538347e-05, - "loss": 0.011272794008255005, - "step": 3220 - }, - { - "epoch": 0.5498721227621484, - "grad_norm": 0.06047634035348892, - "learning_rate": 7.2375665505771e-05, - "loss": 0.010353527963161469, - "step": 3225 - }, - { - "epoch": 0.5507246376811594, - "grad_norm": 0.07596508413553238, - "learning_rate": 7.236624092503774e-05, - "loss": 0.011058451980352402, - "step": 3230 - }, - { - "epoch": 0.5515771526001705, - "grad_norm": 0.10000273585319519, - "learning_rate": 7.235680006758339e-05, - "loss": 0.012288159132003785, - "step": 3235 - }, - { - "epoch": 0.5524296675191815, - "grad_norm": 0.08154033869504929, - "learning_rate": 7.234734293781527e-05, - "loss": 0.015510989725589753, - "step": 3240 - }, - { - "epoch": 0.5532821824381927, - "grad_norm": 0.10024677217006683, - "learning_rate": 7.233786954014828e-05, - "loss": 0.010542219877243042, - "step": 3245 - }, - { - "epoch": 0.5541346973572038, - "grad_norm": 0.08001844584941864, - "learning_rate": 7.232837987900492e-05, - "loss": 0.009433221817016602, - "step": 3250 - }, - { - "epoch": 0.5549872122762148, - "grad_norm": 0.05274324119091034, - "learning_rate": 7.231887395881528e-05, - "loss": 0.010475738346576691, - "step": 3255 - }, - { - "epoch": 0.5558397271952259, - "grad_norm": 0.08753672242164612, - "learning_rate": 7.230935178401703e-05, - "loss": 0.007628431916236878, - "step": 3260 - }, - { - "epoch": 0.556692242114237, - "grad_norm": 0.10221699625253677, - "learning_rate": 7.229981335905545e-05, - "loss": 0.011822684109210968, - "step": 3265 - }, - { - "epoch": 0.5575447570332481, - "grad_norm": 0.07665866613388062, - "learning_rate": 7.229025868838336e-05, - "loss": 0.010916930437088013, - "step": 3270 - }, - { - "epoch": 0.5583972719522592, - "grad_norm": 0.08861260861158371, - "learning_rate": 7.228068777646125e-05, - "loss": 0.008925830572843551, - "step": 3275 - }, - { - "epoch": 0.5592497868712702, - "grad_norm": 0.08963657170534134, - "learning_rate": 7.227110062775712e-05, - "loss": 0.014812195301055908, - "step": 3280 - }, - { - "epoch": 0.5601023017902813, - "grad_norm": 0.28550851345062256, - "learning_rate": 7.226149724674655e-05, - "loss": 0.009522277861833572, - "step": 3285 - }, - { - "epoch": 0.5609548167092924, - "grad_norm": 0.057680875062942505, - "learning_rate": 7.225187763791273e-05, - "loss": 0.012893497943878174, - "step": 3290 - }, - { - "epoch": 0.5618073316283035, - "grad_norm": 0.08956284821033478, - "learning_rate": 7.224224180574642e-05, - "loss": 0.012499228864908219, - "step": 3295 - }, - { - "epoch": 0.5626598465473146, - "grad_norm": 0.11929965764284134, - "learning_rate": 7.223258975474596e-05, - "loss": 0.010640453547239304, - "step": 3300 - }, - { - "epoch": 0.5635123614663257, - "grad_norm": 0.09788426756858826, - "learning_rate": 7.222292148941722e-05, - "loss": 0.014677588641643525, - "step": 3305 - }, - { - "epoch": 0.5643648763853367, - "grad_norm": 0.08845673501491547, - "learning_rate": 7.221323701427368e-05, - "loss": 0.009266233444213868, - "step": 3310 - }, - { - "epoch": 0.5652173913043478, - "grad_norm": 0.07864493131637573, - "learning_rate": 7.220353633383636e-05, - "loss": 0.01019999384880066, - "step": 3315 - }, - { - "epoch": 0.566069906223359, - "grad_norm": 0.07658441364765167, - "learning_rate": 7.21938194526339e-05, - "loss": 0.010098284482955933, - "step": 3320 - }, - { - "epoch": 0.56692242114237, - "grad_norm": 0.058863960206508636, - "learning_rate": 7.218408637520243e-05, - "loss": 0.01043831706047058, - "step": 3325 - }, - { - "epoch": 0.5677749360613811, - "grad_norm": 0.05992535129189491, - "learning_rate": 7.217433710608567e-05, - "loss": 0.010804108530282974, - "step": 3330 - }, - { - "epoch": 0.5686274509803921, - "grad_norm": 0.10607994347810745, - "learning_rate": 7.216457164983494e-05, - "loss": 0.01115414798259735, - "step": 3335 - }, - { - "epoch": 0.5694799658994032, - "grad_norm": 0.07557345181703568, - "learning_rate": 7.215479001100904e-05, - "loss": 0.01279982328414917, - "step": 3340 - }, - { - "epoch": 0.5703324808184144, - "grad_norm": 0.064768947660923, - "learning_rate": 7.214499219417439e-05, - "loss": 0.01112583726644516, - "step": 3345 - }, - { - "epoch": 0.5711849957374254, - "grad_norm": 0.08013112843036652, - "learning_rate": 7.213517820390492e-05, - "loss": 0.01265912652015686, - "step": 3350 - }, - { - "epoch": 0.5720375106564365, - "grad_norm": 0.06619428843259811, - "learning_rate": 7.212534804478214e-05, - "loss": 0.01231289878487587, - "step": 3355 - }, - { - "epoch": 0.5728900255754475, - "grad_norm": 0.06123036891222, - "learning_rate": 7.211550172139507e-05, - "loss": 0.012096628546714783, - "step": 3360 - }, - { - "epoch": 0.5737425404944586, - "grad_norm": 0.10050475597381592, - "learning_rate": 7.210563923834034e-05, - "loss": 0.014050082862377166, - "step": 3365 - }, - { - "epoch": 0.5745950554134698, - "grad_norm": 0.05243556201457977, - "learning_rate": 7.209576060022207e-05, - "loss": 0.009351913630962373, - "step": 3370 - }, - { - "epoch": 0.5754475703324808, - "grad_norm": 0.12591946125030518, - "learning_rate": 7.208586581165192e-05, - "loss": 0.012423963844776153, - "step": 3375 - }, - { - "epoch": 0.5763000852514919, - "grad_norm": 0.11871001869440079, - "learning_rate": 7.207595487724912e-05, - "loss": 0.014398403465747833, - "step": 3380 - }, - { - "epoch": 0.577152600170503, - "grad_norm": 0.09194283187389374, - "learning_rate": 7.206602780164044e-05, - "loss": 0.009020231664180756, - "step": 3385 - }, - { - "epoch": 0.578005115089514, - "grad_norm": 0.1465149074792862, - "learning_rate": 7.205608458946013e-05, - "loss": 0.009870749711990357, - "step": 3390 - }, - { - "epoch": 0.5788576300085252, - "grad_norm": 0.07948209345340729, - "learning_rate": 7.204612524535006e-05, - "loss": 0.013135011494159698, - "step": 3395 - }, - { - "epoch": 0.5797101449275363, - "grad_norm": 0.07187635451555252, - "learning_rate": 7.203614977395952e-05, - "loss": 0.010598786920309067, - "step": 3400 - }, - { - "epoch": 0.5805626598465473, - "grad_norm": 0.05511854961514473, - "learning_rate": 7.202615817994545e-05, - "loss": 0.009227041155099869, - "step": 3405 - }, - { - "epoch": 0.5814151747655584, - "grad_norm": 0.05830230563879013, - "learning_rate": 7.201615046797224e-05, - "loss": 0.008167321979999542, - "step": 3410 - }, - { - "epoch": 0.5822676896845694, - "grad_norm": 0.08624587953090668, - "learning_rate": 7.200612664271184e-05, - "loss": 0.012134125083684921, - "step": 3415 - }, - { - "epoch": 0.5831202046035806, - "grad_norm": 0.0744808092713356, - "learning_rate": 7.199608670884366e-05, - "loss": 0.012493259459733962, - "step": 3420 - }, - { - "epoch": 0.5839727195225917, - "grad_norm": 0.07272766530513763, - "learning_rate": 7.19860306710547e-05, - "loss": 0.00806736946105957, - "step": 3425 - }, - { - "epoch": 0.5848252344416027, - "grad_norm": 0.0804983377456665, - "learning_rate": 7.197595853403946e-05, - "loss": 0.01102890819311142, - "step": 3430 - }, - { - "epoch": 0.5856777493606138, - "grad_norm": 0.05326579511165619, - "learning_rate": 7.196587030249994e-05, - "loss": 0.009381016343832016, - "step": 3435 - }, - { - "epoch": 0.5865302642796248, - "grad_norm": 0.07588013261556625, - "learning_rate": 7.195576598114567e-05, - "loss": 0.010961712896823883, - "step": 3440 - }, - { - "epoch": 0.587382779198636, - "grad_norm": 0.09725244343280792, - "learning_rate": 7.194564557469368e-05, - "loss": 0.012034715712070465, - "step": 3445 - }, - { - "epoch": 0.5882352941176471, - "grad_norm": 0.0938539057970047, - "learning_rate": 7.193550908786851e-05, - "loss": 0.012069541215896606, - "step": 3450 - }, - { - "epoch": 0.5890878090366581, - "grad_norm": 0.052410729229450226, - "learning_rate": 7.19253565254022e-05, - "loss": 0.011174223572015762, - "step": 3455 - }, - { - "epoch": 0.5899403239556692, - "grad_norm": 0.08317258954048157, - "learning_rate": 7.191518789203432e-05, - "loss": 0.014452503621578216, - "step": 3460 - }, - { - "epoch": 0.5907928388746803, - "grad_norm": 0.062619149684906, - "learning_rate": 7.190500319251193e-05, - "loss": 0.012830793857574463, - "step": 3465 - }, - { - "epoch": 0.5916453537936914, - "grad_norm": 0.06287284195423126, - "learning_rate": 7.189480243158956e-05, - "loss": 0.013282649219036102, - "step": 3470 - }, - { - "epoch": 0.5924978687127025, - "grad_norm": 0.07136182487010956, - "learning_rate": 7.188458561402928e-05, - "loss": 0.009024892747402192, - "step": 3475 - }, - { - "epoch": 0.5933503836317136, - "grad_norm": 0.09081269055604935, - "learning_rate": 7.187435274460064e-05, - "loss": 0.012044035643339158, - "step": 3480 - }, - { - "epoch": 0.5942028985507246, - "grad_norm": 0.08475978672504425, - "learning_rate": 7.18641038280807e-05, - "loss": 0.010785829275846481, - "step": 3485 - }, - { - "epoch": 0.5950554134697357, - "grad_norm": 0.06322979927062988, - "learning_rate": 7.185383886925397e-05, - "loss": 0.011609486490488052, - "step": 3490 - }, - { - "epoch": 0.5959079283887468, - "grad_norm": 0.07065978646278381, - "learning_rate": 7.18435578729125e-05, - "loss": 0.01128239706158638, - "step": 3495 - }, - { - "epoch": 0.5967604433077579, - "grad_norm": 0.057962607592344284, - "learning_rate": 7.183326084385577e-05, - "loss": 0.009382489323616027, - "step": 3500 - }, - { - "epoch": 0.597612958226769, - "grad_norm": 0.05717672407627106, - "learning_rate": 7.182294778689079e-05, - "loss": 0.010072773694992066, - "step": 3505 - }, - { - "epoch": 0.59846547314578, - "grad_norm": 0.07161569595336914, - "learning_rate": 7.181261870683205e-05, - "loss": 0.011324245482683182, - "step": 3510 - }, - { - "epoch": 0.5993179880647911, - "grad_norm": 0.07468906790018082, - "learning_rate": 7.180227360850148e-05, - "loss": 0.00984283909201622, - "step": 3515 - }, - { - "epoch": 0.6001705029838023, - "grad_norm": 0.071560800075531, - "learning_rate": 7.179191249672855e-05, - "loss": 0.011276674270629884, - "step": 3520 - }, - { - "epoch": 0.6010230179028133, - "grad_norm": 0.05588390305638313, - "learning_rate": 7.178153537635014e-05, - "loss": 0.008921106159687043, - "step": 3525 - }, - { - "epoch": 0.6018755328218244, - "grad_norm": 0.11065732687711716, - "learning_rate": 7.177114225221066e-05, - "loss": 0.0122377447783947, - "step": 3530 - }, - { - "epoch": 0.6027280477408354, - "grad_norm": 0.10121116787195206, - "learning_rate": 7.176073312916194e-05, - "loss": 0.007999545335769654, - "step": 3535 - }, - { - "epoch": 0.6035805626598465, - "grad_norm": 0.06102030724287033, - "learning_rate": 7.175030801206335e-05, - "loss": 0.008767658472061157, - "step": 3540 - }, - { - "epoch": 0.6044330775788577, - "grad_norm": 0.08233699947595596, - "learning_rate": 7.173986690578164e-05, - "loss": 0.010089017450809479, - "step": 3545 - }, - { - "epoch": 0.6052855924978687, - "grad_norm": 0.1655152440071106, - "learning_rate": 7.172940981519108e-05, - "loss": 0.012077460438013077, - "step": 3550 - }, - { - "epoch": 0.6061381074168798, - "grad_norm": 0.11178915202617645, - "learning_rate": 7.171893674517337e-05, - "loss": 0.009319285303354264, - "step": 3555 - }, - { - "epoch": 0.6069906223358909, - "grad_norm": 0.0778600424528122, - "learning_rate": 7.170844770061772e-05, - "loss": 0.012114962190389633, - "step": 3560 - }, - { - "epoch": 0.6078431372549019, - "grad_norm": 0.08708171546459198, - "learning_rate": 7.169794268642075e-05, - "loss": 0.011569589376449585, - "step": 3565 - }, - { - "epoch": 0.6086956521739131, - "grad_norm": 0.06438080966472626, - "learning_rate": 7.168742170748654e-05, - "loss": 0.010296766459941865, - "step": 3570 - }, - { - "epoch": 0.6095481670929241, - "grad_norm": 0.10569975525140762, - "learning_rate": 7.167688476872664e-05, - "loss": 0.008922196924686432, - "step": 3575 - }, - { - "epoch": 0.6104006820119352, - "grad_norm": 0.07466918975114822, - "learning_rate": 7.166633187506004e-05, - "loss": 0.009365256130695342, - "step": 3580 - }, - { - "epoch": 0.6112531969309463, - "grad_norm": 0.1070641577243805, - "learning_rate": 7.16557630314132e-05, - "loss": 0.011525402963161468, - "step": 3585 - }, - { - "epoch": 0.6121057118499573, - "grad_norm": 0.09534542262554169, - "learning_rate": 7.164517824271999e-05, - "loss": 0.010068083554506302, - "step": 3590 - }, - { - "epoch": 0.6129582267689685, - "grad_norm": 0.0643506869673729, - "learning_rate": 7.163457751392175e-05, - "loss": 0.010679592937231063, - "step": 3595 - }, - { - "epoch": 0.6138107416879796, - "grad_norm": 0.11610018461942673, - "learning_rate": 7.162396084996723e-05, - "loss": 0.010074391961097717, - "step": 3600 - }, - { - "epoch": 0.6146632566069906, - "grad_norm": 0.07560709863901138, - "learning_rate": 7.161332825581269e-05, - "loss": 0.013245916366577149, - "step": 3605 - }, - { - "epoch": 0.6155157715260017, - "grad_norm": 0.06540799885988235, - "learning_rate": 7.160267973642173e-05, - "loss": 0.01055695340037346, - "step": 3610 - }, - { - "epoch": 0.6163682864450127, - "grad_norm": 0.05610837787389755, - "learning_rate": 7.159201529676546e-05, - "loss": 0.010231484472751618, - "step": 3615 - }, - { - "epoch": 0.6172208013640239, - "grad_norm": 0.11630856245756149, - "learning_rate": 7.158133494182237e-05, - "loss": 0.01117742881178856, - "step": 3620 - }, - { - "epoch": 0.618073316283035, - "grad_norm": 0.08508500456809998, - "learning_rate": 7.157063867657844e-05, - "loss": 0.010253986716270447, - "step": 3625 - }, - { - "epoch": 0.618925831202046, - "grad_norm": 0.067935511469841, - "learning_rate": 7.155992650602702e-05, - "loss": 0.009731527417898178, - "step": 3630 - }, - { - "epoch": 0.6197783461210571, - "grad_norm": 0.0784364566206932, - "learning_rate": 7.154919843516892e-05, - "loss": 0.009552852809429168, - "step": 3635 - }, - { - "epoch": 0.6206308610400681, - "grad_norm": 0.10788855701684952, - "learning_rate": 7.153845446901234e-05, - "loss": 0.011269643902778625, - "step": 3640 - }, - { - "epoch": 0.6214833759590793, - "grad_norm": 0.08664087951183319, - "learning_rate": 7.152769461257294e-05, - "loss": 0.010251335799694061, - "step": 3645 - }, - { - "epoch": 0.6223358908780904, - "grad_norm": 0.06885403394699097, - "learning_rate": 7.151691887087377e-05, - "loss": 0.008078257739543914, - "step": 3650 - }, - { - "epoch": 0.6231884057971014, - "grad_norm": 0.09345501661300659, - "learning_rate": 7.150612724894531e-05, - "loss": 0.012022207677364349, - "step": 3655 - }, - { - "epoch": 0.6240409207161125, - "grad_norm": 0.08502865582704544, - "learning_rate": 7.149531975182543e-05, - "loss": 0.00932946428656578, - "step": 3660 - }, - { - "epoch": 0.6248934356351236, - "grad_norm": 0.06249995157122612, - "learning_rate": 7.148449638455947e-05, - "loss": 0.011525212973356246, - "step": 3665 - }, - { - "epoch": 0.6257459505541347, - "grad_norm": 0.04836896434426308, - "learning_rate": 7.14736571522001e-05, - "loss": 0.010347714275121688, - "step": 3670 - }, - { - "epoch": 0.6265984654731458, - "grad_norm": 0.06358285248279572, - "learning_rate": 7.146280205980745e-05, - "loss": 0.009779715538024902, - "step": 3675 - }, - { - "epoch": 0.6274509803921569, - "grad_norm": 0.07596850395202637, - "learning_rate": 7.145193111244903e-05, - "loss": 0.010838811099529267, - "step": 3680 - }, - { - "epoch": 0.6283034953111679, - "grad_norm": 0.05986448749899864, - "learning_rate": 7.144104431519977e-05, - "loss": 0.009671849757432937, - "step": 3685 - }, - { - "epoch": 0.629156010230179, - "grad_norm": 0.047881439328193665, - "learning_rate": 7.143014167314197e-05, - "loss": 0.007660867273807525, - "step": 3690 - }, - { - "epoch": 0.6300085251491901, - "grad_norm": 0.06409293413162231, - "learning_rate": 7.141922319136537e-05, - "loss": 0.013374905288219451, - "step": 3695 - }, - { - "epoch": 0.6308610400682012, - "grad_norm": 0.0767306461930275, - "learning_rate": 7.140828887496707e-05, - "loss": 0.006885652989149093, - "step": 3700 - }, - { - "epoch": 0.6317135549872123, - "grad_norm": 0.08192065358161926, - "learning_rate": 7.139733872905158e-05, - "loss": 0.013760556280612946, - "step": 3705 - }, - { - "epoch": 0.6325660699062233, - "grad_norm": 0.09693574160337448, - "learning_rate": 7.138637275873078e-05, - "loss": 0.009739194065332413, - "step": 3710 - }, - { - "epoch": 0.6334185848252344, - "grad_norm": 0.08232755959033966, - "learning_rate": 7.137539096912395e-05, - "loss": 0.010294197499752045, - "step": 3715 - }, - { - "epoch": 0.6342710997442456, - "grad_norm": 0.06582340598106384, - "learning_rate": 7.136439336535776e-05, - "loss": 0.010686574131250381, - "step": 3720 - }, - { - "epoch": 0.6351236146632566, - "grad_norm": 0.07385887205600739, - "learning_rate": 7.135337995256626e-05, - "loss": 0.011403677612543106, - "step": 3725 - }, - { - "epoch": 0.6359761295822677, - "grad_norm": 0.11854248493909836, - "learning_rate": 7.134235073589087e-05, - "loss": 0.01180308759212494, - "step": 3730 - }, - { - "epoch": 0.6368286445012787, - "grad_norm": 0.076481893658638, - "learning_rate": 7.133130572048041e-05, - "loss": 0.011076596379280091, - "step": 3735 - }, - { - "epoch": 0.6376811594202898, - "grad_norm": 0.09552651643753052, - "learning_rate": 7.132024491149103e-05, - "loss": 0.014420199394226074, - "step": 3740 - }, - { - "epoch": 0.638533674339301, - "grad_norm": 0.04855124279856682, - "learning_rate": 7.130916831408633e-05, - "loss": 0.008350960910320282, - "step": 3745 - }, - { - "epoch": 0.639386189258312, - "grad_norm": 0.0796368345618248, - "learning_rate": 7.12980759334372e-05, - "loss": 0.010764746367931366, - "step": 3750 - }, - { - "epoch": 0.6402387041773231, - "grad_norm": 0.07030697911977768, - "learning_rate": 7.128696777472193e-05, - "loss": 0.010386807471513748, - "step": 3755 - }, - { - "epoch": 0.6410912190963342, - "grad_norm": 0.05930609628558159, - "learning_rate": 7.127584384312619e-05, - "loss": 0.008884093910455703, - "step": 3760 - }, - { - "epoch": 0.6419437340153452, - "grad_norm": 0.07495228201150894, - "learning_rate": 7.126470414384299e-05, - "loss": 0.010249865800142288, - "step": 3765 - }, - { - "epoch": 0.6427962489343564, - "grad_norm": 0.12954963743686676, - "learning_rate": 7.125354868207275e-05, - "loss": 0.013017497956752777, - "step": 3770 - }, - { - "epoch": 0.6436487638533674, - "grad_norm": 0.08893310278654099, - "learning_rate": 7.124237746302317e-05, - "loss": 0.010649867355823517, - "step": 3775 - }, - { - "epoch": 0.6445012787723785, - "grad_norm": 0.08650866150856018, - "learning_rate": 7.123119049190935e-05, - "loss": 0.012544044852256775, - "step": 3780 - }, - { - "epoch": 0.6453537936913896, - "grad_norm": 0.06374052166938782, - "learning_rate": 7.121998777395375e-05, - "loss": 0.007669864594936371, - "step": 3785 - }, - { - "epoch": 0.6462063086104007, - "grad_norm": 0.08226713538169861, - "learning_rate": 7.120876931438618e-05, - "loss": 0.007969621568918228, - "step": 3790 - }, - { - "epoch": 0.6470588235294118, - "grad_norm": 0.10450884699821472, - "learning_rate": 7.119753511844377e-05, - "loss": 0.013088032603263855, - "step": 3795 - }, - { - "epoch": 0.6479113384484229, - "grad_norm": 0.08459076285362244, - "learning_rate": 7.118628519137104e-05, - "loss": 0.01125529408454895, - "step": 3800 - }, - { - "epoch": 0.6487638533674339, - "grad_norm": 0.09018636494874954, - "learning_rate": 7.11750195384198e-05, - "loss": 0.008683501929044723, - "step": 3805 - }, - { - "epoch": 0.649616368286445, - "grad_norm": 0.07949680835008621, - "learning_rate": 7.116373816484927e-05, - "loss": 0.008904790878295899, - "step": 3810 - }, - { - "epoch": 0.6504688832054561, - "grad_norm": 0.14297716319561005, - "learning_rate": 7.115244107592593e-05, - "loss": 0.01503775417804718, - "step": 3815 - }, - { - "epoch": 0.6513213981244672, - "grad_norm": 0.051478032022714615, - "learning_rate": 7.114112827692367e-05, - "loss": 0.011145923286676407, - "step": 3820 - }, - { - "epoch": 0.6521739130434783, - "grad_norm": 0.0686139240860939, - "learning_rate": 7.112979977312365e-05, - "loss": 0.009445450454950332, - "step": 3825 - }, - { - "epoch": 0.6530264279624893, - "grad_norm": 0.08674909919500351, - "learning_rate": 7.111845556981444e-05, - "loss": 0.009345399588346482, - "step": 3830 - }, - { - "epoch": 0.6538789428815004, - "grad_norm": 0.07799270749092102, - "learning_rate": 7.110709567229182e-05, - "loss": 0.009722919762134552, - "step": 3835 - }, - { - "epoch": 0.6547314578005116, - "grad_norm": 0.07891912013292313, - "learning_rate": 7.109572008585905e-05, - "loss": 0.009985177218914032, - "step": 3840 - }, - { - "epoch": 0.6555839727195226, - "grad_norm": 0.07315738499164581, - "learning_rate": 7.108432881582656e-05, - "loss": 0.011729113757610321, - "step": 3845 - }, - { - "epoch": 0.6564364876385337, - "grad_norm": 0.04961124807596207, - "learning_rate": 7.107292186751222e-05, - "loss": 0.008087723702192306, - "step": 3850 - }, - { - "epoch": 0.6572890025575447, - "grad_norm": 0.0745200589299202, - "learning_rate": 7.106149924624115e-05, - "loss": 0.010474404692649842, - "step": 3855 - }, - { - "epoch": 0.6581415174765558, - "grad_norm": 0.06290512531995773, - "learning_rate": 7.105006095734581e-05, - "loss": 0.009356130659580231, - "step": 3860 - }, - { - "epoch": 0.658994032395567, - "grad_norm": 0.058479905128479004, - "learning_rate": 7.1038607006166e-05, - "loss": 0.008637580275535583, - "step": 3865 - }, - { - "epoch": 0.659846547314578, - "grad_norm": 0.07301484048366547, - "learning_rate": 7.102713739804879e-05, - "loss": 0.015610474348068237, - "step": 3870 - }, - { - "epoch": 0.6606990622335891, - "grad_norm": 0.07421465218067169, - "learning_rate": 7.101565213834855e-05, - "loss": 0.011201824992895126, - "step": 3875 - }, - { - "epoch": 0.6615515771526002, - "grad_norm": 0.06928746402263641, - "learning_rate": 7.100415123242701e-05, - "loss": 0.007224821299314499, - "step": 3880 - }, - { - "epoch": 0.6624040920716112, - "grad_norm": 0.0669165551662445, - "learning_rate": 7.099263468565317e-05, - "loss": 0.007274401932954788, - "step": 3885 - }, - { - "epoch": 0.6632566069906224, - "grad_norm": 0.09326919168233871, - "learning_rate": 7.098110250340334e-05, - "loss": 0.008258016407489776, - "step": 3890 - }, - { - "epoch": 0.6641091219096334, - "grad_norm": 0.07563190162181854, - "learning_rate": 7.096955469106111e-05, - "loss": 0.01005811095237732, - "step": 3895 - }, - { - "epoch": 0.6649616368286445, - "grad_norm": 0.10135438293218613, - "learning_rate": 7.09579912540174e-05, - "loss": 0.009129725396633148, - "step": 3900 - }, - { - "epoch": 0.6658141517476556, - "grad_norm": 0.07946127653121948, - "learning_rate": 7.094641219767041e-05, - "loss": 0.013300496339797973, - "step": 3905 - }, - { - "epoch": 0.6666666666666666, - "grad_norm": 0.0727713331580162, - "learning_rate": 7.093481752742561e-05, - "loss": 0.01028701215982437, - "step": 3910 - }, - { - "epoch": 0.6675191815856778, - "grad_norm": 0.0718616396188736, - "learning_rate": 7.092320724869578e-05, - "loss": 0.009694813191890717, - "step": 3915 - }, - { - "epoch": 0.6683716965046889, - "grad_norm": 0.07789818942546844, - "learning_rate": 7.091158136690102e-05, - "loss": 0.009028838574886322, - "step": 3920 - }, - { - "epoch": 0.6692242114236999, - "grad_norm": 0.07319378852844238, - "learning_rate": 7.089993988746862e-05, - "loss": 0.008582034707069397, - "step": 3925 - }, - { - "epoch": 0.670076726342711, - "grad_norm": 0.086976557970047, - "learning_rate": 7.088828281583326e-05, - "loss": 0.013991822302341462, - "step": 3930 - }, - { - "epoch": 0.670929241261722, - "grad_norm": 0.07413294911384583, - "learning_rate": 7.087661015743681e-05, - "loss": 0.010896880924701691, - "step": 3935 - }, - { - "epoch": 0.6717817561807332, - "grad_norm": 0.12066303938627243, - "learning_rate": 7.08649219177285e-05, - "loss": 0.011574408411979676, - "step": 3940 - }, - { - "epoch": 0.6726342710997443, - "grad_norm": 0.11789914965629578, - "learning_rate": 7.085321810216474e-05, - "loss": 0.011523760855197906, - "step": 3945 - }, - { - "epoch": 0.6734867860187553, - "grad_norm": 0.07654725015163422, - "learning_rate": 7.084149871620929e-05, - "loss": 0.010388451814651489, - "step": 3950 - }, - { - "epoch": 0.6743393009377664, - "grad_norm": 0.05072671175003052, - "learning_rate": 7.082976376533315e-05, - "loss": 0.009674163907766343, - "step": 3955 - }, - { - "epoch": 0.6751918158567775, - "grad_norm": 0.08331634104251862, - "learning_rate": 7.081801325501458e-05, - "loss": 0.01096268892288208, - "step": 3960 - }, - { - "epoch": 0.6760443307757886, - "grad_norm": 0.03134739026427269, - "learning_rate": 7.08062471907391e-05, - "loss": 0.009522407501935958, - "step": 3965 - }, - { - "epoch": 0.6768968456947997, - "grad_norm": 0.09123755246400833, - "learning_rate": 7.079446557799951e-05, - "loss": 0.011530914902687072, - "step": 3970 - }, - { - "epoch": 0.6777493606138107, - "grad_norm": 0.04438139498233795, - "learning_rate": 7.078266842229585e-05, - "loss": 0.007757561653852463, - "step": 3975 - }, - { - "epoch": 0.6786018755328218, - "grad_norm": 0.06562457978725433, - "learning_rate": 7.077085572913543e-05, - "loss": 0.010243573784828186, - "step": 3980 - }, - { - "epoch": 0.6794543904518329, - "grad_norm": 0.08872365206480026, - "learning_rate": 7.075902750403283e-05, - "loss": 0.009365381300449371, - "step": 3985 - }, - { - "epoch": 0.680306905370844, - "grad_norm": 0.06977558881044388, - "learning_rate": 7.074718375250982e-05, - "loss": 0.010138784348964692, - "step": 3990 - }, - { - "epoch": 0.6811594202898551, - "grad_norm": 0.08196771889925003, - "learning_rate": 7.073532448009547e-05, - "loss": 0.011172623187303544, - "step": 3995 - }, - { - "epoch": 0.6820119352088662, - "grad_norm": 0.09635947644710541, - "learning_rate": 7.072344969232611e-05, - "loss": 0.011570926010608672, - "step": 4000 - }, - { - "epoch": 0.6828644501278772, - "grad_norm": 0.10703961551189423, - "learning_rate": 7.071155939474525e-05, - "loss": 0.010987398028373719, - "step": 4005 - }, - { - "epoch": 0.6837169650468883, - "grad_norm": 0.08124027401208878, - "learning_rate": 7.06996535929037e-05, - "loss": 0.009500280767679215, - "step": 4010 - }, - { - "epoch": 0.6845694799658995, - "grad_norm": 0.1272915005683899, - "learning_rate": 7.068773229235946e-05, - "loss": 0.011316341906785965, - "step": 4015 - }, - { - "epoch": 0.6854219948849105, - "grad_norm": 0.05040539428591728, - "learning_rate": 7.067579549867782e-05, - "loss": 0.009714095294475556, - "step": 4020 - }, - { - "epoch": 0.6862745098039216, - "grad_norm": 0.0954902172088623, - "learning_rate": 7.066384321743125e-05, - "loss": 0.01280902624130249, - "step": 4025 - }, - { - "epoch": 0.6871270247229326, - "grad_norm": 0.06131720915436745, - "learning_rate": 7.065187545419947e-05, - "loss": 0.00962383598089218, - "step": 4030 - }, - { - "epoch": 0.6879795396419437, - "grad_norm": 0.09720136225223541, - "learning_rate": 7.063989221456946e-05, - "loss": 0.00951121300458908, - "step": 4035 - }, - { - "epoch": 0.6888320545609549, - "grad_norm": 0.09116765856742859, - "learning_rate": 7.062789350413536e-05, - "loss": 0.012013505399227142, - "step": 4040 - }, - { - "epoch": 0.6896845694799659, - "grad_norm": 0.15665945410728455, - "learning_rate": 7.061587932849858e-05, - "loss": 0.012792985141277313, - "step": 4045 - }, - { - "epoch": 0.690537084398977, - "grad_norm": 0.05531725287437439, - "learning_rate": 7.060384969326775e-05, - "loss": 0.009135130047798156, - "step": 4050 - }, - { - "epoch": 0.691389599317988, - "grad_norm": 0.05050938203930855, - "learning_rate": 7.059180460405869e-05, - "loss": 0.01005997508764267, - "step": 4055 - }, - { - "epoch": 0.6922421142369991, - "grad_norm": 0.08903607726097107, - "learning_rate": 7.057974406649444e-05, - "loss": 0.008456438779830933, - "step": 4060 - }, - { - "epoch": 0.6930946291560103, - "grad_norm": 0.1395196169614792, - "learning_rate": 7.056766808620529e-05, - "loss": 0.012946255505084991, - "step": 4065 - }, - { - "epoch": 0.6939471440750213, - "grad_norm": 0.13844923675060272, - "learning_rate": 7.055557666882866e-05, - "loss": 0.008691005408763885, - "step": 4070 - }, - { - "epoch": 0.6947996589940324, - "grad_norm": 0.0776091143488884, - "learning_rate": 7.054346982000928e-05, - "loss": 0.011200450360774994, - "step": 4075 - }, - { - "epoch": 0.6956521739130435, - "grad_norm": 0.06444083154201508, - "learning_rate": 7.0531347545399e-05, - "loss": 0.010937537997961044, - "step": 4080 - }, - { - "epoch": 0.6965046888320545, - "grad_norm": 0.07561453431844711, - "learning_rate": 7.05192098506569e-05, - "loss": 0.00827426165342331, - "step": 4085 - }, - { - "epoch": 0.6973572037510657, - "grad_norm": 0.06595294177532196, - "learning_rate": 7.050705674144927e-05, - "loss": 0.007974696159362794, - "step": 4090 - }, - { - "epoch": 0.6982097186700768, - "grad_norm": 0.08887284994125366, - "learning_rate": 7.049488822344959e-05, - "loss": 0.009547770768404008, - "step": 4095 - }, - { - "epoch": 0.6990622335890878, - "grad_norm": 0.06858290731906891, - "learning_rate": 7.04827043023385e-05, - "loss": 0.012419018894433975, - "step": 4100 - }, - { - "epoch": 0.6999147485080989, - "grad_norm": 0.09107037633657455, - "learning_rate": 7.047050498380391e-05, - "loss": 0.008159243315458298, - "step": 4105 - }, - { - "epoch": 0.7007672634271099, - "grad_norm": 0.062287479639053345, - "learning_rate": 7.045829027354082e-05, - "loss": 0.00995248556137085, - "step": 4110 - }, - { - "epoch": 0.7016197783461211, - "grad_norm": 0.11668206751346588, - "learning_rate": 7.044606017725148e-05, - "loss": 0.012902414798736573, - "step": 4115 - }, - { - "epoch": 0.7024722932651322, - "grad_norm": 0.08674585819244385, - "learning_rate": 7.043381470064532e-05, - "loss": 0.010076310485601425, - "step": 4120 - }, - { - "epoch": 0.7033248081841432, - "grad_norm": 0.09690031409263611, - "learning_rate": 7.042155384943892e-05, - "loss": 0.011086124181747436, - "step": 4125 - }, - { - "epoch": 0.7041773231031543, - "grad_norm": 0.09527027606964111, - "learning_rate": 7.040927762935605e-05, - "loss": 0.010631310194730759, - "step": 4130 - }, - { - "epoch": 0.7050298380221653, - "grad_norm": 0.07089316844940186, - "learning_rate": 7.039698604612765e-05, - "loss": 0.010472215712070465, - "step": 4135 - }, - { - "epoch": 0.7058823529411765, - "grad_norm": 0.07314343005418777, - "learning_rate": 7.038467910549188e-05, - "loss": 0.011205179244279861, - "step": 4140 - }, - { - "epoch": 0.7067348678601876, - "grad_norm": 0.10004976391792297, - "learning_rate": 7.037235681319399e-05, - "loss": 0.011671188473701476, - "step": 4145 - }, - { - "epoch": 0.7075873827791986, - "grad_norm": 0.06655722856521606, - "learning_rate": 7.036001917498645e-05, - "loss": 0.008725546300411224, - "step": 4150 - }, - { - "epoch": 0.7084398976982097, - "grad_norm": 0.0563860684633255, - "learning_rate": 7.034766619662888e-05, - "loss": 0.009952855855226516, - "step": 4155 - }, - { - "epoch": 0.7092924126172208, - "grad_norm": 0.09032288193702698, - "learning_rate": 7.033529788388806e-05, - "loss": 0.010940121859312058, - "step": 4160 - }, - { - "epoch": 0.7101449275362319, - "grad_norm": 0.10090665519237518, - "learning_rate": 7.032291424253793e-05, - "loss": 0.0093452550470829, - "step": 4165 - }, - { - "epoch": 0.710997442455243, - "grad_norm": 0.08737532049417496, - "learning_rate": 7.03105152783596e-05, - "loss": 0.011567962169647217, - "step": 4170 - }, - { - "epoch": 0.711849957374254, - "grad_norm": 0.08184633404016495, - "learning_rate": 7.029810099714128e-05, - "loss": 0.011243235319852829, - "step": 4175 - }, - { - "epoch": 0.7127024722932651, - "grad_norm": 0.10227608680725098, - "learning_rate": 7.028567140467842e-05, - "loss": 0.01062348037958145, - "step": 4180 - }, - { - "epoch": 0.7135549872122762, - "grad_norm": 0.08998764306306839, - "learning_rate": 7.027322650677353e-05, - "loss": 0.01058843582868576, - "step": 4185 - }, - { - "epoch": 0.7144075021312873, - "grad_norm": 0.06969588994979858, - "learning_rate": 7.02607663092363e-05, - "loss": 0.009745591133832932, - "step": 4190 - }, - { - "epoch": 0.7152600170502984, - "grad_norm": 0.08256277441978455, - "learning_rate": 7.024829081788359e-05, - "loss": 0.009450466185808182, - "step": 4195 - }, - { - "epoch": 0.7161125319693095, - "grad_norm": 0.06720574200153351, - "learning_rate": 7.023580003853937e-05, - "loss": 0.006700781732797622, - "step": 4200 - }, - { - "epoch": 0.7169650468883205, - "grad_norm": 0.1428842693567276, - "learning_rate": 7.022329397703474e-05, - "loss": 0.009295140206813813, - "step": 4205 - }, - { - "epoch": 0.7178175618073316, - "grad_norm": 0.11677515506744385, - "learning_rate": 7.021077263920794e-05, - "loss": 0.011417123675346374, - "step": 4210 - }, - { - "epoch": 0.7186700767263428, - "grad_norm": 0.06874742358922958, - "learning_rate": 7.019823603090437e-05, - "loss": 0.013518881797790528, - "step": 4215 - }, - { - "epoch": 0.7195225916453538, - "grad_norm": 0.06695922464132309, - "learning_rate": 7.018568415797651e-05, - "loss": 0.008886832743883133, - "step": 4220 - }, - { - "epoch": 0.7203751065643649, - "grad_norm": 0.09428033232688904, - "learning_rate": 7.017311702628402e-05, - "loss": 0.009926854819059371, - "step": 4225 - }, - { - "epoch": 0.7212276214833759, - "grad_norm": 0.08420582115650177, - "learning_rate": 7.016053464169362e-05, - "loss": 0.011952979117631912, - "step": 4230 - }, - { - "epoch": 0.722080136402387, - "grad_norm": 0.07804932445287704, - "learning_rate": 7.014793701007922e-05, - "loss": 0.009345601499080657, - "step": 4235 - }, - { - "epoch": 0.7229326513213982, - "grad_norm": 0.10204415768384933, - "learning_rate": 7.013532413732179e-05, - "loss": 0.009596188366413117, - "step": 4240 - }, - { - "epoch": 0.7237851662404092, - "grad_norm": 0.06207561865448952, - "learning_rate": 7.012269602930946e-05, - "loss": 0.010864783823490144, - "step": 4245 - }, - { - "epoch": 0.7246376811594203, - "grad_norm": 0.07258995622396469, - "learning_rate": 7.011005269193743e-05, - "loss": 0.010417935252189637, - "step": 4250 - }, - { - "epoch": 0.7254901960784313, - "grad_norm": 0.09797866642475128, - "learning_rate": 7.009739413110803e-05, - "loss": 0.009975450485944748, - "step": 4255 - }, - { - "epoch": 0.7263427109974424, - "grad_norm": 0.14229723811149597, - "learning_rate": 7.008472035273071e-05, - "loss": 0.013093425333499909, - "step": 4260 - }, - { - "epoch": 0.7271952259164536, - "grad_norm": 0.10052912682294846, - "learning_rate": 7.007203136272202e-05, - "loss": 0.008044174313545227, - "step": 4265 - }, - { - "epoch": 0.7280477408354646, - "grad_norm": 0.06391840428113937, - "learning_rate": 7.005932716700558e-05, - "loss": 0.009895801544189453, - "step": 4270 - }, - { - "epoch": 0.7289002557544757, - "grad_norm": 0.08301703631877899, - "learning_rate": 7.004660777151213e-05, - "loss": 0.008399789780378341, - "step": 4275 - }, - { - "epoch": 0.7297527706734868, - "grad_norm": 0.09191301465034485, - "learning_rate": 7.003387318217954e-05, - "loss": 0.010163726657629013, - "step": 4280 - }, - { - "epoch": 0.7306052855924978, - "grad_norm": 0.06292443722486496, - "learning_rate": 7.00211234049527e-05, - "loss": 0.010847686976194381, - "step": 4285 - }, - { - "epoch": 0.731457800511509, - "grad_norm": 0.08020442724227905, - "learning_rate": 7.000835844578365e-05, - "loss": 0.010198664665222169, - "step": 4290 - }, - { - "epoch": 0.73231031543052, - "grad_norm": 0.08008337765932083, - "learning_rate": 6.999557831063152e-05, - "loss": 0.010532062500715256, - "step": 4295 - }, - { - "epoch": 0.7331628303495311, - "grad_norm": 0.09209048002958298, - "learning_rate": 6.998278300546245e-05, - "loss": 0.012655872106552123, - "step": 4300 - }, - { - "epoch": 0.7340153452685422, - "grad_norm": 0.1040966734290123, - "learning_rate": 6.996997253624974e-05, - "loss": 0.009482499212026596, - "step": 4305 - }, - { - "epoch": 0.7348678601875532, - "grad_norm": 0.06724270433187485, - "learning_rate": 6.995714690897376e-05, - "loss": 0.008470554649829865, - "step": 4310 - }, - { - "epoch": 0.7357203751065644, - "grad_norm": 0.050487734377384186, - "learning_rate": 6.994430612962192e-05, - "loss": 0.009740649163722992, - "step": 4315 - }, - { - "epoch": 0.7365728900255755, - "grad_norm": 0.07633031159639359, - "learning_rate": 6.993145020418873e-05, - "loss": 0.009455478191375733, - "step": 4320 - }, - { - "epoch": 0.7374254049445865, - "grad_norm": 0.11053632944822311, - "learning_rate": 6.991857913867575e-05, - "loss": 0.0111383855342865, - "step": 4325 - }, - { - "epoch": 0.7382779198635976, - "grad_norm": 0.07932359725236893, - "learning_rate": 6.990569293909165e-05, - "loss": 0.010170862078666687, - "step": 4330 - }, - { - "epoch": 0.7391304347826086, - "grad_norm": 0.06205928325653076, - "learning_rate": 6.98927916114521e-05, - "loss": 0.009742221236228943, - "step": 4335 - }, - { - "epoch": 0.7399829497016198, - "grad_norm": 0.07431669533252716, - "learning_rate": 6.987987516177989e-05, - "loss": 0.009026934206485749, - "step": 4340 - }, - { - "epoch": 0.7408354646206309, - "grad_norm": 0.06495700776576996, - "learning_rate": 6.986694359610486e-05, - "loss": 0.010289526730775832, - "step": 4345 - }, - { - "epoch": 0.7416879795396419, - "grad_norm": 0.07561559230089188, - "learning_rate": 6.985399692046387e-05, - "loss": 0.012499828636646271, - "step": 4350 - }, - { - "epoch": 0.742540494458653, - "grad_norm": 0.07708913832902908, - "learning_rate": 6.984103514090087e-05, - "loss": 0.01143759787082672, - "step": 4355 - }, - { - "epoch": 0.7433930093776641, - "grad_norm": 0.1315995305776596, - "learning_rate": 6.982805826346687e-05, - "loss": 0.010377982258796692, - "step": 4360 - }, - { - "epoch": 0.7442455242966752, - "grad_norm": 0.08346904814243317, - "learning_rate": 6.981506629421986e-05, - "loss": 0.008995984494686127, - "step": 4365 - }, - { - "epoch": 0.7450980392156863, - "grad_norm": 0.0814853310585022, - "learning_rate": 6.980205923922497e-05, - "loss": 0.009719532728195191, - "step": 4370 - }, - { - "epoch": 0.7459505541346974, - "grad_norm": 0.06594623625278473, - "learning_rate": 6.978903710455431e-05, - "loss": 0.008998245745897294, - "step": 4375 - }, - { - "epoch": 0.7468030690537084, - "grad_norm": 0.09526190906763077, - "learning_rate": 6.977599989628704e-05, - "loss": 0.010040522366762162, - "step": 4380 - }, - { - "epoch": 0.7476555839727195, - "grad_norm": 0.0938214361667633, - "learning_rate": 6.976294762050935e-05, - "loss": 0.010504753142595292, - "step": 4385 - }, - { - "epoch": 0.7485080988917306, - "grad_norm": 0.09816118329763412, - "learning_rate": 6.97498802833145e-05, - "loss": 0.011645899713039398, - "step": 4390 - }, - { - "epoch": 0.7493606138107417, - "grad_norm": 0.0780767872929573, - "learning_rate": 6.973679789080276e-05, - "loss": 0.011689887195825577, - "step": 4395 - }, - { - "epoch": 0.7498721227621483, - "eval_loss": 0.03396161273121834, - "eval_runtime": 3.6324, - "eval_samples_per_second": 69.376, - "eval_steps_per_second": 1.101, - "step": 4398 - }, - { - "eval_cer_subset": 0.01302783070334001, - "eval_cer_subset_edit_distance": 800, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 4398 - }, - { - "epoch": 0.7502131287297528, - "grad_norm": 0.061287231743335724, - "learning_rate": 6.972370044908141e-05, - "loss": 0.011720015108585358, - "step": 4400 - }, - { - "epoch": 0.7510656436487638, - "grad_norm": 0.0668778270483017, - "learning_rate": 6.971058796426478e-05, - "loss": 0.012064819037914277, - "step": 4405 - }, - { - "epoch": 0.7519181585677749, - "grad_norm": 0.07897942513227463, - "learning_rate": 6.969746044247421e-05, - "loss": 0.010592888295650481, - "step": 4410 - }, - { - "epoch": 0.7527706734867861, - "grad_norm": 0.09090534597635269, - "learning_rate": 6.968431788983806e-05, - "loss": 0.011600010097026825, - "step": 4415 - }, - { - "epoch": 0.7536231884057971, - "grad_norm": 0.080506332218647, - "learning_rate": 6.967116031249172e-05, - "loss": 0.013006125390529633, - "step": 4420 - }, - { - "epoch": 0.7544757033248082, - "grad_norm": 0.04851040989160538, - "learning_rate": 6.965798771657758e-05, - "loss": 0.010165790468454361, - "step": 4425 - }, - { - "epoch": 0.7553282182438192, - "grad_norm": 0.06298603117465973, - "learning_rate": 6.964480010824505e-05, - "loss": 0.007800602912902832, - "step": 4430 - }, - { - "epoch": 0.7561807331628303, - "grad_norm": 0.09919694811105728, - "learning_rate": 6.963159749365053e-05, - "loss": 0.010877586901187897, - "step": 4435 - }, - { - "epoch": 0.7570332480818415, - "grad_norm": 0.083896704018116, - "learning_rate": 6.961837987895747e-05, - "loss": 0.011114003509283066, - "step": 4440 - }, - { - "epoch": 0.7578857630008525, - "grad_norm": 0.0452699176967144, - "learning_rate": 6.960514727033626e-05, - "loss": 0.008609171956777573, - "step": 4445 - }, - { - "epoch": 0.7587382779198636, - "grad_norm": 0.08951374143362045, - "learning_rate": 6.959189967396435e-05, - "loss": 0.01193360835313797, - "step": 4450 - }, - { - "epoch": 0.7595907928388747, - "grad_norm": 0.08749551326036453, - "learning_rate": 6.957863709602611e-05, - "loss": 0.009163837879896164, - "step": 4455 - }, - { - "epoch": 0.7604433077578857, - "grad_norm": 0.09304409474134445, - "learning_rate": 6.956535954271301e-05, - "loss": 0.01038273349404335, - "step": 4460 - }, - { - "epoch": 0.7612958226768969, - "grad_norm": 0.06662629544734955, - "learning_rate": 6.955206702022342e-05, - "loss": 0.010570932179689407, - "step": 4465 - }, - { - "epoch": 0.7621483375959079, - "grad_norm": 0.07736595720052719, - "learning_rate": 6.953875953476276e-05, - "loss": 0.009856238961219788, - "step": 4470 - }, - { - "epoch": 0.763000852514919, - "grad_norm": 0.04692552238702774, - "learning_rate": 6.952543709254338e-05, - "loss": 0.006967573612928391, - "step": 4475 - }, - { - "epoch": 0.7638533674339301, - "grad_norm": 0.06901179254055023, - "learning_rate": 6.951209969978464e-05, - "loss": 0.008588603138923645, - "step": 4480 - }, - { - "epoch": 0.7647058823529411, - "grad_norm": 0.07733304053544998, - "learning_rate": 6.949874736271289e-05, - "loss": 0.012579981982707978, - "step": 4485 - }, - { - "epoch": 0.7655583972719523, - "grad_norm": 0.0693301409482956, - "learning_rate": 6.948538008756144e-05, - "loss": 0.009747470915317535, - "step": 4490 - }, - { - "epoch": 0.7664109121909634, - "grad_norm": 0.07054253667593002, - "learning_rate": 6.947199788057059e-05, - "loss": 0.008387601375579834, - "step": 4495 - }, - { - "epoch": 0.7672634271099744, - "grad_norm": 0.06526053696870804, - "learning_rate": 6.945860074798757e-05, - "loss": 0.008389735966920853, - "step": 4500 - }, - { - "epoch": 0.7681159420289855, - "grad_norm": 0.06862380355596542, - "learning_rate": 6.944518869606662e-05, - "loss": 0.008736115694046021, - "step": 4505 - }, - { - "epoch": 0.7689684569479965, - "grad_norm": 0.06233246996998787, - "learning_rate": 6.943176173106897e-05, - "loss": 0.008519527316093446, - "step": 4510 - }, - { - "epoch": 0.7698209718670077, - "grad_norm": 0.07696249336004257, - "learning_rate": 6.941831985926273e-05, - "loss": 0.011381441354751587, - "step": 4515 - }, - { - "epoch": 0.7706734867860188, - "grad_norm": 0.11450641602277756, - "learning_rate": 6.940486308692302e-05, - "loss": 0.012895810604095458, - "step": 4520 - }, - { - "epoch": 0.7715260017050298, - "grad_norm": 0.09141158312559128, - "learning_rate": 6.939139142033191e-05, - "loss": 0.009508632868528367, - "step": 4525 - }, - { - "epoch": 0.7723785166240409, - "grad_norm": 0.09469986706972122, - "learning_rate": 6.937790486577844e-05, - "loss": 0.014812557399272919, - "step": 4530 - }, - { - "epoch": 0.773231031543052, - "grad_norm": 0.08494299650192261, - "learning_rate": 6.936440342955855e-05, - "loss": 0.01355334222316742, - "step": 4535 - }, - { - "epoch": 0.7740835464620631, - "grad_norm": 0.09328251332044601, - "learning_rate": 6.93508871179752e-05, - "loss": 0.011529977619647979, - "step": 4540 - }, - { - "epoch": 0.7749360613810742, - "grad_norm": 0.06696850806474686, - "learning_rate": 6.933735593733821e-05, - "loss": 0.010230815410614014, - "step": 4545 - }, - { - "epoch": 0.7757885763000852, - "grad_norm": 0.07968153059482574, - "learning_rate": 6.932380989396442e-05, - "loss": 0.012129776924848557, - "step": 4550 - }, - { - "epoch": 0.7766410912190963, - "grad_norm": 0.07916650176048279, - "learning_rate": 6.931024899417756e-05, - "loss": 0.009455519914627075, - "step": 4555 - }, - { - "epoch": 0.7774936061381074, - "grad_norm": 0.05229945108294487, - "learning_rate": 6.92966732443083e-05, - "loss": 0.008516684174537659, - "step": 4560 - }, - { - "epoch": 0.7783461210571185, - "grad_norm": 0.08314234018325806, - "learning_rate": 6.928308265069428e-05, - "loss": 0.010914114117622376, - "step": 4565 - }, - { - "epoch": 0.7791986359761296, - "grad_norm": 0.05075672268867493, - "learning_rate": 6.926947721968001e-05, - "loss": 0.008188208192586898, - "step": 4570 - }, - { - "epoch": 0.7800511508951407, - "grad_norm": 0.09465362876653671, - "learning_rate": 6.925585695761697e-05, - "loss": 0.009074468165636063, - "step": 4575 - }, - { - "epoch": 0.7809036658141517, - "grad_norm": 0.09024044126272202, - "learning_rate": 6.924222187086356e-05, - "loss": 0.006571034342050553, - "step": 4580 - }, - { - "epoch": 0.7817561807331628, - "grad_norm": 0.050575681030750275, - "learning_rate": 6.922857196578507e-05, - "loss": 0.008829852938652039, - "step": 4585 - }, - { - "epoch": 0.782608695652174, - "grad_norm": 0.09888230264186859, - "learning_rate": 6.921490724875376e-05, - "loss": 0.01053793728351593, - "step": 4590 - }, - { - "epoch": 0.783461210571185, - "grad_norm": 0.042567264288663864, - "learning_rate": 6.920122772614875e-05, - "loss": 0.009682109951972962, - "step": 4595 - }, - { - "epoch": 0.7843137254901961, - "grad_norm": 0.10021623969078064, - "learning_rate": 6.91875334043561e-05, - "loss": 0.012160807102918624, - "step": 4600 - }, - { - "epoch": 0.7851662404092071, - "grad_norm": 0.07016255706548691, - "learning_rate": 6.917382428976878e-05, - "loss": 0.008590599894523621, - "step": 4605 - }, - { - "epoch": 0.7860187553282183, - "grad_norm": 0.06567320227622986, - "learning_rate": 6.916010038878667e-05, - "loss": 0.00809207409620285, - "step": 4610 - }, - { - "epoch": 0.7868712702472294, - "grad_norm": 0.05032164603471756, - "learning_rate": 6.914636170781652e-05, - "loss": 0.009291460365056991, - "step": 4615 - }, - { - "epoch": 0.7877237851662404, - "grad_norm": 0.06303273886442184, - "learning_rate": 6.913260825327204e-05, - "loss": 0.00837131291627884, - "step": 4620 - }, - { - "epoch": 0.7885763000852515, - "grad_norm": 0.05837355926632881, - "learning_rate": 6.911884003157376e-05, - "loss": 0.007800968736410141, - "step": 4625 - }, - { - "epoch": 0.7894288150042625, - "grad_norm": 0.07052712142467499, - "learning_rate": 6.910505704914916e-05, - "loss": 0.010577390342950821, - "step": 4630 - }, - { - "epoch": 0.7902813299232737, - "grad_norm": 0.08550997078418732, - "learning_rate": 6.909125931243259e-05, - "loss": 0.012821859121322632, - "step": 4635 - }, - { - "epoch": 0.7911338448422848, - "grad_norm": 0.060406558215618134, - "learning_rate": 6.90774468278653e-05, - "loss": 0.0065113060176372525, - "step": 4640 - }, - { - "epoch": 0.7919863597612958, - "grad_norm": 0.13999445736408234, - "learning_rate": 6.906361960189542e-05, - "loss": 0.012699820101261139, - "step": 4645 - }, - { - "epoch": 0.7928388746803069, - "grad_norm": 0.07585978507995605, - "learning_rate": 6.904977764097797e-05, - "loss": 0.008435635268688202, - "step": 4650 - }, - { - "epoch": 0.793691389599318, - "grad_norm": 0.07488108426332474, - "learning_rate": 6.90359209515748e-05, - "loss": 0.011925875395536422, - "step": 4655 - }, - { - "epoch": 0.7945439045183291, - "grad_norm": 0.12476535886526108, - "learning_rate": 6.902204954015471e-05, - "loss": 0.009086847305297852, - "step": 4660 - }, - { - "epoch": 0.7953964194373402, - "grad_norm": 0.08779732137918472, - "learning_rate": 6.900816341319331e-05, - "loss": 0.00962812826037407, - "step": 4665 - }, - { - "epoch": 0.7962489343563512, - "grad_norm": 0.15791405737400055, - "learning_rate": 6.899426257717312e-05, - "loss": 0.011767397075891495, - "step": 4670 - }, - { - "epoch": 0.7971014492753623, - "grad_norm": 0.11228909343481064, - "learning_rate": 6.898034703858352e-05, - "loss": 0.008271434903144836, - "step": 4675 - }, - { - "epoch": 0.7979539641943734, - "grad_norm": 0.07288003712892532, - "learning_rate": 6.896641680392073e-05, - "loss": 0.009384474158287049, - "step": 4680 - }, - { - "epoch": 0.7988064791133845, - "grad_norm": 0.08285173773765564, - "learning_rate": 6.895247187968784e-05, - "loss": 0.012600034475326538, - "step": 4685 - }, - { - "epoch": 0.7996589940323956, - "grad_norm": 0.07812397927045822, - "learning_rate": 6.893851227239484e-05, - "loss": 0.008935874700546265, - "step": 4690 - }, - { - "epoch": 0.8005115089514067, - "grad_norm": 0.07500546425580978, - "learning_rate": 6.892453798855852e-05, - "loss": 0.010619471222162247, - "step": 4695 - }, - { - "epoch": 0.8013640238704177, - "grad_norm": 0.05211177095770836, - "learning_rate": 6.891054903470251e-05, - "loss": 0.008601508289575576, - "step": 4700 - }, - { - "epoch": 0.8022165387894288, - "grad_norm": 0.029616642743349075, - "learning_rate": 6.889654541735738e-05, - "loss": 0.007921247184276581, - "step": 4705 - }, - { - "epoch": 0.80306905370844, - "grad_norm": 0.06894131749868393, - "learning_rate": 6.888252714306044e-05, - "loss": 0.010935742408037186, - "step": 4710 - }, - { - "epoch": 0.803921568627451, - "grad_norm": 0.0766182467341423, - "learning_rate": 6.886849421835587e-05, - "loss": 0.010556706041097642, - "step": 4715 - }, - { - "epoch": 0.8047740835464621, - "grad_norm": 0.09164462238550186, - "learning_rate": 6.885444664979477e-05, - "loss": 0.010812586545944214, - "step": 4720 - }, - { - "epoch": 0.8056265984654731, - "grad_norm": 0.06463408470153809, - "learning_rate": 6.884038444393496e-05, - "loss": 0.009179002791643142, - "step": 4725 - }, - { - "epoch": 0.8064791133844842, - "grad_norm": 0.06639672070741653, - "learning_rate": 6.882630760734118e-05, - "loss": 0.012755092978477479, - "step": 4730 - }, - { - "epoch": 0.8073316283034954, - "grad_norm": 0.062024496495723724, - "learning_rate": 6.881221614658493e-05, - "loss": 0.009655499458312988, - "step": 4735 - }, - { - "epoch": 0.8081841432225064, - "grad_norm": 0.06751494854688644, - "learning_rate": 6.879811006824459e-05, - "loss": 0.010860173404216767, - "step": 4740 - }, - { - "epoch": 0.8090366581415175, - "grad_norm": 0.05535218119621277, - "learning_rate": 6.878398937890535e-05, - "loss": 0.011440058052539826, - "step": 4745 - }, - { - "epoch": 0.8098891730605285, - "grad_norm": 0.08365204185247421, - "learning_rate": 6.876985408515922e-05, - "loss": 0.011058357357978821, - "step": 4750 - }, - { - "epoch": 0.8107416879795396, - "grad_norm": 0.06450537592172623, - "learning_rate": 6.875570419360501e-05, - "loss": 0.01046149879693985, - "step": 4755 - }, - { - "epoch": 0.8115942028985508, - "grad_norm": 0.08542726188898087, - "learning_rate": 6.874153971084837e-05, - "loss": 0.009869573265314102, - "step": 4760 - }, - { - "epoch": 0.8124467178175618, - "grad_norm": 0.08184531331062317, - "learning_rate": 6.872736064350176e-05, - "loss": 0.01054040789604187, - "step": 4765 - }, - { - "epoch": 0.8132992327365729, - "grad_norm": 0.07068512588739395, - "learning_rate": 6.871316699818442e-05, - "loss": 0.009573462605476379, - "step": 4770 - }, - { - "epoch": 0.814151747655584, - "grad_norm": 0.08866564929485321, - "learning_rate": 6.869895878152244e-05, - "loss": 0.008078956604003906, - "step": 4775 - }, - { - "epoch": 0.815004262574595, - "grad_norm": 0.08215270191431046, - "learning_rate": 6.868473600014867e-05, - "loss": 0.010586659610271453, - "step": 4780 - }, - { - "epoch": 0.8158567774936062, - "grad_norm": 0.0449003241956234, - "learning_rate": 6.867049866070278e-05, - "loss": 0.008572281152009965, - "step": 4785 - }, - { - "epoch": 0.8167092924126172, - "grad_norm": 0.0766722783446312, - "learning_rate": 6.865624676983124e-05, - "loss": 0.009015947580337524, - "step": 4790 - }, - { - "epoch": 0.8175618073316283, - "grad_norm": 0.07404733449220657, - "learning_rate": 6.864198033418732e-05, - "loss": 0.014639028906822204, - "step": 4795 - }, - { - "epoch": 0.8184143222506394, - "grad_norm": 0.10437514632940292, - "learning_rate": 6.862769936043102e-05, - "loss": 0.009333716332912445, - "step": 4800 - }, - { - "epoch": 0.8192668371696504, - "grad_norm": 0.06732609868049622, - "learning_rate": 6.861340385522921e-05, - "loss": 0.007169592380523682, - "step": 4805 - }, - { - "epoch": 0.8201193520886616, - "grad_norm": 0.06016068905591965, - "learning_rate": 6.859909382525552e-05, - "loss": 0.009211564064025879, - "step": 4810 - }, - { - "epoch": 0.8209718670076727, - "grad_norm": 0.07302942126989365, - "learning_rate": 6.858476927719031e-05, - "loss": 0.009643231332302094, - "step": 4815 - }, - { - "epoch": 0.8218243819266837, - "grad_norm": 0.07511111348867416, - "learning_rate": 6.857043021772079e-05, - "loss": 0.010751830041408538, - "step": 4820 - }, - { - "epoch": 0.8226768968456948, - "grad_norm": 0.04791528359055519, - "learning_rate": 6.855607665354088e-05, - "loss": 0.008413314074277877, - "step": 4825 - }, - { - "epoch": 0.8235294117647058, - "grad_norm": 0.08279003202915192, - "learning_rate": 6.854170859135132e-05, - "loss": 0.009260118752717972, - "step": 4830 - }, - { - "epoch": 0.824381926683717, - "grad_norm": 0.06907783448696136, - "learning_rate": 6.85273260378596e-05, - "loss": 0.009681220352649688, - "step": 4835 - }, - { - "epoch": 0.8252344416027281, - "grad_norm": 0.09847953170537949, - "learning_rate": 6.851292899977997e-05, - "loss": 0.009847448766231537, - "step": 4840 - }, - { - "epoch": 0.8260869565217391, - "grad_norm": 0.0683966800570488, - "learning_rate": 6.849851748383343e-05, - "loss": 0.007601346075534821, - "step": 4845 - }, - { - "epoch": 0.8269394714407502, - "grad_norm": 0.0523662269115448, - "learning_rate": 6.848409149674779e-05, - "loss": 0.00900915488600731, - "step": 4850 - }, - { - "epoch": 0.8277919863597613, - "grad_norm": 0.07112257927656174, - "learning_rate": 6.846965104525757e-05, - "loss": 0.011230588704347611, - "step": 4855 - }, - { - "epoch": 0.8286445012787724, - "grad_norm": 0.09305348247289658, - "learning_rate": 6.845519613610402e-05, - "loss": 0.01087992861866951, - "step": 4860 - }, - { - "epoch": 0.8294970161977835, - "grad_norm": 0.062347084283828735, - "learning_rate": 6.84407267760352e-05, - "loss": 0.00806276947259903, - "step": 4865 - }, - { - "epoch": 0.8303495311167945, - "grad_norm": 0.09091926366090775, - "learning_rate": 6.84262429718059e-05, - "loss": 0.010562103241682053, - "step": 4870 - }, - { - "epoch": 0.8312020460358056, - "grad_norm": 0.05807443708181381, - "learning_rate": 6.841174473017762e-05, - "loss": 0.010788433253765106, - "step": 4875 - }, - { - "epoch": 0.8320545609548167, - "grad_norm": 0.06664252281188965, - "learning_rate": 6.839723205791863e-05, - "loss": 0.01030244082212448, - "step": 4880 - }, - { - "epoch": 0.8329070758738278, - "grad_norm": 0.07469561696052551, - "learning_rate": 6.838270496180392e-05, - "loss": 0.01250479370355606, - "step": 4885 - }, - { - "epoch": 0.8337595907928389, - "grad_norm": 0.0469096302986145, - "learning_rate": 6.836816344861523e-05, - "loss": 0.010546717792749405, - "step": 4890 - }, - { - "epoch": 0.83461210571185, - "grad_norm": 0.0646355077624321, - "learning_rate": 6.835360752514104e-05, - "loss": 0.008491561561822892, - "step": 4895 - }, - { - "epoch": 0.835464620630861, - "grad_norm": 0.06006006523966789, - "learning_rate": 6.83390371981765e-05, - "loss": 0.010175065696239471, - "step": 4900 - }, - { - "epoch": 0.8363171355498721, - "grad_norm": 0.0595518983900547, - "learning_rate": 6.832445247452355e-05, - "loss": 0.009471315890550613, - "step": 4905 - }, - { - "epoch": 0.8371696504688833, - "grad_norm": 0.0722845196723938, - "learning_rate": 6.830985336099081e-05, - "loss": 0.011656039208173753, - "step": 4910 - }, - { - "epoch": 0.8380221653878943, - "grad_norm": 0.09830670058727264, - "learning_rate": 6.829523986439366e-05, - "loss": 0.0106172576546669, - "step": 4915 - }, - { - "epoch": 0.8388746803069054, - "grad_norm": 0.0725899264216423, - "learning_rate": 6.828061199155413e-05, - "loss": 0.00935768336057663, - "step": 4920 - }, - { - "epoch": 0.8397271952259164, - "grad_norm": 0.06721889227628708, - "learning_rate": 6.826596974930101e-05, - "loss": 0.010951700061559677, - "step": 4925 - }, - { - "epoch": 0.8405797101449275, - "grad_norm": 0.09289079904556274, - "learning_rate": 6.82513131444698e-05, - "loss": 0.010057362169027329, - "step": 4930 - }, - { - "epoch": 0.8414322250639387, - "grad_norm": 0.07667957991361618, - "learning_rate": 6.823664218390267e-05, - "loss": 0.012943412363529205, - "step": 4935 - }, - { - "epoch": 0.8422847399829497, - "grad_norm": 0.041785743087530136, - "learning_rate": 6.822195687444853e-05, - "loss": 0.009845246374607087, - "step": 4940 - }, - { - "epoch": 0.8431372549019608, - "grad_norm": 0.062134817242622375, - "learning_rate": 6.820725722296295e-05, - "loss": 0.010506168007850647, - "step": 4945 - }, - { - "epoch": 0.8439897698209718, - "grad_norm": 0.0870882049202919, - "learning_rate": 6.819254323630825e-05, - "loss": 0.008953387290239334, - "step": 4950 - }, - { - "epoch": 0.8448422847399829, - "grad_norm": 0.06943989545106888, - "learning_rate": 6.817781492135337e-05, - "loss": 0.009975537657737732, - "step": 4955 - }, - { - "epoch": 0.8456947996589941, - "grad_norm": 0.0961625948548317, - "learning_rate": 6.8163072284974e-05, - "loss": 0.010299818217754364, - "step": 4960 - }, - { - "epoch": 0.8465473145780051, - "grad_norm": 0.0685473084449768, - "learning_rate": 6.814831533405249e-05, - "loss": 0.007892660051584243, - "step": 4965 - }, - { - "epoch": 0.8473998294970162, - "grad_norm": 0.06370922178030014, - "learning_rate": 6.813354407547787e-05, - "loss": 0.011673354357481004, - "step": 4970 - }, - { - "epoch": 0.8482523444160273, - "grad_norm": 0.1212867870926857, - "learning_rate": 6.811875851614586e-05, - "loss": 0.01049395203590393, - "step": 4975 - }, - { - "epoch": 0.8491048593350383, - "grad_norm": 0.07590476423501968, - "learning_rate": 6.810395866295885e-05, - "loss": 0.011261900514364242, - "step": 4980 - }, - { - "epoch": 0.8499573742540495, - "grad_norm": 0.06342966109514236, - "learning_rate": 6.808914452282592e-05, - "loss": 0.011414043605327606, - "step": 4985 - }, - { - "epoch": 0.8508098891730606, - "grad_norm": 0.08939556777477264, - "learning_rate": 6.807431610266278e-05, - "loss": 0.008719882369041443, - "step": 4990 - }, - { - "epoch": 0.8516624040920716, - "grad_norm": 0.09418119490146637, - "learning_rate": 6.805947340939183e-05, - "loss": 0.011406099796295166, - "step": 4995 - }, - { - "epoch": 0.8525149190110827, - "grad_norm": 0.09113836288452148, - "learning_rate": 6.804461644994213e-05, - "loss": 0.011070792376995087, - "step": 5000 - }, - { - "epoch": 0.8533674339300937, - "grad_norm": 0.06545080989599228, - "learning_rate": 6.802974523124941e-05, - "loss": 0.009602059423923493, - "step": 5005 - }, - { - "epoch": 0.8542199488491049, - "grad_norm": 0.07779792696237564, - "learning_rate": 6.801485976025607e-05, - "loss": 0.008207190036773681, - "step": 5010 - }, - { - "epoch": 0.855072463768116, - "grad_norm": 0.038606271147727966, - "learning_rate": 6.799996004391113e-05, - "loss": 0.008772189915180206, - "step": 5015 - }, - { - "epoch": 0.855924978687127, - "grad_norm": 0.07329479604959488, - "learning_rate": 6.798504608917025e-05, - "loss": 0.011226999759674072, - "step": 5020 - }, - { - "epoch": 0.8567774936061381, - "grad_norm": 0.08631903678178787, - "learning_rate": 6.797011790299579e-05, - "loss": 0.012361649423837662, - "step": 5025 - }, - { - "epoch": 0.8576300085251491, - "grad_norm": 0.09041957557201385, - "learning_rate": 6.79551754923567e-05, - "loss": 0.010576151311397552, - "step": 5030 - }, - { - "epoch": 0.8584825234441603, - "grad_norm": 0.07003892213106155, - "learning_rate": 6.794021886422861e-05, - "loss": 0.008748160302639007, - "step": 5035 - }, - { - "epoch": 0.8593350383631714, - "grad_norm": 0.06976106762886047, - "learning_rate": 6.792524802559378e-05, - "loss": 0.010539846867322922, - "step": 5040 - }, - { - "epoch": 0.8601875532821824, - "grad_norm": 0.05501266196370125, - "learning_rate": 6.791026298344107e-05, - "loss": 0.01045292615890503, - "step": 5045 - }, - { - "epoch": 0.8610400682011935, - "grad_norm": 0.051503781229257584, - "learning_rate": 6.789526374476602e-05, - "loss": 0.009410140663385391, - "step": 5050 - }, - { - "epoch": 0.8618925831202046, - "grad_norm": 0.05674072727560997, - "learning_rate": 6.788025031657076e-05, - "loss": 0.00899135023355484, - "step": 5055 - }, - { - "epoch": 0.8627450980392157, - "grad_norm": 0.09688259661197662, - "learning_rate": 6.786522270586406e-05, - "loss": 0.010228607058525085, - "step": 5060 - }, - { - "epoch": 0.8635976129582268, - "grad_norm": 0.08542542159557343, - "learning_rate": 6.785018091966131e-05, - "loss": 0.010819461196660995, - "step": 5065 - }, - { - "epoch": 0.8644501278772379, - "grad_norm": 0.05703757330775261, - "learning_rate": 6.783512496498452e-05, - "loss": 0.011345957219600678, - "step": 5070 - }, - { - "epoch": 0.8653026427962489, - "grad_norm": 0.08096349984407425, - "learning_rate": 6.782005484886231e-05, - "loss": 0.011220332235097885, - "step": 5075 - }, - { - "epoch": 0.86615515771526, - "grad_norm": 0.07224266976118088, - "learning_rate": 6.780497057832988e-05, - "loss": 0.012557309865951539, - "step": 5080 - }, - { - "epoch": 0.8670076726342711, - "grad_norm": 0.08718731999397278, - "learning_rate": 6.778987216042912e-05, - "loss": 0.009770408272743225, - "step": 5085 - }, - { - "epoch": 0.8678601875532822, - "grad_norm": 0.05731170251965523, - "learning_rate": 6.777475960220846e-05, - "loss": 0.008567098528146744, - "step": 5090 - }, - { - "epoch": 0.8687127024722933, - "grad_norm": 0.07938708364963531, - "learning_rate": 6.775963291072292e-05, - "loss": 0.010664292424917222, - "step": 5095 - }, - { - "epoch": 0.8695652173913043, - "grad_norm": 0.028421485796570778, - "learning_rate": 6.774449209303416e-05, - "loss": 0.010194088518619537, - "step": 5100 - }, - { - "epoch": 0.8704177323103154, - "grad_norm": 0.06094631180167198, - "learning_rate": 6.772933715621042e-05, - "loss": 0.009481045603752136, - "step": 5105 - }, - { - "epoch": 0.8712702472293266, - "grad_norm": 0.08887558430433273, - "learning_rate": 6.771416810732653e-05, - "loss": 0.012845474481582641, - "step": 5110 - }, - { - "epoch": 0.8721227621483376, - "grad_norm": 0.07431238144636154, - "learning_rate": 6.76989849534639e-05, - "loss": 0.00815560668706894, - "step": 5115 - }, - { - "epoch": 0.8729752770673487, - "grad_norm": 0.09380137175321579, - "learning_rate": 6.768378770171052e-05, - "loss": 0.009580246359109878, - "step": 5120 - }, - { - "epoch": 0.8738277919863597, - "grad_norm": 0.08001488447189331, - "learning_rate": 6.766857635916099e-05, - "loss": 0.00857289507985115, - "step": 5125 - }, - { - "epoch": 0.8746803069053708, - "grad_norm": 0.049355555325746536, - "learning_rate": 6.765335093291647e-05, - "loss": 0.009263276308774947, - "step": 5130 - }, - { - "epoch": 0.875532821824382, - "grad_norm": 0.05873994901776314, - "learning_rate": 6.763811143008469e-05, - "loss": 0.008233514428138734, - "step": 5135 - }, - { - "epoch": 0.876385336743393, - "grad_norm": 0.10190756618976593, - "learning_rate": 6.762285785777995e-05, - "loss": 0.01529676467180252, - "step": 5140 - }, - { - "epoch": 0.8772378516624041, - "grad_norm": 0.08395158499479294, - "learning_rate": 6.760759022312313e-05, - "loss": 0.00961325541138649, - "step": 5145 - }, - { - "epoch": 0.8780903665814151, - "grad_norm": 0.07476748526096344, - "learning_rate": 6.759230853324169e-05, - "loss": 0.010477164387702942, - "step": 5150 - }, - { - "epoch": 0.8789428815004262, - "grad_norm": 0.07773051410913467, - "learning_rate": 6.757701279526961e-05, - "loss": 0.010389962792396545, - "step": 5155 - }, - { - "epoch": 0.8797953964194374, - "grad_norm": 0.07345708459615707, - "learning_rate": 6.756170301634745e-05, - "loss": 0.009174319356679917, - "step": 5160 - }, - { - "epoch": 0.8806479113384484, - "grad_norm": 0.07917368412017822, - "learning_rate": 6.754637920362233e-05, - "loss": 0.012756256759166718, - "step": 5165 - }, - { - "epoch": 0.8815004262574595, - "grad_norm": 0.06016271933913231, - "learning_rate": 6.75310413642479e-05, - "loss": 0.011058077961206437, - "step": 5170 - }, - { - "epoch": 0.8823529411764706, - "grad_norm": 0.06637005507946014, - "learning_rate": 6.751568950538441e-05, - "loss": 0.013590328395366669, - "step": 5175 - }, - { - "epoch": 0.8832054560954816, - "grad_norm": 0.06486016511917114, - "learning_rate": 6.750032363419857e-05, - "loss": 0.010195261240005494, - "step": 5180 - }, - { - "epoch": 0.8840579710144928, - "grad_norm": 0.09800687432289124, - "learning_rate": 6.748494375786372e-05, - "loss": 0.011106249690055848, - "step": 5185 - }, - { - "epoch": 0.8849104859335039, - "grad_norm": 0.04665162041783333, - "learning_rate": 6.746954988355967e-05, - "loss": 0.009880972653627395, - "step": 5190 - }, - { - "epoch": 0.8857630008525149, - "grad_norm": 0.05554487928748131, - "learning_rate": 6.745414201847282e-05, - "loss": 0.009480565786361694, - "step": 5195 - }, - { - "epoch": 0.886615515771526, - "grad_norm": 0.059967316687107086, - "learning_rate": 6.743872016979605e-05, - "loss": 0.010072766989469528, - "step": 5200 - }, - { - "epoch": 0.887468030690537, - "grad_norm": 0.08275031298398972, - "learning_rate": 6.74232843447288e-05, - "loss": 0.010208947211503982, - "step": 5205 - }, - { - "epoch": 0.8883205456095482, - "grad_norm": 0.07091715931892395, - "learning_rate": 6.740783455047704e-05, - "loss": 0.011881709098815918, - "step": 5210 - }, - { - "epoch": 0.8891730605285593, - "grad_norm": 0.1465480923652649, - "learning_rate": 6.739237079425322e-05, - "loss": 0.010970161855220794, - "step": 5215 - }, - { - "epoch": 0.8900255754475703, - "grad_norm": 0.07571437954902649, - "learning_rate": 6.737689308327636e-05, - "loss": 0.010722124576568603, - "step": 5220 - }, - { - "epoch": 0.8908780903665814, - "grad_norm": 0.06671100109815598, - "learning_rate": 6.736140142477194e-05, - "loss": 0.010463282465934753, - "step": 5225 - }, - { - "epoch": 0.8917306052855924, - "grad_norm": 0.05676295980811119, - "learning_rate": 6.734589582597204e-05, - "loss": 0.00933043509721756, - "step": 5230 - }, - { - "epoch": 0.8925831202046036, - "grad_norm": 0.09708777070045471, - "learning_rate": 6.733037629411514e-05, - "loss": 0.011712780594825745, - "step": 5235 - }, - { - "epoch": 0.8934356351236147, - "grad_norm": 0.07073090970516205, - "learning_rate": 6.731484283644626e-05, - "loss": 0.008112293481826783, - "step": 5240 - }, - { - "epoch": 0.8942881500426257, - "grad_norm": 0.08821752667427063, - "learning_rate": 6.7299295460217e-05, - "loss": 0.01026250645518303, - "step": 5245 - }, - { - "epoch": 0.8951406649616368, - "grad_norm": 0.05165687948465347, - "learning_rate": 6.728373417268533e-05, - "loss": 0.008761890232563019, - "step": 5250 - }, - { - "epoch": 0.8959931798806479, - "grad_norm": 0.06872246414422989, - "learning_rate": 6.726815898111581e-05, - "loss": 0.012551462650299073, - "step": 5255 - }, - { - "epoch": 0.896845694799659, - "grad_norm": 0.07550673931837082, - "learning_rate": 6.725256989277944e-05, - "loss": 0.010165071487426758, - "step": 5260 - }, - { - "epoch": 0.8976982097186701, - "grad_norm": 0.05931933969259262, - "learning_rate": 6.723696691495373e-05, - "loss": 0.009457996487617493, - "step": 5265 - }, - { - "epoch": 0.8985507246376812, - "grad_norm": 0.10838861763477325, - "learning_rate": 6.722135005492268e-05, - "loss": 0.012053199112415314, - "step": 5270 - }, - { - "epoch": 0.8994032395566922, - "grad_norm": 0.062102265655994415, - "learning_rate": 6.720571931997676e-05, - "loss": 0.007582514733076096, - "step": 5275 - }, - { - "epoch": 0.9002557544757033, - "grad_norm": 0.05997858941555023, - "learning_rate": 6.719007471741289e-05, - "loss": 0.009348342567682267, - "step": 5280 - }, - { - "epoch": 0.9011082693947144, - "grad_norm": 0.05405129864811897, - "learning_rate": 6.717441625453451e-05, - "loss": 0.009910254925489425, - "step": 5285 - }, - { - "epoch": 0.9019607843137255, - "grad_norm": 0.08071329444646835, - "learning_rate": 6.715874393865152e-05, - "loss": 0.010672248899936676, - "step": 5290 - }, - { - "epoch": 0.9028132992327366, - "grad_norm": 0.06863300502300262, - "learning_rate": 6.714305777708027e-05, - "loss": 0.008570954948663712, - "step": 5295 - }, - { - "epoch": 0.9036658141517476, - "grad_norm": 0.07818135619163513, - "learning_rate": 6.712735777714357e-05, - "loss": 0.008647527545690536, - "step": 5300 - }, - { - "epoch": 0.9045183290707587, - "grad_norm": 0.14757376909255981, - "learning_rate": 6.711164394617072e-05, - "loss": 0.010825049877166749, - "step": 5305 - }, - { - "epoch": 0.9053708439897699, - "grad_norm": 0.07376445829868317, - "learning_rate": 6.709591629149746e-05, - "loss": 0.01299697458744049, - "step": 5310 - }, - { - "epoch": 0.9062233589087809, - "grad_norm": 0.05860469490289688, - "learning_rate": 6.708017482046597e-05, - "loss": 0.009549598395824432, - "step": 5315 - }, - { - "epoch": 0.907075873827792, - "grad_norm": 0.0799872875213623, - "learning_rate": 6.706441954042488e-05, - "loss": 0.009733843803405761, - "step": 5320 - }, - { - "epoch": 0.907928388746803, - "grad_norm": 0.05245954543352127, - "learning_rate": 6.704865045872932e-05, - "loss": 0.009799794852733612, - "step": 5325 - }, - { - "epoch": 0.9087809036658141, - "grad_norm": 0.05515241622924805, - "learning_rate": 6.703286758274079e-05, - "loss": 0.007391643524169922, - "step": 5330 - }, - { - "epoch": 0.9096334185848253, - "grad_norm": 0.05900256708264351, - "learning_rate": 6.701707091982726e-05, - "loss": 0.009107303619384766, - "step": 5335 - }, - { - "epoch": 0.9104859335038363, - "grad_norm": 0.09559495002031326, - "learning_rate": 6.700126047736317e-05, - "loss": 0.009052158147096635, - "step": 5340 - }, - { - "epoch": 0.9113384484228474, - "grad_norm": 0.11189334839582443, - "learning_rate": 6.698543626272932e-05, - "loss": 0.011292549222707749, - "step": 5345 - }, - { - "epoch": 0.9121909633418585, - "grad_norm": 0.07031659781932831, - "learning_rate": 6.6969598283313e-05, - "loss": 0.008589480072259903, - "step": 5350 - }, - { - "epoch": 0.9130434782608695, - "grad_norm": 0.1652907431125641, - "learning_rate": 6.69537465465079e-05, - "loss": 0.010865563899278641, - "step": 5355 - }, - { - "epoch": 0.9138959931798807, - "grad_norm": 0.06157436594367027, - "learning_rate": 6.693788105971413e-05, - "loss": 0.012611952424049378, - "step": 5360 - }, - { - "epoch": 0.9147485080988917, - "grad_norm": 0.03928734362125397, - "learning_rate": 6.692200183033826e-05, - "loss": 0.009115418046712875, - "step": 5365 - }, - { - "epoch": 0.9156010230179028, - "grad_norm": 0.06604880094528198, - "learning_rate": 6.690610886579321e-05, - "loss": 0.010015038400888443, - "step": 5370 - }, - { - "epoch": 0.9164535379369139, - "grad_norm": 0.07625336199998856, - "learning_rate": 6.689020217349835e-05, - "loss": 0.010416677594184876, - "step": 5375 - }, - { - "epoch": 0.9173060528559249, - "grad_norm": 0.07674526423215866, - "learning_rate": 6.687428176087946e-05, - "loss": 0.01016802191734314, - "step": 5380 - }, - { - "epoch": 0.9181585677749361, - "grad_norm": 0.08422617614269257, - "learning_rate": 6.685834763536872e-05, - "loss": 0.011127004027366638, - "step": 5385 - }, - { - "epoch": 0.9190110826939472, - "grad_norm": 0.057719554752111435, - "learning_rate": 6.684239980440472e-05, - "loss": 0.008915853500366212, - "step": 5390 - }, - { - "epoch": 0.9198635976129582, - "grad_norm": 0.056555263698101044, - "learning_rate": 6.682643827543241e-05, - "loss": 0.0095272496342659, - "step": 5395 - }, - { - "epoch": 0.9207161125319693, - "grad_norm": 0.07605638355016708, - "learning_rate": 6.681046305590317e-05, - "loss": 0.010731159895658492, - "step": 5400 - }, - { - "epoch": 0.9215686274509803, - "grad_norm": 0.07499220222234726, - "learning_rate": 6.679447415327479e-05, - "loss": 0.010919998586177825, - "step": 5405 - }, - { - "epoch": 0.9224211423699915, - "grad_norm": 0.052863143384456635, - "learning_rate": 6.677847157501137e-05, - "loss": 0.011300939321517944, - "step": 5410 - }, - { - "epoch": 0.9232736572890026, - "grad_norm": 0.09210597723722458, - "learning_rate": 6.676245532858351e-05, - "loss": 0.013997772336006164, - "step": 5415 - }, - { - "epoch": 0.9241261722080136, - "grad_norm": 0.0746840238571167, - "learning_rate": 6.674642542146807e-05, - "loss": 0.012542533874511718, - "step": 5420 - }, - { - "epoch": 0.9249786871270247, - "grad_norm": 0.06137506663799286, - "learning_rate": 6.67303818611484e-05, - "loss": 0.008029398322105408, - "step": 5425 - }, - { - "epoch": 0.9258312020460358, - "grad_norm": 0.03867131471633911, - "learning_rate": 6.671432465511411e-05, - "loss": 0.009305672347545623, - "step": 5430 - }, - { - "epoch": 0.9266837169650469, - "grad_norm": 0.08291540294885635, - "learning_rate": 6.669825381086128e-05, - "loss": 0.011059926450252533, - "step": 5435 - }, - { - "epoch": 0.927536231884058, - "grad_norm": 0.0689411610364914, - "learning_rate": 6.668216933589228e-05, - "loss": 0.008350597321987152, - "step": 5440 - }, - { - "epoch": 0.928388746803069, - "grad_norm": 0.051505669951438904, - "learning_rate": 6.666607123771591e-05, - "loss": 0.011102759093046189, - "step": 5445 - }, - { - "epoch": 0.9292412617220801, - "grad_norm": 0.08774327486753464, - "learning_rate": 6.664995952384729e-05, - "loss": 0.009498609602451325, - "step": 5450 - }, - { - "epoch": 0.9300937766410913, - "grad_norm": 0.060566093772649765, - "learning_rate": 6.663383420180789e-05, - "loss": 0.007811173051595688, - "step": 5455 - }, - { - "epoch": 0.9309462915601023, - "grad_norm": 0.09952156245708466, - "learning_rate": 6.661769527912555e-05, - "loss": 0.010514630377292633, - "step": 5460 - }, - { - "epoch": 0.9317988064791134, - "grad_norm": 0.04871741309762001, - "learning_rate": 6.660154276333446e-05, - "loss": 0.006630983203649521, - "step": 5465 - }, - { - "epoch": 0.9326513213981245, - "grad_norm": 0.06254981458187103, - "learning_rate": 6.658537666197517e-05, - "loss": 0.009895097464323044, - "step": 5470 - }, - { - "epoch": 0.9335038363171355, - "grad_norm": 0.08351470530033112, - "learning_rate": 6.656919698259452e-05, - "loss": 0.010659988969564438, - "step": 5475 - }, - { - "epoch": 0.9343563512361467, - "grad_norm": 0.07085305452346802, - "learning_rate": 6.655300373274575e-05, - "loss": 0.008971457183361054, - "step": 5480 - }, - { - "epoch": 0.9352088661551577, - "grad_norm": 0.06461923569440842, - "learning_rate": 6.653679691998839e-05, - "loss": 0.009138958156108856, - "step": 5485 - }, - { - "epoch": 0.9360613810741688, - "grad_norm": 0.11675399541854858, - "learning_rate": 6.652057655188832e-05, - "loss": 0.008388948440551759, - "step": 5490 - }, - { - "epoch": 0.9369138959931799, - "grad_norm": 0.09698229283094406, - "learning_rate": 6.650434263601777e-05, - "loss": 0.011885351687669753, - "step": 5495 - }, - { - "epoch": 0.9377664109121909, - "grad_norm": 0.06786464154720306, - "learning_rate": 6.648809517995524e-05, - "loss": 0.012351768463850022, - "step": 5500 - }, - { - "epoch": 0.9386189258312021, - "grad_norm": 0.09192351251840591, - "learning_rate": 6.647183419128561e-05, - "loss": 0.010940471291542053, - "step": 5505 - }, - { - "epoch": 0.9394714407502132, - "grad_norm": 0.05660499259829521, - "learning_rate": 6.645555967760003e-05, - "loss": 0.01160380095243454, - "step": 5510 - }, - { - "epoch": 0.9403239556692242, - "grad_norm": 0.0829106792807579, - "learning_rate": 6.6439271646496e-05, - "loss": 0.009475469589233398, - "step": 5515 - }, - { - "epoch": 0.9411764705882353, - "grad_norm": 0.08007021248340607, - "learning_rate": 6.642297010557733e-05, - "loss": 0.010524801164865493, - "step": 5520 - }, - { - "epoch": 0.9420289855072463, - "grad_norm": 0.07578855752944946, - "learning_rate": 6.640665506245406e-05, - "loss": 0.008864742517471314, - "step": 5525 - }, - { - "epoch": 0.9428815004262575, - "grad_norm": 0.064674511551857, - "learning_rate": 6.639032652474265e-05, - "loss": 0.010619612783193589, - "step": 5530 - }, - { - "epoch": 0.9437340153452686, - "grad_norm": 0.07176528871059418, - "learning_rate": 6.637398450006579e-05, - "loss": 0.011696039140224457, - "step": 5535 - }, - { - "epoch": 0.9445865302642796, - "grad_norm": 0.16578713059425354, - "learning_rate": 6.635762899605248e-05, - "loss": 0.009867334365844726, - "step": 5540 - }, - { - "epoch": 0.9454390451832907, - "grad_norm": 0.069394052028656, - "learning_rate": 6.634126002033802e-05, - "loss": 0.012951886653900147, - "step": 5545 - }, - { - "epoch": 0.9462915601023018, - "grad_norm": 0.07247213274240494, - "learning_rate": 6.632487758056397e-05, - "loss": 0.009127721190452576, - "step": 5550 - }, - { - "epoch": 0.9471440750213129, - "grad_norm": 0.11084317415952682, - "learning_rate": 6.630848168437822e-05, - "loss": 0.009197863936424255, - "step": 5555 - }, - { - "epoch": 0.947996589940324, - "grad_norm": 0.08719248324632645, - "learning_rate": 6.629207233943492e-05, - "loss": 0.010768509656190871, - "step": 5560 - }, - { - "epoch": 0.948849104859335, - "grad_norm": 0.0857851505279541, - "learning_rate": 6.62756495533945e-05, - "loss": 0.009163270145654679, - "step": 5565 - }, - { - "epoch": 0.9497016197783461, - "grad_norm": 0.09808778762817383, - "learning_rate": 6.625921333392362e-05, - "loss": 0.01005362868309021, - "step": 5570 - }, - { - "epoch": 0.9505541346973572, - "grad_norm": 0.08482059836387634, - "learning_rate": 6.624276368869532e-05, - "loss": 0.008303509652614593, - "step": 5575 - }, - { - "epoch": 0.9514066496163683, - "grad_norm": 0.07460886240005493, - "learning_rate": 6.62263006253888e-05, - "loss": 0.00857923850417137, - "step": 5580 - }, - { - "epoch": 0.9522591645353794, - "grad_norm": 0.09494256228208542, - "learning_rate": 6.620982415168956e-05, - "loss": 0.009573552012443542, - "step": 5585 - }, - { - "epoch": 0.9531116794543905, - "grad_norm": 0.08753519505262375, - "learning_rate": 6.61933342752894e-05, - "loss": 0.010430536419153213, - "step": 5590 - }, - { - "epoch": 0.9539641943734015, - "grad_norm": 0.05132949724793434, - "learning_rate": 6.617683100388632e-05, - "loss": 0.009080658107995987, - "step": 5595 - }, - { - "epoch": 0.9548167092924126, - "grad_norm": 0.07516856491565704, - "learning_rate": 6.61603143451846e-05, - "loss": 0.009718524664640427, - "step": 5600 - }, - { - "epoch": 0.9556692242114238, - "grad_norm": 0.12723733484745026, - "learning_rate": 6.614378430689477e-05, - "loss": 0.01136334240436554, - "step": 5605 - }, - { - "epoch": 0.9565217391304348, - "grad_norm": 0.0905863493680954, - "learning_rate": 6.612724089673359e-05, - "loss": 0.013209307193756103, - "step": 5610 - }, - { - "epoch": 0.9573742540494459, - "grad_norm": 0.06084009259939194, - "learning_rate": 6.611068412242409e-05, - "loss": 0.01001257449388504, - "step": 5615 - }, - { - "epoch": 0.9582267689684569, - "grad_norm": 0.11799532175064087, - "learning_rate": 6.60941139916955e-05, - "loss": 0.008610795438289642, - "step": 5620 - }, - { - "epoch": 0.959079283887468, - "grad_norm": 0.08627504110336304, - "learning_rate": 6.607753051228333e-05, - "loss": 0.009049218893051148, - "step": 5625 - }, - { - "epoch": 0.9599317988064792, - "grad_norm": 0.0910186693072319, - "learning_rate": 6.60609336919293e-05, - "loss": 0.01068672090768814, - "step": 5630 - }, - { - "epoch": 0.9607843137254902, - "grad_norm": 0.06503022462129593, - "learning_rate": 6.604432353838134e-05, - "loss": 0.010604655742645264, - "step": 5635 - }, - { - "epoch": 0.9616368286445013, - "grad_norm": 0.07681523263454437, - "learning_rate": 6.602770005939363e-05, - "loss": 0.010527564585208893, - "step": 5640 - }, - { - "epoch": 0.9624893435635123, - "grad_norm": 0.0680806040763855, - "learning_rate": 6.601106326272659e-05, - "loss": 0.009375665336847305, - "step": 5645 - }, - { - "epoch": 0.9633418584825234, - "grad_norm": 0.06601905822753906, - "learning_rate": 6.599441315614678e-05, - "loss": 0.009470004588365555, - "step": 5650 - }, - { - "epoch": 0.9641943734015346, - "grad_norm": 0.06291890889406204, - "learning_rate": 6.597774974742706e-05, - "loss": 0.012320800870656966, - "step": 5655 - }, - { - "epoch": 0.9650468883205456, - "grad_norm": 0.0956176221370697, - "learning_rate": 6.596107304434645e-05, - "loss": 0.01018187329173088, - "step": 5660 - }, - { - "epoch": 0.9658994032395567, - "grad_norm": 0.06642715632915497, - "learning_rate": 6.59443830546902e-05, - "loss": 0.010271859169006348, - "step": 5665 - }, - { - "epoch": 0.9667519181585678, - "grad_norm": 0.06783592700958252, - "learning_rate": 6.592767978624973e-05, - "loss": 0.00986798033118248, - "step": 5670 - }, - { - "epoch": 0.9676044330775788, - "grad_norm": 0.05877846106886864, - "learning_rate": 6.591096324682272e-05, - "loss": 0.009708859026432037, - "step": 5675 - }, - { - "epoch": 0.96845694799659, - "grad_norm": 0.049258604645729065, - "learning_rate": 6.589423344421297e-05, - "loss": 0.008615868538618088, - "step": 5680 - }, - { - "epoch": 0.969309462915601, - "grad_norm": 0.05622515454888344, - "learning_rate": 6.587749038623052e-05, - "loss": 0.010138686001300811, - "step": 5685 - }, - { - "epoch": 0.9701619778346121, - "grad_norm": 0.05719893425703049, - "learning_rate": 6.586073408069159e-05, - "loss": 0.00869678258895874, - "step": 5690 - }, - { - "epoch": 0.9710144927536232, - "grad_norm": 0.07675095647573471, - "learning_rate": 6.584396453541856e-05, - "loss": 0.010557885468006133, - "step": 5695 - }, - { - "epoch": 0.9718670076726342, - "grad_norm": 0.0999779924750328, - "learning_rate": 6.582718175824006e-05, - "loss": 0.010667790472507478, - "step": 5700 - }, - { - "epoch": 0.9727195225916454, - "grad_norm": 0.04620293527841568, - "learning_rate": 6.58103857569908e-05, - "loss": 0.0078192800283432, - "step": 5705 - }, - { - "epoch": 0.9735720375106565, - "grad_norm": 0.06725125759840012, - "learning_rate": 6.579357653951174e-05, - "loss": 0.010441574454307555, - "step": 5710 - }, - { - "epoch": 0.9744245524296675, - "grad_norm": 0.09062530100345612, - "learning_rate": 6.577675411364997e-05, - "loss": 0.011757946014404297, - "step": 5715 - }, - { - "epoch": 0.9752770673486786, - "grad_norm": 0.050651032477617264, - "learning_rate": 6.575991848725876e-05, - "loss": 0.009817829728126526, - "step": 5720 - }, - { - "epoch": 0.9761295822676896, - "grad_norm": 0.06951560825109482, - "learning_rate": 6.574306966819755e-05, - "loss": 0.008903174102306366, - "step": 5725 - }, - { - "epoch": 0.9769820971867008, - "grad_norm": 0.0733589306473732, - "learning_rate": 6.57262076643319e-05, - "loss": 0.009786784648895264, - "step": 5730 - }, - { - "epoch": 0.9778346121057119, - "grad_norm": 0.0736282467842102, - "learning_rate": 6.570933248353359e-05, - "loss": 0.012176553905010223, - "step": 5735 - }, - { - "epoch": 0.9786871270247229, - "grad_norm": 0.069704569876194, - "learning_rate": 6.56924441336805e-05, - "loss": 0.008654942363500595, - "step": 5740 - }, - { - "epoch": 0.979539641943734, - "grad_norm": 0.07497496902942657, - "learning_rate": 6.567554262265668e-05, - "loss": 0.010902392119169236, - "step": 5745 - }, - { - "epoch": 0.9803921568627451, - "grad_norm": 0.07559038698673248, - "learning_rate": 6.56586279583523e-05, - "loss": 0.00921270027756691, - "step": 5750 - }, - { - "epoch": 0.9812446717817562, - "grad_norm": 0.05894545465707779, - "learning_rate": 6.56417001486637e-05, - "loss": 0.009073206037282944, - "step": 5755 - }, - { - "epoch": 0.9820971867007673, - "grad_norm": 0.06555377691984177, - "learning_rate": 6.562475920149335e-05, - "loss": 0.010872729122638702, - "step": 5760 - }, - { - "epoch": 0.9829497016197783, - "grad_norm": 0.15036429464817047, - "learning_rate": 6.560780512474984e-05, - "loss": 0.009879975020885468, - "step": 5765 - }, - { - "epoch": 0.9838022165387894, - "grad_norm": 0.06842299550771713, - "learning_rate": 6.559083792634791e-05, - "loss": 0.00965554341673851, - "step": 5770 - }, - { - "epoch": 0.9846547314578005, - "grad_norm": 0.0486510805785656, - "learning_rate": 6.557385761420839e-05, - "loss": 0.00872802734375, - "step": 5775 - }, - { - "epoch": 0.9855072463768116, - "grad_norm": 0.059796739369630814, - "learning_rate": 6.555686419625826e-05, - "loss": 0.009720289707183838, - "step": 5780 - }, - { - "epoch": 0.9863597612958227, - "grad_norm": 0.07971934229135513, - "learning_rate": 6.553985768043062e-05, - "loss": 0.008043316006660462, - "step": 5785 - }, - { - "epoch": 0.9872122762148338, - "grad_norm": 0.09556971490383148, - "learning_rate": 6.552283807466468e-05, - "loss": 0.009030704945325851, - "step": 5790 - }, - { - "epoch": 0.9880647911338448, - "grad_norm": 0.04631726071238518, - "learning_rate": 6.550580538690577e-05, - "loss": 0.007388583570718765, - "step": 5795 - }, - { - "epoch": 0.9889173060528559, - "grad_norm": 0.08172665536403656, - "learning_rate": 6.548875962510528e-05, - "loss": 0.007863265275955201, - "step": 5800 - }, - { - "epoch": 0.989769820971867, - "grad_norm": 0.11129096895456314, - "learning_rate": 6.547170079722076e-05, - "loss": 0.012218999862670898, - "step": 5805 - }, - { - "epoch": 0.9906223358908781, - "grad_norm": 0.06619804352521896, - "learning_rate": 6.545462891121584e-05, - "loss": 0.007535400986671448, - "step": 5810 - }, - { - "epoch": 0.9914748508098892, - "grad_norm": 0.11495351046323776, - "learning_rate": 6.543754397506025e-05, - "loss": 0.0121284119784832, - "step": 5815 - }, - { - "epoch": 0.9923273657289002, - "grad_norm": 0.06017669290304184, - "learning_rate": 6.542044599672978e-05, - "loss": 0.008776353299617767, - "step": 5820 - }, - { - "epoch": 0.9931798806479113, - "grad_norm": 0.08049561828374863, - "learning_rate": 6.540333498420637e-05, - "loss": 0.010460492223501205, - "step": 5825 - }, - { - "epoch": 0.9940323955669225, - "grad_norm": 0.07041274011135101, - "learning_rate": 6.538621094547798e-05, - "loss": 0.008290639519691468, - "step": 5830 - }, - { - "epoch": 0.9948849104859335, - "grad_norm": 0.061981480568647385, - "learning_rate": 6.53690738885387e-05, - "loss": 0.007011125236749649, - "step": 5835 - }, - { - "epoch": 0.9957374254049446, - "grad_norm": 0.09022640436887741, - "learning_rate": 6.535192382138867e-05, - "loss": 0.012455084919929504, - "step": 5840 - }, - { - "epoch": 0.9965899403239556, - "grad_norm": 0.05652628839015961, - "learning_rate": 6.53347607520341e-05, - "loss": 0.011704784631729127, - "step": 5845 - }, - { - "epoch": 0.9974424552429667, - "grad_norm": 0.0717577114701271, - "learning_rate": 6.531758468848732e-05, - "loss": 0.007738448679447174, - "step": 5850 - }, - { - "epoch": 0.9982949701619779, - "grad_norm": 0.04797588661313057, - "learning_rate": 6.530039563876665e-05, - "loss": 0.00894927978515625, - "step": 5855 - }, - { - "epoch": 0.9991474850809889, - "grad_norm": 0.06541015207767487, - "learning_rate": 6.528319361089651e-05, - "loss": 0.00731588676571846, - "step": 5860 - }, - { - "epoch": 0.9998294970161978, - "eval_loss": 0.03369956836104393, - "eval_runtime": 3.5892, - "eval_samples_per_second": 70.21, - "eval_steps_per_second": 1.114, - "step": 5864 - }, - { - "eval_cer_subset": 0.014444607292328236, - "eval_cer_subset_edit_distance": 887, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 5864 - }, - { - "epoch": 1.0, - "grad_norm": 0.06960473209619522, - "learning_rate": 6.52659786129074e-05, - "loss": 0.009423434734344482, - "step": 5865 - }, - { - "epoch": 1.000852514919011, - "grad_norm": 0.08438396453857422, - "learning_rate": 6.524875065283587e-05, - "loss": 0.009560108184814453, - "step": 5870 - }, - { - "epoch": 1.0017050298380221, - "grad_norm": 0.06281089037656784, - "learning_rate": 6.523150973872446e-05, - "loss": 0.007503298670053482, - "step": 5875 - }, - { - "epoch": 1.0025575447570332, - "grad_norm": 0.11766793578863144, - "learning_rate": 6.52142558786218e-05, - "loss": 0.008890827000141144, - "step": 5880 - }, - { - "epoch": 1.0034100596760442, - "grad_norm": 0.058115314692258835, - "learning_rate": 6.519698908058262e-05, - "loss": 0.006190531700849533, - "step": 5885 - }, - { - "epoch": 1.0042625745950555, - "grad_norm": 0.06857501715421677, - "learning_rate": 6.51797093526676e-05, - "loss": 0.007162582129240036, - "step": 5890 - }, - { - "epoch": 1.0051150895140666, - "grad_norm": 0.03517467528581619, - "learning_rate": 6.51624167029435e-05, - "loss": 0.0060476396232843396, - "step": 5895 - }, - { - "epoch": 1.0059676044330776, - "grad_norm": 0.10047292709350586, - "learning_rate": 6.514511113948307e-05, - "loss": 0.006416718661785126, - "step": 5900 - }, - { - "epoch": 1.0068201193520887, - "grad_norm": 0.07266796380281448, - "learning_rate": 6.512779267036518e-05, - "loss": 0.005519292503595352, - "step": 5905 - }, - { - "epoch": 1.0076726342710998, - "grad_norm": 0.05385264754295349, - "learning_rate": 6.511046130367464e-05, - "loss": 0.006731215119361878, - "step": 5910 - }, - { - "epoch": 1.0085251491901108, - "grad_norm": 0.0927869975566864, - "learning_rate": 6.50931170475023e-05, - "loss": 0.0073065564036369325, - "step": 5915 - }, - { - "epoch": 1.0093776641091219, - "grad_norm": 0.08416371792554855, - "learning_rate": 6.507575990994504e-05, - "loss": 0.005843915045261383, - "step": 5920 - }, - { - "epoch": 1.010230179028133, - "grad_norm": 0.06585095822811127, - "learning_rate": 6.505838989910576e-05, - "loss": 0.006345044076442719, - "step": 5925 - }, - { - "epoch": 1.011082693947144, - "grad_norm": 0.06341785192489624, - "learning_rate": 6.504100702309336e-05, - "loss": 0.005391617119312286, - "step": 5930 - }, - { - "epoch": 1.011935208866155, - "grad_norm": 0.08260001242160797, - "learning_rate": 6.502361129002273e-05, - "loss": 0.008031262457370758, - "step": 5935 - }, - { - "epoch": 1.0127877237851663, - "grad_norm": 0.08805666118860245, - "learning_rate": 6.500620270801478e-05, - "loss": 0.006408621370792389, - "step": 5940 - }, - { - "epoch": 1.0136402387041774, - "grad_norm": 0.0704861581325531, - "learning_rate": 6.498878128519642e-05, - "loss": 0.006208440661430359, - "step": 5945 - }, - { - "epoch": 1.0144927536231885, - "grad_norm": 0.07539117336273193, - "learning_rate": 6.497134702970055e-05, - "loss": 0.005263582617044449, - "step": 5950 - }, - { - "epoch": 1.0153452685421995, - "grad_norm": 0.022507963702082634, - "learning_rate": 6.495389994966606e-05, - "loss": 0.005692056566476822, - "step": 5955 - }, - { - "epoch": 1.0161977834612106, - "grad_norm": 0.05641510710120201, - "learning_rate": 6.493644005323783e-05, - "loss": 0.007954449951648712, - "step": 5960 - }, - { - "epoch": 1.0170502983802217, - "grad_norm": 0.04853788763284683, - "learning_rate": 6.49189673485667e-05, - "loss": 0.006910678744316101, - "step": 5965 - }, - { - "epoch": 1.0179028132992327, - "grad_norm": 0.07868898659944534, - "learning_rate": 6.490148184380956e-05, - "loss": 0.007678037136793136, - "step": 5970 - }, - { - "epoch": 1.0187553282182438, - "grad_norm": 0.08481275290250778, - "learning_rate": 6.488398354712917e-05, - "loss": 0.0060794509947299956, - "step": 5975 - }, - { - "epoch": 1.0196078431372548, - "grad_norm": 0.05573422089219093, - "learning_rate": 6.486647246669435e-05, - "loss": 0.0050107244402170185, - "step": 5980 - }, - { - "epoch": 1.020460358056266, - "grad_norm": 0.10777781158685684, - "learning_rate": 6.484894861067983e-05, - "loss": 0.006611569225788117, - "step": 5985 - }, - { - "epoch": 1.0213128729752772, - "grad_norm": 0.041842151433229446, - "learning_rate": 6.483141198726635e-05, - "loss": 0.0060344856232404705, - "step": 5990 - }, - { - "epoch": 1.0221653878942882, - "grad_norm": 0.05765567347407341, - "learning_rate": 6.48138626046406e-05, - "loss": 0.005772604793310166, - "step": 5995 - }, - { - "epoch": 1.0230179028132993, - "grad_norm": 0.05987582355737686, - "learning_rate": 6.479630047099517e-05, - "loss": 0.006899695098400116, - "step": 6000 - }, - { - "epoch": 1.0238704177323104, - "grad_norm": 0.046085257083177567, - "learning_rate": 6.477872559452867e-05, - "loss": 0.006151453405618667, - "step": 6005 - }, - { - "epoch": 1.0247229326513214, - "grad_norm": 0.05994739755988121, - "learning_rate": 6.476113798344566e-05, - "loss": 0.007787984609603882, - "step": 6010 - }, - { - "epoch": 1.0255754475703325, - "grad_norm": 0.08866287767887115, - "learning_rate": 6.47435376459566e-05, - "loss": 0.007754974067211151, - "step": 6015 - }, - { - "epoch": 1.0264279624893435, - "grad_norm": 0.07492240518331528, - "learning_rate": 6.472592459027793e-05, - "loss": 0.005562775582075119, - "step": 6020 - }, - { - "epoch": 1.0272804774083546, - "grad_norm": 0.058771468698978424, - "learning_rate": 6.470829882463198e-05, - "loss": 0.008101420104503631, - "step": 6025 - }, - { - "epoch": 1.0281329923273657, - "grad_norm": 0.08099868148565292, - "learning_rate": 6.469066035724708e-05, - "loss": 0.007585109025239944, - "step": 6030 - }, - { - "epoch": 1.0289855072463767, - "grad_norm": 0.09368649870157242, - "learning_rate": 6.467300919635743e-05, - "loss": 0.007342393696308136, - "step": 6035 - }, - { - "epoch": 1.029838022165388, - "grad_norm": 0.07358572632074356, - "learning_rate": 6.465534535020317e-05, - "loss": 0.007179292291402817, - "step": 6040 - }, - { - "epoch": 1.030690537084399, - "grad_norm": 0.0542459636926651, - "learning_rate": 6.46376688270304e-05, - "loss": 0.007178785651922226, - "step": 6045 - }, - { - "epoch": 1.0315430520034101, - "grad_norm": 0.04534808546304703, - "learning_rate": 6.461997963509109e-05, - "loss": 0.005939013883471489, - "step": 6050 - }, - { - "epoch": 1.0323955669224212, - "grad_norm": 0.04498334974050522, - "learning_rate": 6.460227778264314e-05, - "loss": 0.007932021468877792, - "step": 6055 - }, - { - "epoch": 1.0332480818414322, - "grad_norm": 0.09503943473100662, - "learning_rate": 6.458456327795038e-05, - "loss": 0.006005316227674484, - "step": 6060 - }, - { - "epoch": 1.0341005967604433, - "grad_norm": 0.06634567677974701, - "learning_rate": 6.456683612928252e-05, - "loss": 0.00472346730530262, - "step": 6065 - }, - { - "epoch": 1.0349531116794544, - "grad_norm": 0.06090138852596283, - "learning_rate": 6.454909634491518e-05, - "loss": 0.0071956045925617215, - "step": 6070 - }, - { - "epoch": 1.0358056265984654, - "grad_norm": 0.09833965450525284, - "learning_rate": 6.453134393312988e-05, - "loss": 0.00738539919257164, - "step": 6075 - }, - { - "epoch": 1.0366581415174765, - "grad_norm": 0.07924133539199829, - "learning_rate": 6.451357890221406e-05, - "loss": 0.008464773744344711, - "step": 6080 - }, - { - "epoch": 1.0375106564364875, - "grad_norm": 0.04132373258471489, - "learning_rate": 6.4495801260461e-05, - "loss": 0.005705388635396958, - "step": 6085 - }, - { - "epoch": 1.0383631713554988, - "grad_norm": 0.08653424680233002, - "learning_rate": 6.44780110161699e-05, - "loss": 0.00777137503027916, - "step": 6090 - }, - { - "epoch": 1.0392156862745099, - "grad_norm": 0.08147025108337402, - "learning_rate": 6.446020817764583e-05, - "loss": 0.005003783106803894, - "step": 6095 - }, - { - "epoch": 1.040068201193521, - "grad_norm": 0.07091398537158966, - "learning_rate": 6.444239275319977e-05, - "loss": 0.005957254022359848, - "step": 6100 - }, - { - "epoch": 1.040920716112532, - "grad_norm": 0.06259306520223618, - "learning_rate": 6.442456475114855e-05, - "loss": 0.005096634104847908, - "step": 6105 - }, - { - "epoch": 1.041773231031543, - "grad_norm": 0.07044103741645813, - "learning_rate": 6.440672417981485e-05, - "loss": 0.00557241328060627, - "step": 6110 - }, - { - "epoch": 1.0426257459505541, - "grad_norm": 0.05029159039258957, - "learning_rate": 6.438887104752726e-05, - "loss": 0.0056043524295091626, - "step": 6115 - }, - { - "epoch": 1.0434782608695652, - "grad_norm": 0.04778699576854706, - "learning_rate": 6.437100536262022e-05, - "loss": 0.00855453684926033, - "step": 6120 - }, - { - "epoch": 1.0443307757885762, - "grad_norm": 0.07467184215784073, - "learning_rate": 6.435312713343401e-05, - "loss": 0.006690071523189544, - "step": 6125 - }, - { - "epoch": 1.0451832907075873, - "grad_norm": 0.07189153879880905, - "learning_rate": 6.433523636831481e-05, - "loss": 0.007009527087211609, - "step": 6130 - }, - { - "epoch": 1.0460358056265984, - "grad_norm": 0.08000020682811737, - "learning_rate": 6.431733307561459e-05, - "loss": 0.007411211729049683, - "step": 6135 - }, - { - "epoch": 1.0468883205456097, - "grad_norm": 0.06737730652093887, - "learning_rate": 6.429941726369124e-05, - "loss": 0.006843548268079758, - "step": 6140 - }, - { - "epoch": 1.0477408354646207, - "grad_norm": 0.09834714978933334, - "learning_rate": 6.428148894090841e-05, - "loss": 0.007167841494083405, - "step": 6145 - }, - { - "epoch": 1.0485933503836318, - "grad_norm": 0.06415695697069168, - "learning_rate": 6.426354811563567e-05, - "loss": 0.005131457373499871, - "step": 6150 - }, - { - "epoch": 1.0494458653026428, - "grad_norm": 0.07823871076107025, - "learning_rate": 6.424559479624839e-05, - "loss": 0.004797356575727463, - "step": 6155 - }, - { - "epoch": 1.050298380221654, - "grad_norm": 0.07165013998746872, - "learning_rate": 6.422762899112777e-05, - "loss": 0.006430945545434952, - "step": 6160 - }, - { - "epoch": 1.051150895140665, - "grad_norm": 0.10924427956342697, - "learning_rate": 6.420965070866086e-05, - "loss": 0.008151047676801682, - "step": 6165 - }, - { - "epoch": 1.052003410059676, - "grad_norm": 0.10381831228733063, - "learning_rate": 6.41916599572405e-05, - "loss": 0.009056917577981948, - "step": 6170 - }, - { - "epoch": 1.052855924978687, - "grad_norm": 0.05251248553395271, - "learning_rate": 6.417365674526539e-05, - "loss": 0.004240944981575012, - "step": 6175 - }, - { - "epoch": 1.0537084398976981, - "grad_norm": 0.0812104344367981, - "learning_rate": 6.415564108114001e-05, - "loss": 0.008805926889181137, - "step": 6180 - }, - { - "epoch": 1.0545609548167092, - "grad_norm": 0.05640942230820656, - "learning_rate": 6.413761297327469e-05, - "loss": 0.005727213248610497, - "step": 6185 - }, - { - "epoch": 1.0554134697357205, - "grad_norm": 0.10114334523677826, - "learning_rate": 6.411957243008552e-05, - "loss": 0.008660107105970382, - "step": 6190 - }, - { - "epoch": 1.0562659846547315, - "grad_norm": 0.06809760630130768, - "learning_rate": 6.410151945999447e-05, - "loss": 0.006786180287599563, - "step": 6195 - }, - { - "epoch": 1.0571184995737426, - "grad_norm": 0.08121974021196365, - "learning_rate": 6.408345407142924e-05, - "loss": 0.004730105027556419, - "step": 6200 - }, - { - "epoch": 1.0579710144927537, - "grad_norm": 0.0630379393696785, - "learning_rate": 6.406537627282336e-05, - "loss": 0.006532897800207138, - "step": 6205 - }, - { - "epoch": 1.0588235294117647, - "grad_norm": 0.09354323893785477, - "learning_rate": 6.404728607261612e-05, - "loss": 0.008165966719388962, - "step": 6210 - }, - { - "epoch": 1.0596760443307758, - "grad_norm": 0.0509798526763916, - "learning_rate": 6.402918347925267e-05, - "loss": 0.006781977415084839, - "step": 6215 - }, - { - "epoch": 1.0605285592497868, - "grad_norm": 0.09830603748559952, - "learning_rate": 6.401106850118389e-05, - "loss": 0.00675075501203537, - "step": 6220 - }, - { - "epoch": 1.061381074168798, - "grad_norm": 0.08417326211929321, - "learning_rate": 6.399294114686645e-05, - "loss": 0.005759935826063156, - "step": 6225 - }, - { - "epoch": 1.062233589087809, - "grad_norm": 0.04999511316418648, - "learning_rate": 6.39748014247628e-05, - "loss": 0.0059943776577711105, - "step": 6230 - }, - { - "epoch": 1.06308610400682, - "grad_norm": 0.0355304591357708, - "learning_rate": 6.395664934334116e-05, - "loss": 0.003978967294096946, - "step": 6235 - }, - { - "epoch": 1.0639386189258313, - "grad_norm": 0.09096778929233551, - "learning_rate": 6.393848491107554e-05, - "loss": 0.006428928673267364, - "step": 6240 - }, - { - "epoch": 1.0647911338448424, - "grad_norm": 0.09047707170248032, - "learning_rate": 6.392030813644569e-05, - "loss": 0.005584535002708435, - "step": 6245 - }, - { - "epoch": 1.0656436487638534, - "grad_norm": 0.07133036106824875, - "learning_rate": 6.390211902793714e-05, - "loss": 0.00610351674258709, - "step": 6250 - }, - { - "epoch": 1.0664961636828645, - "grad_norm": 0.1025620549917221, - "learning_rate": 6.388391759404117e-05, - "loss": 0.006316560506820679, - "step": 6255 - }, - { - "epoch": 1.0673486786018755, - "grad_norm": 0.0922650694847107, - "learning_rate": 6.386570384325482e-05, - "loss": 0.008717238903045654, - "step": 6260 - }, - { - "epoch": 1.0682011935208866, - "grad_norm": 0.094338558614254, - "learning_rate": 6.384747778408085e-05, - "loss": 0.0067199327051639555, - "step": 6265 - }, - { - "epoch": 1.0690537084398977, - "grad_norm": 0.07260075211524963, - "learning_rate": 6.382923942502782e-05, - "loss": 0.007249505072832107, - "step": 6270 - }, - { - "epoch": 1.0699062233589087, - "grad_norm": 0.06572386622428894, - "learning_rate": 6.381098877460999e-05, - "loss": 0.007879015803337098, - "step": 6275 - }, - { - "epoch": 1.0707587382779198, - "grad_norm": 0.11646077036857605, - "learning_rate": 6.379272584134737e-05, - "loss": 0.006477512419223785, - "step": 6280 - }, - { - "epoch": 1.0716112531969308, - "grad_norm": 0.14154180884361267, - "learning_rate": 6.37744506337657e-05, - "loss": 0.0069471016526222226, - "step": 6285 - }, - { - "epoch": 1.0724637681159421, - "grad_norm": 0.113606296479702, - "learning_rate": 6.375616316039647e-05, - "loss": 0.010210946947336198, - "step": 6290 - }, - { - "epoch": 1.0733162830349532, - "grad_norm": 0.07193166017532349, - "learning_rate": 6.373786342977687e-05, - "loss": 0.00820360854268074, - "step": 6295 - }, - { - "epoch": 1.0741687979539642, - "grad_norm": 0.06180251017212868, - "learning_rate": 6.371955145044983e-05, - "loss": 0.006048502773046494, - "step": 6300 - }, - { - "epoch": 1.0750213128729753, - "grad_norm": 0.06956778466701508, - "learning_rate": 6.370122723096398e-05, - "loss": 0.005345676839351654, - "step": 6305 - }, - { - "epoch": 1.0758738277919864, - "grad_norm": 0.09170625358819962, - "learning_rate": 6.368289077987368e-05, - "loss": 0.0068355493247509004, - "step": 6310 - }, - { - "epoch": 1.0767263427109974, - "grad_norm": 0.07023731619119644, - "learning_rate": 6.366454210573901e-05, - "loss": 0.004600600153207779, - "step": 6315 - }, - { - "epoch": 1.0775788576300085, - "grad_norm": 0.07429320365190506, - "learning_rate": 6.36461812171257e-05, - "loss": 0.006272794306278228, - "step": 6320 - }, - { - "epoch": 1.0784313725490196, - "grad_norm": 0.11356805264949799, - "learning_rate": 6.362780812260528e-05, - "loss": 0.0048342026770114895, - "step": 6325 - }, - { - "epoch": 1.0792838874680306, - "grad_norm": 0.11231013387441635, - "learning_rate": 6.360942283075489e-05, - "loss": 0.00653451681137085, - "step": 6330 - }, - { - "epoch": 1.0801364023870417, - "grad_norm": 0.09655431658029556, - "learning_rate": 6.359102535015739e-05, - "loss": 0.008280844241380692, - "step": 6335 - }, - { - "epoch": 1.080988917306053, - "grad_norm": 0.10172779113054276, - "learning_rate": 6.357261568940135e-05, - "loss": 0.007757744938135147, - "step": 6340 - }, - { - "epoch": 1.081841432225064, - "grad_norm": 0.06417235732078552, - "learning_rate": 6.3554193857081e-05, - "loss": 0.007309675216674805, - "step": 6345 - }, - { - "epoch": 1.082693947144075, - "grad_norm": 0.053178418427705765, - "learning_rate": 6.35357598617963e-05, - "loss": 0.007162143290042877, - "step": 6350 - }, - { - "epoch": 1.0835464620630861, - "grad_norm": 0.07408315688371658, - "learning_rate": 6.351731371215278e-05, - "loss": 0.008392173796892166, - "step": 6355 - }, - { - "epoch": 1.0843989769820972, - "grad_norm": 0.03643275052309036, - "learning_rate": 6.349885541676179e-05, - "loss": 0.00610513798892498, - "step": 6360 - }, - { - "epoch": 1.0852514919011083, - "grad_norm": 0.04701307415962219, - "learning_rate": 6.348038498424023e-05, - "loss": 0.00612705871462822, - "step": 6365 - }, - { - "epoch": 1.0861040068201193, - "grad_norm": 0.05050053820014, - "learning_rate": 6.346190242321075e-05, - "loss": 0.005640604719519615, - "step": 6370 - }, - { - "epoch": 1.0869565217391304, - "grad_norm": 0.05203640088438988, - "learning_rate": 6.344340774230159e-05, - "loss": 0.005340654775500298, - "step": 6375 - }, - { - "epoch": 1.0878090366581414, - "grad_norm": 0.07451866567134857, - "learning_rate": 6.342490095014669e-05, - "loss": 0.006459225714206695, - "step": 6380 - }, - { - "epoch": 1.0886615515771525, - "grad_norm": 0.09951499849557877, - "learning_rate": 6.340638205538566e-05, - "loss": 0.008529558777809143, - "step": 6385 - }, - { - "epoch": 1.0895140664961638, - "grad_norm": 0.06064416840672493, - "learning_rate": 6.33878510666637e-05, - "loss": 0.007885071635246276, - "step": 6390 - }, - { - "epoch": 1.0903665814151748, - "grad_norm": 0.09382321685552597, - "learning_rate": 6.33693079926317e-05, - "loss": 0.007992906123399734, - "step": 6395 - }, - { - "epoch": 1.091219096334186, - "grad_norm": 0.054066915065050125, - "learning_rate": 6.335075284194621e-05, - "loss": 0.007473263889551163, - "step": 6400 - }, - { - "epoch": 1.092071611253197, - "grad_norm": 0.06763065606355667, - "learning_rate": 6.333218562326937e-05, - "loss": 0.006374929845333099, - "step": 6405 - }, - { - "epoch": 1.092924126172208, - "grad_norm": 0.0656818076968193, - "learning_rate": 6.331360634526899e-05, - "loss": 0.006085469573736191, - "step": 6410 - }, - { - "epoch": 1.093776641091219, - "grad_norm": 0.060463279485702515, - "learning_rate": 6.329501501661848e-05, - "loss": 0.005605050176382065, - "step": 6415 - }, - { - "epoch": 1.0946291560102301, - "grad_norm": 0.05734890326857567, - "learning_rate": 6.32764116459969e-05, - "loss": 0.00563613623380661, - "step": 6420 - }, - { - "epoch": 1.0954816709292412, - "grad_norm": 0.0502542182803154, - "learning_rate": 6.32577962420889e-05, - "loss": 0.004675766825675965, - "step": 6425 - }, - { - "epoch": 1.0963341858482523, - "grad_norm": 0.06550677120685577, - "learning_rate": 6.32391688135848e-05, - "loss": 0.006265480071306229, - "step": 6430 - }, - { - "epoch": 1.0971867007672633, - "grad_norm": 0.1298699826002121, - "learning_rate": 6.322052936918048e-05, - "loss": 0.008352620899677277, - "step": 6435 - }, - { - "epoch": 1.0980392156862746, - "grad_norm": 0.08422241359949112, - "learning_rate": 6.320187791757748e-05, - "loss": 0.005868597701191902, - "step": 6440 - }, - { - "epoch": 1.0988917306052857, - "grad_norm": 0.07807652652263641, - "learning_rate": 6.318321446748291e-05, - "loss": 0.010353461652994157, - "step": 6445 - }, - { - "epoch": 1.0997442455242967, - "grad_norm": 0.0663999617099762, - "learning_rate": 6.316453902760946e-05, - "loss": 0.00667201578617096, - "step": 6450 - }, - { - "epoch": 1.1005967604433078, - "grad_norm": 0.06362646073102951, - "learning_rate": 6.314585160667547e-05, - "loss": 0.006539353728294372, - "step": 6455 - }, - { - "epoch": 1.1014492753623188, - "grad_norm": 0.04160219058394432, - "learning_rate": 6.312715221340485e-05, - "loss": 0.004082740843296051, - "step": 6460 - }, - { - "epoch": 1.10230179028133, - "grad_norm": 0.05449013039469719, - "learning_rate": 6.31084408565271e-05, - "loss": 0.007537595182657242, - "step": 6465 - }, - { - "epoch": 1.103154305200341, - "grad_norm": 0.06819169223308563, - "learning_rate": 6.308971754477729e-05, - "loss": 0.006866573542356491, - "step": 6470 - }, - { - "epoch": 1.104006820119352, - "grad_norm": 0.06622573733329773, - "learning_rate": 6.307098228689611e-05, - "loss": 0.009055091440677643, - "step": 6475 - }, - { - "epoch": 1.104859335038363, - "grad_norm": 0.05732693895697594, - "learning_rate": 6.305223509162978e-05, - "loss": 0.006077280640602112, - "step": 6480 - }, - { - "epoch": 1.1057118499573741, - "grad_norm": 0.06630431115627289, - "learning_rate": 6.303347596773012e-05, - "loss": 0.0064442440867424015, - "step": 6485 - }, - { - "epoch": 1.1065643648763854, - "grad_norm": 0.06782745569944382, - "learning_rate": 6.301470492395451e-05, - "loss": 0.005072608217597007, - "step": 6490 - }, - { - "epoch": 1.1074168797953965, - "grad_norm": 0.05796601250767708, - "learning_rate": 6.299592196906591e-05, - "loss": 0.0074319586157798765, - "step": 6495 - }, - { - "epoch": 1.1082693947144076, - "grad_norm": 0.04627149552106857, - "learning_rate": 6.297712711183282e-05, - "loss": 0.00512685589492321, - "step": 6500 - }, - { - "epoch": 1.1091219096334186, - "grad_norm": 0.08210720866918564, - "learning_rate": 6.295832036102929e-05, - "loss": 0.006917678564786911, - "step": 6505 - }, - { - "epoch": 1.1099744245524297, - "grad_norm": 0.08367052674293518, - "learning_rate": 6.293950172543496e-05, - "loss": 0.0054212499409914015, - "step": 6510 - }, - { - "epoch": 1.1108269394714407, - "grad_norm": 0.08192700892686844, - "learning_rate": 6.292067121383499e-05, - "loss": 0.00791442021727562, - "step": 6515 - }, - { - "epoch": 1.1116794543904518, - "grad_norm": 0.05766206234693527, - "learning_rate": 6.290182883502008e-05, - "loss": 0.006540960818529129, - "step": 6520 - }, - { - "epoch": 1.1125319693094629, - "grad_norm": 0.07752664387226105, - "learning_rate": 6.28829745977865e-05, - "loss": 0.009196925908327103, - "step": 6525 - }, - { - "epoch": 1.113384484228474, - "grad_norm": 0.07419038563966751, - "learning_rate": 6.2864108510936e-05, - "loss": 0.007524489611387253, - "step": 6530 - }, - { - "epoch": 1.1142369991474852, - "grad_norm": 0.04851066321134567, - "learning_rate": 6.284523058327593e-05, - "loss": 0.004060015082359314, - "step": 6535 - }, - { - "epoch": 1.1150895140664963, - "grad_norm": 0.0764140635728836, - "learning_rate": 6.282634082361911e-05, - "loss": 0.006797254830598831, - "step": 6540 - }, - { - "epoch": 1.1159420289855073, - "grad_norm": 0.06873292475938797, - "learning_rate": 6.280743924078392e-05, - "loss": 0.007637844234704971, - "step": 6545 - }, - { - "epoch": 1.1167945439045184, - "grad_norm": 0.047832686454057693, - "learning_rate": 6.278852584359425e-05, - "loss": 0.00542646199464798, - "step": 6550 - }, - { - "epoch": 1.1176470588235294, - "grad_norm": 0.10061443597078323, - "learning_rate": 6.27696006408795e-05, - "loss": 0.008591605722904206, - "step": 6555 - }, - { - "epoch": 1.1184995737425405, - "grad_norm": 0.09729041159152985, - "learning_rate": 6.27506636414746e-05, - "loss": 0.0064585842192173, - "step": 6560 - }, - { - "epoch": 1.1193520886615516, - "grad_norm": 0.04427873343229294, - "learning_rate": 6.273171485421992e-05, - "loss": 0.004846593365073204, - "step": 6565 - }, - { - "epoch": 1.1202046035805626, - "grad_norm": 0.07813888788223267, - "learning_rate": 6.271275428796146e-05, - "loss": 0.007345958054065705, - "step": 6570 - }, - { - "epoch": 1.1210571184995737, - "grad_norm": 0.12500733137130737, - "learning_rate": 6.269378195155058e-05, - "loss": 0.006376177072525024, - "step": 6575 - }, - { - "epoch": 1.1219096334185847, - "grad_norm": 0.09636004269123077, - "learning_rate": 6.267479785384422e-05, - "loss": 0.0069282323122024534, - "step": 6580 - }, - { - "epoch": 1.1227621483375958, - "grad_norm": 0.06236017122864723, - "learning_rate": 6.265580200370478e-05, - "loss": 0.0050656192004680635, - "step": 6585 - }, - { - "epoch": 1.123614663256607, - "grad_norm": 0.0596463568508625, - "learning_rate": 6.263679441000019e-05, - "loss": 0.006198804825544357, - "step": 6590 - }, - { - "epoch": 1.1244671781756181, - "grad_norm": 0.0846855491399765, - "learning_rate": 6.261777508160378e-05, - "loss": 0.0073812372982501985, - "step": 6595 - }, - { - "epoch": 1.1253196930946292, - "grad_norm": 0.05228402093052864, - "learning_rate": 6.259874402739442e-05, - "loss": 0.006196213513612747, - "step": 6600 - }, - { - "epoch": 1.1261722080136403, - "grad_norm": 0.08395595103502274, - "learning_rate": 6.257970125625647e-05, - "loss": 0.0060448311269283295, - "step": 6605 - }, - { - "epoch": 1.1270247229326513, - "grad_norm": 0.055274344980716705, - "learning_rate": 6.25606467770797e-05, - "loss": 0.006578336656093598, - "step": 6610 - }, - { - "epoch": 1.1278772378516624, - "grad_norm": 0.054609380662441254, - "learning_rate": 6.254158059875936e-05, - "loss": 0.008093905448913575, - "step": 6615 - }, - { - "epoch": 1.1287297527706734, - "grad_norm": 0.05168715491890907, - "learning_rate": 6.25225027301962e-05, - "loss": 0.006086795404553413, - "step": 6620 - }, - { - "epoch": 1.1295822676896845, - "grad_norm": 0.06260590255260468, - "learning_rate": 6.250341318029641e-05, - "loss": 0.007288631051778793, - "step": 6625 - }, - { - "epoch": 1.1304347826086956, - "grad_norm": 0.06585957854986191, - "learning_rate": 6.24843119579716e-05, - "loss": 0.005779954791069031, - "step": 6630 - }, - { - "epoch": 1.1312872975277068, - "grad_norm": 0.05828391760587692, - "learning_rate": 6.246519907213888e-05, - "loss": 0.006320308148860932, - "step": 6635 - }, - { - "epoch": 1.132139812446718, - "grad_norm": 0.08400154113769531, - "learning_rate": 6.244607453172078e-05, - "loss": 0.00452205128967762, - "step": 6640 - }, - { - "epoch": 1.132992327365729, - "grad_norm": 0.059920940548181534, - "learning_rate": 6.242693834564525e-05, - "loss": 0.00620727390050888, - "step": 6645 - }, - { - "epoch": 1.13384484228474, - "grad_norm": 0.1100456491112709, - "learning_rate": 6.240779052284571e-05, - "loss": 0.006768511235713958, - "step": 6650 - }, - { - "epoch": 1.134697357203751, - "grad_norm": 0.07722730189561844, - "learning_rate": 6.238863107226102e-05, - "loss": 0.008207496255636215, - "step": 6655 - }, - { - "epoch": 1.1355498721227621, - "grad_norm": 0.05468403548002243, - "learning_rate": 6.236946000283542e-05, - "loss": 0.005272969231009483, - "step": 6660 - }, - { - "epoch": 1.1364023870417732, - "grad_norm": 0.0685155913233757, - "learning_rate": 6.235027732351863e-05, - "loss": 0.008074409514665603, - "step": 6665 - }, - { - "epoch": 1.1372549019607843, - "grad_norm": 0.07667854428291321, - "learning_rate": 6.233108304326571e-05, - "loss": 0.00863628089427948, - "step": 6670 - }, - { - "epoch": 1.1381074168797953, - "grad_norm": 0.0727204978466034, - "learning_rate": 6.231187717103727e-05, - "loss": 0.004446333646774292, - "step": 6675 - }, - { - "epoch": 1.1389599317988064, - "grad_norm": 0.06465403735637665, - "learning_rate": 6.229265971579918e-05, - "loss": 0.007380707561969757, - "step": 6680 - }, - { - "epoch": 1.1398124467178175, - "grad_norm": 0.04102586954832077, - "learning_rate": 6.227343068652281e-05, - "loss": 0.006103607639670372, - "step": 6685 - }, - { - "epoch": 1.1406649616368287, - "grad_norm": 0.06988929212093353, - "learning_rate": 6.225419009218493e-05, - "loss": 0.007358456403017044, - "step": 6690 - }, - { - "epoch": 1.1415174765558398, - "grad_norm": 0.07802724838256836, - "learning_rate": 6.223493794176767e-05, - "loss": 0.007887010276317597, - "step": 6695 - }, - { - "epoch": 1.1423699914748509, - "grad_norm": 0.10777991265058517, - "learning_rate": 6.221567424425857e-05, - "loss": 0.007653985172510147, - "step": 6700 - }, - { - "epoch": 1.143222506393862, - "grad_norm": 0.0922352522611618, - "learning_rate": 6.219639900865058e-05, - "loss": 0.007459624856710434, - "step": 6705 - }, - { - "epoch": 1.144075021312873, - "grad_norm": 0.06321967393159866, - "learning_rate": 6.217711224394202e-05, - "loss": 0.00540911853313446, - "step": 6710 - }, - { - "epoch": 1.144927536231884, - "grad_norm": 0.09344825148582458, - "learning_rate": 6.215781395913656e-05, - "loss": 0.0053936421871185304, - "step": 6715 - }, - { - "epoch": 1.145780051150895, - "grad_norm": 0.03457584232091904, - "learning_rate": 6.213850416324333e-05, - "loss": 0.006388355046510696, - "step": 6720 - }, - { - "epoch": 1.1466325660699062, - "grad_norm": 0.06931985914707184, - "learning_rate": 6.211918286527676e-05, - "loss": 0.005831810832023621, - "step": 6725 - }, - { - "epoch": 1.1474850809889172, - "grad_norm": 0.05588890612125397, - "learning_rate": 6.209985007425668e-05, - "loss": 0.0041655078530311584, - "step": 6730 - }, - { - "epoch": 1.1483375959079285, - "grad_norm": 0.07582694292068481, - "learning_rate": 6.208050579920826e-05, - "loss": 0.006521198153495789, - "step": 6735 - }, - { - "epoch": 1.1491901108269396, - "grad_norm": 0.07055433094501495, - "learning_rate": 6.206115004916209e-05, - "loss": 0.0066129244863986966, - "step": 6740 - }, - { - "epoch": 1.1500426257459506, - "grad_norm": 0.07039172202348709, - "learning_rate": 6.204178283315405e-05, - "loss": 0.005633739382028579, - "step": 6745 - }, - { - "epoch": 1.1508951406649617, - "grad_norm": 0.07833350449800491, - "learning_rate": 6.202240416022541e-05, - "loss": 0.006761975586414337, - "step": 6750 - }, - { - "epoch": 1.1517476555839727, - "grad_norm": 0.05553733557462692, - "learning_rate": 6.200301403942278e-05, - "loss": 0.006545065343379975, - "step": 6755 - }, - { - "epoch": 1.1526001705029838, - "grad_norm": 0.07306832075119019, - "learning_rate": 6.198361247979809e-05, - "loss": 0.008323725312948227, - "step": 6760 - }, - { - "epoch": 1.1534526854219949, - "grad_norm": 0.04787914454936981, - "learning_rate": 6.196419949040867e-05, - "loss": 0.004425797611474991, - "step": 6765 - }, - { - "epoch": 1.154305200341006, - "grad_norm": 0.08021930605173111, - "learning_rate": 6.194477508031712e-05, - "loss": 0.005103312805294991, - "step": 6770 - }, - { - "epoch": 1.155157715260017, - "grad_norm": 0.0821428894996643, - "learning_rate": 6.192533925859144e-05, - "loss": 0.009274877607822418, - "step": 6775 - }, - { - "epoch": 1.156010230179028, - "grad_norm": 0.09880609810352325, - "learning_rate": 6.190589203430486e-05, - "loss": 0.007863005250692367, - "step": 6780 - }, - { - "epoch": 1.156862745098039, - "grad_norm": 0.08075276762247086, - "learning_rate": 6.188643341653604e-05, - "loss": 0.004675677418708802, - "step": 6785 - }, - { - "epoch": 1.1577152600170504, - "grad_norm": 0.0633573830127716, - "learning_rate": 6.186696341436889e-05, - "loss": 0.007359890639781952, - "step": 6790 - }, - { - "epoch": 1.1585677749360614, - "grad_norm": 0.03828895092010498, - "learning_rate": 6.184748203689265e-05, - "loss": 0.004494070634245872, - "step": 6795 - }, - { - "epoch": 1.1594202898550725, - "grad_norm": 0.07907325774431229, - "learning_rate": 6.18279892932019e-05, - "loss": 0.006256269663572312, - "step": 6800 - }, - { - "epoch": 1.1602728047740836, - "grad_norm": 0.055188342928886414, - "learning_rate": 6.180848519239647e-05, - "loss": 0.009548474848270417, - "step": 6805 - }, - { - "epoch": 1.1611253196930946, - "grad_norm": 0.05850991606712341, - "learning_rate": 6.178896974358154e-05, - "loss": 0.0056076571345329285, - "step": 6810 - }, - { - "epoch": 1.1619778346121057, - "grad_norm": 0.0626932755112648, - "learning_rate": 6.176944295586757e-05, - "loss": 0.005667714029550552, - "step": 6815 - }, - { - "epoch": 1.1628303495311167, - "grad_norm": 0.06506946682929993, - "learning_rate": 6.174990483837031e-05, - "loss": 0.006154880672693252, - "step": 6820 - }, - { - "epoch": 1.1636828644501278, - "grad_norm": 0.0535859651863575, - "learning_rate": 6.17303554002108e-05, - "loss": 0.0042555928230285645, - "step": 6825 - }, - { - "epoch": 1.1645353793691389, - "grad_norm": 0.05577898398041725, - "learning_rate": 6.171079465051538e-05, - "loss": 0.006060104072093964, - "step": 6830 - }, - { - "epoch": 1.1653878942881502, - "grad_norm": 0.05574663355946541, - "learning_rate": 6.169122259841566e-05, - "loss": 0.008667515218257904, - "step": 6835 - }, - { - "epoch": 1.1662404092071612, - "grad_norm": 0.09157130867242813, - "learning_rate": 6.16716392530485e-05, - "loss": 0.007259176671504974, - "step": 6840 - }, - { - "epoch": 1.1670929241261723, - "grad_norm": 0.06404415518045425, - "learning_rate": 6.165204462355608e-05, - "loss": 0.007140593230724334, - "step": 6845 - }, - { - "epoch": 1.1679454390451833, - "grad_norm": 0.0713329091668129, - "learning_rate": 6.163243871908581e-05, - "loss": 0.006118748337030411, - "step": 6850 - }, - { - "epoch": 1.1687979539641944, - "grad_norm": 0.04038231074810028, - "learning_rate": 6.16128215487904e-05, - "loss": 0.006028918176889419, - "step": 6855 - }, - { - "epoch": 1.1696504688832055, - "grad_norm": 0.07793593406677246, - "learning_rate": 6.159319312182777e-05, - "loss": 0.006851163506507873, - "step": 6860 - }, - { - "epoch": 1.1705029838022165, - "grad_norm": 0.07543511688709259, - "learning_rate": 6.157355344736114e-05, - "loss": 0.006878272444009781, - "step": 6865 - }, - { - "epoch": 1.1713554987212276, - "grad_norm": 0.06332696974277496, - "learning_rate": 6.155390253455897e-05, - "loss": 0.006324999034404755, - "step": 6870 - }, - { - "epoch": 1.1722080136402386, - "grad_norm": 0.06920734792947769, - "learning_rate": 6.153424039259495e-05, - "loss": 0.00536310225725174, - "step": 6875 - }, - { - "epoch": 1.1730605285592497, - "grad_norm": 0.09313163161277771, - "learning_rate": 6.151456703064802e-05, - "loss": 0.00795424059033394, - "step": 6880 - }, - { - "epoch": 1.1739130434782608, - "grad_norm": 0.08588451147079468, - "learning_rate": 6.149488245790234e-05, - "loss": 0.006889034807682037, - "step": 6885 - }, - { - "epoch": 1.174765558397272, - "grad_norm": 0.09814538061618805, - "learning_rate": 6.147518668354737e-05, - "loss": 0.007332245260477066, - "step": 6890 - }, - { - "epoch": 1.175618073316283, - "grad_norm": 0.05141104385256767, - "learning_rate": 6.145547971677772e-05, - "loss": 0.00333656407892704, - "step": 6895 - }, - { - "epoch": 1.1764705882352942, - "grad_norm": 0.05575519800186157, - "learning_rate": 6.143576156679327e-05, - "loss": 0.005542768910527229, - "step": 6900 - }, - { - "epoch": 1.1773231031543052, - "grad_norm": 0.04917008429765701, - "learning_rate": 6.14160322427991e-05, - "loss": 0.007007633149623871, - "step": 6905 - }, - { - "epoch": 1.1781756180733163, - "grad_norm": 0.06385336071252823, - "learning_rate": 6.139629175400552e-05, - "loss": 0.007495941221714019, - "step": 6910 - }, - { - "epoch": 1.1790281329923273, - "grad_norm": 0.08664151281118393, - "learning_rate": 6.137654010962805e-05, - "loss": 0.0075534448027610775, - "step": 6915 - }, - { - "epoch": 1.1798806479113384, - "grad_norm": 0.08881189674139023, - "learning_rate": 6.13567773188874e-05, - "loss": 0.0059935558587312695, - "step": 6920 - }, - { - "epoch": 1.1807331628303495, - "grad_norm": 0.07297934591770172, - "learning_rate": 6.133700339100952e-05, - "loss": 0.006142234057188034, - "step": 6925 - }, - { - "epoch": 1.1815856777493605, - "grad_norm": 0.053351663053035736, - "learning_rate": 6.131721833522552e-05, - "loss": 0.006038139387965202, - "step": 6930 - }, - { - "epoch": 1.1824381926683718, - "grad_norm": 0.12923622131347656, - "learning_rate": 6.129742216077172e-05, - "loss": 0.008645947277545928, - "step": 6935 - }, - { - "epoch": 1.1832907075873829, - "grad_norm": 0.095455601811409, - "learning_rate": 6.127761487688964e-05, - "loss": 0.004740688577294349, - "step": 6940 - }, - { - "epoch": 1.184143222506394, - "grad_norm": 0.11498606204986572, - "learning_rate": 6.125779649282599e-05, - "loss": 0.00805070549249649, - "step": 6945 - }, - { - "epoch": 1.184995737425405, - "grad_norm": 0.07489286363124847, - "learning_rate": 6.123796701783264e-05, - "loss": 0.0060746859759092334, - "step": 6950 - }, - { - "epoch": 1.185848252344416, - "grad_norm": 0.07027438282966614, - "learning_rate": 6.121812646116663e-05, - "loss": 0.006816025823354721, - "step": 6955 - }, - { - "epoch": 1.186700767263427, - "grad_norm": 0.08542973548173904, - "learning_rate": 6.119827483209024e-05, - "loss": 0.005315831303596497, - "step": 6960 - }, - { - "epoch": 1.1875532821824382, - "grad_norm": 0.08409032970666885, - "learning_rate": 6.117841213987082e-05, - "loss": 0.0061601437628269196, - "step": 6965 - }, - { - "epoch": 1.1884057971014492, - "grad_norm": 0.10387974232435226, - "learning_rate": 6.115853839378096e-05, - "loss": 0.0071022816002368925, - "step": 6970 - }, - { - "epoch": 1.1892583120204603, - "grad_norm": 0.056441329419612885, - "learning_rate": 6.113865360309838e-05, - "loss": 0.004539218544960022, - "step": 6975 - }, - { - "epoch": 1.1901108269394713, - "grad_norm": 0.10132234543561935, - "learning_rate": 6.111875777710598e-05, - "loss": 0.0060778014361858364, - "step": 6980 - }, - { - "epoch": 1.1909633418584824, - "grad_norm": 0.07129226624965668, - "learning_rate": 6.109885092509177e-05, - "loss": 0.007794113457202911, - "step": 6985 - }, - { - "epoch": 1.1918158567774937, - "grad_norm": 0.09267892688512802, - "learning_rate": 6.107893305634893e-05, - "loss": 0.006867295503616333, - "step": 6990 - }, - { - "epoch": 1.1926683716965047, - "grad_norm": 0.0739888921380043, - "learning_rate": 6.10590041801758e-05, - "loss": 0.006263263523578644, - "step": 6995 - }, - { - "epoch": 1.1935208866155158, - "grad_norm": 0.10201311856508255, - "learning_rate": 6.103906430587583e-05, - "loss": 0.006245525926351548, - "step": 7000 - }, - { - "epoch": 1.1943734015345269, - "grad_norm": 0.08561360090970993, - "learning_rate": 6.1019113442757636e-05, - "loss": 0.005739351361989975, - "step": 7005 - }, - { - "epoch": 1.195225916453538, - "grad_norm": 0.06410367786884308, - "learning_rate": 6.099915160013491e-05, - "loss": 0.00598936527967453, - "step": 7010 - }, - { - "epoch": 1.196078431372549, - "grad_norm": 0.11765716224908829, - "learning_rate": 6.0979178787326544e-05, - "loss": 0.010477253794670105, - "step": 7015 - }, - { - "epoch": 1.19693094629156, - "grad_norm": 0.06659694015979767, - "learning_rate": 6.095919501365648e-05, - "loss": 0.0072447523474693295, - "step": 7020 - }, - { - "epoch": 1.197783461210571, - "grad_norm": 0.05651358142495155, - "learning_rate": 6.093920028845381e-05, - "loss": 0.004644358158111572, - "step": 7025 - }, - { - "epoch": 1.1986359761295824, - "grad_norm": 0.07198809087276459, - "learning_rate": 6.0919194621052764e-05, - "loss": 0.00517328754067421, - "step": 7030 - }, - { - "epoch": 1.1994884910485935, - "grad_norm": 0.06188420578837395, - "learning_rate": 6.0899178020792614e-05, - "loss": 0.005182894691824913, - "step": 7035 - }, - { - "epoch": 1.2003410059676045, - "grad_norm": 0.07731341570615768, - "learning_rate": 6.087915049701783e-05, - "loss": 0.006863371282815933, - "step": 7040 - }, - { - "epoch": 1.2011935208866156, - "grad_norm": 0.07693833857774734, - "learning_rate": 6.0859112059077866e-05, - "loss": 0.008634812384843826, - "step": 7045 - }, - { - "epoch": 1.2020460358056266, - "grad_norm": 0.08118413388729095, - "learning_rate": 6.083906271632736e-05, - "loss": 0.008003174513578414, - "step": 7050 - }, - { - "epoch": 1.2028985507246377, - "grad_norm": 0.07794218510389328, - "learning_rate": 6.0819002478126016e-05, - "loss": 0.005899757146835327, - "step": 7055 - }, - { - "epoch": 1.2037510656436488, - "grad_norm": 0.08107218146324158, - "learning_rate": 6.079893135383861e-05, - "loss": 0.007581057399511338, - "step": 7060 - }, - { - "epoch": 1.2046035805626598, - "grad_norm": 0.06910198926925659, - "learning_rate": 6.077884935283502e-05, - "loss": 0.00794234573841095, - "step": 7065 - }, - { - "epoch": 1.2054560954816709, - "grad_norm": 0.08426421135663986, - "learning_rate": 6.0758756484490186e-05, - "loss": 0.0057635679841041565, - "step": 7070 - }, - { - "epoch": 1.206308610400682, - "grad_norm": 0.08670961856842041, - "learning_rate": 6.073865275818413e-05, - "loss": 0.006369538605213165, - "step": 7075 - }, - { - "epoch": 1.207161125319693, - "grad_norm": 0.04245399683713913, - "learning_rate": 6.071853818330193e-05, - "loss": 0.0067548036575317385, - "step": 7080 - }, - { - "epoch": 1.208013640238704, - "grad_norm": 0.10600235313177109, - "learning_rate": 6.069841276923376e-05, - "loss": 0.006923867762088776, - "step": 7085 - }, - { - "epoch": 1.2088661551577153, - "grad_norm": 0.07093790173530579, - "learning_rate": 6.0678276525374814e-05, - "loss": 0.005083417519927025, - "step": 7090 - }, - { - "epoch": 1.2097186700767264, - "grad_norm": 0.04997126758098602, - "learning_rate": 6.065812946112537e-05, - "loss": 0.006207586824893951, - "step": 7095 - }, - { - "epoch": 1.2105711849957375, - "grad_norm": 0.04425262287259102, - "learning_rate": 6.063797158589074e-05, - "loss": 0.0046977601945400235, - "step": 7100 - }, - { - "epoch": 1.2114236999147485, - "grad_norm": 0.07300136238336563, - "learning_rate": 6.0617802909081295e-05, - "loss": 0.005589437484741211, - "step": 7105 - }, - { - "epoch": 1.2122762148337596, - "grad_norm": 0.0878889262676239, - "learning_rate": 6.0597623440112445e-05, - "loss": 0.005844222381711006, - "step": 7110 - }, - { - "epoch": 1.2131287297527706, - "grad_norm": 0.09079992771148682, - "learning_rate": 6.0577433188404635e-05, - "loss": 0.007329034805297852, - "step": 7115 - }, - { - "epoch": 1.2139812446717817, - "grad_norm": 0.07165387272834778, - "learning_rate": 6.055723216338336e-05, - "loss": 0.006642927974462509, - "step": 7120 - }, - { - "epoch": 1.2148337595907928, - "grad_norm": 0.07113955169916153, - "learning_rate": 6.053702037447911e-05, - "loss": 0.006992670893669129, - "step": 7125 - }, - { - "epoch": 1.215686274509804, - "grad_norm": 0.08737215399742126, - "learning_rate": 6.0516797831127425e-05, - "loss": 0.006702055037021637, - "step": 7130 - }, - { - "epoch": 1.216538789428815, - "grad_norm": 0.07276564091444016, - "learning_rate": 6.049656454276887e-05, - "loss": 0.005692619457840919, - "step": 7135 - }, - { - "epoch": 1.2173913043478262, - "grad_norm": 0.09203831106424332, - "learning_rate": 6.0476320518849006e-05, - "loss": 0.006464710086584091, - "step": 7140 - }, - { - "epoch": 1.2182438192668372, - "grad_norm": 0.07749375700950623, - "learning_rate": 6.045606576881843e-05, - "loss": 0.008598372340202332, - "step": 7145 - }, - { - "epoch": 1.2190963341858483, - "grad_norm": 0.04338040575385094, - "learning_rate": 6.043580030213272e-05, - "loss": 0.006892016530036927, - "step": 7150 - }, - { - "epoch": 1.2199488491048593, - "grad_norm": 0.06691034138202667, - "learning_rate": 6.0415524128252474e-05, - "loss": 0.007622670382261276, - "step": 7155 - }, - { - "epoch": 1.2208013640238704, - "grad_norm": 0.07532396167516708, - "learning_rate": 6.039523725664329e-05, - "loss": 0.00698106437921524, - "step": 7160 - }, - { - "epoch": 1.2216538789428815, - "grad_norm": 0.0408058688044548, - "learning_rate": 6.037493969677575e-05, - "loss": 0.005919446796178817, - "step": 7165 - }, - { - "epoch": 1.2225063938618925, - "grad_norm": 0.07306578010320663, - "learning_rate": 6.0354631458125425e-05, - "loss": 0.008953345566987991, - "step": 7170 - }, - { - "epoch": 1.2233589087809036, - "grad_norm": 0.08269601315259933, - "learning_rate": 6.033431255017289e-05, - "loss": 0.007224951684474945, - "step": 7175 - }, - { - "epoch": 1.2242114236999146, - "grad_norm": 0.045140884816646576, - "learning_rate": 6.0313982982403676e-05, - "loss": 0.006175454705953598, - "step": 7180 - }, - { - "epoch": 1.2250639386189257, - "grad_norm": 0.0721440315246582, - "learning_rate": 6.0293642764308304e-05, - "loss": 0.007267911732196808, - "step": 7185 - }, - { - "epoch": 1.225916453537937, - "grad_norm": 0.081813283264637, - "learning_rate": 6.027329190538227e-05, - "loss": 0.006872846186161042, - "step": 7190 - }, - { - "epoch": 1.226768968456948, - "grad_norm": 0.05662613734602928, - "learning_rate": 6.025293041512602e-05, - "loss": 0.004837202653288841, - "step": 7195 - }, - { - "epoch": 1.227621483375959, - "grad_norm": 0.10023492574691772, - "learning_rate": 6.023255830304498e-05, - "loss": 0.0060194481164217, - "step": 7200 - }, - { - "epoch": 1.2284739982949702, - "grad_norm": 0.06398235261440277, - "learning_rate": 6.021217557864954e-05, - "loss": 0.007653398066759109, - "step": 7205 - }, - { - "epoch": 1.2293265132139812, - "grad_norm": 0.09494475275278091, - "learning_rate": 6.019178225145503e-05, - "loss": 0.007547302544116974, - "step": 7210 - }, - { - "epoch": 1.2301790281329923, - "grad_norm": 0.05356467142701149, - "learning_rate": 6.017137833098171e-05, - "loss": 0.007133310288190841, - "step": 7215 - }, - { - "epoch": 1.2310315430520034, - "grad_norm": 0.09225092083215714, - "learning_rate": 6.0150963826754836e-05, - "loss": 0.006320309638977051, - "step": 7220 - }, - { - "epoch": 1.2318840579710144, - "grad_norm": 0.07072161883115768, - "learning_rate": 6.013053874830458e-05, - "loss": 0.007313568145036697, - "step": 7225 - }, - { - "epoch": 1.2327365728900257, - "grad_norm": 0.07206818461418152, - "learning_rate": 6.0110103105166026e-05, - "loss": 0.0054031949490308765, - "step": 7230 - }, - { - "epoch": 1.2335890878090368, - "grad_norm": 0.08611681312322617, - "learning_rate": 6.008965690687922e-05, - "loss": 0.00670153945684433, - "step": 7235 - }, - { - "epoch": 1.2344416027280478, - "grad_norm": 0.07864221930503845, - "learning_rate": 6.0069200162989154e-05, - "loss": 0.0069690033793449405, - "step": 7240 - }, - { - "epoch": 1.2352941176470589, - "grad_norm": 0.06847227364778519, - "learning_rate": 6.0048732883045665e-05, - "loss": 0.006755173206329346, - "step": 7245 - }, - { - "epoch": 1.23614663256607, - "grad_norm": 0.06264699995517731, - "learning_rate": 6.0028255076603606e-05, - "loss": 0.00519348569214344, - "step": 7250 - }, - { - "epoch": 1.236999147485081, - "grad_norm": 0.04114431515336037, - "learning_rate": 6.0007766753222665e-05, - "loss": 0.006071234866976738, - "step": 7255 - }, - { - "epoch": 1.237851662404092, - "grad_norm": 0.03660140559077263, - "learning_rate": 5.998726792246751e-05, - "loss": 0.007517064362764359, - "step": 7260 - }, - { - "epoch": 1.2387041773231031, - "grad_norm": 0.10343052446842194, - "learning_rate": 5.9966758593907636e-05, - "loss": 0.0070131182670593265, - "step": 7265 - }, - { - "epoch": 1.2395566922421142, - "grad_norm": 0.08169959485530853, - "learning_rate": 5.994623877711751e-05, - "loss": 0.006279528886079788, - "step": 7270 - }, - { - "epoch": 1.2404092071611252, - "grad_norm": 0.06312677264213562, - "learning_rate": 5.992570848167645e-05, - "loss": 0.0041657909750938416, - "step": 7275 - }, - { - "epoch": 1.2412617220801363, - "grad_norm": 0.08725713193416595, - "learning_rate": 5.990516771716869e-05, - "loss": 0.007908149063587189, - "step": 7280 - }, - { - "epoch": 1.2421142369991476, - "grad_norm": 0.05857875198125839, - "learning_rate": 5.988461649318333e-05, - "loss": 0.005137740075588227, - "step": 7285 - }, - { - "epoch": 1.2429667519181586, - "grad_norm": 0.04836168512701988, - "learning_rate": 5.986405481931438e-05, - "loss": 0.005033157765865326, - "step": 7290 - }, - { - "epoch": 1.2438192668371697, - "grad_norm": 0.09514568001031876, - "learning_rate": 5.98434827051607e-05, - "loss": 0.007460397481918335, - "step": 7295 - }, - { - "epoch": 1.2446717817561808, - "grad_norm": 0.049415748566389084, - "learning_rate": 5.982290016032604e-05, - "loss": 0.0049881644546985624, - "step": 7300 - }, - { - "epoch": 1.2455242966751918, - "grad_norm": 0.1069302037358284, - "learning_rate": 5.980230719441903e-05, - "loss": 0.005356961116194725, - "step": 7305 - }, - { - "epoch": 1.2463768115942029, - "grad_norm": 0.1031380444765091, - "learning_rate": 5.9781703817053136e-05, - "loss": 0.0067513369023799895, - "step": 7310 - }, - { - "epoch": 1.247229326513214, - "grad_norm": 0.05909884348511696, - "learning_rate": 5.976109003784671e-05, - "loss": 0.005357486009597778, - "step": 7315 - }, - { - "epoch": 1.248081841432225, - "grad_norm": 0.09306607395410538, - "learning_rate": 5.974046586642295e-05, - "loss": 0.005747456848621368, - "step": 7320 - }, - { - "epoch": 1.248934356351236, - "grad_norm": 0.0688752606511116, - "learning_rate": 5.971983131240988e-05, - "loss": 0.0073902375996112825, - "step": 7325 - }, - { - "epoch": 1.2497868712702473, - "grad_norm": 0.06592141836881638, - "learning_rate": 5.969918638544044e-05, - "loss": 0.008268805593252182, - "step": 7330 - }, - { - "epoch": 1.2497868712702473, - "eval_loss": 0.037298671901226044, - "eval_runtime": 3.6917, - "eval_samples_per_second": 68.261, - "eval_steps_per_second": 1.084, - "step": 7330 - }, - { - "eval_cer_subset": 0.01283241324278991, - "eval_cer_subset_edit_distance": 788, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 7330 - }, - { - "epoch": 1.2506393861892584, - "grad_norm": 0.06094380095601082, - "learning_rate": 5.9678531095152326e-05, - "loss": 0.005528298765420913, - "step": 7335 - }, - { - "epoch": 1.2514919011082695, - "grad_norm": 0.1417030543088913, - "learning_rate": 5.965786545118815e-05, - "loss": 0.00984017476439476, - "step": 7340 - }, - { - "epoch": 1.2523444160272805, - "grad_norm": 0.08209668844938278, - "learning_rate": 5.963718946319529e-05, - "loss": 0.007516486942768097, - "step": 7345 - }, - { - "epoch": 1.2531969309462916, - "grad_norm": 0.06825494766235352, - "learning_rate": 5.9616503140826006e-05, - "loss": 0.005924524366855621, - "step": 7350 - }, - { - "epoch": 1.2540494458653026, - "grad_norm": 0.11229037493467331, - "learning_rate": 5.959580649373736e-05, - "loss": 0.006495627760887146, - "step": 7355 - }, - { - "epoch": 1.2549019607843137, - "grad_norm": 0.13235078752040863, - "learning_rate": 5.957509953159123e-05, - "loss": 0.00942063182592392, - "step": 7360 - }, - { - "epoch": 1.2557544757033248, - "grad_norm": 0.04514055699110031, - "learning_rate": 5.955438226405432e-05, - "loss": 0.006601292639970779, - "step": 7365 - }, - { - "epoch": 1.2566069906223358, - "grad_norm": 0.08192043751478195, - "learning_rate": 5.9533654700798126e-05, - "loss": 0.007403627783060074, - "step": 7370 - }, - { - "epoch": 1.257459505541347, - "grad_norm": 0.07101254910230637, - "learning_rate": 5.951291685149898e-05, - "loss": 0.006301522254943848, - "step": 7375 - }, - { - "epoch": 1.258312020460358, - "grad_norm": 0.05598035827279091, - "learning_rate": 5.949216872583799e-05, - "loss": 0.006812388449907303, - "step": 7380 - }, - { - "epoch": 1.259164535379369, - "grad_norm": 0.06444506347179413, - "learning_rate": 5.9471410333501085e-05, - "loss": 0.005891536176204681, - "step": 7385 - }, - { - "epoch": 1.2600170502983803, - "grad_norm": 0.04921717569231987, - "learning_rate": 5.945064168417895e-05, - "loss": 0.004649973660707474, - "step": 7390 - }, - { - "epoch": 1.2608695652173914, - "grad_norm": 0.09095602482557297, - "learning_rate": 5.94298627875671e-05, - "loss": 0.007515725493431091, - "step": 7395 - }, - { - "epoch": 1.2617220801364024, - "grad_norm": 0.09932803362607956, - "learning_rate": 5.9409073653365816e-05, - "loss": 0.006223166733980179, - "step": 7400 - }, - { - "epoch": 1.2625745950554135, - "grad_norm": 0.08616010844707489, - "learning_rate": 5.938827429128014e-05, - "loss": 0.006999516487121582, - "step": 7405 - }, - { - "epoch": 1.2634271099744245, - "grad_norm": 0.11979297548532486, - "learning_rate": 5.936746471101993e-05, - "loss": 0.00812242105603218, - "step": 7410 - }, - { - "epoch": 1.2642796248934356, - "grad_norm": 0.12872007489204407, - "learning_rate": 5.934664492229976e-05, - "loss": 0.006246988475322723, - "step": 7415 - }, - { - "epoch": 1.2651321398124467, - "grad_norm": 0.0831044539809227, - "learning_rate": 5.932581493483903e-05, - "loss": 0.00590248554944992, - "step": 7420 - }, - { - "epoch": 1.265984654731458, - "grad_norm": 0.09913221001625061, - "learning_rate": 5.9304974758361857e-05, - "loss": 0.007224322855472564, - "step": 7425 - }, - { - "epoch": 1.266837169650469, - "grad_norm": 0.08654595911502838, - "learning_rate": 5.928412440259713e-05, - "loss": 0.007056090980768204, - "step": 7430 - }, - { - "epoch": 1.26768968456948, - "grad_norm": 0.07882801443338394, - "learning_rate": 5.926326387727849e-05, - "loss": 0.00572751946747303, - "step": 7435 - }, - { - "epoch": 1.2685421994884911, - "grad_norm": 0.12886428833007812, - "learning_rate": 5.924239319214432e-05, - "loss": 0.0106881283223629, - "step": 7440 - }, - { - "epoch": 1.2693947144075022, - "grad_norm": 0.05597686767578125, - "learning_rate": 5.922151235693775e-05, - "loss": 0.005041084438562393, - "step": 7445 - }, - { - "epoch": 1.2702472293265132, - "grad_norm": 0.10719682276248932, - "learning_rate": 5.920062138140665e-05, - "loss": 0.007724158465862274, - "step": 7450 - }, - { - "epoch": 1.2710997442455243, - "grad_norm": 0.045485325157642365, - "learning_rate": 5.917972027530363e-05, - "loss": 0.003246675431728363, - "step": 7455 - }, - { - "epoch": 1.2719522591645354, - "grad_norm": 0.09602563083171844, - "learning_rate": 5.9158809048386017e-05, - "loss": 0.006592199206352234, - "step": 7460 - }, - { - "epoch": 1.2728047740835464, - "grad_norm": 0.0555407889187336, - "learning_rate": 5.913788771041586e-05, - "loss": 0.00537751168012619, - "step": 7465 - }, - { - "epoch": 1.2736572890025575, - "grad_norm": 0.15820109844207764, - "learning_rate": 5.911695627115994e-05, - "loss": 0.005968114733695984, - "step": 7470 - }, - { - "epoch": 1.2745098039215685, - "grad_norm": 0.05781199410557747, - "learning_rate": 5.9096014740389754e-05, - "loss": 0.00887204110622406, - "step": 7475 - }, - { - "epoch": 1.2753623188405796, - "grad_norm": 0.07927337288856506, - "learning_rate": 5.90750631278815e-05, - "loss": 0.006439142674207687, - "step": 7480 - }, - { - "epoch": 1.2762148337595907, - "grad_norm": 0.03843824937939644, - "learning_rate": 5.905410144341609e-05, - "loss": 0.007792883366346359, - "step": 7485 - }, - { - "epoch": 1.277067348678602, - "grad_norm": 0.0692640095949173, - "learning_rate": 5.903312969677914e-05, - "loss": 0.006274447590112686, - "step": 7490 - }, - { - "epoch": 1.277919863597613, - "grad_norm": 0.07501527667045593, - "learning_rate": 5.901214789776094e-05, - "loss": 0.007496471703052521, - "step": 7495 - }, - { - "epoch": 1.278772378516624, - "grad_norm": 0.10271260142326355, - "learning_rate": 5.8991156056156514e-05, - "loss": 0.008766942471265794, - "step": 7500 - }, - { - "epoch": 1.2796248934356351, - "grad_norm": 0.03995242714881897, - "learning_rate": 5.897015418176555e-05, - "loss": 0.0055749226361513134, - "step": 7505 - }, - { - "epoch": 1.2804774083546462, - "grad_norm": 0.09215585142374039, - "learning_rate": 5.8949142284392406e-05, - "loss": 0.005763960257172585, - "step": 7510 - }, - { - "epoch": 1.2813299232736572, - "grad_norm": 0.07763402909040451, - "learning_rate": 5.892812037384615e-05, - "loss": 0.006439389288425445, - "step": 7515 - }, - { - "epoch": 1.2821824381926683, - "grad_norm": 0.04945438355207443, - "learning_rate": 5.890708845994049e-05, - "loss": 0.006960665434598922, - "step": 7520 - }, - { - "epoch": 1.2830349531116796, - "grad_norm": 0.05348283797502518, - "learning_rate": 5.888604655249384e-05, - "loss": 0.0061422914266586305, - "step": 7525 - }, - { - "epoch": 1.2838874680306906, - "grad_norm": 0.10389877110719681, - "learning_rate": 5.886499466132926e-05, - "loss": 0.009247081726789475, - "step": 7530 - }, - { - "epoch": 1.2847399829497017, - "grad_norm": 0.07753872126340866, - "learning_rate": 5.884393279627448e-05, - "loss": 0.004902977123856544, - "step": 7535 - }, - { - "epoch": 1.2855924978687128, - "grad_norm": 0.10553103685379028, - "learning_rate": 5.8822860967161856e-05, - "loss": 0.004547145590186119, - "step": 7540 - }, - { - "epoch": 1.2864450127877238, - "grad_norm": 0.08235067129135132, - "learning_rate": 5.880177918382844e-05, - "loss": 0.005282455682754516, - "step": 7545 - }, - { - "epoch": 1.287297527706735, - "grad_norm": 0.08135014772415161, - "learning_rate": 5.878068745611591e-05, - "loss": 0.006127358600497246, - "step": 7550 - }, - { - "epoch": 1.288150042625746, - "grad_norm": 0.04027952626347542, - "learning_rate": 5.875958579387056e-05, - "loss": 0.008251778036355972, - "step": 7555 - }, - { - "epoch": 1.289002557544757, - "grad_norm": 0.1060953438282013, - "learning_rate": 5.8738474206943385e-05, - "loss": 0.008290941268205643, - "step": 7560 - }, - { - "epoch": 1.289855072463768, - "grad_norm": 0.06716421991586685, - "learning_rate": 5.871735270518995e-05, - "loss": 0.004932524263858795, - "step": 7565 - }, - { - "epoch": 1.2907075873827791, - "grad_norm": 0.07644582539796829, - "learning_rate": 5.869622129847048e-05, - "loss": 0.006172410026192665, - "step": 7570 - }, - { - "epoch": 1.2915601023017902, - "grad_norm": 0.06018557399511337, - "learning_rate": 5.867507999664983e-05, - "loss": 0.005532362312078476, - "step": 7575 - }, - { - "epoch": 1.2924126172208013, - "grad_norm": 0.06454342603683472, - "learning_rate": 5.865392880959745e-05, - "loss": 0.005053167790174484, - "step": 7580 - }, - { - "epoch": 1.2932651321398123, - "grad_norm": 0.07618142664432526, - "learning_rate": 5.863276774718742e-05, - "loss": 0.005658206716179848, - "step": 7585 - }, - { - "epoch": 1.2941176470588236, - "grad_norm": 0.05649973824620247, - "learning_rate": 5.8611596819298434e-05, - "loss": 0.007477214187383651, - "step": 7590 - }, - { - "epoch": 1.2949701619778347, - "grad_norm": 0.09222351759672165, - "learning_rate": 5.859041603581377e-05, - "loss": 0.006974493712186813, - "step": 7595 - }, - { - "epoch": 1.2958226768968457, - "grad_norm": 0.07462326437234879, - "learning_rate": 5.856922540662134e-05, - "loss": 0.008175718039274216, - "step": 7600 - }, - { - "epoch": 1.2966751918158568, - "grad_norm": 0.10593193024396896, - "learning_rate": 5.854802494161364e-05, - "loss": 0.006635700166225433, - "step": 7605 - }, - { - "epoch": 1.2975277067348678, - "grad_norm": 0.08673358708620071, - "learning_rate": 5.8526814650687724e-05, - "loss": 0.007347754389047623, - "step": 7610 - }, - { - "epoch": 1.298380221653879, - "grad_norm": 0.10450063645839691, - "learning_rate": 5.850559454374528e-05, - "loss": 0.008085139095783234, - "step": 7615 - }, - { - "epoch": 1.29923273657289, - "grad_norm": 0.04219435900449753, - "learning_rate": 5.848436463069257e-05, - "loss": 0.006296204030513763, - "step": 7620 - }, - { - "epoch": 1.3000852514919012, - "grad_norm": 0.08187524974346161, - "learning_rate": 5.84631249214404e-05, - "loss": 0.007680010050535202, - "step": 7625 - }, - { - "epoch": 1.3009377664109123, - "grad_norm": 0.21044164896011353, - "learning_rate": 5.844187542590418e-05, - "loss": 0.008709554374217988, - "step": 7630 - }, - { - "epoch": 1.3017902813299234, - "grad_norm": 0.09822215139865875, - "learning_rate": 5.842061615400389e-05, - "loss": 0.007412384450435639, - "step": 7635 - }, - { - "epoch": 1.3026427962489344, - "grad_norm": 0.05957398563623428, - "learning_rate": 5.8399347115664053e-05, - "loss": 0.0062717020511627196, - "step": 7640 - }, - { - "epoch": 1.3034953111679455, - "grad_norm": 0.07013436406850815, - "learning_rate": 5.837806832081378e-05, - "loss": 0.005471421033143997, - "step": 7645 - }, - { - "epoch": 1.3043478260869565, - "grad_norm": 0.09616916626691818, - "learning_rate": 5.835677977938671e-05, - "loss": 0.008985907584428788, - "step": 7650 - }, - { - "epoch": 1.3052003410059676, - "grad_norm": 0.07946161180734634, - "learning_rate": 5.833548150132105e-05, - "loss": 0.00563003197312355, - "step": 7655 - }, - { - "epoch": 1.3060528559249787, - "grad_norm": 0.0630686804652214, - "learning_rate": 5.831417349655953e-05, - "loss": 0.007591667026281357, - "step": 7660 - }, - { - "epoch": 1.3069053708439897, - "grad_norm": 0.08530164510011673, - "learning_rate": 5.829285577504944e-05, - "loss": 0.006751708686351776, - "step": 7665 - }, - { - "epoch": 1.3077578857630008, - "grad_norm": 0.045148320496082306, - "learning_rate": 5.8271528346742616e-05, - "loss": 0.0052963607013225555, - "step": 7670 - }, - { - "epoch": 1.3086104006820118, - "grad_norm": 0.07147885859012604, - "learning_rate": 5.82501912215954e-05, - "loss": 0.005282463133335113, - "step": 7675 - }, - { - "epoch": 1.309462915601023, - "grad_norm": 0.0933302789926529, - "learning_rate": 5.8228844409568654e-05, - "loss": 0.0073209434747695925, - "step": 7680 - }, - { - "epoch": 1.310315430520034, - "grad_norm": 0.07449645549058914, - "learning_rate": 5.820748792062781e-05, - "loss": 0.007801111787557602, - "step": 7685 - }, - { - "epoch": 1.3111679454390452, - "grad_norm": 0.04569214582443237, - "learning_rate": 5.8186121764742774e-05, - "loss": 0.006659354269504547, - "step": 7690 - }, - { - "epoch": 1.3120204603580563, - "grad_norm": 0.07046396285295486, - "learning_rate": 5.8164745951887995e-05, - "loss": 0.006448440253734589, - "step": 7695 - }, - { - "epoch": 1.3128729752770674, - "grad_norm": 0.09704319387674332, - "learning_rate": 5.814336049204239e-05, - "loss": 0.008210816234350205, - "step": 7700 - }, - { - "epoch": 1.3137254901960784, - "grad_norm": 0.06477776169776917, - "learning_rate": 5.81219653951894e-05, - "loss": 0.005369330942630768, - "step": 7705 - }, - { - "epoch": 1.3145780051150895, - "grad_norm": 0.11657397449016571, - "learning_rate": 5.810056067131698e-05, - "loss": 0.010190412402153015, - "step": 7710 - }, - { - "epoch": 1.3154305200341005, - "grad_norm": 0.06578268110752106, - "learning_rate": 5.8079146330417575e-05, - "loss": 0.006289052963256836, - "step": 7715 - }, - { - "epoch": 1.3162830349531116, - "grad_norm": 0.06296945363283157, - "learning_rate": 5.80577223824881e-05, - "loss": 0.008120459318161011, - "step": 7720 - }, - { - "epoch": 1.317135549872123, - "grad_norm": 0.08275634050369263, - "learning_rate": 5.803628883752996e-05, - "loss": 0.006926379352807999, - "step": 7725 - }, - { - "epoch": 1.317988064791134, - "grad_norm": 0.0693436712026596, - "learning_rate": 5.8014845705549086e-05, - "loss": 0.006521113961935043, - "step": 7730 - }, - { - "epoch": 1.318840579710145, - "grad_norm": 0.05845775827765465, - "learning_rate": 5.799339299655579e-05, - "loss": 0.00762510895729065, - "step": 7735 - }, - { - "epoch": 1.319693094629156, - "grad_norm": 0.08802217245101929, - "learning_rate": 5.7971930720564947e-05, - "loss": 0.008071760833263397, - "step": 7740 - }, - { - "epoch": 1.3205456095481671, - "grad_norm": 0.08866037428379059, - "learning_rate": 5.795045888759585e-05, - "loss": 0.006111105903983116, - "step": 7745 - }, - { - "epoch": 1.3213981244671782, - "grad_norm": 0.0844360888004303, - "learning_rate": 5.792897750767225e-05, - "loss": 0.005196729302406311, - "step": 7750 - }, - { - "epoch": 1.3222506393861893, - "grad_norm": 0.06763950735330582, - "learning_rate": 5.79074865908224e-05, - "loss": 0.006462454050779343, - "step": 7755 - }, - { - "epoch": 1.3231031543052003, - "grad_norm": 0.06333937495946884, - "learning_rate": 5.7885986147078946e-05, - "loss": 0.0068017512559890745, - "step": 7760 - }, - { - "epoch": 1.3239556692242114, - "grad_norm": 0.05730217695236206, - "learning_rate": 5.786447618647904e-05, - "loss": 0.0065845087170600895, - "step": 7765 - }, - { - "epoch": 1.3248081841432224, - "grad_norm": 0.06838720291852951, - "learning_rate": 5.784295671906422e-05, - "loss": 0.0059626404196023945, - "step": 7770 - }, - { - "epoch": 1.3256606990622335, - "grad_norm": 0.06693503260612488, - "learning_rate": 5.782142775488051e-05, - "loss": 0.008056168258190156, - "step": 7775 - }, - { - "epoch": 1.3265132139812446, - "grad_norm": 0.07886708527803421, - "learning_rate": 5.7799889303978324e-05, - "loss": 0.006670922040939331, - "step": 7780 - }, - { - "epoch": 1.3273657289002558, - "grad_norm": 0.06932322680950165, - "learning_rate": 5.777834137641255e-05, - "loss": 0.006734507530927658, - "step": 7785 - }, - { - "epoch": 1.328218243819267, - "grad_norm": 0.08057818561792374, - "learning_rate": 5.775678398224247e-05, - "loss": 0.005952415242791176, - "step": 7790 - }, - { - "epoch": 1.329070758738278, - "grad_norm": 0.06614059209823608, - "learning_rate": 5.7735217131531785e-05, - "loss": 0.007471600174903869, - "step": 7795 - }, - { - "epoch": 1.329923273657289, - "grad_norm": 0.06335467845201492, - "learning_rate": 5.771364083434862e-05, - "loss": 0.007279399782419205, - "step": 7800 - }, - { - "epoch": 1.3307757885763, - "grad_norm": 0.11745526641607285, - "learning_rate": 5.769205510076552e-05, - "loss": 0.006242561340332031, - "step": 7805 - }, - { - "epoch": 1.3316283034953111, - "grad_norm": 0.0590963289141655, - "learning_rate": 5.7670459940859414e-05, - "loss": 0.006263129413127899, - "step": 7810 - }, - { - "epoch": 1.3324808184143222, - "grad_norm": 0.05416800454258919, - "learning_rate": 5.764885536471164e-05, - "loss": 0.00531160868704319, - "step": 7815 - }, - { - "epoch": 1.3333333333333333, - "grad_norm": 0.05527244135737419, - "learning_rate": 5.7627241382407933e-05, - "loss": 0.005747637152671814, - "step": 7820 - }, - { - "epoch": 1.3341858482523445, - "grad_norm": 0.057753629982471466, - "learning_rate": 5.760561800403844e-05, - "loss": 0.004979781061410904, - "step": 7825 - }, - { - "epoch": 1.3350383631713556, - "grad_norm": 0.10882547497749329, - "learning_rate": 5.758398523969763e-05, - "loss": 0.00589316263794899, - "step": 7830 - }, - { - "epoch": 1.3358908780903667, - "grad_norm": 0.08053787797689438, - "learning_rate": 5.756234309948443e-05, - "loss": 0.004465704411268234, - "step": 7835 - }, - { - "epoch": 1.3367433930093777, - "grad_norm": 0.09168808907270432, - "learning_rate": 5.75406915935021e-05, - "loss": 0.004735191911458969, - "step": 7840 - }, - { - "epoch": 1.3375959079283888, - "grad_norm": 0.0956537052989006, - "learning_rate": 5.751903073185829e-05, - "loss": 0.005519610643386841, - "step": 7845 - }, - { - "epoch": 1.3384484228473998, - "grad_norm": 0.05775619298219681, - "learning_rate": 5.749736052466501e-05, - "loss": 0.005525605380535125, - "step": 7850 - }, - { - "epoch": 1.339300937766411, - "grad_norm": 0.08594895154237747, - "learning_rate": 5.7475680982038616e-05, - "loss": 0.005280618742108345, - "step": 7855 - }, - { - "epoch": 1.340153452685422, - "grad_norm": 0.10326153039932251, - "learning_rate": 5.745399211409987e-05, - "loss": 0.005818159133195877, - "step": 7860 - }, - { - "epoch": 1.341005967604433, - "grad_norm": 0.053448133170604706, - "learning_rate": 5.743229393097384e-05, - "loss": 0.008255011588335037, - "step": 7865 - }, - { - "epoch": 1.341858482523444, - "grad_norm": 0.05307561904191971, - "learning_rate": 5.741058644278995e-05, - "loss": 0.006851959228515625, - "step": 7870 - }, - { - "epoch": 1.3427109974424551, - "grad_norm": 0.050789013504981995, - "learning_rate": 5.738886965968199e-05, - "loss": 0.005396667867898941, - "step": 7875 - }, - { - "epoch": 1.3435635123614662, - "grad_norm": 0.06762190908193588, - "learning_rate": 5.736714359178808e-05, - "loss": 0.005661940947175026, - "step": 7880 - }, - { - "epoch": 1.3444160272804775, - "grad_norm": 0.06955094635486603, - "learning_rate": 5.734540824925066e-05, - "loss": 0.0065834902226924895, - "step": 7885 - }, - { - "epoch": 1.3452685421994885, - "grad_norm": 0.09844111651182175, - "learning_rate": 5.7323663642216525e-05, - "loss": 0.006687184423208236, - "step": 7890 - }, - { - "epoch": 1.3461210571184996, - "grad_norm": 0.05249316990375519, - "learning_rate": 5.7301909780836766e-05, - "loss": 0.00670531764626503, - "step": 7895 - }, - { - "epoch": 1.3469735720375107, - "grad_norm": 0.06578750163316727, - "learning_rate": 5.7280146675266815e-05, - "loss": 0.0063153237104415895, - "step": 7900 - }, - { - "epoch": 1.3478260869565217, - "grad_norm": 0.10460260510444641, - "learning_rate": 5.725837433566643e-05, - "loss": 0.008820119500160217, - "step": 7905 - }, - { - "epoch": 1.3486786018755328, - "grad_norm": 0.06620552390813828, - "learning_rate": 5.7236592772199624e-05, - "loss": 0.006502580642700195, - "step": 7910 - }, - { - "epoch": 1.3495311167945439, - "grad_norm": 0.1033373698592186, - "learning_rate": 5.72148019950348e-05, - "loss": 0.008503454178571701, - "step": 7915 - }, - { - "epoch": 1.350383631713555, - "grad_norm": 0.05790281295776367, - "learning_rate": 5.719300201434458e-05, - "loss": 0.006304294615983963, - "step": 7920 - }, - { - "epoch": 1.3512361466325662, - "grad_norm": 0.06094033271074295, - "learning_rate": 5.717119284030595e-05, - "loss": 0.006775079667568207, - "step": 7925 - }, - { - "epoch": 1.3520886615515773, - "grad_norm": 0.08011666685342789, - "learning_rate": 5.714937448310015e-05, - "loss": 0.0064566083252429966, - "step": 7930 - }, - { - "epoch": 1.3529411764705883, - "grad_norm": 0.06395548582077026, - "learning_rate": 5.7127546952912686e-05, - "loss": 0.009279583394527436, - "step": 7935 - }, - { - "epoch": 1.3537936913895994, - "grad_norm": 0.06697574257850647, - "learning_rate": 5.710571025993342e-05, - "loss": 0.005718713253736496, - "step": 7940 - }, - { - "epoch": 1.3546462063086104, - "grad_norm": 0.08821829408407211, - "learning_rate": 5.7083864414356414e-05, - "loss": 0.008157726377248764, - "step": 7945 - }, - { - "epoch": 1.3554987212276215, - "grad_norm": 0.07097669690847397, - "learning_rate": 5.706200942638006e-05, - "loss": 0.004782359302043915, - "step": 7950 - }, - { - "epoch": 1.3563512361466326, - "grad_norm": 0.05015713721513748, - "learning_rate": 5.7040145306206963e-05, - "loss": 0.004204710572957992, - "step": 7955 - }, - { - "epoch": 1.3572037510656436, - "grad_norm": 0.054049719125032425, - "learning_rate": 5.701827206404406e-05, - "loss": 0.00606432780623436, - "step": 7960 - }, - { - "epoch": 1.3580562659846547, - "grad_norm": 0.0878557488322258, - "learning_rate": 5.6996389710102474e-05, - "loss": 0.008037066459655762, - "step": 7965 - }, - { - "epoch": 1.3589087809036657, - "grad_norm": 0.10873926430940628, - "learning_rate": 5.697449825459762e-05, - "loss": 0.007864821702241898, - "step": 7970 - }, - { - "epoch": 1.3597612958226768, - "grad_norm": 0.05823246389627457, - "learning_rate": 5.695259770774919e-05, - "loss": 0.00715988278388977, - "step": 7975 - }, - { - "epoch": 1.3606138107416879, - "grad_norm": 0.06690117716789246, - "learning_rate": 5.693068807978106e-05, - "loss": 0.006888707727193832, - "step": 7980 - }, - { - "epoch": 1.3614663256606991, - "grad_norm": 0.07290884852409363, - "learning_rate": 5.6908769380921363e-05, - "loss": 0.005684115365147591, - "step": 7985 - }, - { - "epoch": 1.3623188405797102, - "grad_norm": 0.07930465042591095, - "learning_rate": 5.6886841621402504e-05, - "loss": 0.0077220767736434935, - "step": 7990 - }, - { - "epoch": 1.3631713554987213, - "grad_norm": 0.08893048763275146, - "learning_rate": 5.686490481146107e-05, - "loss": 0.007824088633060455, - "step": 7995 - }, - { - "epoch": 1.3640238704177323, - "grad_norm": 0.09335844218730927, - "learning_rate": 5.6842958961337905e-05, - "loss": 0.006522499769926071, - "step": 8000 - }, - { - "epoch": 1.3648763853367434, - "grad_norm": 0.07194571942090988, - "learning_rate": 5.682100408127806e-05, - "loss": 0.008011893928050995, - "step": 8005 - }, - { - "epoch": 1.3657289002557544, - "grad_norm": 0.053429413586854935, - "learning_rate": 5.6799040181530794e-05, - "loss": 0.006260050833225251, - "step": 8010 - }, - { - "epoch": 1.3665814151747655, - "grad_norm": 0.11974478513002396, - "learning_rate": 5.677706727234959e-05, - "loss": 0.006692723929882049, - "step": 8015 - }, - { - "epoch": 1.3674339300937766, - "grad_norm": 0.07810027152299881, - "learning_rate": 5.6755085363992155e-05, - "loss": 0.007429388910531997, - "step": 8020 - }, - { - "epoch": 1.3682864450127878, - "grad_norm": 0.10204190760850906, - "learning_rate": 5.673309446672034e-05, - "loss": 0.005550343170762062, - "step": 8025 - }, - { - "epoch": 1.369138959931799, - "grad_norm": 0.07640541344881058, - "learning_rate": 5.671109459080026e-05, - "loss": 0.006840181350708008, - "step": 8030 - }, - { - "epoch": 1.36999147485081, - "grad_norm": 0.06644181162118912, - "learning_rate": 5.668908574650216e-05, - "loss": 0.005395495146512985, - "step": 8035 - }, - { - "epoch": 1.370843989769821, - "grad_norm": 0.09630967676639557, - "learning_rate": 5.6667067944100526e-05, - "loss": 0.005423872545361519, - "step": 8040 - }, - { - "epoch": 1.371696504688832, - "grad_norm": 0.07114128023386002, - "learning_rate": 5.664504119387398e-05, - "loss": 0.007013414800167084, - "step": 8045 - }, - { - "epoch": 1.3725490196078431, - "grad_norm": 0.07324981689453125, - "learning_rate": 5.662300550610535e-05, - "loss": 0.008274464309215546, - "step": 8050 - }, - { - "epoch": 1.3734015345268542, - "grad_norm": 0.06012870743870735, - "learning_rate": 5.660096089108163e-05, - "loss": 0.00520169697701931, - "step": 8055 - }, - { - "epoch": 1.3742540494458653, - "grad_norm": 0.07458557933568954, - "learning_rate": 5.657890735909397e-05, - "loss": 0.006112886965274811, - "step": 8060 - }, - { - "epoch": 1.3751065643648763, - "grad_norm": 0.0470297709107399, - "learning_rate": 5.655684492043771e-05, - "loss": 0.004435106366872788, - "step": 8065 - }, - { - "epoch": 1.3759590792838874, - "grad_norm": 0.05244847387075424, - "learning_rate": 5.653477358541231e-05, - "loss": 0.006484140455722809, - "step": 8070 - }, - { - "epoch": 1.3768115942028984, - "grad_norm": 0.10809201747179031, - "learning_rate": 5.651269336432142e-05, - "loss": 0.006385499238967895, - "step": 8075 - }, - { - "epoch": 1.3776641091219095, - "grad_norm": 0.11761374026536942, - "learning_rate": 5.649060426747281e-05, - "loss": 0.0056259695440530775, - "step": 8080 - }, - { - "epoch": 1.3785166240409208, - "grad_norm": 0.06250949203968048, - "learning_rate": 5.646850630517842e-05, - "loss": 0.005127568915486336, - "step": 8085 - }, - { - "epoch": 1.3793691389599319, - "grad_norm": 0.07686682790517807, - "learning_rate": 5.6446399487754307e-05, - "loss": 0.006484859436750412, - "step": 8090 - }, - { - "epoch": 1.380221653878943, - "grad_norm": 0.10453952848911285, - "learning_rate": 5.6424283825520656e-05, - "loss": 0.007125881314277649, - "step": 8095 - }, - { - "epoch": 1.381074168797954, - "grad_norm": 0.08170976489782333, - "learning_rate": 5.640215932880181e-05, - "loss": 0.007152590900659561, - "step": 8100 - }, - { - "epoch": 1.381926683716965, - "grad_norm": 0.08639637380838394, - "learning_rate": 5.638002600792621e-05, - "loss": 0.006862475723028183, - "step": 8105 - }, - { - "epoch": 1.382779198635976, - "grad_norm": 0.061349738389253616, - "learning_rate": 5.635788387322642e-05, - "loss": 0.006520121544599533, - "step": 8110 - }, - { - "epoch": 1.3836317135549872, - "grad_norm": 0.09568873792886734, - "learning_rate": 5.633573293503915e-05, - "loss": 0.00690893828868866, - "step": 8115 - }, - { - "epoch": 1.3844842284739982, - "grad_norm": 0.05280910059809685, - "learning_rate": 5.631357320370518e-05, - "loss": 0.0068241022527217865, - "step": 8120 - }, - { - "epoch": 1.3853367433930095, - "grad_norm": 0.08307540416717529, - "learning_rate": 5.6291404689569406e-05, - "loss": 0.009796305000782013, - "step": 8125 - }, - { - "epoch": 1.3861892583120206, - "grad_norm": 0.06511564552783966, - "learning_rate": 5.6269227402980824e-05, - "loss": 0.00675605982542038, - "step": 8130 - }, - { - "epoch": 1.3870417732310316, - "grad_norm": 0.09521665424108505, - "learning_rate": 5.624704135429255e-05, - "loss": 0.00661565363407135, - "step": 8135 - }, - { - "epoch": 1.3878942881500427, - "grad_norm": 0.06467590481042862, - "learning_rate": 5.622484655386175e-05, - "loss": 0.007056808471679688, - "step": 8140 - }, - { - "epoch": 1.3887468030690537, - "grad_norm": 0.04240449517965317, - "learning_rate": 5.62026430120497e-05, - "loss": 0.005277678743004799, - "step": 8145 - }, - { - "epoch": 1.3895993179880648, - "grad_norm": 0.08462672680616379, - "learning_rate": 5.618043073922176e-05, - "loss": 0.005951377004384995, - "step": 8150 - }, - { - "epoch": 1.3904518329070759, - "grad_norm": 0.08304573595523834, - "learning_rate": 5.615820974574735e-05, - "loss": 0.006729351729154587, - "step": 8155 - }, - { - "epoch": 1.391304347826087, - "grad_norm": 0.04584382846951485, - "learning_rate": 5.6135980041999964e-05, - "loss": 0.00490913912653923, - "step": 8160 - }, - { - "epoch": 1.392156862745098, - "grad_norm": 0.06771710515022278, - "learning_rate": 5.6113741638357175e-05, - "loss": 0.007046511024236679, - "step": 8165 - }, - { - "epoch": 1.393009377664109, - "grad_norm": 0.06334209442138672, - "learning_rate": 5.609149454520062e-05, - "loss": 0.006314977258443833, - "step": 8170 - }, - { - "epoch": 1.39386189258312, - "grad_norm": 0.06783269345760345, - "learning_rate": 5.606923877291595e-05, - "loss": 0.006176649779081345, - "step": 8175 - }, - { - "epoch": 1.3947144075021312, - "grad_norm": 0.10245220363140106, - "learning_rate": 5.604697433189293e-05, - "loss": 0.006309907138347626, - "step": 8180 - }, - { - "epoch": 1.3955669224211424, - "grad_norm": 0.07151709496974945, - "learning_rate": 5.6024701232525325e-05, - "loss": 0.005038458108901978, - "step": 8185 - }, - { - "epoch": 1.3964194373401535, - "grad_norm": 0.08811933547258377, - "learning_rate": 5.600241948521099e-05, - "loss": 0.006065644696354866, - "step": 8190 - }, - { - "epoch": 1.3972719522591646, - "grad_norm": 0.07598903775215149, - "learning_rate": 5.5980129100351736e-05, - "loss": 0.006201237812638283, - "step": 8195 - }, - { - "epoch": 1.3981244671781756, - "grad_norm": 0.058092061430215836, - "learning_rate": 5.5957830088353475e-05, - "loss": 0.006383272260427475, - "step": 8200 - }, - { - "epoch": 1.3989769820971867, - "grad_norm": 0.18196560442447662, - "learning_rate": 5.593552245962616e-05, - "loss": 0.004768157005310058, - "step": 8205 - }, - { - "epoch": 1.3998294970161977, - "grad_norm": 0.09071574360132217, - "learning_rate": 5.591320622458369e-05, - "loss": 0.007671289891004562, - "step": 8210 - }, - { - "epoch": 1.4006820119352088, - "grad_norm": 0.09470858424901962, - "learning_rate": 5.589088139364405e-05, - "loss": 0.006691985577344894, - "step": 8215 - }, - { - "epoch": 1.40153452685422, - "grad_norm": 0.05345157906413078, - "learning_rate": 5.58685479772292e-05, - "loss": 0.005832263827323913, - "step": 8220 - }, - { - "epoch": 1.4023870417732311, - "grad_norm": 0.08154480904340744, - "learning_rate": 5.584620598576514e-05, - "loss": 0.00907905399799347, - "step": 8225 - }, - { - "epoch": 1.4032395566922422, - "grad_norm": 0.06621691584587097, - "learning_rate": 5.582385542968185e-05, - "loss": 0.005938088893890381, - "step": 8230 - }, - { - "epoch": 1.4040920716112533, - "grad_norm": 0.0557720884680748, - "learning_rate": 5.580149631941329e-05, - "loss": 0.005079039558768273, - "step": 8235 - }, - { - "epoch": 1.4049445865302643, - "grad_norm": 0.07839027792215347, - "learning_rate": 5.577912866539746e-05, - "loss": 0.006307472288608551, - "step": 8240 - }, - { - "epoch": 1.4057971014492754, - "grad_norm": 0.05926419049501419, - "learning_rate": 5.575675247807632e-05, - "loss": 0.0072102643549442295, - "step": 8245 - }, - { - "epoch": 1.4066496163682864, - "grad_norm": 0.0570182129740715, - "learning_rate": 5.5734367767895814e-05, - "loss": 0.0066485337913036345, - "step": 8250 - }, - { - "epoch": 1.4075021312872975, - "grad_norm": 0.0930657833814621, - "learning_rate": 5.571197454530588e-05, - "loss": 0.005854785442352295, - "step": 8255 - }, - { - "epoch": 1.4083546462063086, - "grad_norm": 0.06018427759408951, - "learning_rate": 5.568957282076041e-05, - "loss": 0.0049718767404556274, - "step": 8260 - }, - { - "epoch": 1.4092071611253196, - "grad_norm": 0.0889105498790741, - "learning_rate": 5.566716260471726e-05, - "loss": 0.005993577092885971, - "step": 8265 - }, - { - "epoch": 1.4100596760443307, - "grad_norm": 0.04429823160171509, - "learning_rate": 5.5644743907638294e-05, - "loss": 0.005357314646244049, - "step": 8270 - }, - { - "epoch": 1.4109121909633418, - "grad_norm": 0.054813142865896225, - "learning_rate": 5.5622316739989296e-05, - "loss": 0.005666692927479744, - "step": 8275 - }, - { - "epoch": 1.4117647058823528, - "grad_norm": 0.06909197568893433, - "learning_rate": 5.559988111224001e-05, - "loss": 0.005058525875210762, - "step": 8280 - }, - { - "epoch": 1.412617220801364, - "grad_norm": 0.10060004889965057, - "learning_rate": 5.557743703486413e-05, - "loss": 0.0070966087281703946, - "step": 8285 - }, - { - "epoch": 1.4134697357203752, - "grad_norm": 0.052008096128702164, - "learning_rate": 5.555498451833929e-05, - "loss": 0.006671085208654404, - "step": 8290 - }, - { - "epoch": 1.4143222506393862, - "grad_norm": 0.06272758543491364, - "learning_rate": 5.5532523573147094e-05, - "loss": 0.006071484088897705, - "step": 8295 - }, - { - "epoch": 1.4151747655583973, - "grad_norm": 0.08005380630493164, - "learning_rate": 5.551005420977304e-05, - "loss": 0.006429645419120789, - "step": 8300 - }, - { - "epoch": 1.4160272804774083, - "grad_norm": 0.08246695250272751, - "learning_rate": 5.548757643870659e-05, - "loss": 0.004599097743630409, - "step": 8305 - }, - { - "epoch": 1.4168797953964194, - "grad_norm": 0.1190599724650383, - "learning_rate": 5.54650902704411e-05, - "loss": 0.00652359127998352, - "step": 8310 - }, - { - "epoch": 1.4177323103154305, - "grad_norm": 0.042393747717142105, - "learning_rate": 5.5442595715473867e-05, - "loss": 0.004551848769187928, - "step": 8315 - }, - { - "epoch": 1.4185848252344417, - "grad_norm": 0.0809137374162674, - "learning_rate": 5.54200927843061e-05, - "loss": 0.0062880381941795346, - "step": 8320 - }, - { - "epoch": 1.4194373401534528, - "grad_norm": 0.09030820429325104, - "learning_rate": 5.5397581487442905e-05, - "loss": 0.007365265488624572, - "step": 8325 - }, - { - "epoch": 1.4202898550724639, - "grad_norm": 0.060766976326704025, - "learning_rate": 5.537506183539333e-05, - "loss": 0.0047208376228809355, - "step": 8330 - }, - { - "epoch": 1.421142369991475, - "grad_norm": 0.0763862356543541, - "learning_rate": 5.535253383867027e-05, - "loss": 0.006549081206321717, - "step": 8335 - }, - { - "epoch": 1.421994884910486, - "grad_norm": 0.13493886590003967, - "learning_rate": 5.532999750779056e-05, - "loss": 0.0075773999094963075, - "step": 8340 - }, - { - "epoch": 1.422847399829497, - "grad_norm": 0.07888541370630264, - "learning_rate": 5.53074528532749e-05, - "loss": 0.007893601059913635, - "step": 8345 - }, - { - "epoch": 1.423699914748508, - "grad_norm": 0.04488302394747734, - "learning_rate": 5.528489988564789e-05, - "loss": 0.006052879989147187, - "step": 8350 - }, - { - "epoch": 1.4245524296675192, - "grad_norm": 0.09534436464309692, - "learning_rate": 5.5262338615438e-05, - "loss": 0.006183170899748802, - "step": 8355 - }, - { - "epoch": 1.4254049445865302, - "grad_norm": 0.0796227753162384, - "learning_rate": 5.523976905317758e-05, - "loss": 0.006368820369243622, - "step": 8360 - }, - { - "epoch": 1.4262574595055413, - "grad_norm": 0.039230771362781525, - "learning_rate": 5.521719120940287e-05, - "loss": 0.005282421037554741, - "step": 8365 - }, - { - "epoch": 1.4271099744245523, - "grad_norm": 0.12020807713270187, - "learning_rate": 5.5194605094653935e-05, - "loss": 0.00718868374824524, - "step": 8370 - }, - { - "epoch": 1.4279624893435634, - "grad_norm": 0.07434894144535065, - "learning_rate": 5.5172010719474746e-05, - "loss": 0.007591472566127777, - "step": 8375 - }, - { - "epoch": 1.4288150042625745, - "grad_norm": 0.0722346156835556, - "learning_rate": 5.514940809441309e-05, - "loss": 0.005854631587862968, - "step": 8380 - }, - { - "epoch": 1.4296675191815857, - "grad_norm": 0.07834078371524811, - "learning_rate": 5.5126797230020634e-05, - "loss": 0.007415357977151871, - "step": 8385 - }, - { - "epoch": 1.4305200341005968, - "grad_norm": 0.08718696236610413, - "learning_rate": 5.5104178136852884e-05, - "loss": 0.007089633494615555, - "step": 8390 - }, - { - "epoch": 1.4313725490196079, - "grad_norm": 0.08823035657405853, - "learning_rate": 5.508155082546918e-05, - "loss": 0.007153714448213578, - "step": 8395 - }, - { - "epoch": 1.432225063938619, - "grad_norm": 0.07249119132757187, - "learning_rate": 5.505891530643269e-05, - "loss": 0.007651906460523605, - "step": 8400 - }, - { - "epoch": 1.43307757885763, - "grad_norm": 0.06284502893686295, - "learning_rate": 5.503627159031046e-05, - "loss": 0.007741397619247437, - "step": 8405 - }, - { - "epoch": 1.433930093776641, - "grad_norm": 0.06557357311248779, - "learning_rate": 5.501361968767331e-05, - "loss": 0.007656902819871902, - "step": 8410 - }, - { - "epoch": 1.434782608695652, - "grad_norm": 0.0775318294763565, - "learning_rate": 5.49909596090959e-05, - "loss": 0.006714560091495514, - "step": 8415 - }, - { - "epoch": 1.4356351236146634, - "grad_norm": 0.05347822234034538, - "learning_rate": 5.496829136515672e-05, - "loss": 0.0048537302762269975, - "step": 8420 - }, - { - "epoch": 1.4364876385336744, - "grad_norm": 0.07040467113256454, - "learning_rate": 5.4945614966438046e-05, - "loss": 0.005411979556083679, - "step": 8425 - }, - { - "epoch": 1.4373401534526855, - "grad_norm": 0.09473410993814468, - "learning_rate": 5.492293042352598e-05, - "loss": 0.008005911856889725, - "step": 8430 - }, - { - "epoch": 1.4381926683716966, - "grad_norm": 0.037446580827236176, - "learning_rate": 5.4900237747010426e-05, - "loss": 0.006237779557704925, - "step": 8435 - }, - { - "epoch": 1.4390451832907076, - "grad_norm": 0.11029476672410965, - "learning_rate": 5.4877536947485074e-05, - "loss": 0.008190502226352692, - "step": 8440 - }, - { - "epoch": 1.4398976982097187, - "grad_norm": 0.0514204315841198, - "learning_rate": 5.4854828035547424e-05, - "loss": 0.006500741839408875, - "step": 8445 - }, - { - "epoch": 1.4407502131287298, - "grad_norm": 0.08411483466625214, - "learning_rate": 5.483211102179873e-05, - "loss": 0.0053235463798046116, - "step": 8450 - }, - { - "epoch": 1.4416027280477408, - "grad_norm": 0.09279052913188934, - "learning_rate": 5.480938591684407e-05, - "loss": 0.006842000037431717, - "step": 8455 - }, - { - "epoch": 1.4424552429667519, - "grad_norm": 0.09881046414375305, - "learning_rate": 5.478665273129228e-05, - "loss": 0.007064050436019898, - "step": 8460 - }, - { - "epoch": 1.443307757885763, - "grad_norm": 0.09018172323703766, - "learning_rate": 5.476391147575595e-05, - "loss": 0.005222787708044052, - "step": 8465 - }, - { - "epoch": 1.444160272804774, - "grad_norm": 0.11489493399858475, - "learning_rate": 5.4741162160851455e-05, - "loss": 0.004823528230190277, - "step": 8470 - }, - { - "epoch": 1.445012787723785, - "grad_norm": 0.11010619252920151, - "learning_rate": 5.4718404797198955e-05, - "loss": 0.007554465532302856, - "step": 8475 - }, - { - "epoch": 1.4458653026427961, - "grad_norm": 0.10469060391187668, - "learning_rate": 5.469563939542233e-05, - "loss": 0.005817038565874099, - "step": 8480 - }, - { - "epoch": 1.4467178175618074, - "grad_norm": 0.06776002794504166, - "learning_rate": 5.467286596614922e-05, - "loss": 0.00899386927485466, - "step": 8485 - }, - { - "epoch": 1.4475703324808185, - "grad_norm": 0.08414942771196365, - "learning_rate": 5.4650084520011026e-05, - "loss": 0.00611347034573555, - "step": 8490 - }, - { - "epoch": 1.4484228473998295, - "grad_norm": 0.09625279158353806, - "learning_rate": 5.462729506764289e-05, - "loss": 0.005645812302827835, - "step": 8495 - }, - { - "epoch": 1.4492753623188406, - "grad_norm": 0.10020645707845688, - "learning_rate": 5.4604497619683674e-05, - "loss": 0.0058198563754558565, - "step": 8500 - }, - { - "epoch": 1.4501278772378516, - "grad_norm": 0.08466780185699463, - "learning_rate": 5.4581692186776e-05, - "loss": 0.005098164081573486, - "step": 8505 - }, - { - "epoch": 1.4509803921568627, - "grad_norm": 0.058955296874046326, - "learning_rate": 5.4558878779566194e-05, - "loss": 0.004072735831141472, - "step": 8510 - }, - { - "epoch": 1.4518329070758738, - "grad_norm": 0.14849397540092468, - "learning_rate": 5.4536057408704304e-05, - "loss": 0.011097650229930877, - "step": 8515 - }, - { - "epoch": 1.452685421994885, - "grad_norm": 0.08641809970140457, - "learning_rate": 5.451322808484413e-05, - "loss": 0.006210784614086151, - "step": 8520 - }, - { - "epoch": 1.453537936913896, - "grad_norm": 0.07506752014160156, - "learning_rate": 5.4490390818643136e-05, - "loss": 0.006071462482213974, - "step": 8525 - }, - { - "epoch": 1.4543904518329072, - "grad_norm": 0.10383405536413193, - "learning_rate": 5.4467545620762545e-05, - "loss": 0.008749781548976899, - "step": 8530 - }, - { - "epoch": 1.4552429667519182, - "grad_norm": 0.08180487155914307, - "learning_rate": 5.444469250186721e-05, - "loss": 0.00613279715180397, - "step": 8535 - }, - { - "epoch": 1.4560954816709293, - "grad_norm": 0.07797367125749588, - "learning_rate": 5.442183147262577e-05, - "loss": 0.005885690450668335, - "step": 8540 - }, - { - "epoch": 1.4569479965899403, - "grad_norm": 0.0780278891324997, - "learning_rate": 5.439896254371049e-05, - "loss": 0.007259850949048996, - "step": 8545 - }, - { - "epoch": 1.4578005115089514, - "grad_norm": 0.10005395114421844, - "learning_rate": 5.437608572579737e-05, - "loss": 0.0061523888260126116, - "step": 8550 - }, - { - "epoch": 1.4586530264279625, - "grad_norm": 0.10141763836145401, - "learning_rate": 5.435320102956604e-05, - "loss": 0.006501191109418869, - "step": 8555 - }, - { - "epoch": 1.4595055413469735, - "grad_norm": 0.0938732773065567, - "learning_rate": 5.4330308465699865e-05, - "loss": 0.008337517827749252, - "step": 8560 - }, - { - "epoch": 1.4603580562659846, - "grad_norm": 0.1085699051618576, - "learning_rate": 5.430740804488582e-05, - "loss": 0.005681714415550232, - "step": 8565 - }, - { - "epoch": 1.4612105711849956, - "grad_norm": 0.07967904955148697, - "learning_rate": 5.428449977781463e-05, - "loss": 0.006299185007810593, - "step": 8570 - }, - { - "epoch": 1.4620630861040067, - "grad_norm": 0.090158611536026, - "learning_rate": 5.426158367518061e-05, - "loss": 0.007821831852197647, - "step": 8575 - }, - { - "epoch": 1.4629156010230178, - "grad_norm": 0.12222256511449814, - "learning_rate": 5.4238659747681736e-05, - "loss": 0.0065193742513656614, - "step": 8580 - }, - { - "epoch": 1.463768115942029, - "grad_norm": 0.07724417746067047, - "learning_rate": 5.421572800601971e-05, - "loss": 0.00850745365023613, - "step": 8585 - }, - { - "epoch": 1.46462063086104, - "grad_norm": 0.07322543114423752, - "learning_rate": 5.4192788460899786e-05, - "loss": 0.006478501856327057, - "step": 8590 - }, - { - "epoch": 1.4654731457800512, - "grad_norm": 0.07086360454559326, - "learning_rate": 5.416984112303095e-05, - "loss": 0.007459370046854019, - "step": 8595 - }, - { - "epoch": 1.4663256606990622, - "grad_norm": 0.08460366725921631, - "learning_rate": 5.414688600312575e-05, - "loss": 0.006461035460233688, - "step": 8600 - }, - { - "epoch": 1.4671781756180733, - "grad_norm": 0.06856394559144974, - "learning_rate": 5.412392311190041e-05, - "loss": 0.007420676201581955, - "step": 8605 - }, - { - "epoch": 1.4680306905370843, - "grad_norm": 0.06801126897335052, - "learning_rate": 5.4100952460074766e-05, - "loss": 0.006456401199102402, - "step": 8610 - }, - { - "epoch": 1.4688832054560954, - "grad_norm": 0.06273184716701508, - "learning_rate": 5.4077974058372295e-05, - "loss": 0.00508052185177803, - "step": 8615 - }, - { - "epoch": 1.4697357203751067, - "grad_norm": 0.07751575112342834, - "learning_rate": 5.405498791752007e-05, - "loss": 0.006596812605857849, - "step": 8620 - }, - { - "epoch": 1.4705882352941178, - "grad_norm": 0.10850238054990768, - "learning_rate": 5.4031994048248776e-05, - "loss": 0.006385332345962525, - "step": 8625 - }, - { - "epoch": 1.4714407502131288, - "grad_norm": 0.07195930927991867, - "learning_rate": 5.4008992461292736e-05, - "loss": 0.007354143261909485, - "step": 8630 - }, - { - "epoch": 1.4722932651321399, - "grad_norm": 0.061606891453266144, - "learning_rate": 5.3985983167389846e-05, - "loss": 0.007285259664058685, - "step": 8635 - }, - { - "epoch": 1.473145780051151, - "grad_norm": 0.059549275785684586, - "learning_rate": 5.3962966177281616e-05, - "loss": 0.005211231112480163, - "step": 8640 - }, - { - "epoch": 1.473998294970162, - "grad_norm": 0.04548822343349457, - "learning_rate": 5.3939941501713146e-05, - "loss": 0.00805831179022789, - "step": 8645 - }, - { - "epoch": 1.474850809889173, - "grad_norm": 0.046682652086019516, - "learning_rate": 5.3916909151433096e-05, - "loss": 0.005787956342101097, - "step": 8650 - }, - { - "epoch": 1.4757033248081841, - "grad_norm": 0.06405246257781982, - "learning_rate": 5.3893869137193755e-05, - "loss": 0.005377359688282013, - "step": 8655 - }, - { - "epoch": 1.4765558397271952, - "grad_norm": 0.09410709887742996, - "learning_rate": 5.3870821469750964e-05, - "loss": 0.006961540877819061, - "step": 8660 - }, - { - "epoch": 1.4774083546462062, - "grad_norm": 0.0637243241071701, - "learning_rate": 5.384776615986414e-05, - "loss": 0.0060172989964485165, - "step": 8665 - }, - { - "epoch": 1.4782608695652173, - "grad_norm": 0.07082457840442657, - "learning_rate": 5.382470321829627e-05, - "loss": 0.005960140377283096, - "step": 8670 - }, - { - "epoch": 1.4791133844842284, - "grad_norm": 0.06502280384302139, - "learning_rate": 5.380163265581391e-05, - "loss": 0.005596417188644409, - "step": 8675 - }, - { - "epoch": 1.4799658994032396, - "grad_norm": 0.07504235208034515, - "learning_rate": 5.3778554483187134e-05, - "loss": 0.007427094876766205, - "step": 8680 - }, - { - "epoch": 1.4808184143222507, - "grad_norm": 0.08005198836326599, - "learning_rate": 5.375546871118964e-05, - "loss": 0.006888572126626968, - "step": 8685 - }, - { - "epoch": 1.4816709292412618, - "grad_norm": 0.1083201915025711, - "learning_rate": 5.373237535059861e-05, - "loss": 0.007253114879131317, - "step": 8690 - }, - { - "epoch": 1.4825234441602728, - "grad_norm": 0.060000013560056686, - "learning_rate": 5.37092744121948e-05, - "loss": 0.005570416525006294, - "step": 8695 - }, - { - "epoch": 1.4833759590792839, - "grad_norm": 0.04832584038376808, - "learning_rate": 5.3686165906762504e-05, - "loss": 0.005356843769550324, - "step": 8700 - }, - { - "epoch": 1.484228473998295, - "grad_norm": 0.061586812138557434, - "learning_rate": 5.3663049845089534e-05, - "loss": 0.005226074159145356, - "step": 8705 - }, - { - "epoch": 1.485080988917306, - "grad_norm": 0.08486256003379822, - "learning_rate": 5.363992623796724e-05, - "loss": 0.007083939760923386, - "step": 8710 - }, - { - "epoch": 1.485933503836317, - "grad_norm": 0.09085836261510849, - "learning_rate": 5.361679509619048e-05, - "loss": 0.005988218262791634, - "step": 8715 - }, - { - "epoch": 1.4867860187553283, - "grad_norm": 0.06301745027303696, - "learning_rate": 5.359365643055765e-05, - "loss": 0.00595020055770874, - "step": 8720 - }, - { - "epoch": 1.4876385336743394, - "grad_norm": 0.07939866930246353, - "learning_rate": 5.3570510251870646e-05, - "loss": 0.006101110950112343, - "step": 8725 - }, - { - "epoch": 1.4884910485933505, - "grad_norm": 0.10560661554336548, - "learning_rate": 5.354735657093487e-05, - "loss": 0.006781180202960968, - "step": 8730 - }, - { - "epoch": 1.4893435635123615, - "grad_norm": 0.10549639165401459, - "learning_rate": 5.352419539855925e-05, - "loss": 0.006455187499523163, - "step": 8735 - }, - { - "epoch": 1.4901960784313726, - "grad_norm": 0.06474289298057556, - "learning_rate": 5.3501026745556157e-05, - "loss": 0.0078111283481121065, - "step": 8740 - }, - { - "epoch": 1.4910485933503836, - "grad_norm": 0.11109986901283264, - "learning_rate": 5.3477850622741525e-05, - "loss": 0.00798504576086998, - "step": 8745 - }, - { - "epoch": 1.4919011082693947, - "grad_norm": 0.0787222608923912, - "learning_rate": 5.3454667040934715e-05, - "loss": 0.007222773879766465, - "step": 8750 - }, - { - "epoch": 1.4927536231884058, - "grad_norm": 0.06622221320867538, - "learning_rate": 5.3431476010958613e-05, - "loss": 0.0064462460577487946, - "step": 8755 - }, - { - "epoch": 1.4936061381074168, - "grad_norm": 0.07526405900716782, - "learning_rate": 5.340827754363955e-05, - "loss": 0.005344667285680771, - "step": 8760 - }, - { - "epoch": 1.4944586530264279, - "grad_norm": 0.08911366015672684, - "learning_rate": 5.338507164980734e-05, - "loss": 0.006722994893789291, - "step": 8765 - }, - { - "epoch": 1.495311167945439, - "grad_norm": 0.03749583289027214, - "learning_rate": 5.336185834029527e-05, - "loss": 0.006120331957936287, - "step": 8770 - }, - { - "epoch": 1.49616368286445, - "grad_norm": 0.08167645335197449, - "learning_rate": 5.333863762594008e-05, - "loss": 0.007496768981218338, - "step": 8775 - }, - { - "epoch": 1.4970161977834613, - "grad_norm": 0.09045904129743576, - "learning_rate": 5.3315409517581996e-05, - "loss": 0.007222528755664826, - "step": 8780 - }, - { - "epoch": 1.4978687127024723, - "grad_norm": 0.06064090132713318, - "learning_rate": 5.329217402606464e-05, - "loss": 0.0044986031949520115, - "step": 8785 - }, - { - "epoch": 1.4987212276214834, - "grad_norm": 0.07282263785600662, - "learning_rate": 5.3268931162235126e-05, - "loss": 0.005251912400126457, - "step": 8790 - }, - { - "epoch": 1.4995737425404945, - "grad_norm": 0.0674249604344368, - "learning_rate": 5.324568093694401e-05, - "loss": 0.006289477646350861, - "step": 8795 - }, - { - "epoch": 1.4997442455242966, - "eval_loss": 0.03760311380028725, - "eval_runtime": 3.668, - "eval_samples_per_second": 68.702, - "eval_steps_per_second": 1.091, - "step": 8796 - }, - { - "eval_cer_subset": 0.014184050678261437, - "eval_cer_subset_edit_distance": 871, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 8796 - }, - { - "epoch": 1.5004262574595055, - "grad_norm": 0.06114037334918976, - "learning_rate": 5.322242336104525e-05, - "loss": 0.005809751898050308, - "step": 8800 - }, - { - "epoch": 1.5012787723785166, - "grad_norm": 0.08830825984477997, - "learning_rate": 5.319915844539626e-05, - "loss": 0.006921032071113586, - "step": 8805 - }, - { - "epoch": 1.5021312872975279, - "grad_norm": 0.10813544690608978, - "learning_rate": 5.3175886200857873e-05, - "loss": 0.007966426759958267, - "step": 8810 - }, - { - "epoch": 1.502983802216539, - "grad_norm": 0.08357173204421997, - "learning_rate": 5.3152606638294355e-05, - "loss": 0.006943506002426147, - "step": 8815 - }, - { - "epoch": 1.50383631713555, - "grad_norm": 0.08059901744127274, - "learning_rate": 5.312931976857339e-05, - "loss": 0.0047626100480556485, - "step": 8820 - }, - { - "epoch": 1.504688832054561, - "grad_norm": 0.07412680238485336, - "learning_rate": 5.310602560256604e-05, - "loss": 0.00709492564201355, - "step": 8825 - }, - { - "epoch": 1.5055413469735721, - "grad_norm": 0.046478480100631714, - "learning_rate": 5.3082724151146814e-05, - "loss": 0.006465598940849304, - "step": 8830 - }, - { - "epoch": 1.5063938618925832, - "grad_norm": 0.11122216284275055, - "learning_rate": 5.30594154251936e-05, - "loss": 0.00888531506061554, - "step": 8835 - }, - { - "epoch": 1.5072463768115942, - "grad_norm": 0.06441432982683182, - "learning_rate": 5.3036099435587685e-05, - "loss": 0.005882937833666802, - "step": 8840 - }, - { - "epoch": 1.5080988917306053, - "grad_norm": 0.05722307041287422, - "learning_rate": 5.301277619321374e-05, - "loss": 0.0059202808886766435, - "step": 8845 - }, - { - "epoch": 1.5089514066496164, - "grad_norm": 0.06677310913801193, - "learning_rate": 5.2989445708959856e-05, - "loss": 0.0064939349889755246, - "step": 8850 - }, - { - "epoch": 1.5098039215686274, - "grad_norm": 0.08854222297668457, - "learning_rate": 5.296610799371745e-05, - "loss": 0.007034827768802643, - "step": 8855 - }, - { - "epoch": 1.5106564364876385, - "grad_norm": 0.059711627662181854, - "learning_rate": 5.2942763058381356e-05, - "loss": 0.007557753473520279, - "step": 8860 - }, - { - "epoch": 1.5115089514066495, - "grad_norm": 0.06355257332324982, - "learning_rate": 5.291941091384977e-05, - "loss": 0.006534597277641297, - "step": 8865 - }, - { - "epoch": 1.5123614663256606, - "grad_norm": 0.05741631239652634, - "learning_rate": 5.2896051571024255e-05, - "loss": 0.006453331559896469, - "step": 8870 - }, - { - "epoch": 1.5132139812446717, - "grad_norm": 0.05809224396944046, - "learning_rate": 5.287268504080972e-05, - "loss": 0.006065556779503822, - "step": 8875 - }, - { - "epoch": 1.5140664961636827, - "grad_norm": 0.04522582143545151, - "learning_rate": 5.284931133411443e-05, - "loss": 0.004097414761781692, - "step": 8880 - }, - { - "epoch": 1.514919011082694, - "grad_norm": 0.09349111467599869, - "learning_rate": 5.2825930461850014e-05, - "loss": 0.005707831308245659, - "step": 8885 - }, - { - "epoch": 1.515771526001705, - "grad_norm": 0.08951391279697418, - "learning_rate": 5.280254243493145e-05, - "loss": 0.00725678950548172, - "step": 8890 - }, - { - "epoch": 1.5166240409207161, - "grad_norm": 0.07826244086027145, - "learning_rate": 5.277914726427705e-05, - "loss": 0.008086606860160828, - "step": 8895 - }, - { - "epoch": 1.5174765558397272, - "grad_norm": 0.0619954876601696, - "learning_rate": 5.2755744960808446e-05, - "loss": 0.005462165176868439, - "step": 8900 - }, - { - "epoch": 1.5183290707587382, - "grad_norm": 0.04414132609963417, - "learning_rate": 5.273233553545062e-05, - "loss": 0.005678927898406983, - "step": 8905 - }, - { - "epoch": 1.5191815856777495, - "grad_norm": 0.07183931767940521, - "learning_rate": 5.2708918999131864e-05, - "loss": 0.007184042781591416, - "step": 8910 - }, - { - "epoch": 1.5200341005967606, - "grad_norm": 0.10447251796722412, - "learning_rate": 5.26854953627838e-05, - "loss": 0.009831231832504273, - "step": 8915 - }, - { - "epoch": 1.5208866155157716, - "grad_norm": 0.04392845183610916, - "learning_rate": 5.266206463734135e-05, - "loss": 0.006517301499843598, - "step": 8920 - }, - { - "epoch": 1.5217391304347827, - "grad_norm": 0.06292697787284851, - "learning_rate": 5.2638626833742776e-05, - "loss": 0.005328541249036789, - "step": 8925 - }, - { - "epoch": 1.5225916453537938, - "grad_norm": 0.06425110250711441, - "learning_rate": 5.2615181962929605e-05, - "loss": 0.006298693269491196, - "step": 8930 - }, - { - "epoch": 1.5234441602728048, - "grad_norm": 0.08059051632881165, - "learning_rate": 5.259173003584669e-05, - "loss": 0.008097793161869048, - "step": 8935 - }, - { - "epoch": 1.5242966751918159, - "grad_norm": 0.0625302791595459, - "learning_rate": 5.256827106344218e-05, - "loss": 0.006664089858531952, - "step": 8940 - }, - { - "epoch": 1.525149190110827, - "grad_norm": 0.06092630326747894, - "learning_rate": 5.254480505666749e-05, - "loss": 0.006084204837679863, - "step": 8945 - }, - { - "epoch": 1.526001705029838, - "grad_norm": 0.07297338545322418, - "learning_rate": 5.2521332026477344e-05, - "loss": 0.006405481696128845, - "step": 8950 - }, - { - "epoch": 1.526854219948849, - "grad_norm": 0.05876631662249565, - "learning_rate": 5.249785198382973e-05, - "loss": 0.006670171767473221, - "step": 8955 - }, - { - "epoch": 1.5277067348678601, - "grad_norm": 0.0633542388677597, - "learning_rate": 5.247436493968589e-05, - "loss": 0.004565924406051636, - "step": 8960 - }, - { - "epoch": 1.5285592497868712, - "grad_norm": 0.09164717048406601, - "learning_rate": 5.2450870905010395e-05, - "loss": 0.005662925541400909, - "step": 8965 - }, - { - "epoch": 1.5294117647058822, - "grad_norm": 0.06646572798490524, - "learning_rate": 5.2427369890771026e-05, - "loss": 0.006319984793663025, - "step": 8970 - }, - { - "epoch": 1.5302642796248933, - "grad_norm": 0.08518269658088684, - "learning_rate": 5.2403861907938826e-05, - "loss": 0.0066184550523757935, - "step": 8975 - }, - { - "epoch": 1.5311167945439044, - "grad_norm": 0.08369076251983643, - "learning_rate": 5.238034696748811e-05, - "loss": 0.005069610476493835, - "step": 8980 - }, - { - "epoch": 1.5319693094629157, - "grad_norm": 0.05607258528470993, - "learning_rate": 5.235682508039646e-05, - "loss": 0.007457223534584045, - "step": 8985 - }, - { - "epoch": 1.5328218243819267, - "grad_norm": 0.0828152522444725, - "learning_rate": 5.2333296257644646e-05, - "loss": 0.007727481424808502, - "step": 8990 - }, - { - "epoch": 1.5336743393009378, - "grad_norm": 0.09770844876766205, - "learning_rate": 5.230976051021671e-05, - "loss": 0.007591258734464645, - "step": 8995 - }, - { - "epoch": 1.5345268542199488, - "grad_norm": 0.05906900763511658, - "learning_rate": 5.2286217849099925e-05, - "loss": 0.008510296791791916, - "step": 9000 - }, - { - "epoch": 1.53537936913896, - "grad_norm": 0.07594765722751617, - "learning_rate": 5.2262668285284785e-05, - "loss": 0.005943647772073746, - "step": 9005 - }, - { - "epoch": 1.5362318840579712, - "grad_norm": 0.056658126413822174, - "learning_rate": 5.223911182976502e-05, - "loss": 0.004702667891979218, - "step": 9010 - }, - { - "epoch": 1.5370843989769822, - "grad_norm": 0.060573313385248184, - "learning_rate": 5.2215548493537556e-05, - "loss": 0.006530648469924927, - "step": 9015 - }, - { - "epoch": 1.5379369138959933, - "grad_norm": 0.06876473873853683, - "learning_rate": 5.219197828760254e-05, - "loss": 0.0070976391434669495, - "step": 9020 - }, - { - "epoch": 1.5387894288150044, - "grad_norm": 0.05402369797229767, - "learning_rate": 5.2168401222963354e-05, - "loss": 0.005997032299637795, - "step": 9025 - }, - { - "epoch": 1.5396419437340154, - "grad_norm": 0.0907805860042572, - "learning_rate": 5.214481731062652e-05, - "loss": 0.007357357442378998, - "step": 9030 - }, - { - "epoch": 1.5404944586530265, - "grad_norm": 0.07572564482688904, - "learning_rate": 5.212122656160182e-05, - "loss": 0.004879472404718399, - "step": 9035 - }, - { - "epoch": 1.5413469735720375, - "grad_norm": 0.05684768036007881, - "learning_rate": 5.209762898690218e-05, - "loss": 0.006248699128627777, - "step": 9040 - }, - { - "epoch": 1.5421994884910486, - "grad_norm": 0.070293128490448, - "learning_rate": 5.2074024597543745e-05, - "loss": 0.005055962502956391, - "step": 9045 - }, - { - "epoch": 1.5430520034100597, - "grad_norm": 0.06611300259828568, - "learning_rate": 5.2050413404545823e-05, - "loss": 0.0048581909388303755, - "step": 9050 - }, - { - "epoch": 1.5439045183290707, - "grad_norm": 0.06960003823041916, - "learning_rate": 5.202679541893092e-05, - "loss": 0.006258350610733032, - "step": 9055 - }, - { - "epoch": 1.5447570332480818, - "grad_norm": 0.059757016599178314, - "learning_rate": 5.2003170651724675e-05, - "loss": 0.006347355991601944, - "step": 9060 - }, - { - "epoch": 1.5456095481670928, - "grad_norm": 0.06531284749507904, - "learning_rate": 5.1979539113955936e-05, - "loss": 0.00543224960565567, - "step": 9065 - }, - { - "epoch": 1.546462063086104, - "grad_norm": 0.08068390935659409, - "learning_rate": 5.195590081665667e-05, - "loss": 0.004933612793684006, - "step": 9070 - }, - { - "epoch": 1.547314578005115, - "grad_norm": 0.06198716536164284, - "learning_rate": 5.193225577086203e-05, - "loss": 0.00523824393749237, - "step": 9075 - }, - { - "epoch": 1.548167092924126, - "grad_norm": 0.07734926789999008, - "learning_rate": 5.190860398761032e-05, - "loss": 0.005699950456619263, - "step": 9080 - }, - { - "epoch": 1.5490196078431373, - "grad_norm": 0.058083925396203995, - "learning_rate": 5.188494547794297e-05, - "loss": 0.006147466972470284, - "step": 9085 - }, - { - "epoch": 1.5498721227621484, - "grad_norm": 0.0675162672996521, - "learning_rate": 5.1861280252904546e-05, - "loss": 0.0059716224670410155, - "step": 9090 - }, - { - "epoch": 1.5507246376811594, - "grad_norm": 0.05415274575352669, - "learning_rate": 5.183760832354278e-05, - "loss": 0.0058246061205863954, - "step": 9095 - }, - { - "epoch": 1.5515771526001705, - "grad_norm": 0.05826190859079361, - "learning_rate": 5.1813929700908523e-05, - "loss": 0.005409573763608932, - "step": 9100 - }, - { - "epoch": 1.5524296675191815, - "grad_norm": 0.07188098877668381, - "learning_rate": 5.179024439605573e-05, - "loss": 0.00541839525103569, - "step": 9105 - }, - { - "epoch": 1.5532821824381928, - "grad_norm": 0.07955330610275269, - "learning_rate": 5.176655242004149e-05, - "loss": 0.007760365307331085, - "step": 9110 - }, - { - "epoch": 1.5541346973572039, - "grad_norm": 0.07923565059900284, - "learning_rate": 5.1742853783926e-05, - "loss": 0.00563618317246437, - "step": 9115 - }, - { - "epoch": 1.554987212276215, - "grad_norm": 0.08301008492708206, - "learning_rate": 5.171914849877258e-05, - "loss": 0.006948529183864594, - "step": 9120 - }, - { - "epoch": 1.555839727195226, - "grad_norm": 0.10905841737985611, - "learning_rate": 5.1695436575647655e-05, - "loss": 0.005861887335777282, - "step": 9125 - }, - { - "epoch": 1.556692242114237, - "grad_norm": 0.06157204881310463, - "learning_rate": 5.167171802562072e-05, - "loss": 0.005052468553185463, - "step": 9130 - }, - { - "epoch": 1.5575447570332481, - "grad_norm": 0.08309191465377808, - "learning_rate": 5.164799285976438e-05, - "loss": 0.006937308609485627, - "step": 9135 - }, - { - "epoch": 1.5583972719522592, - "grad_norm": 0.07454490661621094, - "learning_rate": 5.162426108915437e-05, - "loss": 0.00504121258854866, - "step": 9140 - }, - { - "epoch": 1.5592497868712702, - "grad_norm": 0.07217807322740555, - "learning_rate": 5.160052272486943e-05, - "loss": 0.004582167789340019, - "step": 9145 - }, - { - "epoch": 1.5601023017902813, - "grad_norm": 0.07113789767026901, - "learning_rate": 5.157677777799145e-05, - "loss": 0.0055323362350463865, - "step": 9150 - }, - { - "epoch": 1.5609548167092924, - "grad_norm": 0.10281748324632645, - "learning_rate": 5.1553026259605316e-05, - "loss": 0.006342601776123047, - "step": 9155 - }, - { - "epoch": 1.5618073316283034, - "grad_norm": 0.09731876850128174, - "learning_rate": 5.152926818079906e-05, - "loss": 0.0054936733096838, - "step": 9160 - }, - { - "epoch": 1.5626598465473145, - "grad_norm": 0.09631586819887161, - "learning_rate": 5.1505503552663734e-05, - "loss": 0.0064162641763687136, - "step": 9165 - }, - { - "epoch": 1.5635123614663256, - "grad_norm": 0.07588718831539154, - "learning_rate": 5.148173238629348e-05, - "loss": 0.0069232374429702755, - "step": 9170 - }, - { - "epoch": 1.5643648763853366, - "grad_norm": 0.10357257723808289, - "learning_rate": 5.145795469278544e-05, - "loss": 0.007076382637023926, - "step": 9175 - }, - { - "epoch": 1.5652173913043477, - "grad_norm": 0.07249122112989426, - "learning_rate": 5.1434170483239826e-05, - "loss": 0.005868781358003616, - "step": 9180 - }, - { - "epoch": 1.566069906223359, - "grad_norm": 0.06878417730331421, - "learning_rate": 5.1410379768759934e-05, - "loss": 0.006841042637825012, - "step": 9185 - }, - { - "epoch": 1.56692242114237, - "grad_norm": 0.1096004843711853, - "learning_rate": 5.138658256045203e-05, - "loss": 0.00807877779006958, - "step": 9190 - }, - { - "epoch": 1.567774936061381, - "grad_norm": 0.07194329053163528, - "learning_rate": 5.136277886942547e-05, - "loss": 0.005923056975007057, - "step": 9195 - }, - { - "epoch": 1.5686274509803921, - "grad_norm": 0.08904275298118591, - "learning_rate": 5.133896870679257e-05, - "loss": 0.006372517347335816, - "step": 9200 - }, - { - "epoch": 1.5694799658994032, - "grad_norm": 0.05133598670363426, - "learning_rate": 5.131515208366873e-05, - "loss": 0.00692460760474205, - "step": 9205 - }, - { - "epoch": 1.5703324808184145, - "grad_norm": 0.047151900827884674, - "learning_rate": 5.1291329011172345e-05, - "loss": 0.006545543670654297, - "step": 9210 - }, - { - "epoch": 1.5711849957374255, - "grad_norm": 0.07102219760417938, - "learning_rate": 5.126749950042482e-05, - "loss": 0.006531259417533875, - "step": 9215 - }, - { - "epoch": 1.5720375106564366, - "grad_norm": 0.09585709124803543, - "learning_rate": 5.124366356255056e-05, - "loss": 0.005086017400026321, - "step": 9220 - }, - { - "epoch": 1.5728900255754477, - "grad_norm": 0.06898393481969833, - "learning_rate": 5.121982120867695e-05, - "loss": 0.004247477650642395, - "step": 9225 - }, - { - "epoch": 1.5737425404944587, - "grad_norm": 0.10513560473918915, - "learning_rate": 5.119597244993443e-05, - "loss": 0.006501986831426621, - "step": 9230 - }, - { - "epoch": 1.5745950554134698, - "grad_norm": 0.06671630591154099, - "learning_rate": 5.1172117297456366e-05, - "loss": 0.007658005505800247, - "step": 9235 - }, - { - "epoch": 1.5754475703324808, - "grad_norm": 0.09480880945920944, - "learning_rate": 5.1148255762379156e-05, - "loss": 0.006366011500358581, - "step": 9240 - }, - { - "epoch": 1.576300085251492, - "grad_norm": 0.06769633293151855, - "learning_rate": 5.112438785584215e-05, - "loss": 0.00625738725066185, - "step": 9245 - }, - { - "epoch": 1.577152600170503, - "grad_norm": 0.03695152327418327, - "learning_rate": 5.1100513588987665e-05, - "loss": 0.006924654543399811, - "step": 9250 - }, - { - "epoch": 1.578005115089514, - "grad_norm": 0.05657009407877922, - "learning_rate": 5.107663297296104e-05, - "loss": 0.005848415940999985, - "step": 9255 - }, - { - "epoch": 1.578857630008525, - "grad_norm": 0.11228469014167786, - "learning_rate": 5.105274601891051e-05, - "loss": 0.005637861788272858, - "step": 9260 - }, - { - "epoch": 1.5797101449275361, - "grad_norm": 0.06454899162054062, - "learning_rate": 5.102885273798732e-05, - "loss": 0.0066472023725509645, - "step": 9265 - }, - { - "epoch": 1.5805626598465472, - "grad_norm": 0.05328953638672829, - "learning_rate": 5.1004953141345637e-05, - "loss": 0.008773463219404221, - "step": 9270 - }, - { - "epoch": 1.5814151747655583, - "grad_norm": 0.05827401205897331, - "learning_rate": 5.0981047240142576e-05, - "loss": 0.0075307883322238926, - "step": 9275 - }, - { - "epoch": 1.5822676896845693, - "grad_norm": 0.0719359889626503, - "learning_rate": 5.095713504553822e-05, - "loss": 0.007532978057861328, - "step": 9280 - }, - { - "epoch": 1.5831202046035806, - "grad_norm": 0.08982953429222107, - "learning_rate": 5.0933216568695596e-05, - "loss": 0.007915425300598144, - "step": 9285 - }, - { - "epoch": 1.5839727195225917, - "grad_norm": 0.0919221043586731, - "learning_rate": 5.090929182078061e-05, - "loss": 0.005685590207576752, - "step": 9290 - }, - { - "epoch": 1.5848252344416027, - "grad_norm": 0.0840388685464859, - "learning_rate": 5.088536081296215e-05, - "loss": 0.0070190995931625364, - "step": 9295 - }, - { - "epoch": 1.5856777493606138, - "grad_norm": 0.08340579271316528, - "learning_rate": 5.086142355641199e-05, - "loss": 0.005871276929974556, - "step": 9300 - }, - { - "epoch": 1.5865302642796248, - "grad_norm": 0.0840516984462738, - "learning_rate": 5.0837480062304865e-05, - "loss": 0.007803326845169068, - "step": 9305 - }, - { - "epoch": 1.5873827791986361, - "grad_norm": 0.08378542214632034, - "learning_rate": 5.0813530341818377e-05, - "loss": 0.005085055530071258, - "step": 9310 - }, - { - "epoch": 1.5882352941176472, - "grad_norm": 0.10764650255441666, - "learning_rate": 5.078957440613305e-05, - "loss": 0.007959616929292678, - "step": 9315 - }, - { - "epoch": 1.5890878090366582, - "grad_norm": 0.07483979314565659, - "learning_rate": 5.076561226643231e-05, - "loss": 0.004332176968455314, - "step": 9320 - }, - { - "epoch": 1.5899403239556693, - "grad_norm": 0.06658382713794708, - "learning_rate": 5.074164393390249e-05, - "loss": 0.006168607249855995, - "step": 9325 - }, - { - "epoch": 1.5907928388746804, - "grad_norm": 0.09388890862464905, - "learning_rate": 5.071766941973282e-05, - "loss": 0.006460639089345932, - "step": 9330 - }, - { - "epoch": 1.5916453537936914, - "grad_norm": 0.051856543868780136, - "learning_rate": 5.0693688735115364e-05, - "loss": 0.005657953023910522, - "step": 9335 - }, - { - "epoch": 1.5924978687127025, - "grad_norm": 0.0785013884305954, - "learning_rate": 5.066970189124513e-05, - "loss": 0.008378601819276809, - "step": 9340 - }, - { - "epoch": 1.5933503836317136, - "grad_norm": 0.0653534010052681, - "learning_rate": 5.0645708899319956e-05, - "loss": 0.006928309798240662, - "step": 9345 - }, - { - "epoch": 1.5942028985507246, - "grad_norm": 0.047050826251506805, - "learning_rate": 5.062170977054058e-05, - "loss": 0.005722399801015854, - "step": 9350 - }, - { - "epoch": 1.5950554134697357, - "grad_norm": 0.10868531465530396, - "learning_rate": 5.059770451611061e-05, - "loss": 0.009898315370082855, - "step": 9355 - }, - { - "epoch": 1.5959079283887467, - "grad_norm": 0.0615832693874836, - "learning_rate": 5.0573693147236465e-05, - "loss": 0.007755370438098907, - "step": 9360 - }, - { - "epoch": 1.5967604433077578, - "grad_norm": 0.10720556974411011, - "learning_rate": 5.054967567512747e-05, - "loss": 0.006318587809801102, - "step": 9365 - }, - { - "epoch": 1.5976129582267689, - "grad_norm": 0.06587128341197968, - "learning_rate": 5.052565211099578e-05, - "loss": 0.004849371314048767, - "step": 9370 - }, - { - "epoch": 1.59846547314578, - "grad_norm": 0.07305008918046951, - "learning_rate": 5.050162246605638e-05, - "loss": 0.005983927100896835, - "step": 9375 - }, - { - "epoch": 1.599317988064791, - "grad_norm": 0.06641892343759537, - "learning_rate": 5.0477586751527124e-05, - "loss": 0.007008136063814163, - "step": 9380 - }, - { - "epoch": 1.6001705029838023, - "grad_norm": 0.06871581077575684, - "learning_rate": 5.045354497862868e-05, - "loss": 0.0066993959248065945, - "step": 9385 - }, - { - "epoch": 1.6010230179028133, - "grad_norm": 0.07417753338813782, - "learning_rate": 5.042949715858453e-05, - "loss": 0.006360804289579391, - "step": 9390 - }, - { - "epoch": 1.6018755328218244, - "grad_norm": 0.09202401340007782, - "learning_rate": 5.040544330262102e-05, - "loss": 0.006207296252250671, - "step": 9395 - }, - { - "epoch": 1.6027280477408354, - "grad_norm": 0.06747353821992874, - "learning_rate": 5.0381383421967276e-05, - "loss": 0.006196716427803039, - "step": 9400 - }, - { - "epoch": 1.6035805626598465, - "grad_norm": 0.06609310954809189, - "learning_rate": 5.0357317527855266e-05, - "loss": 0.005642791092395782, - "step": 9405 - }, - { - "epoch": 1.6044330775788578, - "grad_norm": 0.039614174515008926, - "learning_rate": 5.0333245631519716e-05, - "loss": 0.005146804824471473, - "step": 9410 - }, - { - "epoch": 1.6052855924978688, - "grad_norm": 0.0902944952249527, - "learning_rate": 5.0309167744198234e-05, - "loss": 0.005218298360705376, - "step": 9415 - }, - { - "epoch": 1.60613810741688, - "grad_norm": 0.06527641415596008, - "learning_rate": 5.028508387713114e-05, - "loss": 0.006157718971371651, - "step": 9420 - }, - { - "epoch": 1.606990622335891, - "grad_norm": 0.10824134200811386, - "learning_rate": 5.026099404156161e-05, - "loss": 0.00577687993645668, - "step": 9425 - }, - { - "epoch": 1.607843137254902, - "grad_norm": 0.091335728764534, - "learning_rate": 5.023689824873556e-05, - "loss": 0.005114461481571198, - "step": 9430 - }, - { - "epoch": 1.608695652173913, - "grad_norm": 0.047340504825115204, - "learning_rate": 5.021279650990173e-05, - "loss": 0.005150845646858216, - "step": 9435 - }, - { - "epoch": 1.6095481670929241, - "grad_norm": 0.05847655236721039, - "learning_rate": 5.01886888363116e-05, - "loss": 0.006019642949104309, - "step": 9440 - }, - { - "epoch": 1.6104006820119352, - "grad_norm": 0.10413257032632828, - "learning_rate": 5.016457523921943e-05, - "loss": 0.0097243569791317, - "step": 9445 - }, - { - "epoch": 1.6112531969309463, - "grad_norm": 0.06559625267982483, - "learning_rate": 5.014045572988226e-05, - "loss": 0.006743426620960236, - "step": 9450 - }, - { - "epoch": 1.6121057118499573, - "grad_norm": 0.07541610300540924, - "learning_rate": 5.0116330319559865e-05, - "loss": 0.004393500834703445, - "step": 9455 - }, - { - "epoch": 1.6129582267689684, - "grad_norm": 0.04757530242204666, - "learning_rate": 5.00921990195148e-05, - "loss": 0.004641738906502724, - "step": 9460 - }, - { - "epoch": 1.6138107416879794, - "grad_norm": 0.10010012239217758, - "learning_rate": 5.0068061841012355e-05, - "loss": 0.005677872523665428, - "step": 9465 - }, - { - "epoch": 1.6146632566069905, - "grad_norm": 0.08248613774776459, - "learning_rate": 5.0043918795320576e-05, - "loss": 0.006557486951351166, - "step": 9470 - }, - { - "epoch": 1.6155157715260016, - "grad_norm": 0.06300318241119385, - "learning_rate": 5.001976989371023e-05, - "loss": 0.0052742622792720795, - "step": 9475 - }, - { - "epoch": 1.6163682864450126, - "grad_norm": 0.06455430388450623, - "learning_rate": 4.999561514745482e-05, - "loss": 0.0061374582350254055, - "step": 9480 - }, - { - "epoch": 1.617220801364024, - "grad_norm": 0.04623732715845108, - "learning_rate": 4.997145456783062e-05, - "loss": 0.007861848175525665, - "step": 9485 - }, - { - "epoch": 1.618073316283035, - "grad_norm": 0.05294455960392952, - "learning_rate": 4.994728816611655e-05, - "loss": 0.005468960478901863, - "step": 9490 - }, - { - "epoch": 1.618925831202046, - "grad_norm": 0.04539628326892853, - "learning_rate": 4.992311595359431e-05, - "loss": 0.005490221083164215, - "step": 9495 - }, - { - "epoch": 1.619778346121057, - "grad_norm": 0.04033574461936951, - "learning_rate": 4.98989379415483e-05, - "loss": 0.005296828970313072, - "step": 9500 - }, - { - "epoch": 1.6206308610400681, - "grad_norm": 0.10801003128290176, - "learning_rate": 4.98747541412656e-05, - "loss": 0.007847490906715392, - "step": 9505 - }, - { - "epoch": 1.6214833759590794, - "grad_norm": 0.05979831889271736, - "learning_rate": 4.985056456403603e-05, - "loss": 0.005352787673473358, - "step": 9510 - }, - { - "epoch": 1.6223358908780905, - "grad_norm": 0.07628990709781647, - "learning_rate": 4.9826369221152086e-05, - "loss": 0.005436672642827034, - "step": 9515 - }, - { - "epoch": 1.6231884057971016, - "grad_norm": 0.0654626339673996, - "learning_rate": 4.9802168123908955e-05, - "loss": 0.004777481406927108, - "step": 9520 - }, - { - "epoch": 1.6240409207161126, - "grad_norm": 0.08487557619810104, - "learning_rate": 4.97779612836045e-05, - "loss": 0.006834116578102112, - "step": 9525 - }, - { - "epoch": 1.6248934356351237, - "grad_norm": 0.09151525050401688, - "learning_rate": 4.9753748711539316e-05, - "loss": 0.006389729678630829, - "step": 9530 - }, - { - "epoch": 1.6257459505541347, - "grad_norm": 0.10458851605653763, - "learning_rate": 4.972953041901661e-05, - "loss": 0.005984527617692947, - "step": 9535 - }, - { - "epoch": 1.6265984654731458, - "grad_norm": 0.08780983090400696, - "learning_rate": 4.970530641734229e-05, - "loss": 0.0068392202258110045, - "step": 9540 - }, - { - "epoch": 1.6274509803921569, - "grad_norm": 0.04871044307947159, - "learning_rate": 4.968107671782493e-05, - "loss": 0.005444938316941261, - "step": 9545 - }, - { - "epoch": 1.628303495311168, - "grad_norm": 0.05514970421791077, - "learning_rate": 4.9656841331775745e-05, - "loss": 0.005353255197405815, - "step": 9550 - }, - { - "epoch": 1.629156010230179, - "grad_norm": 0.057791441679000854, - "learning_rate": 4.9632600270508655e-05, - "loss": 0.005117457732558251, - "step": 9555 - }, - { - "epoch": 1.63000852514919, - "grad_norm": 0.0816815048456192, - "learning_rate": 4.960835354534015e-05, - "loss": 0.005405401438474655, - "step": 9560 - }, - { - "epoch": 1.630861040068201, - "grad_norm": 0.087788425385952, - "learning_rate": 4.958410116758945e-05, - "loss": 0.006124432012438774, - "step": 9565 - }, - { - "epoch": 1.6317135549872122, - "grad_norm": 0.08500470966100693, - "learning_rate": 4.955984314857832e-05, - "loss": 0.00581449456512928, - "step": 9570 - }, - { - "epoch": 1.6325660699062232, - "grad_norm": 0.042804375290870667, - "learning_rate": 4.9535579499631264e-05, - "loss": 0.007793295383453369, - "step": 9575 - }, - { - "epoch": 1.6334185848252343, - "grad_norm": 0.08767658472061157, - "learning_rate": 4.951131023207533e-05, - "loss": 0.006432226300239563, - "step": 9580 - }, - { - "epoch": 1.6342710997442456, - "grad_norm": 0.0693424716591835, - "learning_rate": 4.948703535724023e-05, - "loss": 0.006517377495765686, - "step": 9585 - }, - { - "epoch": 1.6351236146632566, - "grad_norm": 0.08574991673231125, - "learning_rate": 4.9462754886458276e-05, - "loss": 0.009532185643911362, - "step": 9590 - }, - { - "epoch": 1.6359761295822677, - "grad_norm": 0.04135733097791672, - "learning_rate": 4.94384688310644e-05, - "loss": 0.005358002707362175, - "step": 9595 - }, - { - "epoch": 1.6368286445012787, - "grad_norm": 0.09947369992733002, - "learning_rate": 4.941417720239616e-05, - "loss": 0.005965238064527511, - "step": 9600 - }, - { - "epoch": 1.6376811594202898, - "grad_norm": 0.038376711308956146, - "learning_rate": 4.9389880011793665e-05, - "loss": 0.00521450936794281, - "step": 9605 - }, - { - "epoch": 1.638533674339301, - "grad_norm": 0.05022123083472252, - "learning_rate": 4.9365577270599675e-05, - "loss": 0.006678921729326248, - "step": 9610 - }, - { - "epoch": 1.6393861892583121, - "grad_norm": 0.06687050312757492, - "learning_rate": 4.93412689901595e-05, - "loss": 0.006315051764249802, - "step": 9615 - }, - { - "epoch": 1.6402387041773232, - "grad_norm": 0.08563709259033203, - "learning_rate": 4.931695518182107e-05, - "loss": 0.005977614223957062, - "step": 9620 - }, - { - "epoch": 1.6410912190963343, - "grad_norm": 0.07901418209075928, - "learning_rate": 4.929263585693486e-05, - "loss": 0.004367914795875549, - "step": 9625 - }, - { - "epoch": 1.6419437340153453, - "grad_norm": 0.05929172784090042, - "learning_rate": 4.9268311026853974e-05, - "loss": 0.00466451421380043, - "step": 9630 - }, - { - "epoch": 1.6427962489343564, - "grad_norm": 0.09167131781578064, - "learning_rate": 4.924398070293403e-05, - "loss": 0.0063233010470867155, - "step": 9635 - }, - { - "epoch": 1.6436487638533674, - "grad_norm": 0.053217221051454544, - "learning_rate": 4.921964489653321e-05, - "loss": 0.005829869210720063, - "step": 9640 - }, - { - "epoch": 1.6445012787723785, - "grad_norm": 0.05341719463467598, - "learning_rate": 4.919530361901232e-05, - "loss": 0.005165425688028335, - "step": 9645 - }, - { - "epoch": 1.6453537936913896, - "grad_norm": 0.0763968899846077, - "learning_rate": 4.917095688173466e-05, - "loss": 0.008034119009971618, - "step": 9650 - }, - { - "epoch": 1.6462063086104006, - "grad_norm": 0.07722017168998718, - "learning_rate": 4.9146604696066095e-05, - "loss": 0.008911440521478653, - "step": 9655 - }, - { - "epoch": 1.6470588235294117, - "grad_norm": 0.0639941543340683, - "learning_rate": 4.912224707337504e-05, - "loss": 0.0066375695168972015, - "step": 9660 - }, - { - "epoch": 1.6479113384484227, - "grad_norm": 0.05451088026165962, - "learning_rate": 4.9097884025032425e-05, - "loss": 0.004018183052539826, - "step": 9665 - }, - { - "epoch": 1.6487638533674338, - "grad_norm": 0.06928657740354538, - "learning_rate": 4.907351556241176e-05, - "loss": 0.0061560459434986115, - "step": 9670 - }, - { - "epoch": 1.6496163682864449, - "grad_norm": 0.0672740638256073, - "learning_rate": 4.904914169688903e-05, - "loss": 0.005010559782385826, - "step": 9675 - }, - { - "epoch": 1.6504688832054561, - "grad_norm": 0.05115605145692825, - "learning_rate": 4.902476243984279e-05, - "loss": 0.005690005421638489, - "step": 9680 - }, - { - "epoch": 1.6513213981244672, - "grad_norm": 0.08852645754814148, - "learning_rate": 4.9000377802654055e-05, - "loss": 0.0067652732133865355, - "step": 9685 - }, - { - "epoch": 1.6521739130434783, - "grad_norm": 0.08289605379104614, - "learning_rate": 4.897598779670643e-05, - "loss": 0.005946322903037071, - "step": 9690 - }, - { - "epoch": 1.6530264279624893, - "grad_norm": 0.08343428373336792, - "learning_rate": 4.895159243338594e-05, - "loss": 0.006231371313333511, - "step": 9695 - }, - { - "epoch": 1.6538789428815004, - "grad_norm": 0.08138900995254517, - "learning_rate": 4.892719172408117e-05, - "loss": 0.006785771995782852, - "step": 9700 - }, - { - "epoch": 1.6547314578005117, - "grad_norm": 0.07599585503339767, - "learning_rate": 4.890278568018318e-05, - "loss": 0.00609181635081768, - "step": 9705 - }, - { - "epoch": 1.6555839727195227, - "grad_norm": 0.07918383926153183, - "learning_rate": 4.887837431308552e-05, - "loss": 0.006991502642631531, - "step": 9710 - }, - { - "epoch": 1.6564364876385338, - "grad_norm": 0.048750922083854675, - "learning_rate": 4.8853957634184246e-05, - "loss": 0.00639684796333313, - "step": 9715 - }, - { - "epoch": 1.6572890025575449, - "grad_norm": 0.07931654155254364, - "learning_rate": 4.882953565487785e-05, - "loss": 0.004780232906341553, - "step": 9720 - }, - { - "epoch": 1.658141517476556, - "grad_norm": 0.07394375652074814, - "learning_rate": 4.8805108386567345e-05, - "loss": 0.005560039728879929, - "step": 9725 - }, - { - "epoch": 1.658994032395567, - "grad_norm": 0.07906223088502884, - "learning_rate": 4.8780675840656175e-05, - "loss": 0.006233107298612595, - "step": 9730 - }, - { - "epoch": 1.659846547314578, - "grad_norm": 0.05145291984081268, - "learning_rate": 4.875623802855027e-05, - "loss": 0.0049663417041301726, - "step": 9735 - }, - { - "epoch": 1.660699062233589, - "grad_norm": 0.06227492541074753, - "learning_rate": 4.873179496165802e-05, - "loss": 0.006139815598726272, - "step": 9740 - }, - { - "epoch": 1.6615515771526002, - "grad_norm": 0.08176816254854202, - "learning_rate": 4.870734665139028e-05, - "loss": 0.007625886052846908, - "step": 9745 - }, - { - "epoch": 1.6624040920716112, - "grad_norm": 0.06774444133043289, - "learning_rate": 4.868289310916029e-05, - "loss": 0.006510105729103088, - "step": 9750 - }, - { - "epoch": 1.6632566069906223, - "grad_norm": 0.07336006313562393, - "learning_rate": 4.8658434346383805e-05, - "loss": 0.0068834669888019565, - "step": 9755 - }, - { - "epoch": 1.6641091219096333, - "grad_norm": 0.07233051210641861, - "learning_rate": 4.863397037447899e-05, - "loss": 0.005505643784999847, - "step": 9760 - }, - { - "epoch": 1.6649616368286444, - "grad_norm": 0.037355873733758926, - "learning_rate": 4.860950120486643e-05, - "loss": 0.005151794478297234, - "step": 9765 - }, - { - "epoch": 1.6658141517476555, - "grad_norm": 0.10907282680273056, - "learning_rate": 4.8585026848969164e-05, - "loss": 0.007589263468980789, - "step": 9770 - }, - { - "epoch": 1.6666666666666665, - "grad_norm": 0.10357582569122314, - "learning_rate": 4.856054731821261e-05, - "loss": 0.006011854484677314, - "step": 9775 - }, - { - "epoch": 1.6675191815856778, - "grad_norm": 0.0713953971862793, - "learning_rate": 4.853606262402465e-05, - "loss": 0.006342334300279617, - "step": 9780 - }, - { - "epoch": 1.6683716965046889, - "grad_norm": 0.07772944122552872, - "learning_rate": 4.851157277783555e-05, - "loss": 0.005903373658657074, - "step": 9785 - }, - { - "epoch": 1.6692242114237, - "grad_norm": 0.1249493658542633, - "learning_rate": 4.848707779107797e-05, - "loss": 0.006542833894491196, - "step": 9790 - }, - { - "epoch": 1.670076726342711, - "grad_norm": 0.05137734115123749, - "learning_rate": 4.8462577675187e-05, - "loss": 0.004380676150321961, - "step": 9795 - }, - { - "epoch": 1.670929241261722, - "grad_norm": 0.09491576999425888, - "learning_rate": 4.8438072441600095e-05, - "loss": 0.005311820283532142, - "step": 9800 - }, - { - "epoch": 1.6717817561807333, - "grad_norm": 0.09257746487855911, - "learning_rate": 4.8413562101757134e-05, - "loss": 0.006033014133572578, - "step": 9805 - }, - { - "epoch": 1.6726342710997444, - "grad_norm": 0.045860812067985535, - "learning_rate": 4.838904666710034e-05, - "loss": 0.008368080109357834, - "step": 9810 - }, - { - "epoch": 1.6734867860187554, - "grad_norm": 0.033777810633182526, - "learning_rate": 4.836452614907435e-05, - "loss": 0.0045743979513645176, - "step": 9815 - }, - { - "epoch": 1.6743393009377665, - "grad_norm": 0.12888991832733154, - "learning_rate": 4.834000055912614e-05, - "loss": 0.005997149646282196, - "step": 9820 - }, - { - "epoch": 1.6751918158567776, - "grad_norm": 0.08622048050165176, - "learning_rate": 4.8315469908705074e-05, - "loss": 0.007002732157707215, - "step": 9825 - }, - { - "epoch": 1.6760443307757886, - "grad_norm": 0.04722774773836136, - "learning_rate": 4.82909342092629e-05, - "loss": 0.005374876409769058, - "step": 9830 - }, - { - "epoch": 1.6768968456947997, - "grad_norm": 0.08596520870923996, - "learning_rate": 4.826639347225366e-05, - "loss": 0.0066084228456020355, - "step": 9835 - }, - { - "epoch": 1.6777493606138107, - "grad_norm": 0.09831524640321732, - "learning_rate": 4.824184770913381e-05, - "loss": 0.004402932524681091, - "step": 9840 - }, - { - "epoch": 1.6786018755328218, - "grad_norm": 0.10586824268102646, - "learning_rate": 4.821729693136214e-05, - "loss": 0.006442143023014069, - "step": 9845 - }, - { - "epoch": 1.6794543904518329, - "grad_norm": 0.11845403164625168, - "learning_rate": 4.8192741150399735e-05, - "loss": 0.006300021708011627, - "step": 9850 - }, - { - "epoch": 1.680306905370844, - "grad_norm": 0.08749356120824814, - "learning_rate": 4.816818037771007e-05, - "loss": 0.0060168147087097164, - "step": 9855 - }, - { - "epoch": 1.681159420289855, - "grad_norm": 0.06483060121536255, - "learning_rate": 4.814361462475895e-05, - "loss": 0.00717247799038887, - "step": 9860 - }, - { - "epoch": 1.682011935208866, - "grad_norm": 0.09276239573955536, - "learning_rate": 4.811904390301444e-05, - "loss": 0.006788758933544159, - "step": 9865 - }, - { - "epoch": 1.682864450127877, - "grad_norm": 0.05662832781672478, - "learning_rate": 4.809446822394701e-05, - "loss": 0.0068000413477420805, - "step": 9870 - }, - { - "epoch": 1.6837169650468882, - "grad_norm": 0.07508451491594315, - "learning_rate": 4.80698875990294e-05, - "loss": 0.006339512765407562, - "step": 9875 - }, - { - "epoch": 1.6845694799658995, - "grad_norm": 0.06525320559740067, - "learning_rate": 4.804530203973664e-05, - "loss": 0.010082229971885681, - "step": 9880 - }, - { - "epoch": 1.6854219948849105, - "grad_norm": 0.07791458070278168, - "learning_rate": 4.8020711557546104e-05, - "loss": 0.006830710172653198, - "step": 9885 - }, - { - "epoch": 1.6862745098039216, - "grad_norm": 0.05997749790549278, - "learning_rate": 4.799611616393745e-05, - "loss": 0.00666801705956459, - "step": 9890 - }, - { - "epoch": 1.6871270247229326, - "grad_norm": 0.07050258666276932, - "learning_rate": 4.797151587039261e-05, - "loss": 0.0059244450181722644, - "step": 9895 - }, - { - "epoch": 1.6879795396419437, - "grad_norm": 0.06760186702013016, - "learning_rate": 4.794691068839585e-05, - "loss": 0.006415641307830811, - "step": 9900 - }, - { - "epoch": 1.688832054560955, - "grad_norm": 0.07285474240779877, - "learning_rate": 4.792230062943364e-05, - "loss": 0.004972729086875916, - "step": 9905 - }, - { - "epoch": 1.689684569479966, - "grad_norm": 0.02914854884147644, - "learning_rate": 4.789768570499481e-05, - "loss": 0.004819701239466667, - "step": 9910 - }, - { - "epoch": 1.690537084398977, - "grad_norm": 0.058768294751644135, - "learning_rate": 4.787306592657042e-05, - "loss": 0.00581958070397377, - "step": 9915 - }, - { - "epoch": 1.6913895993179882, - "grad_norm": 0.08694405853748322, - "learning_rate": 4.7848441305653804e-05, - "loss": 0.004998849332332611, - "step": 9920 - }, - { - "epoch": 1.6922421142369992, - "grad_norm": 0.10194200277328491, - "learning_rate": 4.782381185374054e-05, - "loss": 0.00809016153216362, - "step": 9925 - }, - { - "epoch": 1.6930946291560103, - "grad_norm": 0.04976386949419975, - "learning_rate": 4.779917758232849e-05, - "loss": 0.00392133817076683, - "step": 9930 - }, - { - "epoch": 1.6939471440750213, - "grad_norm": 0.04324428364634514, - "learning_rate": 4.777453850291774e-05, - "loss": 0.005488916113972664, - "step": 9935 - }, - { - "epoch": 1.6947996589940324, - "grad_norm": 0.128068745136261, - "learning_rate": 4.774989462701063e-05, - "loss": 0.008696570992469788, - "step": 9940 - }, - { - "epoch": 1.6956521739130435, - "grad_norm": 0.06357335299253464, - "learning_rate": 4.7725245966111764e-05, - "loss": 0.00657767504453659, - "step": 9945 - }, - { - "epoch": 1.6965046888320545, - "grad_norm": 0.09200388938188553, - "learning_rate": 4.770059253172793e-05, - "loss": 0.00511985532939434, - "step": 9950 - }, - { - "epoch": 1.6973572037510656, - "grad_norm": 0.0898200049996376, - "learning_rate": 4.767593433536819e-05, - "loss": 0.005805553123354912, - "step": 9955 - }, - { - "epoch": 1.6982097186700766, - "grad_norm": 0.06495708227157593, - "learning_rate": 4.765127138854379e-05, - "loss": 0.005122709274291992, - "step": 9960 - }, - { - "epoch": 1.6990622335890877, - "grad_norm": 0.06079862266778946, - "learning_rate": 4.762660370276824e-05, - "loss": 0.005829216912388802, - "step": 9965 - }, - { - "epoch": 1.6999147485080988, - "grad_norm": 0.07300638407468796, - "learning_rate": 4.760193128955721e-05, - "loss": 0.0057421475648880005, - "step": 9970 - }, - { - "epoch": 1.7007672634271098, - "grad_norm": 0.09826004505157471, - "learning_rate": 4.757725416042863e-05, - "loss": 0.007709302753210068, - "step": 9975 - }, - { - "epoch": 1.701619778346121, - "grad_norm": 0.08353756368160248, - "learning_rate": 4.755257232690258e-05, - "loss": 0.007458946853876114, - "step": 9980 - }, - { - "epoch": 1.7024722932651322, - "grad_norm": 0.057993657886981964, - "learning_rate": 4.752788580050137e-05, - "loss": 0.0048107530921697615, - "step": 9985 - }, - { - "epoch": 1.7033248081841432, - "grad_norm": 0.08480621874332428, - "learning_rate": 4.750319459274951e-05, - "loss": 0.007556724548339844, - "step": 9990 - }, - { - "epoch": 1.7041773231031543, - "grad_norm": 0.06563637405633926, - "learning_rate": 4.747849871517364e-05, - "loss": 0.00476250983774662, - "step": 9995 - }, - { - "epoch": 1.7050298380221653, - "grad_norm": 0.06217886507511139, - "learning_rate": 4.7453798179302656e-05, - "loss": 0.008565887063741683, - "step": 10000 - }, - { - "epoch": 1.7058823529411766, - "grad_norm": 0.07285669445991516, - "learning_rate": 4.742909299666756e-05, - "loss": 0.0062899492681026455, - "step": 10005 - }, - { - "epoch": 1.7067348678601877, - "grad_norm": 0.043275732547044754, - "learning_rate": 4.7404383178801564e-05, - "loss": 0.005467301979660988, - "step": 10010 - }, - { - "epoch": 1.7075873827791987, - "grad_norm": 0.09345486015081406, - "learning_rate": 4.7379668737240044e-05, - "loss": 0.007198603451251983, - "step": 10015 - }, - { - "epoch": 1.7084398976982098, - "grad_norm": 0.09792933613061905, - "learning_rate": 4.735494968352049e-05, - "loss": 0.009155672788619996, - "step": 10020 - }, - { - "epoch": 1.7092924126172209, - "grad_norm": 0.03888144716620445, - "learning_rate": 4.733022602918263e-05, - "loss": 0.00484597384929657, - "step": 10025 - }, - { - "epoch": 1.710144927536232, - "grad_norm": 0.050344232469797134, - "learning_rate": 4.7305497785768235e-05, - "loss": 0.00478862039744854, - "step": 10030 - }, - { - "epoch": 1.710997442455243, - "grad_norm": 0.0724092647433281, - "learning_rate": 4.728076496482131e-05, - "loss": 0.005028426647186279, - "step": 10035 - }, - { - "epoch": 1.711849957374254, - "grad_norm": 0.10781413316726685, - "learning_rate": 4.725602757788794e-05, - "loss": 0.00789962187409401, - "step": 10040 - }, - { - "epoch": 1.712702472293265, - "grad_norm": 0.0828569084405899, - "learning_rate": 4.723128563651637e-05, - "loss": 0.006212035566568375, - "step": 10045 - }, - { - "epoch": 1.7135549872122762, - "grad_norm": 0.06634854525327682, - "learning_rate": 4.720653915225695e-05, - "loss": 0.00550018809735775, - "step": 10050 - }, - { - "epoch": 1.7144075021312872, - "grad_norm": 0.07699137926101685, - "learning_rate": 4.718178813666217e-05, - "loss": 0.007427608966827393, - "step": 10055 - }, - { - "epoch": 1.7152600170502983, - "grad_norm": 0.08237455785274506, - "learning_rate": 4.715703260128663e-05, - "loss": 0.0049440376460552216, - "step": 10060 - }, - { - "epoch": 1.7161125319693094, - "grad_norm": 0.0423310324549675, - "learning_rate": 4.7132272557687034e-05, - "loss": 0.005643930658698082, - "step": 10065 - }, - { - "epoch": 1.7169650468883204, - "grad_norm": 0.08052363246679306, - "learning_rate": 4.71075080174222e-05, - "loss": 0.005594046413898468, - "step": 10070 - }, - { - "epoch": 1.7178175618073315, - "grad_norm": 0.05388827249407768, - "learning_rate": 4.7082738992053004e-05, - "loss": 0.005239073187112808, - "step": 10075 - }, - { - "epoch": 1.7186700767263428, - "grad_norm": 0.0699780210852623, - "learning_rate": 4.70579654931425e-05, - "loss": 0.004442551359534264, - "step": 10080 - }, - { - "epoch": 1.7195225916453538, - "grad_norm": 0.07259970158338547, - "learning_rate": 4.7033187532255765e-05, - "loss": 0.004775180667638779, - "step": 10085 - }, - { - "epoch": 1.7203751065643649, - "grad_norm": 0.10291304439306259, - "learning_rate": 4.700840512095995e-05, - "loss": 0.009148158878087998, - "step": 10090 - }, - { - "epoch": 1.721227621483376, - "grad_norm": 0.09639768302440643, - "learning_rate": 4.698361827082435e-05, - "loss": 0.008357913047075272, - "step": 10095 - }, - { - "epoch": 1.722080136402387, - "grad_norm": 0.08128193765878677, - "learning_rate": 4.695882699342026e-05, - "loss": 0.006467945128679276, - "step": 10100 - }, - { - "epoch": 1.7229326513213983, - "grad_norm": 0.0678371787071228, - "learning_rate": 4.6934031300321094e-05, - "loss": 0.005760467797517777, - "step": 10105 - }, - { - "epoch": 1.7237851662404093, - "grad_norm": 0.0766267478466034, - "learning_rate": 4.6909231203102285e-05, - "loss": 0.0068340465426445, - "step": 10110 - }, - { - "epoch": 1.7246376811594204, - "grad_norm": 0.04263419657945633, - "learning_rate": 4.6884426713341366e-05, - "loss": 0.005921339616179466, - "step": 10115 - }, - { - "epoch": 1.7254901960784315, - "grad_norm": 0.10168195515871048, - "learning_rate": 4.6859617842617874e-05, - "loss": 0.006926319003105164, - "step": 10120 - }, - { - "epoch": 1.7263427109974425, - "grad_norm": 0.07910803705453873, - "learning_rate": 4.683480460251343e-05, - "loss": 0.006997878849506378, - "step": 10125 - }, - { - "epoch": 1.7271952259164536, - "grad_norm": 0.045049965381622314, - "learning_rate": 4.680998700461169e-05, - "loss": 0.005594813078641891, - "step": 10130 - }, - { - "epoch": 1.7280477408354646, - "grad_norm": 0.07185275852680206, - "learning_rate": 4.678516506049832e-05, - "loss": 0.006092778965830803, - "step": 10135 - }, - { - "epoch": 1.7289002557544757, - "grad_norm": 0.07003147900104523, - "learning_rate": 4.676033878176102e-05, - "loss": 0.007595886290073395, - "step": 10140 - }, - { - "epoch": 1.7297527706734868, - "grad_norm": 0.06360077112913132, - "learning_rate": 4.6735508179989536e-05, - "loss": 0.00546439029276371, - "step": 10145 - }, - { - "epoch": 1.7306052855924978, - "grad_norm": 0.07347442954778671, - "learning_rate": 4.671067326677563e-05, - "loss": 0.004961185902357101, - "step": 10150 - }, - { - "epoch": 1.7314578005115089, - "grad_norm": 0.056153345853090286, - "learning_rate": 4.6685834053713035e-05, - "loss": 0.006820976734161377, - "step": 10155 - }, - { - "epoch": 1.73231031543052, - "grad_norm": 0.09868444502353668, - "learning_rate": 4.666099055239755e-05, - "loss": 0.004829689115285874, - "step": 10160 - }, - { - "epoch": 1.733162830349531, - "grad_norm": 0.07029838860034943, - "learning_rate": 4.663614277442694e-05, - "loss": 0.006708820164203644, - "step": 10165 - }, - { - "epoch": 1.734015345268542, - "grad_norm": 0.0785607323050499, - "learning_rate": 4.661129073140096e-05, - "loss": 0.0093411885201931, - "step": 10170 - }, - { - "epoch": 1.7348678601875531, - "grad_norm": 0.05867304652929306, - "learning_rate": 4.658643443492139e-05, - "loss": 0.004420546442270279, - "step": 10175 - }, - { - "epoch": 1.7357203751065644, - "grad_norm": 0.08736653625965118, - "learning_rate": 4.656157389659196e-05, - "loss": 0.0049125440418720245, - "step": 10180 - }, - { - "epoch": 1.7365728900255755, - "grad_norm": 0.10769468545913696, - "learning_rate": 4.653670912801842e-05, - "loss": 0.006663528829813003, - "step": 10185 - }, - { - "epoch": 1.7374254049445865, - "grad_norm": 0.054130490869283676, - "learning_rate": 4.651184014080843e-05, - "loss": 0.005649637803435326, - "step": 10190 - }, - { - "epoch": 1.7382779198635976, - "grad_norm": 0.0760764479637146, - "learning_rate": 4.648696694657171e-05, - "loss": 0.00803508386015892, - "step": 10195 - }, - { - "epoch": 1.7391304347826086, - "grad_norm": 0.08103618025779724, - "learning_rate": 4.646208955691987e-05, - "loss": 0.005645860359072686, - "step": 10200 - }, - { - "epoch": 1.73998294970162, - "grad_norm": 0.060226406902074814, - "learning_rate": 4.643720798346649e-05, - "loss": 0.005114502459764481, - "step": 10205 - }, - { - "epoch": 1.740835464620631, - "grad_norm": 0.08842508494853973, - "learning_rate": 4.641232223782713e-05, - "loss": 0.004128537327051163, - "step": 10210 - }, - { - "epoch": 1.741687979539642, - "grad_norm": 0.03715536370873451, - "learning_rate": 4.6387432331619284e-05, - "loss": 0.005640536174178123, - "step": 10215 - }, - { - "epoch": 1.742540494458653, - "grad_norm": 0.09130766242742538, - "learning_rate": 4.636253827646239e-05, - "loss": 0.0074319176375865935, - "step": 10220 - }, - { - "epoch": 1.7433930093776642, - "grad_norm": 0.08204436302185059, - "learning_rate": 4.6337640083977826e-05, - "loss": 0.006443107873201371, - "step": 10225 - }, - { - "epoch": 1.7442455242966752, - "grad_norm": 0.09834989905357361, - "learning_rate": 4.6312737765788883e-05, - "loss": 0.00825996845960617, - "step": 10230 - }, - { - "epoch": 1.7450980392156863, - "grad_norm": 0.07453756034374237, - "learning_rate": 4.628783133352078e-05, - "loss": 0.005153121426701546, - "step": 10235 - }, - { - "epoch": 1.7459505541346974, - "grad_norm": 0.0658891350030899, - "learning_rate": 4.626292079880071e-05, - "loss": 0.005568725615739822, - "step": 10240 - }, - { - "epoch": 1.7468030690537084, - "grad_norm": 0.08673261851072311, - "learning_rate": 4.623800617325772e-05, - "loss": 0.00687919333577156, - "step": 10245 - }, - { - "epoch": 1.7476555839727195, - "grad_norm": 0.08707419037818909, - "learning_rate": 4.621308746852276e-05, - "loss": 0.009814801812171935, - "step": 10250 - }, - { - "epoch": 1.7485080988917305, - "grad_norm": 0.07168986648321152, - "learning_rate": 4.618816469622874e-05, - "loss": 0.004722443222999573, - "step": 10255 - }, - { - "epoch": 1.7493606138107416, - "grad_norm": 0.07987508177757263, - "learning_rate": 4.616323786801042e-05, - "loss": 0.006749927252531052, - "step": 10260 - }, - { - "epoch": 1.7497016197783462, - "eval_loss": 0.03619376942515373, - "eval_runtime": 3.6854, - "eval_samples_per_second": 68.379, - "eval_steps_per_second": 1.085, - "step": 10262 - }, - { - "eval_cer_subset": 0.014314328985294836, - "eval_cer_subset_edit_distance": 879, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 10262 - }, - { - "epoch": 1.7502131287297527, - "grad_norm": 0.10899413377046585, - "learning_rate": 4.6138306995504495e-05, - "loss": 0.006938809901475907, - "step": 10265 - }, - { - "epoch": 1.7510656436487637, - "grad_norm": 0.10073213279247284, - "learning_rate": 4.6113372090349516e-05, - "loss": 0.00795048326253891, - "step": 10270 - }, - { - "epoch": 1.7519181585677748, - "grad_norm": 0.04800979420542717, - "learning_rate": 4.608843316418592e-05, - "loss": 0.007616385817527771, - "step": 10275 - }, - { - "epoch": 1.752770673486786, - "grad_norm": 0.09020161628723145, - "learning_rate": 4.6063490228656025e-05, - "loss": 0.005228221416473389, - "step": 10280 - }, - { - "epoch": 1.7536231884057971, - "grad_norm": 0.083438441157341, - "learning_rate": 4.603854329540403e-05, - "loss": 0.00726160854101181, - "step": 10285 - }, - { - "epoch": 1.7544757033248082, - "grad_norm": 0.07851024717092514, - "learning_rate": 4.6013592376076e-05, - "loss": 0.006890790909528733, - "step": 10290 - }, - { - "epoch": 1.7553282182438192, - "grad_norm": 0.09015098959207535, - "learning_rate": 4.598863748231985e-05, - "loss": 0.007083073258399963, - "step": 10295 - }, - { - "epoch": 1.7561807331628303, - "grad_norm": 0.04751535877585411, - "learning_rate": 4.596367862578534e-05, - "loss": 0.005376371741294861, - "step": 10300 - }, - { - "epoch": 1.7570332480818416, - "grad_norm": 0.07547739148139954, - "learning_rate": 4.5938715818124094e-05, - "loss": 0.008766484260559083, - "step": 10305 - }, - { - "epoch": 1.7578857630008526, - "grad_norm": 0.052052512764930725, - "learning_rate": 4.5913749070989616e-05, - "loss": 0.005375667661428452, - "step": 10310 - }, - { - "epoch": 1.7587382779198637, - "grad_norm": 0.11575129628181458, - "learning_rate": 4.5888778396037187e-05, - "loss": 0.006675881892442703, - "step": 10315 - }, - { - "epoch": 1.7595907928388748, - "grad_norm": 0.05995294824242592, - "learning_rate": 4.586380380492394e-05, - "loss": 0.007097356766462326, - "step": 10320 - }, - { - "epoch": 1.7604433077578858, - "grad_norm": 0.049236129969358444, - "learning_rate": 4.583882530930887e-05, - "loss": 0.004433324560523033, - "step": 10325 - }, - { - "epoch": 1.7612958226768969, - "grad_norm": 0.048296503722667694, - "learning_rate": 4.581384292085274e-05, - "loss": 0.0051886774599552155, - "step": 10330 - }, - { - "epoch": 1.762148337595908, - "grad_norm": 0.09939385205507278, - "learning_rate": 4.57888566512182e-05, - "loss": 0.006426715105772018, - "step": 10335 - }, - { - "epoch": 1.763000852514919, - "grad_norm": 0.08810277283191681, - "learning_rate": 4.5763866512069626e-05, - "loss": 0.00727301687002182, - "step": 10340 - }, - { - "epoch": 1.76385336743393, - "grad_norm": 0.05262129753828049, - "learning_rate": 4.573887251507328e-05, - "loss": 0.004860313236713409, - "step": 10345 - }, - { - "epoch": 1.7647058823529411, - "grad_norm": 0.09755868464708328, - "learning_rate": 4.571387467189718e-05, - "loss": 0.00684543177485466, - "step": 10350 - }, - { - "epoch": 1.7655583972719522, - "grad_norm": 0.08306272327899933, - "learning_rate": 4.568887299421115e-05, - "loss": 0.005363506823778152, - "step": 10355 - }, - { - "epoch": 1.7664109121909632, - "grad_norm": 0.06304962188005447, - "learning_rate": 4.566386749368681e-05, - "loss": 0.006262359023094177, - "step": 10360 - }, - { - "epoch": 1.7672634271099743, - "grad_norm": 0.099216029047966, - "learning_rate": 4.5638858181997544e-05, - "loss": 0.005263365060091019, - "step": 10365 - }, - { - "epoch": 1.7681159420289854, - "grad_norm": 0.06316341459751129, - "learning_rate": 4.5613845070818544e-05, - "loss": 0.0053974583745002745, - "step": 10370 - }, - { - "epoch": 1.7689684569479964, - "grad_norm": 0.08523725718259811, - "learning_rate": 4.5588828171826755e-05, - "loss": 0.006064000725746155, - "step": 10375 - }, - { - "epoch": 1.7698209718670077, - "grad_norm": 0.0663699060678482, - "learning_rate": 4.5563807496700925e-05, - "loss": 0.00665600374341011, - "step": 10380 - }, - { - "epoch": 1.7706734867860188, - "grad_norm": 0.10673311352729797, - "learning_rate": 4.55387830571215e-05, - "loss": 0.006540966033935547, - "step": 10385 - }, - { - "epoch": 1.7715260017050298, - "grad_norm": 0.08779574930667877, - "learning_rate": 4.551375486477074e-05, - "loss": 0.00547558106482029, - "step": 10390 - }, - { - "epoch": 1.772378516624041, - "grad_norm": 0.07451514899730682, - "learning_rate": 4.5488722931332625e-05, - "loss": 0.008499838411808014, - "step": 10395 - }, - { - "epoch": 1.773231031543052, - "grad_norm": 0.06014202535152435, - "learning_rate": 4.5463687268492904e-05, - "loss": 0.006278771907091141, - "step": 10400 - }, - { - "epoch": 1.7740835464620632, - "grad_norm": 0.039256151765584946, - "learning_rate": 4.543864788793907e-05, - "loss": 0.0037193533033132555, - "step": 10405 - }, - { - "epoch": 1.7749360613810743, - "grad_norm": 0.09449942409992218, - "learning_rate": 4.541360480136031e-05, - "loss": 0.006574592739343643, - "step": 10410 - }, - { - "epoch": 1.7757885763000854, - "grad_norm": 0.07616980373859406, - "learning_rate": 4.53885580204476e-05, - "loss": 0.006042734161019326, - "step": 10415 - }, - { - "epoch": 1.7766410912190964, - "grad_norm": 0.07019155472517014, - "learning_rate": 4.5363507556893574e-05, - "loss": 0.006044945493340492, - "step": 10420 - }, - { - "epoch": 1.7774936061381075, - "grad_norm": 0.0616939477622509, - "learning_rate": 4.533845342239266e-05, - "loss": 0.004315405339002609, - "step": 10425 - }, - { - "epoch": 1.7783461210571185, - "grad_norm": 0.09354502707719803, - "learning_rate": 4.5313395628640943e-05, - "loss": 0.005719271302223205, - "step": 10430 - }, - { - "epoch": 1.7791986359761296, - "grad_norm": 0.08747732639312744, - "learning_rate": 4.528833418733623e-05, - "loss": 0.00472431555390358, - "step": 10435 - }, - { - "epoch": 1.7800511508951407, - "grad_norm": 0.09513017535209656, - "learning_rate": 4.5263269110178034e-05, - "loss": 0.006968998908996582, - "step": 10440 - }, - { - "epoch": 1.7809036658141517, - "grad_norm": 0.09208676964044571, - "learning_rate": 4.523820040886759e-05, - "loss": 0.006609047204256058, - "step": 10445 - }, - { - "epoch": 1.7817561807331628, - "grad_norm": 0.09964144974946976, - "learning_rate": 4.521312809510778e-05, - "loss": 0.0056272163987159726, - "step": 10450 - }, - { - "epoch": 1.7826086956521738, - "grad_norm": 0.06850367784500122, - "learning_rate": 4.51880521806032e-05, - "loss": 0.005562498047947883, - "step": 10455 - }, - { - "epoch": 1.783461210571185, - "grad_norm": 0.0654430240392685, - "learning_rate": 4.5162972677060124e-05, - "loss": 0.0059367924928665165, - "step": 10460 - }, - { - "epoch": 1.784313725490196, - "grad_norm": 0.0449560284614563, - "learning_rate": 4.513788959618649e-05, - "loss": 0.005458919331431389, - "step": 10465 - }, - { - "epoch": 1.785166240409207, - "grad_norm": 0.14256814122200012, - "learning_rate": 4.511280294969192e-05, - "loss": 0.0066184431314468386, - "step": 10470 - }, - { - "epoch": 1.7860187553282183, - "grad_norm": 0.08284557610750198, - "learning_rate": 4.508771274928771e-05, - "loss": 0.007388219982385635, - "step": 10475 - }, - { - "epoch": 1.7868712702472294, - "grad_norm": 0.05675457417964935, - "learning_rate": 4.506261900668676e-05, - "loss": 0.005572458356618881, - "step": 10480 - }, - { - "epoch": 1.7877237851662404, - "grad_norm": 0.05767322704195976, - "learning_rate": 4.50375217336037e-05, - "loss": 0.0058133058249950405, - "step": 10485 - }, - { - "epoch": 1.7885763000852515, - "grad_norm": 0.03421638533473015, - "learning_rate": 4.501242094175476e-05, - "loss": 0.005268872529268265, - "step": 10490 - }, - { - "epoch": 1.7894288150042625, - "grad_norm": 0.07319685071706772, - "learning_rate": 4.4987316642857836e-05, - "loss": 0.008701664954423904, - "step": 10495 - }, - { - "epoch": 1.7902813299232738, - "grad_norm": 0.04271615296602249, - "learning_rate": 4.4962208848632426e-05, - "loss": 0.005680259317159653, - "step": 10500 - }, - { - "epoch": 1.7911338448422849, - "grad_norm": 0.05916997417807579, - "learning_rate": 4.493709757079971e-05, - "loss": 0.004779224097728729, - "step": 10505 - }, - { - "epoch": 1.791986359761296, - "grad_norm": 0.04994066804647446, - "learning_rate": 4.491198282108244e-05, - "loss": 0.00443916954100132, - "step": 10510 - }, - { - "epoch": 1.792838874680307, - "grad_norm": 0.09032617509365082, - "learning_rate": 4.488686461120504e-05, - "loss": 0.007850547134876252, - "step": 10515 - }, - { - "epoch": 1.793691389599318, - "grad_norm": 0.05055975914001465, - "learning_rate": 4.4861742952893525e-05, - "loss": 0.005925046652555466, - "step": 10520 - }, - { - "epoch": 1.7945439045183291, - "grad_norm": 0.07521310448646545, - "learning_rate": 4.48366178578755e-05, - "loss": 0.006785632669925689, - "step": 10525 - }, - { - "epoch": 1.7953964194373402, - "grad_norm": 0.06577371805906296, - "learning_rate": 4.4811489337880216e-05, - "loss": 0.005300462618470192, - "step": 10530 - }, - { - "epoch": 1.7962489343563512, - "grad_norm": 0.0451020710170269, - "learning_rate": 4.4786357404638485e-05, - "loss": 0.00612550750374794, - "step": 10535 - }, - { - "epoch": 1.7971014492753623, - "grad_norm": 0.08968023955821991, - "learning_rate": 4.4761222069882754e-05, - "loss": 0.00558510459959507, - "step": 10540 - }, - { - "epoch": 1.7979539641943734, - "grad_norm": 0.0945729911327362, - "learning_rate": 4.4736083345347015e-05, - "loss": 0.007513274252414703, - "step": 10545 - }, - { - "epoch": 1.7988064791133844, - "grad_norm": 0.10392102599143982, - "learning_rate": 4.4710941242766844e-05, - "loss": 0.006224355846643448, - "step": 10550 - }, - { - "epoch": 1.7996589940323955, - "grad_norm": 0.10485874116420746, - "learning_rate": 4.4685795773879446e-05, - "loss": 0.005821261927485466, - "step": 10555 - }, - { - "epoch": 1.8005115089514065, - "grad_norm": 0.0689731314778328, - "learning_rate": 4.466064695042355e-05, - "loss": 0.0062000565230846405, - "step": 10560 - }, - { - "epoch": 1.8013640238704176, - "grad_norm": 0.07008705288171768, - "learning_rate": 4.4635494784139463e-05, - "loss": 0.006286797672510147, - "step": 10565 - }, - { - "epoch": 1.8022165387894287, - "grad_norm": 0.07595150172710419, - "learning_rate": 4.461033928676904e-05, - "loss": 0.006704485416412354, - "step": 10570 - }, - { - "epoch": 1.80306905370844, - "grad_norm": 0.07564863562583923, - "learning_rate": 4.458518047005572e-05, - "loss": 0.005777762830257415, - "step": 10575 - }, - { - "epoch": 1.803921568627451, - "grad_norm": 0.07202555984258652, - "learning_rate": 4.4560018345744466e-05, - "loss": 0.00602865107357502, - "step": 10580 - }, - { - "epoch": 1.804774083546462, - "grad_norm": 0.10462740063667297, - "learning_rate": 4.453485292558179e-05, - "loss": 0.007622111588716507, - "step": 10585 - }, - { - "epoch": 1.8056265984654731, - "grad_norm": 0.05587150529026985, - "learning_rate": 4.450968422131578e-05, - "loss": 0.00641121193766594, - "step": 10590 - }, - { - "epoch": 1.8064791133844842, - "grad_norm": 0.0603446289896965, - "learning_rate": 4.448451224469598e-05, - "loss": 0.0073586970567703245, - "step": 10595 - }, - { - "epoch": 1.8073316283034955, - "grad_norm": 0.04228143393993378, - "learning_rate": 4.445933700747353e-05, - "loss": 0.005406339466571808, - "step": 10600 - }, - { - "epoch": 1.8081841432225065, - "grad_norm": 0.04840795323252678, - "learning_rate": 4.4434158521401065e-05, - "loss": 0.0041844088584184645, - "step": 10605 - }, - { - "epoch": 1.8090366581415176, - "grad_norm": 0.08334027975797653, - "learning_rate": 4.440897679823275e-05, - "loss": 0.008376862108707427, - "step": 10610 - }, - { - "epoch": 1.8098891730605287, - "grad_norm": 0.07879523187875748, - "learning_rate": 4.438379184972423e-05, - "loss": 0.0053595036268234255, - "step": 10615 - }, - { - "epoch": 1.8107416879795397, - "grad_norm": 0.0689932182431221, - "learning_rate": 4.435860368763269e-05, - "loss": 0.005961846932768822, - "step": 10620 - }, - { - "epoch": 1.8115942028985508, - "grad_norm": 0.07035796344280243, - "learning_rate": 4.43334123237168e-05, - "loss": 0.005833951756358147, - "step": 10625 - }, - { - "epoch": 1.8124467178175618, - "grad_norm": 0.06488184630870819, - "learning_rate": 4.4308217769736715e-05, - "loss": 0.006380685418844223, - "step": 10630 - }, - { - "epoch": 1.813299232736573, - "grad_norm": 0.1095893532037735, - "learning_rate": 4.428302003745412e-05, - "loss": 0.006500106304883957, - "step": 10635 - }, - { - "epoch": 1.814151747655584, - "grad_norm": 0.07402926683425903, - "learning_rate": 4.425781913863212e-05, - "loss": 0.010839180648326873, - "step": 10640 - }, - { - "epoch": 1.815004262574595, - "grad_norm": 0.07752810418605804, - "learning_rate": 4.4232615085035354e-05, - "loss": 0.0053322531282901766, - "step": 10645 - }, - { - "epoch": 1.815856777493606, - "grad_norm": 0.06572280824184418, - "learning_rate": 4.420740788842991e-05, - "loss": 0.0072415158152580265, - "step": 10650 - }, - { - "epoch": 1.8167092924126171, - "grad_norm": 0.07175682485103607, - "learning_rate": 4.418219756058335e-05, - "loss": 0.007061149924993515, - "step": 10655 - }, - { - "epoch": 1.8175618073316282, - "grad_norm": 0.0702451840043068, - "learning_rate": 4.4156984113264684e-05, - "loss": 0.0050024140626192095, - "step": 10660 - }, - { - "epoch": 1.8184143222506393, - "grad_norm": 0.05054900422692299, - "learning_rate": 4.4131767558244375e-05, - "loss": 0.004906433075666428, - "step": 10665 - }, - { - "epoch": 1.8192668371696503, - "grad_norm": 0.07256589829921722, - "learning_rate": 4.410654790729438e-05, - "loss": 0.006986310333013534, - "step": 10670 - }, - { - "epoch": 1.8201193520886616, - "grad_norm": 0.06617925316095352, - "learning_rate": 4.408132517218805e-05, - "loss": 0.007973263412714005, - "step": 10675 - }, - { - "epoch": 1.8209718670076727, - "grad_norm": 0.09039802104234695, - "learning_rate": 4.405609936470022e-05, - "loss": 0.007263268530368805, - "step": 10680 - }, - { - "epoch": 1.8218243819266837, - "grad_norm": 0.03763730078935623, - "learning_rate": 4.40308704966071e-05, - "loss": 0.005709199234843254, - "step": 10685 - }, - { - "epoch": 1.8226768968456948, - "grad_norm": 0.09264735877513885, - "learning_rate": 4.400563857968639e-05, - "loss": 0.006996266543865204, - "step": 10690 - }, - { - "epoch": 1.8235294117647058, - "grad_norm": 0.0882507711648941, - "learning_rate": 4.398040362571719e-05, - "loss": 0.007461686432361603, - "step": 10695 - }, - { - "epoch": 1.8243819266837171, - "grad_norm": 0.07662846893072128, - "learning_rate": 4.395516564648e-05, - "loss": 0.006977429986000061, - "step": 10700 - }, - { - "epoch": 1.8252344416027282, - "grad_norm": 0.07431378960609436, - "learning_rate": 4.392992465375676e-05, - "loss": 0.004957346618175507, - "step": 10705 - }, - { - "epoch": 1.8260869565217392, - "grad_norm": 0.06182624027132988, - "learning_rate": 4.39046806593308e-05, - "loss": 0.006677946448326111, - "step": 10710 - }, - { - "epoch": 1.8269394714407503, - "grad_norm": 0.06389910727739334, - "learning_rate": 4.3879433674986856e-05, - "loss": 0.006449097394943237, - "step": 10715 - }, - { - "epoch": 1.8277919863597614, - "grad_norm": 0.06772691756486893, - "learning_rate": 4.385418371251107e-05, - "loss": 0.004998251050710678, - "step": 10720 - }, - { - "epoch": 1.8286445012787724, - "grad_norm": 0.07048022747039795, - "learning_rate": 4.3828930783690955e-05, - "loss": 0.006418389827013015, - "step": 10725 - }, - { - "epoch": 1.8294970161977835, - "grad_norm": 0.09442687779664993, - "learning_rate": 4.3803674900315424e-05, - "loss": 0.006921603530645371, - "step": 10730 - }, - { - "epoch": 1.8303495311167945, - "grad_norm": 0.0578981414437294, - "learning_rate": 4.377841607417475e-05, - "loss": 0.007038000971078873, - "step": 10735 - }, - { - "epoch": 1.8312020460358056, - "grad_norm": 0.06990659236907959, - "learning_rate": 4.37531543170606e-05, - "loss": 0.005136258527636528, - "step": 10740 - }, - { - "epoch": 1.8320545609548167, - "grad_norm": 0.05566668137907982, - "learning_rate": 4.372788964076601e-05, - "loss": 0.005333118140697479, - "step": 10745 - }, - { - "epoch": 1.8329070758738277, - "grad_norm": 0.09198274463415146, - "learning_rate": 4.3702622057085376e-05, - "loss": 0.005783502757549286, - "step": 10750 - }, - { - "epoch": 1.8337595907928388, - "grad_norm": 0.12995415925979614, - "learning_rate": 4.3677351577814423e-05, - "loss": 0.005794361606240273, - "step": 10755 - }, - { - "epoch": 1.8346121057118498, - "grad_norm": 0.0827256515622139, - "learning_rate": 4.3652078214750264e-05, - "loss": 0.00593951866030693, - "step": 10760 - }, - { - "epoch": 1.835464620630861, - "grad_norm": 0.09131235629320145, - "learning_rate": 4.362680197969136e-05, - "loss": 0.006387272477149963, - "step": 10765 - }, - { - "epoch": 1.836317135549872, - "grad_norm": 0.06061462685465813, - "learning_rate": 4.360152288443748e-05, - "loss": 0.006085103005170822, - "step": 10770 - }, - { - "epoch": 1.8371696504688833, - "grad_norm": 0.05650132894515991, - "learning_rate": 4.357624094078976e-05, - "loss": 0.004817041009664536, - "step": 10775 - }, - { - "epoch": 1.8380221653878943, - "grad_norm": 0.09250559657812119, - "learning_rate": 4.355095616055063e-05, - "loss": 0.006116693839430809, - "step": 10780 - }, - { - "epoch": 1.8388746803069054, - "grad_norm": 0.06575264036655426, - "learning_rate": 4.352566855552389e-05, - "loss": 0.006027846410870552, - "step": 10785 - }, - { - "epoch": 1.8397271952259164, - "grad_norm": 0.07538174092769623, - "learning_rate": 4.350037813751462e-05, - "loss": 0.006624206900596619, - "step": 10790 - }, - { - "epoch": 1.8405797101449275, - "grad_norm": 0.06000296771526337, - "learning_rate": 4.347508491832924e-05, - "loss": 0.006386204063892365, - "step": 10795 - }, - { - "epoch": 1.8414322250639388, - "grad_norm": 0.058621276170015335, - "learning_rate": 4.3449788909775455e-05, - "loss": 0.006246517226099968, - "step": 10800 - }, - { - "epoch": 1.8422847399829498, - "grad_norm": 0.10082551836967468, - "learning_rate": 4.34244901236623e-05, - "loss": 0.006916524469852447, - "step": 10805 - }, - { - "epoch": 1.843137254901961, - "grad_norm": 0.07926804572343826, - "learning_rate": 4.3399188571800064e-05, - "loss": 0.006270130723714828, - "step": 10810 - }, - { - "epoch": 1.843989769820972, - "grad_norm": 0.14256511628627777, - "learning_rate": 4.3373884266000375e-05, - "loss": 0.008555002510547638, - "step": 10815 - }, - { - "epoch": 1.844842284739983, - "grad_norm": 0.0711030438542366, - "learning_rate": 4.334857721807612e-05, - "loss": 0.004097539931535721, - "step": 10820 - }, - { - "epoch": 1.845694799658994, - "grad_norm": 0.05918106436729431, - "learning_rate": 4.3323267439841464e-05, - "loss": 0.006263938546180725, - "step": 10825 - }, - { - "epoch": 1.8465473145780051, - "grad_norm": 0.06577462702989578, - "learning_rate": 4.329795494311186e-05, - "loss": 0.004532983154058456, - "step": 10830 - }, - { - "epoch": 1.8473998294970162, - "grad_norm": 0.07599867880344391, - "learning_rate": 4.327263973970401e-05, - "loss": 0.006951173394918441, - "step": 10835 - }, - { - "epoch": 1.8482523444160273, - "grad_norm": 0.055239275097846985, - "learning_rate": 4.324732184143592e-05, - "loss": 0.00514591783285141, - "step": 10840 - }, - { - "epoch": 1.8491048593350383, - "grad_norm": 0.10522980988025665, - "learning_rate": 4.322200126012681e-05, - "loss": 0.00747048556804657, - "step": 10845 - }, - { - "epoch": 1.8499573742540494, - "grad_norm": 0.08132579177618027, - "learning_rate": 4.319667800759716e-05, - "loss": 0.005432958528399467, - "step": 10850 - }, - { - "epoch": 1.8508098891730604, - "grad_norm": 0.04027591645717621, - "learning_rate": 4.3171352095668726e-05, - "loss": 0.004450181499123573, - "step": 10855 - }, - { - "epoch": 1.8516624040920715, - "grad_norm": 0.0873839259147644, - "learning_rate": 4.314602353616446e-05, - "loss": 0.006079509109258652, - "step": 10860 - }, - { - "epoch": 1.8525149190110826, - "grad_norm": 0.04989013075828552, - "learning_rate": 4.312069234090862e-05, - "loss": 0.003988634794950485, - "step": 10865 - }, - { - "epoch": 1.8533674339300936, - "grad_norm": 0.061433590948581696, - "learning_rate": 4.309535852172661e-05, - "loss": 0.0056050091981887816, - "step": 10870 - }, - { - "epoch": 1.854219948849105, - "grad_norm": 0.07007768750190735, - "learning_rate": 4.3070022090445114e-05, - "loss": 0.006938119232654571, - "step": 10875 - }, - { - "epoch": 1.855072463768116, - "grad_norm": 0.03557104617357254, - "learning_rate": 4.3044683058892024e-05, - "loss": 0.0061099715530872345, - "step": 10880 - }, - { - "epoch": 1.855924978687127, - "grad_norm": 0.07706935703754425, - "learning_rate": 4.3019341438896446e-05, - "loss": 0.0050103053450584415, - "step": 10885 - }, - { - "epoch": 1.856777493606138, - "grad_norm": 0.06719083338975906, - "learning_rate": 4.2993997242288686e-05, - "loss": 0.005047342553734779, - "step": 10890 - }, - { - "epoch": 1.8576300085251491, - "grad_norm": 0.05179615691304207, - "learning_rate": 4.296865048090024e-05, - "loss": 0.004692831635475158, - "step": 10895 - }, - { - "epoch": 1.8584825234441604, - "grad_norm": 0.08594074845314026, - "learning_rate": 4.294330116656385e-05, - "loss": 0.006039778143167496, - "step": 10900 - }, - { - "epoch": 1.8593350383631715, - "grad_norm": 0.11285590380430222, - "learning_rate": 4.291794931111339e-05, - "loss": 0.005857323482632637, - "step": 10905 - }, - { - "epoch": 1.8601875532821825, - "grad_norm": 0.056068334728479385, - "learning_rate": 4.289259492638399e-05, - "loss": 0.006339801102876663, - "step": 10910 - }, - { - "epoch": 1.8610400682011936, - "grad_norm": 0.1027015820145607, - "learning_rate": 4.2867238024211873e-05, - "loss": 0.007628202438354492, - "step": 10915 - }, - { - "epoch": 1.8618925831202047, - "grad_norm": 0.06938920170068741, - "learning_rate": 4.2841878616434516e-05, - "loss": 0.005421775206923485, - "step": 10920 - }, - { - "epoch": 1.8627450980392157, - "grad_norm": 0.06613876670598984, - "learning_rate": 4.2816516714890525e-05, - "loss": 0.00747835859656334, - "step": 10925 - }, - { - "epoch": 1.8635976129582268, - "grad_norm": 0.07735379040241241, - "learning_rate": 4.279115233141967e-05, - "loss": 0.006907149404287338, - "step": 10930 - }, - { - "epoch": 1.8644501278772379, - "grad_norm": 0.06285069137811661, - "learning_rate": 4.276578547786291e-05, - "loss": 0.005340246856212616, - "step": 10935 - }, - { - "epoch": 1.865302642796249, - "grad_norm": 0.0670136883854866, - "learning_rate": 4.274041616606232e-05, - "loss": 0.0067828245460987095, - "step": 10940 - }, - { - "epoch": 1.86615515771526, - "grad_norm": 0.07944425195455551, - "learning_rate": 4.2715044407861144e-05, - "loss": 0.006403806060552597, - "step": 10945 - }, - { - "epoch": 1.867007672634271, - "grad_norm": 0.07202634960412979, - "learning_rate": 4.268967021510375e-05, - "loss": 0.004467373341321945, - "step": 10950 - }, - { - "epoch": 1.867860187553282, - "grad_norm": 0.08753371983766556, - "learning_rate": 4.266429359963568e-05, - "loss": 0.006740668416023254, - "step": 10955 - }, - { - "epoch": 1.8687127024722932, - "grad_norm": 0.0920538380742073, - "learning_rate": 4.263891457330357e-05, - "loss": 0.007489701360464096, - "step": 10960 - }, - { - "epoch": 1.8695652173913042, - "grad_norm": 0.11196473985910416, - "learning_rate": 4.261353314795519e-05, - "loss": 0.007533909380435943, - "step": 10965 - }, - { - "epoch": 1.8704177323103153, - "grad_norm": 0.08394299447536469, - "learning_rate": 4.258814933543943e-05, - "loss": 0.005159291997551918, - "step": 10970 - }, - { - "epoch": 1.8712702472293266, - "grad_norm": 0.08024156838655472, - "learning_rate": 4.25627631476063e-05, - "loss": 0.00543709248304367, - "step": 10975 - }, - { - "epoch": 1.8721227621483376, - "grad_norm": 0.052640948444604874, - "learning_rate": 4.253737459630694e-05, - "loss": 0.004067758470773697, - "step": 10980 - }, - { - "epoch": 1.8729752770673487, - "grad_norm": 0.08472926914691925, - "learning_rate": 4.251198369339353e-05, - "loss": 0.0077335178852081295, - "step": 10985 - }, - { - "epoch": 1.8738277919863597, - "grad_norm": 0.09794485569000244, - "learning_rate": 4.248659045071942e-05, - "loss": 0.0055429480969905855, - "step": 10990 - }, - { - "epoch": 1.8746803069053708, - "grad_norm": 0.07767575234174728, - "learning_rate": 4.2461194880139016e-05, - "loss": 0.008025288581848145, - "step": 10995 - }, - { - "epoch": 1.875532821824382, - "grad_norm": 0.07427361607551575, - "learning_rate": 4.2435796993507794e-05, - "loss": 0.006437119096517563, - "step": 11000 - }, - { - "epoch": 1.8763853367433931, - "grad_norm": 0.07420040667057037, - "learning_rate": 4.241039680268237e-05, - "loss": 0.0051200386136770245, - "step": 11005 - }, - { - "epoch": 1.8772378516624042, - "grad_norm": 0.09004204720258713, - "learning_rate": 4.2384994319520355e-05, - "loss": 0.007488063722848892, - "step": 11010 - }, - { - "epoch": 1.8780903665814153, - "grad_norm": 0.060929473489522934, - "learning_rate": 4.235958955588049e-05, - "loss": 0.00483398288488388, - "step": 11015 - }, - { - "epoch": 1.8789428815004263, - "grad_norm": 0.08116185665130615, - "learning_rate": 4.2334182523622584e-05, - "loss": 0.007078002393245697, - "step": 11020 - }, - { - "epoch": 1.8797953964194374, - "grad_norm": 0.0908491238951683, - "learning_rate": 4.230877323460746e-05, - "loss": 0.007228228449821472, - "step": 11025 - }, - { - "epoch": 1.8806479113384484, - "grad_norm": 0.08618480712175369, - "learning_rate": 4.228336170069703e-05, - "loss": 0.005402455478906632, - "step": 11030 - }, - { - "epoch": 1.8815004262574595, - "grad_norm": 0.06869816035032272, - "learning_rate": 4.2257947933754236e-05, - "loss": 0.006180650368332863, - "step": 11035 - }, - { - "epoch": 1.8823529411764706, - "grad_norm": 0.0904744416475296, - "learning_rate": 4.223253194564309e-05, - "loss": 0.00636049136519432, - "step": 11040 - }, - { - "epoch": 1.8832054560954816, - "grad_norm": 0.04902644082903862, - "learning_rate": 4.220711374822859e-05, - "loss": 0.0062784947454929355, - "step": 11045 - }, - { - "epoch": 1.8840579710144927, - "grad_norm": 0.060081589967012405, - "learning_rate": 4.2181693353376817e-05, - "loss": 0.005494052171707153, - "step": 11050 - }, - { - "epoch": 1.8849104859335037, - "grad_norm": 0.058530837297439575, - "learning_rate": 4.215627077295485e-05, - "loss": 0.005457080900669098, - "step": 11055 - }, - { - "epoch": 1.8857630008525148, - "grad_norm": 0.15006953477859497, - "learning_rate": 4.2130846018830795e-05, - "loss": 0.0062985971570014955, - "step": 11060 - }, - { - "epoch": 1.8866155157715259, - "grad_norm": 0.04498155787587166, - "learning_rate": 4.210541910287377e-05, - "loss": 0.004242038726806641, - "step": 11065 - }, - { - "epoch": 1.887468030690537, - "grad_norm": 0.09093966335058212, - "learning_rate": 4.207999003695392e-05, - "loss": 0.00554364025592804, - "step": 11070 - }, - { - "epoch": 1.8883205456095482, - "grad_norm": 0.06531018018722534, - "learning_rate": 4.2054558832942365e-05, - "loss": 0.0063869751989841465, - "step": 11075 - }, - { - "epoch": 1.8891730605285593, - "grad_norm": 0.059213872998952866, - "learning_rate": 4.202912550271124e-05, - "loss": 0.004836349189281464, - "step": 11080 - }, - { - "epoch": 1.8900255754475703, - "grad_norm": 0.11074823886156082, - "learning_rate": 4.200369005813367e-05, - "loss": 0.00584055446088314, - "step": 11085 - }, - { - "epoch": 1.8908780903665814, - "grad_norm": 0.09352346509695053, - "learning_rate": 4.197825251108376e-05, - "loss": 0.006423837691545487, - "step": 11090 - }, - { - "epoch": 1.8917306052855924, - "grad_norm": 0.10930176079273224, - "learning_rate": 4.195281287343662e-05, - "loss": 0.007819923013448716, - "step": 11095 - }, - { - "epoch": 1.8925831202046037, - "grad_norm": 0.10935486853122711, - "learning_rate": 4.19273711570683e-05, - "loss": 0.008524692058563233, - "step": 11100 - }, - { - "epoch": 1.8934356351236148, - "grad_norm": 0.07407546788454056, - "learning_rate": 4.190192737385586e-05, - "loss": 0.006353407353162766, - "step": 11105 - }, - { - "epoch": 1.8942881500426259, - "grad_norm": 0.11030165106058121, - "learning_rate": 4.187648153567729e-05, - "loss": 0.007683426141738892, - "step": 11110 - }, - { - "epoch": 1.895140664961637, - "grad_norm": 0.09419413655996323, - "learning_rate": 4.185103365441155e-05, - "loss": 0.005654521286487579, - "step": 11115 - }, - { - "epoch": 1.895993179880648, - "grad_norm": 0.06284896284341812, - "learning_rate": 4.1825583741938576e-05, - "loss": 0.0048633765429258345, - "step": 11120 - }, - { - "epoch": 1.896845694799659, - "grad_norm": 0.06429705023765564, - "learning_rate": 4.180013181013921e-05, - "loss": 0.006907754391431808, - "step": 11125 - }, - { - "epoch": 1.89769820971867, - "grad_norm": 0.1234050914645195, - "learning_rate": 4.177467787089527e-05, - "loss": 0.008531783521175385, - "step": 11130 - }, - { - "epoch": 1.8985507246376812, - "grad_norm": 0.04056263715028763, - "learning_rate": 4.174922193608951e-05, - "loss": 0.006784418225288391, - "step": 11135 - }, - { - "epoch": 1.8994032395566922, - "grad_norm": 0.048422425985336304, - "learning_rate": 4.172376401760561e-05, - "loss": 0.006587067246437072, - "step": 11140 - }, - { - "epoch": 1.9002557544757033, - "grad_norm": 0.10680951178073883, - "learning_rate": 4.169830412732815e-05, - "loss": 0.005700337141752243, - "step": 11145 - }, - { - "epoch": 1.9011082693947143, - "grad_norm": 0.09418217837810516, - "learning_rate": 4.167284227714267e-05, - "loss": 0.0059782925993204115, - "step": 11150 - }, - { - "epoch": 1.9019607843137254, - "grad_norm": 0.12511073052883148, - "learning_rate": 4.1647378478935614e-05, - "loss": 0.006256800889968872, - "step": 11155 - }, - { - "epoch": 1.9028132992327365, - "grad_norm": 0.06957859545946121, - "learning_rate": 4.1621912744594316e-05, - "loss": 0.008690094202756881, - "step": 11160 - }, - { - "epoch": 1.9036658141517475, - "grad_norm": 0.10859719663858414, - "learning_rate": 4.159644508600704e-05, - "loss": 0.008262380957603455, - "step": 11165 - }, - { - "epoch": 1.9045183290707586, - "grad_norm": 0.08408714830875397, - "learning_rate": 4.157097551506292e-05, - "loss": 0.005347007513046264, - "step": 11170 - }, - { - "epoch": 1.9053708439897699, - "grad_norm": 0.05623621866106987, - "learning_rate": 4.1545504043652014e-05, - "loss": 0.005091758817434311, - "step": 11175 - }, - { - "epoch": 1.906223358908781, - "grad_norm": 0.06791777908802032, - "learning_rate": 4.1520030683665246e-05, - "loss": 0.006755101680755615, - "step": 11180 - }, - { - "epoch": 1.907075873827792, - "grad_norm": 0.039112675935029984, - "learning_rate": 4.149455544699444e-05, - "loss": 0.0063312210142612456, - "step": 11185 - }, - { - "epoch": 1.907928388746803, - "grad_norm": 0.05682097375392914, - "learning_rate": 4.146907834553227e-05, - "loss": 0.005028403550386429, - "step": 11190 - }, - { - "epoch": 1.908780903665814, - "grad_norm": 0.07670710980892181, - "learning_rate": 4.144359939117229e-05, - "loss": 0.006438900530338287, - "step": 11195 - }, - { - "epoch": 1.9096334185848254, - "grad_norm": 0.06266012787818909, - "learning_rate": 4.141811859580894e-05, - "loss": 0.006153284758329392, - "step": 11200 - }, - { - "epoch": 1.9104859335038364, - "grad_norm": 0.06892232596874237, - "learning_rate": 4.139263597133749e-05, - "loss": 0.0042446799576282505, - "step": 11205 - }, - { - "epoch": 1.9113384484228475, - "grad_norm": 0.08733050525188446, - "learning_rate": 4.136715152965409e-05, - "loss": 0.0048094093799591064, - "step": 11210 - }, - { - "epoch": 1.9121909633418586, - "grad_norm": 0.06578327715396881, - "learning_rate": 4.13416652826557e-05, - "loss": 0.0047289058566093445, - "step": 11215 - }, - { - "epoch": 1.9130434782608696, - "grad_norm": 0.06382749229669571, - "learning_rate": 4.1316177242240174e-05, - "loss": 0.004200926423072815, - "step": 11220 - }, - { - "epoch": 1.9138959931798807, - "grad_norm": 0.07368794828653336, - "learning_rate": 4.129068742030617e-05, - "loss": 0.0063028551638126375, - "step": 11225 - }, - { - "epoch": 1.9147485080988917, - "grad_norm": 0.09302657842636108, - "learning_rate": 4.1265195828753176e-05, - "loss": 0.008124063909053802, - "step": 11230 - }, - { - "epoch": 1.9156010230179028, - "grad_norm": 0.08030751347541809, - "learning_rate": 4.123970247948153e-05, - "loss": 0.009628574550151824, - "step": 11235 - }, - { - "epoch": 1.9164535379369139, - "grad_norm": 0.08395590633153915, - "learning_rate": 4.1214207384392356e-05, - "loss": 0.007773591578006745, - "step": 11240 - }, - { - "epoch": 1.917306052855925, - "grad_norm": 0.09472183138132095, - "learning_rate": 4.118871055538762e-05, - "loss": 0.005461954325437546, - "step": 11245 - }, - { - "epoch": 1.918158567774936, - "grad_norm": 0.095457524061203, - "learning_rate": 4.11632120043701e-05, - "loss": 0.005725187063217163, - "step": 11250 - }, - { - "epoch": 1.919011082693947, - "grad_norm": 0.10508730262517929, - "learning_rate": 4.113771174324336e-05, - "loss": 0.006902433931827545, - "step": 11255 - }, - { - "epoch": 1.919863597612958, - "grad_norm": 0.08675665408372879, - "learning_rate": 4.111220978391176e-05, - "loss": 0.007470531016588211, - "step": 11260 - }, - { - "epoch": 1.9207161125319692, - "grad_norm": 0.08215013146400452, - "learning_rate": 4.108670613828049e-05, - "loss": 0.005732448399066925, - "step": 11265 - }, - { - "epoch": 1.9215686274509802, - "grad_norm": 0.054156310856342316, - "learning_rate": 4.1061200818255476e-05, - "loss": 0.005808809399604797, - "step": 11270 - }, - { - "epoch": 1.9224211423699915, - "grad_norm": 0.09332830458879471, - "learning_rate": 4.103569383574346e-05, - "loss": 0.005646481737494468, - "step": 11275 - }, - { - "epoch": 1.9232736572890026, - "grad_norm": 0.05589313432574272, - "learning_rate": 4.101018520265195e-05, - "loss": 0.005581434443593025, - "step": 11280 - }, - { - "epoch": 1.9241261722080136, - "grad_norm": 0.0465618334710598, - "learning_rate": 4.098467493088922e-05, - "loss": 0.005028170347213745, - "step": 11285 - }, - { - "epoch": 1.9249786871270247, - "grad_norm": 0.07304909080266953, - "learning_rate": 4.095916303236431e-05, - "loss": 0.007494028657674789, - "step": 11290 - }, - { - "epoch": 1.9258312020460358, - "grad_norm": 0.09532103687524796, - "learning_rate": 4.0933649518987025e-05, - "loss": 0.006374432146549225, - "step": 11295 - }, - { - "epoch": 1.926683716965047, - "grad_norm": 0.07364784181118011, - "learning_rate": 4.090813440266794e-05, - "loss": 0.0053088821470737456, - "step": 11300 - }, - { - "epoch": 1.927536231884058, - "grad_norm": 0.0804903507232666, - "learning_rate": 4.088261769531834e-05, - "loss": 0.0069495439529418945, - "step": 11305 - }, - { - "epoch": 1.9283887468030692, - "grad_norm": 0.07125549763441086, - "learning_rate": 4.0857099408850264e-05, - "loss": 0.005846098065376282, - "step": 11310 - }, - { - "epoch": 1.9292412617220802, - "grad_norm": 0.017375558614730835, - "learning_rate": 4.083157955517653e-05, - "loss": 0.004308582097291946, - "step": 11315 - }, - { - "epoch": 1.9300937766410913, - "grad_norm": 0.07655836641788483, - "learning_rate": 4.080605814621063e-05, - "loss": 0.006030111759901047, - "step": 11320 - }, - { - "epoch": 1.9309462915601023, - "grad_norm": 0.05411117896437645, - "learning_rate": 4.078053519386681e-05, - "loss": 0.0069768443703651425, - "step": 11325 - }, - { - "epoch": 1.9317988064791134, - "grad_norm": 0.08431188017129898, - "learning_rate": 4.0755010710060035e-05, - "loss": 0.006973695755004883, - "step": 11330 - }, - { - "epoch": 1.9326513213981245, - "grad_norm": 0.08480583131313324, - "learning_rate": 4.072948470670598e-05, - "loss": 0.006525547057390213, - "step": 11335 - }, - { - "epoch": 1.9335038363171355, - "grad_norm": 0.073171466588974, - "learning_rate": 4.070395719572104e-05, - "loss": 0.0054599311202764515, - "step": 11340 - }, - { - "epoch": 1.9343563512361466, - "grad_norm": 0.06951522827148438, - "learning_rate": 4.0678428189022304e-05, - "loss": 0.008897364884614945, - "step": 11345 - }, - { - "epoch": 1.9352088661551576, - "grad_norm": 0.08654197305440903, - "learning_rate": 4.0652897698527557e-05, - "loss": 0.005458325147628784, - "step": 11350 - }, - { - "epoch": 1.9360613810741687, - "grad_norm": 0.07929553836584091, - "learning_rate": 4.0627365736155285e-05, - "loss": 0.00710543841123581, - "step": 11355 - }, - { - "epoch": 1.9369138959931798, - "grad_norm": 0.12434503436088562, - "learning_rate": 4.060183231382466e-05, - "loss": 0.0071723200380802155, - "step": 11360 - }, - { - "epoch": 1.9377664109121908, - "grad_norm": 0.06440022587776184, - "learning_rate": 4.057629744345551e-05, - "loss": 0.006010268628597259, - "step": 11365 - }, - { - "epoch": 1.938618925831202, - "grad_norm": 0.09477414190769196, - "learning_rate": 4.0550761136968404e-05, - "loss": 0.007152469456195831, - "step": 11370 - }, - { - "epoch": 1.9394714407502132, - "grad_norm": 0.06758873164653778, - "learning_rate": 4.0525223406284516e-05, - "loss": 0.004493400454521179, - "step": 11375 - }, - { - "epoch": 1.9403239556692242, - "grad_norm": 0.06823158264160156, - "learning_rate": 4.0499684263325695e-05, - "loss": 0.0058505676686763765, - "step": 11380 - }, - { - "epoch": 1.9411764705882353, - "grad_norm": 0.10731697082519531, - "learning_rate": 4.0474143720014485e-05, - "loss": 0.00592585802078247, - "step": 11385 - }, - { - "epoch": 1.9420289855072463, - "grad_norm": 0.09786538779735565, - "learning_rate": 4.044860178827405e-05, - "loss": 0.008860854804515839, - "step": 11390 - }, - { - "epoch": 1.9428815004262576, - "grad_norm": 0.08662491291761398, - "learning_rate": 4.042305848002822e-05, - "loss": 0.00579673945903778, - "step": 11395 - }, - { - "epoch": 1.9437340153452687, - "grad_norm": 0.08446741849184036, - "learning_rate": 4.039751380720145e-05, - "loss": 0.0067916139960289, - "step": 11400 - }, - { - "epoch": 1.9445865302642797, - "grad_norm": 0.08059567958116531, - "learning_rate": 4.037196778171885e-05, - "loss": 0.007273902744054794, - "step": 11405 - }, - { - "epoch": 1.9454390451832908, - "grad_norm": 0.067914679646492, - "learning_rate": 4.0346420415506156e-05, - "loss": 0.00854090303182602, - "step": 11410 - }, - { - "epoch": 1.9462915601023019, - "grad_norm": 0.06519316136837006, - "learning_rate": 4.032087172048973e-05, - "loss": 0.006127477809786797, - "step": 11415 - }, - { - "epoch": 1.947144075021313, - "grad_norm": 0.10216967016458511, - "learning_rate": 4.029532170859655e-05, - "loss": 0.007330343872308731, - "step": 11420 - }, - { - "epoch": 1.947996589940324, - "grad_norm": 0.07684756815433502, - "learning_rate": 4.02697703917542e-05, - "loss": 0.006121716648340225, - "step": 11425 - }, - { - "epoch": 1.948849104859335, - "grad_norm": 0.08026126027107239, - "learning_rate": 4.0244217781890906e-05, - "loss": 0.006386417150497437, - "step": 11430 - }, - { - "epoch": 1.949701619778346, - "grad_norm": 0.09047527611255646, - "learning_rate": 4.021866389093546e-05, - "loss": 0.004208286106586456, - "step": 11435 - }, - { - "epoch": 1.9505541346973572, - "grad_norm": 0.047482747584581375, - "learning_rate": 4.0193108730817284e-05, - "loss": 0.005754061415791512, - "step": 11440 - }, - { - "epoch": 1.9514066496163682, - "grad_norm": 0.054364416748285294, - "learning_rate": 4.0167552313466355e-05, - "loss": 0.004412830248475075, - "step": 11445 - }, - { - "epoch": 1.9522591645353793, - "grad_norm": 0.07640549540519714, - "learning_rate": 4.014199465081327e-05, - "loss": 0.005214530602097511, - "step": 11450 - }, - { - "epoch": 1.9531116794543903, - "grad_norm": 0.07241252809762955, - "learning_rate": 4.0116435754789206e-05, - "loss": 0.005129393562674523, - "step": 11455 - }, - { - "epoch": 1.9539641943734014, - "grad_norm": 0.048170432448387146, - "learning_rate": 4.009087563732589e-05, - "loss": 0.005180074647068977, - "step": 11460 - }, - { - "epoch": 1.9548167092924125, - "grad_norm": 0.07336216419935226, - "learning_rate": 4.006531431035566e-05, - "loss": 0.009098170697689057, - "step": 11465 - }, - { - "epoch": 1.9556692242114238, - "grad_norm": 0.04934614151716232, - "learning_rate": 4.0039751785811346e-05, - "loss": 0.005307629331946373, - "step": 11470 - }, - { - "epoch": 1.9565217391304348, - "grad_norm": 0.08941303193569183, - "learning_rate": 4.001418807562643e-05, - "loss": 0.0069742932915687565, - "step": 11475 - }, - { - "epoch": 1.9573742540494459, - "grad_norm": 0.05791569501161575, - "learning_rate": 3.998862319173488e-05, - "loss": 0.0050424404442310335, - "step": 11480 - }, - { - "epoch": 1.958226768968457, - "grad_norm": 0.04596787318587303, - "learning_rate": 3.996305714607125e-05, - "loss": 0.004805172979831696, - "step": 11485 - }, - { - "epoch": 1.959079283887468, - "grad_norm": 0.07698309421539307, - "learning_rate": 3.993748995057061e-05, - "loss": 0.006605527549982071, - "step": 11490 - }, - { - "epoch": 1.9599317988064793, - "grad_norm": 0.08400565385818481, - "learning_rate": 3.9911921617168565e-05, - "loss": 0.0085490882396698, - "step": 11495 - }, - { - "epoch": 1.9607843137254903, - "grad_norm": 0.1446380764245987, - "learning_rate": 3.9886352157801296e-05, - "loss": 0.005958027392625809, - "step": 11500 - }, - { - "epoch": 1.9616368286445014, - "grad_norm": 0.06108809635043144, - "learning_rate": 3.986078158440544e-05, - "loss": 0.0054461218416690825, - "step": 11505 - }, - { - "epoch": 1.9624893435635125, - "grad_norm": 0.1163720190525055, - "learning_rate": 3.983520990891823e-05, - "loss": 0.0065662160515785216, - "step": 11510 - }, - { - "epoch": 1.9633418584825235, - "grad_norm": 0.08339548110961914, - "learning_rate": 3.980963714327734e-05, - "loss": 0.007503192871809006, - "step": 11515 - }, - { - "epoch": 1.9641943734015346, - "grad_norm": 0.07774331420660019, - "learning_rate": 3.9784063299421e-05, - "loss": 0.005831217020750045, - "step": 11520 - }, - { - "epoch": 1.9650468883205456, - "grad_norm": 0.08897018432617188, - "learning_rate": 3.9758488389287936e-05, - "loss": 0.006972354650497436, - "step": 11525 - }, - { - "epoch": 1.9658994032395567, - "grad_norm": 0.07708834111690521, - "learning_rate": 3.9732912424817374e-05, - "loss": 0.0059847764670848845, - "step": 11530 - }, - { - "epoch": 1.9667519181585678, - "grad_norm": 0.133201003074646, - "learning_rate": 3.9707335417949015e-05, - "loss": 0.005828146636486053, - "step": 11535 - }, - { - "epoch": 1.9676044330775788, - "grad_norm": 0.05620214343070984, - "learning_rate": 3.968175738062303e-05, - "loss": 0.004607116058468819, - "step": 11540 - }, - { - "epoch": 1.9684569479965899, - "grad_norm": 0.05371567979454994, - "learning_rate": 3.965617832478015e-05, - "loss": 0.004455961659550667, - "step": 11545 - }, - { - "epoch": 1.969309462915601, - "grad_norm": 0.10317978262901306, - "learning_rate": 3.96305982623615e-05, - "loss": 0.004697806015610695, - "step": 11550 - }, - { - "epoch": 1.970161977834612, - "grad_norm": 0.08786958456039429, - "learning_rate": 3.96050172053087e-05, - "loss": 0.005183818191289902, - "step": 11555 - }, - { - "epoch": 1.971014492753623, - "grad_norm": 0.07750507444143295, - "learning_rate": 3.957943516556385e-05, - "loss": 0.005475999787449837, - "step": 11560 - }, - { - "epoch": 1.9718670076726341, - "grad_norm": 0.07066313922405243, - "learning_rate": 3.955385215506949e-05, - "loss": 0.005772550404071808, - "step": 11565 - }, - { - "epoch": 1.9727195225916454, - "grad_norm": 0.08183038979768753, - "learning_rate": 3.952826818576863e-05, - "loss": 0.005305550992488861, - "step": 11570 - }, - { - "epoch": 1.9735720375106565, - "grad_norm": 0.075381800532341, - "learning_rate": 3.95026832696047e-05, - "loss": 0.00803310126066208, - "step": 11575 - }, - { - "epoch": 1.9744245524296675, - "grad_norm": 0.09064166992902756, - "learning_rate": 3.9477097418521616e-05, - "loss": 0.006380292773246765, - "step": 11580 - }, - { - "epoch": 1.9752770673486786, - "grad_norm": 0.09140465408563614, - "learning_rate": 3.945151064446367e-05, - "loss": 0.00863645225763321, - "step": 11585 - }, - { - "epoch": 1.9761295822676896, - "grad_norm": 0.09985008090734482, - "learning_rate": 3.942592295937565e-05, - "loss": 0.005205995962023735, - "step": 11590 - }, - { - "epoch": 1.976982097186701, - "grad_norm": 0.07968702167272568, - "learning_rate": 3.940033437520273e-05, - "loss": 0.006467466801404953, - "step": 11595 - }, - { - "epoch": 1.977834612105712, - "grad_norm": 0.0925409123301506, - "learning_rate": 3.937474490389051e-05, - "loss": 0.006804432719945908, - "step": 11600 - }, - { - "epoch": 1.978687127024723, - "grad_norm": 0.053421750664711, - "learning_rate": 3.9349154557385e-05, - "loss": 0.0067677564918994905, - "step": 11605 - }, - { - "epoch": 1.979539641943734, - "grad_norm": 0.07791347801685333, - "learning_rate": 3.9323563347632624e-05, - "loss": 0.006826826930046081, - "step": 11610 - }, - { - "epoch": 1.9803921568627452, - "grad_norm": 0.08627293258905411, - "learning_rate": 3.929797128658024e-05, - "loss": 0.00804663747549057, - "step": 11615 - }, - { - "epoch": 1.9812446717817562, - "grad_norm": 0.06506595015525818, - "learning_rate": 3.927237838617503e-05, - "loss": 0.005456966534256935, - "step": 11620 - }, - { - "epoch": 1.9820971867007673, - "grad_norm": 0.09555826336145401, - "learning_rate": 3.924678465836465e-05, - "loss": 0.005365721881389618, - "step": 11625 - }, - { - "epoch": 1.9829497016197783, - "grad_norm": 0.09176401793956757, - "learning_rate": 3.922119011509706e-05, - "loss": 0.006210924685001373, - "step": 11630 - }, - { - "epoch": 1.9838022165387894, - "grad_norm": 0.05260130763053894, - "learning_rate": 3.919559476832069e-05, - "loss": 0.004408955946564675, - "step": 11635 - }, - { - "epoch": 1.9846547314578005, - "grad_norm": 0.0875319391489029, - "learning_rate": 3.916999862998427e-05, - "loss": 0.005069036781787872, - "step": 11640 - }, - { - "epoch": 1.9855072463768115, - "grad_norm": 0.10335614532232285, - "learning_rate": 3.9144401712036936e-05, - "loss": 0.007199827581644058, - "step": 11645 - }, - { - "epoch": 1.9863597612958226, - "grad_norm": 0.09518889337778091, - "learning_rate": 3.9118804026428194e-05, - "loss": 0.00541754923760891, - "step": 11650 - }, - { - "epoch": 1.9872122762148337, - "grad_norm": 0.06707368791103363, - "learning_rate": 3.9093205585107863e-05, - "loss": 0.00641927570104599, - "step": 11655 - }, - { - "epoch": 1.9880647911338447, - "grad_norm": 0.10102292895317078, - "learning_rate": 3.906760640002618e-05, - "loss": 0.007096148282289505, - "step": 11660 - }, - { - "epoch": 1.9889173060528558, - "grad_norm": 0.0690481886267662, - "learning_rate": 3.904200648313368e-05, - "loss": 0.0063364550471305845, - "step": 11665 - }, - { - "epoch": 1.989769820971867, - "grad_norm": 0.1051480695605278, - "learning_rate": 3.901640584638126e-05, - "loss": 0.009133437275886535, - "step": 11670 - }, - { - "epoch": 1.9906223358908781, - "grad_norm": 0.0857042595744133, - "learning_rate": 3.899080450172015e-05, - "loss": 0.007245839387178421, - "step": 11675 - }, - { - "epoch": 1.9914748508098892, - "grad_norm": 0.04038793221116066, - "learning_rate": 3.8965202461101904e-05, - "loss": 0.005575920641422272, - "step": 11680 - }, - { - "epoch": 1.9923273657289002, - "grad_norm": 0.06331093609333038, - "learning_rate": 3.893959973647842e-05, - "loss": 0.004866635054349899, - "step": 11685 - }, - { - "epoch": 1.9931798806479113, - "grad_norm": 0.08694019168615341, - "learning_rate": 3.891399633980188e-05, - "loss": 0.004249059408903122, - "step": 11690 - }, - { - "epoch": 1.9940323955669226, - "grad_norm": 0.06739087402820587, - "learning_rate": 3.888839228302482e-05, - "loss": 0.006520142406225204, - "step": 11695 - }, - { - "epoch": 1.9948849104859336, - "grad_norm": 0.09432726353406906, - "learning_rate": 3.886278757810005e-05, - "loss": 0.006377060711383819, - "step": 11700 - }, - { - "epoch": 1.9957374254049447, - "grad_norm": 0.040565814822912216, - "learning_rate": 3.883718223698071e-05, - "loss": 0.0062430910766124725, - "step": 11705 - }, - { - "epoch": 1.9965899403239558, - "grad_norm": 0.09249477833509445, - "learning_rate": 3.881157627162022e-05, - "loss": 0.005447167158126831, - "step": 11710 - }, - { - "epoch": 1.9974424552429668, - "grad_norm": 0.08561582118272781, - "learning_rate": 3.87859696939723e-05, - "loss": 0.0067646786570549015, - "step": 11715 - }, - { - "epoch": 1.9982949701619779, - "grad_norm": 0.0771077573299408, - "learning_rate": 3.876036251599094e-05, - "loss": 0.006473222374916076, - "step": 11720 - }, - { - "epoch": 1.999147485080989, - "grad_norm": 0.047942496836185455, - "learning_rate": 3.873475474963044e-05, - "loss": 0.004876254498958588, - "step": 11725 - }, - { - "epoch": 1.9996589940323957, - "eval_loss": 0.03507082909345627, - "eval_runtime": 3.6311, - "eval_samples_per_second": 69.4, - "eval_steps_per_second": 1.102, - "step": 11728 - }, - { - "eval_cer_subset": 0.01172504763300601, - "eval_cer_subset_edit_distance": 720, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 11728 - }, - { - "epoch": 2.0, - "grad_norm": 0.09595198184251785, - "learning_rate": 3.8709146406845345e-05, - "loss": 0.005297505855560302, - "step": 11730 - }, - { - "epoch": 2.000852514919011, - "grad_norm": 0.0500548854470253, - "learning_rate": 3.8683537499590486e-05, - "loss": 0.0029773740097880363, - "step": 11735 - }, - { - "epoch": 2.001705029838022, - "grad_norm": 0.043145813047885895, - "learning_rate": 3.865792803982097e-05, - "loss": 0.0026956576853990554, - "step": 11740 - }, - { - "epoch": 2.002557544757033, - "grad_norm": 0.06828423589468002, - "learning_rate": 3.86323180394921e-05, - "loss": 0.0032785605639219285, - "step": 11745 - }, - { - "epoch": 2.0034100596760442, - "grad_norm": 0.05070719122886658, - "learning_rate": 3.8606707510559514e-05, - "loss": 0.0025875838473439217, - "step": 11750 - }, - { - "epoch": 2.0042625745950553, - "grad_norm": 0.03793288394808769, - "learning_rate": 3.8581096464979046e-05, - "loss": 0.003196726739406586, - "step": 11755 - }, - { - "epoch": 2.0051150895140664, - "grad_norm": 0.058611899614334106, - "learning_rate": 3.8555484914706783e-05, - "loss": 0.0025842227041721344, - "step": 11760 - }, - { - "epoch": 2.0059676044330774, - "grad_norm": 0.05239633843302727, - "learning_rate": 3.8529872871699064e-05, - "loss": 0.0033856891095638275, - "step": 11765 - }, - { - "epoch": 2.0068201193520885, - "grad_norm": 0.0694168210029602, - "learning_rate": 3.8504260347912414e-05, - "loss": 0.0036750052124261854, - "step": 11770 - }, - { - "epoch": 2.0076726342710995, - "grad_norm": 0.05401293560862541, - "learning_rate": 3.847864735530364e-05, - "loss": 0.0020644858479499815, - "step": 11775 - }, - { - "epoch": 2.008525149190111, - "grad_norm": 0.024288944900035858, - "learning_rate": 3.8453033905829715e-05, - "loss": 0.0030498920008540154, - "step": 11780 - }, - { - "epoch": 2.009377664109122, - "grad_norm": 0.07617825269699097, - "learning_rate": 3.842742001144787e-05, - "loss": 0.002012002095580101, - "step": 11785 - }, - { - "epoch": 2.010230179028133, - "grad_norm": 0.05394979938864708, - "learning_rate": 3.8401805684115514e-05, - "loss": 0.0013803424313664435, - "step": 11790 - }, - { - "epoch": 2.0110826939471442, - "grad_norm": 0.04705117642879486, - "learning_rate": 3.837619093579025e-05, - "loss": 0.0019010987132787704, - "step": 11795 - }, - { - "epoch": 2.0119352088661553, - "grad_norm": 0.04174034297466278, - "learning_rate": 3.835057577842993e-05, - "loss": 0.00339580737054348, - "step": 11800 - }, - { - "epoch": 2.0127877237851663, - "grad_norm": 0.05027524381875992, - "learning_rate": 3.832496022399254e-05, - "loss": 0.003779648244380951, - "step": 11805 - }, - { - "epoch": 2.0136402387041774, - "grad_norm": 0.06344325840473175, - "learning_rate": 3.829934428443627e-05, - "loss": 0.003120606765151024, - "step": 11810 - }, - { - "epoch": 2.0144927536231885, - "grad_norm": 0.04142893850803375, - "learning_rate": 3.827372797171949e-05, - "loss": 0.001541936956346035, - "step": 11815 - }, - { - "epoch": 2.0153452685421995, - "grad_norm": 0.05739682540297508, - "learning_rate": 3.8248111297800766e-05, - "loss": 0.002022813446819782, - "step": 11820 - }, - { - "epoch": 2.0161977834612106, - "grad_norm": 0.05701421573758125, - "learning_rate": 3.82224942746388e-05, - "loss": 0.0032159242779016494, - "step": 11825 - }, - { - "epoch": 2.0170502983802217, - "grad_norm": 0.05839217081665993, - "learning_rate": 3.8196876914192476e-05, - "loss": 0.0019759060814976692, - "step": 11830 - }, - { - "epoch": 2.0179028132992327, - "grad_norm": 0.04104325920343399, - "learning_rate": 3.8171259228420824e-05, - "loss": 0.0030811641365289686, - "step": 11835 - }, - { - "epoch": 2.0187553282182438, - "grad_norm": 0.05367572233080864, - "learning_rate": 3.814564122928303e-05, - "loss": 0.0024660680443048476, - "step": 11840 - }, - { - "epoch": 2.019607843137255, - "grad_norm": 0.07062884420156479, - "learning_rate": 3.8120022928738444e-05, - "loss": 0.0028638459742069246, - "step": 11845 - }, - { - "epoch": 2.020460358056266, - "grad_norm": 0.1098889485001564, - "learning_rate": 3.809440433874652e-05, - "loss": 0.002245648391544819, - "step": 11850 - }, - { - "epoch": 2.021312872975277, - "grad_norm": 0.11214791238307953, - "learning_rate": 3.806878547126689e-05, - "loss": 0.0025152696296572687, - "step": 11855 - }, - { - "epoch": 2.022165387894288, - "grad_norm": 0.0809246301651001, - "learning_rate": 3.804316633825926e-05, - "loss": 0.0030847292393445967, - "step": 11860 - }, - { - "epoch": 2.023017902813299, - "grad_norm": 0.05590866506099701, - "learning_rate": 3.801754695168353e-05, - "loss": 0.002259066328406334, - "step": 11865 - }, - { - "epoch": 2.02387041773231, - "grad_norm": 0.061719413846731186, - "learning_rate": 3.799192732349967e-05, - "loss": 0.003117647022008896, - "step": 11870 - }, - { - "epoch": 2.024722932651321, - "grad_norm": 0.05439555272459984, - "learning_rate": 3.796630746566777e-05, - "loss": 0.00280950702726841, - "step": 11875 - }, - { - "epoch": 2.0255754475703327, - "grad_norm": 0.07110737264156342, - "learning_rate": 3.794068739014802e-05, - "loss": 0.0022924147546291352, - "step": 11880 - }, - { - "epoch": 2.0264279624893438, - "grad_norm": 0.035215508192777634, - "learning_rate": 3.791506710890075e-05, - "loss": 0.0014670810662209988, - "step": 11885 - }, - { - "epoch": 2.027280477408355, - "grad_norm": 0.04196110740303993, - "learning_rate": 3.7889446633886345e-05, - "loss": 0.002128283493220806, - "step": 11890 - }, - { - "epoch": 2.028132992327366, - "grad_norm": 0.02117479033768177, - "learning_rate": 3.7863825977065295e-05, - "loss": 0.002085634134709835, - "step": 11895 - }, - { - "epoch": 2.028985507246377, - "grad_norm": 0.137400820851326, - "learning_rate": 3.783820515039818e-05, - "loss": 0.003697726130485535, - "step": 11900 - }, - { - "epoch": 2.029838022165388, - "grad_norm": 0.05773406848311424, - "learning_rate": 3.781258416584565e-05, - "loss": 0.0020811671391129495, - "step": 11905 - }, - { - "epoch": 2.030690537084399, - "grad_norm": 0.02017928846180439, - "learning_rate": 3.7786963035368425e-05, - "loss": 0.002070310711860657, - "step": 11910 - }, - { - "epoch": 2.03154305200341, - "grad_norm": 0.023989839479327202, - "learning_rate": 3.7761341770927314e-05, - "loss": 0.0035201530903577805, - "step": 11915 - }, - { - "epoch": 2.032395566922421, - "grad_norm": 0.05773681029677391, - "learning_rate": 3.7735720384483176e-05, - "loss": 0.002326706610620022, - "step": 11920 - }, - { - "epoch": 2.0332480818414322, - "grad_norm": 0.06733391433954239, - "learning_rate": 3.771009888799692e-05, - "loss": 0.0019989268854260444, - "step": 11925 - }, - { - "epoch": 2.0341005967604433, - "grad_norm": 0.09590540081262589, - "learning_rate": 3.76844772934295e-05, - "loss": 0.0024355117231607435, - "step": 11930 - }, - { - "epoch": 2.0349531116794544, - "grad_norm": 0.027501709759235382, - "learning_rate": 3.765885561274196e-05, - "loss": 0.0011640249751508236, - "step": 11935 - }, - { - "epoch": 2.0358056265984654, - "grad_norm": 0.031739719212055206, - "learning_rate": 3.7633233857895326e-05, - "loss": 0.0022482817992568018, - "step": 11940 - }, - { - "epoch": 2.0366581415174765, - "grad_norm": 0.027232196182012558, - "learning_rate": 3.760761204085071e-05, - "loss": 0.0018043683841824532, - "step": 11945 - }, - { - "epoch": 2.0375106564364875, - "grad_norm": 0.08634094148874283, - "learning_rate": 3.75819901735692e-05, - "loss": 0.0024119339883327483, - "step": 11950 - }, - { - "epoch": 2.0383631713554986, - "grad_norm": 0.05877285450696945, - "learning_rate": 3.755636826801194e-05, - "loss": 0.0009346712380647659, - "step": 11955 - }, - { - "epoch": 2.0392156862745097, - "grad_norm": 0.034714680165052414, - "learning_rate": 3.7530746336140115e-05, - "loss": 0.0021316718310117723, - "step": 11960 - }, - { - "epoch": 2.0400682011935207, - "grad_norm": 0.05897806957364082, - "learning_rate": 3.750512438991487e-05, - "loss": 0.0029691245406866075, - "step": 11965 - }, - { - "epoch": 2.040920716112532, - "grad_norm": 0.07302019745111465, - "learning_rate": 3.747950244129739e-05, - "loss": 0.0023437861353158953, - "step": 11970 - }, - { - "epoch": 2.041773231031543, - "grad_norm": 0.07187193632125854, - "learning_rate": 3.745388050224885e-05, - "loss": 0.0016971008852124214, - "step": 11975 - }, - { - "epoch": 2.0426257459505544, - "grad_norm": 0.05619340017437935, - "learning_rate": 3.742825858473044e-05, - "loss": 0.0021343151107430457, - "step": 11980 - }, - { - "epoch": 2.0434782608695654, - "grad_norm": 0.08814098685979843, - "learning_rate": 3.7402636700703295e-05, - "loss": 0.0026463912799954414, - "step": 11985 - }, - { - "epoch": 2.0443307757885765, - "grad_norm": 0.10130181908607483, - "learning_rate": 3.737701486212859e-05, - "loss": 0.0020437544211745263, - "step": 11990 - }, - { - "epoch": 2.0451832907075875, - "grad_norm": 0.06105076149106026, - "learning_rate": 3.7351393080967416e-05, - "loss": 0.002344959042966366, - "step": 11995 - }, - { - "epoch": 2.0460358056265986, - "grad_norm": 0.052822742611169815, - "learning_rate": 3.732577136918091e-05, - "loss": 0.0020566854625940325, - "step": 12000 - }, - { - "epoch": 2.0468883205456097, - "grad_norm": 0.10074899345636368, - "learning_rate": 3.730014973873013e-05, - "loss": 0.0026124339550733567, - "step": 12005 - }, - { - "epoch": 2.0477408354646207, - "grad_norm": 0.025597436353564262, - "learning_rate": 3.7274528201576095e-05, - "loss": 0.001905813068151474, - "step": 12010 - }, - { - "epoch": 2.0485933503836318, - "grad_norm": 0.05437781289219856, - "learning_rate": 3.7248906769679776e-05, - "loss": 0.0025912046432495115, - "step": 12015 - }, - { - "epoch": 2.049445865302643, - "grad_norm": 0.07095912098884583, - "learning_rate": 3.722328545500215e-05, - "loss": 0.002769463881850243, - "step": 12020 - }, - { - "epoch": 2.050298380221654, - "grad_norm": 0.14383850991725922, - "learning_rate": 3.719766426950408e-05, - "loss": 0.0038499854505062102, - "step": 12025 - }, - { - "epoch": 2.051150895140665, - "grad_norm": 0.06089269369840622, - "learning_rate": 3.7172043225146386e-05, - "loss": 0.002288899011909962, - "step": 12030 - }, - { - "epoch": 2.052003410059676, - "grad_norm": 0.05808301270008087, - "learning_rate": 3.7146422333889824e-05, - "loss": 0.0028305932879447936, - "step": 12035 - }, - { - "epoch": 2.052855924978687, - "grad_norm": 0.13334520161151886, - "learning_rate": 3.712080160769506e-05, - "loss": 0.00331525094807148, - "step": 12040 - }, - { - "epoch": 2.053708439897698, - "grad_norm": 0.03266080096364021, - "learning_rate": 3.709518105852273e-05, - "loss": 0.0020869884639978407, - "step": 12045 - }, - { - "epoch": 2.054560954816709, - "grad_norm": 0.07307332009077072, - "learning_rate": 3.706956069833336e-05, - "loss": 0.0023028414696455004, - "step": 12050 - }, - { - "epoch": 2.0554134697357203, - "grad_norm": 0.06093568354845047, - "learning_rate": 3.7043940539087366e-05, - "loss": 0.0022027945145964623, - "step": 12055 - }, - { - "epoch": 2.0562659846547313, - "grad_norm": 0.04207700863480568, - "learning_rate": 3.70183205927451e-05, - "loss": 0.0016738155856728554, - "step": 12060 - }, - { - "epoch": 2.0571184995737424, - "grad_norm": 0.046319641172885895, - "learning_rate": 3.699270087126679e-05, - "loss": 0.002480871044099331, - "step": 12065 - }, - { - "epoch": 2.0579710144927534, - "grad_norm": 0.042888909578323364, - "learning_rate": 3.69670813866126e-05, - "loss": 0.0020912257954478265, - "step": 12070 - }, - { - "epoch": 2.0588235294117645, - "grad_norm": 0.05530136078596115, - "learning_rate": 3.694146215074256e-05, - "loss": 0.0021427463740110396, - "step": 12075 - }, - { - "epoch": 2.059676044330776, - "grad_norm": 0.04992877319455147, - "learning_rate": 3.6915843175616555e-05, - "loss": 0.001970967650413513, - "step": 12080 - }, - { - "epoch": 2.060528559249787, - "grad_norm": 0.07341081649065018, - "learning_rate": 3.6890224473194373e-05, - "loss": 0.003120069019496441, - "step": 12085 - }, - { - "epoch": 2.061381074168798, - "grad_norm": 0.05361134931445122, - "learning_rate": 3.686460605543571e-05, - "loss": 0.0030833475291728975, - "step": 12090 - }, - { - "epoch": 2.062233589087809, - "grad_norm": 0.0904894769191742, - "learning_rate": 3.683898793430008e-05, - "loss": 0.0020733945071697234, - "step": 12095 - }, - { - "epoch": 2.0630861040068202, - "grad_norm": 0.03312591835856438, - "learning_rate": 3.681337012174686e-05, - "loss": 0.002308916300535202, - "step": 12100 - }, - { - "epoch": 2.0639386189258313, - "grad_norm": 0.05372268706560135, - "learning_rate": 3.6787752629735314e-05, - "loss": 0.0024915780872106553, - "step": 12105 - }, - { - "epoch": 2.0647911338448424, - "grad_norm": 0.08257800340652466, - "learning_rate": 3.676213547022452e-05, - "loss": 0.001413002610206604, - "step": 12110 - }, - { - "epoch": 2.0656436487638534, - "grad_norm": 0.037859030067920685, - "learning_rate": 3.673651865517344e-05, - "loss": 0.002315748296678066, - "step": 12115 - }, - { - "epoch": 2.0664961636828645, - "grad_norm": 0.04125140607357025, - "learning_rate": 3.6710902196540856e-05, - "loss": 0.0022393757477402686, - "step": 12120 - }, - { - "epoch": 2.0673486786018755, - "grad_norm": 0.09325335919857025, - "learning_rate": 3.668528610628538e-05, - "loss": 0.003246062248945236, - "step": 12125 - }, - { - "epoch": 2.0682011935208866, - "grad_norm": 0.09278098493814468, - "learning_rate": 3.665967039636543e-05, - "loss": 0.0027722738683223723, - "step": 12130 - }, - { - "epoch": 2.0690537084398977, - "grad_norm": 0.07906672358512878, - "learning_rate": 3.663405507873931e-05, - "loss": 0.0035691894590854645, - "step": 12135 - }, - { - "epoch": 2.0699062233589087, - "grad_norm": 0.04077119752764702, - "learning_rate": 3.660844016536507e-05, - "loss": 0.0018417894840240478, - "step": 12140 - }, - { - "epoch": 2.07075873827792, - "grad_norm": 0.08916836231946945, - "learning_rate": 3.6582825668200636e-05, - "loss": 0.0019499020650982856, - "step": 12145 - }, - { - "epoch": 2.071611253196931, - "grad_norm": 0.017643144354224205, - "learning_rate": 3.655721159920368e-05, - "loss": 0.0018016694113612175, - "step": 12150 - }, - { - "epoch": 2.072463768115942, - "grad_norm": 0.046675924211740494, - "learning_rate": 3.6531597970331704e-05, - "loss": 0.0023558875545859337, - "step": 12155 - }, - { - "epoch": 2.073316283034953, - "grad_norm": 0.06159510463476181, - "learning_rate": 3.650598479354202e-05, - "loss": 0.003485919535160065, - "step": 12160 - }, - { - "epoch": 2.074168797953964, - "grad_norm": 0.10620608925819397, - "learning_rate": 3.64803720807917e-05, - "loss": 0.0021355047821998594, - "step": 12165 - }, - { - "epoch": 2.075021312872975, - "grad_norm": 0.03321434184908867, - "learning_rate": 3.645475984403761e-05, - "loss": 0.0027330033481121063, - "step": 12170 - }, - { - "epoch": 2.075873827791986, - "grad_norm": 0.05574263632297516, - "learning_rate": 3.642914809523639e-05, - "loss": 0.0017123395577073098, - "step": 12175 - }, - { - "epoch": 2.0767263427109977, - "grad_norm": 0.045334603637456894, - "learning_rate": 3.640353684634446e-05, - "loss": 0.001525832526385784, - "step": 12180 - }, - { - "epoch": 2.0775788576300087, - "grad_norm": 0.05117806792259216, - "learning_rate": 3.6377926109318005e-05, - "loss": 0.0022421007975935935, - "step": 12185 - }, - { - "epoch": 2.0784313725490198, - "grad_norm": 0.02836792916059494, - "learning_rate": 3.635231589611297e-05, - "loss": 0.003241851553320885, - "step": 12190 - }, - { - "epoch": 2.079283887468031, - "grad_norm": 0.13245631754398346, - "learning_rate": 3.632670621868506e-05, - "loss": 0.0028171174228191374, - "step": 12195 - }, - { - "epoch": 2.080136402387042, - "grad_norm": 0.04175787791609764, - "learning_rate": 3.63010970889897e-05, - "loss": 0.0026013338938355446, - "step": 12200 - }, - { - "epoch": 2.080988917306053, - "grad_norm": 0.022211721166968346, - "learning_rate": 3.6275488518982104e-05, - "loss": 0.0029422508552670477, - "step": 12205 - }, - { - "epoch": 2.081841432225064, - "grad_norm": 0.0889682024717331, - "learning_rate": 3.6249880520617205e-05, - "loss": 0.002521348185837269, - "step": 12210 - }, - { - "epoch": 2.082693947144075, - "grad_norm": 0.022678803652524948, - "learning_rate": 3.622427310584967e-05, - "loss": 0.0010427280329167842, - "step": 12215 - }, - { - "epoch": 2.083546462063086, - "grad_norm": 0.07812847197055817, - "learning_rate": 3.6198666286633886e-05, - "loss": 0.002325686253607273, - "step": 12220 - }, - { - "epoch": 2.084398976982097, - "grad_norm": 0.06912051141262054, - "learning_rate": 3.6173060074923945e-05, - "loss": 0.0022675972431898117, - "step": 12225 - }, - { - "epoch": 2.0852514919011083, - "grad_norm": 0.02951810136437416, - "learning_rate": 3.6147454482673715e-05, - "loss": 0.00159697774797678, - "step": 12230 - }, - { - "epoch": 2.0861040068201193, - "grad_norm": 0.11821833997964859, - "learning_rate": 3.6121849521836735e-05, - "loss": 0.002206057496368885, - "step": 12235 - }, - { - "epoch": 2.0869565217391304, - "grad_norm": 0.05461777001619339, - "learning_rate": 3.609624520436624e-05, - "loss": 0.0012241648510098457, - "step": 12240 - }, - { - "epoch": 2.0878090366581414, - "grad_norm": 0.05038715526461601, - "learning_rate": 3.607064154221516e-05, - "loss": 0.002225806750357151, - "step": 12245 - }, - { - "epoch": 2.0886615515771525, - "grad_norm": 0.03050738200545311, - "learning_rate": 3.604503854733617e-05, - "loss": 0.0020998189225792884, - "step": 12250 - }, - { - "epoch": 2.0895140664961636, - "grad_norm": 0.07000287622213364, - "learning_rate": 3.6019436231681585e-05, - "loss": 0.0022106122225522993, - "step": 12255 - }, - { - "epoch": 2.0903665814151746, - "grad_norm": 0.0332137756049633, - "learning_rate": 3.5993834607203416e-05, - "loss": 0.0020401908084750177, - "step": 12260 - }, - { - "epoch": 2.0912190963341857, - "grad_norm": 0.0996270552277565, - "learning_rate": 3.596823368585336e-05, - "loss": 0.002487153559923172, - "step": 12265 - }, - { - "epoch": 2.0920716112531967, - "grad_norm": 0.1305847465991974, - "learning_rate": 3.594263347958276e-05, - "loss": 0.0028627485036849974, - "step": 12270 - }, - { - "epoch": 2.092924126172208, - "grad_norm": 0.0762234702706337, - "learning_rate": 3.5917034000342664e-05, - "loss": 0.0020220713689923287, - "step": 12275 - }, - { - "epoch": 2.0937766410912193, - "grad_norm": 0.015480007976293564, - "learning_rate": 3.589143526008376e-05, - "loss": 0.00215108972042799, - "step": 12280 - }, - { - "epoch": 2.0946291560102304, - "grad_norm": 0.0862250104546547, - "learning_rate": 3.5865837270756385e-05, - "loss": 0.0020705640316009523, - "step": 12285 - }, - { - "epoch": 2.0954816709292414, - "grad_norm": 0.03390849754214287, - "learning_rate": 3.584024004431052e-05, - "loss": 0.002040168456733227, - "step": 12290 - }, - { - "epoch": 2.0963341858482525, - "grad_norm": 0.07754851132631302, - "learning_rate": 3.581464359269582e-05, - "loss": 0.0029265256598591805, - "step": 12295 - }, - { - "epoch": 2.0971867007672635, - "grad_norm": 0.0625162348151207, - "learning_rate": 3.578904792786155e-05, - "loss": 0.0020755715668201447, - "step": 12300 - }, - { - "epoch": 2.0980392156862746, - "grad_norm": 0.10999561101198196, - "learning_rate": 3.576345306175663e-05, - "loss": 0.0027062267065048216, - "step": 12305 - }, - { - "epoch": 2.0988917306052857, - "grad_norm": 0.03573682904243469, - "learning_rate": 3.573785900632959e-05, - "loss": 0.00178314708173275, - "step": 12310 - }, - { - "epoch": 2.0997442455242967, - "grad_norm": 0.07235981523990631, - "learning_rate": 3.5712265773528564e-05, - "loss": 0.00233871191740036, - "step": 12315 - }, - { - "epoch": 2.100596760443308, - "grad_norm": 0.054438747465610504, - "learning_rate": 3.568667337530135e-05, - "loss": 0.0031350374221801756, - "step": 12320 - }, - { - "epoch": 2.101449275362319, - "grad_norm": 0.07696446031332016, - "learning_rate": 3.566108182359533e-05, - "loss": 0.0019116310402750968, - "step": 12325 - }, - { - "epoch": 2.10230179028133, - "grad_norm": 0.0676850974559784, - "learning_rate": 3.563549113035749e-05, - "loss": 0.0011704936623573303, - "step": 12330 - }, - { - "epoch": 2.103154305200341, - "grad_norm": 0.07241418212652206, - "learning_rate": 3.5609901307534416e-05, - "loss": 0.002332131937146187, - "step": 12335 - }, - { - "epoch": 2.104006820119352, - "grad_norm": 0.0832296758890152, - "learning_rate": 3.558431236707227e-05, - "loss": 0.002539648115634918, - "step": 12340 - }, - { - "epoch": 2.104859335038363, - "grad_norm": 0.046911224722862244, - "learning_rate": 3.555872432091684e-05, - "loss": 0.0015112090855836867, - "step": 12345 - }, - { - "epoch": 2.105711849957374, - "grad_norm": 0.09462827444076538, - "learning_rate": 3.553313718101348e-05, - "loss": 0.0024237846955657005, - "step": 12350 - }, - { - "epoch": 2.106564364876385, - "grad_norm": 0.06934045255184174, - "learning_rate": 3.550755095930711e-05, - "loss": 0.0014186175540089607, - "step": 12355 - }, - { - "epoch": 2.1074168797953963, - "grad_norm": 0.05409622564911842, - "learning_rate": 3.5481965667742216e-05, - "loss": 0.0016573246568441391, - "step": 12360 - }, - { - "epoch": 2.1082693947144073, - "grad_norm": 0.05712766572833061, - "learning_rate": 3.545638131826289e-05, - "loss": 0.0029039720073342325, - "step": 12365 - }, - { - "epoch": 2.1091219096334184, - "grad_norm": 0.05685155466198921, - "learning_rate": 3.543079792281274e-05, - "loss": 0.0016390934586524963, - "step": 12370 - }, - { - "epoch": 2.10997442455243, - "grad_norm": 0.06140974909067154, - "learning_rate": 3.5405215493334966e-05, - "loss": 0.0038812048733234406, - "step": 12375 - }, - { - "epoch": 2.110826939471441, - "grad_norm": 0.0662747323513031, - "learning_rate": 3.537963404177227e-05, - "loss": 0.0029465768486261366, - "step": 12380 - }, - { - "epoch": 2.111679454390452, - "grad_norm": 0.05666056647896767, - "learning_rate": 3.535405358006694e-05, - "loss": 0.0028562054038047792, - "step": 12385 - }, - { - "epoch": 2.112531969309463, - "grad_norm": 0.02187039703130722, - "learning_rate": 3.532847412016077e-05, - "loss": 0.0017194624990224839, - "step": 12390 - }, - { - "epoch": 2.113384484228474, - "grad_norm": 0.040781840682029724, - "learning_rate": 3.530289567399513e-05, - "loss": 0.0026536308228969573, - "step": 12395 - }, - { - "epoch": 2.114236999147485, - "grad_norm": 0.05844609811902046, - "learning_rate": 3.527731825351088e-05, - "loss": 0.0018391696736216544, - "step": 12400 - }, - { - "epoch": 2.1150895140664963, - "grad_norm": 0.08661946654319763, - "learning_rate": 3.52517418706484e-05, - "loss": 0.0028108954429626465, - "step": 12405 - }, - { - "epoch": 2.1159420289855073, - "grad_norm": 0.05540858209133148, - "learning_rate": 3.52261665373476e-05, - "loss": 0.001869852840900421, - "step": 12410 - }, - { - "epoch": 2.1167945439045184, - "grad_norm": 0.05183592066168785, - "learning_rate": 3.520059226554789e-05, - "loss": 0.0038085319101810455, - "step": 12415 - }, - { - "epoch": 2.1176470588235294, - "grad_norm": 0.09019337594509125, - "learning_rate": 3.517501906718822e-05, - "loss": 0.0025485800579190254, - "step": 12420 - }, - { - "epoch": 2.1184995737425405, - "grad_norm": 0.05994381010532379, - "learning_rate": 3.514944695420698e-05, - "loss": 0.0023555709049105644, - "step": 12425 - }, - { - "epoch": 2.1193520886615516, - "grad_norm": 0.07013200968503952, - "learning_rate": 3.512387593854208e-05, - "loss": 0.0023415835574269296, - "step": 12430 - }, - { - "epoch": 2.1202046035805626, - "grad_norm": 0.0558604821562767, - "learning_rate": 3.509830603213094e-05, - "loss": 0.002999695762991905, - "step": 12435 - }, - { - "epoch": 2.1210571184995737, - "grad_norm": 0.054457131773233414, - "learning_rate": 3.507273724691045e-05, - "loss": 0.0022147590294480323, - "step": 12440 - }, - { - "epoch": 2.1219096334185847, - "grad_norm": 0.052365075796842575, - "learning_rate": 3.5047169594816955e-05, - "loss": 0.0023635342717170716, - "step": 12445 - }, - { - "epoch": 2.122762148337596, - "grad_norm": 0.047059565782547, - "learning_rate": 3.502160308778627e-05, - "loss": 0.0015694497153162957, - "step": 12450 - }, - { - "epoch": 2.123614663256607, - "grad_norm": 0.03100336343050003, - "learning_rate": 3.499603773775371e-05, - "loss": 0.0020049646496772765, - "step": 12455 - }, - { - "epoch": 2.124467178175618, - "grad_norm": 0.07436710596084595, - "learning_rate": 3.4970473556654027e-05, - "loss": 0.004277446493506432, - "step": 12460 - }, - { - "epoch": 2.125319693094629, - "grad_norm": 0.044698864221572876, - "learning_rate": 3.4944910556421444e-05, - "loss": 0.0032587334513664245, - "step": 12465 - }, - { - "epoch": 2.12617220801364, - "grad_norm": 0.04725298285484314, - "learning_rate": 3.491934874898961e-05, - "loss": 0.0018061451613903047, - "step": 12470 - }, - { - "epoch": 2.127024722932651, - "grad_norm": 0.04054245352745056, - "learning_rate": 3.4893788146291604e-05, - "loss": 0.0017766639590263366, - "step": 12475 - }, - { - "epoch": 2.1278772378516626, - "grad_norm": 0.06061461195349693, - "learning_rate": 3.486822876025999e-05, - "loss": 0.0025131702423095703, - "step": 12480 - }, - { - "epoch": 2.1287297527706737, - "grad_norm": 0.058438993990421295, - "learning_rate": 3.4842670602826744e-05, - "loss": 0.002218405343592167, - "step": 12485 - }, - { - "epoch": 2.1295822676896847, - "grad_norm": 0.057440634816884995, - "learning_rate": 3.481711368592327e-05, - "loss": 0.0015549706295132637, - "step": 12490 - }, - { - "epoch": 2.130434782608696, - "grad_norm": 0.06638845056295395, - "learning_rate": 3.4791558021480355e-05, - "loss": 0.002662469446659088, - "step": 12495 - }, - { - "epoch": 2.131287297527707, - "grad_norm": 0.06725790351629257, - "learning_rate": 3.476600362142824e-05, - "loss": 0.0024463947862386703, - "step": 12500 - }, - { - "epoch": 2.132139812446718, - "grad_norm": 0.07708985358476639, - "learning_rate": 3.474045049769659e-05, - "loss": 0.0034916583448648454, - "step": 12505 - }, - { - "epoch": 2.132992327365729, - "grad_norm": 0.06412148475646973, - "learning_rate": 3.4714898662214454e-05, - "loss": 0.002831364795565605, - "step": 12510 - }, - { - "epoch": 2.13384484228474, - "grad_norm": 0.04649505391716957, - "learning_rate": 3.468934812691027e-05, - "loss": 0.002048753574490547, - "step": 12515 - }, - { - "epoch": 2.134697357203751, - "grad_norm": 0.04807932674884796, - "learning_rate": 3.4663798903711865e-05, - "loss": 0.0018209950998425485, - "step": 12520 - }, - { - "epoch": 2.135549872122762, - "grad_norm": 0.043283116072416306, - "learning_rate": 3.4638251004546476e-05, - "loss": 0.001797056198120117, - "step": 12525 - }, - { - "epoch": 2.136402387041773, - "grad_norm": 0.015419692732393742, - "learning_rate": 3.4612704441340716e-05, - "loss": 0.002100854739546776, - "step": 12530 - }, - { - "epoch": 2.1372549019607843, - "grad_norm": 0.05244193226099014, - "learning_rate": 3.458715922602057e-05, - "loss": 0.002430478297173977, - "step": 12535 - }, - { - "epoch": 2.1381074168797953, - "grad_norm": 0.08995307981967926, - "learning_rate": 3.4561615370511394e-05, - "loss": 0.0023157089948654176, - "step": 12540 - }, - { - "epoch": 2.1389599317988064, - "grad_norm": 0.06513174623250961, - "learning_rate": 3.4536072886737894e-05, - "loss": 0.002109107933938503, - "step": 12545 - }, - { - "epoch": 2.1398124467178175, - "grad_norm": 0.12199243903160095, - "learning_rate": 3.4510531786624176e-05, - "loss": 0.0016247857362031936, - "step": 12550 - }, - { - "epoch": 2.1406649616368285, - "grad_norm": 0.06062543764710426, - "learning_rate": 3.4484992082093665e-05, - "loss": 0.0033494606614112854, - "step": 12555 - }, - { - "epoch": 2.1415174765558396, - "grad_norm": 0.08636222034692764, - "learning_rate": 3.445945378506915e-05, - "loss": 0.0037529505789279938, - "step": 12560 - }, - { - "epoch": 2.1423699914748506, - "grad_norm": 0.027961688116192818, - "learning_rate": 3.443391690747274e-05, - "loss": 0.0016466494649648666, - "step": 12565 - }, - { - "epoch": 2.1432225063938617, - "grad_norm": 0.033621031790971756, - "learning_rate": 3.440838146122591e-05, - "loss": 0.002477872557938099, - "step": 12570 - }, - { - "epoch": 2.144075021312873, - "grad_norm": 0.08104594051837921, - "learning_rate": 3.4382847458249453e-05, - "loss": 0.0031348835676908494, - "step": 12575 - }, - { - "epoch": 2.1449275362318843, - "grad_norm": 0.07412353157997131, - "learning_rate": 3.4357314910463506e-05, - "loss": 0.002509618178009987, - "step": 12580 - }, - { - "epoch": 2.1457800511508953, - "grad_norm": 0.04261288791894913, - "learning_rate": 3.43317838297875e-05, - "loss": 0.0021477997303009032, - "step": 12585 - }, - { - "epoch": 2.1466325660699064, - "grad_norm": 0.15133292973041534, - "learning_rate": 3.430625422814018e-05, - "loss": 0.0033604972064495086, - "step": 12590 - }, - { - "epoch": 2.1474850809889174, - "grad_norm": 0.08455967903137207, - "learning_rate": 3.428072611743962e-05, - "loss": 0.0035134248435497286, - "step": 12595 - }, - { - "epoch": 2.1483375959079285, - "grad_norm": 0.10830427706241608, - "learning_rate": 3.425519950960321e-05, - "loss": 0.003783620521426201, - "step": 12600 - }, - { - "epoch": 2.1491901108269396, - "grad_norm": 0.05701782926917076, - "learning_rate": 3.422967441654761e-05, - "loss": 0.0017763100564479827, - "step": 12605 - }, - { - "epoch": 2.1500426257459506, - "grad_norm": 0.058323513716459274, - "learning_rate": 3.420415085018878e-05, - "loss": 0.003765106201171875, - "step": 12610 - }, - { - "epoch": 2.1508951406649617, - "grad_norm": 0.08780697733163834, - "learning_rate": 3.417862882244195e-05, - "loss": 0.0021065909415483473, - "step": 12615 - }, - { - "epoch": 2.1517476555839727, - "grad_norm": 0.08741293847560883, - "learning_rate": 3.415310834522168e-05, - "loss": 0.0022673629224300384, - "step": 12620 - }, - { - "epoch": 2.152600170502984, - "grad_norm": 0.08681067824363708, - "learning_rate": 3.412758943044177e-05, - "loss": 0.0029561318457126617, - "step": 12625 - }, - { - "epoch": 2.153452685421995, - "grad_norm": 0.05104825645685196, - "learning_rate": 3.4102072090015306e-05, - "loss": 0.0028430519625544546, - "step": 12630 - }, - { - "epoch": 2.154305200341006, - "grad_norm": 0.05437494069337845, - "learning_rate": 3.4076556335854606e-05, - "loss": 0.0026259947568178176, - "step": 12635 - }, - { - "epoch": 2.155157715260017, - "grad_norm": 0.016572406515479088, - "learning_rate": 3.4051042179871286e-05, - "loss": 0.00198390893638134, - "step": 12640 - }, - { - "epoch": 2.156010230179028, - "grad_norm": 0.04134957864880562, - "learning_rate": 3.4025529633976216e-05, - "loss": 0.0017651205882430077, - "step": 12645 - }, - { - "epoch": 2.156862745098039, - "grad_norm": 0.04091856628656387, - "learning_rate": 3.400001871007949e-05, - "loss": 0.002631684020161629, - "step": 12650 - }, - { - "epoch": 2.15771526001705, - "grad_norm": 0.08851557224988937, - "learning_rate": 3.397450942009046e-05, - "loss": 0.004056418687105179, - "step": 12655 - }, - { - "epoch": 2.1585677749360612, - "grad_norm": 0.09870146960020065, - "learning_rate": 3.3949001775917686e-05, - "loss": 0.0017272619530558585, - "step": 12660 - }, - { - "epoch": 2.1594202898550723, - "grad_norm": 0.059828147292137146, - "learning_rate": 3.3923495789469016e-05, - "loss": 0.0018833462148904801, - "step": 12665 - }, - { - "epoch": 2.1602728047740833, - "grad_norm": 0.04078202694654465, - "learning_rate": 3.3897991472651495e-05, - "loss": 0.0015183920040726662, - "step": 12670 - }, - { - "epoch": 2.1611253196930944, - "grad_norm": 0.09713901579380035, - "learning_rate": 3.387248883737137e-05, - "loss": 0.002313835546374321, - "step": 12675 - }, - { - "epoch": 2.161977834612106, - "grad_norm": 0.13590694963932037, - "learning_rate": 3.3846987895534116e-05, - "loss": 0.002948279120028019, - "step": 12680 - }, - { - "epoch": 2.162830349531117, - "grad_norm": 0.05830051749944687, - "learning_rate": 3.3821488659044435e-05, - "loss": 0.002866750955581665, - "step": 12685 - }, - { - "epoch": 2.163682864450128, - "grad_norm": 0.08523424714803696, - "learning_rate": 3.3795991139806205e-05, - "loss": 0.001992848888039589, - "step": 12690 - }, - { - "epoch": 2.164535379369139, - "grad_norm": 0.07573958486318588, - "learning_rate": 3.3770495349722534e-05, - "loss": 0.003222312778234482, - "step": 12695 - }, - { - "epoch": 2.16538789428815, - "grad_norm": 0.1144784539937973, - "learning_rate": 3.374500130069569e-05, - "loss": 0.0023121457546949387, - "step": 12700 - }, - { - "epoch": 2.166240409207161, - "grad_norm": 0.037679724395275116, - "learning_rate": 3.371950900462716e-05, - "loss": 0.0022720521315932273, - "step": 12705 - }, - { - "epoch": 2.1670929241261723, - "grad_norm": 0.09523876011371613, - "learning_rate": 3.369401847341756e-05, - "loss": 0.0026744550094008447, - "step": 12710 - }, - { - "epoch": 2.1679454390451833, - "grad_norm": 0.08904188126325607, - "learning_rate": 3.3668529718966753e-05, - "loss": 0.0025367535650730132, - "step": 12715 - }, - { - "epoch": 2.1687979539641944, - "grad_norm": 0.065862737596035, - "learning_rate": 3.364304275317373e-05, - "loss": 0.0017513807862997055, - "step": 12720 - }, - { - "epoch": 2.1696504688832055, - "grad_norm": 0.03308388963341713, - "learning_rate": 3.361755758793665e-05, - "loss": 0.001534645166248083, - "step": 12725 - }, - { - "epoch": 2.1705029838022165, - "grad_norm": 0.11249089986085892, - "learning_rate": 3.359207423515283e-05, - "loss": 0.0012927086092531681, - "step": 12730 - }, - { - "epoch": 2.1713554987212276, - "grad_norm": 0.09918250143527985, - "learning_rate": 3.356659270671875e-05, - "loss": 0.0035567093640565873, - "step": 12735 - }, - { - "epoch": 2.1722080136402386, - "grad_norm": 0.008674295619130135, - "learning_rate": 3.354111301453005e-05, - "loss": 0.0013304737396538258, - "step": 12740 - }, - { - "epoch": 2.1730605285592497, - "grad_norm": 0.09038940817117691, - "learning_rate": 3.351563517048149e-05, - "loss": 0.0022449616342782976, - "step": 12745 - }, - { - "epoch": 2.1739130434782608, - "grad_norm": 0.11863812804222107, - "learning_rate": 3.349015918646695e-05, - "loss": 0.0029456689953804016, - "step": 12750 - }, - { - "epoch": 2.174765558397272, - "grad_norm": 0.055224135518074036, - "learning_rate": 3.34646850743795e-05, - "loss": 0.0021983785554766655, - "step": 12755 - }, - { - "epoch": 2.175618073316283, - "grad_norm": 0.05251838266849518, - "learning_rate": 3.34392128461113e-05, - "loss": 0.0018048876896500587, - "step": 12760 - }, - { - "epoch": 2.176470588235294, - "grad_norm": 0.07146445661783218, - "learning_rate": 3.341374251355361e-05, - "loss": 0.0030932359397411345, - "step": 12765 - }, - { - "epoch": 2.177323103154305, - "grad_norm": 0.03640792518854141, - "learning_rate": 3.338827408859686e-05, - "loss": 0.0016893571242690085, - "step": 12770 - }, - { - "epoch": 2.1781756180733165, - "grad_norm": 0.0680721327662468, - "learning_rate": 3.336280758313052e-05, - "loss": 0.0037735387682914733, - "step": 12775 - }, - { - "epoch": 2.1790281329923276, - "grad_norm": 0.047598470002412796, - "learning_rate": 3.333734300904322e-05, - "loss": 0.002026566304266453, - "step": 12780 - }, - { - "epoch": 2.1798806479113386, - "grad_norm": 0.08361580222845078, - "learning_rate": 3.3311880378222695e-05, - "loss": 0.002865005284547806, - "step": 12785 - }, - { - "epoch": 2.1807331628303497, - "grad_norm": 0.04869835823774338, - "learning_rate": 3.328641970255572e-05, - "loss": 0.0018146531656384468, - "step": 12790 - }, - { - "epoch": 2.1815856777493607, - "grad_norm": 0.06970708072185516, - "learning_rate": 3.326096099392819e-05, - "loss": 0.0022316936403512953, - "step": 12795 - }, - { - "epoch": 2.182438192668372, - "grad_norm": 0.07073621451854706, - "learning_rate": 3.323550426422508e-05, - "loss": 0.0021546846255660057, - "step": 12800 - }, - { - "epoch": 2.183290707587383, - "grad_norm": 0.0552116334438324, - "learning_rate": 3.3210049525330426e-05, - "loss": 0.0022750692442059517, - "step": 12805 - }, - { - "epoch": 2.184143222506394, - "grad_norm": 0.08244488388299942, - "learning_rate": 3.318459678912737e-05, - "loss": 0.0027180306613445284, - "step": 12810 - }, - { - "epoch": 2.184995737425405, - "grad_norm": 0.07275483757257462, - "learning_rate": 3.315914606749808e-05, - "loss": 0.002150987088680267, - "step": 12815 - }, - { - "epoch": 2.185848252344416, - "grad_norm": 0.06152818351984024, - "learning_rate": 3.3133697372323804e-05, - "loss": 0.002709987387061119, - "step": 12820 - }, - { - "epoch": 2.186700767263427, - "grad_norm": 0.07358045876026154, - "learning_rate": 3.310825071548483e-05, - "loss": 0.0029207577928900717, - "step": 12825 - }, - { - "epoch": 2.187553282182438, - "grad_norm": 0.07633842527866364, - "learning_rate": 3.3082806108860516e-05, - "loss": 0.0028854381293058396, - "step": 12830 - }, - { - "epoch": 2.1884057971014492, - "grad_norm": 0.0533052496612072, - "learning_rate": 3.305736356432926e-05, - "loss": 0.0023338528349995612, - "step": 12835 - }, - { - "epoch": 2.1892583120204603, - "grad_norm": 0.09400077164173126, - "learning_rate": 3.303192309376846e-05, - "loss": 0.00362023301422596, - "step": 12840 - }, - { - "epoch": 2.1901108269394713, - "grad_norm": 0.09847433120012283, - "learning_rate": 3.300648470905459e-05, - "loss": 0.003238249197602272, - "step": 12845 - }, - { - "epoch": 2.1909633418584824, - "grad_norm": 0.09695439040660858, - "learning_rate": 3.298104842206314e-05, - "loss": 0.002254056558012962, - "step": 12850 - }, - { - "epoch": 2.1918158567774935, - "grad_norm": 0.07510244101285934, - "learning_rate": 3.295561424466861e-05, - "loss": 0.002555438503623009, - "step": 12855 - }, - { - "epoch": 2.1926683716965045, - "grad_norm": 0.07085850089788437, - "learning_rate": 3.2930182188744524e-05, - "loss": 0.0029295925050973892, - "step": 12860 - }, - { - "epoch": 2.1935208866155156, - "grad_norm": 0.12662498652935028, - "learning_rate": 3.290475226616339e-05, - "loss": 0.0019443847239017486, - "step": 12865 - }, - { - "epoch": 2.1943734015345266, - "grad_norm": 0.08738470077514648, - "learning_rate": 3.2879324488796755e-05, - "loss": 0.002229847013950348, - "step": 12870 - }, - { - "epoch": 2.1952259164535377, - "grad_norm": 0.04957102984189987, - "learning_rate": 3.285389886851517e-05, - "loss": 0.0017434298992156983, - "step": 12875 - }, - { - "epoch": 2.196078431372549, - "grad_norm": 0.057968392968177795, - "learning_rate": 3.282847541718814e-05, - "loss": 0.003453432023525238, - "step": 12880 - }, - { - "epoch": 2.1969309462915603, - "grad_norm": 0.1128922700881958, - "learning_rate": 3.280305414668419e-05, - "loss": 0.0025962982326745987, - "step": 12885 - }, - { - "epoch": 2.1977834612105713, - "grad_norm": 0.0661446675658226, - "learning_rate": 3.2777635068870784e-05, - "loss": 0.002279244549572468, - "step": 12890 - }, - { - "epoch": 2.1986359761295824, - "grad_norm": 0.09260411560535431, - "learning_rate": 3.275221819561443e-05, - "loss": 0.002637815475463867, - "step": 12895 - }, - { - "epoch": 2.1994884910485935, - "grad_norm": 0.08168021589517593, - "learning_rate": 3.272680353878056e-05, - "loss": 0.0029386602342128753, - "step": 12900 - }, - { - "epoch": 2.2003410059676045, - "grad_norm": 0.06187237799167633, - "learning_rate": 3.270139111023358e-05, - "loss": 0.0018257955089211464, - "step": 12905 - }, - { - "epoch": 2.2011935208866156, - "grad_norm": 0.09450117498636246, - "learning_rate": 3.267598092183684e-05, - "loss": 0.0023655250668525698, - "step": 12910 - }, - { - "epoch": 2.2020460358056266, - "grad_norm": 0.060870688408613205, - "learning_rate": 3.2650572985452685e-05, - "loss": 0.001705418713390827, - "step": 12915 - }, - { - "epoch": 2.2028985507246377, - "grad_norm": 0.06867264956235886, - "learning_rate": 3.262516731294237e-05, - "loss": 0.00248488187789917, - "step": 12920 - }, - { - "epoch": 2.2037510656436488, - "grad_norm": 0.07654258608818054, - "learning_rate": 3.259976391616612e-05, - "loss": 0.002200855314731598, - "step": 12925 - }, - { - "epoch": 2.20460358056266, - "grad_norm": 0.06781245768070221, - "learning_rate": 3.257436280698308e-05, - "loss": 0.002006441354751587, - "step": 12930 - }, - { - "epoch": 2.205456095481671, - "grad_norm": 0.045858342200517654, - "learning_rate": 3.254896399725132e-05, - "loss": 0.0020667938515543938, - "step": 12935 - }, - { - "epoch": 2.206308610400682, - "grad_norm": 0.06805605441331863, - "learning_rate": 3.2523567498827865e-05, - "loss": 0.002215307205915451, - "step": 12940 - }, - { - "epoch": 2.207161125319693, - "grad_norm": 0.07554472237825394, - "learning_rate": 3.2498173323568645e-05, - "loss": 0.0021156981587409974, - "step": 12945 - }, - { - "epoch": 2.208013640238704, - "grad_norm": 0.049611154943704605, - "learning_rate": 3.2472781483328506e-05, - "loss": 0.0037985272705554963, - "step": 12950 - }, - { - "epoch": 2.208866155157715, - "grad_norm": 0.04867832362651825, - "learning_rate": 3.24473919899612e-05, - "loss": 0.0011579260230064393, - "step": 12955 - }, - { - "epoch": 2.209718670076726, - "grad_norm": 0.04439609497785568, - "learning_rate": 3.2422004855319376e-05, - "loss": 0.0033864513039588927, - "step": 12960 - }, - { - "epoch": 2.2105711849957372, - "grad_norm": 0.054114069789648056, - "learning_rate": 3.23966200912546e-05, - "loss": 0.0017186013981699943, - "step": 12965 - }, - { - "epoch": 2.2114236999147483, - "grad_norm": 0.03286417946219444, - "learning_rate": 3.237123770961735e-05, - "loss": 0.0013779066503047943, - "step": 12970 - }, - { - "epoch": 2.21227621483376, - "grad_norm": 0.05740232393145561, - "learning_rate": 3.234585772225694e-05, - "loss": 0.00376686155796051, - "step": 12975 - }, - { - "epoch": 2.213128729752771, - "grad_norm": 0.11821190267801285, - "learning_rate": 3.232048014102158e-05, - "loss": 0.003515421971678734, - "step": 12980 - }, - { - "epoch": 2.213981244671782, - "grad_norm": 0.06561318039894104, - "learning_rate": 3.229510497775838e-05, - "loss": 0.0034034676849842072, - "step": 12985 - }, - { - "epoch": 2.214833759590793, - "grad_norm": 0.06076068431138992, - "learning_rate": 3.226973224431333e-05, - "loss": 0.0018323207274079322, - "step": 12990 - }, - { - "epoch": 2.215686274509804, - "grad_norm": 0.05743642896413803, - "learning_rate": 3.2244361952531266e-05, - "loss": 0.002844391018152237, - "step": 12995 - }, - { - "epoch": 2.216538789428815, - "grad_norm": 0.0632607489824295, - "learning_rate": 3.221899411425586e-05, - "loss": 0.003329380601644516, - "step": 13000 - }, - { - "epoch": 2.217391304347826, - "grad_norm": 0.06082088127732277, - "learning_rate": 3.219362874132966e-05, - "loss": 0.0026398774236440657, - "step": 13005 - }, - { - "epoch": 2.2182438192668372, - "grad_norm": 0.07731121778488159, - "learning_rate": 3.2168265845594075e-05, - "loss": 0.00193992517888546, - "step": 13010 - }, - { - "epoch": 2.2190963341858483, - "grad_norm": 0.08783961087465286, - "learning_rate": 3.214290543888938e-05, - "loss": 0.0019096124917268753, - "step": 13015 - }, - { - "epoch": 2.2199488491048593, - "grad_norm": 0.07576426863670349, - "learning_rate": 3.211754753305461e-05, - "loss": 0.002824045717716217, - "step": 13020 - }, - { - "epoch": 2.2208013640238704, - "grad_norm": 0.0671941265463829, - "learning_rate": 3.20921921399277e-05, - "loss": 0.0025903450325131415, - "step": 13025 - }, - { - "epoch": 2.2216538789428815, - "grad_norm": 0.025313038378953934, - "learning_rate": 3.206683927134538e-05, - "loss": 0.001357127632945776, - "step": 13030 - }, - { - "epoch": 2.2225063938618925, - "grad_norm": 0.0281735397875309, - "learning_rate": 3.204148893914323e-05, - "loss": 0.0018472330644726752, - "step": 13035 - }, - { - "epoch": 2.2233589087809036, - "grad_norm": 0.027222834527492523, - "learning_rate": 3.2016141155155625e-05, - "loss": 0.0018411261960864067, - "step": 13040 - }, - { - "epoch": 2.2242114236999146, - "grad_norm": 0.04794001951813698, - "learning_rate": 3.199079593121574e-05, - "loss": 0.0015307093039155007, - "step": 13045 - }, - { - "epoch": 2.2250639386189257, - "grad_norm": 0.05856316536664963, - "learning_rate": 3.196545327915558e-05, - "loss": 0.001051103323698044, - "step": 13050 - }, - { - "epoch": 2.2259164535379368, - "grad_norm": 0.037851642817258835, - "learning_rate": 3.194011321080592e-05, - "loss": 0.0020413145422935484, - "step": 13055 - }, - { - "epoch": 2.226768968456948, - "grad_norm": 0.04197809472680092, - "learning_rate": 3.191477573799638e-05, - "loss": 0.0025324104353785515, - "step": 13060 - }, - { - "epoch": 2.227621483375959, - "grad_norm": 0.04126058518886566, - "learning_rate": 3.188944087255531e-05, - "loss": 0.001765124499797821, - "step": 13065 - }, - { - "epoch": 2.2284739982949704, - "grad_norm": 0.13436861336231232, - "learning_rate": 3.186410862630988e-05, - "loss": 0.003620542213320732, - "step": 13070 - }, - { - "epoch": 2.229326513213981, - "grad_norm": 0.05177616328001022, - "learning_rate": 3.183877901108601e-05, - "loss": 0.001679854467511177, - "step": 13075 - }, - { - "epoch": 2.2301790281329925, - "grad_norm": 0.03360729292035103, - "learning_rate": 3.1813452038708415e-05, - "loss": 0.002009689994156361, - "step": 13080 - }, - { - "epoch": 2.2310315430520036, - "grad_norm": 0.102437824010849, - "learning_rate": 3.178812772100058e-05, - "loss": 0.002533908933401108, - "step": 13085 - }, - { - "epoch": 2.2318840579710146, - "grad_norm": 0.045174695551395416, - "learning_rate": 3.176280606978473e-05, - "loss": 0.0023472383618354797, - "step": 13090 - }, - { - "epoch": 2.2327365728900257, - "grad_norm": 0.0679149329662323, - "learning_rate": 3.173748709688184e-05, - "loss": 0.00249241441488266, - "step": 13095 - }, - { - "epoch": 2.2335890878090368, - "grad_norm": 0.1367262750864029, - "learning_rate": 3.171217081411166e-05, - "loss": 0.002387053519487381, - "step": 13100 - }, - { - "epoch": 2.234441602728048, - "grad_norm": 0.06661707162857056, - "learning_rate": 3.168685723329269e-05, - "loss": 0.002376999333500862, - "step": 13105 - }, - { - "epoch": 2.235294117647059, - "grad_norm": 0.08916410058736801, - "learning_rate": 3.166154636624214e-05, - "loss": 0.0027421964332461357, - "step": 13110 - }, - { - "epoch": 2.23614663256607, - "grad_norm": 0.058119386434555054, - "learning_rate": 3.163623822477595e-05, - "loss": 0.0018962904810905456, - "step": 13115 - }, - { - "epoch": 2.236999147485081, - "grad_norm": 0.06457269936800003, - "learning_rate": 3.161093282070882e-05, - "loss": 0.001441392581909895, - "step": 13120 - }, - { - "epoch": 2.237851662404092, - "grad_norm": 0.1250019371509552, - "learning_rate": 3.158563016585412e-05, - "loss": 0.002274188958108425, - "step": 13125 - }, - { - "epoch": 2.238704177323103, - "grad_norm": 0.03324245661497116, - "learning_rate": 3.156033027202403e-05, - "loss": 0.002002820558845997, - "step": 13130 - }, - { - "epoch": 2.239556692242114, - "grad_norm": 0.01897227205336094, - "learning_rate": 3.153503315102934e-05, - "loss": 0.0016582176089286805, - "step": 13135 - }, - { - "epoch": 2.2404092071611252, - "grad_norm": 0.07142049074172974, - "learning_rate": 3.15097388146796e-05, - "loss": 0.002489439025521278, - "step": 13140 - }, - { - "epoch": 2.2412617220801363, - "grad_norm": 0.05619347095489502, - "learning_rate": 3.148444727478303e-05, - "loss": 0.0021767957136034966, - "step": 13145 - }, - { - "epoch": 2.2421142369991474, - "grad_norm": 0.0950259119272232, - "learning_rate": 3.14591585431466e-05, - "loss": 0.001732981950044632, - "step": 13150 - }, - { - "epoch": 2.2429667519181584, - "grad_norm": 0.06186724454164505, - "learning_rate": 3.143387263157591e-05, - "loss": 0.001604793407022953, - "step": 13155 - }, - { - "epoch": 2.2438192668371695, - "grad_norm": 0.0921434834599495, - "learning_rate": 3.1408589551875256e-05, - "loss": 0.001957142725586891, - "step": 13160 - }, - { - "epoch": 2.2446717817561805, - "grad_norm": 0.05556231364607811, - "learning_rate": 3.138330931584763e-05, - "loss": 0.002686610072851181, - "step": 13165 - }, - { - "epoch": 2.2455242966751916, - "grad_norm": 0.10184850543737411, - "learning_rate": 3.1358031935294666e-05, - "loss": 0.0019098062068223954, - "step": 13170 - }, - { - "epoch": 2.246376811594203, - "grad_norm": 0.08860436826944351, - "learning_rate": 3.133275742201673e-05, - "loss": 0.002402664348483086, - "step": 13175 - }, - { - "epoch": 2.247229326513214, - "grad_norm": 0.06324724107980728, - "learning_rate": 3.130748578781278e-05, - "loss": 0.0018930312246084214, - "step": 13180 - }, - { - "epoch": 2.2480818414322252, - "grad_norm": 0.07382629811763763, - "learning_rate": 3.128221704448045e-05, - "loss": 0.0026824956759810446, - "step": 13185 - }, - { - "epoch": 2.2489343563512363, - "grad_norm": 0.1002819687128067, - "learning_rate": 3.125695120381603e-05, - "loss": 0.0030449360609054567, - "step": 13190 - }, - { - "epoch": 2.249616368286445, - "eval_loss": 0.046705588698387146, - "eval_runtime": 3.7196, - "eval_samples_per_second": 67.748, - "eval_steps_per_second": 1.075, - "step": 13194 - }, - { - "eval_cer_subset": 0.013842070122298761, - "eval_cer_subset_edit_distance": 850, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 13194 - }, - { - "epoch": 2.2497868712702473, - "grad_norm": 0.030392520129680634, - "learning_rate": 3.123168827761447e-05, - "loss": 0.0015232504345476627, - "step": 13195 - }, - { - "epoch": 2.2506393861892584, - "grad_norm": 0.04160630702972412, - "learning_rate": 3.1206428277669336e-05, - "loss": 0.0026638204231858253, - "step": 13200 - }, - { - "epoch": 2.2514919011082695, - "grad_norm": 0.06140404939651489, - "learning_rate": 3.118117121577284e-05, - "loss": 0.003001154027879238, - "step": 13205 - }, - { - "epoch": 2.2523444160272805, - "grad_norm": 0.06974830478429794, - "learning_rate": 3.115591710371581e-05, - "loss": 0.0032261811196804047, - "step": 13210 - }, - { - "epoch": 2.2531969309462916, - "grad_norm": 0.09120716899633408, - "learning_rate": 3.1130665953287695e-05, - "loss": 0.001386938989162445, - "step": 13215 - }, - { - "epoch": 2.2540494458653026, - "grad_norm": 0.06130429729819298, - "learning_rate": 3.110541777627661e-05, - "loss": 0.0014743787236511708, - "step": 13220 - }, - { - "epoch": 2.2549019607843137, - "grad_norm": 0.07033205777406693, - "learning_rate": 3.108017258446921e-05, - "loss": 0.003749256581068039, - "step": 13225 - }, - { - "epoch": 2.2557544757033248, - "grad_norm": 0.08650046586990356, - "learning_rate": 3.1054930389650804e-05, - "loss": 0.0023554276674985887, - "step": 13230 - }, - { - "epoch": 2.256606990622336, - "grad_norm": 0.06045643612742424, - "learning_rate": 3.102969120360529e-05, - "loss": 0.0019686706364154816, - "step": 13235 - }, - { - "epoch": 2.257459505541347, - "grad_norm": 0.1004268005490303, - "learning_rate": 3.100445503811514e-05, - "loss": 0.003136196732521057, - "step": 13240 - }, - { - "epoch": 2.258312020460358, - "grad_norm": 0.08810209482908249, - "learning_rate": 3.097922190496146e-05, - "loss": 0.002239716053009033, - "step": 13245 - }, - { - "epoch": 2.259164535379369, - "grad_norm": 0.10518727451562881, - "learning_rate": 3.095399181592392e-05, - "loss": 0.002447150461375713, - "step": 13250 - }, - { - "epoch": 2.26001705029838, - "grad_norm": 0.049536559730768204, - "learning_rate": 3.092876478278074e-05, - "loss": 0.0023296492174267767, - "step": 13255 - }, - { - "epoch": 2.260869565217391, - "grad_norm": 0.057701822370290756, - "learning_rate": 3.0903540817308734e-05, - "loss": 0.0018970953300595284, - "step": 13260 - }, - { - "epoch": 2.261722080136402, - "grad_norm": 0.04391616955399513, - "learning_rate": 3.087831993128333e-05, - "loss": 0.0026229951530694962, - "step": 13265 - }, - { - "epoch": 2.2625745950554137, - "grad_norm": 0.048150911927223206, - "learning_rate": 3.0853102136478444e-05, - "loss": 0.0015288691036403179, - "step": 13270 - }, - { - "epoch": 2.2634271099744243, - "grad_norm": 0.12074416130781174, - "learning_rate": 3.082788744466659e-05, - "loss": 0.0025329213589429856, - "step": 13275 - }, - { - "epoch": 2.264279624893436, - "grad_norm": 0.05400107055902481, - "learning_rate": 3.080267586761881e-05, - "loss": 0.0017294475808739662, - "step": 13280 - }, - { - "epoch": 2.265132139812447, - "grad_norm": 0.07027488201856613, - "learning_rate": 3.0777467417104717e-05, - "loss": 0.0026237966492772104, - "step": 13285 - }, - { - "epoch": 2.265984654731458, - "grad_norm": 0.06868001073598862, - "learning_rate": 3.075226210489247e-05, - "loss": 0.0021411897614598274, - "step": 13290 - }, - { - "epoch": 2.266837169650469, - "grad_norm": 0.07447243481874466, - "learning_rate": 3.072705994274874e-05, - "loss": 0.002808676287531853, - "step": 13295 - }, - { - "epoch": 2.26768968456948, - "grad_norm": 0.04292432591319084, - "learning_rate": 3.070186094243872e-05, - "loss": 0.001994679495692253, - "step": 13300 - }, - { - "epoch": 2.268542199488491, - "grad_norm": 0.06083334609866142, - "learning_rate": 3.067666511572614e-05, - "loss": 0.001621294766664505, - "step": 13305 - }, - { - "epoch": 2.269394714407502, - "grad_norm": 0.04339296743273735, - "learning_rate": 3.065147247437327e-05, - "loss": 0.002122482657432556, - "step": 13310 - }, - { - "epoch": 2.2702472293265132, - "grad_norm": 0.07901404052972794, - "learning_rate": 3.062628303014087e-05, - "loss": 0.0030757525935769083, - "step": 13315 - }, - { - "epoch": 2.2710997442455243, - "grad_norm": 0.046554502099752426, - "learning_rate": 3.060109679478821e-05, - "loss": 0.0022816451266407965, - "step": 13320 - }, - { - "epoch": 2.2719522591645354, - "grad_norm": 0.03428821638226509, - "learning_rate": 3.0575913780073036e-05, - "loss": 0.002047870494425297, - "step": 13325 - }, - { - "epoch": 2.2728047740835464, - "grad_norm": 0.09298217296600342, - "learning_rate": 3.0550733997751634e-05, - "loss": 0.002046193927526474, - "step": 13330 - }, - { - "epoch": 2.2736572890025575, - "grad_norm": 0.08458553999662399, - "learning_rate": 3.0525557459578786e-05, - "loss": 0.002566727437078953, - "step": 13335 - }, - { - "epoch": 2.2745098039215685, - "grad_norm": 0.10309132188558578, - "learning_rate": 3.050038417730772e-05, - "loss": 0.00358976349234581, - "step": 13340 - }, - { - "epoch": 2.2753623188405796, - "grad_norm": 0.08116701990365982, - "learning_rate": 3.0475214162690144e-05, - "loss": 0.003372102603316307, - "step": 13345 - }, - { - "epoch": 2.2762148337595907, - "grad_norm": 0.09258918464183807, - "learning_rate": 3.0450047427476292e-05, - "loss": 0.0037133049219846724, - "step": 13350 - }, - { - "epoch": 2.2770673486786017, - "grad_norm": 0.09618882834911346, - "learning_rate": 3.0424883983414797e-05, - "loss": 0.0024330444633960725, - "step": 13355 - }, - { - "epoch": 2.277919863597613, - "grad_norm": 0.04637463390827179, - "learning_rate": 3.039972384225282e-05, - "loss": 0.0011583495885133742, - "step": 13360 - }, - { - "epoch": 2.2787723785166243, - "grad_norm": 0.04919019341468811, - "learning_rate": 3.0374567015735953e-05, - "loss": 0.0017433254048228263, - "step": 13365 - }, - { - "epoch": 2.279624893435635, - "grad_norm": 0.07092445343732834, - "learning_rate": 3.0349413515608213e-05, - "loss": 0.0010275598615407944, - "step": 13370 - }, - { - "epoch": 2.2804774083546464, - "grad_norm": 0.06819095462560654, - "learning_rate": 3.03242633536121e-05, - "loss": 0.0018655678257346153, - "step": 13375 - }, - { - "epoch": 2.2813299232736575, - "grad_norm": 0.13397860527038574, - "learning_rate": 3.029911654148857e-05, - "loss": 0.0029754094779491425, - "step": 13380 - }, - { - "epoch": 2.2821824381926685, - "grad_norm": 0.09142930805683136, - "learning_rate": 3.0273973090976974e-05, - "loss": 0.0027707524597644804, - "step": 13385 - }, - { - "epoch": 2.2830349531116796, - "grad_norm": 0.06282728165388107, - "learning_rate": 3.0248833013815112e-05, - "loss": 0.0018412042409181595, - "step": 13390 - }, - { - "epoch": 2.2838874680306906, - "grad_norm": 0.05533494055271149, - "learning_rate": 3.0223696321739196e-05, - "loss": 0.0025158364325761793, - "step": 13395 - }, - { - "epoch": 2.2847399829497017, - "grad_norm": 0.08349598199129105, - "learning_rate": 3.0198563026483876e-05, - "loss": 0.002777436375617981, - "step": 13400 - }, - { - "epoch": 2.2855924978687128, - "grad_norm": 0.07469198107719421, - "learning_rate": 3.0173433139782227e-05, - "loss": 0.001951916702091694, - "step": 13405 - }, - { - "epoch": 2.286445012787724, - "grad_norm": 0.07126526534557343, - "learning_rate": 3.0148306673365708e-05, - "loss": 0.0031182590872049333, - "step": 13410 - }, - { - "epoch": 2.287297527706735, - "grad_norm": 0.06499479711055756, - "learning_rate": 3.0123183638964183e-05, - "loss": 0.001717902161180973, - "step": 13415 - }, - { - "epoch": 2.288150042625746, - "grad_norm": 0.03133346140384674, - "learning_rate": 3.0098064048305917e-05, - "loss": 0.0015830917283892632, - "step": 13420 - }, - { - "epoch": 2.289002557544757, - "grad_norm": 0.06725561618804932, - "learning_rate": 3.0072947913117573e-05, - "loss": 0.004541714489459991, - "step": 13425 - }, - { - "epoch": 2.289855072463768, - "grad_norm": 0.13644525408744812, - "learning_rate": 3.0047835245124216e-05, - "loss": 0.0027179479598999023, - "step": 13430 - }, - { - "epoch": 2.290707587382779, - "grad_norm": 0.06966832280158997, - "learning_rate": 3.0022726056049262e-05, - "loss": 0.0026542846113443374, - "step": 13435 - }, - { - "epoch": 2.29156010230179, - "grad_norm": 0.04449222609400749, - "learning_rate": 2.999762035761451e-05, - "loss": 0.0014596210792660713, - "step": 13440 - }, - { - "epoch": 2.2924126172208013, - "grad_norm": 0.05453059822320938, - "learning_rate": 2.9972518161540124e-05, - "loss": 0.0024629242718219755, - "step": 13445 - }, - { - "epoch": 2.2932651321398123, - "grad_norm": 0.06370346248149872, - "learning_rate": 2.9947419479544677e-05, - "loss": 0.0018157381564378738, - "step": 13450 - }, - { - "epoch": 2.2941176470588234, - "grad_norm": 0.149154931306839, - "learning_rate": 2.992232432334505e-05, - "loss": 0.0038953136652708055, - "step": 13455 - }, - { - "epoch": 2.2949701619778344, - "grad_norm": 0.08758609741926193, - "learning_rate": 2.9897232704656494e-05, - "loss": 0.00197781715542078, - "step": 13460 - }, - { - "epoch": 2.2958226768968455, - "grad_norm": 0.04877983406186104, - "learning_rate": 2.9872144635192625e-05, - "loss": 0.0018029011785984038, - "step": 13465 - }, - { - "epoch": 2.296675191815857, - "grad_norm": 0.029492873698472977, - "learning_rate": 2.984706012666536e-05, - "loss": 0.00226336307823658, - "step": 13470 - }, - { - "epoch": 2.2975277067348676, - "grad_norm": 0.09038830548524857, - "learning_rate": 2.982197919078502e-05, - "loss": 0.0024063091725111006, - "step": 13475 - }, - { - "epoch": 2.298380221653879, - "grad_norm": 0.08629653602838516, - "learning_rate": 2.97969018392602e-05, - "loss": 0.0019390033558011055, - "step": 13480 - }, - { - "epoch": 2.29923273657289, - "grad_norm": 0.08667116612195969, - "learning_rate": 2.9771828083797832e-05, - "loss": 0.003171199932694435, - "step": 13485 - }, - { - "epoch": 2.3000852514919012, - "grad_norm": 0.07069036364555359, - "learning_rate": 2.974675793610318e-05, - "loss": 0.002098524570465088, - "step": 13490 - }, - { - "epoch": 2.3009377664109123, - "grad_norm": 0.0887150913476944, - "learning_rate": 2.972169140787985e-05, - "loss": 0.001710166409611702, - "step": 13495 - }, - { - "epoch": 2.3017902813299234, - "grad_norm": 0.08873872458934784, - "learning_rate": 2.969662851082972e-05, - "loss": 0.002029442973434925, - "step": 13500 - }, - { - "epoch": 2.3026427962489344, - "grad_norm": 0.09199293702840805, - "learning_rate": 2.9671569256652976e-05, - "loss": 0.0015904868021607399, - "step": 13505 - }, - { - "epoch": 2.3034953111679455, - "grad_norm": 0.07347019016742706, - "learning_rate": 2.9646513657048106e-05, - "loss": 0.002239963971078396, - "step": 13510 - }, - { - "epoch": 2.3043478260869565, - "grad_norm": 0.056011516600847244, - "learning_rate": 2.9621461723711897e-05, - "loss": 0.003089374490082264, - "step": 13515 - }, - { - "epoch": 2.3052003410059676, - "grad_norm": 0.05805368721485138, - "learning_rate": 2.9596413468339447e-05, - "loss": 0.0011475264094769956, - "step": 13520 - }, - { - "epoch": 2.3060528559249787, - "grad_norm": 0.08263146877288818, - "learning_rate": 2.95713689026241e-05, - "loss": 0.0027705669403076173, - "step": 13525 - }, - { - "epoch": 2.3069053708439897, - "grad_norm": 0.10079067945480347, - "learning_rate": 2.954632803825749e-05, - "loss": 0.0038317229598760607, - "step": 13530 - }, - { - "epoch": 2.307757885763001, - "grad_norm": 0.07248156517744064, - "learning_rate": 2.9521290886929514e-05, - "loss": 0.0017008930444717407, - "step": 13535 - }, - { - "epoch": 2.308610400682012, - "grad_norm": 0.09252380579710007, - "learning_rate": 2.949625746032838e-05, - "loss": 0.0021895600482821466, - "step": 13540 - }, - { - "epoch": 2.309462915601023, - "grad_norm": 0.03231853246688843, - "learning_rate": 2.947122777014051e-05, - "loss": 0.002471560053527355, - "step": 13545 - }, - { - "epoch": 2.310315430520034, - "grad_norm": 0.09625072032213211, - "learning_rate": 2.944620182805059e-05, - "loss": 0.002643503434956074, - "step": 13550 - }, - { - "epoch": 2.311167945439045, - "grad_norm": 0.11135435849428177, - "learning_rate": 2.9421179645741552e-05, - "loss": 0.0015677452087402345, - "step": 13555 - }, - { - "epoch": 2.312020460358056, - "grad_norm": 0.07239774614572525, - "learning_rate": 2.939616123489459e-05, - "loss": 0.0020940851420164107, - "step": 13560 - }, - { - "epoch": 2.3128729752770676, - "grad_norm": 0.0686500295996666, - "learning_rate": 2.937114660718915e-05, - "loss": 0.004896241426467896, - "step": 13565 - }, - { - "epoch": 2.313725490196078, - "grad_norm": 0.04634196311235428, - "learning_rate": 2.934613577430288e-05, - "loss": 0.0017542928457260133, - "step": 13570 - }, - { - "epoch": 2.3145780051150897, - "grad_norm": 0.08693452924489975, - "learning_rate": 2.9321128747911657e-05, - "loss": 0.003124900534749031, - "step": 13575 - }, - { - "epoch": 2.3154305200341008, - "grad_norm": 0.053911175578832626, - "learning_rate": 2.9296125539689615e-05, - "loss": 0.001699080690741539, - "step": 13580 - }, - { - "epoch": 2.316283034953112, - "grad_norm": 0.07346964627504349, - "learning_rate": 2.9271126161309052e-05, - "loss": 0.0027174966409802435, - "step": 13585 - }, - { - "epoch": 2.317135549872123, - "grad_norm": 0.07157005369663239, - "learning_rate": 2.9246130624440546e-05, - "loss": 0.0026199813932180406, - "step": 13590 - }, - { - "epoch": 2.317988064791134, - "grad_norm": 0.0852048397064209, - "learning_rate": 2.922113894075282e-05, - "loss": 0.002349478751420975, - "step": 13595 - }, - { - "epoch": 2.318840579710145, - "grad_norm": 0.069539375603199, - "learning_rate": 2.9196151121912828e-05, - "loss": 0.002428753860294819, - "step": 13600 - }, - { - "epoch": 2.319693094629156, - "grad_norm": 0.06993792951107025, - "learning_rate": 2.9171167179585712e-05, - "loss": 0.0025543162599205972, - "step": 13605 - }, - { - "epoch": 2.320545609548167, - "grad_norm": 0.09210001677274704, - "learning_rate": 2.9146187125434826e-05, - "loss": 0.004642657563090324, - "step": 13610 - }, - { - "epoch": 2.321398124467178, - "grad_norm": 0.03652270883321762, - "learning_rate": 2.9121210971121674e-05, - "loss": 0.0019740790128707887, - "step": 13615 - }, - { - "epoch": 2.3222506393861893, - "grad_norm": 0.032051410526037216, - "learning_rate": 2.9096238728305957e-05, - "loss": 0.0020309314131736755, - "step": 13620 - }, - { - "epoch": 2.3231031543052003, - "grad_norm": 0.08898582309484482, - "learning_rate": 2.907127040864556e-05, - "loss": 0.0012645654380321503, - "step": 13625 - }, - { - "epoch": 2.3239556692242114, - "grad_norm": 0.21863117814064026, - "learning_rate": 2.9046306023796493e-05, - "loss": 0.0025009674951434135, - "step": 13630 - }, - { - "epoch": 2.3248081841432224, - "grad_norm": 0.06401807814836502, - "learning_rate": 2.9021345585413004e-05, - "loss": 0.002794540859758854, - "step": 13635 - }, - { - "epoch": 2.3256606990622335, - "grad_norm": 0.049468256533145905, - "learning_rate": 2.8996389105147437e-05, - "loss": 0.0024725871160626413, - "step": 13640 - }, - { - "epoch": 2.3265132139812446, - "grad_norm": 0.0904751718044281, - "learning_rate": 2.8971436594650292e-05, - "loss": 0.0033982183784246446, - "step": 13645 - }, - { - "epoch": 2.3273657289002556, - "grad_norm": 0.11576029658317566, - "learning_rate": 2.8946488065570242e-05, - "loss": 0.004228492826223373, - "step": 13650 - }, - { - "epoch": 2.3282182438192667, - "grad_norm": 0.08191253244876862, - "learning_rate": 2.892154352955411e-05, - "loss": 0.0015400771982967854, - "step": 13655 - }, - { - "epoch": 2.3290707587382777, - "grad_norm": 0.03641185909509659, - "learning_rate": 2.8896602998246817e-05, - "loss": 0.002032958157360554, - "step": 13660 - }, - { - "epoch": 2.329923273657289, - "grad_norm": 0.09123575687408447, - "learning_rate": 2.8871666483291433e-05, - "loss": 0.00326089970767498, - "step": 13665 - }, - { - "epoch": 2.3307757885763003, - "grad_norm": 0.07897967845201492, - "learning_rate": 2.8846733996329148e-05, - "loss": 0.0022133901715278626, - "step": 13670 - }, - { - "epoch": 2.3316283034953114, - "grad_norm": 0.0802898034453392, - "learning_rate": 2.8821805548999275e-05, - "loss": 0.002646717242896557, - "step": 13675 - }, - { - "epoch": 2.3324808184143224, - "grad_norm": 0.05337275192141533, - "learning_rate": 2.879688115293926e-05, - "loss": 0.0022310430184006693, - "step": 13680 - }, - { - "epoch": 2.3333333333333335, - "grad_norm": 0.026133684441447258, - "learning_rate": 2.8771960819784635e-05, - "loss": 0.0013902435079216958, - "step": 13685 - }, - { - "epoch": 2.3341858482523445, - "grad_norm": 0.0701865404844284, - "learning_rate": 2.8747044561169026e-05, - "loss": 0.0030527923256158827, - "step": 13690 - }, - { - "epoch": 2.3350383631713556, - "grad_norm": 0.023815227672457695, - "learning_rate": 2.8722132388724187e-05, - "loss": 0.001688534766435623, - "step": 13695 - }, - { - "epoch": 2.3358908780903667, - "grad_norm": 0.0819278210401535, - "learning_rate": 2.8697224314079928e-05, - "loss": 0.0028546562418341635, - "step": 13700 - }, - { - "epoch": 2.3367433930093777, - "grad_norm": 0.03683038055896759, - "learning_rate": 2.86723203488642e-05, - "loss": 0.0024238623678684234, - "step": 13705 - }, - { - "epoch": 2.337595907928389, - "grad_norm": 0.050080958753824234, - "learning_rate": 2.8647420504702977e-05, - "loss": 0.001459009852260351, - "step": 13710 - }, - { - "epoch": 2.3384484228474, - "grad_norm": 0.04246260225772858, - "learning_rate": 2.8622524793220336e-05, - "loss": 0.0024909645318984984, - "step": 13715 - }, - { - "epoch": 2.339300937766411, - "grad_norm": 0.04298778250813484, - "learning_rate": 2.8597633226038422e-05, - "loss": 0.0017042815685272216, - "step": 13720 - }, - { - "epoch": 2.340153452685422, - "grad_norm": 0.08792980760335922, - "learning_rate": 2.857274581477747e-05, - "loss": 0.0021930102258920668, - "step": 13725 - }, - { - "epoch": 2.341005967604433, - "grad_norm": 0.030293628573417664, - "learning_rate": 2.854786257105573e-05, - "loss": 0.002472694218158722, - "step": 13730 - }, - { - "epoch": 2.341858482523444, - "grad_norm": 0.100398488342762, - "learning_rate": 2.852298350648953e-05, - "loss": 0.0016385417431592942, - "step": 13735 - }, - { - "epoch": 2.342710997442455, - "grad_norm": 0.056936830282211304, - "learning_rate": 2.849810863269325e-05, - "loss": 0.0014652124606072902, - "step": 13740 - }, - { - "epoch": 2.343563512361466, - "grad_norm": 0.04332558810710907, - "learning_rate": 2.8473237961279293e-05, - "loss": 0.0029267419129610063, - "step": 13745 - }, - { - "epoch": 2.3444160272804773, - "grad_norm": 0.051982469856739044, - "learning_rate": 2.8448371503858143e-05, - "loss": 0.001836571842432022, - "step": 13750 - }, - { - "epoch": 2.3452685421994883, - "grad_norm": 0.1215415671467781, - "learning_rate": 2.8423509272038276e-05, - "loss": 0.002749188058078289, - "step": 13755 - }, - { - "epoch": 2.3461210571184994, - "grad_norm": 0.044508881866931915, - "learning_rate": 2.8398651277426203e-05, - "loss": 0.0023854803293943405, - "step": 13760 - }, - { - "epoch": 2.346973572037511, - "grad_norm": 0.09419308602809906, - "learning_rate": 2.837379753162647e-05, - "loss": 0.00259498693048954, - "step": 13765 - }, - { - "epoch": 2.3478260869565215, - "grad_norm": 0.0996370017528534, - "learning_rate": 2.8348948046241616e-05, - "loss": 0.003275657445192337, - "step": 13770 - }, - { - "epoch": 2.348678601875533, - "grad_norm": 0.0585092268884182, - "learning_rate": 2.8324102832872238e-05, - "loss": 0.0023032236844301225, - "step": 13775 - }, - { - "epoch": 2.349531116794544, - "grad_norm": 0.06259947270154953, - "learning_rate": 2.829926190311689e-05, - "loss": 0.0022853843867778776, - "step": 13780 - }, - { - "epoch": 2.350383631713555, - "grad_norm": 0.1343093067407608, - "learning_rate": 2.827442526857214e-05, - "loss": 0.0019558047875761985, - "step": 13785 - }, - { - "epoch": 2.351236146632566, - "grad_norm": 0.03901712968945503, - "learning_rate": 2.8249592940832552e-05, - "loss": 0.0019383212551474572, - "step": 13790 - }, - { - "epoch": 2.3520886615515773, - "grad_norm": 0.08933644741773605, - "learning_rate": 2.8224764931490707e-05, - "loss": 0.0019501563161611556, - "step": 13795 - }, - { - "epoch": 2.3529411764705883, - "grad_norm": 0.06790988147258759, - "learning_rate": 2.819994125213713e-05, - "loss": 0.0018905265256762504, - "step": 13800 - }, - { - "epoch": 2.3537936913895994, - "grad_norm": 0.10576235502958298, - "learning_rate": 2.817512191436033e-05, - "loss": 0.0017807571217417716, - "step": 13805 - }, - { - "epoch": 2.3546462063086104, - "grad_norm": 0.07914351671934128, - "learning_rate": 2.8150306929746826e-05, - "loss": 0.002854841575026512, - "step": 13810 - }, - { - "epoch": 2.3554987212276215, - "grad_norm": 0.10912367701530457, - "learning_rate": 2.812549630988104e-05, - "loss": 0.0028494328260421755, - "step": 13815 - }, - { - "epoch": 2.3563512361466326, - "grad_norm": 0.07309834659099579, - "learning_rate": 2.8100690066345434e-05, - "loss": 0.001808878593146801, - "step": 13820 - }, - { - "epoch": 2.3572037510656436, - "grad_norm": 0.07053545117378235, - "learning_rate": 2.807588821072037e-05, - "loss": 0.0024722769856452944, - "step": 13825 - }, - { - "epoch": 2.3580562659846547, - "grad_norm": 0.06512318551540375, - "learning_rate": 2.8051090754584176e-05, - "loss": 0.0025828687474131586, - "step": 13830 - }, - { - "epoch": 2.3589087809036657, - "grad_norm": 0.06797149777412415, - "learning_rate": 2.8026297709513125e-05, - "loss": 0.0021874068304896356, - "step": 13835 - }, - { - "epoch": 2.359761295822677, - "grad_norm": 0.12261441349983215, - "learning_rate": 2.800150908708145e-05, - "loss": 0.00291924811899662, - "step": 13840 - }, - { - "epoch": 2.360613810741688, - "grad_norm": 0.05696386098861694, - "learning_rate": 2.797672489886131e-05, - "loss": 0.003488580882549286, - "step": 13845 - }, - { - "epoch": 2.361466325660699, - "grad_norm": 0.3340120315551758, - "learning_rate": 2.795194515642276e-05, - "loss": 0.0033275336027145386, - "step": 13850 - }, - { - "epoch": 2.36231884057971, - "grad_norm": 0.08209964632987976, - "learning_rate": 2.7927169871333836e-05, - "loss": 0.0020242417231202126, - "step": 13855 - }, - { - "epoch": 2.363171355498721, - "grad_norm": 0.04942183569073677, - "learning_rate": 2.7902399055160435e-05, - "loss": 0.0015470117330551147, - "step": 13860 - }, - { - "epoch": 2.364023870417732, - "grad_norm": 0.07711990922689438, - "learning_rate": 2.7877632719466438e-05, - "loss": 0.002402086555957794, - "step": 13865 - }, - { - "epoch": 2.3648763853367436, - "grad_norm": 0.06835886090993881, - "learning_rate": 2.7852870875813572e-05, - "loss": 0.002709807641804218, - "step": 13870 - }, - { - "epoch": 2.3657289002557547, - "grad_norm": 0.01572684571146965, - "learning_rate": 2.7828113535761476e-05, - "loss": 0.0037427868694067, - "step": 13875 - }, - { - "epoch": 2.3665814151747657, - "grad_norm": 0.03897464647889137, - "learning_rate": 2.7803360710867728e-05, - "loss": 0.0029004696756601334, - "step": 13880 - }, - { - "epoch": 2.367433930093777, - "grad_norm": 0.1281740814447403, - "learning_rate": 2.777861241268774e-05, - "loss": 0.0021549168974161147, - "step": 13885 - }, - { - "epoch": 2.368286445012788, - "grad_norm": 0.04390920698642731, - "learning_rate": 2.7753868652774873e-05, - "loss": 0.0019567809998989106, - "step": 13890 - }, - { - "epoch": 2.369138959931799, - "grad_norm": 0.09526315331459045, - "learning_rate": 2.7729129442680314e-05, - "loss": 0.001414876524358988, - "step": 13895 - }, - { - "epoch": 2.36999147485081, - "grad_norm": 0.041541386395692825, - "learning_rate": 2.7704394793953162e-05, - "loss": 0.0023986730724573136, - "step": 13900 - }, - { - "epoch": 2.370843989769821, - "grad_norm": 0.056684307754039764, - "learning_rate": 2.7679664718140354e-05, - "loss": 0.0023011576384305956, - "step": 13905 - }, - { - "epoch": 2.371696504688832, - "grad_norm": 0.04548821225762367, - "learning_rate": 2.765493922678674e-05, - "loss": 0.002776668407022953, - "step": 13910 - }, - { - "epoch": 2.372549019607843, - "grad_norm": 0.05635173246264458, - "learning_rate": 2.763021833143499e-05, - "loss": 0.0021549917757511137, - "step": 13915 - }, - { - "epoch": 2.373401534526854, - "grad_norm": 0.06744635850191116, - "learning_rate": 2.7605502043625636e-05, - "loss": 0.0014210479333996774, - "step": 13920 - }, - { - "epoch": 2.3742540494458653, - "grad_norm": 0.03131572902202606, - "learning_rate": 2.758079037489707e-05, - "loss": 0.002670668438076973, - "step": 13925 - }, - { - "epoch": 2.3751065643648763, - "grad_norm": 0.1132262721657753, - "learning_rate": 2.75560833367855e-05, - "loss": 0.004025829955935478, - "step": 13930 - }, - { - "epoch": 2.3759590792838874, - "grad_norm": 0.08719862997531891, - "learning_rate": 2.753138094082502e-05, - "loss": 0.0026264961808919905, - "step": 13935 - }, - { - "epoch": 2.3768115942028984, - "grad_norm": 0.045282550156116486, - "learning_rate": 2.7506683198547527e-05, - "loss": 0.0016890913248062134, - "step": 13940 - }, - { - "epoch": 2.3776641091219095, - "grad_norm": 0.03815371170639992, - "learning_rate": 2.7481990121482737e-05, - "loss": 0.0017980627715587616, - "step": 13945 - }, - { - "epoch": 2.3785166240409206, - "grad_norm": 0.05136419087648392, - "learning_rate": 2.745730172115819e-05, - "loss": 0.0017518583685159684, - "step": 13950 - }, - { - "epoch": 2.3793691389599316, - "grad_norm": 0.076651431620121, - "learning_rate": 2.743261800909929e-05, - "loss": 0.0021933792158961296, - "step": 13955 - }, - { - "epoch": 2.3802216538789427, - "grad_norm": 0.04328504204750061, - "learning_rate": 2.740793899682919e-05, - "loss": 0.0015049883164465427, - "step": 13960 - }, - { - "epoch": 2.381074168797954, - "grad_norm": 0.029004819691181183, - "learning_rate": 2.7383264695868863e-05, - "loss": 0.0023387337103486063, - "step": 13965 - }, - { - "epoch": 2.381926683716965, - "grad_norm": 0.11483976989984512, - "learning_rate": 2.7358595117737118e-05, - "loss": 0.00246519148349762, - "step": 13970 - }, - { - "epoch": 2.3827791986359763, - "grad_norm": 0.09073470532894135, - "learning_rate": 2.733393027395051e-05, - "loss": 0.0031791247427463533, - "step": 13975 - }, - { - "epoch": 2.3836317135549874, - "grad_norm": 0.12094864249229431, - "learning_rate": 2.7309270176023436e-05, - "loss": 0.0025795340538024903, - "step": 13980 - }, - { - "epoch": 2.3844842284739984, - "grad_norm": 0.13568098843097687, - "learning_rate": 2.7284614835468035e-05, - "loss": 0.0057578980922698975, - "step": 13985 - }, - { - "epoch": 2.3853367433930095, - "grad_norm": 0.06415567547082901, - "learning_rate": 2.725996426379423e-05, - "loss": 0.0024575673043727873, - "step": 13990 - }, - { - "epoch": 2.3861892583120206, - "grad_norm": 0.05898221582174301, - "learning_rate": 2.723531847250975e-05, - "loss": 0.0013358716852962971, - "step": 13995 - }, - { - "epoch": 2.3870417732310316, - "grad_norm": 0.019117049872875214, - "learning_rate": 2.721067747312004e-05, - "loss": 0.0016026780009269713, - "step": 14000 - }, - { - "epoch": 2.3878942881500427, - "grad_norm": 0.028591491281986237, - "learning_rate": 2.7186041277128383e-05, - "loss": 0.001663113385438919, - "step": 14005 - }, - { - "epoch": 2.3887468030690537, - "grad_norm": 0.03701665997505188, - "learning_rate": 2.7161409896035733e-05, - "loss": 0.0012899260967969895, - "step": 14010 - }, - { - "epoch": 2.389599317988065, - "grad_norm": 0.05777057632803917, - "learning_rate": 2.7136783341340862e-05, - "loss": 0.0018556809052824974, - "step": 14015 - }, - { - "epoch": 2.390451832907076, - "grad_norm": 0.04922354966402054, - "learning_rate": 2.711216162454024e-05, - "loss": 0.002131880074739456, - "step": 14020 - }, - { - "epoch": 2.391304347826087, - "grad_norm": 0.045851659029722214, - "learning_rate": 2.708754475712814e-05, - "loss": 0.001147150807082653, - "step": 14025 - }, - { - "epoch": 2.392156862745098, - "grad_norm": 0.11482678353786469, - "learning_rate": 2.7062932750596514e-05, - "loss": 0.0027298804372549055, - "step": 14030 - }, - { - "epoch": 2.393009377664109, - "grad_norm": 0.054821670055389404, - "learning_rate": 2.7038325616435058e-05, - "loss": 0.0018268844112753868, - "step": 14035 - }, - { - "epoch": 2.39386189258312, - "grad_norm": 0.09821441024541855, - "learning_rate": 2.701372336613122e-05, - "loss": 0.002109052799642086, - "step": 14040 - }, - { - "epoch": 2.394714407502131, - "grad_norm": 0.04923141747713089, - "learning_rate": 2.6989126011170115e-05, - "loss": 0.0021799976006150247, - "step": 14045 - }, - { - "epoch": 2.395566922421142, - "grad_norm": 0.1223372220993042, - "learning_rate": 2.6964533563034648e-05, - "loss": 0.00261150524020195, - "step": 14050 - }, - { - "epoch": 2.3964194373401533, - "grad_norm": 0.04964495450258255, - "learning_rate": 2.6939946033205374e-05, - "loss": 0.001747405156493187, - "step": 14055 - }, - { - "epoch": 2.397271952259165, - "grad_norm": 0.05354087054729462, - "learning_rate": 2.6915363433160562e-05, - "loss": 0.0017880409955978393, - "step": 14060 - }, - { - "epoch": 2.3981244671781754, - "grad_norm": 0.0796194076538086, - "learning_rate": 2.6890785774376188e-05, - "loss": 0.002280256152153015, - "step": 14065 - }, - { - "epoch": 2.398976982097187, - "grad_norm": 0.048979468643665314, - "learning_rate": 2.6866213068325942e-05, - "loss": 0.0034266695380210876, - "step": 14070 - }, - { - "epoch": 2.399829497016198, - "grad_norm": 0.11115774512290955, - "learning_rate": 2.6841645326481166e-05, - "loss": 0.0014098694548010827, - "step": 14075 - }, - { - "epoch": 2.400682011935209, - "grad_norm": 0.14144426584243774, - "learning_rate": 2.681708256031089e-05, - "loss": 0.0017399771139025688, - "step": 14080 - }, - { - "epoch": 2.40153452685422, - "grad_norm": 0.060562510043382645, - "learning_rate": 2.6792524781281846e-05, - "loss": 0.0031288094818592072, - "step": 14085 - }, - { - "epoch": 2.402387041773231, - "grad_norm": 0.08271291851997375, - "learning_rate": 2.6767972000858402e-05, - "loss": 0.002268883027136326, - "step": 14090 - }, - { - "epoch": 2.403239556692242, - "grad_norm": 0.08203598111867905, - "learning_rate": 2.674342423050264e-05, - "loss": 0.0017265897244215012, - "step": 14095 - }, - { - "epoch": 2.4040920716112533, - "grad_norm": 0.07809042930603027, - "learning_rate": 2.6718881481674265e-05, - "loss": 0.0032232727855443953, - "step": 14100 - }, - { - "epoch": 2.4049445865302643, - "grad_norm": 0.043053366243839264, - "learning_rate": 2.6694343765830633e-05, - "loss": 0.0014350255951285362, - "step": 14105 - }, - { - "epoch": 2.4057971014492754, - "grad_norm": 0.2139715999364853, - "learning_rate": 2.666981109442679e-05, - "loss": 0.002208554185926914, - "step": 14110 - }, - { - "epoch": 2.4066496163682864, - "grad_norm": 0.028433851897716522, - "learning_rate": 2.6645283478915373e-05, - "loss": 0.0033426061272621155, - "step": 14115 - }, - { - "epoch": 2.4075021312872975, - "grad_norm": 0.03152618184685707, - "learning_rate": 2.6620760930746726e-05, - "loss": 0.0017683111131191255, - "step": 14120 - }, - { - "epoch": 2.4083546462063086, - "grad_norm": 0.11559031158685684, - "learning_rate": 2.6596243461368762e-05, - "loss": 0.0027762461453676225, - "step": 14125 - }, - { - "epoch": 2.4092071611253196, - "grad_norm": 0.08188942819833755, - "learning_rate": 2.6571731082227068e-05, - "loss": 0.0029629599303007126, - "step": 14130 - }, - { - "epoch": 2.4100596760443307, - "grad_norm": 0.03179270401597023, - "learning_rate": 2.654722380476482e-05, - "loss": 0.001593652181327343, - "step": 14135 - }, - { - "epoch": 2.4109121909633418, - "grad_norm": 0.03763008117675781, - "learning_rate": 2.652272164042285e-05, - "loss": 0.003974568471312523, - "step": 14140 - }, - { - "epoch": 2.411764705882353, - "grad_norm": 0.06221388280391693, - "learning_rate": 2.649822460063958e-05, - "loss": 0.0021382227540016176, - "step": 14145 - }, - { - "epoch": 2.412617220801364, - "grad_norm": 0.13541199266910553, - "learning_rate": 2.6473732696851025e-05, - "loss": 0.0030446551740169526, - "step": 14150 - }, - { - "epoch": 2.413469735720375, - "grad_norm": 0.07515605539083481, - "learning_rate": 2.6449245940490843e-05, - "loss": 0.0023170780390501023, - "step": 14155 - }, - { - "epoch": 2.414322250639386, - "grad_norm": 0.029287993907928467, - "learning_rate": 2.6424764342990247e-05, - "loss": 0.002732834219932556, - "step": 14160 - }, - { - "epoch": 2.4151747655583975, - "grad_norm": 0.056158751249313354, - "learning_rate": 2.6400287915778073e-05, - "loss": 0.0026283055543899537, - "step": 14165 - }, - { - "epoch": 2.416027280477408, - "grad_norm": 0.05005735903978348, - "learning_rate": 2.6375816670280742e-05, - "loss": 0.0021377095952630045, - "step": 14170 - }, - { - "epoch": 2.4168797953964196, - "grad_norm": 0.039338257163763046, - "learning_rate": 2.6351350617922217e-05, - "loss": 0.0010171877220273018, - "step": 14175 - }, - { - "epoch": 2.4177323103154307, - "grad_norm": 0.054605189710855484, - "learning_rate": 2.6326889770124074e-05, - "loss": 0.0015358464792370797, - "step": 14180 - }, - { - "epoch": 2.4185848252344417, - "grad_norm": 0.05107913911342621, - "learning_rate": 2.630243413830547e-05, - "loss": 0.0014638695865869522, - "step": 14185 - }, - { - "epoch": 2.419437340153453, - "grad_norm": 0.14121516048908234, - "learning_rate": 2.62779837338831e-05, - "loss": 0.0018762655556201935, - "step": 14190 - }, - { - "epoch": 2.420289855072464, - "grad_norm": 0.13554073870182037, - "learning_rate": 2.625353856827121e-05, - "loss": 0.002315247431397438, - "step": 14195 - }, - { - "epoch": 2.421142369991475, - "grad_norm": 0.07378100603818893, - "learning_rate": 2.6229098652881636e-05, - "loss": 0.0017681105062365531, - "step": 14200 - }, - { - "epoch": 2.421994884910486, - "grad_norm": 0.0729142278432846, - "learning_rate": 2.6204663999123712e-05, - "loss": 0.0013508319854736329, - "step": 14205 - }, - { - "epoch": 2.422847399829497, - "grad_norm": 0.09028290957212448, - "learning_rate": 2.6180234618404393e-05, - "loss": 0.0023917261511087417, - "step": 14210 - }, - { - "epoch": 2.423699914748508, - "grad_norm": 0.042102012783288956, - "learning_rate": 2.6155810522128105e-05, - "loss": 0.001337253674864769, - "step": 14215 - }, - { - "epoch": 2.424552429667519, - "grad_norm": 0.126102477312088, - "learning_rate": 2.6131391721696812e-05, - "loss": 0.0030670080333948134, - "step": 14220 - }, - { - "epoch": 2.42540494458653, - "grad_norm": 0.08583983033895493, - "learning_rate": 2.6106978228510047e-05, - "loss": 0.0025723014026880265, - "step": 14225 - }, - { - "epoch": 2.4262574595055413, - "grad_norm": 0.0516071692109108, - "learning_rate": 2.608257005396482e-05, - "loss": 0.0020857708528637885, - "step": 14230 - }, - { - "epoch": 2.4271099744245523, - "grad_norm": 0.08321108669042587, - "learning_rate": 2.6058167209455697e-05, - "loss": 0.0023237552493810655, - "step": 14235 - }, - { - "epoch": 2.4279624893435634, - "grad_norm": 0.04344337806105614, - "learning_rate": 2.6033769706374727e-05, - "loss": 0.0016502588987350464, - "step": 14240 - }, - { - "epoch": 2.4288150042625745, - "grad_norm": 0.10716593265533447, - "learning_rate": 2.6009377556111488e-05, - "loss": 0.002213199995458126, - "step": 14245 - }, - { - "epoch": 2.4296675191815855, - "grad_norm": 0.08346270024776459, - "learning_rate": 2.598499077005302e-05, - "loss": 0.0023431163281202316, - "step": 14250 - }, - { - "epoch": 2.4305200341005966, - "grad_norm": 0.032770343124866486, - "learning_rate": 2.596060935958392e-05, - "loss": 0.0011562082916498184, - "step": 14255 - }, - { - "epoch": 2.431372549019608, - "grad_norm": 0.09246552735567093, - "learning_rate": 2.593623333608623e-05, - "loss": 0.002459176816046238, - "step": 14260 - }, - { - "epoch": 2.4322250639386187, - "grad_norm": 0.05482151731848717, - "learning_rate": 2.5911862710939474e-05, - "loss": 0.0019333874806761742, - "step": 14265 - }, - { - "epoch": 2.43307757885763, - "grad_norm": 0.03243163228034973, - "learning_rate": 2.588749749552069e-05, - "loss": 0.0017584215849637986, - "step": 14270 - }, - { - "epoch": 2.4339300937766413, - "grad_norm": 0.07286939024925232, - "learning_rate": 2.586313770120434e-05, - "loss": 0.002444162592291832, - "step": 14275 - }, - { - "epoch": 2.4347826086956523, - "grad_norm": 0.05575154721736908, - "learning_rate": 2.583878333936243e-05, - "loss": 0.0024999476969242095, - "step": 14280 - }, - { - "epoch": 2.4356351236146634, - "grad_norm": 0.10262400656938553, - "learning_rate": 2.5814434421364354e-05, - "loss": 0.0018360136076807977, - "step": 14285 - }, - { - "epoch": 2.4364876385336744, - "grad_norm": 0.023329658433794975, - "learning_rate": 2.5790090958577017e-05, - "loss": 0.002157992497086525, - "step": 14290 - }, - { - "epoch": 2.4373401534526855, - "grad_norm": 0.11155838519334793, - "learning_rate": 2.576575296236473e-05, - "loss": 0.002236923947930336, - "step": 14295 - }, - { - "epoch": 2.4381926683716966, - "grad_norm": 0.015751022845506668, - "learning_rate": 2.5741420444089317e-05, - "loss": 0.0023830370977520944, - "step": 14300 - }, - { - "epoch": 2.4390451832907076, - "grad_norm": 0.06451129168272018, - "learning_rate": 2.5717093415109982e-05, - "loss": 0.0012244164943695068, - "step": 14305 - }, - { - "epoch": 2.4398976982097187, - "grad_norm": 0.05141889676451683, - "learning_rate": 2.569277188678339e-05, - "loss": 0.0008386586792767048, - "step": 14310 - }, - { - "epoch": 2.4407502131287298, - "grad_norm": 0.07528503239154816, - "learning_rate": 2.5668455870463654e-05, - "loss": 0.0027780460193753244, - "step": 14315 - }, - { - "epoch": 2.441602728047741, - "grad_norm": 0.0676177367568016, - "learning_rate": 2.5644145377502277e-05, - "loss": 0.002171286940574646, - "step": 14320 - }, - { - "epoch": 2.442455242966752, - "grad_norm": 0.03209437057375908, - "learning_rate": 2.5619840419248228e-05, - "loss": 0.0011549444869160652, - "step": 14325 - }, - { - "epoch": 2.443307757885763, - "grad_norm": 0.0711345300078392, - "learning_rate": 2.559554100704787e-05, - "loss": 0.0029217278584837913, - "step": 14330 - }, - { - "epoch": 2.444160272804774, - "grad_norm": 0.07314640283584595, - "learning_rate": 2.5571247152244955e-05, - "loss": 0.0019763100892305372, - "step": 14335 - }, - { - "epoch": 2.445012787723785, - "grad_norm": 0.058573171496391296, - "learning_rate": 2.5546958866180686e-05, - "loss": 0.0023175042122602465, - "step": 14340 - }, - { - "epoch": 2.445865302642796, - "grad_norm": 0.06780791282653809, - "learning_rate": 2.552267616019362e-05, - "loss": 0.0022560084238648415, - "step": 14345 - }, - { - "epoch": 2.446717817561807, - "grad_norm": 0.0834873840212822, - "learning_rate": 2.5498399045619755e-05, - "loss": 0.0015980398282408714, - "step": 14350 - }, - { - "epoch": 2.4475703324808182, - "grad_norm": 0.06677491962909698, - "learning_rate": 2.5474127533792443e-05, - "loss": 0.002242721430957317, - "step": 14355 - }, - { - "epoch": 2.4484228473998293, - "grad_norm": 0.11220566183328629, - "learning_rate": 2.5449861636042443e-05, - "loss": 0.001862034946680069, - "step": 14360 - }, - { - "epoch": 2.449275362318841, - "grad_norm": 0.05493709817528725, - "learning_rate": 2.542560136369786e-05, - "loss": 0.0020324042066931724, - "step": 14365 - }, - { - "epoch": 2.4501278772378514, - "grad_norm": 0.09586431086063385, - "learning_rate": 2.5401346728084225e-05, - "loss": 0.001961209811270237, - "step": 14370 - }, - { - "epoch": 2.450980392156863, - "grad_norm": 0.06384766101837158, - "learning_rate": 2.5377097740524402e-05, - "loss": 0.002969523146748543, - "step": 14375 - }, - { - "epoch": 2.451832907075874, - "grad_norm": 0.11584383249282837, - "learning_rate": 2.5352854412338607e-05, - "loss": 0.0037360407412052156, - "step": 14380 - }, - { - "epoch": 2.452685421994885, - "grad_norm": 0.05268854275345802, - "learning_rate": 2.5328616754844447e-05, - "loss": 0.0024207277223467828, - "step": 14385 - }, - { - "epoch": 2.453537936913896, - "grad_norm": 0.10550973564386368, - "learning_rate": 2.5304384779356855e-05, - "loss": 0.002147519588470459, - "step": 14390 - }, - { - "epoch": 2.454390451832907, - "grad_norm": 0.11402281373739243, - "learning_rate": 2.5280158497188144e-05, - "loss": 0.0030479192733764648, - "step": 14395 - }, - { - "epoch": 2.455242966751918, - "grad_norm": 0.042928412556648254, - "learning_rate": 2.5255937919647928e-05, - "loss": 0.0009582490660250187, - "step": 14400 - }, - { - "epoch": 2.4560954816709293, - "grad_norm": 0.09466255456209183, - "learning_rate": 2.52317230580432e-05, - "loss": 0.0028877202421426773, - "step": 14405 - }, - { - "epoch": 2.4569479965899403, - "grad_norm": 0.0167491864413023, - "learning_rate": 2.5207513923678246e-05, - "loss": 0.002237674966454506, - "step": 14410 - }, - { - "epoch": 2.4578005115089514, - "grad_norm": 0.11767696589231491, - "learning_rate": 2.518331052785468e-05, - "loss": 0.00270021203905344, - "step": 14415 - }, - { - "epoch": 2.4586530264279625, - "grad_norm": 0.13400165736675262, - "learning_rate": 2.5159112881871494e-05, - "loss": 0.0025584336370229723, - "step": 14420 - }, - { - "epoch": 2.4595055413469735, - "grad_norm": 0.051460813730955124, - "learning_rate": 2.5134920997024915e-05, - "loss": 0.001182288955897093, - "step": 14425 - }, - { - "epoch": 2.4603580562659846, - "grad_norm": 0.05078651383519173, - "learning_rate": 2.511073488460855e-05, - "loss": 0.001340255793184042, - "step": 14430 - }, - { - "epoch": 2.4612105711849956, - "grad_norm": 0.06714113801717758, - "learning_rate": 2.5086554555913245e-05, - "loss": 0.0019190860912203789, - "step": 14435 - }, - { - "epoch": 2.4620630861040067, - "grad_norm": 0.05757109820842743, - "learning_rate": 2.5062380022227226e-05, - "loss": 0.0016031917184591293, - "step": 14440 - }, - { - "epoch": 2.4629156010230178, - "grad_norm": 0.045739807188510895, - "learning_rate": 2.5038211294835944e-05, - "loss": 0.0020723894238471987, - "step": 14445 - }, - { - "epoch": 2.463768115942029, - "grad_norm": 0.06381653994321823, - "learning_rate": 2.5014048385022156e-05, - "loss": 0.002237732522189617, - "step": 14450 - }, - { - "epoch": 2.46462063086104, - "grad_norm": 0.08096056431531906, - "learning_rate": 2.498989130406594e-05, - "loss": 0.0017275322228670121, - "step": 14455 - }, - { - "epoch": 2.4654731457800514, - "grad_norm": 0.04627775028347969, - "learning_rate": 2.4965740063244582e-05, - "loss": 0.0028135737404227255, - "step": 14460 - }, - { - "epoch": 2.466325660699062, - "grad_norm": 0.07789458334445953, - "learning_rate": 2.4941594673832737e-05, - "loss": 0.0017165482044219972, - "step": 14465 - }, - { - "epoch": 2.4671781756180735, - "grad_norm": 0.03633275255560875, - "learning_rate": 2.491745514710224e-05, - "loss": 0.003017013892531395, - "step": 14470 - }, - { - "epoch": 2.4680306905370846, - "grad_norm": 0.07425010204315186, - "learning_rate": 2.489332149432224e-05, - "loss": 0.002849162742495537, - "step": 14475 - }, - { - "epoch": 2.4688832054560956, - "grad_norm": 0.08738066256046295, - "learning_rate": 2.486919372675911e-05, - "loss": 0.003103286027908325, - "step": 14480 - }, - { - "epoch": 2.4697357203751067, - "grad_norm": 0.059462107717990875, - "learning_rate": 2.4845071855676526e-05, - "loss": 0.003129242733120918, - "step": 14485 - }, - { - "epoch": 2.4705882352941178, - "grad_norm": 0.12157633155584335, - "learning_rate": 2.4820955892335358e-05, - "loss": 0.00188961960375309, - "step": 14490 - }, - { - "epoch": 2.471440750213129, - "grad_norm": 0.04780135303735733, - "learning_rate": 2.4796845847993743e-05, - "loss": 0.001777658425271511, - "step": 14495 - }, - { - "epoch": 2.47229326513214, - "grad_norm": 0.08734847605228424, - "learning_rate": 2.477274173390706e-05, - "loss": 0.0025872459635138513, - "step": 14500 - }, - { - "epoch": 2.473145780051151, - "grad_norm": 0.08637238293886185, - "learning_rate": 2.4748643561327887e-05, - "loss": 0.0034623559564352035, - "step": 14505 - }, - { - "epoch": 2.473998294970162, - "grad_norm": 0.1351020187139511, - "learning_rate": 2.4724551341506083e-05, - "loss": 0.0025932226330041886, - "step": 14510 - }, - { - "epoch": 2.474850809889173, - "grad_norm": 0.0965266153216362, - "learning_rate": 2.4700465085688678e-05, - "loss": 0.0021650340408086778, - "step": 14515 - }, - { - "epoch": 2.475703324808184, - "grad_norm": 0.06353217363357544, - "learning_rate": 2.4676384805119954e-05, - "loss": 0.0017436511814594268, - "step": 14520 - }, - { - "epoch": 2.476555839727195, - "grad_norm": 0.09694099426269531, - "learning_rate": 2.4652310511041376e-05, - "loss": 0.002511733956634998, - "step": 14525 - }, - { - "epoch": 2.4774083546462062, - "grad_norm": 0.13362912833690643, - "learning_rate": 2.4628242214691614e-05, - "loss": 0.0020636413246393204, - "step": 14530 - }, - { - "epoch": 2.4782608695652173, - "grad_norm": 0.05283635854721069, - "learning_rate": 2.4604179927306575e-05, - "loss": 0.002218991331756115, - "step": 14535 - }, - { - "epoch": 2.4791133844842284, - "grad_norm": 0.062003809958696365, - "learning_rate": 2.4580123660119317e-05, - "loss": 0.0021969690918922425, - "step": 14540 - }, - { - "epoch": 2.4799658994032394, - "grad_norm": 0.1058121919631958, - "learning_rate": 2.4556073424360115e-05, - "loss": 0.002514044567942619, - "step": 14545 - }, - { - "epoch": 2.4808184143222505, - "grad_norm": 0.06746378540992737, - "learning_rate": 2.4532029231256397e-05, - "loss": 0.001485797483474016, - "step": 14550 - }, - { - "epoch": 2.4816709292412615, - "grad_norm": 0.043892405927181244, - "learning_rate": 2.4507991092032832e-05, - "loss": 0.0021189235150814055, - "step": 14555 - }, - { - "epoch": 2.4825234441602726, - "grad_norm": 0.04537670686841011, - "learning_rate": 2.4483959017911195e-05, - "loss": 0.0018616810441017151, - "step": 14560 - }, - { - "epoch": 2.483375959079284, - "grad_norm": 0.04895998165011406, - "learning_rate": 2.445993302011046e-05, - "loss": 0.0016737811267375946, - "step": 14565 - }, - { - "epoch": 2.484228473998295, - "grad_norm": 0.07096420228481293, - "learning_rate": 2.4435913109846773e-05, - "loss": 0.0032933827489614485, - "step": 14570 - }, - { - "epoch": 2.485080988917306, - "grad_norm": 0.07391496002674103, - "learning_rate": 2.4411899298333403e-05, - "loss": 0.0021815944463014604, - "step": 14575 - }, - { - "epoch": 2.4859335038363173, - "grad_norm": 0.12835897505283356, - "learning_rate": 2.438789159678083e-05, - "loss": 0.0032001670449972154, - "step": 14580 - }, - { - "epoch": 2.4867860187553283, - "grad_norm": 0.0947527140378952, - "learning_rate": 2.436389001639662e-05, - "loss": 0.002512381225824356, - "step": 14585 - }, - { - "epoch": 2.4876385336743394, - "grad_norm": 0.06699662655591965, - "learning_rate": 2.4339894568385526e-05, - "loss": 0.0014906782656908036, - "step": 14590 - }, - { - "epoch": 2.4884910485933505, - "grad_norm": 0.042523179203271866, - "learning_rate": 2.4315905263949404e-05, - "loss": 0.0012685291469097138, - "step": 14595 - }, - { - "epoch": 2.4893435635123615, - "grad_norm": 0.03687009960412979, - "learning_rate": 2.4291922114287286e-05, - "loss": 0.0016289660707116127, - "step": 14600 - }, - { - "epoch": 2.4901960784313726, - "grad_norm": 0.07698170840740204, - "learning_rate": 2.4267945130595287e-05, - "loss": 0.002090749144554138, - "step": 14605 - }, - { - "epoch": 2.4910485933503836, - "grad_norm": 0.08533983677625656, - "learning_rate": 2.4243974324066653e-05, - "loss": 0.002234157919883728, - "step": 14610 - }, - { - "epoch": 2.4919011082693947, - "grad_norm": 0.10050603002309799, - "learning_rate": 2.422000970589177e-05, - "loss": 0.002818283811211586, - "step": 14615 - }, - { - "epoch": 2.4927536231884058, - "grad_norm": 0.057129960507154465, - "learning_rate": 2.4196051287258095e-05, - "loss": 0.004226747527718544, - "step": 14620 - }, - { - "epoch": 2.493606138107417, - "grad_norm": 0.08218846470117569, - "learning_rate": 2.4172099079350256e-05, - "loss": 0.0016387354582548142, - "step": 14625 - }, - { - "epoch": 2.494458653026428, - "grad_norm": 0.07963220775127411, - "learning_rate": 2.4148153093349894e-05, - "loss": 0.002778450772166252, - "step": 14630 - }, - { - "epoch": 2.495311167945439, - "grad_norm": 0.058049995452165604, - "learning_rate": 2.4124213340435834e-05, - "loss": 0.0024016743525862696, - "step": 14635 - }, - { - "epoch": 2.49616368286445, - "grad_norm": 0.13127438724040985, - "learning_rate": 2.410027983178392e-05, - "loss": 0.0038317706435918807, - "step": 14640 - }, - { - "epoch": 2.497016197783461, - "grad_norm": 0.048698920756578445, - "learning_rate": 2.407635257856711e-05, - "loss": 0.00152621790766716, - "step": 14645 - }, - { - "epoch": 2.497868712702472, - "grad_norm": 0.02338201180100441, - "learning_rate": 2.405243159195546e-05, - "loss": 0.0027417311444878577, - "step": 14650 - }, - { - "epoch": 2.498721227621483, - "grad_norm": 0.07108049094676971, - "learning_rate": 2.402851688311607e-05, - "loss": 0.001716497913002968, - "step": 14655 - }, - { - "epoch": 2.4995737425404947, - "grad_norm": 0.028342491015791893, - "learning_rate": 2.4004608463213126e-05, - "loss": 0.0013954185880720616, - "step": 14660 - }, - { - "epoch": 2.4995737425404947, - "eval_loss": 0.04806143045425415, - "eval_runtime": 3.6619, - "eval_samples_per_second": 68.816, - "eval_steps_per_second": 1.092, - "step": 14660 - }, - { - "eval_cer_subset": 0.01446089208070741, - "eval_cer_subset_edit_distance": 888, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 14660 - }, - { - "epoch": 2.5004262574595053, - "grad_norm": 0.0640476793050766, - "learning_rate": 2.398070634340786e-05, - "loss": 0.002191193774342537, - "step": 14665 - }, - { - "epoch": 2.501278772378517, - "grad_norm": 0.034168582409620285, - "learning_rate": 2.3956810534858607e-05, - "loss": 0.0013371256180107593, - "step": 14670 - }, - { - "epoch": 2.502131287297528, - "grad_norm": 0.07181207835674286, - "learning_rate": 2.3932921048720698e-05, - "loss": 0.0019236713647842406, - "step": 14675 - }, - { - "epoch": 2.502983802216539, - "grad_norm": 0.0469810888171196, - "learning_rate": 2.3909037896146552e-05, - "loss": 0.0018267782405018806, - "step": 14680 - }, - { - "epoch": 2.50383631713555, - "grad_norm": 0.028826232999563217, - "learning_rate": 2.3885161088285617e-05, - "loss": 0.0010010387748479843, - "step": 14685 - }, - { - "epoch": 2.504688832054561, - "grad_norm": 0.10193654894828796, - "learning_rate": 2.386129063628437e-05, - "loss": 0.0024211697280406954, - "step": 14690 - }, - { - "epoch": 2.505541346973572, - "grad_norm": 0.14754284918308258, - "learning_rate": 2.3837426551286357e-05, - "loss": 0.0020253278315067293, - "step": 14695 - }, - { - "epoch": 2.506393861892583, - "grad_norm": 0.12155842036008835, - "learning_rate": 2.3813568844432103e-05, - "loss": 0.002546634152531624, - "step": 14700 - }, - { - "epoch": 2.5072463768115942, - "grad_norm": 0.07209643721580505, - "learning_rate": 2.378971752685919e-05, - "loss": 0.002657034806907177, - "step": 14705 - }, - { - "epoch": 2.5080988917306053, - "grad_norm": 0.1210934966802597, - "learning_rate": 2.3765872609702192e-05, - "loss": 0.002788655459880829, - "step": 14710 - }, - { - "epoch": 2.5089514066496164, - "grad_norm": 0.05647290125489235, - "learning_rate": 2.374203410409274e-05, - "loss": 0.0022313324734568596, - "step": 14715 - }, - { - "epoch": 2.5098039215686274, - "grad_norm": 0.18282383680343628, - "learning_rate": 2.371820202115942e-05, - "loss": 0.0019404273480176926, - "step": 14720 - }, - { - "epoch": 2.5106564364876385, - "grad_norm": 0.022936735302209854, - "learning_rate": 2.369437637202784e-05, - "loss": 0.0015677658841013909, - "step": 14725 - }, - { - "epoch": 2.5115089514066495, - "grad_norm": 0.023840973153710365, - "learning_rate": 2.3670557167820614e-05, - "loss": 0.0017029233276844025, - "step": 14730 - }, - { - "epoch": 2.5123614663256606, - "grad_norm": 0.01897628791630268, - "learning_rate": 2.3646744419657323e-05, - "loss": 0.001359708234667778, - "step": 14735 - }, - { - "epoch": 2.5132139812446717, - "grad_norm": 0.03326602280139923, - "learning_rate": 2.3622938138654575e-05, - "loss": 0.0006220267619937659, - "step": 14740 - }, - { - "epoch": 2.5140664961636827, - "grad_norm": 0.0619979090988636, - "learning_rate": 2.3599138335925914e-05, - "loss": 0.002030659094452858, - "step": 14745 - }, - { - "epoch": 2.514919011082694, - "grad_norm": 0.03451136127114296, - "learning_rate": 2.3575345022581896e-05, - "loss": 0.0016797656193375588, - "step": 14750 - }, - { - "epoch": 2.5157715260017053, - "grad_norm": 0.0554860420525074, - "learning_rate": 2.3551558209730018e-05, - "loss": 0.0016403241083025933, - "step": 14755 - }, - { - "epoch": 2.516624040920716, - "grad_norm": 0.08686158061027527, - "learning_rate": 2.3527777908474744e-05, - "loss": 0.003415975719690323, - "step": 14760 - }, - { - "epoch": 2.5174765558397274, - "grad_norm": 0.06636729836463928, - "learning_rate": 2.3504004129917542e-05, - "loss": 0.0018416630104184152, - "step": 14765 - }, - { - "epoch": 2.518329070758738, - "grad_norm": 0.08038193732500076, - "learning_rate": 2.3480236885156776e-05, - "loss": 0.002185085415840149, - "step": 14770 - }, - { - "epoch": 2.5191815856777495, - "grad_norm": 0.06370148062705994, - "learning_rate": 2.3456476185287802e-05, - "loss": 0.001394746359437704, - "step": 14775 - }, - { - "epoch": 2.5200341005967606, - "grad_norm": 0.05585980415344238, - "learning_rate": 2.3432722041402886e-05, - "loss": 0.003035778924822807, - "step": 14780 - }, - { - "epoch": 2.5208866155157716, - "grad_norm": 0.08676521480083466, - "learning_rate": 2.340897446459128e-05, - "loss": 0.002279702201485634, - "step": 14785 - }, - { - "epoch": 2.5217391304347827, - "grad_norm": 0.0421539731323719, - "learning_rate": 2.3385233465939124e-05, - "loss": 0.0015795350074768067, - "step": 14790 - }, - { - "epoch": 2.5225916453537938, - "grad_norm": 0.09380512684583664, - "learning_rate": 2.3361499056529516e-05, - "loss": 0.0024957180023193358, - "step": 14795 - }, - { - "epoch": 2.523444160272805, - "grad_norm": 0.05541060492396355, - "learning_rate": 2.3337771247442457e-05, - "loss": 0.0022170023992657663, - "step": 14800 - }, - { - "epoch": 2.524296675191816, - "grad_norm": 0.030795352533459663, - "learning_rate": 2.3314050049754872e-05, - "loss": 0.0015011204406619072, - "step": 14805 - }, - { - "epoch": 2.525149190110827, - "grad_norm": 0.040677715092897415, - "learning_rate": 2.329033547454063e-05, - "loss": 0.0023216739296913146, - "step": 14810 - }, - { - "epoch": 2.526001705029838, - "grad_norm": 0.036884501576423645, - "learning_rate": 2.3266627532870462e-05, - "loss": 0.0025786716490983964, - "step": 14815 - }, - { - "epoch": 2.526854219948849, - "grad_norm": 0.02398660033941269, - "learning_rate": 2.324292623581204e-05, - "loss": 0.0017933860421180726, - "step": 14820 - }, - { - "epoch": 2.52770673486786, - "grad_norm": 0.06601176410913467, - "learning_rate": 2.321923159442989e-05, - "loss": 0.002885073609650135, - "step": 14825 - }, - { - "epoch": 2.528559249786871, - "grad_norm": 0.08684834837913513, - "learning_rate": 2.3195543619785496e-05, - "loss": 0.0026486974209547045, - "step": 14830 - }, - { - "epoch": 2.5294117647058822, - "grad_norm": 0.04674920067191124, - "learning_rate": 2.3171862322937173e-05, - "loss": 0.0025192024186253548, - "step": 14835 - }, - { - "epoch": 2.5302642796248933, - "grad_norm": 0.059271425008773804, - "learning_rate": 2.314818771494013e-05, - "loss": 0.001517033763229847, - "step": 14840 - }, - { - "epoch": 2.5311167945439044, - "grad_norm": 0.03094577044248581, - "learning_rate": 2.312451980684648e-05, - "loss": 0.001731237769126892, - "step": 14845 - }, - { - "epoch": 2.531969309462916, - "grad_norm": 0.04043465852737427, - "learning_rate": 2.3100858609705167e-05, - "loss": 0.002348044328391552, - "step": 14850 - }, - { - "epoch": 2.5328218243819265, - "grad_norm": 0.05144953727722168, - "learning_rate": 2.3077204134562054e-05, - "loss": 0.0019841600209474564, - "step": 14855 - }, - { - "epoch": 2.533674339300938, - "grad_norm": 0.07220125198364258, - "learning_rate": 2.3053556392459813e-05, - "loss": 0.002818341739475727, - "step": 14860 - }, - { - "epoch": 2.5345268542199486, - "grad_norm": 0.08199959248304367, - "learning_rate": 2.302991539443801e-05, - "loss": 0.0024914808571338655, - "step": 14865 - }, - { - "epoch": 2.53537936913896, - "grad_norm": 0.07761549204587936, - "learning_rate": 2.3006281151533047e-05, - "loss": 0.003526497259736061, - "step": 14870 - }, - { - "epoch": 2.536231884057971, - "grad_norm": 0.1002337783575058, - "learning_rate": 2.298265367477816e-05, - "loss": 0.0022296305745840073, - "step": 14875 - }, - { - "epoch": 2.5370843989769822, - "grad_norm": 0.05918731540441513, - "learning_rate": 2.295903297520346e-05, - "loss": 0.0012512234039604663, - "step": 14880 - }, - { - "epoch": 2.5379369138959933, - "grad_norm": 0.053112782537937164, - "learning_rate": 2.2935419063835868e-05, - "loss": 0.0017477553337812424, - "step": 14885 - }, - { - "epoch": 2.5387894288150044, - "grad_norm": 0.061820488423109055, - "learning_rate": 2.2911811951699155e-05, - "loss": 0.0022626927122473715, - "step": 14890 - }, - { - "epoch": 2.5396419437340154, - "grad_norm": 0.11703728139400482, - "learning_rate": 2.288821164981387e-05, - "loss": 0.0025926090776920317, - "step": 14895 - }, - { - "epoch": 2.5404944586530265, - "grad_norm": 0.04635873809456825, - "learning_rate": 2.2864618169197468e-05, - "loss": 0.0017809070646762847, - "step": 14900 - }, - { - "epoch": 2.5413469735720375, - "grad_norm": 0.11287315934896469, - "learning_rate": 2.2841031520864136e-05, - "loss": 0.00238190982490778, - "step": 14905 - }, - { - "epoch": 2.5421994884910486, - "grad_norm": 0.026871057227253914, - "learning_rate": 2.2817451715824924e-05, - "loss": 0.0015336395241320132, - "step": 14910 - }, - { - "epoch": 2.5430520034100597, - "grad_norm": 0.06438672542572021, - "learning_rate": 2.279387876508766e-05, - "loss": 0.001995333656668663, - "step": 14915 - }, - { - "epoch": 2.5439045183290707, - "grad_norm": 0.06547024846076965, - "learning_rate": 2.277031267965697e-05, - "loss": 0.002060149237513542, - "step": 14920 - }, - { - "epoch": 2.544757033248082, - "grad_norm": 0.07563283294439316, - "learning_rate": 2.2746753470534322e-05, - "loss": 0.0017446789890527726, - "step": 14925 - }, - { - "epoch": 2.545609548167093, - "grad_norm": 0.028652798384428024, - "learning_rate": 2.27232011487179e-05, - "loss": 0.0022552181035280228, - "step": 14930 - }, - { - "epoch": 2.546462063086104, - "grad_norm": 0.0893654152750969, - "learning_rate": 2.269965572520274e-05, - "loss": 0.0029813185334205627, - "step": 14935 - }, - { - "epoch": 2.547314578005115, - "grad_norm": 0.04628995433449745, - "learning_rate": 2.26761172109806e-05, - "loss": 0.0025255372747778893, - "step": 14940 - }, - { - "epoch": 2.548167092924126, - "grad_norm": 0.09175702929496765, - "learning_rate": 2.2652585617040076e-05, - "loss": 0.004577648639678955, - "step": 14945 - }, - { - "epoch": 2.549019607843137, - "grad_norm": 0.041957542300224304, - "learning_rate": 2.262906095436648e-05, - "loss": 0.002292825095355511, - "step": 14950 - }, - { - "epoch": 2.5498721227621486, - "grad_norm": 0.061231136322021484, - "learning_rate": 2.2605543233941904e-05, - "loss": 0.002193107083439827, - "step": 14955 - }, - { - "epoch": 2.550724637681159, - "grad_norm": 0.08939548581838608, - "learning_rate": 2.2582032466745206e-05, - "loss": 0.0013388695195317268, - "step": 14960 - }, - { - "epoch": 2.5515771526001707, - "grad_norm": 0.10106560587882996, - "learning_rate": 2.255852866375199e-05, - "loss": 0.004271790385246277, - "step": 14965 - }, - { - "epoch": 2.5524296675191813, - "grad_norm": 0.04756918177008629, - "learning_rate": 2.253503183593463e-05, - "loss": 0.002253059670329094, - "step": 14970 - }, - { - "epoch": 2.553282182438193, - "grad_norm": 0.06336323171854019, - "learning_rate": 2.2511541994262203e-05, - "loss": 0.0019065763801336289, - "step": 14975 - }, - { - "epoch": 2.554134697357204, - "grad_norm": 0.021801188588142395, - "learning_rate": 2.2488059149700568e-05, - "loss": 0.001671123132109642, - "step": 14980 - }, - { - "epoch": 2.554987212276215, - "grad_norm": 0.07580327987670898, - "learning_rate": 2.2464583313212294e-05, - "loss": 0.0031477130949497225, - "step": 14985 - }, - { - "epoch": 2.555839727195226, - "grad_norm": 0.07757267355918884, - "learning_rate": 2.244111449575666e-05, - "loss": 0.0026445770636200905, - "step": 14990 - }, - { - "epoch": 2.556692242114237, - "grad_norm": 0.043020669370889664, - "learning_rate": 2.2417652708289726e-05, - "loss": 0.002764601819217205, - "step": 14995 - }, - { - "epoch": 2.557544757033248, - "grad_norm": 0.03915635868906975, - "learning_rate": 2.2394197961764212e-05, - "loss": 0.002291044779121876, - "step": 15000 - }, - { - "epoch": 2.558397271952259, - "grad_norm": 0.0665091872215271, - "learning_rate": 2.2370750267129586e-05, - "loss": 0.0017822932451963425, - "step": 15005 - }, - { - "epoch": 2.5592497868712702, - "grad_norm": 0.08525653183460236, - "learning_rate": 2.234730963533199e-05, - "loss": 0.0018473496660590173, - "step": 15010 - }, - { - "epoch": 2.5601023017902813, - "grad_norm": 0.05346886068582535, - "learning_rate": 2.2323876077314327e-05, - "loss": 0.002567983791232109, - "step": 15015 - }, - { - "epoch": 2.5609548167092924, - "grad_norm": 0.04240184277296066, - "learning_rate": 2.2300449604016123e-05, - "loss": 0.0021606752648949622, - "step": 15020 - }, - { - "epoch": 2.5618073316283034, - "grad_norm": 0.08507288247346878, - "learning_rate": 2.2277030226373667e-05, - "loss": 0.0023022485896945, - "step": 15025 - }, - { - "epoch": 2.5626598465473145, - "grad_norm": 0.07468844205141068, - "learning_rate": 2.225361795531989e-05, - "loss": 0.0030104584991931917, - "step": 15030 - }, - { - "epoch": 2.5635123614663256, - "grad_norm": 0.03731158375740051, - "learning_rate": 2.22302128017844e-05, - "loss": 0.0019535191357135774, - "step": 15035 - }, - { - "epoch": 2.5643648763853366, - "grad_norm": 0.09111307561397552, - "learning_rate": 2.2206814776693536e-05, - "loss": 0.0016553621739149094, - "step": 15040 - }, - { - "epoch": 2.5652173913043477, - "grad_norm": 0.04197632521390915, - "learning_rate": 2.2183423890970255e-05, - "loss": 0.0018846508115530013, - "step": 15045 - }, - { - "epoch": 2.566069906223359, - "grad_norm": 0.09259206801652908, - "learning_rate": 2.2160040155534206e-05, - "loss": 0.0028481241315603256, - "step": 15050 - }, - { - "epoch": 2.56692242114237, - "grad_norm": 0.07880257815122604, - "learning_rate": 2.2136663581301696e-05, - "loss": 0.002117951214313507, - "step": 15055 - }, - { - "epoch": 2.5677749360613813, - "grad_norm": 0.0969267189502716, - "learning_rate": 2.2113294179185667e-05, - "loss": 0.00240680705755949, - "step": 15060 - }, - { - "epoch": 2.568627450980392, - "grad_norm": 0.06295698881149292, - "learning_rate": 2.2089931960095754e-05, - "loss": 0.0012395468540489674, - "step": 15065 - }, - { - "epoch": 2.5694799658994034, - "grad_norm": 0.0716724842786789, - "learning_rate": 2.2066576934938224e-05, - "loss": 0.004721567407250405, - "step": 15070 - }, - { - "epoch": 2.5703324808184145, - "grad_norm": 0.04790467768907547, - "learning_rate": 2.2043229114615967e-05, - "loss": 0.0016566522419452668, - "step": 15075 - }, - { - "epoch": 2.5711849957374255, - "grad_norm": 0.014919254928827286, - "learning_rate": 2.2019888510028515e-05, - "loss": 0.00200834795832634, - "step": 15080 - }, - { - "epoch": 2.5720375106564366, - "grad_norm": 0.07281307876110077, - "learning_rate": 2.1996555132072063e-05, - "loss": 0.0021370718255639075, - "step": 15085 - }, - { - "epoch": 2.5728900255754477, - "grad_norm": 0.04918764904141426, - "learning_rate": 2.197322899163938e-05, - "loss": 0.002188747748732567, - "step": 15090 - }, - { - "epoch": 2.5737425404944587, - "grad_norm": 0.05246208980679512, - "learning_rate": 2.1949910099619913e-05, - "loss": 0.002106213942170143, - "step": 15095 - }, - { - "epoch": 2.57459505541347, - "grad_norm": 0.07900833338499069, - "learning_rate": 2.1926598466899674e-05, - "loss": 0.0014828240498900413, - "step": 15100 - }, - { - "epoch": 2.575447570332481, - "grad_norm": 0.1235758364200592, - "learning_rate": 2.19032941043613e-05, - "loss": 0.0033482640981674196, - "step": 15105 - }, - { - "epoch": 2.576300085251492, - "grad_norm": 0.06170985475182533, - "learning_rate": 2.187999702288408e-05, - "loss": 0.0019921788945794104, - "step": 15110 - }, - { - "epoch": 2.577152600170503, - "grad_norm": 0.1210661381483078, - "learning_rate": 2.185670723334384e-05, - "loss": 0.0019077232107520103, - "step": 15115 - }, - { - "epoch": 2.578005115089514, - "grad_norm": 0.06942020356655121, - "learning_rate": 2.1833424746613026e-05, - "loss": 0.0019503291696310043, - "step": 15120 - }, - { - "epoch": 2.578857630008525, - "grad_norm": 0.09329917281866074, - "learning_rate": 2.1810149573560693e-05, - "loss": 0.0026118636131286623, - "step": 15125 - }, - { - "epoch": 2.579710144927536, - "grad_norm": 0.1026659607887268, - "learning_rate": 2.1786881725052445e-05, - "loss": 0.002567945420742035, - "step": 15130 - }, - { - "epoch": 2.580562659846547, - "grad_norm": 0.06306809186935425, - "learning_rate": 2.1763621211950517e-05, - "loss": 0.001768135279417038, - "step": 15135 - }, - { - "epoch": 2.5814151747655583, - "grad_norm": 0.07647090405225754, - "learning_rate": 2.174036804511367e-05, - "loss": 0.0015752470120787621, - "step": 15140 - }, - { - "epoch": 2.5822676896845693, - "grad_norm": 0.045121923089027405, - "learning_rate": 2.171712223539726e-05, - "loss": 0.0025726621970534325, - "step": 15145 - }, - { - "epoch": 2.5831202046035804, - "grad_norm": 0.040667545050382614, - "learning_rate": 2.1693883793653188e-05, - "loss": 0.002222199738025665, - "step": 15150 - }, - { - "epoch": 2.583972719522592, - "grad_norm": 0.08505896478891373, - "learning_rate": 2.1670652730729968e-05, - "loss": 0.0030935727059841155, - "step": 15155 - }, - { - "epoch": 2.5848252344416025, - "grad_norm": 0.05064573138952255, - "learning_rate": 2.164742905747261e-05, - "loss": 0.002387380041182041, - "step": 15160 - }, - { - "epoch": 2.585677749360614, - "grad_norm": 0.0372583344578743, - "learning_rate": 2.1624212784722684e-05, - "loss": 0.0026363788172602655, - "step": 15165 - }, - { - "epoch": 2.5865302642796246, - "grad_norm": 0.06209828332066536, - "learning_rate": 2.1601003923318344e-05, - "loss": 0.0029974017292261125, - "step": 15170 - }, - { - "epoch": 2.587382779198636, - "grad_norm": 0.049798715859651566, - "learning_rate": 2.157780248409424e-05, - "loss": 0.0016345694661140443, - "step": 15175 - }, - { - "epoch": 2.588235294117647, - "grad_norm": 0.06752602010965347, - "learning_rate": 2.1554608477881597e-05, - "loss": 0.0025367341935634614, - "step": 15180 - }, - { - "epoch": 2.5890878090366582, - "grad_norm": 0.10456907004117966, - "learning_rate": 2.1531421915508137e-05, - "loss": 0.002495551109313965, - "step": 15185 - }, - { - "epoch": 2.5899403239556693, - "grad_norm": 0.0790029838681221, - "learning_rate": 2.1508242807798114e-05, - "loss": 0.0025735165923833846, - "step": 15190 - }, - { - "epoch": 2.5907928388746804, - "grad_norm": 0.030237069353461266, - "learning_rate": 2.1485071165572298e-05, - "loss": 0.0018124323338270187, - "step": 15195 - }, - { - "epoch": 2.5916453537936914, - "grad_norm": 0.06030745431780815, - "learning_rate": 2.1461906999648008e-05, - "loss": 0.002845403365790844, - "step": 15200 - }, - { - "epoch": 2.5924978687127025, - "grad_norm": 0.10071806609630585, - "learning_rate": 2.1438750320839037e-05, - "loss": 0.002326494827866554, - "step": 15205 - }, - { - "epoch": 2.5933503836317136, - "grad_norm": 0.050379570573568344, - "learning_rate": 2.1415601139955686e-05, - "loss": 0.0019888151437044144, - "step": 15210 - }, - { - "epoch": 2.5942028985507246, - "grad_norm": 0.09101511538028717, - "learning_rate": 2.1392459467804753e-05, - "loss": 0.003049125336110592, - "step": 15215 - }, - { - "epoch": 2.5950554134697357, - "grad_norm": 0.03804527968168259, - "learning_rate": 2.1369325315189553e-05, - "loss": 0.0016767382621765137, - "step": 15220 - }, - { - "epoch": 2.5959079283887467, - "grad_norm": 0.0779503807425499, - "learning_rate": 2.1346198692909895e-05, - "loss": 0.001964661478996277, - "step": 15225 - }, - { - "epoch": 2.596760443307758, - "grad_norm": 0.07922998070716858, - "learning_rate": 2.1323079611762033e-05, - "loss": 0.001821339875459671, - "step": 15230 - }, - { - "epoch": 2.597612958226769, - "grad_norm": 0.045152947306632996, - "learning_rate": 2.1299968082538734e-05, - "loss": 0.0011449499055743218, - "step": 15235 - }, - { - "epoch": 2.59846547314578, - "grad_norm": 0.026626303791999817, - "learning_rate": 2.1276864116029207e-05, - "loss": 0.0016753975301980971, - "step": 15240 - }, - { - "epoch": 2.599317988064791, - "grad_norm": 0.10935933142900467, - "learning_rate": 2.1253767723019188e-05, - "loss": 0.0026281427592039107, - "step": 15245 - }, - { - "epoch": 2.6001705029838025, - "grad_norm": 0.08133106678724289, - "learning_rate": 2.123067891429082e-05, - "loss": 0.001925770938396454, - "step": 15250 - }, - { - "epoch": 2.601023017902813, - "grad_norm": 0.04865674301981926, - "learning_rate": 2.1207597700622728e-05, - "loss": 0.0019936567172408105, - "step": 15255 - }, - { - "epoch": 2.6018755328218246, - "grad_norm": 0.11841622740030289, - "learning_rate": 2.1184524092789982e-05, - "loss": 0.00298205092549324, - "step": 15260 - }, - { - "epoch": 2.602728047740835, - "grad_norm": 0.04416264593601227, - "learning_rate": 2.1161458101564115e-05, - "loss": 0.0036853265017271044, - "step": 15265 - }, - { - "epoch": 2.6035805626598467, - "grad_norm": 0.08603575825691223, - "learning_rate": 2.1138399737713118e-05, - "loss": 0.004533383995294571, - "step": 15270 - }, - { - "epoch": 2.604433077578858, - "grad_norm": 0.0626961886882782, - "learning_rate": 2.1115349012001388e-05, - "loss": 0.0017330382019281388, - "step": 15275 - }, - { - "epoch": 2.605285592497869, - "grad_norm": 0.12894456088542938, - "learning_rate": 2.1092305935189773e-05, - "loss": 0.0037327542901039123, - "step": 15280 - }, - { - "epoch": 2.60613810741688, - "grad_norm": 0.10542263090610504, - "learning_rate": 2.106927051803554e-05, - "loss": 0.0026806583628058434, - "step": 15285 - }, - { - "epoch": 2.606990622335891, - "grad_norm": 0.05068397521972656, - "learning_rate": 2.1046242771292386e-05, - "loss": 0.0014822190627455712, - "step": 15290 - }, - { - "epoch": 2.607843137254902, - "grad_norm": 0.08927716314792633, - "learning_rate": 2.102322270571045e-05, - "loss": 0.003242380917072296, - "step": 15295 - }, - { - "epoch": 2.608695652173913, - "grad_norm": 0.05792883411049843, - "learning_rate": 2.1000210332036248e-05, - "loss": 0.0017583563923835755, - "step": 15300 - }, - { - "epoch": 2.609548167092924, - "grad_norm": 0.0648881196975708, - "learning_rate": 2.09772056610127e-05, - "loss": 0.002197427675127983, - "step": 15305 - }, - { - "epoch": 2.610400682011935, - "grad_norm": 0.060977645218372345, - "learning_rate": 2.095420870337919e-05, - "loss": 0.002055848389863968, - "step": 15310 - }, - { - "epoch": 2.6112531969309463, - "grad_norm": 0.04654461517930031, - "learning_rate": 2.093121946987146e-05, - "loss": 0.002073242887854576, - "step": 15315 - }, - { - "epoch": 2.6121057118499573, - "grad_norm": 0.04738753288984299, - "learning_rate": 2.0908237971221634e-05, - "loss": 0.0017290839925408364, - "step": 15320 - }, - { - "epoch": 2.6129582267689684, - "grad_norm": 0.07519782334566116, - "learning_rate": 2.0885264218158248e-05, - "loss": 0.0012821624055504798, - "step": 15325 - }, - { - "epoch": 2.6138107416879794, - "grad_norm": 0.06078832224011421, - "learning_rate": 2.0862298221406206e-05, - "loss": 0.0019888199865818025, - "step": 15330 - }, - { - "epoch": 2.6146632566069905, - "grad_norm": 0.04823920503258705, - "learning_rate": 2.083933999168679e-05, - "loss": 0.0015226650051772595, - "step": 15335 - }, - { - "epoch": 2.6155157715260016, - "grad_norm": 0.04050251096487045, - "learning_rate": 2.0816389539717694e-05, - "loss": 0.0024490740150213243, - "step": 15340 - }, - { - "epoch": 2.6163682864450126, - "grad_norm": 0.08443193882703781, - "learning_rate": 2.0793446876212937e-05, - "loss": 0.0027990926057100294, - "step": 15345 - }, - { - "epoch": 2.6172208013640237, - "grad_norm": 0.03322751075029373, - "learning_rate": 2.07705120118829e-05, - "loss": 0.0011861051432788372, - "step": 15350 - }, - { - "epoch": 2.618073316283035, - "grad_norm": 0.06874673068523407, - "learning_rate": 2.0747584957434375e-05, - "loss": 0.0018939610570669174, - "step": 15355 - }, - { - "epoch": 2.618925831202046, - "grad_norm": 0.04990018159151077, - "learning_rate": 2.0724665723570437e-05, - "loss": 0.0013890796341001987, - "step": 15360 - }, - { - "epoch": 2.6197783461210573, - "grad_norm": 0.06342940032482147, - "learning_rate": 2.0701754320990586e-05, - "loss": 0.0019270982593297958, - "step": 15365 - }, - { - "epoch": 2.620630861040068, - "grad_norm": 0.05647345632314682, - "learning_rate": 2.0678850760390607e-05, - "loss": 0.0019773844629526137, - "step": 15370 - }, - { - "epoch": 2.6214833759590794, - "grad_norm": 0.09767530113458633, - "learning_rate": 2.0655955052462643e-05, - "loss": 0.0025425378233194353, - "step": 15375 - }, - { - "epoch": 2.6223358908780905, - "grad_norm": 0.05659051984548569, - "learning_rate": 2.063306720789516e-05, - "loss": 0.0016861587762832641, - "step": 15380 - }, - { - "epoch": 2.6231884057971016, - "grad_norm": 0.07679109275341034, - "learning_rate": 2.061018723737299e-05, - "loss": 0.0012974600307643414, - "step": 15385 - }, - { - "epoch": 2.6240409207161126, - "grad_norm": 0.032084014266729355, - "learning_rate": 2.0587315151577257e-05, - "loss": 0.0009737671352922916, - "step": 15390 - }, - { - "epoch": 2.6248934356351237, - "grad_norm": 0.07588861882686615, - "learning_rate": 2.056445096118539e-05, - "loss": 0.002771071344614029, - "step": 15395 - }, - { - "epoch": 2.6257459505541347, - "grad_norm": 0.07706267386674881, - "learning_rate": 2.0541594676871188e-05, - "loss": 0.002124561369419098, - "step": 15400 - }, - { - "epoch": 2.626598465473146, - "grad_norm": 0.05501805245876312, - "learning_rate": 2.051874630930469e-05, - "loss": 0.001449206192046404, - "step": 15405 - }, - { - "epoch": 2.627450980392157, - "grad_norm": 0.07360731810331345, - "learning_rate": 2.0495905869152303e-05, - "loss": 0.0014007428660988807, - "step": 15410 - }, - { - "epoch": 2.628303495311168, - "grad_norm": 0.03651239722967148, - "learning_rate": 2.04730733670767e-05, - "loss": 0.0013091465458273887, - "step": 15415 - }, - { - "epoch": 2.629156010230179, - "grad_norm": 0.05154712125658989, - "learning_rate": 2.0450248813736842e-05, - "loss": 0.0017904775217175485, - "step": 15420 - }, - { - "epoch": 2.63000852514919, - "grad_norm": 0.03202452138066292, - "learning_rate": 2.0427432219787978e-05, - "loss": 0.002919047139585018, - "step": 15425 - }, - { - "epoch": 2.630861040068201, - "grad_norm": 0.08954522758722305, - "learning_rate": 2.040462359588169e-05, - "loss": 0.0031249357387423517, - "step": 15430 - }, - { - "epoch": 2.631713554987212, - "grad_norm": 0.09551462531089783, - "learning_rate": 2.038182295266577e-05, - "loss": 0.0016073914244771003, - "step": 15435 - }, - { - "epoch": 2.632566069906223, - "grad_norm": 0.0576794371008873, - "learning_rate": 2.035903030078432e-05, - "loss": 0.0028427325189113615, - "step": 15440 - }, - { - "epoch": 2.6334185848252343, - "grad_norm": 0.0879262238740921, - "learning_rate": 2.0336245650877728e-05, - "loss": 0.0012862576171755792, - "step": 15445 - }, - { - "epoch": 2.634271099744246, - "grad_norm": 0.1022641509771347, - "learning_rate": 2.03134690135826e-05, - "loss": 0.002296357229351997, - "step": 15450 - }, - { - "epoch": 2.6351236146632564, - "grad_norm": 0.07090801000595093, - "learning_rate": 2.029070039953186e-05, - "loss": 0.0032129865139722824, - "step": 15455 - }, - { - "epoch": 2.635976129582268, - "grad_norm": 0.06394338607788086, - "learning_rate": 2.026793981935463e-05, - "loss": 0.0022887293249368667, - "step": 15460 - }, - { - "epoch": 2.6368286445012785, - "grad_norm": 0.03828660771250725, - "learning_rate": 2.0245187283676316e-05, - "loss": 0.0023141488432884215, - "step": 15465 - }, - { - "epoch": 2.63768115942029, - "grad_norm": 0.0748148262500763, - "learning_rate": 2.0222442803118537e-05, - "loss": 0.002477791905403137, - "step": 15470 - }, - { - "epoch": 2.638533674339301, - "grad_norm": 0.02352295070886612, - "learning_rate": 2.019970638829921e-05, - "loss": 0.0021653104573488235, - "step": 15475 - }, - { - "epoch": 2.639386189258312, - "grad_norm": 0.019303878769278526, - "learning_rate": 2.017697804983243e-05, - "loss": 0.0014067382551729679, - "step": 15480 - }, - { - "epoch": 2.640238704177323, - "grad_norm": 0.036747269332408905, - "learning_rate": 2.015425779832854e-05, - "loss": 0.002145359478890896, - "step": 15485 - }, - { - "epoch": 2.6410912190963343, - "grad_norm": 0.04195109382271767, - "learning_rate": 2.0131545644394096e-05, - "loss": 0.0014138499274849892, - "step": 15490 - }, - { - "epoch": 2.6419437340153453, - "grad_norm": 0.07388610392808914, - "learning_rate": 2.0108841598631904e-05, - "loss": 0.0025294892489910125, - "step": 15495 - }, - { - "epoch": 2.6427962489343564, - "grad_norm": 0.0890735536813736, - "learning_rate": 2.0086145671640973e-05, - "loss": 0.0026762137189507485, - "step": 15500 - }, - { - "epoch": 2.6436487638533674, - "grad_norm": 0.07587535679340363, - "learning_rate": 2.006345787401652e-05, - "loss": 0.0031544029712677, - "step": 15505 - }, - { - "epoch": 2.6445012787723785, - "grad_norm": 0.10948733240365982, - "learning_rate": 2.004077821634995e-05, - "loss": 0.0023899499326944353, - "step": 15510 - }, - { - "epoch": 2.6453537936913896, - "grad_norm": 0.07914752513170242, - "learning_rate": 2.0018106709228886e-05, - "loss": 0.004097612574696541, - "step": 15515 - }, - { - "epoch": 2.6462063086104006, - "grad_norm": 0.07947845757007599, - "learning_rate": 1.9995443363237126e-05, - "loss": 0.0022834014147520066, - "step": 15520 - }, - { - "epoch": 2.6470588235294117, - "grad_norm": 0.05973362177610397, - "learning_rate": 1.9972788188954704e-05, - "loss": 0.001445610448718071, - "step": 15525 - }, - { - "epoch": 2.6479113384484227, - "grad_norm": 0.07292830944061279, - "learning_rate": 1.9950141196957792e-05, - "loss": 0.0023502418771386147, - "step": 15530 - }, - { - "epoch": 2.648763853367434, - "grad_norm": 0.09226574003696442, - "learning_rate": 1.9927502397818745e-05, - "loss": 0.002285385876893997, - "step": 15535 - }, - { - "epoch": 2.649616368286445, - "grad_norm": 0.08981240540742874, - "learning_rate": 1.9904871802106124e-05, - "loss": 0.0023617954924702645, - "step": 15540 - }, - { - "epoch": 2.6504688832054564, - "grad_norm": 0.07505398988723755, - "learning_rate": 1.988224942038466e-05, - "loss": 0.0016136666759848594, - "step": 15545 - }, - { - "epoch": 2.651321398124467, - "grad_norm": 0.06795456260442734, - "learning_rate": 1.9859635263215215e-05, - "loss": 0.0014020048081874847, - "step": 15550 - }, - { - "epoch": 2.6521739130434785, - "grad_norm": 0.07863990217447281, - "learning_rate": 1.983702934115483e-05, - "loss": 0.0016099724918603898, - "step": 15555 - }, - { - "epoch": 2.653026427962489, - "grad_norm": 0.15475937724113464, - "learning_rate": 1.9814431664756705e-05, - "loss": 0.0028660917654633523, - "step": 15560 - }, - { - "epoch": 2.6538789428815006, - "grad_norm": 0.09072619676589966, - "learning_rate": 1.979184224457017e-05, - "loss": 0.0038232788443565368, - "step": 15565 - }, - { - "epoch": 2.6547314578005117, - "grad_norm": 0.04944036900997162, - "learning_rate": 1.9769261091140746e-05, - "loss": 0.002762124501168728, - "step": 15570 - }, - { - "epoch": 2.6555839727195227, - "grad_norm": 0.07315114885568619, - "learning_rate": 1.974668821501005e-05, - "loss": 0.0018053753301501274, - "step": 15575 - }, - { - "epoch": 2.656436487638534, - "grad_norm": 0.03133604675531387, - "learning_rate": 1.972412362671584e-05, - "loss": 0.0012923687696456908, - "step": 15580 - }, - { - "epoch": 2.657289002557545, - "grad_norm": 0.07396573573350906, - "learning_rate": 1.9701567336792037e-05, - "loss": 0.004405549541115761, - "step": 15585 - }, - { - "epoch": 2.658141517476556, - "grad_norm": 0.05702332779765129, - "learning_rate": 1.967901935576867e-05, - "loss": 0.001864958368241787, - "step": 15590 - }, - { - "epoch": 2.658994032395567, - "grad_norm": 0.06003536656498909, - "learning_rate": 1.9656479694171882e-05, - "loss": 0.0025712646543979644, - "step": 15595 - }, - { - "epoch": 2.659846547314578, - "grad_norm": 0.06424745172262192, - "learning_rate": 1.963394836252393e-05, - "loss": 0.002156762033700943, - "step": 15600 - }, - { - "epoch": 2.660699062233589, - "grad_norm": 0.0703018307685852, - "learning_rate": 1.9611425371343193e-05, - "loss": 0.0034677576273679732, - "step": 15605 - }, - { - "epoch": 2.6615515771526, - "grad_norm": 0.06616941094398499, - "learning_rate": 1.958891073114414e-05, - "loss": 0.002393544837832451, - "step": 15610 - }, - { - "epoch": 2.662404092071611, - "grad_norm": 0.04988931491971016, - "learning_rate": 1.9566404452437374e-05, - "loss": 0.00223421361297369, - "step": 15615 - }, - { - "epoch": 2.6632566069906223, - "grad_norm": 0.04869920015335083, - "learning_rate": 1.9543906545729573e-05, - "loss": 0.0013234581798315047, - "step": 15620 - }, - { - "epoch": 2.6641091219096333, - "grad_norm": 0.06417152285575867, - "learning_rate": 1.9521417021523482e-05, - "loss": 0.002075556293129921, - "step": 15625 - }, - { - "epoch": 2.6649616368286444, - "grad_norm": 0.07077648490667343, - "learning_rate": 1.949893589031799e-05, - "loss": 0.002136031910777092, - "step": 15630 - }, - { - "epoch": 2.6658141517476555, - "grad_norm": 0.060446277260780334, - "learning_rate": 1.9476463162608016e-05, - "loss": 0.0025891490280628205, - "step": 15635 - }, - { - "epoch": 2.6666666666666665, - "grad_norm": 0.0396479107439518, - "learning_rate": 1.94539988488846e-05, - "loss": 0.0018554994836449622, - "step": 15640 - }, - { - "epoch": 2.6675191815856776, - "grad_norm": 0.06787905842065811, - "learning_rate": 1.9431542959634817e-05, - "loss": 0.0017763305455446242, - "step": 15645 - }, - { - "epoch": 2.668371696504689, - "grad_norm": 0.08236365765333176, - "learning_rate": 1.940909550534182e-05, - "loss": 0.0018430478870868684, - "step": 15650 - }, - { - "epoch": 2.6692242114236997, - "grad_norm": 0.10013429820537567, - "learning_rate": 1.9386656496484816e-05, - "loss": 0.0021154627203941343, - "step": 15655 - }, - { - "epoch": 2.670076726342711, - "grad_norm": 0.046891309320926666, - "learning_rate": 1.936422594353911e-05, - "loss": 0.0018745694309473039, - "step": 15660 - }, - { - "epoch": 2.670929241261722, - "grad_norm": 0.03863799199461937, - "learning_rate": 1.9341803856976016e-05, - "loss": 0.002184972912073135, - "step": 15665 - }, - { - "epoch": 2.6717817561807333, - "grad_norm": 0.05828642472624779, - "learning_rate": 1.9319390247262896e-05, - "loss": 0.0022127529606223106, - "step": 15670 - }, - { - "epoch": 2.6726342710997444, - "grad_norm": 0.08675269782543182, - "learning_rate": 1.9296985124863194e-05, - "loss": 0.002008732967078686, - "step": 15675 - }, - { - "epoch": 2.6734867860187554, - "grad_norm": 0.0700579434633255, - "learning_rate": 1.9274588500236345e-05, - "loss": 0.0024785833433270455, - "step": 15680 - }, - { - "epoch": 2.6743393009377665, - "grad_norm": 0.10120563209056854, - "learning_rate": 1.9252200383837854e-05, - "loss": 0.002890965715050697, - "step": 15685 - }, - { - "epoch": 2.6751918158567776, - "grad_norm": 0.10622604191303253, - "learning_rate": 1.9229820786119235e-05, - "loss": 0.002458018809556961, - "step": 15690 - }, - { - "epoch": 2.6760443307757886, - "grad_norm": 0.07292070984840393, - "learning_rate": 1.920744971752803e-05, - "loss": 0.0030672624707221985, - "step": 15695 - }, - { - "epoch": 2.6768968456947997, - "grad_norm": 0.030893906950950623, - "learning_rate": 1.918508718850779e-05, - "loss": 0.002947884239256382, - "step": 15700 - }, - { - "epoch": 2.6777493606138107, - "grad_norm": 0.07428406924009323, - "learning_rate": 1.9162733209498077e-05, - "loss": 0.003342418372631073, - "step": 15705 - }, - { - "epoch": 2.678601875532822, - "grad_norm": 0.014073869213461876, - "learning_rate": 1.9140387790934502e-05, - "loss": 0.0020058237016201018, - "step": 15710 - }, - { - "epoch": 2.679454390451833, - "grad_norm": 0.08076811581850052, - "learning_rate": 1.911805094324863e-05, - "loss": 0.0020218659192323685, - "step": 15715 - }, - { - "epoch": 2.680306905370844, - "grad_norm": 0.059959858655929565, - "learning_rate": 1.909572267686804e-05, - "loss": 0.0012911208905279636, - "step": 15720 - }, - { - "epoch": 2.681159420289855, - "grad_norm": 0.08688201755285263, - "learning_rate": 1.9073403002216313e-05, - "loss": 0.001681494526565075, - "step": 15725 - }, - { - "epoch": 2.682011935208866, - "grad_norm": 0.08083862066268921, - "learning_rate": 1.905109192971304e-05, - "loss": 0.002467956393957138, - "step": 15730 - }, - { - "epoch": 2.682864450127877, - "grad_norm": 0.06145070865750313, - "learning_rate": 1.902878946977375e-05, - "loss": 0.004355132207274437, - "step": 15735 - }, - { - "epoch": 2.683716965046888, - "grad_norm": 0.07410819083452225, - "learning_rate": 1.900649563280997e-05, - "loss": 0.0029904641211032867, - "step": 15740 - }, - { - "epoch": 2.6845694799658997, - "grad_norm": 0.03833797574043274, - "learning_rate": 1.8984210429229217e-05, - "loss": 0.0012984732165932655, - "step": 15745 - }, - { - "epoch": 2.6854219948849103, - "grad_norm": 0.023088248446583748, - "learning_rate": 1.896193386943494e-05, - "loss": 0.001397434249520302, - "step": 15750 - }, - { - "epoch": 2.686274509803922, - "grad_norm": 0.06918703764677048, - "learning_rate": 1.8939665963826616e-05, - "loss": 0.0015222997404634952, - "step": 15755 - }, - { - "epoch": 2.6871270247229324, - "grad_norm": 0.0286374781280756, - "learning_rate": 1.891740672279962e-05, - "loss": 0.0015881337225437165, - "step": 15760 - }, - { - "epoch": 2.687979539641944, - "grad_norm": 0.05485616624355316, - "learning_rate": 1.88951561567453e-05, - "loss": 0.0034288309514522554, - "step": 15765 - }, - { - "epoch": 2.688832054560955, - "grad_norm": 0.05021583288908005, - "learning_rate": 1.887291427605097e-05, - "loss": 0.0013944344595074654, - "step": 15770 - }, - { - "epoch": 2.689684569479966, - "grad_norm": 0.06752395629882812, - "learning_rate": 1.8850681091099895e-05, - "loss": 0.002590004727244377, - "step": 15775 - }, - { - "epoch": 2.690537084398977, - "grad_norm": 0.04273150861263275, - "learning_rate": 1.8828456612271255e-05, - "loss": 0.0019359454512596131, - "step": 15780 - }, - { - "epoch": 2.691389599317988, - "grad_norm": 0.0928453654050827, - "learning_rate": 1.8806240849940167e-05, - "loss": 0.003046049177646637, - "step": 15785 - }, - { - "epoch": 2.692242114236999, - "grad_norm": 0.025754287838935852, - "learning_rate": 1.8784033814477692e-05, - "loss": 0.0018295232206583024, - "step": 15790 - }, - { - "epoch": 2.6930946291560103, - "grad_norm": 0.07345419377088547, - "learning_rate": 1.8761835516250806e-05, - "loss": 0.0018985627219080925, - "step": 15795 - }, - { - "epoch": 2.6939471440750213, - "grad_norm": 0.08317514508962631, - "learning_rate": 1.873964596562243e-05, - "loss": 0.0030419353395700456, - "step": 15800 - }, - { - "epoch": 2.6947996589940324, - "grad_norm": 0.07300770282745361, - "learning_rate": 1.8717465172951377e-05, - "loss": 0.002040323428809643, - "step": 15805 - }, - { - "epoch": 2.6956521739130435, - "grad_norm": 0.07284363359212875, - "learning_rate": 1.8695293148592362e-05, - "loss": 0.001639954373240471, - "step": 15810 - }, - { - "epoch": 2.6965046888320545, - "grad_norm": 0.05817059800028801, - "learning_rate": 1.867312990289606e-05, - "loss": 0.0015234597958624363, - "step": 15815 - }, - { - "epoch": 2.6973572037510656, - "grad_norm": 0.11319714039564133, - "learning_rate": 1.865097544620897e-05, - "loss": 0.0018295228481292724, - "step": 15820 - }, - { - "epoch": 2.6982097186700766, - "grad_norm": 0.10493957251310349, - "learning_rate": 1.8628829788873567e-05, - "loss": 0.0025029994547367098, - "step": 15825 - }, - { - "epoch": 2.6990622335890877, - "grad_norm": 0.03161423280835152, - "learning_rate": 1.860669294122816e-05, - "loss": 0.0014271627180278302, - "step": 15830 - }, - { - "epoch": 2.6999147485080988, - "grad_norm": 0.03267689794301987, - "learning_rate": 1.858456491360697e-05, - "loss": 0.0012216478586196899, - "step": 15835 - }, - { - "epoch": 2.70076726342711, - "grad_norm": 0.07986247539520264, - "learning_rate": 1.856244571634008e-05, - "loss": 0.0018704459071159363, - "step": 15840 - }, - { - "epoch": 2.701619778346121, - "grad_norm": 0.10120461881160736, - "learning_rate": 1.85403353597535e-05, - "loss": 0.0020706810057163237, - "step": 15845 - }, - { - "epoch": 2.7024722932651324, - "grad_norm": 0.05339881405234337, - "learning_rate": 1.8518233854169056e-05, - "loss": 0.0017986055463552475, - "step": 15850 - }, - { - "epoch": 2.703324808184143, - "grad_norm": 0.11433786898851395, - "learning_rate": 1.8496141209904464e-05, - "loss": 0.0034054510295391084, - "step": 15855 - }, - { - "epoch": 2.7041773231031545, - "grad_norm": 0.061081189662218094, - "learning_rate": 1.8474057437273328e-05, - "loss": 0.002348882704973221, - "step": 15860 - }, - { - "epoch": 2.705029838022165, - "grad_norm": 0.055195316672325134, - "learning_rate": 1.8451982546585055e-05, - "loss": 0.0015221487730741501, - "step": 15865 - }, - { - "epoch": 2.7058823529411766, - "grad_norm": 0.06800514459609985, - "learning_rate": 1.8429916548144973e-05, - "loss": 0.0023088542744517325, - "step": 15870 - }, - { - "epoch": 2.7067348678601877, - "grad_norm": 0.05646739527583122, - "learning_rate": 1.8407859452254206e-05, - "loss": 0.0024141166359186172, - "step": 15875 - }, - { - "epoch": 2.7075873827791987, - "grad_norm": 0.10886628180742264, - "learning_rate": 1.8385811269209743e-05, - "loss": 0.0019476715475320815, - "step": 15880 - }, - { - "epoch": 2.70843989769821, - "grad_norm": 0.04279763624072075, - "learning_rate": 1.8363772009304395e-05, - "loss": 0.002021237276494503, - "step": 15885 - }, - { - "epoch": 2.709292412617221, - "grad_norm": 0.09583209455013275, - "learning_rate": 1.8341741682826852e-05, - "loss": 0.002025018632411957, - "step": 15890 - }, - { - "epoch": 2.710144927536232, - "grad_norm": 0.06695323437452316, - "learning_rate": 1.8319720300061582e-05, - "loss": 0.0026269391179084777, - "step": 15895 - }, - { - "epoch": 2.710997442455243, - "grad_norm": 0.07438764721155167, - "learning_rate": 1.829770787128889e-05, - "loss": 0.0014647828415036202, - "step": 15900 - }, - { - "epoch": 2.711849957374254, - "grad_norm": 0.05395448952913284, - "learning_rate": 1.8275704406784933e-05, - "loss": 0.0024559808894991874, - "step": 15905 - }, - { - "epoch": 2.712702472293265, - "grad_norm": 0.03163938969373703, - "learning_rate": 1.825370991682164e-05, - "loss": 0.0022430509328842164, - "step": 15910 - }, - { - "epoch": 2.713554987212276, - "grad_norm": 0.104282446205616, - "learning_rate": 1.8231724411666794e-05, - "loss": 0.001472956594079733, - "step": 15915 - }, - { - "epoch": 2.7144075021312872, - "grad_norm": 0.07355596870183945, - "learning_rate": 1.8209747901583944e-05, - "loss": 0.0023859225213527678, - "step": 15920 - }, - { - "epoch": 2.7152600170502983, - "grad_norm": 0.06525922566652298, - "learning_rate": 1.8187780396832463e-05, - "loss": 0.00265895314514637, - "step": 15925 - }, - { - "epoch": 2.7161125319693094, - "grad_norm": 0.09379115700721741, - "learning_rate": 1.8165821907667505e-05, - "loss": 0.002496413141489029, - "step": 15930 - }, - { - "epoch": 2.7169650468883204, - "grad_norm": 0.05254679545760155, - "learning_rate": 1.8143872444340017e-05, - "loss": 0.0022162407636642455, - "step": 15935 - }, - { - "epoch": 2.7178175618073315, - "grad_norm": 0.06203889846801758, - "learning_rate": 1.8121932017096758e-05, - "loss": 0.0016900423914194107, - "step": 15940 - }, - { - "epoch": 2.718670076726343, - "grad_norm": 0.08532653003931046, - "learning_rate": 1.810000063618023e-05, - "loss": 0.0028453752398490905, - "step": 15945 - }, - { - "epoch": 2.7195225916453536, - "grad_norm": 0.08361469209194183, - "learning_rate": 1.807807831182875e-05, - "loss": 0.0029737703502178193, - "step": 15950 - }, - { - "epoch": 2.720375106564365, - "grad_norm": 0.06439653784036636, - "learning_rate": 1.805616505427637e-05, - "loss": 0.002233676239848137, - "step": 15955 - }, - { - "epoch": 2.7212276214833757, - "grad_norm": 0.09197837114334106, - "learning_rate": 1.803426087375295e-05, - "loss": 0.0020749013870954513, - "step": 15960 - }, - { - "epoch": 2.722080136402387, - "grad_norm": 0.055145513266325, - "learning_rate": 1.8012365780484074e-05, - "loss": 0.0013141044415533542, - "step": 15965 - }, - { - "epoch": 2.7229326513213983, - "grad_norm": 0.06788767874240875, - "learning_rate": 1.7990479784691105e-05, - "loss": 0.0023008717224001886, - "step": 15970 - }, - { - "epoch": 2.7237851662404093, - "grad_norm": 0.10216987133026123, - "learning_rate": 1.7968602896591152e-05, - "loss": 0.002799564599990845, - "step": 15975 - }, - { - "epoch": 2.7246376811594204, - "grad_norm": 0.0995464101433754, - "learning_rate": 1.7946735126397056e-05, - "loss": 0.0023927824571728707, - "step": 15980 - }, - { - "epoch": 2.7254901960784315, - "grad_norm": 0.05936437472701073, - "learning_rate": 1.7924876484317453e-05, - "loss": 0.001253789383918047, - "step": 15985 - }, - { - "epoch": 2.7263427109974425, - "grad_norm": 0.06160435080528259, - "learning_rate": 1.7903026980556672e-05, - "loss": 0.00238239299505949, - "step": 15990 - }, - { - "epoch": 2.7271952259164536, - "grad_norm": 0.05691118910908699, - "learning_rate": 1.788118662531477e-05, - "loss": 0.0015995081514120102, - "step": 15995 - }, - { - "epoch": 2.7280477408354646, - "grad_norm": 0.0878402590751648, - "learning_rate": 1.7859355428787564e-05, - "loss": 0.002066444233059883, - "step": 16000 - }, - { - "epoch": 2.7289002557544757, - "grad_norm": 0.04065166413784027, - "learning_rate": 1.7837533401166598e-05, - "loss": 0.0022698283195495606, - "step": 16005 - }, - { - "epoch": 2.7297527706734868, - "grad_norm": 0.08980758488178253, - "learning_rate": 1.7815720552639105e-05, - "loss": 0.0016043156385421753, - "step": 16010 - }, - { - "epoch": 2.730605285592498, - "grad_norm": 0.05619784817099571, - "learning_rate": 1.7793916893388055e-05, - "loss": 0.0025583259761333466, - "step": 16015 - }, - { - "epoch": 2.731457800511509, - "grad_norm": 0.09853291511535645, - "learning_rate": 1.7772122433592116e-05, - "loss": 0.0025311170145869257, - "step": 16020 - }, - { - "epoch": 2.73231031543052, - "grad_norm": 0.044340990483760834, - "learning_rate": 1.7750337183425652e-05, - "loss": 0.0020809115841984747, - "step": 16025 - }, - { - "epoch": 2.733162830349531, - "grad_norm": 0.024994025006890297, - "learning_rate": 1.772856115305877e-05, - "loss": 0.001932576857507229, - "step": 16030 - }, - { - "epoch": 2.734015345268542, - "grad_norm": 0.07059352099895477, - "learning_rate": 1.770679435265724e-05, - "loss": 0.002347341552376747, - "step": 16035 - }, - { - "epoch": 2.734867860187553, - "grad_norm": 0.08121193200349808, - "learning_rate": 1.7685036792382506e-05, - "loss": 0.0015123223885893822, - "step": 16040 - }, - { - "epoch": 2.735720375106564, - "grad_norm": 0.07900503277778625, - "learning_rate": 1.766328848239175e-05, - "loss": 0.0028667191043496134, - "step": 16045 - }, - { - "epoch": 2.7365728900255757, - "grad_norm": 0.08574212342500687, - "learning_rate": 1.7641549432837778e-05, - "loss": 0.002038617432117462, - "step": 16050 - }, - { - "epoch": 2.7374254049445863, - "grad_norm": 0.06154071167111397, - "learning_rate": 1.7619819653869132e-05, - "loss": 0.0017743892967700958, - "step": 16055 - }, - { - "epoch": 2.738277919863598, - "grad_norm": 0.06745338439941406, - "learning_rate": 1.7598099155629982e-05, - "loss": 0.0018204674124717712, - "step": 16060 - }, - { - "epoch": 2.7391304347826084, - "grad_norm": 0.029756128787994385, - "learning_rate": 1.7576387948260175e-05, - "loss": 0.0020426372066140175, - "step": 16065 - }, - { - "epoch": 2.73998294970162, - "grad_norm": 0.13447973132133484, - "learning_rate": 1.7554686041895217e-05, - "loss": 0.0023698143661022185, - "step": 16070 - }, - { - "epoch": 2.740835464620631, - "grad_norm": 0.09888533502817154, - "learning_rate": 1.7532993446666298e-05, - "loss": 0.0024117348715662957, - "step": 16075 - }, - { - "epoch": 2.741687979539642, - "grad_norm": 0.05919703096151352, - "learning_rate": 1.751131017270024e-05, - "loss": 0.0027751058340072634, - "step": 16080 - }, - { - "epoch": 2.742540494458653, - "grad_norm": 0.04920949414372444, - "learning_rate": 1.74896362301195e-05, - "loss": 0.0022046850994229318, - "step": 16085 - }, - { - "epoch": 2.743393009377664, - "grad_norm": 0.028095668181777, - "learning_rate": 1.746797162904222e-05, - "loss": 0.001455264538526535, - "step": 16090 - }, - { - "epoch": 2.7442455242966752, - "grad_norm": 0.03558868542313576, - "learning_rate": 1.7446316379582125e-05, - "loss": 0.0023241037502884864, - "step": 16095 - }, - { - "epoch": 2.7450980392156863, - "grad_norm": 0.07124538719654083, - "learning_rate": 1.742467049184864e-05, - "loss": 0.0014614716172218322, - "step": 16100 - }, - { - "epoch": 2.7459505541346974, - "grad_norm": 0.07355284690856934, - "learning_rate": 1.7403033975946774e-05, - "loss": 0.0018932107836008073, - "step": 16105 - }, - { - "epoch": 2.7468030690537084, - "grad_norm": 0.06485545635223389, - "learning_rate": 1.738140684197717e-05, - "loss": 0.0021881703287363052, - "step": 16110 - }, - { - "epoch": 2.7476555839727195, - "grad_norm": 0.05748758837580681, - "learning_rate": 1.735978910003607e-05, - "loss": 0.0019190840423107148, - "step": 16115 - }, - { - "epoch": 2.7485080988917305, - "grad_norm": 0.04986255615949631, - "learning_rate": 1.7338180760215395e-05, - "loss": 0.001525167189538479, - "step": 16120 - }, - { - "epoch": 2.7493606138107416, - "grad_norm": 0.06383983045816422, - "learning_rate": 1.731658183260262e-05, - "loss": 0.0026792695745825766, - "step": 16125 - }, - { - "epoch": 2.749531116794544, - "eval_loss": 0.047858335077762604, - "eval_runtime": 3.7263, - "eval_samples_per_second": 67.627, - "eval_steps_per_second": 1.073, - "step": 16126 - }, - { - "eval_cer_subset": 0.01459117038774081, - "eval_cer_subset_edit_distance": 896, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 16126 - }, - { - "epoch": 2.7502131287297527, - "grad_norm": 0.10121899098157883, - "learning_rate": 1.7294992327280826e-05, - "loss": 0.0027876641601324082, - "step": 16130 - }, - { - "epoch": 2.7510656436487637, - "grad_norm": 0.029004251584410667, - "learning_rate": 1.7273412254328743e-05, - "loss": 0.0015729216858744622, - "step": 16135 - }, - { - "epoch": 2.7519181585677748, - "grad_norm": 0.06613736599683762, - "learning_rate": 1.7251841623820638e-05, - "loss": 0.0020701587200164795, - "step": 16140 - }, - { - "epoch": 2.7527706734867863, - "grad_norm": 0.09212189167737961, - "learning_rate": 1.7230280445826422e-05, - "loss": 0.0026726944372057913, - "step": 16145 - }, - { - "epoch": 2.753623188405797, - "grad_norm": 0.12724192440509796, - "learning_rate": 1.720872873041157e-05, - "loss": 0.002590762265026569, - "step": 16150 - }, - { - "epoch": 2.7544757033248084, - "grad_norm": 0.043855708092451096, - "learning_rate": 1.7187186487637124e-05, - "loss": 0.001780974492430687, - "step": 16155 - }, - { - "epoch": 2.755328218243819, - "grad_norm": 0.10562611371278763, - "learning_rate": 1.7165653727559725e-05, - "loss": 0.002336742728948593, - "step": 16160 - }, - { - "epoch": 2.7561807331628305, - "grad_norm": 0.05162282660603523, - "learning_rate": 1.7144130460231574e-05, - "loss": 0.0018106916919350623, - "step": 16165 - }, - { - "epoch": 2.7570332480818416, - "grad_norm": 0.020110471174120903, - "learning_rate": 1.7122616695700467e-05, - "loss": 0.0014431983232498168, - "step": 16170 - }, - { - "epoch": 2.7578857630008526, - "grad_norm": 0.15154017508029938, - "learning_rate": 1.7101112444009725e-05, - "loss": 0.0019074320793151856, - "step": 16175 - }, - { - "epoch": 2.7587382779198637, - "grad_norm": 0.03481750935316086, - "learning_rate": 1.7079617715198264e-05, - "loss": 0.0037923645228147506, - "step": 16180 - }, - { - "epoch": 2.7595907928388748, - "grad_norm": 0.024081731215119362, - "learning_rate": 1.7058132519300524e-05, - "loss": 0.002791491337120533, - "step": 16185 - }, - { - "epoch": 2.760443307757886, - "grad_norm": 0.07880852371454239, - "learning_rate": 1.703665686634653e-05, - "loss": 0.0028480572625994684, - "step": 16190 - }, - { - "epoch": 2.761295822676897, - "grad_norm": 0.06910362094640732, - "learning_rate": 1.701519076636182e-05, - "loss": 0.0018049828708171845, - "step": 16195 - }, - { - "epoch": 2.762148337595908, - "grad_norm": 0.09321995079517365, - "learning_rate": 1.699373422936748e-05, - "loss": 0.001952703855931759, - "step": 16200 - }, - { - "epoch": 2.763000852514919, - "grad_norm": 0.05871212109923363, - "learning_rate": 1.6972287265380137e-05, - "loss": 0.00121518075466156, - "step": 16205 - }, - { - "epoch": 2.76385336743393, - "grad_norm": 0.10542161762714386, - "learning_rate": 1.6950849884411936e-05, - "loss": 0.0024038642644882203, - "step": 16210 - }, - { - "epoch": 2.764705882352941, - "grad_norm": 0.0580933652818203, - "learning_rate": 1.6929422096470582e-05, - "loss": 0.0021081961691379546, - "step": 16215 - }, - { - "epoch": 2.765558397271952, - "grad_norm": 0.024878472089767456, - "learning_rate": 1.6908003911559256e-05, - "loss": 0.0022545790299773215, - "step": 16220 - }, - { - "epoch": 2.7664109121909632, - "grad_norm": 0.060553766787052155, - "learning_rate": 1.6886595339676703e-05, - "loss": 0.0015277018770575523, - "step": 16225 - }, - { - "epoch": 2.7672634271099743, - "grad_norm": 0.06857582181692123, - "learning_rate": 1.6865196390817137e-05, - "loss": 0.001996198855340481, - "step": 16230 - }, - { - "epoch": 2.7681159420289854, - "grad_norm": 0.06866193562746048, - "learning_rate": 1.6843807074970316e-05, - "loss": 0.0014093054458498954, - "step": 16235 - }, - { - "epoch": 2.7689684569479964, - "grad_norm": 0.12889426946640015, - "learning_rate": 1.6822427402121476e-05, - "loss": 0.0029415406286716463, - "step": 16240 - }, - { - "epoch": 2.7698209718670075, - "grad_norm": 0.05907638370990753, - "learning_rate": 1.6801057382251363e-05, - "loss": 0.0020356021821498873, - "step": 16245 - }, - { - "epoch": 2.770673486786019, - "grad_norm": 0.05899703502655029, - "learning_rate": 1.6779697025336205e-05, - "loss": 0.0010949989780783653, - "step": 16250 - }, - { - "epoch": 2.7715260017050296, - "grad_norm": 0.048360541462898254, - "learning_rate": 1.6758346341347716e-05, - "loss": 0.002375531755387783, - "step": 16255 - }, - { - "epoch": 2.772378516624041, - "grad_norm": 0.06712590157985687, - "learning_rate": 1.6737005340253134e-05, - "loss": 0.0016120089218020438, - "step": 16260 - }, - { - "epoch": 2.7732310315430517, - "grad_norm": 0.04694962501525879, - "learning_rate": 1.6715674032015137e-05, - "loss": 0.0010866542346775533, - "step": 16265 - }, - { - "epoch": 2.7740835464620632, - "grad_norm": 0.06813527643680573, - "learning_rate": 1.6694352426591873e-05, - "loss": 0.001494432892650366, - "step": 16270 - }, - { - "epoch": 2.7749360613810743, - "grad_norm": 0.12899814546108246, - "learning_rate": 1.6673040533937004e-05, - "loss": 0.003590015694499016, - "step": 16275 - }, - { - "epoch": 2.7757885763000854, - "grad_norm": 0.013963109813630581, - "learning_rate": 1.6651738363999604e-05, - "loss": 0.0019298167899250984, - "step": 16280 - }, - { - "epoch": 2.7766410912190964, - "grad_norm": 0.03605286777019501, - "learning_rate": 1.6630445926724262e-05, - "loss": 0.0031480703502893446, - "step": 16285 - }, - { - "epoch": 2.7774936061381075, - "grad_norm": 0.10986622422933578, - "learning_rate": 1.660916323205098e-05, - "loss": 0.002917572297155857, - "step": 16290 - }, - { - "epoch": 2.7783461210571185, - "grad_norm": 0.057930365204811096, - "learning_rate": 1.658789028991523e-05, - "loss": 0.0026299282908439636, - "step": 16295 - }, - { - "epoch": 2.7791986359761296, - "grad_norm": 0.029447276145219803, - "learning_rate": 1.6566627110247917e-05, - "loss": 0.0022400498390197756, - "step": 16300 - }, - { - "epoch": 2.7800511508951407, - "grad_norm": 0.045625604689121246, - "learning_rate": 1.6545373702975423e-05, - "loss": 0.0010993774980306626, - "step": 16305 - }, - { - "epoch": 2.7809036658141517, - "grad_norm": 0.03276116028428078, - "learning_rate": 1.6524130078019536e-05, - "loss": 0.0017030857503414153, - "step": 16310 - }, - { - "epoch": 2.7817561807331628, - "grad_norm": 0.07950260490179062, - "learning_rate": 1.650289624529747e-05, - "loss": 0.0029186248779296876, - "step": 16315 - }, - { - "epoch": 2.782608695652174, - "grad_norm": 0.03196907788515091, - "learning_rate": 1.6481672214721915e-05, - "loss": 0.0021285150200128556, - "step": 16320 - }, - { - "epoch": 2.783461210571185, - "grad_norm": 0.08347548544406891, - "learning_rate": 1.6460457996200926e-05, - "loss": 0.0018467068672180175, - "step": 16325 - }, - { - "epoch": 2.784313725490196, - "grad_norm": 0.062316033989191055, - "learning_rate": 1.643925359963803e-05, - "loss": 0.002080459892749786, - "step": 16330 - }, - { - "epoch": 2.785166240409207, - "grad_norm": 0.06067380681633949, - "learning_rate": 1.641805903493214e-05, - "loss": 0.0014378841035068036, - "step": 16335 - }, - { - "epoch": 2.786018755328218, - "grad_norm": 0.19668881595134735, - "learning_rate": 1.6396874311977574e-05, - "loss": 0.0018663834780454636, - "step": 16340 - }, - { - "epoch": 2.7868712702472296, - "grad_norm": 0.03857511281967163, - "learning_rate": 1.637569944066407e-05, - "loss": 0.0017508219927549363, - "step": 16345 - }, - { - "epoch": 2.78772378516624, - "grad_norm": 0.06684751063585281, - "learning_rate": 1.6354534430876746e-05, - "loss": 0.0021339647471904756, - "step": 16350 - }, - { - "epoch": 2.7885763000852517, - "grad_norm": 0.0722980946302414, - "learning_rate": 1.633337929249616e-05, - "loss": 0.002456018142402172, - "step": 16355 - }, - { - "epoch": 2.7894288150042623, - "grad_norm": 0.013861587271094322, - "learning_rate": 1.6312234035398214e-05, - "loss": 0.0013738014735281468, - "step": 16360 - }, - { - "epoch": 2.790281329923274, - "grad_norm": 0.05103524401783943, - "learning_rate": 1.6291098669454237e-05, - "loss": 0.0012777662836015225, - "step": 16365 - }, - { - "epoch": 2.791133844842285, - "grad_norm": 0.08019815385341644, - "learning_rate": 1.6269973204530896e-05, - "loss": 0.0021779144182801245, - "step": 16370 - }, - { - "epoch": 2.791986359761296, - "grad_norm": 0.11674029380083084, - "learning_rate": 1.6248857650490287e-05, - "loss": 0.003945905342698097, - "step": 16375 - }, - { - "epoch": 2.792838874680307, - "grad_norm": 0.10289142280817032, - "learning_rate": 1.622775201718984e-05, - "loss": 0.0033991221338510514, - "step": 16380 - }, - { - "epoch": 2.793691389599318, - "grad_norm": 0.08295715600252151, - "learning_rate": 1.6206656314482372e-05, - "loss": 0.0025476697832345963, - "step": 16385 - }, - { - "epoch": 2.794543904518329, - "grad_norm": 0.09820916503667831, - "learning_rate": 1.618557055221605e-05, - "loss": 0.002469751611351967, - "step": 16390 - }, - { - "epoch": 2.79539641943734, - "grad_norm": 0.04547690227627754, - "learning_rate": 1.61644947402344e-05, - "loss": 0.0017419423907995223, - "step": 16395 - }, - { - "epoch": 2.7962489343563512, - "grad_norm": 0.09098807722330093, - "learning_rate": 1.6143428888376336e-05, - "loss": 0.0025540072470903396, - "step": 16400 - }, - { - "epoch": 2.7971014492753623, - "grad_norm": 0.06253538280725479, - "learning_rate": 1.6122373006476078e-05, - "loss": 0.00161474347114563, - "step": 16405 - }, - { - "epoch": 2.7979539641943734, - "grad_norm": 0.10068398714065552, - "learning_rate": 1.6101327104363236e-05, - "loss": 0.0030464882031083105, - "step": 16410 - }, - { - "epoch": 2.7988064791133844, - "grad_norm": 0.04052518680691719, - "learning_rate": 1.6080291191862708e-05, - "loss": 0.001292982418090105, - "step": 16415 - }, - { - "epoch": 2.7996589940323955, - "grad_norm": 0.09480784833431244, - "learning_rate": 1.605926527879478e-05, - "loss": 0.002949811331927776, - "step": 16420 - }, - { - "epoch": 2.8005115089514065, - "grad_norm": 0.08064186573028564, - "learning_rate": 1.603824937497505e-05, - "loss": 0.001863202080130577, - "step": 16425 - }, - { - "epoch": 2.8013640238704176, - "grad_norm": 0.03577118366956711, - "learning_rate": 1.601724349021443e-05, - "loss": 0.0015472200699150561, - "step": 16430 - }, - { - "epoch": 2.8022165387894287, - "grad_norm": 0.04698857292532921, - "learning_rate": 1.5996247634319162e-05, - "loss": 0.002168430760502815, - "step": 16435 - }, - { - "epoch": 2.80306905370844, - "grad_norm": 0.09031593799591064, - "learning_rate": 1.5975261817090803e-05, - "loss": 0.0017798427492380143, - "step": 16440 - }, - { - "epoch": 2.803921568627451, - "grad_norm": 0.24021683633327484, - "learning_rate": 1.5954286048326258e-05, - "loss": 0.0024022582918405535, - "step": 16445 - }, - { - "epoch": 2.8047740835464623, - "grad_norm": 0.07379221171140671, - "learning_rate": 1.5933320337817685e-05, - "loss": 0.0016447069123387338, - "step": 16450 - }, - { - "epoch": 2.805626598465473, - "grad_norm": 0.07145442813634872, - "learning_rate": 1.59123646953526e-05, - "loss": 0.002100140042603016, - "step": 16455 - }, - { - "epoch": 2.8064791133844844, - "grad_norm": 0.06444204598665237, - "learning_rate": 1.5891419130713783e-05, - "loss": 0.0022544978186488152, - "step": 16460 - }, - { - "epoch": 2.8073316283034955, - "grad_norm": 0.07764707505702972, - "learning_rate": 1.5870483653679307e-05, - "loss": 0.002028309740126133, - "step": 16465 - }, - { - "epoch": 2.8081841432225065, - "grad_norm": 0.13890637457370758, - "learning_rate": 1.584955827402257e-05, - "loss": 0.001833663322031498, - "step": 16470 - }, - { - "epoch": 2.8090366581415176, - "grad_norm": 0.06412612646818161, - "learning_rate": 1.5828643001512236e-05, - "loss": 0.0017296869307756424, - "step": 16475 - }, - { - "epoch": 2.8098891730605287, - "grad_norm": 0.05978688597679138, - "learning_rate": 1.5807737845912234e-05, - "loss": 0.001933468133211136, - "step": 16480 - }, - { - "epoch": 2.8107416879795397, - "grad_norm": 0.1131395548582077, - "learning_rate": 1.5786842816981778e-05, - "loss": 0.003291580080986023, - "step": 16485 - }, - { - "epoch": 2.8115942028985508, - "grad_norm": 0.0549713559448719, - "learning_rate": 1.5765957924475394e-05, - "loss": 0.0019789932295680044, - "step": 16490 - }, - { - "epoch": 2.812446717817562, - "grad_norm": 0.08038460463285446, - "learning_rate": 1.5745083178142833e-05, - "loss": 0.002347235009074211, - "step": 16495 - }, - { - "epoch": 2.813299232736573, - "grad_norm": 0.05014783889055252, - "learning_rate": 1.5724218587729098e-05, - "loss": 0.0016623528674244881, - "step": 16500 - }, - { - "epoch": 2.814151747655584, - "grad_norm": 0.05042316019535065, - "learning_rate": 1.5703364162974503e-05, - "loss": 0.0018199939280748368, - "step": 16505 - }, - { - "epoch": 2.815004262574595, - "grad_norm": 0.056051138788461685, - "learning_rate": 1.5682519913614565e-05, - "loss": 0.0016215803101658822, - "step": 16510 - }, - { - "epoch": 2.815856777493606, - "grad_norm": 0.04295732453465462, - "learning_rate": 1.5661685849380098e-05, - "loss": 0.0020044256001710893, - "step": 16515 - }, - { - "epoch": 2.816709292412617, - "grad_norm": 0.02020161598920822, - "learning_rate": 1.564086197999712e-05, - "loss": 0.0018876813352108, - "step": 16520 - }, - { - "epoch": 2.817561807331628, - "grad_norm": 0.09220346808433533, - "learning_rate": 1.562004831518691e-05, - "loss": 0.0015535833314061164, - "step": 16525 - }, - { - "epoch": 2.8184143222506393, - "grad_norm": 0.09728234261274338, - "learning_rate": 1.5599244864665966e-05, - "loss": 0.0015536649152636528, - "step": 16530 - }, - { - "epoch": 2.8192668371696503, - "grad_norm": 0.17288024723529816, - "learning_rate": 1.5578451638146053e-05, - "loss": 0.0021170184016227724, - "step": 16535 - }, - { - "epoch": 2.8201193520886614, - "grad_norm": 0.056582558900117874, - "learning_rate": 1.5557668645334132e-05, - "loss": 0.0030540911480784415, - "step": 16540 - }, - { - "epoch": 2.820971867007673, - "grad_norm": 0.17674417793750763, - "learning_rate": 1.553689589593238e-05, - "loss": 0.001543693896383047, - "step": 16545 - }, - { - "epoch": 2.8218243819266835, - "grad_norm": 0.06186344474554062, - "learning_rate": 1.551613339963823e-05, - "loss": 0.001764528639614582, - "step": 16550 - }, - { - "epoch": 2.822676896845695, - "grad_norm": 0.13224560022354126, - "learning_rate": 1.5495381166144288e-05, - "loss": 0.004735496640205383, - "step": 16555 - }, - { - "epoch": 2.8235294117647056, - "grad_norm": 0.1427813619375229, - "learning_rate": 1.5474639205138406e-05, - "loss": 0.003041662834584713, - "step": 16560 - }, - { - "epoch": 2.824381926683717, - "grad_norm": 0.09970462322235107, - "learning_rate": 1.5453907526303614e-05, - "loss": 0.0025150768458843233, - "step": 16565 - }, - { - "epoch": 2.825234441602728, - "grad_norm": 0.02305634692311287, - "learning_rate": 1.5433186139318144e-05, - "loss": 0.001219399645924568, - "step": 16570 - }, - { - "epoch": 2.8260869565217392, - "grad_norm": 0.04805911332368851, - "learning_rate": 1.541247505385543e-05, - "loss": 0.0012801218777894973, - "step": 16575 - }, - { - "epoch": 2.8269394714407503, - "grad_norm": 0.08059800416231155, - "learning_rate": 1.539177427958408e-05, - "loss": 0.0031003907322883608, - "step": 16580 - }, - { - "epoch": 2.8277919863597614, - "grad_norm": 0.05763188377022743, - "learning_rate": 1.537108382616794e-05, - "loss": 0.002337191253900528, - "step": 16585 - }, - { - "epoch": 2.8286445012787724, - "grad_norm": 0.06821907311677933, - "learning_rate": 1.535040370326597e-05, - "loss": 0.0030008716508746146, - "step": 16590 - }, - { - "epoch": 2.8294970161977835, - "grad_norm": 0.12901924550533295, - "learning_rate": 1.5329733920532358e-05, - "loss": 0.0035179533064365388, - "step": 16595 - }, - { - "epoch": 2.8303495311167945, - "grad_norm": 0.040896832942962646, - "learning_rate": 1.5309074487616435e-05, - "loss": 0.0020170003175735475, - "step": 16600 - }, - { - "epoch": 2.8312020460358056, - "grad_norm": 0.06776095926761627, - "learning_rate": 1.5288425414162725e-05, - "loss": 0.0017662534490227699, - "step": 16605 - }, - { - "epoch": 2.8320545609548167, - "grad_norm": 0.08130808174610138, - "learning_rate": 1.5267786709810897e-05, - "loss": 0.0018257603049278259, - "step": 16610 - }, - { - "epoch": 2.8329070758738277, - "grad_norm": 0.05846976861357689, - "learning_rate": 1.5247158384195778e-05, - "loss": 0.0013240544125437737, - "step": 16615 - }, - { - "epoch": 2.833759590792839, - "grad_norm": 0.113974429666996, - "learning_rate": 1.522654044694736e-05, - "loss": 0.002671768143773079, - "step": 16620 - }, - { - "epoch": 2.83461210571185, - "grad_norm": 0.03519630804657936, - "learning_rate": 1.5205932907690771e-05, - "loss": 0.001667863130569458, - "step": 16625 - }, - { - "epoch": 2.835464620630861, - "grad_norm": 0.014673003926873207, - "learning_rate": 1.5185335776046322e-05, - "loss": 0.002035524509847164, - "step": 16630 - }, - { - "epoch": 2.836317135549872, - "grad_norm": 0.05683857575058937, - "learning_rate": 1.5164749061629407e-05, - "loss": 0.0021878147497773172, - "step": 16635 - }, - { - "epoch": 2.8371696504688835, - "grad_norm": 0.08671200275421143, - "learning_rate": 1.5144172774050623e-05, - "loss": 0.002064511738717556, - "step": 16640 - }, - { - "epoch": 2.838022165387894, - "grad_norm": 0.041581057012081146, - "learning_rate": 1.512360692291563e-05, - "loss": 0.0019536083564162254, - "step": 16645 - }, - { - "epoch": 2.8388746803069056, - "grad_norm": 0.10846979171037674, - "learning_rate": 1.5103051517825288e-05, - "loss": 0.0026564691215753555, - "step": 16650 - }, - { - "epoch": 2.839727195225916, - "grad_norm": 0.026884516701102257, - "learning_rate": 1.5082506568375526e-05, - "loss": 0.0026851309463381766, - "step": 16655 - }, - { - "epoch": 2.8405797101449277, - "grad_norm": 0.0613347552716732, - "learning_rate": 1.506197208415741e-05, - "loss": 0.0014739801175892354, - "step": 16660 - }, - { - "epoch": 2.8414322250639388, - "grad_norm": 0.06315013766288757, - "learning_rate": 1.504144807475712e-05, - "loss": 0.0026756677776575088, - "step": 16665 - }, - { - "epoch": 2.84228473998295, - "grad_norm": 0.04869166761636734, - "learning_rate": 1.5020934549755933e-05, - "loss": 0.0020816361531615256, - "step": 16670 - }, - { - "epoch": 2.843137254901961, - "grad_norm": 0.07282520830631256, - "learning_rate": 1.5000431518730273e-05, - "loss": 0.0008225045166909695, - "step": 16675 - }, - { - "epoch": 2.843989769820972, - "grad_norm": 0.051693812012672424, - "learning_rate": 1.4979938991251607e-05, - "loss": 0.002745438739657402, - "step": 16680 - }, - { - "epoch": 2.844842284739983, - "grad_norm": 0.1495431363582611, - "learning_rate": 1.4959456976886558e-05, - "loss": 0.001805400662124157, - "step": 16685 - }, - { - "epoch": 2.845694799658994, - "grad_norm": 0.05393834039568901, - "learning_rate": 1.4938985485196799e-05, - "loss": 0.0017135551199316979, - "step": 16690 - }, - { - "epoch": 2.846547314578005, - "grad_norm": 0.06205644831061363, - "learning_rate": 1.4918524525739088e-05, - "loss": 0.002358596958220005, - "step": 16695 - }, - { - "epoch": 2.847399829497016, - "grad_norm": 0.1177382543683052, - "learning_rate": 1.4898074108065306e-05, - "loss": 0.00382155142724514, - "step": 16700 - }, - { - "epoch": 2.8482523444160273, - "grad_norm": 0.06532850116491318, - "learning_rate": 1.487763424172238e-05, - "loss": 0.002384480834007263, - "step": 16705 - }, - { - "epoch": 2.8491048593350383, - "grad_norm": 0.05195530876517296, - "learning_rate": 1.4857204936252313e-05, - "loss": 0.0030395207926630975, - "step": 16710 - }, - { - "epoch": 2.8499573742540494, - "grad_norm": 0.06609994173049927, - "learning_rate": 1.4836786201192182e-05, - "loss": 0.002476612851023674, - "step": 16715 - }, - { - "epoch": 2.8508098891730604, - "grad_norm": 0.07928726077079773, - "learning_rate": 1.4816378046074146e-05, - "loss": 0.001881701312959194, - "step": 16720 - }, - { - "epoch": 2.8516624040920715, - "grad_norm": 0.08206343650817871, - "learning_rate": 1.4795980480425392e-05, - "loss": 0.0017553886398673057, - "step": 16725 - }, - { - "epoch": 2.8525149190110826, - "grad_norm": 0.08301947265863419, - "learning_rate": 1.4775593513768202e-05, - "loss": 0.0031315773725509644, - "step": 16730 - }, - { - "epoch": 2.8533674339300936, - "grad_norm": 0.034867819398641586, - "learning_rate": 1.4755217155619887e-05, - "loss": 0.0016052091494202613, - "step": 16735 - }, - { - "epoch": 2.8542199488491047, - "grad_norm": 0.03188352286815643, - "learning_rate": 1.4734851415492789e-05, - "loss": 0.002192831225693226, - "step": 16740 - }, - { - "epoch": 2.855072463768116, - "grad_norm": 0.07953578233718872, - "learning_rate": 1.4714496302894339e-05, - "loss": 0.002898801490664482, - "step": 16745 - }, - { - "epoch": 2.855924978687127, - "grad_norm": 0.06410107016563416, - "learning_rate": 1.4694151827326966e-05, - "loss": 0.0023399315774440765, - "step": 16750 - }, - { - "epoch": 2.8567774936061383, - "grad_norm": 0.09000501781702042, - "learning_rate": 1.4673817998288152e-05, - "loss": 0.003346502408385277, - "step": 16755 - }, - { - "epoch": 2.857630008525149, - "grad_norm": 0.07080121338367462, - "learning_rate": 1.465349482527039e-05, - "loss": 0.002062254026532173, - "step": 16760 - }, - { - "epoch": 2.8584825234441604, - "grad_norm": 0.03813991695642471, - "learning_rate": 1.4633182317761244e-05, - "loss": 0.0037174589931964876, - "step": 16765 - }, - { - "epoch": 2.8593350383631715, - "grad_norm": 0.035782843828201294, - "learning_rate": 1.4612880485243246e-05, - "loss": 0.0017096459865570067, - "step": 16770 - }, - { - "epoch": 2.8601875532821825, - "grad_norm": 0.058607637882232666, - "learning_rate": 1.4592589337193962e-05, - "loss": 0.0013915538787841798, - "step": 16775 - }, - { - "epoch": 2.8610400682011936, - "grad_norm": 0.06763444095849991, - "learning_rate": 1.4572308883085995e-05, - "loss": 0.0025088803842663763, - "step": 16780 - }, - { - "epoch": 2.8618925831202047, - "grad_norm": 0.08015233278274536, - "learning_rate": 1.4552039132386913e-05, - "loss": 0.001922524720430374, - "step": 16785 - }, - { - "epoch": 2.8627450980392157, - "grad_norm": 0.07501938939094543, - "learning_rate": 1.4531780094559332e-05, - "loss": 0.0023180417716503142, - "step": 16790 - }, - { - "epoch": 2.863597612958227, - "grad_norm": 0.1105467900633812, - "learning_rate": 1.4511531779060838e-05, - "loss": 0.0017500972375273705, - "step": 16795 - }, - { - "epoch": 2.864450127877238, - "grad_norm": 0.016127226874232292, - "learning_rate": 1.4491294195344016e-05, - "loss": 0.0029237957671284674, - "step": 16800 - }, - { - "epoch": 2.865302642796249, - "grad_norm": 0.06432373076677322, - "learning_rate": 1.447106735285644e-05, - "loss": 0.002439063973724842, - "step": 16805 - }, - { - "epoch": 2.86615515771526, - "grad_norm": 0.07629001885652542, - "learning_rate": 1.4450851261040664e-05, - "loss": 0.0021009005606174467, - "step": 16810 - }, - { - "epoch": 2.867007672634271, - "grad_norm": 0.05186440795660019, - "learning_rate": 1.4430645929334253e-05, - "loss": 0.0010275249369442463, - "step": 16815 - }, - { - "epoch": 2.867860187553282, - "grad_norm": 0.06517529487609863, - "learning_rate": 1.4410451367169705e-05, - "loss": 0.0022583767771720887, - "step": 16820 - }, - { - "epoch": 2.868712702472293, - "grad_norm": 0.03262385353446007, - "learning_rate": 1.4390267583974544e-05, - "loss": 0.002132249251008034, - "step": 16825 - }, - { - "epoch": 2.869565217391304, - "grad_norm": 0.04578368365764618, - "learning_rate": 1.4370094589171199e-05, - "loss": 0.0015474225394427777, - "step": 16830 - }, - { - "epoch": 2.8704177323103153, - "grad_norm": 0.11160826683044434, - "learning_rate": 1.4349932392177122e-05, - "loss": 0.001869696006178856, - "step": 16835 - }, - { - "epoch": 2.8712702472293268, - "grad_norm": 0.07949322462081909, - "learning_rate": 1.4329781002404687e-05, - "loss": 0.002716188505291939, - "step": 16840 - }, - { - "epoch": 2.8721227621483374, - "grad_norm": 0.12685050070285797, - "learning_rate": 1.430964042926123e-05, - "loss": 0.0026786208152770998, - "step": 16845 - }, - { - "epoch": 2.872975277067349, - "grad_norm": 0.03826960548758507, - "learning_rate": 1.428951068214904e-05, - "loss": 0.0015644762665033341, - "step": 16850 - }, - { - "epoch": 2.8738277919863595, - "grad_norm": 0.0909774899482727, - "learning_rate": 1.4269391770465346e-05, - "loss": 0.0020492007955908776, - "step": 16855 - }, - { - "epoch": 2.874680306905371, - "grad_norm": 0.09891391545534134, - "learning_rate": 1.4249283703602345e-05, - "loss": 0.0028120437636971474, - "step": 16860 - }, - { - "epoch": 2.875532821824382, - "grad_norm": 0.06281251460313797, - "learning_rate": 1.4229186490947126e-05, - "loss": 0.001888560503721237, - "step": 16865 - }, - { - "epoch": 2.876385336743393, - "grad_norm": 0.0330815464258194, - "learning_rate": 1.4209100141881763e-05, - "loss": 0.002112870290875435, - "step": 16870 - }, - { - "epoch": 2.877237851662404, - "grad_norm": 0.053650904446840286, - "learning_rate": 1.4189024665783207e-05, - "loss": 0.0012864695861935615, - "step": 16875 - }, - { - "epoch": 2.8780903665814153, - "grad_norm": 0.035941146314144135, - "learning_rate": 1.4168960072023384e-05, - "loss": 0.0028607305139303207, - "step": 16880 - }, - { - "epoch": 2.8789428815004263, - "grad_norm": 0.025085339322686195, - "learning_rate": 1.41489063699691e-05, - "loss": 0.001800362393260002, - "step": 16885 - }, - { - "epoch": 2.8797953964194374, - "grad_norm": 0.08627615869045258, - "learning_rate": 1.4128863568982088e-05, - "loss": 0.0023837506771087645, - "step": 16890 - }, - { - "epoch": 2.8806479113384484, - "grad_norm": 0.11542297154664993, - "learning_rate": 1.4108831678419e-05, - "loss": 0.003114992380142212, - "step": 16895 - }, - { - "epoch": 2.8815004262574595, - "grad_norm": 0.04762958735227585, - "learning_rate": 1.4088810707631375e-05, - "loss": 0.0020215384662151336, - "step": 16900 - }, - { - "epoch": 2.8823529411764706, - "grad_norm": 0.08232380449771881, - "learning_rate": 1.4068800665965687e-05, - "loss": 0.002120315283536911, - "step": 16905 - }, - { - "epoch": 2.8832054560954816, - "grad_norm": 0.04248562082648277, - "learning_rate": 1.4048801562763272e-05, - "loss": 0.001563185639679432, - "step": 16910 - }, - { - "epoch": 2.8840579710144927, - "grad_norm": 0.058416519314050674, - "learning_rate": 1.4028813407360393e-05, - "loss": 0.0017185319215059281, - "step": 16915 - }, - { - "epoch": 2.8849104859335037, - "grad_norm": 0.03542419150471687, - "learning_rate": 1.4008836209088185e-05, - "loss": 0.0017645543441176415, - "step": 16920 - }, - { - "epoch": 2.885763000852515, - "grad_norm": 0.055227622389793396, - "learning_rate": 1.3988869977272645e-05, - "loss": 0.002331301011145115, - "step": 16925 - }, - { - "epoch": 2.886615515771526, - "grad_norm": 0.02851465903222561, - "learning_rate": 1.3968914721234703e-05, - "loss": 0.00188722126185894, - "step": 16930 - }, - { - "epoch": 2.887468030690537, - "grad_norm": 0.10346336662769318, - "learning_rate": 1.3948970450290129e-05, - "loss": 0.003334081172943115, - "step": 16935 - }, - { - "epoch": 2.888320545609548, - "grad_norm": 0.040114935487508774, - "learning_rate": 1.3929037173749564e-05, - "loss": 0.002542957104742527, - "step": 16940 - }, - { - "epoch": 2.8891730605285595, - "grad_norm": 0.06734409183263779, - "learning_rate": 1.3909114900918517e-05, - "loss": 0.002022533863782883, - "step": 16945 - }, - { - "epoch": 2.89002557544757, - "grad_norm": 0.03672570362687111, - "learning_rate": 1.3889203641097392e-05, - "loss": 0.0017688646912574768, - "step": 16950 - }, - { - "epoch": 2.8908780903665816, - "grad_norm": 0.016099590808153152, - "learning_rate": 1.3869303403581397e-05, - "loss": 0.002179678343236446, - "step": 16955 - }, - { - "epoch": 2.8917306052855922, - "grad_norm": 0.0655573159456253, - "learning_rate": 1.384941419766066e-05, - "loss": 0.0020285720005631448, - "step": 16960 - }, - { - "epoch": 2.8925831202046037, - "grad_norm": 0.03573548421263695, - "learning_rate": 1.3829536032620105e-05, - "loss": 0.002248694933950901, - "step": 16965 - }, - { - "epoch": 2.893435635123615, - "grad_norm": 0.06182318180799484, - "learning_rate": 1.3809668917739507e-05, - "loss": 0.002159777097404003, - "step": 16970 - }, - { - "epoch": 2.894288150042626, - "grad_norm": 0.09492490440607071, - "learning_rate": 1.3789812862293527e-05, - "loss": 0.0027505803853273393, - "step": 16975 - }, - { - "epoch": 2.895140664961637, - "grad_norm": 0.043292637914419174, - "learning_rate": 1.3769967875551613e-05, - "loss": 0.0018307223916053772, - "step": 16980 - }, - { - "epoch": 2.895993179880648, - "grad_norm": 0.08455146849155426, - "learning_rate": 1.375013396677807e-05, - "loss": 0.0019843194633722304, - "step": 16985 - }, - { - "epoch": 2.896845694799659, - "grad_norm": 0.06926032900810242, - "learning_rate": 1.3730311145232023e-05, - "loss": 0.0024761717766523363, - "step": 16990 - }, - { - "epoch": 2.89769820971867, - "grad_norm": 0.0860179215669632, - "learning_rate": 1.3710499420167413e-05, - "loss": 0.002175389975309372, - "step": 16995 - }, - { - "epoch": 2.898550724637681, - "grad_norm": 0.10651890188455582, - "learning_rate": 1.3690698800833026e-05, - "loss": 0.0033860310912132265, - "step": 17000 - }, - { - "epoch": 2.899403239556692, - "grad_norm": 0.09691976010799408, - "learning_rate": 1.3670909296472464e-05, - "loss": 0.0021878845989704134, - "step": 17005 - }, - { - "epoch": 2.9002557544757033, - "grad_norm": 0.11704960465431213, - "learning_rate": 1.3651130916324107e-05, - "loss": 0.00286871287971735, - "step": 17010 - }, - { - "epoch": 2.9011082693947143, - "grad_norm": 0.09645909816026688, - "learning_rate": 1.3631363669621153e-05, - "loss": 0.001873398572206497, - "step": 17015 - }, - { - "epoch": 2.9019607843137254, - "grad_norm": 0.13174127042293549, - "learning_rate": 1.3611607565591639e-05, - "loss": 0.00285712368786335, - "step": 17020 - }, - { - "epoch": 2.9028132992327365, - "grad_norm": 0.07539260387420654, - "learning_rate": 1.359186261345835e-05, - "loss": 0.0027119526639580727, - "step": 17025 - }, - { - "epoch": 2.9036658141517475, - "grad_norm": 0.06165684387087822, - "learning_rate": 1.3572128822438892e-05, - "loss": 0.0018354985862970353, - "step": 17030 - }, - { - "epoch": 2.9045183290707586, - "grad_norm": 0.06021244078874588, - "learning_rate": 1.3552406201745654e-05, - "loss": 0.0016940701752901077, - "step": 17035 - }, - { - "epoch": 2.90537084398977, - "grad_norm": 0.09488464146852493, - "learning_rate": 1.3532694760585795e-05, - "loss": 0.0019129924476146698, - "step": 17040 - }, - { - "epoch": 2.9062233589087807, - "grad_norm": 0.04894041642546654, - "learning_rate": 1.3512994508161307e-05, - "loss": 0.002598444186151028, - "step": 17045 - }, - { - "epoch": 2.907075873827792, - "grad_norm": 0.045589860528707504, - "learning_rate": 1.349330545366889e-05, - "loss": 0.0018267668783664703, - "step": 17050 - }, - { - "epoch": 2.907928388746803, - "grad_norm": 0.04273771867156029, - "learning_rate": 1.3473627606300071e-05, - "loss": 0.0013479530811309815, - "step": 17055 - }, - { - "epoch": 2.9087809036658143, - "grad_norm": 0.050675440579652786, - "learning_rate": 1.345396097524111e-05, - "loss": 0.001664750650525093, - "step": 17060 - }, - { - "epoch": 2.9096334185848254, - "grad_norm": 0.07637523114681244, - "learning_rate": 1.3434305569673059e-05, - "loss": 0.001719363033771515, - "step": 17065 - }, - { - "epoch": 2.9104859335038364, - "grad_norm": 0.03540422394871712, - "learning_rate": 1.3414661398771711e-05, - "loss": 0.002338713780045509, - "step": 17070 - }, - { - "epoch": 2.9113384484228475, - "grad_norm": 0.09252000600099564, - "learning_rate": 1.3395028471707613e-05, - "loss": 0.0018722079694271088, - "step": 17075 - }, - { - "epoch": 2.9121909633418586, - "grad_norm": 0.08759574592113495, - "learning_rate": 1.3375406797646068e-05, - "loss": 0.003211042284965515, - "step": 17080 - }, - { - "epoch": 2.9130434782608696, - "grad_norm": 0.07291707396507263, - "learning_rate": 1.3355796385747121e-05, - "loss": 0.002141663059592247, - "step": 17085 - }, - { - "epoch": 2.9138959931798807, - "grad_norm": 0.03608965128660202, - "learning_rate": 1.3336197245165578e-05, - "loss": 0.0015133512206375599, - "step": 17090 - }, - { - "epoch": 2.9147485080988917, - "grad_norm": 0.0686686635017395, - "learning_rate": 1.3316609385050954e-05, - "loss": 0.0015084316954016685, - "step": 17095 - }, - { - "epoch": 2.915601023017903, - "grad_norm": 0.052468664944171906, - "learning_rate": 1.3297032814547539e-05, - "loss": 0.00120701240375638, - "step": 17100 - }, - { - "epoch": 2.916453537936914, - "grad_norm": 0.06129363924264908, - "learning_rate": 1.3277467542794304e-05, - "loss": 0.002575872652232647, - "step": 17105 - }, - { - "epoch": 2.917306052855925, - "grad_norm": 0.06045043095946312, - "learning_rate": 1.3257913578924969e-05, - "loss": 0.0022510627284646036, - "step": 17110 - }, - { - "epoch": 2.918158567774936, - "grad_norm": 0.09090365469455719, - "learning_rate": 1.3238370932067996e-05, - "loss": 0.002203880250453949, - "step": 17115 - }, - { - "epoch": 2.919011082693947, - "grad_norm": 0.03382663428783417, - "learning_rate": 1.3218839611346522e-05, - "loss": 0.0009420939721167087, - "step": 17120 - }, - { - "epoch": 2.919863597612958, - "grad_norm": 0.06900735199451447, - "learning_rate": 1.3199319625878431e-05, - "loss": 0.0021647622808814047, - "step": 17125 - }, - { - "epoch": 2.920716112531969, - "grad_norm": 0.04494655504822731, - "learning_rate": 1.3179810984776277e-05, - "loss": 0.0027208495885133743, - "step": 17130 - }, - { - "epoch": 2.9215686274509802, - "grad_norm": 0.05262625217437744, - "learning_rate": 1.3160313697147373e-05, - "loss": 0.0015311154536902904, - "step": 17135 - }, - { - "epoch": 2.9224211423699913, - "grad_norm": 0.025083297863602638, - "learning_rate": 1.314082777209368e-05, - "loss": 0.00193443913012743, - "step": 17140 - }, - { - "epoch": 2.923273657289003, - "grad_norm": 0.08246373385190964, - "learning_rate": 1.3121353218711892e-05, - "loss": 0.0019143052399158479, - "step": 17145 - }, - { - "epoch": 2.9241261722080134, - "grad_norm": 0.1049862802028656, - "learning_rate": 1.3101890046093376e-05, - "loss": 0.002230258658528328, - "step": 17150 - }, - { - "epoch": 2.924978687127025, - "grad_norm": 0.042054325342178345, - "learning_rate": 1.3082438263324169e-05, - "loss": 0.0011081861332058907, - "step": 17155 - }, - { - "epoch": 2.9258312020460355, - "grad_norm": 0.0713399276137352, - "learning_rate": 1.3062997879485033e-05, - "loss": 0.0015817128121852874, - "step": 17160 - }, - { - "epoch": 2.926683716965047, - "grad_norm": 0.07212921977043152, - "learning_rate": 1.3043568903651381e-05, - "loss": 0.002985073998570442, - "step": 17165 - }, - { - "epoch": 2.927536231884058, - "grad_norm": 0.14285585284233093, - "learning_rate": 1.3024151344893299e-05, - "loss": 0.0019961275160312653, - "step": 17170 - }, - { - "epoch": 2.928388746803069, - "grad_norm": 0.06164155155420303, - "learning_rate": 1.3004745212275543e-05, - "loss": 0.0017055023461580276, - "step": 17175 - }, - { - "epoch": 2.92924126172208, - "grad_norm": 0.02376371994614601, - "learning_rate": 1.298535051485756e-05, - "loss": 0.0013552471064031124, - "step": 17180 - }, - { - "epoch": 2.9300937766410913, - "grad_norm": 0.07454569637775421, - "learning_rate": 1.296596726169342e-05, - "loss": 0.002513031102716923, - "step": 17185 - }, - { - "epoch": 2.9309462915601023, - "grad_norm": 0.0765121579170227, - "learning_rate": 1.2946595461831892e-05, - "loss": 0.0019039563834667207, - "step": 17190 - }, - { - "epoch": 2.9317988064791134, - "grad_norm": 0.07360806316137314, - "learning_rate": 1.2927235124316362e-05, - "loss": 0.001339799538254738, - "step": 17195 - }, - { - "epoch": 2.9326513213981245, - "grad_norm": 0.18903285264968872, - "learning_rate": 1.2907886258184876e-05, - "loss": 0.003720489144325256, - "step": 17200 - }, - { - "epoch": 2.9335038363171355, - "grad_norm": 0.07760016620159149, - "learning_rate": 1.2888548872470143e-05, - "loss": 0.0015237806364893913, - "step": 17205 - }, - { - "epoch": 2.9343563512361466, - "grad_norm": 0.055864643305540085, - "learning_rate": 1.286922297619949e-05, - "loss": 0.0014091457240283489, - "step": 17210 - }, - { - "epoch": 2.9352088661551576, - "grad_norm": 0.08161517977714539, - "learning_rate": 1.2849908578394888e-05, - "loss": 0.002047298289835453, - "step": 17215 - }, - { - "epoch": 2.9360613810741687, - "grad_norm": 0.11219590902328491, - "learning_rate": 1.283060568807294e-05, - "loss": 0.0023268122225999833, - "step": 17220 - }, - { - "epoch": 2.9369138959931798, - "grad_norm": 0.10008323192596436, - "learning_rate": 1.2811314314244867e-05, - "loss": 0.002319963276386261, - "step": 17225 - }, - { - "epoch": 2.937766410912191, - "grad_norm": 0.077080138027668, - "learning_rate": 1.2792034465916536e-05, - "loss": 0.0020459359511733055, - "step": 17230 - }, - { - "epoch": 2.938618925831202, - "grad_norm": 0.09049349278211594, - "learning_rate": 1.2772766152088431e-05, - "loss": 0.0038630947470664977, - "step": 17235 - }, - { - "epoch": 2.9394714407502134, - "grad_norm": 0.09306768327951431, - "learning_rate": 1.275350938175563e-05, - "loss": 0.0017305316403508186, - "step": 17240 - }, - { - "epoch": 2.940323955669224, - "grad_norm": 0.061699800193309784, - "learning_rate": 1.2734264163907824e-05, - "loss": 0.00341113954782486, - "step": 17245 - }, - { - "epoch": 2.9411764705882355, - "grad_norm": 0.11029893159866333, - "learning_rate": 1.2715030507529347e-05, - "loss": 0.0023353056982159614, - "step": 17250 - }, - { - "epoch": 2.942028985507246, - "grad_norm": 0.06272252649068832, - "learning_rate": 1.2695808421599087e-05, - "loss": 0.0012727061286568642, - "step": 17255 - }, - { - "epoch": 2.9428815004262576, - "grad_norm": 0.02106044627726078, - "learning_rate": 1.2676597915090567e-05, - "loss": 0.0020675512030720712, - "step": 17260 - }, - { - "epoch": 2.9437340153452687, - "grad_norm": 0.08245997875928879, - "learning_rate": 1.2657398996971883e-05, - "loss": 0.002128716930747032, - "step": 17265 - }, - { - "epoch": 2.9445865302642797, - "grad_norm": 0.10804266482591629, - "learning_rate": 1.2638211676205718e-05, - "loss": 0.0012407343834638595, - "step": 17270 - }, - { - "epoch": 2.945439045183291, - "grad_norm": 0.0485721081495285, - "learning_rate": 1.2619035961749375e-05, - "loss": 0.0019056517630815506, - "step": 17275 - }, - { - "epoch": 2.946291560102302, - "grad_norm": 0.04094598814845085, - "learning_rate": 1.2599871862554694e-05, - "loss": 0.0014778503216803073, - "step": 17280 - }, - { - "epoch": 2.947144075021313, - "grad_norm": 0.08831547200679779, - "learning_rate": 1.2580719387568133e-05, - "loss": 0.002304557338356972, - "step": 17285 - }, - { - "epoch": 2.947996589940324, - "grad_norm": 0.02547610178589821, - "learning_rate": 1.2561578545730685e-05, - "loss": 0.0010631450451910496, - "step": 17290 - }, - { - "epoch": 2.948849104859335, - "grad_norm": 0.09562932699918747, - "learning_rate": 1.2542449345977952e-05, - "loss": 0.0021377883851528166, - "step": 17295 - }, - { - "epoch": 2.949701619778346, - "grad_norm": 0.02090577222406864, - "learning_rate": 1.2523331797240072e-05, - "loss": 0.001333952508866787, - "step": 17300 - }, - { - "epoch": 2.950554134697357, - "grad_norm": 0.12461904436349869, - "learning_rate": 1.2504225908441751e-05, - "loss": 0.0025647601112723352, - "step": 17305 - }, - { - "epoch": 2.9514066496163682, - "grad_norm": 0.047791410237550735, - "learning_rate": 1.2485131688502254e-05, - "loss": 0.0014650242403149605, - "step": 17310 - }, - { - "epoch": 2.9522591645353793, - "grad_norm": 0.055085547268390656, - "learning_rate": 1.2466049146335387e-05, - "loss": 0.002520528435707092, - "step": 17315 - }, - { - "epoch": 2.9531116794543903, - "grad_norm": 0.09370748698711395, - "learning_rate": 1.2446978290849538e-05, - "loss": 0.002327192947268486, - "step": 17320 - }, - { - "epoch": 2.9539641943734014, - "grad_norm": 0.06663045287132263, - "learning_rate": 1.242791913094759e-05, - "loss": 0.0025285203009843826, - "step": 17325 - }, - { - "epoch": 2.9548167092924125, - "grad_norm": 0.06620613485574722, - "learning_rate": 1.2408871675527022e-05, - "loss": 0.001520772185176611, - "step": 17330 - }, - { - "epoch": 2.955669224211424, - "grad_norm": 0.08397935330867767, - "learning_rate": 1.2389835933479805e-05, - "loss": 0.001917354017496109, - "step": 17335 - }, - { - "epoch": 2.9565217391304346, - "grad_norm": 0.037347212433815, - "learning_rate": 1.2370811913692447e-05, - "loss": 0.001991302520036697, - "step": 17340 - }, - { - "epoch": 2.957374254049446, - "grad_norm": 0.09309769421815872, - "learning_rate": 1.2351799625046013e-05, - "loss": 0.0028038494288921355, - "step": 17345 - }, - { - "epoch": 2.9582267689684567, - "grad_norm": 0.03684366121888161, - "learning_rate": 1.2332799076416064e-05, - "loss": 0.0017773956060409546, - "step": 17350 - }, - { - "epoch": 2.959079283887468, - "grad_norm": 0.05473257228732109, - "learning_rate": 1.2313810276672687e-05, - "loss": 0.0012853020802140237, - "step": 17355 - }, - { - "epoch": 2.9599317988064793, - "grad_norm": 0.042117465287446976, - "learning_rate": 1.2294833234680473e-05, - "loss": 0.001919369027018547, - "step": 17360 - }, - { - "epoch": 2.9607843137254903, - "grad_norm": 0.05097515508532524, - "learning_rate": 1.2275867959298559e-05, - "loss": 0.001891462691128254, - "step": 17365 - }, - { - "epoch": 2.9616368286445014, - "grad_norm": 0.09409259259700775, - "learning_rate": 1.2256914459380544e-05, - "loss": 0.0014902386814355851, - "step": 17370 - }, - { - "epoch": 2.9624893435635125, - "grad_norm": 0.09465356171131134, - "learning_rate": 1.2237972743774576e-05, - "loss": 0.002463678829371929, - "step": 17375 - }, - { - "epoch": 2.9633418584825235, - "grad_norm": 0.02534087561070919, - "learning_rate": 1.221904282132327e-05, - "loss": 0.0023292653262615205, - "step": 17380 - }, - { - "epoch": 2.9641943734015346, - "grad_norm": 0.1058032363653183, - "learning_rate": 1.2200124700863723e-05, - "loss": 0.002900855429470539, - "step": 17385 - }, - { - "epoch": 2.9650468883205456, - "grad_norm": 0.07726191729307175, - "learning_rate": 1.218121839122757e-05, - "loss": 0.0014870663173496724, - "step": 17390 - }, - { - "epoch": 2.9658994032395567, - "grad_norm": 0.0792614072561264, - "learning_rate": 1.21623239012409e-05, - "loss": 0.001744781993329525, - "step": 17395 - }, - { - "epoch": 2.9667519181585678, - "grad_norm": 0.07266564667224884, - "learning_rate": 1.214344123972428e-05, - "loss": 0.002622047811746597, - "step": 17400 - }, - { - "epoch": 2.967604433077579, - "grad_norm": 0.06203412637114525, - "learning_rate": 1.2124570415492758e-05, - "loss": 0.002504969388246536, - "step": 17405 - }, - { - "epoch": 2.96845694799659, - "grad_norm": 0.07259709388017654, - "learning_rate": 1.2105711437355884e-05, - "loss": 0.0018782744184136391, - "step": 17410 - }, - { - "epoch": 2.969309462915601, - "grad_norm": 0.05496470257639885, - "learning_rate": 1.2086864314117633e-05, - "loss": 0.0018179532140493392, - "step": 17415 - }, - { - "epoch": 2.970161977834612, - "grad_norm": 0.0235351100564003, - "learning_rate": 1.2068029054576496e-05, - "loss": 0.0015613840892910956, - "step": 17420 - }, - { - "epoch": 2.971014492753623, - "grad_norm": 0.046441882848739624, - "learning_rate": 1.2049205667525383e-05, - "loss": 0.0014228712767362594, - "step": 17425 - }, - { - "epoch": 2.971867007672634, - "grad_norm": 0.06290153414011002, - "learning_rate": 1.2030394161751664e-05, - "loss": 0.0011624433100223541, - "step": 17430 - }, - { - "epoch": 2.972719522591645, - "grad_norm": 0.0662989467382431, - "learning_rate": 1.2011594546037205e-05, - "loss": 0.002170179411768913, - "step": 17435 - }, - { - "epoch": 2.9735720375106567, - "grad_norm": 0.06470426172018051, - "learning_rate": 1.1992806829158275e-05, - "loss": 0.0010997526347637176, - "step": 17440 - }, - { - "epoch": 2.9744245524296673, - "grad_norm": 0.039091553539037704, - "learning_rate": 1.1974031019885612e-05, - "loss": 0.0014238604344427586, - "step": 17445 - }, - { - "epoch": 2.975277067348679, - "grad_norm": 0.03796529024839401, - "learning_rate": 1.1955267126984376e-05, - "loss": 0.002270728349685669, - "step": 17450 - }, - { - "epoch": 2.9761295822676894, - "grad_norm": 0.09608127176761627, - "learning_rate": 1.1936515159214177e-05, - "loss": 0.0030095497146248817, - "step": 17455 - }, - { - "epoch": 2.976982097186701, - "grad_norm": 0.09011568874120712, - "learning_rate": 1.1917775125329063e-05, - "loss": 0.0031840000301599503, - "step": 17460 - }, - { - "epoch": 2.977834612105712, - "grad_norm": 0.057273294776678085, - "learning_rate": 1.1899047034077522e-05, - "loss": 0.0011888986453413963, - "step": 17465 - }, - { - "epoch": 2.978687127024723, - "grad_norm": 0.14515799283981323, - "learning_rate": 1.1880330894202432e-05, - "loss": 0.001710682176053524, - "step": 17470 - }, - { - "epoch": 2.979539641943734, - "grad_norm": 0.09522838145494461, - "learning_rate": 1.1861626714441096e-05, - "loss": 0.002519896999001503, - "step": 17475 - }, - { - "epoch": 2.980392156862745, - "grad_norm": 0.08164853602647781, - "learning_rate": 1.1842934503525282e-05, - "loss": 0.002578527852892876, - "step": 17480 - }, - { - "epoch": 2.9812446717817562, - "grad_norm": 0.08428774774074554, - "learning_rate": 1.1824254270181112e-05, - "loss": 0.0012953916564583778, - "step": 17485 - }, - { - "epoch": 2.9820971867007673, - "grad_norm": 0.07469037175178528, - "learning_rate": 1.180558602312915e-05, - "loss": 0.0037867244333028792, - "step": 17490 - }, - { - "epoch": 2.9829497016197783, - "grad_norm": 0.08371725678443909, - "learning_rate": 1.1786929771084346e-05, - "loss": 0.002791520766913891, - "step": 17495 - }, - { - "epoch": 2.9838022165387894, - "grad_norm": 0.014852025546133518, - "learning_rate": 1.1768285522756056e-05, - "loss": 0.0014176778495311737, - "step": 17500 - }, - { - "epoch": 2.9846547314578005, - "grad_norm": 0.04576858505606651, - "learning_rate": 1.174965328684804e-05, - "loss": 0.002578184753656387, - "step": 17505 - }, - { - "epoch": 2.9855072463768115, - "grad_norm": 0.05726059526205063, - "learning_rate": 1.1731033072058464e-05, - "loss": 0.0016687212511897088, - "step": 17510 - }, - { - "epoch": 2.9863597612958226, - "grad_norm": 0.0770409107208252, - "learning_rate": 1.171242488707984e-05, - "loss": 0.0013428821228444576, - "step": 17515 - }, - { - "epoch": 2.9872122762148337, - "grad_norm": 0.10322020947933197, - "learning_rate": 1.1693828740599093e-05, - "loss": 0.0019340002909302711, - "step": 17520 - }, - { - "epoch": 2.9880647911338447, - "grad_norm": 0.08900497853755951, - "learning_rate": 1.1675244641297531e-05, - "loss": 0.002262430638074875, - "step": 17525 - }, - { - "epoch": 2.9889173060528558, - "grad_norm": 0.06439421325922012, - "learning_rate": 1.1656672597850828e-05, - "loss": 0.003663495182991028, - "step": 17530 - }, - { - "epoch": 2.9897698209718673, - "grad_norm": 0.032524604350328445, - "learning_rate": 1.1638112618929023e-05, - "loss": 0.00146266371011734, - "step": 17535 - }, - { - "epoch": 2.990622335890878, - "grad_norm": 0.09089723974466324, - "learning_rate": 1.1619564713196542e-05, - "loss": 0.002597668394446373, - "step": 17540 - }, - { - "epoch": 2.9914748508098894, - "grad_norm": 0.11931595206260681, - "learning_rate": 1.1601028889312144e-05, - "loss": 0.0025284418836236, - "step": 17545 - }, - { - "epoch": 2.9923273657289, - "grad_norm": 0.05474149063229561, - "learning_rate": 1.1582505155928994e-05, - "loss": 0.002077813073992729, - "step": 17550 - }, - { - "epoch": 2.9931798806479115, - "grad_norm": 0.060414139181375504, - "learning_rate": 1.1563993521694564e-05, - "loss": 0.0014027852565050125, - "step": 17555 - }, - { - "epoch": 2.9940323955669226, - "grad_norm": 0.03036579303443432, - "learning_rate": 1.1545493995250727e-05, - "loss": 0.0008949190378189087, - "step": 17560 - }, - { - "epoch": 2.9948849104859336, - "grad_norm": 0.030154328793287277, - "learning_rate": 1.1527006585233662e-05, - "loss": 0.002073490060865879, - "step": 17565 - }, - { - "epoch": 2.9957374254049447, - "grad_norm": 0.04413657262921333, - "learning_rate": 1.1508531300273893e-05, - "loss": 0.0018356587737798692, - "step": 17570 - }, - { - "epoch": 2.9965899403239558, - "grad_norm": 0.022916359826922417, - "learning_rate": 1.1490068148996329e-05, - "loss": 0.0018058544024825095, - "step": 17575 - }, - { - "epoch": 2.997442455242967, - "grad_norm": 0.059595149010419846, - "learning_rate": 1.1471617140020162e-05, - "loss": 0.0019177049398422241, - "step": 17580 - }, - { - "epoch": 2.998294970161978, - "grad_norm": 0.038439393043518066, - "learning_rate": 1.1453178281958944e-05, - "loss": 0.002159320004284382, - "step": 17585 - }, - { - "epoch": 2.999147485080989, - "grad_norm": 0.021210921928286552, - "learning_rate": 1.1434751583420536e-05, - "loss": 0.0014576959423720838, - "step": 17590 - }, - { - "epoch": 2.9994884910485933, - "eval_loss": 0.04721549153327942, - "eval_runtime": 3.7007, - "eval_samples_per_second": 68.095, - "eval_steps_per_second": 1.081, - "step": 17592 - }, - { - "eval_cer_subset": 0.014346898562053186, - "eval_cer_subset_edit_distance": 881, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 17592 - }, - { - "epoch": 3.0, - "grad_norm": 0.12229876965284348, - "learning_rate": 1.1416337053007148e-05, - "loss": 0.0023294053971767426, - "step": 17595 - }, - { - "epoch": 3.000852514919011, - "grad_norm": 0.00884711928665638, - "learning_rate": 1.1397934699315283e-05, - "loss": 0.0007601963356137275, - "step": 17600 - }, - { - "epoch": 3.001705029838022, - "grad_norm": 0.006744864396750927, - "learning_rate": 1.1379544530935788e-05, - "loss": 0.0009616459719836712, - "step": 17605 - }, - { - "epoch": 3.002557544757033, - "grad_norm": 0.015270856209099293, - "learning_rate": 1.1361166556453794e-05, - "loss": 0.0008831757120788097, - "step": 17610 - }, - { - "epoch": 3.0034100596760442, - "grad_norm": 0.03363358601927757, - "learning_rate": 1.1342800784448747e-05, - "loss": 0.000755470572039485, - "step": 17615 - }, - { - "epoch": 3.0042625745950553, - "grad_norm": 0.013082304038107395, - "learning_rate": 1.1324447223494415e-05, - "loss": 0.0005882583092898131, - "step": 17620 - }, - { - "epoch": 3.0051150895140664, - "grad_norm": 0.02833358384668827, - "learning_rate": 1.1306105882158842e-05, - "loss": 0.0011209994554519653, - "step": 17625 - }, - { - "epoch": 3.0059676044330774, - "grad_norm": 0.018923277035355568, - "learning_rate": 1.1287776769004374e-05, - "loss": 0.0006263695657253265, - "step": 17630 - }, - { - "epoch": 3.0068201193520885, - "grad_norm": 0.06693795323371887, - "learning_rate": 1.1269459892587659e-05, - "loss": 0.0004380833823233843, - "step": 17635 - }, - { - "epoch": 3.0076726342710995, - "grad_norm": 0.046298157423734665, - "learning_rate": 1.1251155261459601e-05, - "loss": 0.0008744291961193084, - "step": 17640 - }, - { - "epoch": 3.008525149190111, - "grad_norm": 0.01699778623878956, - "learning_rate": 1.1232862884165428e-05, - "loss": 0.0010823222808539867, - "step": 17645 - }, - { - "epoch": 3.009377664109122, - "grad_norm": 0.03985033556818962, - "learning_rate": 1.1214582769244643e-05, - "loss": 0.0010692596435546875, - "step": 17650 - }, - { - "epoch": 3.010230179028133, - "grad_norm": 0.03855466470122337, - "learning_rate": 1.1196314925231002e-05, - "loss": 0.0005569665227085352, - "step": 17655 - }, - { - "epoch": 3.0110826939471442, - "grad_norm": 0.04734903946518898, - "learning_rate": 1.1178059360652523e-05, - "loss": 0.0006342111155390739, - "step": 17660 - }, - { - "epoch": 3.0119352088661553, - "grad_norm": 0.016796378418803215, - "learning_rate": 1.115981608403154e-05, - "loss": 0.0004393692128360271, - "step": 17665 - }, - { - "epoch": 3.0127877237851663, - "grad_norm": 0.052275605499744415, - "learning_rate": 1.1141585103884607e-05, - "loss": 0.0007596808485686779, - "step": 17670 - }, - { - "epoch": 3.0136402387041774, - "grad_norm": 0.01877668686211109, - "learning_rate": 1.1123366428722558e-05, - "loss": 0.000605479022487998, - "step": 17675 - }, - { - "epoch": 3.0144927536231885, - "grad_norm": 0.03435613960027695, - "learning_rate": 1.1105160067050468e-05, - "loss": 0.0006241496652364731, - "step": 17680 - }, - { - "epoch": 3.0153452685421995, - "grad_norm": 0.017725376412272453, - "learning_rate": 1.1086966027367666e-05, - "loss": 0.0004620179533958435, - "step": 17685 - }, - { - "epoch": 3.0161977834612106, - "grad_norm": 0.08081609010696411, - "learning_rate": 1.1068784318167741e-05, - "loss": 0.0007450764998793602, - "step": 17690 - }, - { - "epoch": 3.0170502983802217, - "grad_norm": 0.01702817529439926, - "learning_rate": 1.105061494793854e-05, - "loss": 0.0004339885897934437, - "step": 17695 - }, - { - "epoch": 3.0179028132992327, - "grad_norm": 0.01536079403012991, - "learning_rate": 1.1032457925162112e-05, - "loss": 0.00040107620880007743, - "step": 17700 - }, - { - "epoch": 3.0187553282182438, - "grad_norm": 0.005985455587506294, - "learning_rate": 1.101431325831475e-05, - "loss": 0.0005834253039211035, - "step": 17705 - }, - { - "epoch": 3.019607843137255, - "grad_norm": 0.06888663023710251, - "learning_rate": 1.099618095586701e-05, - "loss": 0.00041420520283281804, - "step": 17710 - }, - { - "epoch": 3.020460358056266, - "grad_norm": 0.017603853717446327, - "learning_rate": 1.097806102628364e-05, - "loss": 0.001230797078460455, - "step": 17715 - }, - { - "epoch": 3.021312872975277, - "grad_norm": 0.08015599846839905, - "learning_rate": 1.0959953478023628e-05, - "loss": 0.0010655376128852368, - "step": 17720 - }, - { - "epoch": 3.022165387894288, - "grad_norm": 0.03606700897216797, - "learning_rate": 1.0941858319540184e-05, - "loss": 0.0005988342221826315, - "step": 17725 - }, - { - "epoch": 3.023017902813299, - "grad_norm": 0.01536708977073431, - "learning_rate": 1.0923775559280712e-05, - "loss": 0.0006389651913195849, - "step": 17730 - }, - { - "epoch": 3.02387041773231, - "grad_norm": 0.007655138149857521, - "learning_rate": 1.090570520568686e-05, - "loss": 0.00042916592210531237, - "step": 17735 - }, - { - "epoch": 3.024722932651321, - "grad_norm": 0.029535889625549316, - "learning_rate": 1.0887647267194479e-05, - "loss": 0.0005783494096249342, - "step": 17740 - }, - { - "epoch": 3.0255754475703327, - "grad_norm": 0.04330015555024147, - "learning_rate": 1.0869601752233612e-05, - "loss": 0.000689673563465476, - "step": 17745 - }, - { - "epoch": 3.0264279624893438, - "grad_norm": 0.010324080474674702, - "learning_rate": 1.08515686692285e-05, - "loss": 0.00033456801902502774, - "step": 17750 - }, - { - "epoch": 3.027280477408355, - "grad_norm": 0.021205585449934006, - "learning_rate": 1.0833548026597568e-05, - "loss": 0.0005186548456549645, - "step": 17755 - }, - { - "epoch": 3.028132992327366, - "grad_norm": 0.01829897239804268, - "learning_rate": 1.081553983275349e-05, - "loss": 0.0010600530542433262, - "step": 17760 - }, - { - "epoch": 3.028985507246377, - "grad_norm": 0.014599860645830631, - "learning_rate": 1.0797544096103066e-05, - "loss": 0.0007877435535192489, - "step": 17765 - }, - { - "epoch": 3.029838022165388, - "grad_norm": 0.04433593526482582, - "learning_rate": 1.0779560825047306e-05, - "loss": 0.0006474100053310394, - "step": 17770 - }, - { - "epoch": 3.030690537084399, - "grad_norm": 0.037262968719005585, - "learning_rate": 1.0761590027981393e-05, - "loss": 0.0009919026866555213, - "step": 17775 - }, - { - "epoch": 3.03154305200341, - "grad_norm": 0.009379384107887745, - "learning_rate": 1.0743631713294696e-05, - "loss": 0.0008003567345440388, - "step": 17780 - }, - { - "epoch": 3.032395566922421, - "grad_norm": 0.05301728472113609, - "learning_rate": 1.0725685889370778e-05, - "loss": 0.0005272284150123596, - "step": 17785 - }, - { - "epoch": 3.0332480818414322, - "grad_norm": 0.014656663872301579, - "learning_rate": 1.0707752564587322e-05, - "loss": 0.0008352659642696381, - "step": 17790 - }, - { - "epoch": 3.0341005967604433, - "grad_norm": 0.022964198142290115, - "learning_rate": 1.0689831747316206e-05, - "loss": 0.0004729554522782564, - "step": 17795 - }, - { - "epoch": 3.0349531116794544, - "grad_norm": 0.00882013700902462, - "learning_rate": 1.0671923445923454e-05, - "loss": 0.0004315647296607494, - "step": 17800 - }, - { - "epoch": 3.0358056265984654, - "grad_norm": 0.03546347841620445, - "learning_rate": 1.0654027668769282e-05, - "loss": 0.0006825461052358151, - "step": 17805 - }, - { - "epoch": 3.0366581415174765, - "grad_norm": 0.009973641484975815, - "learning_rate": 1.063614442420801e-05, - "loss": 0.00043485076166689397, - "step": 17810 - }, - { - "epoch": 3.0375106564364875, - "grad_norm": 0.04359155148267746, - "learning_rate": 1.0618273720588144e-05, - "loss": 0.0005858796648681164, - "step": 17815 - }, - { - "epoch": 3.0383631713554986, - "grad_norm": 0.03281440958380699, - "learning_rate": 1.0600415566252307e-05, - "loss": 0.0004939477425068617, - "step": 17820 - }, - { - "epoch": 3.0392156862745097, - "grad_norm": 0.008886588737368584, - "learning_rate": 1.0582569969537304e-05, - "loss": 0.00031390462536364796, - "step": 17825 - }, - { - "epoch": 3.0400682011935207, - "grad_norm": 0.03827208653092384, - "learning_rate": 1.0564736938774028e-05, - "loss": 0.0004257161170244217, - "step": 17830 - }, - { - "epoch": 3.040920716112532, - "grad_norm": 0.01186210848391056, - "learning_rate": 1.0546916482287554e-05, - "loss": 0.0004740235395729542, - "step": 17835 - }, - { - "epoch": 3.041773231031543, - "grad_norm": 0.03290088102221489, - "learning_rate": 1.0529108608397058e-05, - "loss": 0.0005131486803293228, - "step": 17840 - }, - { - "epoch": 3.0426257459505544, - "grad_norm": 0.0059613995254039764, - "learning_rate": 1.0511313325415826e-05, - "loss": 0.0003371193306520581, - "step": 17845 - }, - { - "epoch": 3.0434782608695654, - "grad_norm": 0.008181710727512836, - "learning_rate": 1.049353064165132e-05, - "loss": 0.0003253704868257046, - "step": 17850 - }, - { - "epoch": 3.0443307757885765, - "grad_norm": 0.0913059189915657, - "learning_rate": 1.0475760565405071e-05, - "loss": 0.0008136253803968429, - "step": 17855 - }, - { - "epoch": 3.0451832907075875, - "grad_norm": 0.022350724786520004, - "learning_rate": 1.0458003104972746e-05, - "loss": 0.0003261453006416559, - "step": 17860 - }, - { - "epoch": 3.0460358056265986, - "grad_norm": 0.006000332068651915, - "learning_rate": 1.0440258268644106e-05, - "loss": 0.0004110721405595541, - "step": 17865 - }, - { - "epoch": 3.0468883205456097, - "grad_norm": 0.029682369902729988, - "learning_rate": 1.0422526064703051e-05, - "loss": 0.0003804177977144718, - "step": 17870 - }, - { - "epoch": 3.0477408354646207, - "grad_norm": 0.027596490457654, - "learning_rate": 1.0404806501427545e-05, - "loss": 0.00084029920399189, - "step": 17875 - }, - { - "epoch": 3.0485933503836318, - "grad_norm": 0.024016134440898895, - "learning_rate": 1.0387099587089688e-05, - "loss": 0.0004024073481559753, - "step": 17880 - }, - { - "epoch": 3.049445865302643, - "grad_norm": 0.03249691426753998, - "learning_rate": 1.0369405329955648e-05, - "loss": 0.000899493508040905, - "step": 17885 - }, - { - "epoch": 3.050298380221654, - "grad_norm": 0.06273671239614487, - "learning_rate": 1.035172373828568e-05, - "loss": 0.0005969330668449402, - "step": 17890 - }, - { - "epoch": 3.051150895140665, - "grad_norm": 0.02702365443110466, - "learning_rate": 1.0334054820334163e-05, - "loss": 0.0006494319997727871, - "step": 17895 - }, - { - "epoch": 3.052003410059676, - "grad_norm": 0.026773499324917793, - "learning_rate": 1.0316398584349527e-05, - "loss": 0.000355540681630373, - "step": 17900 - }, - { - "epoch": 3.052855924978687, - "grad_norm": 0.008991194888949394, - "learning_rate": 1.0298755038574284e-05, - "loss": 0.00036041475832462313, - "step": 17905 - }, - { - "epoch": 3.053708439897698, - "grad_norm": 0.0076339710503816605, - "learning_rate": 1.0281124191245031e-05, - "loss": 0.0003042724449187517, - "step": 17910 - }, - { - "epoch": 3.054560954816709, - "grad_norm": 0.026531491428613663, - "learning_rate": 1.0263506050592423e-05, - "loss": 0.0005534607917070389, - "step": 17915 - }, - { - "epoch": 3.0554134697357203, - "grad_norm": 0.009419003501534462, - "learning_rate": 1.0245900624841207e-05, - "loss": 0.0007199038751423359, - "step": 17920 - }, - { - "epoch": 3.0562659846547313, - "grad_norm": 0.02511359192430973, - "learning_rate": 1.0228307922210192e-05, - "loss": 0.0004511539824306965, - "step": 17925 - }, - { - "epoch": 3.0571184995737424, - "grad_norm": 0.007004071492701769, - "learning_rate": 1.0210727950912223e-05, - "loss": 0.0005472676362842322, - "step": 17930 - }, - { - "epoch": 3.0579710144927534, - "grad_norm": 0.005720047280192375, - "learning_rate": 1.0193160719154206e-05, - "loss": 0.0003349650418385863, - "step": 17935 - }, - { - "epoch": 3.0588235294117645, - "grad_norm": 0.00688981031998992, - "learning_rate": 1.017560623513713e-05, - "loss": 0.00028961682692170144, - "step": 17940 - }, - { - "epoch": 3.059676044330776, - "grad_norm": 0.034819502383470535, - "learning_rate": 1.0158064507056004e-05, - "loss": 0.0006485281512141228, - "step": 17945 - }, - { - "epoch": 3.060528559249787, - "grad_norm": 0.016207491979002953, - "learning_rate": 1.0140535543099885e-05, - "loss": 0.0006803269498050213, - "step": 17950 - }, - { - "epoch": 3.061381074168798, - "grad_norm": 0.013904010877013206, - "learning_rate": 1.0123019351451886e-05, - "loss": 0.001280614733695984, - "step": 17955 - }, - { - "epoch": 3.062233589087809, - "grad_norm": 0.006732371635735035, - "learning_rate": 1.0105515940289128e-05, - "loss": 0.0004976587370038033, - "step": 17960 - }, - { - "epoch": 3.0630861040068202, - "grad_norm": 0.004747320897877216, - "learning_rate": 1.0088025317782798e-05, - "loss": 0.00041887001134455204, - "step": 17965 - }, - { - "epoch": 3.0639386189258313, - "grad_norm": 0.024853700771927834, - "learning_rate": 1.0070547492098114e-05, - "loss": 0.0002716945484280586, - "step": 17970 - }, - { - "epoch": 3.0647911338448424, - "grad_norm": 0.014338959008455276, - "learning_rate": 1.0053082471394292e-05, - "loss": 0.00021331470925360917, - "step": 17975 - }, - { - "epoch": 3.0656436487638534, - "grad_norm": 0.09530264884233475, - "learning_rate": 1.003563026382459e-05, - "loss": 0.0005737710744142532, - "step": 17980 - }, - { - "epoch": 3.0664961636828645, - "grad_norm": 0.055152688175439835, - "learning_rate": 1.0018190877536263e-05, - "loss": 0.0005966671742498875, - "step": 17985 - }, - { - "epoch": 3.0673486786018755, - "grad_norm": 0.014675126411020756, - "learning_rate": 1.0000764320670622e-05, - "loss": 0.00021265523973852397, - "step": 17990 - }, - { - "epoch": 3.0682011935208866, - "grad_norm": 0.03896075114607811, - "learning_rate": 9.983350601362952e-06, - "loss": 0.00042183417826890945, - "step": 17995 - }, - { - "epoch": 3.0690537084398977, - "grad_norm": 0.026887232437729836, - "learning_rate": 9.965949727742554e-06, - "loss": 0.0004932911600917578, - "step": 18000 - }, - { - "epoch": 3.0699062233589087, - "grad_norm": 0.031420499086380005, - "learning_rate": 9.948561707932722e-06, - "loss": 0.0003844423685222864, - "step": 18005 - }, - { - "epoch": 3.07075873827792, - "grad_norm": 0.036880481988191605, - "learning_rate": 9.931186550050781e-06, - "loss": 0.0004236038308590651, - "step": 18010 - }, - { - "epoch": 3.071611253196931, - "grad_norm": 0.09423381090164185, - "learning_rate": 9.913824262208035e-06, - "loss": 0.0002767757046967745, - "step": 18015 - }, - { - "epoch": 3.072463768115942, - "grad_norm": 0.02230706810951233, - "learning_rate": 9.896474852509774e-06, - "loss": 0.00040940651670098307, - "step": 18020 - }, - { - "epoch": 3.073316283034953, - "grad_norm": 0.016502562910318375, - "learning_rate": 9.879138329055277e-06, - "loss": 0.0003771143034100533, - "step": 18025 - }, - { - "epoch": 3.074168797953964, - "grad_norm": 0.11179275810718536, - "learning_rate": 9.861814699937794e-06, - "loss": 0.0011194558814167977, - "step": 18030 - }, - { - "epoch": 3.075021312872975, - "grad_norm": 0.0195760540664196, - "learning_rate": 9.844503973244599e-06, - "loss": 0.00031050120014697313, - "step": 18035 - }, - { - "epoch": 3.075873827791986, - "grad_norm": 0.022706160321831703, - "learning_rate": 9.827206157056901e-06, - "loss": 0.0006133354268968106, - "step": 18040 - }, - { - "epoch": 3.0767263427109977, - "grad_norm": 0.04711553826928139, - "learning_rate": 9.809921259449896e-06, - "loss": 0.0005805216729640961, - "step": 18045 - }, - { - "epoch": 3.0775788576300087, - "grad_norm": 0.013767831958830357, - "learning_rate": 9.792649288492741e-06, - "loss": 0.0018730144947767258, - "step": 18050 - }, - { - "epoch": 3.0784313725490198, - "grad_norm": 0.03198297694325447, - "learning_rate": 9.775390252248584e-06, - "loss": 0.0008407266810536385, - "step": 18055 - }, - { - "epoch": 3.079283887468031, - "grad_norm": 0.004666489083319902, - "learning_rate": 9.758144158774502e-06, - "loss": 0.0006300830282270908, - "step": 18060 - }, - { - "epoch": 3.080136402387042, - "grad_norm": 0.046730559319257736, - "learning_rate": 9.740911016121561e-06, - "loss": 0.0010341707617044448, - "step": 18065 - }, - { - "epoch": 3.080988917306053, - "grad_norm": 0.028454085811972618, - "learning_rate": 9.72369083233476e-06, - "loss": 0.0006070803385227919, - "step": 18070 - }, - { - "epoch": 3.081841432225064, - "grad_norm": 0.03427174314856529, - "learning_rate": 9.706483615453036e-06, - "loss": 0.0005169651005417109, - "step": 18075 - }, - { - "epoch": 3.082693947144075, - "grad_norm": 0.06954972445964813, - "learning_rate": 9.689289373509316e-06, - "loss": 0.0006448618602007628, - "step": 18080 - }, - { - "epoch": 3.083546462063086, - "grad_norm": 0.06108829006552696, - "learning_rate": 9.672108114530434e-06, - "loss": 0.000641945656388998, - "step": 18085 - }, - { - "epoch": 3.084398976982097, - "grad_norm": 0.06737220287322998, - "learning_rate": 9.65493984653717e-06, - "loss": 0.0002690809080377221, - "step": 18090 - }, - { - "epoch": 3.0852514919011083, - "grad_norm": 0.016048768535256386, - "learning_rate": 9.637784577544234e-06, - "loss": 0.00035306806676089765, - "step": 18095 - }, - { - "epoch": 3.0861040068201193, - "grad_norm": 0.0573379211127758, - "learning_rate": 9.620642315560295e-06, - "loss": 0.0006647071335464716, - "step": 18100 - }, - { - "epoch": 3.0869565217391304, - "grad_norm": 0.006947176530957222, - "learning_rate": 9.603513068587913e-06, - "loss": 0.00039295474998652936, - "step": 18105 - }, - { - "epoch": 3.0878090366581414, - "grad_norm": 0.012169529683887959, - "learning_rate": 9.586396844623612e-06, - "loss": 0.0002381766214966774, - "step": 18110 - }, - { - "epoch": 3.0886615515771525, - "grad_norm": 0.007689214311540127, - "learning_rate": 9.569293651657802e-06, - "loss": 0.00043741161935031416, - "step": 18115 - }, - { - "epoch": 3.0895140664961636, - "grad_norm": 0.007992210797965527, - "learning_rate": 9.552203497674813e-06, - "loss": 0.00020460875239223242, - "step": 18120 - }, - { - "epoch": 3.0903665814151746, - "grad_norm": 0.0637383833527565, - "learning_rate": 9.535126390652917e-06, - "loss": 0.0009160020388662815, - "step": 18125 - }, - { - "epoch": 3.0912190963341857, - "grad_norm": 0.027393560856580734, - "learning_rate": 9.518062338564269e-06, - "loss": 0.00038999966345727445, - "step": 18130 - }, - { - "epoch": 3.0920716112531967, - "grad_norm": 0.07132820785045624, - "learning_rate": 9.501011349374927e-06, - "loss": 0.0006502022966742516, - "step": 18135 - }, - { - "epoch": 3.092924126172208, - "grad_norm": 0.03045077994465828, - "learning_rate": 9.48397343104486e-06, - "loss": 0.00084984190762043, - "step": 18140 - }, - { - "epoch": 3.0937766410912193, - "grad_norm": 0.009866426698863506, - "learning_rate": 9.466948591527953e-06, - "loss": 0.0005647209007292985, - "step": 18145 - }, - { - "epoch": 3.0946291560102304, - "grad_norm": 0.11743370443582535, - "learning_rate": 9.449936838771943e-06, - "loss": 0.0014441744424402714, - "step": 18150 - }, - { - "epoch": 3.0954816709292414, - "grad_norm": 0.03290848433971405, - "learning_rate": 9.432938180718506e-06, - "loss": 0.00036750044673681257, - "step": 18155 - }, - { - "epoch": 3.0963341858482525, - "grad_norm": 0.011831770651042461, - "learning_rate": 9.415952625303169e-06, - "loss": 0.0004473600536584854, - "step": 18160 - }, - { - "epoch": 3.0971867007672635, - "grad_norm": 0.08015407621860504, - "learning_rate": 9.398980180455355e-06, - "loss": 0.0006069076247513294, - "step": 18165 - }, - { - "epoch": 3.0980392156862746, - "grad_norm": 0.029129406437277794, - "learning_rate": 9.382020854098356e-06, - "loss": 0.0007575173862278461, - "step": 18170 - }, - { - "epoch": 3.0988917306052857, - "grad_norm": 0.0051441071555018425, - "learning_rate": 9.365074654149368e-06, - "loss": 0.00029567121528089045, - "step": 18175 - }, - { - "epoch": 3.0997442455242967, - "grad_norm": 0.055952105671167374, - "learning_rate": 9.348141588519435e-06, - "loss": 0.0005467975046485662, - "step": 18180 - }, - { - "epoch": 3.100596760443308, - "grad_norm": 0.009275187738239765, - "learning_rate": 9.331221665113471e-06, - "loss": 0.0003922369331121445, - "step": 18185 - }, - { - "epoch": 3.101449275362319, - "grad_norm": 0.052929461002349854, - "learning_rate": 9.314314891830251e-06, - "loss": 0.0005707596894353629, - "step": 18190 - }, - { - "epoch": 3.10230179028133, - "grad_norm": 0.011049921624362469, - "learning_rate": 9.297421276562426e-06, - "loss": 0.0002663507591933012, - "step": 18195 - }, - { - "epoch": 3.103154305200341, - "grad_norm": 0.04371742531657219, - "learning_rate": 9.280540827196516e-06, - "loss": 0.00039334925822913646, - "step": 18200 - }, - { - "epoch": 3.104006820119352, - "grad_norm": 0.04068119451403618, - "learning_rate": 9.263673551612858e-06, - "loss": 0.00039259335026144984, - "step": 18205 - }, - { - "epoch": 3.104859335038363, - "grad_norm": 0.020368283614516258, - "learning_rate": 9.246819457685662e-06, - "loss": 0.00041896156035363673, - "step": 18210 - }, - { - "epoch": 3.105711849957374, - "grad_norm": 0.03870120272040367, - "learning_rate": 9.229978553282968e-06, - "loss": 0.00047820848412811757, - "step": 18215 - }, - { - "epoch": 3.106564364876385, - "grad_norm": 0.013779827393591404, - "learning_rate": 9.213150846266686e-06, - "loss": 0.0003055138513445854, - "step": 18220 - }, - { - "epoch": 3.1074168797953963, - "grad_norm": 0.013860267587006092, - "learning_rate": 9.19633634449255e-06, - "loss": 0.00031585688702762127, - "step": 18225 - }, - { - "epoch": 3.1082693947144073, - "grad_norm": 0.05094626918435097, - "learning_rate": 9.179535055810118e-06, - "loss": 0.002102493681013584, - "step": 18230 - }, - { - "epoch": 3.1091219096334184, - "grad_norm": 0.007574997376650572, - "learning_rate": 9.162746988062783e-06, - "loss": 0.00019260718254372479, - "step": 18235 - }, - { - "epoch": 3.10997442455243, - "grad_norm": 0.04581240937113762, - "learning_rate": 9.145972149087787e-06, - "loss": 0.0008758898824453354, - "step": 18240 - }, - { - "epoch": 3.110826939471441, - "grad_norm": 0.02130374312400818, - "learning_rate": 9.129210546716194e-06, - "loss": 0.0003915982786566019, - "step": 18245 - }, - { - "epoch": 3.111679454390452, - "grad_norm": 0.022723006084561348, - "learning_rate": 9.112462188772862e-06, - "loss": 0.0007300690747797489, - "step": 18250 - }, - { - "epoch": 3.112531969309463, - "grad_norm": 0.08266850560903549, - "learning_rate": 9.095727083076484e-06, - "loss": 0.0005315537564456463, - "step": 18255 - }, - { - "epoch": 3.113384484228474, - "grad_norm": 0.06052851676940918, - "learning_rate": 9.079005237439557e-06, - "loss": 0.000990215130150318, - "step": 18260 - }, - { - "epoch": 3.114236999147485, - "grad_norm": 0.005194041877985001, - "learning_rate": 9.062296659668411e-06, - "loss": 0.00022260420955717565, - "step": 18265 - }, - { - "epoch": 3.1150895140664963, - "grad_norm": 0.06306616961956024, - "learning_rate": 9.045601357563156e-06, - "loss": 0.00041153267957270143, - "step": 18270 - }, - { - "epoch": 3.1159420289855073, - "grad_norm": 0.010921395383775234, - "learning_rate": 9.028919338917712e-06, - "loss": 0.0006207648664712905, - "step": 18275 - }, - { - "epoch": 3.1167945439045184, - "grad_norm": 0.04868682101368904, - "learning_rate": 9.012250611519802e-06, - "loss": 0.0006932941731065511, - "step": 18280 - }, - { - "epoch": 3.1176470588235294, - "grad_norm": 0.08955781161785126, - "learning_rate": 8.99559518315094e-06, - "loss": 0.0011549662798643113, - "step": 18285 - }, - { - "epoch": 3.1184995737425405, - "grad_norm": 0.013239394873380661, - "learning_rate": 8.978953061586447e-06, - "loss": 0.0007932853884994983, - "step": 18290 - }, - { - "epoch": 3.1193520886615516, - "grad_norm": 0.018361147493124008, - "learning_rate": 8.962324254595406e-06, - "loss": 0.0008410025388002395, - "step": 18295 - }, - { - "epoch": 3.1202046035805626, - "grad_norm": 0.01051001250743866, - "learning_rate": 8.94570876994071e-06, - "loss": 0.000252532446756959, - "step": 18300 - }, - { - "epoch": 3.1210571184995737, - "grad_norm": 0.03304922580718994, - "learning_rate": 8.929106615378996e-06, - "loss": 0.00035131536424160004, - "step": 18305 - }, - { - "epoch": 3.1219096334185847, - "grad_norm": 0.04879309609532356, - "learning_rate": 8.912517798660728e-06, - "loss": 0.000421084463596344, - "step": 18310 - }, - { - "epoch": 3.122762148337596, - "grad_norm": 0.010428894311189651, - "learning_rate": 8.89594232753011e-06, - "loss": 0.0004888340365141631, - "step": 18315 - }, - { - "epoch": 3.123614663256607, - "grad_norm": 0.03332305699586868, - "learning_rate": 8.879380209725114e-06, - "loss": 0.0005710380151867867, - "step": 18320 - }, - { - "epoch": 3.124467178175618, - "grad_norm": 0.11709732562303543, - "learning_rate": 8.862831452977486e-06, - "loss": 0.0007624867372214794, - "step": 18325 - }, - { - "epoch": 3.125319693094629, - "grad_norm": 0.010226447135210037, - "learning_rate": 8.846296065012758e-06, - "loss": 0.0002884409856051207, - "step": 18330 - }, - { - "epoch": 3.12617220801364, - "grad_norm": 0.06316721439361572, - "learning_rate": 8.829774053550167e-06, - "loss": 0.0005763838067650795, - "step": 18335 - }, - { - "epoch": 3.127024722932651, - "grad_norm": 0.009583157487213612, - "learning_rate": 8.813265426302772e-06, - "loss": 0.000740795349702239, - "step": 18340 - }, - { - "epoch": 3.1278772378516626, - "grad_norm": 0.09729946404695511, - "learning_rate": 8.796770190977332e-06, - "loss": 0.0009914403781294823, - "step": 18345 - }, - { - "epoch": 3.1287297527706737, - "grad_norm": 0.06878595054149628, - "learning_rate": 8.78028835527436e-06, - "loss": 0.000869260635226965, - "step": 18350 - }, - { - "epoch": 3.1295822676896847, - "grad_norm": 0.07358408719301224, - "learning_rate": 8.763819926888147e-06, - "loss": 0.0003912035841494799, - "step": 18355 - }, - { - "epoch": 3.130434782608696, - "grad_norm": 0.023181110620498657, - "learning_rate": 8.747364913506694e-06, - "loss": 0.0006454653572291136, - "step": 18360 - }, - { - "epoch": 3.131287297527707, - "grad_norm": 0.014834016561508179, - "learning_rate": 8.730923322811748e-06, - "loss": 0.0004834470339119434, - "step": 18365 - }, - { - "epoch": 3.132139812446718, - "grad_norm": 0.013688327744603157, - "learning_rate": 8.714495162478786e-06, - "loss": 0.00042675542645156386, - "step": 18370 - }, - { - "epoch": 3.132992327365729, - "grad_norm": 0.09365646541118622, - "learning_rate": 8.69808044017703e-06, - "loss": 0.000931151770055294, - "step": 18375 - }, - { - "epoch": 3.13384484228474, - "grad_norm": 0.026040131226181984, - "learning_rate": 8.681679163569399e-06, - "loss": 0.0008362406864762306, - "step": 18380 - }, - { - "epoch": 3.134697357203751, - "grad_norm": 0.026200976222753525, - "learning_rate": 8.665291340312585e-06, - "loss": 0.00024140358436852694, - "step": 18385 - }, - { - "epoch": 3.135549872122762, - "grad_norm": 0.06846249848604202, - "learning_rate": 8.648916978056948e-06, - "loss": 0.00040455334819853304, - "step": 18390 - }, - { - "epoch": 3.136402387041773, - "grad_norm": 0.005816053133457899, - "learning_rate": 8.632556084446594e-06, - "loss": 0.00026596912648528813, - "step": 18395 - }, - { - "epoch": 3.1372549019607843, - "grad_norm": 0.04414185881614685, - "learning_rate": 8.616208667119315e-06, - "loss": 0.0012877457775175571, - "step": 18400 - }, - { - "epoch": 3.1381074168797953, - "grad_norm": 0.03767494857311249, - "learning_rate": 8.59987473370665e-06, - "loss": 0.0010142676532268525, - "step": 18405 - }, - { - "epoch": 3.1389599317988064, - "grad_norm": 0.04332097992300987, - "learning_rate": 8.583554291833817e-06, - "loss": 0.0006124789826571942, - "step": 18410 - }, - { - "epoch": 3.1398124467178175, - "grad_norm": 0.007427348289638758, - "learning_rate": 8.567247349119739e-06, - "loss": 0.0007707455195486545, - "step": 18415 - }, - { - "epoch": 3.1406649616368285, - "grad_norm": 0.04704085737466812, - "learning_rate": 8.550953913177026e-06, - "loss": 0.0003297704039141536, - "step": 18420 - }, - { - "epoch": 3.1415174765558396, - "grad_norm": 0.015286453999578953, - "learning_rate": 8.534673991612011e-06, - "loss": 0.0007448584772646427, - "step": 18425 - }, - { - "epoch": 3.1423699914748506, - "grad_norm": 0.06501411646604538, - "learning_rate": 8.518407592024712e-06, - "loss": 0.0009272911585867405, - "step": 18430 - }, - { - "epoch": 3.1432225063938617, - "grad_norm": 0.07314588874578476, - "learning_rate": 8.50215472200881e-06, - "loss": 0.000575255136936903, - "step": 18435 - }, - { - "epoch": 3.144075021312873, - "grad_norm": 0.01809004321694374, - "learning_rate": 8.485915389151694e-06, - "loss": 0.000459101889282465, - "step": 18440 - }, - { - "epoch": 3.1449275362318843, - "grad_norm": 0.0908508151769638, - "learning_rate": 8.469689601034406e-06, - "loss": 0.00044624172151088717, - "step": 18445 - }, - { - "epoch": 3.1457800511508953, - "grad_norm": 0.029488559812307358, - "learning_rate": 8.45347736523171e-06, - "loss": 0.0003597501665353775, - "step": 18450 - }, - { - "epoch": 3.1466325660699064, - "grad_norm": 0.015282983891665936, - "learning_rate": 8.437278689312007e-06, - "loss": 0.0005331444554030895, - "step": 18455 - }, - { - "epoch": 3.1474850809889174, - "grad_norm": 0.0475476048886776, - "learning_rate": 8.421093580837374e-06, - "loss": 0.0010153815150260926, - "step": 18460 - }, - { - "epoch": 3.1483375959079285, - "grad_norm": 0.09270385652780533, - "learning_rate": 8.404922047363548e-06, - "loss": 0.0007084616459906101, - "step": 18465 - }, - { - "epoch": 3.1491901108269396, - "grad_norm": 0.025847190991044044, - "learning_rate": 8.388764096439953e-06, - "loss": 0.00023725461214780809, - "step": 18470 - }, - { - "epoch": 3.1500426257459506, - "grad_norm": 0.023858604952692986, - "learning_rate": 8.372619735609662e-06, - "loss": 0.0003485321067273617, - "step": 18475 - }, - { - "epoch": 3.1508951406649617, - "grad_norm": 0.005237930454313755, - "learning_rate": 8.356488972409398e-06, - "loss": 0.0005028464831411839, - "step": 18480 - }, - { - "epoch": 3.1517476555839727, - "grad_norm": 0.04220377653837204, - "learning_rate": 8.340371814369532e-06, - "loss": 0.0009449001401662827, - "step": 18485 - }, - { - "epoch": 3.152600170502984, - "grad_norm": 0.0714297816157341, - "learning_rate": 8.324268269014078e-06, - "loss": 0.0004925032146275044, - "step": 18490 - }, - { - "epoch": 3.153452685421995, - "grad_norm": 0.023220403119921684, - "learning_rate": 8.308178343860729e-06, - "loss": 0.0006748316343873739, - "step": 18495 - }, - { - "epoch": 3.154305200341006, - "grad_norm": 0.05557497963309288, - "learning_rate": 8.292102046420787e-06, - "loss": 0.0007373414933681488, - "step": 18500 - }, - { - "epoch": 3.155157715260017, - "grad_norm": 0.017496848478913307, - "learning_rate": 8.276039384199203e-06, - "loss": 0.001099762413650751, - "step": 18505 - }, - { - "epoch": 3.156010230179028, - "grad_norm": 0.006222693715244532, - "learning_rate": 8.259990364694557e-06, - "loss": 0.00030525855254381895, - "step": 18510 - }, - { - "epoch": 3.156862745098039, - "grad_norm": 0.016474226489663124, - "learning_rate": 8.243954995399062e-06, - "loss": 0.0003330275183543563, - "step": 18515 - }, - { - "epoch": 3.15771526001705, - "grad_norm": 0.011799500323832035, - "learning_rate": 8.227933283798587e-06, - "loss": 0.0008484587073326111, - "step": 18520 - }, - { - "epoch": 3.1585677749360612, - "grad_norm": 0.10201061517000198, - "learning_rate": 8.211925237372581e-06, - "loss": 0.0006466713268309832, - "step": 18525 - }, - { - "epoch": 3.1594202898550723, - "grad_norm": 0.04381557181477547, - "learning_rate": 8.195930863594131e-06, - "loss": 0.0004293074831366539, - "step": 18530 - }, - { - "epoch": 3.1602728047740833, - "grad_norm": 0.019579825922846794, - "learning_rate": 8.17995016992994e-06, - "loss": 0.000977578666061163, - "step": 18535 - }, - { - "epoch": 3.1611253196930944, - "grad_norm": 0.043777912855148315, - "learning_rate": 8.163983163840338e-06, - "loss": 0.0004046197980642319, - "step": 18540 - }, - { - "epoch": 3.161977834612106, - "grad_norm": 0.006554318591952324, - "learning_rate": 8.148029852779258e-06, - "loss": 0.0013218319974839688, - "step": 18545 - }, - { - "epoch": 3.162830349531117, - "grad_norm": 0.005029724910855293, - "learning_rate": 8.13209024419422e-06, - "loss": 0.0006320577114820481, - "step": 18550 - }, - { - "epoch": 3.163682864450128, - "grad_norm": 0.04088412597775459, - "learning_rate": 8.11616434552637e-06, - "loss": 0.001373323891311884, - "step": 18555 - }, - { - "epoch": 3.164535379369139, - "grad_norm": 0.04089086875319481, - "learning_rate": 8.100252164210444e-06, - "loss": 0.0009590038098394871, - "step": 18560 - }, - { - "epoch": 3.16538789428815, - "grad_norm": 0.059163108468055725, - "learning_rate": 8.084353707674792e-06, - "loss": 0.0007160831708461046, - "step": 18565 - }, - { - "epoch": 3.166240409207161, - "grad_norm": 0.040127795189619064, - "learning_rate": 8.068468983341338e-06, - "loss": 0.000580929359421134, - "step": 18570 - }, - { - "epoch": 3.1670929241261723, - "grad_norm": 0.11662314087152481, - "learning_rate": 8.052597998625588e-06, - "loss": 0.0005010033026337623, - "step": 18575 - }, - { - "epoch": 3.1679454390451833, - "grad_norm": 0.0162198469042778, - "learning_rate": 8.036740760936647e-06, - "loss": 0.0003753812052309513, - "step": 18580 - }, - { - "epoch": 3.1687979539641944, - "grad_norm": 0.12012816220521927, - "learning_rate": 8.020897277677215e-06, - "loss": 0.0009693917818367481, - "step": 18585 - }, - { - "epoch": 3.1696504688832055, - "grad_norm": 0.03412945196032524, - "learning_rate": 8.00506755624355e-06, - "loss": 0.0001745267305523157, - "step": 18590 - }, - { - "epoch": 3.1705029838022165, - "grad_norm": 0.005339973606169224, - "learning_rate": 7.989251604025489e-06, - "loss": 0.0009339713491499424, - "step": 18595 - }, - { - "epoch": 3.1713554987212276, - "grad_norm": 0.021463308483362198, - "learning_rate": 7.973449428406439e-06, - "loss": 0.0006150984205305577, - "step": 18600 - }, - { - "epoch": 3.1722080136402386, - "grad_norm": 0.011554487980902195, - "learning_rate": 7.957661036763397e-06, - "loss": 0.00038701703306287527, - "step": 18605 - }, - { - "epoch": 3.1730605285592497, - "grad_norm": 0.021070247516036034, - "learning_rate": 7.941886436466888e-06, - "loss": 0.0007086104713380336, - "step": 18610 - }, - { - "epoch": 3.1739130434782608, - "grad_norm": 0.07222088426351547, - "learning_rate": 7.926125634881047e-06, - "loss": 0.0005243740510195493, - "step": 18615 - }, - { - "epoch": 3.174765558397272, - "grad_norm": 0.09254760295152664, - "learning_rate": 7.910378639363528e-06, - "loss": 0.0007765952497720719, - "step": 18620 - }, - { - "epoch": 3.175618073316283, - "grad_norm": 0.037701316177845, - "learning_rate": 7.89464545726555e-06, - "loss": 0.00039138970896601677, - "step": 18625 - }, - { - "epoch": 3.176470588235294, - "grad_norm": 0.015249347314238548, - "learning_rate": 7.878926095931876e-06, - "loss": 0.0003943302668631077, - "step": 18630 - }, - { - "epoch": 3.177323103154305, - "grad_norm": 0.016091618686914444, - "learning_rate": 7.863220562700847e-06, - "loss": 0.000575948553159833, - "step": 18635 - }, - { - "epoch": 3.1781756180733165, - "grad_norm": 0.05504714697599411, - "learning_rate": 7.847528864904322e-06, - "loss": 0.0012753555551171304, - "step": 18640 - }, - { - "epoch": 3.1790281329923276, - "grad_norm": 0.01844659261405468, - "learning_rate": 7.831851009867693e-06, - "loss": 0.00032608325127512214, - "step": 18645 - }, - { - "epoch": 3.1798806479113386, - "grad_norm": 0.02932833693921566, - "learning_rate": 7.816187004909927e-06, - "loss": 0.0002993215108290315, - "step": 18650 - }, - { - "epoch": 3.1807331628303497, - "grad_norm": 0.03746391460299492, - "learning_rate": 7.800536857343479e-06, - "loss": 0.000614574272185564, - "step": 18655 - }, - { - "epoch": 3.1815856777493607, - "grad_norm": 0.04635264351963997, - "learning_rate": 7.784900574474383e-06, - "loss": 0.0005155592691153288, - "step": 18660 - }, - { - "epoch": 3.182438192668372, - "grad_norm": 0.024929136037826538, - "learning_rate": 7.769278163602164e-06, - "loss": 0.0003661647439002991, - "step": 18665 - }, - { - "epoch": 3.183290707587383, - "grad_norm": 0.005773736163973808, - "learning_rate": 7.753669632019881e-06, - "loss": 0.0003662605304270983, - "step": 18670 - }, - { - "epoch": 3.184143222506394, - "grad_norm": 0.14380963146686554, - "learning_rate": 7.738074987014107e-06, - "loss": 0.000703729223459959, - "step": 18675 - }, - { - "epoch": 3.184995737425405, - "grad_norm": 0.03039398603141308, - "learning_rate": 7.722494235864967e-06, - "loss": 0.00028703445568680765, - "step": 18680 - }, - { - "epoch": 3.185848252344416, - "grad_norm": 0.022264502942562103, - "learning_rate": 7.706927385846053e-06, - "loss": 0.0004953373223543167, - "step": 18685 - }, - { - "epoch": 3.186700767263427, - "grad_norm": 0.022176261991262436, - "learning_rate": 7.691374444224497e-06, - "loss": 0.0006184632889926434, - "step": 18690 - }, - { - "epoch": 3.187553282182438, - "grad_norm": 0.030033515766263008, - "learning_rate": 7.675835418260915e-06, - "loss": 0.0006910198833793401, - "step": 18695 - }, - { - "epoch": 3.1884057971014492, - "grad_norm": 0.12117313593626022, - "learning_rate": 7.660310315209455e-06, - "loss": 0.0012623773887753486, - "step": 18700 - }, - { - "epoch": 3.1892583120204603, - "grad_norm": 0.013550493866205215, - "learning_rate": 7.644799142317753e-06, - "loss": 0.0007082201074808836, - "step": 18705 - }, - { - "epoch": 3.1901108269394713, - "grad_norm": 0.0489371083676815, - "learning_rate": 7.629301906826945e-06, - "loss": 0.0007669483777135611, - "step": 18710 - }, - { - "epoch": 3.1909633418584824, - "grad_norm": 0.028650205582380295, - "learning_rate": 7.6138186159716435e-06, - "loss": 0.0009685775265097618, - "step": 18715 - }, - { - "epoch": 3.1918158567774935, - "grad_norm": 0.06777958571910858, - "learning_rate": 7.598349276979958e-06, - "loss": 0.0003870198968797922, - "step": 18720 - }, - { - "epoch": 3.1926683716965045, - "grad_norm": 0.027635935693979263, - "learning_rate": 7.582893897073514e-06, - "loss": 0.00038398322649300096, - "step": 18725 - }, - { - "epoch": 3.1935208866155156, - "grad_norm": 0.0027559841983020306, - "learning_rate": 7.567452483467381e-06, - "loss": 0.0003620174713432789, - "step": 18730 - }, - { - "epoch": 3.1943734015345266, - "grad_norm": 0.010767337866127491, - "learning_rate": 7.552025043370125e-06, - "loss": 0.00037704890128225087, - "step": 18735 - }, - { - "epoch": 3.1952259164535377, - "grad_norm": 0.0657992735505104, - "learning_rate": 7.5366115839837815e-06, - "loss": 0.0004264485090970993, - "step": 18740 - }, - { - "epoch": 3.196078431372549, - "grad_norm": 0.029620325192809105, - "learning_rate": 7.5212121125038796e-06, - "loss": 0.0002805137075483799, - "step": 18745 - }, - { - "epoch": 3.1969309462915603, - "grad_norm": 0.04062730073928833, - "learning_rate": 7.505826636119407e-06, - "loss": 0.00017744075739756227, - "step": 18750 - }, - { - "epoch": 3.1977834612105713, - "grad_norm": 0.0391278937458992, - "learning_rate": 7.490455162012808e-06, - "loss": 0.001126928348094225, - "step": 18755 - }, - { - "epoch": 3.1986359761295824, - "grad_norm": 0.0766754299402237, - "learning_rate": 7.4750976973599986e-06, - "loss": 0.0006041087210178376, - "step": 18760 - }, - { - "epoch": 3.1994884910485935, - "grad_norm": 0.01741630584001541, - "learning_rate": 7.459754249330347e-06, - "loss": 0.0007178018335253, - "step": 18765 - }, - { - "epoch": 3.2003410059676045, - "grad_norm": 0.052834443747997284, - "learning_rate": 7.444424825086698e-06, - "loss": 0.0006523634772747755, - "step": 18770 - }, - { - "epoch": 3.2011935208866156, - "grad_norm": 0.051064226776361465, - "learning_rate": 7.4291094317853365e-06, - "loss": 0.0004721490200608969, - "step": 18775 - }, - { - "epoch": 3.2020460358056266, - "grad_norm": 0.01717698574066162, - "learning_rate": 7.4138080765759885e-06, - "loss": 0.0010264517739415168, - "step": 18780 - }, - { - "epoch": 3.2028985507246377, - "grad_norm": 0.0630933865904808, - "learning_rate": 7.398520766601833e-06, - "loss": 0.000731096789240837, - "step": 18785 - }, - { - "epoch": 3.2037510656436488, - "grad_norm": 0.021110277622938156, - "learning_rate": 7.383247508999501e-06, - "loss": 0.00034617548808455465, - "step": 18790 - }, - { - "epoch": 3.20460358056266, - "grad_norm": 0.03756425157189369, - "learning_rate": 7.367988310899066e-06, - "loss": 0.0005103135481476784, - "step": 18795 - }, - { - "epoch": 3.205456095481671, - "grad_norm": 0.009691229090094566, - "learning_rate": 7.352743179424024e-06, - "loss": 0.0007526874542236328, - "step": 18800 - }, - { - "epoch": 3.206308610400682, - "grad_norm": 0.007688464596867561, - "learning_rate": 7.337512121691304e-06, - "loss": 0.0008044790476560593, - "step": 18805 - }, - { - "epoch": 3.207161125319693, - "grad_norm": 0.010440339334309101, - "learning_rate": 7.322295144811276e-06, - "loss": 0.00020275618880987166, - "step": 18810 - }, - { - "epoch": 3.208013640238704, - "grad_norm": 0.01417286042124033, - "learning_rate": 7.307092255887711e-06, - "loss": 0.0007631714455783367, - "step": 18815 - }, - { - "epoch": 3.208866155157715, - "grad_norm": 0.0317782461643219, - "learning_rate": 7.291903462017859e-06, - "loss": 0.0003871546592563391, - "step": 18820 - }, - { - "epoch": 3.209718670076726, - "grad_norm": 0.017280934378504753, - "learning_rate": 7.27672877029233e-06, - "loss": 0.002473811246454716, - "step": 18825 - }, - { - "epoch": 3.2105711849957372, - "grad_norm": 0.015073291026055813, - "learning_rate": 7.261568187795169e-06, - "loss": 0.00022768331691622735, - "step": 18830 - }, - { - "epoch": 3.2114236999147483, - "grad_norm": 0.10031914710998535, - "learning_rate": 7.246421721603867e-06, - "loss": 0.0005759174935519695, - "step": 18835 - }, - { - "epoch": 3.21227621483376, - "grad_norm": 0.012291891500353813, - "learning_rate": 7.2312893787892695e-06, - "loss": 0.000749863451346755, - "step": 18840 - }, - { - "epoch": 3.213128729752771, - "grad_norm": 0.048827286809682846, - "learning_rate": 7.216171166415677e-06, - "loss": 0.00025824215263128283, - "step": 18845 - }, - { - "epoch": 3.213981244671782, - "grad_norm": 0.02683771587908268, - "learning_rate": 7.201067091540773e-06, - "loss": 0.0006501530762761832, - "step": 18850 - }, - { - "epoch": 3.214833759590793, - "grad_norm": 0.019678298383951187, - "learning_rate": 7.185977161215633e-06, - "loss": 0.0006075311917811632, - "step": 18855 - }, - { - "epoch": 3.215686274509804, - "grad_norm": 0.03252946212887764, - "learning_rate": 7.170901382484737e-06, - "loss": 0.0005334909074008465, - "step": 18860 - }, - { - "epoch": 3.216538789428815, - "grad_norm": 0.02117627114057541, - "learning_rate": 7.155839762385973e-06, - "loss": 0.0012689195573329926, - "step": 18865 - }, - { - "epoch": 3.217391304347826, - "grad_norm": 0.057649675756692886, - "learning_rate": 7.140792307950598e-06, - "loss": 0.0006012415513396263, - "step": 18870 - }, - { - "epoch": 3.2182438192668372, - "grad_norm": 0.01991843432188034, - "learning_rate": 7.125759026203254e-06, - "loss": 0.0006623437628149986, - "step": 18875 - }, - { - "epoch": 3.2190963341858483, - "grad_norm": 0.020812440663576126, - "learning_rate": 7.110739924161998e-06, - "loss": 0.00042641563341021537, - "step": 18880 - }, - { - "epoch": 3.2199488491048593, - "grad_norm": 0.001917969319038093, - "learning_rate": 7.095735008838227e-06, - "loss": 0.00040855356492102145, - "step": 18885 - }, - { - "epoch": 3.2208013640238704, - "grad_norm": 0.02301851660013199, - "learning_rate": 7.080744287236753e-06, - "loss": 0.000662582740187645, - "step": 18890 - }, - { - "epoch": 3.2216538789428815, - "grad_norm": 0.045610178261995316, - "learning_rate": 7.065767766355733e-06, - "loss": 0.0003238538280129433, - "step": 18895 - }, - { - "epoch": 3.2225063938618925, - "grad_norm": 0.02546820044517517, - "learning_rate": 7.050805453186707e-06, - "loss": 0.0005394276697188616, - "step": 18900 - }, - { - "epoch": 3.2233589087809036, - "grad_norm": 0.01680169068276882, - "learning_rate": 7.035857354714572e-06, - "loss": 0.0001849454827606678, - "step": 18905 - }, - { - "epoch": 3.2242114236999146, - "grad_norm": 0.07080511003732681, - "learning_rate": 7.020923477917616e-06, - "loss": 0.0009049614891409874, - "step": 18910 - }, - { - "epoch": 3.2250639386189257, - "grad_norm": 0.015143739990890026, - "learning_rate": 7.006003829767458e-06, - "loss": 0.0003394487779587507, - "step": 18915 - }, - { - "epoch": 3.2259164535379368, - "grad_norm": 0.02981925569474697, - "learning_rate": 6.991098417229077e-06, - "loss": 0.0008880021050572395, - "step": 18920 - }, - { - "epoch": 3.226768968456948, - "grad_norm": 0.011556530371308327, - "learning_rate": 6.976207247260836e-06, - "loss": 0.0005170104093849659, - "step": 18925 - }, - { - "epoch": 3.227621483375959, - "grad_norm": 0.06276580691337585, - "learning_rate": 6.961330326814407e-06, - "loss": 0.0013013094663619994, - "step": 18930 - }, - { - "epoch": 3.2284739982949704, - "grad_norm": 0.015998052433133125, - "learning_rate": 6.946467662834852e-06, - "loss": 0.0005359035450965167, - "step": 18935 - }, - { - "epoch": 3.229326513213981, - "grad_norm": 0.04979191720485687, - "learning_rate": 6.931619262260546e-06, - "loss": 0.0007673433981835842, - "step": 18940 - }, - { - "epoch": 3.2301790281329925, - "grad_norm": 0.12047834694385529, - "learning_rate": 6.9167851320232225e-06, - "loss": 0.0017763294279575347, - "step": 18945 - }, - { - "epoch": 3.2310315430520036, - "grad_norm": 0.010395308956503868, - "learning_rate": 6.901965279047926e-06, - "loss": 0.000369875249452889, - "step": 18950 - }, - { - "epoch": 3.2318840579710146, - "grad_norm": 0.07600873708724976, - "learning_rate": 6.887159710253089e-06, - "loss": 0.0005032925866544247, - "step": 18955 - }, - { - "epoch": 3.2327365728900257, - "grad_norm": 0.011709989979863167, - "learning_rate": 6.8723684325504235e-06, - "loss": 0.0009861321188509465, - "step": 18960 - }, - { - "epoch": 3.2335890878090368, - "grad_norm": 0.024761514738202095, - "learning_rate": 6.857591452844994e-06, - "loss": 0.00034510630648583175, - "step": 18965 - }, - { - "epoch": 3.234441602728048, - "grad_norm": 0.04486176744103432, - "learning_rate": 6.8428287780351755e-06, - "loss": 0.00017987118335440754, - "step": 18970 - }, - { - "epoch": 3.235294117647059, - "grad_norm": 0.020876318216323853, - "learning_rate": 6.828080415012691e-06, - "loss": 0.00046160193160176276, - "step": 18975 - }, - { - "epoch": 3.23614663256607, - "grad_norm": 0.005108790006488562, - "learning_rate": 6.813346370662566e-06, - "loss": 0.0002624133136123419, - "step": 18980 - }, - { - "epoch": 3.236999147485081, - "grad_norm": 0.023575518280267715, - "learning_rate": 6.798626651863142e-06, - "loss": 0.0004043182358145714, - "step": 18985 - }, - { - "epoch": 3.237851662404092, - "grad_norm": 0.03796171769499779, - "learning_rate": 6.78392126548607e-06, - "loss": 0.000300011713989079, - "step": 18990 - }, - { - "epoch": 3.238704177323103, - "grad_norm": 0.010747452266514301, - "learning_rate": 6.769230218396302e-06, - "loss": 0.0008313095197081566, - "step": 18995 - }, - { - "epoch": 3.239556692242114, - "grad_norm": 0.037278912961483, - "learning_rate": 6.75455351745213e-06, - "loss": 0.0004106287844479084, - "step": 19000 - }, - { - "epoch": 3.2404092071611252, - "grad_norm": 0.06086887791752815, - "learning_rate": 6.7398911695051155e-06, - "loss": 0.0005375253967940807, - "step": 19005 - }, - { - "epoch": 3.2412617220801363, - "grad_norm": 0.008513805456459522, - "learning_rate": 6.725243181400129e-06, - "loss": 0.0002401076490059495, - "step": 19010 - }, - { - "epoch": 3.2421142369991474, - "grad_norm": 0.11595302820205688, - "learning_rate": 6.71060955997533e-06, - "loss": 0.000469267088919878, - "step": 19015 - }, - { - "epoch": 3.2429667519181584, - "grad_norm": 0.032653845846652985, - "learning_rate": 6.695990312062191e-06, - "loss": 0.0005158457439392805, - "step": 19020 - }, - { - "epoch": 3.2438192668371695, - "grad_norm": 0.0623047836124897, - "learning_rate": 6.6813854444854695e-06, - "loss": 0.0005772956646978855, - "step": 19025 - }, - { - "epoch": 3.2446717817561805, - "grad_norm": 0.003954550251364708, - "learning_rate": 6.666794964063195e-06, - "loss": 0.0011268424801528453, - "step": 19030 - }, - { - "epoch": 3.2455242966751916, - "grad_norm": 0.02917463518679142, - "learning_rate": 6.6522188776066935e-06, - "loss": 0.0007552789058536292, - "step": 19035 - }, - { - "epoch": 3.246376811594203, - "grad_norm": 0.0267369132488966, - "learning_rate": 6.637657191920561e-06, - "loss": 0.00021620083134621382, - "step": 19040 - }, - { - "epoch": 3.247229326513214, - "grad_norm": 0.05929157882928848, - "learning_rate": 6.62310991380267e-06, - "loss": 0.0005157306790351867, - "step": 19045 - }, - { - "epoch": 3.2480818414322252, - "grad_norm": 0.01827944628894329, - "learning_rate": 6.608577050044193e-06, - "loss": 0.0003319120965898037, - "step": 19050 - }, - { - "epoch": 3.2489343563512363, - "grad_norm": 0.013090296648442745, - "learning_rate": 6.594058607429542e-06, - "loss": 0.0005971027072519064, - "step": 19055 - }, - { - "epoch": 3.2494458653026426, - "eval_loss": 0.06181741878390312, - "eval_runtime": 3.7049, - "eval_samples_per_second": 68.017, - "eval_steps_per_second": 1.08, - "step": 19058 - }, - { - "eval_cer_subset": 0.013418665624440211, - "eval_cer_subset_edit_distance": 824, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 19058 - }, - { - "epoch": 3.2497868712702473, - "grad_norm": 0.11219825595617294, - "learning_rate": 6.579554592736402e-06, - "loss": 0.0005919500254094601, - "step": 19060 - }, - { - "epoch": 3.2506393861892584, - "grad_norm": 0.010144476778805256, - "learning_rate": 6.565065012735742e-06, - "loss": 0.00045172283425927163, - "step": 19065 - }, - { - "epoch": 3.2514919011082695, - "grad_norm": 0.013089130632579327, - "learning_rate": 6.550589874191782e-06, - "loss": 0.00025117534678429363, - "step": 19070 - }, - { - "epoch": 3.2523444160272805, - "grad_norm": 0.047521159052848816, - "learning_rate": 6.536129183861994e-06, - "loss": 0.0008897949941456318, - "step": 19075 - }, - { - "epoch": 3.2531969309462916, - "grad_norm": 0.04106447473168373, - "learning_rate": 6.5216829484971085e-06, - "loss": 0.000283010583370924, - "step": 19080 - }, - { - "epoch": 3.2540494458653026, - "grad_norm": 0.04407098516821861, - "learning_rate": 6.507251174841109e-06, - "loss": 0.0006978865712881088, - "step": 19085 - }, - { - "epoch": 3.2549019607843137, - "grad_norm": 0.01719486154615879, - "learning_rate": 6.492833869631217e-06, - "loss": 0.00031038771849125626, - "step": 19090 - }, - { - "epoch": 3.2557544757033248, - "grad_norm": 0.007759191561490297, - "learning_rate": 6.478431039597928e-06, - "loss": 0.0005179021041840315, - "step": 19095 - }, - { - "epoch": 3.256606990622336, - "grad_norm": 0.13484114408493042, - "learning_rate": 6.464042691464956e-06, - "loss": 0.0006286890245974063, - "step": 19100 - }, - { - "epoch": 3.257459505541347, - "grad_norm": 0.015848470851778984, - "learning_rate": 6.449668831949248e-06, - "loss": 0.0005874604452401399, - "step": 19105 - }, - { - "epoch": 3.258312020460358, - "grad_norm": 0.010055625811219215, - "learning_rate": 6.43530946776102e-06, - "loss": 0.00036783108953386545, - "step": 19110 - }, - { - "epoch": 3.259164535379369, - "grad_norm": 0.010497787036001682, - "learning_rate": 6.420964605603681e-06, - "loss": 0.00043828897178173066, - "step": 19115 - }, - { - "epoch": 3.26001705029838, - "grad_norm": 0.024217624217271805, - "learning_rate": 6.40663425217391e-06, - "loss": 0.00042369402945041656, - "step": 19120 - }, - { - "epoch": 3.260869565217391, - "grad_norm": 0.022596243768930435, - "learning_rate": 6.392318414161583e-06, - "loss": 0.00037041325122118, - "step": 19125 - }, - { - "epoch": 3.261722080136402, - "grad_norm": 0.02588561922311783, - "learning_rate": 6.378017098249812e-06, - "loss": 0.00029896264895796777, - "step": 19130 - }, - { - "epoch": 3.2625745950554137, - "grad_norm": 0.01108810119330883, - "learning_rate": 6.363730311114913e-06, - "loss": 0.0006667471025139093, - "step": 19135 - }, - { - "epoch": 3.2634271099744243, - "grad_norm": 0.009552333503961563, - "learning_rate": 6.349458059426453e-06, - "loss": 0.0004497227258980274, - "step": 19140 - }, - { - "epoch": 3.264279624893436, - "grad_norm": 0.01460598036646843, - "learning_rate": 6.335200349847185e-06, - "loss": 0.0002194883767515421, - "step": 19145 - }, - { - "epoch": 3.265132139812447, - "grad_norm": 0.03732374310493469, - "learning_rate": 6.320957189033071e-06, - "loss": 0.0002586513292044401, - "step": 19150 - }, - { - "epoch": 3.265984654731458, - "grad_norm": 0.016899019479751587, - "learning_rate": 6.306728583633319e-06, - "loss": 0.0009543164633214474, - "step": 19155 - }, - { - "epoch": 3.266837169650469, - "grad_norm": 0.01586720161139965, - "learning_rate": 6.292514540290286e-06, - "loss": 0.0008085070177912713, - "step": 19160 - }, - { - "epoch": 3.26768968456948, - "grad_norm": 0.009034652262926102, - "learning_rate": 6.278315065639588e-06, - "loss": 0.000653286511078477, - "step": 19165 - }, - { - "epoch": 3.268542199488491, - "grad_norm": 0.003070715581998229, - "learning_rate": 6.264130166309996e-06, - "loss": 0.00026131083723157644, - "step": 19170 - }, - { - "epoch": 3.269394714407502, - "grad_norm": 0.032700877636671066, - "learning_rate": 6.249959848923497e-06, - "loss": 0.0004788469523191452, - "step": 19175 - }, - { - "epoch": 3.2702472293265132, - "grad_norm": 0.03986335173249245, - "learning_rate": 6.235804120095252e-06, - "loss": 0.0005488947499543428, - "step": 19180 - }, - { - "epoch": 3.2710997442455243, - "grad_norm": 0.02649238146841526, - "learning_rate": 6.221662986433652e-06, - "loss": 0.0008479308336973191, - "step": 19185 - }, - { - "epoch": 3.2719522591645354, - "grad_norm": 0.04019145667552948, - "learning_rate": 6.207536454540235e-06, - "loss": 0.0008174203336238861, - "step": 19190 - }, - { - "epoch": 3.2728047740835464, - "grad_norm": 0.013805892318487167, - "learning_rate": 6.193424531009733e-06, - "loss": 0.0010017482563853264, - "step": 19195 - }, - { - "epoch": 3.2736572890025575, - "grad_norm": 0.022588396444916725, - "learning_rate": 6.17932722243006e-06, - "loss": 0.0004558952059596777, - "step": 19200 - }, - { - "epoch": 3.2745098039215685, - "grad_norm": 0.009211315773427486, - "learning_rate": 6.1652445353823136e-06, - "loss": 0.0002530797151848674, - "step": 19205 - }, - { - "epoch": 3.2753623188405796, - "grad_norm": 0.03019166924059391, - "learning_rate": 6.151176476440768e-06, - "loss": 0.0010970567353069782, - "step": 19210 - }, - { - "epoch": 3.2762148337595907, - "grad_norm": 0.10982025414705276, - "learning_rate": 6.137123052172854e-06, - "loss": 0.00046633705496788023, - "step": 19215 - }, - { - "epoch": 3.2770673486786017, - "grad_norm": 0.09192784875631332, - "learning_rate": 6.123084269139178e-06, - "loss": 0.0013196432963013649, - "step": 19220 - }, - { - "epoch": 3.277919863597613, - "grad_norm": 0.02934069000184536, - "learning_rate": 6.109060133893501e-06, - "loss": 0.0005467353854328394, - "step": 19225 - }, - { - "epoch": 3.2787723785166243, - "grad_norm": 0.03443235158920288, - "learning_rate": 6.095050652982773e-06, - "loss": 0.0004425105173140764, - "step": 19230 - }, - { - "epoch": 3.279624893435635, - "grad_norm": 0.04472684487700462, - "learning_rate": 6.081055832947077e-06, - "loss": 0.0006654649972915649, - "step": 19235 - }, - { - "epoch": 3.2804774083546464, - "grad_norm": 0.04871739074587822, - "learning_rate": 6.067075680319663e-06, - "loss": 0.0011335751041769981, - "step": 19240 - }, - { - "epoch": 3.2813299232736575, - "grad_norm": 0.17364081740379333, - "learning_rate": 6.053110201626918e-06, - "loss": 0.0007838122546672821, - "step": 19245 - }, - { - "epoch": 3.2821824381926685, - "grad_norm": 0.10362228006124496, - "learning_rate": 6.0391594033884035e-06, - "loss": 0.0004850291647017002, - "step": 19250 - }, - { - "epoch": 3.2830349531116796, - "grad_norm": 0.033360131084918976, - "learning_rate": 6.025223292116828e-06, - "loss": 0.0003192754928022623, - "step": 19255 - }, - { - "epoch": 3.2838874680306906, - "grad_norm": 0.019219927489757538, - "learning_rate": 6.0113018743180195e-06, - "loss": 0.0004580964334309101, - "step": 19260 - }, - { - "epoch": 3.2847399829497017, - "grad_norm": 0.019038213416934013, - "learning_rate": 5.997395156490956e-06, - "loss": 0.000247283186763525, - "step": 19265 - }, - { - "epoch": 3.2855924978687128, - "grad_norm": 0.07672185450792313, - "learning_rate": 5.983503145127763e-06, - "loss": 0.00036474117077887056, - "step": 19270 - }, - { - "epoch": 3.286445012787724, - "grad_norm": 0.006674405187368393, - "learning_rate": 5.96962584671368e-06, - "loss": 0.000742178363725543, - "step": 19275 - }, - { - "epoch": 3.287297527706735, - "grad_norm": 0.0242395531386137, - "learning_rate": 5.9557632677271105e-06, - "loss": 0.0003610172076150775, - "step": 19280 - }, - { - "epoch": 3.288150042625746, - "grad_norm": 0.006741875316947699, - "learning_rate": 5.941915414639559e-06, - "loss": 0.0006255440413951874, - "step": 19285 - }, - { - "epoch": 3.289002557544757, - "grad_norm": 0.015137423761188984, - "learning_rate": 5.928082293915652e-06, - "loss": 0.0003517616540193558, - "step": 19290 - }, - { - "epoch": 3.289855072463768, - "grad_norm": 0.006232273764908314, - "learning_rate": 5.9142639120131636e-06, - "loss": 0.0002735992660745978, - "step": 19295 - }, - { - "epoch": 3.290707587382779, - "grad_norm": 0.025010747835040092, - "learning_rate": 5.900460275382981e-06, - "loss": 0.0004658872727304697, - "step": 19300 - }, - { - "epoch": 3.29156010230179, - "grad_norm": 0.00881986878812313, - "learning_rate": 5.88667139046909e-06, - "loss": 0.0007451147306710481, - "step": 19305 - }, - { - "epoch": 3.2924126172208013, - "grad_norm": 0.02796418033540249, - "learning_rate": 5.872897263708607e-06, - "loss": 0.0008796761743724346, - "step": 19310 - }, - { - "epoch": 3.2932651321398123, - "grad_norm": 0.027454577386379242, - "learning_rate": 5.859137901531745e-06, - "loss": 0.00026941425167024133, - "step": 19315 - }, - { - "epoch": 3.2941176470588234, - "grad_norm": 0.0395982526242733, - "learning_rate": 5.84539331036183e-06, - "loss": 0.00041040563955903054, - "step": 19320 - }, - { - "epoch": 3.2949701619778344, - "grad_norm": 0.042481981217861176, - "learning_rate": 5.831663496615304e-06, - "loss": 0.0003256106050685048, - "step": 19325 - }, - { - "epoch": 3.2958226768968455, - "grad_norm": 0.0194789320230484, - "learning_rate": 5.817948466701703e-06, - "loss": 0.00048703285865485667, - "step": 19330 - }, - { - "epoch": 3.296675191815857, - "grad_norm": 0.03693777322769165, - "learning_rate": 5.804248227023639e-06, - "loss": 0.0004573033656924963, - "step": 19335 - }, - { - "epoch": 3.2975277067348676, - "grad_norm": 0.010155921801924706, - "learning_rate": 5.790562783976857e-06, - "loss": 0.00022799526341259478, - "step": 19340 - }, - { - "epoch": 3.298380221653879, - "grad_norm": 0.014926153235137463, - "learning_rate": 5.776892143950181e-06, - "loss": 0.0002296717371791601, - "step": 19345 - }, - { - "epoch": 3.29923273657289, - "grad_norm": 0.025415342301130295, - "learning_rate": 5.763236313325513e-06, - "loss": 0.0001236582640558481, - "step": 19350 - }, - { - "epoch": 3.3000852514919012, - "grad_norm": 0.0994359701871872, - "learning_rate": 5.749595298477851e-06, - "loss": 0.0009945498779416085, - "step": 19355 - }, - { - "epoch": 3.3009377664109123, - "grad_norm": 0.015362569130957127, - "learning_rate": 5.7359691057752705e-06, - "loss": 0.0005355034954845905, - "step": 19360 - }, - { - "epoch": 3.3017902813299234, - "grad_norm": 0.07377626746892929, - "learning_rate": 5.722357741578925e-06, - "loss": 0.0009824702516198157, - "step": 19365 - }, - { - "epoch": 3.3026427962489344, - "grad_norm": 0.01109279878437519, - "learning_rate": 5.708761212243067e-06, - "loss": 0.00042829746380448344, - "step": 19370 - }, - { - "epoch": 3.3034953111679455, - "grad_norm": 0.012984010390937328, - "learning_rate": 5.695179524115008e-06, - "loss": 0.0005177812185138464, - "step": 19375 - }, - { - "epoch": 3.3043478260869565, - "grad_norm": 0.01330599281936884, - "learning_rate": 5.681612683535111e-06, - "loss": 0.00047001498751342297, - "step": 19380 - }, - { - "epoch": 3.3052003410059676, - "grad_norm": 0.027421219274401665, - "learning_rate": 5.66806069683686e-06, - "loss": 0.0005254631396383047, - "step": 19385 - }, - { - "epoch": 3.3060528559249787, - "grad_norm": 0.014183313585817814, - "learning_rate": 5.6545235703467435e-06, - "loss": 0.000249856011942029, - "step": 19390 - }, - { - "epoch": 3.3069053708439897, - "grad_norm": 0.09523740410804749, - "learning_rate": 5.641001310384365e-06, - "loss": 0.000620997790247202, - "step": 19395 - }, - { - "epoch": 3.307757885763001, - "grad_norm": 0.08386892080307007, - "learning_rate": 5.627493923262354e-06, - "loss": 0.0012673554010689259, - "step": 19400 - }, - { - "epoch": 3.308610400682012, - "grad_norm": 0.03303903713822365, - "learning_rate": 5.614001415286412e-06, - "loss": 0.0011139905080199241, - "step": 19405 - }, - { - "epoch": 3.309462915601023, - "grad_norm": 0.03811914473772049, - "learning_rate": 5.6005237927552805e-06, - "loss": 0.0006227992475032806, - "step": 19410 - }, - { - "epoch": 3.310315430520034, - "grad_norm": 0.016570856794714928, - "learning_rate": 5.5870610619607805e-06, - "loss": 0.0005445381160825491, - "step": 19415 - }, - { - "epoch": 3.311167945439045, - "grad_norm": 0.013608088716864586, - "learning_rate": 5.573613229187751e-06, - "loss": 0.0004142835270613432, - "step": 19420 - }, - { - "epoch": 3.312020460358056, - "grad_norm": 0.053280171006917953, - "learning_rate": 5.560180300714079e-06, - "loss": 0.0003944558557122946, - "step": 19425 - }, - { - "epoch": 3.3128729752770676, - "grad_norm": 0.04067116975784302, - "learning_rate": 5.5467622828107225e-06, - "loss": 0.0008278630673885345, - "step": 19430 - }, - { - "epoch": 3.313725490196078, - "grad_norm": 0.0459442101418972, - "learning_rate": 5.533359181741638e-06, - "loss": 0.00037522357888519764, - "step": 19435 - }, - { - "epoch": 3.3145780051150897, - "grad_norm": 0.07973090559244156, - "learning_rate": 5.519971003763862e-06, - "loss": 0.0006369464099407196, - "step": 19440 - }, - { - "epoch": 3.3154305200341008, - "grad_norm": 0.0318770669400692, - "learning_rate": 5.506597755127425e-06, - "loss": 0.0006823249161243439, - "step": 19445 - }, - { - "epoch": 3.316283034953112, - "grad_norm": 0.04201148822903633, - "learning_rate": 5.49323944207541e-06, - "loss": 0.000304691749624908, - "step": 19450 - }, - { - "epoch": 3.317135549872123, - "grad_norm": 0.011656812392175198, - "learning_rate": 5.479896070843919e-06, - "loss": 0.00038321034517139194, - "step": 19455 - }, - { - "epoch": 3.317988064791134, - "grad_norm": 0.03550105541944504, - "learning_rate": 5.466567647662075e-06, - "loss": 0.0007771219592541456, - "step": 19460 - }, - { - "epoch": 3.318840579710145, - "grad_norm": 0.010552220977842808, - "learning_rate": 5.453254178752044e-06, - "loss": 0.0008830759674310684, - "step": 19465 - }, - { - "epoch": 3.319693094629156, - "grad_norm": 0.0077703725546598434, - "learning_rate": 5.439955670328987e-06, - "loss": 0.0003484194632619619, - "step": 19470 - }, - { - "epoch": 3.320545609548167, - "grad_norm": 0.013720662333071232, - "learning_rate": 5.426672128601088e-06, - "loss": 0.0005347099620848894, - "step": 19475 - }, - { - "epoch": 3.321398124467178, - "grad_norm": 0.005950555205345154, - "learning_rate": 5.413403559769549e-06, - "loss": 0.00040374435484409333, - "step": 19480 - }, - { - "epoch": 3.3222506393861893, - "grad_norm": 0.005855921655893326, - "learning_rate": 5.400149970028587e-06, - "loss": 0.00011817219201475382, - "step": 19485 - }, - { - "epoch": 3.3231031543052003, - "grad_norm": 0.05193415656685829, - "learning_rate": 5.3869113655654145e-06, - "loss": 0.000558258919045329, - "step": 19490 - }, - { - "epoch": 3.3239556692242114, - "grad_norm": 0.002798686036840081, - "learning_rate": 5.37368775256025e-06, - "loss": 0.0002853567479178309, - "step": 19495 - }, - { - "epoch": 3.3248081841432224, - "grad_norm": 0.0037216702476143837, - "learning_rate": 5.360479137186315e-06, - "loss": 0.0003375500673428178, - "step": 19500 - }, - { - "epoch": 3.3256606990622335, - "grad_norm": 0.06180913746356964, - "learning_rate": 5.347285525609821e-06, - "loss": 0.00018238723278045653, - "step": 19505 - }, - { - "epoch": 3.3265132139812446, - "grad_norm": 0.034047432243824005, - "learning_rate": 5.334106923990009e-06, - "loss": 0.0006082602776587009, - "step": 19510 - }, - { - "epoch": 3.3273657289002556, - "grad_norm": 0.00867203064262867, - "learning_rate": 5.32094333847907e-06, - "loss": 0.0003369096200913191, - "step": 19515 - }, - { - "epoch": 3.3282182438192667, - "grad_norm": 0.04999540373682976, - "learning_rate": 5.3077947752222e-06, - "loss": 0.00042240540497004984, - "step": 19520 - }, - { - "epoch": 3.3290707587382777, - "grad_norm": 0.10174256563186646, - "learning_rate": 5.294661240357599e-06, - "loss": 0.0012334841303527355, - "step": 19525 - }, - { - "epoch": 3.329923273657289, - "grad_norm": 0.019731154665350914, - "learning_rate": 5.2815427400164365e-06, - "loss": 0.0002502906369045377, - "step": 19530 - }, - { - "epoch": 3.3307757885763003, - "grad_norm": 0.040488291531801224, - "learning_rate": 5.268439280322864e-06, - "loss": 0.0006264269817620516, - "step": 19535 - }, - { - "epoch": 3.3316283034953114, - "grad_norm": 0.027734950184822083, - "learning_rate": 5.2553508673940095e-06, - "loss": 0.0006609380245208741, - "step": 19540 - }, - { - "epoch": 3.3324808184143224, - "grad_norm": 0.033032696694135666, - "learning_rate": 5.24227750733998e-06, - "loss": 0.00046310769394040106, - "step": 19545 - }, - { - "epoch": 3.3333333333333335, - "grad_norm": 0.009758932515978813, - "learning_rate": 5.2292192062638485e-06, - "loss": 0.0003860333003103733, - "step": 19550 - }, - { - "epoch": 3.3341858482523445, - "grad_norm": 0.010039775632321835, - "learning_rate": 5.2161759702616764e-06, - "loss": 0.0005642361007630825, - "step": 19555 - }, - { - "epoch": 3.3350383631713556, - "grad_norm": 0.018729569390416145, - "learning_rate": 5.203147805422476e-06, - "loss": 0.0002538987435400486, - "step": 19560 - }, - { - "epoch": 3.3358908780903667, - "grad_norm": 0.07940587401390076, - "learning_rate": 5.190134717828216e-06, - "loss": 0.0003814149182289839, - "step": 19565 - }, - { - "epoch": 3.3367433930093777, - "grad_norm": 0.002807241166010499, - "learning_rate": 5.1771367135538575e-06, - "loss": 0.0005854971241205931, - "step": 19570 - }, - { - "epoch": 3.337595907928389, - "grad_norm": 0.029841719195246696, - "learning_rate": 5.164153798667284e-06, - "loss": 0.00021142382174730301, - "step": 19575 - }, - { - "epoch": 3.3384484228474, - "grad_norm": 0.017503969371318817, - "learning_rate": 5.151185979229372e-06, - "loss": 0.0005035904701799154, - "step": 19580 - }, - { - "epoch": 3.339300937766411, - "grad_norm": 0.033913351595401764, - "learning_rate": 5.138233261293917e-06, - "loss": 0.00033289811108261347, - "step": 19585 - }, - { - "epoch": 3.340153452685422, - "grad_norm": 0.027594633400440216, - "learning_rate": 5.125295650907682e-06, - "loss": 0.0006479782052338124, - "step": 19590 - }, - { - "epoch": 3.341005967604433, - "grad_norm": 0.017926139757037163, - "learning_rate": 5.112373154110365e-06, - "loss": 0.0009788990020751954, - "step": 19595 - }, - { - "epoch": 3.341858482523444, - "grad_norm": 0.012236343696713448, - "learning_rate": 5.099465776934636e-06, - "loss": 0.0009869396686553954, - "step": 19600 - }, - { - "epoch": 3.342710997442455, - "grad_norm": 0.1368396282196045, - "learning_rate": 5.086573525406075e-06, - "loss": 0.0009202501736581325, - "step": 19605 - }, - { - "epoch": 3.343563512361466, - "grad_norm": 0.04556318372488022, - "learning_rate": 5.07369640554321e-06, - "loss": 0.0002738154027611017, - "step": 19610 - }, - { - "epoch": 3.3444160272804773, - "grad_norm": 0.10456430912017822, - "learning_rate": 5.060834423357522e-06, - "loss": 0.0005405619740486145, - "step": 19615 - }, - { - "epoch": 3.3452685421994883, - "grad_norm": 0.009618780575692654, - "learning_rate": 5.047987584853398e-06, - "loss": 0.0003645260352641344, - "step": 19620 - }, - { - "epoch": 3.3461210571184994, - "grad_norm": 0.0008346811519004405, - "learning_rate": 5.035155896028186e-06, - "loss": 0.0009771523997187614, - "step": 19625 - }, - { - "epoch": 3.346973572037511, - "grad_norm": 0.07436166703701019, - "learning_rate": 5.022339362872134e-06, - "loss": 0.0005307651124894619, - "step": 19630 - }, - { - "epoch": 3.3478260869565215, - "grad_norm": 0.047390375286340714, - "learning_rate": 5.0095379913684326e-06, - "loss": 0.00032626844476908446, - "step": 19635 - }, - { - "epoch": 3.348678601875533, - "grad_norm": 0.11302479356527328, - "learning_rate": 4.996751787493172e-06, - "loss": 0.0006419796496629715, - "step": 19640 - }, - { - "epoch": 3.349531116794544, - "grad_norm": 0.04735693335533142, - "learning_rate": 4.983980757215398e-06, - "loss": 0.0004858987871557474, - "step": 19645 - }, - { - "epoch": 3.350383631713555, - "grad_norm": 0.05677567049860954, - "learning_rate": 4.971224906497043e-06, - "loss": 0.0009346410632133483, - "step": 19650 - }, - { - "epoch": 3.351236146632566, - "grad_norm": 0.01608835905790329, - "learning_rate": 4.958484241292954e-06, - "loss": 0.000258720014244318, - "step": 19655 - }, - { - "epoch": 3.3520886615515773, - "grad_norm": 0.023287836462259293, - "learning_rate": 4.9457587675509155e-06, - "loss": 0.0007150916382670403, - "step": 19660 - }, - { - "epoch": 3.3529411764705883, - "grad_norm": 0.02774999849498272, - "learning_rate": 4.9330484912115845e-06, - "loss": 0.000649323221296072, - "step": 19665 - }, - { - "epoch": 3.3537936913895994, - "grad_norm": 0.012109563685953617, - "learning_rate": 4.920353418208556e-06, - "loss": 0.00036820617970079184, - "step": 19670 - }, - { - "epoch": 3.3546462063086104, - "grad_norm": 0.03116477094590664, - "learning_rate": 4.907673554468305e-06, - "loss": 0.0009199230931699276, - "step": 19675 - }, - { - "epoch": 3.3554987212276215, - "grad_norm": 0.037316855043172836, - "learning_rate": 4.895008905910219e-06, - "loss": 0.0005375304259359837, - "step": 19680 - }, - { - "epoch": 3.3563512361466326, - "grad_norm": 0.02448320761322975, - "learning_rate": 4.882359478446568e-06, - "loss": 0.0007062189746648073, - "step": 19685 - }, - { - "epoch": 3.3572037510656436, - "grad_norm": 0.013858492486178875, - "learning_rate": 4.8697252779825195e-06, - "loss": 0.0003158868057653308, - "step": 19690 - }, - { - "epoch": 3.3580562659846547, - "grad_norm": 0.007077233865857124, - "learning_rate": 4.857106310416161e-06, - "loss": 0.00016839986201375723, - "step": 19695 - }, - { - "epoch": 3.3589087809036657, - "grad_norm": 0.00671799760311842, - "learning_rate": 4.844502581638424e-06, - "loss": 0.0013290375471115112, - "step": 19700 - }, - { - "epoch": 3.359761295822677, - "grad_norm": 0.11925818771123886, - "learning_rate": 4.83191409753317e-06, - "loss": 0.0008001517504453659, - "step": 19705 - }, - { - "epoch": 3.360613810741688, - "grad_norm": 0.012915749102830887, - "learning_rate": 4.819340863977098e-06, - "loss": 0.0003090864047408104, - "step": 19710 - }, - { - "epoch": 3.361466325660699, - "grad_norm": 0.0198194682598114, - "learning_rate": 4.806782886839833e-06, - "loss": 0.00015502141322940589, - "step": 19715 - }, - { - "epoch": 3.36231884057971, - "grad_norm": 0.02647668495774269, - "learning_rate": 4.794240171983848e-06, - "loss": 0.00032354283612221477, - "step": 19720 - }, - { - "epoch": 3.363171355498721, - "grad_norm": 0.03167302906513214, - "learning_rate": 4.781712725264503e-06, - "loss": 0.0008794944733381271, - "step": 19725 - }, - { - "epoch": 3.364023870417732, - "grad_norm": 0.03751087561249733, - "learning_rate": 4.769200552530017e-06, - "loss": 0.0017323100939393044, - "step": 19730 - }, - { - "epoch": 3.3648763853367436, - "grad_norm": 0.08725135773420334, - "learning_rate": 4.75670365962149e-06, - "loss": 0.0009663975797593594, - "step": 19735 - }, - { - "epoch": 3.3657289002557547, - "grad_norm": 0.014557529240846634, - "learning_rate": 4.7442220523729005e-06, - "loss": 0.0004029064439237118, - "step": 19740 - }, - { - "epoch": 3.3665814151747657, - "grad_norm": 0.05396854132413864, - "learning_rate": 4.731755736611068e-06, - "loss": 0.0011473988182842732, - "step": 19745 - }, - { - "epoch": 3.367433930093777, - "grad_norm": 0.06434670090675354, - "learning_rate": 4.7193047181556764e-06, - "loss": 0.00039711645804345607, - "step": 19750 - }, - { - "epoch": 3.368286445012788, - "grad_norm": 0.01898345723748207, - "learning_rate": 4.706869002819287e-06, - "loss": 0.0002789617981761694, - "step": 19755 - }, - { - "epoch": 3.369138959931799, - "grad_norm": 0.021839376538991928, - "learning_rate": 4.6944485964073085e-06, - "loss": 0.001008017361164093, - "step": 19760 - }, - { - "epoch": 3.36999147485081, - "grad_norm": 0.013436227105557919, - "learning_rate": 4.682043504717991e-06, - "loss": 0.0002914538374170661, - "step": 19765 - }, - { - "epoch": 3.370843989769821, - "grad_norm": 0.04120805487036705, - "learning_rate": 4.6696537335424485e-06, - "loss": 0.00099704097956419, - "step": 19770 - }, - { - "epoch": 3.371696504688832, - "grad_norm": 0.015087714418768883, - "learning_rate": 4.6572792886646326e-06, - "loss": 0.00031175173353403807, - "step": 19775 - }, - { - "epoch": 3.372549019607843, - "grad_norm": 0.09424779564142227, - "learning_rate": 4.644920175861347e-06, - "loss": 0.0008490364067256451, - "step": 19780 - }, - { - "epoch": 3.373401534526854, - "grad_norm": 0.15744835138320923, - "learning_rate": 4.632576400902244e-06, - "loss": 0.0011794422753155231, - "step": 19785 - }, - { - "epoch": 3.3742540494458653, - "grad_norm": 0.07353512197732925, - "learning_rate": 4.620247969549801e-06, - "loss": 0.0005946496035903692, - "step": 19790 - }, - { - "epoch": 3.3751065643648763, - "grad_norm": 0.05715373530983925, - "learning_rate": 4.607934887559335e-06, - "loss": 0.0005888998974114656, - "step": 19795 - }, - { - "epoch": 3.3759590792838874, - "grad_norm": 0.09267253428697586, - "learning_rate": 4.5956371606790195e-06, - "loss": 0.0007545445580035449, - "step": 19800 - }, - { - "epoch": 3.3768115942028984, - "grad_norm": 0.041159722954034805, - "learning_rate": 4.5833547946498235e-06, - "loss": 0.0006760005839169025, - "step": 19805 - }, - { - "epoch": 3.3776641091219095, - "grad_norm": 0.1095680296421051, - "learning_rate": 4.571087795205583e-06, - "loss": 0.0010204846039414406, - "step": 19810 - }, - { - "epoch": 3.3785166240409206, - "grad_norm": 0.006533615291118622, - "learning_rate": 4.558836168072928e-06, - "loss": 0.00032924620900303124, - "step": 19815 - }, - { - "epoch": 3.3793691389599316, - "grad_norm": 0.09690971672534943, - "learning_rate": 4.5465999189713305e-06, - "loss": 0.0006089920178055763, - "step": 19820 - }, - { - "epoch": 3.3802216538789427, - "grad_norm": 0.03703468665480614, - "learning_rate": 4.53437905361307e-06, - "loss": 0.00024356732610613107, - "step": 19825 - }, - { - "epoch": 3.381074168797954, - "grad_norm": 0.03449544310569763, - "learning_rate": 4.522173577703267e-06, - "loss": 0.0004322177264839411, - "step": 19830 - }, - { - "epoch": 3.381926683716965, - "grad_norm": 0.022056737914681435, - "learning_rate": 4.509983496939834e-06, - "loss": 0.00032165104057639836, - "step": 19835 - }, - { - "epoch": 3.3827791986359763, - "grad_norm": 0.06808804720640182, - "learning_rate": 4.4978088170135064e-06, - "loss": 0.0004901651758700609, - "step": 19840 - }, - { - "epoch": 3.3836317135549874, - "grad_norm": 0.035225335508584976, - "learning_rate": 4.485649543607835e-06, - "loss": 0.000494948634877801, - "step": 19845 - }, - { - "epoch": 3.3844842284739984, - "grad_norm": 0.005756362807005644, - "learning_rate": 4.473505682399165e-06, - "loss": 0.00037348996847867965, - "step": 19850 - }, - { - "epoch": 3.3853367433930095, - "grad_norm": 0.015896733850240707, - "learning_rate": 4.461377239056669e-06, - "loss": 0.001073040347546339, - "step": 19855 - }, - { - "epoch": 3.3861892583120206, - "grad_norm": 0.005726287607103586, - "learning_rate": 4.449264219242296e-06, - "loss": 0.00016913213767111301, - "step": 19860 - }, - { - "epoch": 3.3870417732310316, - "grad_norm": 0.01894184947013855, - "learning_rate": 4.4371666286108125e-06, - "loss": 0.0001936727436259389, - "step": 19865 - }, - { - "epoch": 3.3878942881500427, - "grad_norm": 0.0019047146197408438, - "learning_rate": 4.425084472809763e-06, - "loss": 0.00023375547025352716, - "step": 19870 - }, - { - "epoch": 3.3887468030690537, - "grad_norm": 0.004392183385789394, - "learning_rate": 4.41301775747952e-06, - "loss": 0.0006707040593028069, - "step": 19875 - }, - { - "epoch": 3.389599317988065, - "grad_norm": 0.024085786193609238, - "learning_rate": 4.400966488253218e-06, - "loss": 0.0002247063210234046, - "step": 19880 - }, - { - "epoch": 3.390451832907076, - "grad_norm": 0.07025684416294098, - "learning_rate": 4.388930670756779e-06, - "loss": 0.0007792794145643711, - "step": 19885 - }, - { - "epoch": 3.391304347826087, - "grad_norm": 0.06971945613622665, - "learning_rate": 4.3769103106089454e-06, - "loss": 0.0019492624327540399, - "step": 19890 - }, - { - "epoch": 3.392156862745098, - "grad_norm": 0.065009705722332, - "learning_rate": 4.364905413421204e-06, - "loss": 0.0009217139333486557, - "step": 19895 - }, - { - "epoch": 3.393009377664109, - "grad_norm": 0.050812624394893646, - "learning_rate": 4.352915984797849e-06, - "loss": 0.0007668033242225647, - "step": 19900 - }, - { - "epoch": 3.39386189258312, - "grad_norm": 0.0837833359837532, - "learning_rate": 4.340942030335942e-06, - "loss": 0.0005806859582662583, - "step": 19905 - }, - { - "epoch": 3.394714407502131, - "grad_norm": 0.03263656422495842, - "learning_rate": 4.3289835556253205e-06, - "loss": 0.0011843616142868997, - "step": 19910 - }, - { - "epoch": 3.395566922421142, - "grad_norm": 0.01964580826461315, - "learning_rate": 4.317040566248605e-06, - "loss": 0.0004248973447829485, - "step": 19915 - }, - { - "epoch": 3.3964194373401533, - "grad_norm": 0.05140439420938492, - "learning_rate": 4.305113067781167e-06, - "loss": 0.0004183043260127306, - "step": 19920 - }, - { - "epoch": 3.397271952259165, - "grad_norm": 0.015379955060780048, - "learning_rate": 4.293201065791172e-06, - "loss": 0.000815888587385416, - "step": 19925 - }, - { - "epoch": 3.3981244671781754, - "grad_norm": 0.0026071579195559025, - "learning_rate": 4.281304565839533e-06, - "loss": 0.0003499687649309635, - "step": 19930 - }, - { - "epoch": 3.398976982097187, - "grad_norm": 0.01917382702231407, - "learning_rate": 4.269423573479938e-06, - "loss": 0.0005561482626944781, - "step": 19935 - }, - { - "epoch": 3.399829497016198, - "grad_norm": 0.02250206656754017, - "learning_rate": 4.257558094258817e-06, - "loss": 0.0003818372031673789, - "step": 19940 - }, - { - "epoch": 3.400682011935209, - "grad_norm": 0.10248809307813644, - "learning_rate": 4.245708133715389e-06, - "loss": 0.0005628989078104496, - "step": 19945 - }, - { - "epoch": 3.40153452685422, - "grad_norm": 0.017903871834278107, - "learning_rate": 4.233873697381596e-06, - "loss": 0.000215845531783998, - "step": 19950 - }, - { - "epoch": 3.402387041773231, - "grad_norm": 0.061437349766492844, - "learning_rate": 4.222054790782155e-06, - "loss": 0.0007492574863135814, - "step": 19955 - }, - { - "epoch": 3.403239556692242, - "grad_norm": 0.04522673040628433, - "learning_rate": 4.210251419434515e-06, - "loss": 0.001055066753178835, - "step": 19960 - }, - { - "epoch": 3.4040920716112533, - "grad_norm": 0.005560046993196011, - "learning_rate": 4.198463588848883e-06, - "loss": 0.00024275691248476505, - "step": 19965 - }, - { - "epoch": 3.4049445865302643, - "grad_norm": 0.025880778208374977, - "learning_rate": 4.186691304528221e-06, - "loss": 0.00034111484419554474, - "step": 19970 - }, - { - "epoch": 3.4057971014492754, - "grad_norm": 0.02516460418701172, - "learning_rate": 4.174934571968218e-06, - "loss": 0.000534482765942812, - "step": 19975 - }, - { - "epoch": 3.4066496163682864, - "grad_norm": 0.027490204200148582, - "learning_rate": 4.1631933966572954e-06, - "loss": 0.001637015864253044, - "step": 19980 - }, - { - "epoch": 3.4075021312872975, - "grad_norm": 0.030315211042761803, - "learning_rate": 4.1514677840766395e-06, - "loss": 0.00029935024213045835, - "step": 19985 - }, - { - "epoch": 3.4083546462063086, - "grad_norm": 0.06448766589164734, - "learning_rate": 4.139757739700156e-06, - "loss": 0.0004935414995998144, - "step": 19990 - }, - { - "epoch": 3.4092071611253196, - "grad_norm": 0.007854131981730461, - "learning_rate": 4.128063268994479e-06, - "loss": 0.00030187955126166345, - "step": 19995 - }, - { - "epoch": 3.4100596760443307, - "grad_norm": 0.029494259506464005, - "learning_rate": 4.116384377418979e-06, - "loss": 0.0003482209984213114, - "step": 20000 - }, - { - "epoch": 3.4109121909633418, - "grad_norm": 0.030381083488464355, - "learning_rate": 4.104721070425751e-06, - "loss": 0.0002413678914308548, - "step": 20005 - }, - { - "epoch": 3.411764705882353, - "grad_norm": 0.006045108195394278, - "learning_rate": 4.093073353459604e-06, - "loss": 0.0004130109678953886, - "step": 20010 - }, - { - "epoch": 3.412617220801364, - "grad_norm": 0.0822497308254242, - "learning_rate": 4.081441231958094e-06, - "loss": 0.0007556038908660412, - "step": 20015 - }, - { - "epoch": 3.413469735720375, - "grad_norm": 0.02197144739329815, - "learning_rate": 4.069824711351475e-06, - "loss": 0.00042886766605079176, - "step": 20020 - }, - { - "epoch": 3.414322250639386, - "grad_norm": 0.01506667211651802, - "learning_rate": 4.0582237970627204e-06, - "loss": 0.0004569370299577713, - "step": 20025 - }, - { - "epoch": 3.4151747655583975, - "grad_norm": 0.0023130911868065596, - "learning_rate": 4.046638494507538e-06, - "loss": 0.0007974251173436641, - "step": 20030 - }, - { - "epoch": 3.416027280477408, - "grad_norm": 0.08822524547576904, - "learning_rate": 4.035068809094319e-06, - "loss": 0.0006814738735556602, - "step": 20035 - }, - { - "epoch": 3.4168797953964196, - "grad_norm": 0.026947883889079094, - "learning_rate": 4.023514746224184e-06, - "loss": 0.0002705232938751578, - "step": 20040 - }, - { - "epoch": 3.4177323103154307, - "grad_norm": 0.02061464823782444, - "learning_rate": 4.011976311290956e-06, - "loss": 0.0008053860627114772, - "step": 20045 - }, - { - "epoch": 3.4185848252344417, - "grad_norm": 0.01110768411308527, - "learning_rate": 4.000453509681155e-06, - "loss": 0.0005702998489141465, - "step": 20050 - }, - { - "epoch": 3.419437340153453, - "grad_norm": 0.06727463006973267, - "learning_rate": 3.9889463467739995e-06, - "loss": 0.00048296600580215453, - "step": 20055 - }, - { - "epoch": 3.420289855072464, - "grad_norm": 0.01981664076447487, - "learning_rate": 3.977454827941438e-06, - "loss": 0.0007956895977258682, - "step": 20060 - }, - { - "epoch": 3.421142369991475, - "grad_norm": 0.010179187171161175, - "learning_rate": 3.965978958548076e-06, - "loss": 0.001441807672381401, - "step": 20065 - }, - { - "epoch": 3.421994884910486, - "grad_norm": 0.028387323021888733, - "learning_rate": 3.954518743951235e-06, - "loss": 0.000527799129486084, - "step": 20070 - }, - { - "epoch": 3.422847399829497, - "grad_norm": 0.011368883773684502, - "learning_rate": 3.9430741895009275e-06, - "loss": 0.00046253204345703125, - "step": 20075 - }, - { - "epoch": 3.423699914748508, - "grad_norm": 0.012177668511867523, - "learning_rate": 3.931645300539847e-06, - "loss": 0.00043948981910943983, - "step": 20080 - }, - { - "epoch": 3.424552429667519, - "grad_norm": 0.07201547920703888, - "learning_rate": 3.920232082403392e-06, - "loss": 0.0005337335169315338, - "step": 20085 - }, - { - "epoch": 3.42540494458653, - "grad_norm": 0.12001162767410278, - "learning_rate": 3.908834540419621e-06, - "loss": 0.0008155249059200286, - "step": 20090 - }, - { - "epoch": 3.4262574595055413, - "grad_norm": 0.0132389971986413, - "learning_rate": 3.897452679909287e-06, - "loss": 0.000174278998747468, - "step": 20095 - }, - { - "epoch": 3.4271099744245523, - "grad_norm": 0.0051491111516952515, - "learning_rate": 3.886086506185822e-06, - "loss": 0.0006445198785513639, - "step": 20100 - }, - { - "epoch": 3.4279624893435634, - "grad_norm": 0.039136942476034164, - "learning_rate": 3.874736024555328e-06, - "loss": 0.0005972872488200665, - "step": 20105 - }, - { - "epoch": 3.4288150042625745, - "grad_norm": 0.00988066103309393, - "learning_rate": 3.863401240316599e-06, - "loss": 0.00036287889815866947, - "step": 20110 - }, - { - "epoch": 3.4296675191815855, - "grad_norm": 0.012278315611183643, - "learning_rate": 3.852082158761074e-06, - "loss": 0.0004206370562314987, - "step": 20115 - }, - { - "epoch": 3.4305200341005966, - "grad_norm": 0.08950433880090714, - "learning_rate": 3.840778785172897e-06, - "loss": 0.0007027041632682085, - "step": 20120 - }, - { - "epoch": 3.431372549019608, - "grad_norm": 0.015487313270568848, - "learning_rate": 3.829491124828843e-06, - "loss": 0.00030801878310739996, - "step": 20125 - }, - { - "epoch": 3.4322250639386187, - "grad_norm": 0.012695780955255032, - "learning_rate": 3.818219182998379e-06, - "loss": 0.00033567410428076984, - "step": 20130 - }, - { - "epoch": 3.43307757885763, - "grad_norm": 0.013385625556111336, - "learning_rate": 3.8069629649436134e-06, - "loss": 0.00033784976694732903, - "step": 20135 - }, - { - "epoch": 3.4339300937766413, - "grad_norm": 0.02653471939265728, - "learning_rate": 3.7957224759193258e-06, - "loss": 0.00037979823537170887, - "step": 20140 - }, - { - "epoch": 3.4347826086956523, - "grad_norm": 0.045122601091861725, - "learning_rate": 3.7844977211729523e-06, - "loss": 0.0003743718843907118, - "step": 20145 - }, - { - "epoch": 3.4356351236146634, - "grad_norm": 0.004004189744591713, - "learning_rate": 3.7732887059445717e-06, - "loss": 0.00024804847780615093, - "step": 20150 - }, - { - "epoch": 3.4364876385336744, - "grad_norm": 0.09962640702724457, - "learning_rate": 3.7620954354669443e-06, - "loss": 0.0007725684903562069, - "step": 20155 - }, - { - "epoch": 3.4373401534526855, - "grad_norm": 0.026793939992785454, - "learning_rate": 3.75091791496544e-06, - "loss": 0.0003023044904693961, - "step": 20160 - }, - { - "epoch": 3.4381926683716966, - "grad_norm": 0.06049729883670807, - "learning_rate": 3.7397561496581143e-06, - "loss": 0.00038756374269723894, - "step": 20165 - }, - { - "epoch": 3.4390451832907076, - "grad_norm": 0.10067807883024216, - "learning_rate": 3.7286101447556365e-06, - "loss": 0.00040011387318372726, - "step": 20170 - }, - { - "epoch": 3.4398976982097187, - "grad_norm": 0.025836393237113953, - "learning_rate": 3.7174799054613444e-06, - "loss": 0.0009764598682522774, - "step": 20175 - }, - { - "epoch": 3.4407502131287298, - "grad_norm": 0.03506815806031227, - "learning_rate": 3.7063654369712022e-06, - "loss": 0.0005544353742152452, - "step": 20180 - }, - { - "epoch": 3.441602728047741, - "grad_norm": 0.013711848296225071, - "learning_rate": 3.69526674447381e-06, - "loss": 0.0002796804532408714, - "step": 20185 - }, - { - "epoch": 3.442455242966752, - "grad_norm": 0.014671099372208118, - "learning_rate": 3.684183833150406e-06, - "loss": 0.0007412171456962824, - "step": 20190 - }, - { - "epoch": 3.443307757885763, - "grad_norm": 0.09581290930509567, - "learning_rate": 3.67311670817486e-06, - "loss": 0.0009363777935504913, - "step": 20195 - }, - { - "epoch": 3.444160272804774, - "grad_norm": 0.012721995823085308, - "learning_rate": 3.662065374713681e-06, - "loss": 0.0003047358011826873, - "step": 20200 - }, - { - "epoch": 3.445012787723785, - "grad_norm": 0.08440826833248138, - "learning_rate": 3.6510298379259883e-06, - "loss": 0.0015849992632865906, - "step": 20205 - }, - { - "epoch": 3.445865302642796, - "grad_norm": 0.016568806022405624, - "learning_rate": 3.6400101029635515e-06, - "loss": 0.0003544453531503677, - "step": 20210 - }, - { - "epoch": 3.446717817561807, - "grad_norm": 0.012676285579800606, - "learning_rate": 3.62900617497074e-06, - "loss": 0.00013037940952926875, - "step": 20215 - }, - { - "epoch": 3.4475703324808182, - "grad_norm": 0.028908727690577507, - "learning_rate": 3.618018059084553e-06, - "loss": 0.0004815624561160803, - "step": 20220 - }, - { - "epoch": 3.4484228473998293, - "grad_norm": 0.010771363973617554, - "learning_rate": 3.6070457604346155e-06, - "loss": 0.0008128033950924873, - "step": 20225 - }, - { - "epoch": 3.449275362318841, - "grad_norm": 0.05307495594024658, - "learning_rate": 3.5960892841431556e-06, - "loss": 0.0004443288315087557, - "step": 20230 - }, - { - "epoch": 3.4501278772378514, - "grad_norm": 0.04856376722455025, - "learning_rate": 3.5851486353250274e-06, - "loss": 0.0008865063078701496, - "step": 20235 - }, - { - "epoch": 3.450980392156863, - "grad_norm": 0.04320789873600006, - "learning_rate": 3.5742238190876752e-06, - "loss": 0.00030287024565041066, - "step": 20240 - }, - { - "epoch": 3.451832907075874, - "grad_norm": 0.026490481570363045, - "learning_rate": 3.563314840531181e-06, - "loss": 0.0003270474262535572, - "step": 20245 - }, - { - "epoch": 3.452685421994885, - "grad_norm": 0.01927161030471325, - "learning_rate": 3.5524217047482177e-06, - "loss": 0.0007327934727072716, - "step": 20250 - }, - { - "epoch": 3.453537936913896, - "grad_norm": 0.01646221987903118, - "learning_rate": 3.5415444168240547e-06, - "loss": 0.00015120231546461582, - "step": 20255 - }, - { - "epoch": 3.454390451832907, - "grad_norm": 0.07438748329877853, - "learning_rate": 3.53068298183658e-06, - "loss": 0.0008919765241444111, - "step": 20260 - }, - { - "epoch": 3.455242966751918, - "grad_norm": 0.060105398297309875, - "learning_rate": 3.519837404856263e-06, - "loss": 0.0005192287266254425, - "step": 20265 - }, - { - "epoch": 3.4560954816709293, - "grad_norm": 0.014563803561031818, - "learning_rate": 3.5090076909461946e-06, - "loss": 0.0004962874110788107, - "step": 20270 - }, - { - "epoch": 3.4569479965899403, - "grad_norm": 0.06408075243234634, - "learning_rate": 3.4981938451620393e-06, - "loss": 0.0005642884410917759, - "step": 20275 - }, - { - "epoch": 3.4578005115089514, - "grad_norm": 0.018654122948646545, - "learning_rate": 3.4873958725520555e-06, - "loss": 0.00020208589266985656, - "step": 20280 - }, - { - "epoch": 3.4586530264279625, - "grad_norm": 0.029568253085017204, - "learning_rate": 3.4766137781570934e-06, - "loss": 0.000255450839176774, - "step": 20285 - }, - { - "epoch": 3.4595055413469735, - "grad_norm": 0.007675605826079845, - "learning_rate": 3.465847567010606e-06, - "loss": 0.0007365974131971597, - "step": 20290 - }, - { - "epoch": 3.4603580562659846, - "grad_norm": 0.025231147184967995, - "learning_rate": 3.4550972441386105e-06, - "loss": 0.00035758940503001214, - "step": 20295 - }, - { - "epoch": 3.4612105711849956, - "grad_norm": 0.04905321076512337, - "learning_rate": 3.444362814559709e-06, - "loss": 0.0014069808647036552, - "step": 20300 - }, - { - "epoch": 3.4620630861040067, - "grad_norm": 0.03318062052130699, - "learning_rate": 3.4336442832851056e-06, - "loss": 0.0009246711619198322, - "step": 20305 - }, - { - "epoch": 3.4629156010230178, - "grad_norm": 0.006136562675237656, - "learning_rate": 3.422941655318552e-06, - "loss": 0.0006952826399356127, - "step": 20310 - }, - { - "epoch": 3.463768115942029, - "grad_norm": 0.025494717061519623, - "learning_rate": 3.4122549356564057e-06, - "loss": 0.0005774838849902153, - "step": 20315 - }, - { - "epoch": 3.46462063086104, - "grad_norm": 0.07548290491104126, - "learning_rate": 3.4015841292875754e-06, - "loss": 0.0007774532772600651, - "step": 20320 - }, - { - "epoch": 3.4654731457800514, - "grad_norm": 0.05289645493030548, - "learning_rate": 3.3909292411935475e-06, - "loss": 0.0011253023520112037, - "step": 20325 - }, - { - "epoch": 3.466325660699062, - "grad_norm": 0.018192177638411522, - "learning_rate": 3.380290276348377e-06, - "loss": 0.00043428516946733, - "step": 20330 - }, - { - "epoch": 3.4671781756180735, - "grad_norm": 0.04587262123823166, - "learning_rate": 3.3696672397186862e-06, - "loss": 0.00036711143329739573, - "step": 20335 - }, - { - "epoch": 3.4680306905370846, - "grad_norm": 0.04300279915332794, - "learning_rate": 3.3590601362636707e-06, - "loss": 0.0004922755528241396, - "step": 20340 - }, - { - "epoch": 3.4688832054560956, - "grad_norm": 0.051384493708610535, - "learning_rate": 3.3484689709350614e-06, - "loss": 0.000578513415530324, - "step": 20345 - }, - { - "epoch": 3.4697357203751067, - "grad_norm": 0.041927583515644073, - "learning_rate": 3.337893748677191e-06, - "loss": 0.00029339513275772333, - "step": 20350 - }, - { - "epoch": 3.4705882352941178, - "grad_norm": 0.059719622135162354, - "learning_rate": 3.3273344744269014e-06, - "loss": 0.000449614180251956, - "step": 20355 - }, - { - "epoch": 3.471440750213129, - "grad_norm": 0.04662923142313957, - "learning_rate": 3.3167911531136334e-06, - "loss": 0.0005207772832363844, - "step": 20360 - }, - { - "epoch": 3.47229326513214, - "grad_norm": 0.0316859669983387, - "learning_rate": 3.3062637896593498e-06, - "loss": 0.00032441234216094015, - "step": 20365 - }, - { - "epoch": 3.473145780051151, - "grad_norm": 0.05493699386715889, - "learning_rate": 3.2957523889785733e-06, - "loss": 0.0006051870062947273, - "step": 20370 - }, - { - "epoch": 3.473998294970162, - "grad_norm": 0.09825102239847183, - "learning_rate": 3.2852569559783785e-06, - "loss": 0.0013698142021894455, - "step": 20375 - }, - { - "epoch": 3.474850809889173, - "grad_norm": 0.03209096938371658, - "learning_rate": 3.2747774955583757e-06, - "loss": 0.0005756544414907694, - "step": 20380 - }, - { - "epoch": 3.475703324808184, - "grad_norm": 0.07800310105085373, - "learning_rate": 3.2643140126107343e-06, - "loss": 0.001114057283848524, - "step": 20385 - }, - { - "epoch": 3.476555839727195, - "grad_norm": 0.00817018747329712, - "learning_rate": 3.253866512020148e-06, - "loss": 0.00051291324198246, - "step": 20390 - }, - { - "epoch": 3.4774083546462062, - "grad_norm": 0.024451689794659615, - "learning_rate": 3.2434349986638687e-06, - "loss": 0.00032486242707818747, - "step": 20395 - }, - { - "epoch": 3.4782608695652173, - "grad_norm": 0.0076407743617892265, - "learning_rate": 3.2330194774116636e-06, - "loss": 0.00043834159150719644, - "step": 20400 - }, - { - "epoch": 3.4791133844842284, - "grad_norm": 0.012483174912631512, - "learning_rate": 3.222619953125852e-06, - "loss": 0.00016895943554118276, - "step": 20405 - }, - { - "epoch": 3.4799658994032394, - "grad_norm": 0.013760508969426155, - "learning_rate": 3.2122364306612745e-06, - "loss": 0.0003591555170714855, - "step": 20410 - }, - { - "epoch": 3.4808184143222505, - "grad_norm": 0.06936871260404587, - "learning_rate": 3.201868914865309e-06, - "loss": 0.0007365354336798191, - "step": 20415 - }, - { - "epoch": 3.4816709292412615, - "grad_norm": 0.026890093460679054, - "learning_rate": 3.19151741057785e-06, - "loss": 0.0003096622182056308, - "step": 20420 - }, - { - "epoch": 3.4825234441602726, - "grad_norm": 0.1273396909236908, - "learning_rate": 3.181181922631319e-06, - "loss": 0.0018214803189039231, - "step": 20425 - }, - { - "epoch": 3.483375959079284, - "grad_norm": 0.07851844280958176, - "learning_rate": 3.1708624558506784e-06, - "loss": 0.00047972016036510465, - "step": 20430 - }, - { - "epoch": 3.484228473998295, - "grad_norm": 0.10473177582025528, - "learning_rate": 3.1605590150533863e-06, - "loss": 0.0003519801888614893, - "step": 20435 - }, - { - "epoch": 3.485080988917306, - "grad_norm": 0.011826570145785809, - "learning_rate": 3.1502716050494493e-06, - "loss": 0.00012582357740029693, - "step": 20440 - }, - { - "epoch": 3.4859335038363173, - "grad_norm": 0.09120000153779984, - "learning_rate": 3.1400002306413596e-06, - "loss": 0.0011743055656552315, - "step": 20445 - }, - { - "epoch": 3.4867860187553283, - "grad_norm": 0.03551065921783447, - "learning_rate": 3.1297448966241312e-06, - "loss": 0.0003235040698200464, - "step": 20450 - }, - { - "epoch": 3.4876385336743394, - "grad_norm": 0.022862901911139488, - "learning_rate": 3.1195056077853093e-06, - "loss": 0.00019952079746872188, - "step": 20455 - }, - { - "epoch": 3.4884910485933505, - "grad_norm": 0.012301230803132057, - "learning_rate": 3.1092823689049294e-06, - "loss": 0.0005085674580186606, - "step": 20460 - }, - { - "epoch": 3.4893435635123615, - "grad_norm": 0.012983572669327259, - "learning_rate": 3.0990751847555355e-06, - "loss": 0.00026952670887112615, - "step": 20465 - }, - { - "epoch": 3.4901960784313726, - "grad_norm": 0.03648987412452698, - "learning_rate": 3.0888840601021784e-06, - "loss": 0.0006700227968394756, - "step": 20470 - }, - { - "epoch": 3.4910485933503836, - "grad_norm": 0.03276946395635605, - "learning_rate": 3.078708999702424e-06, - "loss": 0.0003196124453097582, - "step": 20475 - }, - { - "epoch": 3.4919011082693947, - "grad_norm": 0.05831300839781761, - "learning_rate": 3.068550008306318e-06, - "loss": 0.0005575232207775116, - "step": 20480 - }, - { - "epoch": 3.4927536231884058, - "grad_norm": 0.007617017719894648, - "learning_rate": 3.0584070906564297e-06, - "loss": 0.0005659013520926237, - "step": 20485 - }, - { - "epoch": 3.493606138107417, - "grad_norm": 0.01129902619868517, - "learning_rate": 3.0482802514878e-06, - "loss": 0.0005468820687383414, - "step": 20490 - }, - { - "epoch": 3.494458653026428, - "grad_norm": 0.021842598915100098, - "learning_rate": 3.0381694955279687e-06, - "loss": 0.00030360198579728606, - "step": 20495 - }, - { - "epoch": 3.495311167945439, - "grad_norm": 0.008945580571889877, - "learning_rate": 3.0280748274969887e-06, - "loss": 0.00019925013184547425, - "step": 20500 - }, - { - "epoch": 3.49616368286445, - "grad_norm": 0.024309689179062843, - "learning_rate": 3.0179962521073823e-06, - "loss": 0.0004822061397135258, - "step": 20505 - }, - { - "epoch": 3.497016197783461, - "grad_norm": 0.1111924946308136, - "learning_rate": 3.007933774064157e-06, - "loss": 0.0009571518748998642, - "step": 20510 - }, - { - "epoch": 3.497868712702472, - "grad_norm": 0.07870755344629288, - "learning_rate": 2.997887398064809e-06, - "loss": 0.00046168952248990534, - "step": 20515 - }, - { - "epoch": 3.498721227621483, - "grad_norm": 0.05273010954260826, - "learning_rate": 2.987857128799333e-06, - "loss": 0.0002907732035964727, - "step": 20520 - }, - { - "epoch": 3.4994032395566923, - "eval_loss": 0.0633777305483818, - "eval_runtime": 3.7174, - "eval_samples_per_second": 67.79, - "eval_steps_per_second": 1.076, - "step": 20524 - }, - { - "eval_cer_subset": 0.01387463969905711, - "eval_cer_subset_edit_distance": 852, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 20524 - }, - { - "epoch": 3.4995737425404947, - "grad_norm": 0.10134585946798325, - "learning_rate": 2.9778429709501866e-06, - "loss": 0.0007728733588010072, - "step": 20525 - }, - { - "epoch": 3.5004262574595053, - "grad_norm": 0.010177328251302242, - "learning_rate": 2.967844929192306e-06, - "loss": 0.0003987153992056847, - "step": 20530 - }, - { - "epoch": 3.501278772378517, - "grad_norm": 0.07338051497936249, - "learning_rate": 2.9578630081931245e-06, - "loss": 0.0006870163604617118, - "step": 20535 - }, - { - "epoch": 3.502131287297528, - "grad_norm": 0.010025802068412304, - "learning_rate": 2.9478972126125143e-06, - "loss": 0.0004745893180370331, - "step": 20540 - }, - { - "epoch": 3.502983802216539, - "grad_norm": 0.10838331282138824, - "learning_rate": 2.9379475471028616e-06, - "loss": 0.00041006896644830705, - "step": 20545 - }, - { - "epoch": 3.50383631713555, - "grad_norm": 0.019695911556482315, - "learning_rate": 2.928014016308994e-06, - "loss": 0.0004196802154183388, - "step": 20550 - }, - { - "epoch": 3.504688832054561, - "grad_norm": 0.01631144993007183, - "learning_rate": 2.918096624868209e-06, - "loss": 0.0008785014972090722, - "step": 20555 - }, - { - "epoch": 3.505541346973572, - "grad_norm": 0.05647982656955719, - "learning_rate": 2.9081953774102744e-06, - "loss": 0.0005624303594231606, - "step": 20560 - }, - { - "epoch": 3.506393861892583, - "grad_norm": 0.05051364004611969, - "learning_rate": 2.8983102785574223e-06, - "loss": 0.0006344023160636425, - "step": 20565 - }, - { - "epoch": 3.5072463768115942, - "grad_norm": 0.04484107717871666, - "learning_rate": 2.888441332924353e-06, - "loss": 0.00034873902332037686, - "step": 20570 - }, - { - "epoch": 3.5080988917306053, - "grad_norm": 0.026035010814666748, - "learning_rate": 2.87858854511821e-06, - "loss": 0.0005680721253156662, - "step": 20575 - }, - { - "epoch": 3.5089514066496164, - "grad_norm": 0.030738165602087975, - "learning_rate": 2.86875191973861e-06, - "loss": 0.0006887212861329317, - "step": 20580 - }, - { - "epoch": 3.5098039215686274, - "grad_norm": 0.021299093961715698, - "learning_rate": 2.858931461377609e-06, - "loss": 0.00039576496928930285, - "step": 20585 - }, - { - "epoch": 3.5106564364876385, - "grad_norm": 0.019585279747843742, - "learning_rate": 2.849127174619735e-06, - "loss": 0.0004361768718808889, - "step": 20590 - }, - { - "epoch": 3.5115089514066495, - "grad_norm": 0.021617313846945763, - "learning_rate": 2.839339064041944e-06, - "loss": 0.00023572267964482306, - "step": 20595 - }, - { - "epoch": 3.5123614663256606, - "grad_norm": 0.032228775322437286, - "learning_rate": 2.8295671342136602e-06, - "loss": 0.0006701612379401922, - "step": 20600 - }, - { - "epoch": 3.5132139812446717, - "grad_norm": 0.06554242223501205, - "learning_rate": 2.819811389696738e-06, - "loss": 0.000471029058098793, - "step": 20605 - }, - { - "epoch": 3.5140664961636827, - "grad_norm": 0.004567406140267849, - "learning_rate": 2.810071835045481e-06, - "loss": 0.0001446882146410644, - "step": 20610 - }, - { - "epoch": 3.514919011082694, - "grad_norm": 0.004180132411420345, - "learning_rate": 2.800348474806652e-06, - "loss": 0.00013457806780934334, - "step": 20615 - }, - { - "epoch": 3.5157715260017053, - "grad_norm": 0.09419524669647217, - "learning_rate": 2.790641313519419e-06, - "loss": 0.0003684045746922493, - "step": 20620 - }, - { - "epoch": 3.516624040920716, - "grad_norm": 0.015027535147964954, - "learning_rate": 2.780950355715429e-06, - "loss": 0.0011356399394571782, - "step": 20625 - }, - { - "epoch": 3.5174765558397274, - "grad_norm": 0.02399168163537979, - "learning_rate": 2.7712756059187254e-06, - "loss": 0.0003479213686659932, - "step": 20630 - }, - { - "epoch": 3.518329070758738, - "grad_norm": 0.001755329198203981, - "learning_rate": 2.761617068645816e-06, - "loss": 0.0009521358646452427, - "step": 20635 - }, - { - "epoch": 3.5191815856777495, - "grad_norm": 0.07159367203712463, - "learning_rate": 2.7519747484056233e-06, - "loss": 0.0004668514244258404, - "step": 20640 - }, - { - "epoch": 3.5200341005967606, - "grad_norm": 0.013049350120127201, - "learning_rate": 2.7423486496995007e-06, - "loss": 0.00038400108460336926, - "step": 20645 - }, - { - "epoch": 3.5208866155157716, - "grad_norm": 0.129945769906044, - "learning_rate": 2.732738777021233e-06, - "loss": 0.0007587837055325509, - "step": 20650 - }, - { - "epoch": 3.5217391304347827, - "grad_norm": 0.011801044456660748, - "learning_rate": 2.723145134857023e-06, - "loss": 0.00033344419207423923, - "step": 20655 - }, - { - "epoch": 3.5225916453537938, - "grad_norm": 0.012840129435062408, - "learning_rate": 2.713567727685513e-06, - "loss": 0.000570116238668561, - "step": 20660 - }, - { - "epoch": 3.523444160272805, - "grad_norm": 0.028646450489759445, - "learning_rate": 2.7040065599777395e-06, - "loss": 0.000337608833797276, - "step": 20665 - }, - { - "epoch": 3.524296675191816, - "grad_norm": 0.012803840450942516, - "learning_rate": 2.694461636197194e-06, - "loss": 0.00026674284599721433, - "step": 20670 - }, - { - "epoch": 3.525149190110827, - "grad_norm": 0.004883004352450371, - "learning_rate": 2.6849329607997503e-06, - "loss": 0.0005247415509074926, - "step": 20675 - }, - { - "epoch": 3.526001705029838, - "grad_norm": 0.013268784619867802, - "learning_rate": 2.675420538233712e-06, - "loss": 0.00042262640781700613, - "step": 20680 - }, - { - "epoch": 3.526854219948849, - "grad_norm": 0.02646799571812153, - "learning_rate": 2.6659243729398026e-06, - "loss": 0.00015779529931023717, - "step": 20685 - }, - { - "epoch": 3.52770673486786, - "grad_norm": 0.07095526158809662, - "learning_rate": 2.656444469351142e-06, - "loss": 0.0005843072663992644, - "step": 20690 - }, - { - "epoch": 3.528559249786871, - "grad_norm": 0.07467476278543472, - "learning_rate": 2.646980831893265e-06, - "loss": 0.0004990112502127886, - "step": 20695 - }, - { - "epoch": 3.5294117647058822, - "grad_norm": 0.016248852014541626, - "learning_rate": 2.6375334649841053e-06, - "loss": 0.0002280582906678319, - "step": 20700 - }, - { - "epoch": 3.5302642796248933, - "grad_norm": 0.015685981139540672, - "learning_rate": 2.628102373034022e-06, - "loss": 0.00040198476053774355, - "step": 20705 - }, - { - "epoch": 3.5311167945439044, - "grad_norm": 0.03586861863732338, - "learning_rate": 2.6186875604457466e-06, - "loss": 0.0006230812985450029, - "step": 20710 - }, - { - "epoch": 3.531969309462916, - "grad_norm": 0.01099316030740738, - "learning_rate": 2.6092890316144435e-06, - "loss": 0.00038466856349259613, - "step": 20715 - }, - { - "epoch": 3.5328218243819265, - "grad_norm": 0.052841104567050934, - "learning_rate": 2.599906790927653e-06, - "loss": 0.0004864667542278767, - "step": 20720 - }, - { - "epoch": 3.533674339300938, - "grad_norm": 0.016549568623304367, - "learning_rate": 2.5905408427653084e-06, - "loss": 0.0005029110237956047, - "step": 20725 - }, - { - "epoch": 3.5345268542199486, - "grad_norm": 0.046012695878744125, - "learning_rate": 2.581191191499754e-06, - "loss": 0.0005438106134533882, - "step": 20730 - }, - { - "epoch": 3.53537936913896, - "grad_norm": 0.010663103312253952, - "learning_rate": 2.57185784149572e-06, - "loss": 0.0002268374664708972, - "step": 20735 - }, - { - "epoch": 3.536231884057971, - "grad_norm": 0.05038389936089516, - "learning_rate": 2.56254079711032e-06, - "loss": 0.0003756745718419552, - "step": 20740 - }, - { - "epoch": 3.5370843989769822, - "grad_norm": 0.03177966922521591, - "learning_rate": 2.5532400626930625e-06, - "loss": 0.00024364627897739411, - "step": 20745 - }, - { - "epoch": 3.5379369138959933, - "grad_norm": 0.056538257747888565, - "learning_rate": 2.5439556425858333e-06, - "loss": 0.0004069384653121233, - "step": 20750 - }, - { - "epoch": 3.5387894288150044, - "grad_norm": 0.17656944692134857, - "learning_rate": 2.5346875411229194e-06, - "loss": 0.00038029137067496777, - "step": 20755 - }, - { - "epoch": 3.5396419437340154, - "grad_norm": 0.007788034155964851, - "learning_rate": 2.5254357626309675e-06, - "loss": 0.00021343901753425598, - "step": 20760 - }, - { - "epoch": 3.5404944586530265, - "grad_norm": 0.04890740290284157, - "learning_rate": 2.516200311429027e-06, - "loss": 0.00035889642313122747, - "step": 20765 - }, - { - "epoch": 3.5413469735720375, - "grad_norm": 0.01161973923444748, - "learning_rate": 2.5069811918285e-06, - "loss": 0.0005623042117804289, - "step": 20770 - }, - { - "epoch": 3.5421994884910486, - "grad_norm": 0.015352857299149036, - "learning_rate": 2.4977784081331926e-06, - "loss": 0.00024357731454074382, - "step": 20775 - }, - { - "epoch": 3.5430520034100597, - "grad_norm": 0.08323964476585388, - "learning_rate": 2.4885919646392653e-06, - "loss": 0.0010311774909496307, - "step": 20780 - }, - { - "epoch": 3.5439045183290707, - "grad_norm": 0.01847103051841259, - "learning_rate": 2.4794218656352573e-06, - "loss": 0.000347610330209136, - "step": 20785 - }, - { - "epoch": 3.544757033248082, - "grad_norm": 0.019678082317113876, - "learning_rate": 2.47026811540207e-06, - "loss": 0.0012042072601616382, - "step": 20790 - }, - { - "epoch": 3.545609548167093, - "grad_norm": 0.0032173304352909327, - "learning_rate": 2.4611307182129723e-06, - "loss": 0.0004959845449775457, - "step": 20795 - }, - { - "epoch": 3.546462063086104, - "grad_norm": 0.07671674340963364, - "learning_rate": 2.452009678333623e-06, - "loss": 0.00034225885756313803, - "step": 20800 - }, - { - "epoch": 3.547314578005115, - "grad_norm": 0.009932668879628181, - "learning_rate": 2.442905000022012e-06, - "loss": 0.00013967978302389383, - "step": 20805 - }, - { - "epoch": 3.548167092924126, - "grad_norm": 0.0073715317994356155, - "learning_rate": 2.4338166875285185e-06, - "loss": 0.0008998749777674675, - "step": 20810 - }, - { - "epoch": 3.549019607843137, - "grad_norm": 0.050911612808704376, - "learning_rate": 2.4247447450958564e-06, - "loss": 0.0005864705890417099, - "step": 20815 - }, - { - "epoch": 3.5498721227621486, - "grad_norm": 0.013407070189714432, - "learning_rate": 2.4156891769591222e-06, - "loss": 0.00031670662574470045, - "step": 20820 - }, - { - "epoch": 3.550724637681159, - "grad_norm": 0.016584917902946472, - "learning_rate": 2.4066499873457547e-06, - "loss": 0.00020419515203684568, - "step": 20825 - }, - { - "epoch": 3.5515771526001707, - "grad_norm": 0.059559017419815063, - "learning_rate": 2.3976271804755366e-06, - "loss": 0.0004414593800902367, - "step": 20830 - }, - { - "epoch": 3.5524296675191813, - "grad_norm": 0.06465112417936325, - "learning_rate": 2.3886207605606276e-06, - "loss": 0.000545783480629325, - "step": 20835 - }, - { - "epoch": 3.553282182438193, - "grad_norm": 0.06885542720556259, - "learning_rate": 2.3796307318055112e-06, - "loss": 0.0005661803297698498, - "step": 20840 - }, - { - "epoch": 3.554134697357204, - "grad_norm": 0.011327388696372509, - "learning_rate": 2.3706570984070417e-06, - "loss": 0.00034151540603488684, - "step": 20845 - }, - { - "epoch": 3.554987212276215, - "grad_norm": 0.012066229246556759, - "learning_rate": 2.361699864554406e-06, - "loss": 0.00031219117809087036, - "step": 20850 - }, - { - "epoch": 3.555839727195226, - "grad_norm": 0.010691968724131584, - "learning_rate": 2.352759034429143e-06, - "loss": 0.0007128301076591015, - "step": 20855 - }, - { - "epoch": 3.556692242114237, - "grad_norm": 0.016185365617275238, - "learning_rate": 2.3438346122051295e-06, - "loss": 0.0010599909350275994, - "step": 20860 - }, - { - "epoch": 3.557544757033248, - "grad_norm": 0.07072475552558899, - "learning_rate": 2.3349266020485714e-06, - "loss": 0.0009240474551916123, - "step": 20865 - }, - { - "epoch": 3.558397271952259, - "grad_norm": 0.02984112873673439, - "learning_rate": 2.326035008118038e-06, - "loss": 0.0006467741448432207, - "step": 20870 - }, - { - "epoch": 3.5592497868712702, - "grad_norm": 0.04521370306611061, - "learning_rate": 2.3171598345644164e-06, - "loss": 0.0007281980477273464, - "step": 20875 - }, - { - "epoch": 3.5601023017902813, - "grad_norm": 0.051938124001026154, - "learning_rate": 2.308301085530931e-06, - "loss": 0.001002713944762945, - "step": 20880 - }, - { - "epoch": 3.5609548167092924, - "grad_norm": 0.00381719833239913, - "learning_rate": 2.299458765153135e-06, - "loss": 0.0003859725082293153, - "step": 20885 - }, - { - "epoch": 3.5618073316283034, - "grad_norm": 0.008161951787769794, - "learning_rate": 2.2906328775589315e-06, - "loss": 0.00029291068203747274, - "step": 20890 - }, - { - "epoch": 3.5626598465473145, - "grad_norm": 0.012069445103406906, - "learning_rate": 2.2818234268685247e-06, - "loss": 0.00023344492074102164, - "step": 20895 - }, - { - "epoch": 3.5635123614663256, - "grad_norm": 0.019283002242445946, - "learning_rate": 2.273030417194474e-06, - "loss": 0.0002701515797525644, - "step": 20900 - }, - { - "epoch": 3.5643648763853366, - "grad_norm": 0.04979168623685837, - "learning_rate": 2.2642538526416384e-06, - "loss": 0.0007004131563007832, - "step": 20905 - }, - { - "epoch": 3.5652173913043477, - "grad_norm": 0.016069067642092705, - "learning_rate": 2.255493737307207e-06, - "loss": 0.00026671388186514375, - "step": 20910 - }, - { - "epoch": 3.566069906223359, - "grad_norm": 0.08306407183408737, - "learning_rate": 2.246750075280704e-06, - "loss": 0.0007622113451361656, - "step": 20915 - }, - { - "epoch": 3.56692242114237, - "grad_norm": 0.01117862667888403, - "learning_rate": 2.238022870643956e-06, - "loss": 0.0003897631075233221, - "step": 20920 - }, - { - "epoch": 3.5677749360613813, - "grad_norm": 0.02147560566663742, - "learning_rate": 2.2293121274711126e-06, - "loss": 0.00027591800317168237, - "step": 20925 - }, - { - "epoch": 3.568627450980392, - "grad_norm": 0.08633051812648773, - "learning_rate": 2.2206178498286293e-06, - "loss": 0.00048953453078866, - "step": 20930 - }, - { - "epoch": 3.5694799658994034, - "grad_norm": 0.024494649842381477, - "learning_rate": 2.211940041775301e-06, - "loss": 0.00030716974288225174, - "step": 20935 - }, - { - "epoch": 3.5703324808184145, - "grad_norm": 0.011852308176457882, - "learning_rate": 2.2032787073622075e-06, - "loss": 0.000294071389362216, - "step": 20940 - }, - { - "epoch": 3.5711849957374255, - "grad_norm": 0.013524633832275867, - "learning_rate": 2.1946338506327487e-06, - "loss": 0.0009473714977502823, - "step": 20945 - }, - { - "epoch": 3.5720375106564366, - "grad_norm": 0.10399185121059418, - "learning_rate": 2.1860054756226374e-06, - "loss": 0.0007893742062151432, - "step": 20950 - }, - { - "epoch": 3.5728900255754477, - "grad_norm": 0.026907512918114662, - "learning_rate": 2.1773935863598725e-06, - "loss": 0.00027270009741187096, - "step": 20955 - }, - { - "epoch": 3.5737425404944587, - "grad_norm": 0.02188337780535221, - "learning_rate": 2.1687981868647883e-06, - "loss": 0.0011183698661625385, - "step": 20960 - }, - { - "epoch": 3.57459505541347, - "grad_norm": 0.019559821113944054, - "learning_rate": 2.160219281149987e-06, - "loss": 0.0007883405312895774, - "step": 20965 - }, - { - "epoch": 3.575447570332481, - "grad_norm": 0.020538685843348503, - "learning_rate": 2.151656873220399e-06, - "loss": 0.0003405439667403698, - "step": 20970 - }, - { - "epoch": 3.576300085251492, - "grad_norm": 0.05687247961759567, - "learning_rate": 2.143110967073235e-06, - "loss": 0.00031936001032590867, - "step": 20975 - }, - { - "epoch": 3.577152600170503, - "grad_norm": 0.06545376777648926, - "learning_rate": 2.1345815666980027e-06, - "loss": 0.0014524533413350581, - "step": 20980 - }, - { - "epoch": 3.578005115089514, - "grad_norm": 0.013182499445974827, - "learning_rate": 2.1260686760765186e-06, - "loss": 0.0007357773371040821, - "step": 20985 - }, - { - "epoch": 3.578857630008525, - "grad_norm": 0.014390240423381329, - "learning_rate": 2.117572299182882e-06, - "loss": 0.0002635567681863904, - "step": 20990 - }, - { - "epoch": 3.579710144927536, - "grad_norm": 0.007471280638128519, - "learning_rate": 2.109092439983487e-06, - "loss": 0.00034300005063414574, - "step": 20995 - }, - { - "epoch": 3.580562659846547, - "grad_norm": 0.0056254020892083645, - "learning_rate": 2.1006291024370044e-06, - "loss": 0.00067988820374012, - "step": 21000 - }, - { - "epoch": 3.5814151747655583, - "grad_norm": 0.04476655274629593, - "learning_rate": 2.0921822904944152e-06, - "loss": 0.0012643163092434407, - "step": 21005 - }, - { - "epoch": 3.5822676896845693, - "grad_norm": 0.07641401141881943, - "learning_rate": 2.0837520080989612e-06, - "loss": 0.0005423400085419417, - "step": 21010 - }, - { - "epoch": 3.5831202046035804, - "grad_norm": 0.007483305409550667, - "learning_rate": 2.0753382591861823e-06, - "loss": 0.000717478571459651, - "step": 21015 - }, - { - "epoch": 3.583972719522592, - "grad_norm": 0.023776588961482048, - "learning_rate": 2.066941047683898e-06, - "loss": 0.0007531133480370045, - "step": 21020 - }, - { - "epoch": 3.5848252344416025, - "grad_norm": 0.011082421988248825, - "learning_rate": 2.0585603775121985e-06, - "loss": 0.000845146831125021, - "step": 21025 - }, - { - "epoch": 3.585677749360614, - "grad_norm": 0.016635581851005554, - "learning_rate": 2.0501962525834666e-06, - "loss": 0.0004957383964210749, - "step": 21030 - }, - { - "epoch": 3.5865302642796246, - "grad_norm": 0.009021840058267117, - "learning_rate": 2.0418486768023533e-06, - "loss": 0.00018236858304589986, - "step": 21035 - }, - { - "epoch": 3.587382779198636, - "grad_norm": 0.08496322482824326, - "learning_rate": 2.033517654065783e-06, - "loss": 0.000499946903437376, - "step": 21040 - }, - { - "epoch": 3.588235294117647, - "grad_norm": 0.03010745905339718, - "learning_rate": 2.025203188262954e-06, - "loss": 0.0007221087813377381, - "step": 21045 - }, - { - "epoch": 3.5890878090366582, - "grad_norm": 0.014769963920116425, - "learning_rate": 2.01690528327534e-06, - "loss": 0.0005381438415497542, - "step": 21050 - }, - { - "epoch": 3.5899403239556693, - "grad_norm": 0.004737900570034981, - "learning_rate": 2.0086239429766755e-06, - "loss": 0.00042590871453285217, - "step": 21055 - }, - { - "epoch": 3.5907928388746804, - "grad_norm": 0.011567474342882633, - "learning_rate": 2.000359171232968e-06, - "loss": 0.0002875934354960918, - "step": 21060 - }, - { - "epoch": 3.5916453537936914, - "grad_norm": 0.014703325927257538, - "learning_rate": 1.9921109719024815e-06, - "loss": 0.0004461627919226885, - "step": 21065 - }, - { - "epoch": 3.5924978687127025, - "grad_norm": 0.048457443714141846, - "learning_rate": 1.983879348835753e-06, - "loss": 0.00031095552258193494, - "step": 21070 - }, - { - "epoch": 3.5933503836317136, - "grad_norm": 0.035036347806453705, - "learning_rate": 1.975664305875582e-06, - "loss": 0.0004704943858087063, - "step": 21075 - }, - { - "epoch": 3.5942028985507246, - "grad_norm": 0.010464261285960674, - "learning_rate": 1.967465846857015e-06, - "loss": 0.000991502869874239, - "step": 21080 - }, - { - "epoch": 3.5950554134697357, - "grad_norm": 0.06083119288086891, - "learning_rate": 1.9592839756073773e-06, - "loss": 0.0003390623489394784, - "step": 21085 - }, - { - "epoch": 3.5959079283887467, - "grad_norm": 0.06610855460166931, - "learning_rate": 1.951118695946234e-06, - "loss": 0.0006665963679552078, - "step": 21090 - }, - { - "epoch": 3.596760443307758, - "grad_norm": 0.16398076713085175, - "learning_rate": 1.942970011685399e-06, - "loss": 0.000514071062207222, - "step": 21095 - }, - { - "epoch": 3.597612958226769, - "grad_norm": 0.007298734970390797, - "learning_rate": 1.9348379266289667e-06, - "loss": 0.00034214446786791087, - "step": 21100 - }, - { - "epoch": 3.59846547314578, - "grad_norm": 0.01020489726215601, - "learning_rate": 1.9267224445732548e-06, - "loss": 0.00022972908336669207, - "step": 21105 - }, - { - "epoch": 3.599317988064791, - "grad_norm": 0.004301535431295633, - "learning_rate": 1.9186235693068402e-06, - "loss": 0.000436145206913352, - "step": 21110 - }, - { - "epoch": 3.6001705029838025, - "grad_norm": 0.003998286556452513, - "learning_rate": 1.9105413046105452e-06, - "loss": 0.0006782910786569118, - "step": 21115 - }, - { - "epoch": 3.601023017902813, - "grad_norm": 0.024540653452277184, - "learning_rate": 1.9024756542574474e-06, - "loss": 0.000988519936800003, - "step": 21120 - }, - { - "epoch": 3.6018755328218246, - "grad_norm": 0.008918581530451775, - "learning_rate": 1.8944266220128512e-06, - "loss": 0.0002553706057369709, - "step": 21125 - }, - { - "epoch": 3.602728047740835, - "grad_norm": 0.04160633683204651, - "learning_rate": 1.886394211634322e-06, - "loss": 0.0010507453233003616, - "step": 21130 - }, - { - "epoch": 3.6035805626598467, - "grad_norm": 0.011109927669167519, - "learning_rate": 1.878378426871656e-06, - "loss": 0.0006055444478988648, - "step": 21135 - }, - { - "epoch": 3.604433077578858, - "grad_norm": 0.04830114543437958, - "learning_rate": 1.8703792714668763e-06, - "loss": 0.0004960444755852222, - "step": 21140 - }, - { - "epoch": 3.605285592497869, - "grad_norm": 0.00889639277011156, - "learning_rate": 1.862396749154267e-06, - "loss": 0.00040551358833909037, - "step": 21145 - }, - { - "epoch": 3.60613810741688, - "grad_norm": 0.004867313429713249, - "learning_rate": 1.8544308636603346e-06, - "loss": 0.00046198870986700056, - "step": 21150 - }, - { - "epoch": 3.606990622335891, - "grad_norm": 0.011908004991710186, - "learning_rate": 1.8464816187038129e-06, - "loss": 0.0007289954926818609, - "step": 21155 - }, - { - "epoch": 3.607843137254902, - "grad_norm": 0.028547517955303192, - "learning_rate": 1.8385490179956706e-06, - "loss": 0.00074036936275661, - "step": 21160 - }, - { - "epoch": 3.608695652173913, - "grad_norm": 0.016837269067764282, - "learning_rate": 1.8306330652391204e-06, - "loss": 0.00043045044876635077, - "step": 21165 - }, - { - "epoch": 3.609548167092924, - "grad_norm": 0.0513509176671505, - "learning_rate": 1.8227337641295859e-06, - "loss": 0.0011829334311187268, - "step": 21170 - }, - { - "epoch": 3.610400682011935, - "grad_norm": 0.0072334990836679935, - "learning_rate": 1.8148511183547252e-06, - "loss": 0.000338142248801887, - "step": 21175 - }, - { - "epoch": 3.6112531969309463, - "grad_norm": 0.012744804844260216, - "learning_rate": 1.806985131594424e-06, - "loss": 0.0004504667595028877, - "step": 21180 - }, - { - "epoch": 3.6121057118499573, - "grad_norm": 0.011759931221604347, - "learning_rate": 1.799135807520774e-06, - "loss": 0.00029756310395896436, - "step": 21185 - }, - { - "epoch": 3.6129582267689684, - "grad_norm": 0.018434008583426476, - "learning_rate": 1.7913031497981193e-06, - "loss": 0.00032974979840219023, - "step": 21190 - }, - { - "epoch": 3.6138107416879794, - "grad_norm": 0.014259081333875656, - "learning_rate": 1.7834871620829889e-06, - "loss": 0.000511990487575531, - "step": 21195 - }, - { - "epoch": 3.6146632566069905, - "grad_norm": 0.007993457838892937, - "learning_rate": 1.7756878480241556e-06, - "loss": 0.001352803036570549, - "step": 21200 - }, - { - "epoch": 3.6155157715260016, - "grad_norm": 0.004293349105864763, - "learning_rate": 1.7679052112625908e-06, - "loss": 0.0003066781908273697, - "step": 21205 - }, - { - "epoch": 3.6163682864450126, - "grad_norm": 0.07703537493944168, - "learning_rate": 1.760139255431492e-06, - "loss": 0.00040162606164813043, - "step": 21210 - }, - { - "epoch": 3.6172208013640237, - "grad_norm": 0.16972683370113373, - "learning_rate": 1.7523899841562632e-06, - "loss": 0.0007527290377765894, - "step": 21215 - }, - { - "epoch": 3.618073316283035, - "grad_norm": 0.027821099385619164, - "learning_rate": 1.7446574010545277e-06, - "loss": 0.0003161250613629818, - "step": 21220 - }, - { - "epoch": 3.618925831202046, - "grad_norm": 0.03964545577764511, - "learning_rate": 1.7369415097361103e-06, - "loss": 0.0006695718038827181, - "step": 21225 - }, - { - "epoch": 3.6197783461210573, - "grad_norm": 0.04607070982456207, - "learning_rate": 1.729242313803042e-06, - "loss": 0.0003807933768257499, - "step": 21230 - }, - { - "epoch": 3.620630861040068, - "grad_norm": 0.01855855993926525, - "learning_rate": 1.7215598168495729e-06, - "loss": 0.0009717889130115509, - "step": 21235 - }, - { - "epoch": 3.6214833759590794, - "grad_norm": 0.031046895310282707, - "learning_rate": 1.7138940224621381e-06, - "loss": 0.000874253548681736, - "step": 21240 - }, - { - "epoch": 3.6223358908780905, - "grad_norm": 0.07868655025959015, - "learning_rate": 1.7062449342193917e-06, - "loss": 0.0005975952371954918, - "step": 21245 - }, - { - "epoch": 3.6231884057971016, - "grad_norm": 0.12167970091104507, - "learning_rate": 1.6986125556921776e-06, - "loss": 0.0012172631919384002, - "step": 21250 - }, - { - "epoch": 3.6240409207161126, - "grad_norm": 0.026766330003738403, - "learning_rate": 1.6909968904435453e-06, - "loss": 0.00015401726122945547, - "step": 21255 - }, - { - "epoch": 3.6248934356351237, - "grad_norm": 0.0523478165268898, - "learning_rate": 1.6833979420287386e-06, - "loss": 0.00043351505883038044, - "step": 21260 - }, - { - "epoch": 3.6257459505541347, - "grad_norm": 0.03436252847313881, - "learning_rate": 1.6758157139952072e-06, - "loss": 0.0005547068547457456, - "step": 21265 - }, - { - "epoch": 3.626598465473146, - "grad_norm": 0.009839732199907303, - "learning_rate": 1.6682502098825824e-06, - "loss": 0.0005176344886422158, - "step": 21270 - }, - { - "epoch": 3.627450980392157, - "grad_norm": 0.08149638772010803, - "learning_rate": 1.6607014332226886e-06, - "loss": 0.000564785860478878, - "step": 21275 - }, - { - "epoch": 3.628303495311168, - "grad_norm": 0.004576738923788071, - "learning_rate": 1.6531693875395574e-06, - "loss": 0.0010984219610691071, - "step": 21280 - }, - { - "epoch": 3.629156010230179, - "grad_norm": 0.011690586805343628, - "learning_rate": 1.6456540763493884e-06, - "loss": 0.00019397520227357746, - "step": 21285 - }, - { - "epoch": 3.63000852514919, - "grad_norm": 0.07157396525144577, - "learning_rate": 1.6381555031605876e-06, - "loss": 0.0006828072480857372, - "step": 21290 - }, - { - "epoch": 3.630861040068201, - "grad_norm": 0.06383173167705536, - "learning_rate": 1.6306736714737256e-06, - "loss": 0.0006252658553421497, - "step": 21295 - }, - { - "epoch": 3.631713554987212, - "grad_norm": 0.05774596706032753, - "learning_rate": 1.6232085847815795e-06, - "loss": 0.0013784953393042088, - "step": 21300 - }, - { - "epoch": 3.632566069906223, - "grad_norm": 0.038066111505031586, - "learning_rate": 1.615760246569099e-06, - "loss": 0.000414402037858963, - "step": 21305 - }, - { - "epoch": 3.6334185848252343, - "grad_norm": 0.04431888833642006, - "learning_rate": 1.6083286603134112e-06, - "loss": 0.0002233121544122696, - "step": 21310 - }, - { - "epoch": 3.634271099744246, - "grad_norm": 0.005531220696866512, - "learning_rate": 1.6009138294838367e-06, - "loss": 0.0005217500030994416, - "step": 21315 - }, - { - "epoch": 3.6351236146632564, - "grad_norm": 0.03662824630737305, - "learning_rate": 1.5935157575418605e-06, - "loss": 0.0006840425077825785, - "step": 21320 - }, - { - "epoch": 3.635976129582268, - "grad_norm": 0.04950394108891487, - "learning_rate": 1.5861344479411454e-06, - "loss": 0.00041497671045362947, - "step": 21325 - }, - { - "epoch": 3.6368286445012785, - "grad_norm": 0.020219210535287857, - "learning_rate": 1.5787699041275345e-06, - "loss": 0.00039848005399107934, - "step": 21330 - }, - { - "epoch": 3.63768115942029, - "grad_norm": 0.009260480292141438, - "learning_rate": 1.5714221295390488e-06, - "loss": 0.0008187741041183472, - "step": 21335 - }, - { - "epoch": 3.638533674339301, - "grad_norm": 0.03334662690758705, - "learning_rate": 1.5640911276058654e-06, - "loss": 0.0002430976601317525, - "step": 21340 - }, - { - "epoch": 3.639386189258312, - "grad_norm": 0.02324068546295166, - "learning_rate": 1.5567769017503382e-06, - "loss": 0.00017816005274653434, - "step": 21345 - }, - { - "epoch": 3.640238704177323, - "grad_norm": 0.08741103112697601, - "learning_rate": 1.5494794553869982e-06, - "loss": 0.0008083363994956016, - "step": 21350 - }, - { - "epoch": 3.6410912190963343, - "grad_norm": 0.052611518651247025, - "learning_rate": 1.542198791922529e-06, - "loss": 0.00037114876322448255, - "step": 21355 - }, - { - "epoch": 3.6419437340153453, - "grad_norm": 0.044670768082141876, - "learning_rate": 1.534934914755795e-06, - "loss": 0.0010107600130140782, - "step": 21360 - }, - { - "epoch": 3.6427962489343564, - "grad_norm": 0.029497483745217323, - "learning_rate": 1.5276878272778126e-06, - "loss": 0.00046050939708948136, - "step": 21365 - }, - { - "epoch": 3.6436487638533674, - "grad_norm": 0.033001627773046494, - "learning_rate": 1.520457532871759e-06, - "loss": 0.0008739419281482697, - "step": 21370 - }, - { - "epoch": 3.6445012787723785, - "grad_norm": 0.0858602374792099, - "learning_rate": 1.5132440349129804e-06, - "loss": 0.001319802924990654, - "step": 21375 - }, - { - "epoch": 3.6453537936913896, - "grad_norm": 0.002159344032406807, - "learning_rate": 1.5060473367689785e-06, - "loss": 0.000239237817004323, - "step": 21380 - }, - { - "epoch": 3.6462063086104006, - "grad_norm": 0.10409238934516907, - "learning_rate": 1.4988674417994076e-06, - "loss": 0.001322145201265812, - "step": 21385 - }, - { - "epoch": 3.6470588235294117, - "grad_norm": 0.08724559098482132, - "learning_rate": 1.4917043533560823e-06, - "loss": 0.0007014136761426925, - "step": 21390 - }, - { - "epoch": 3.6479113384484227, - "grad_norm": 0.05907455086708069, - "learning_rate": 1.4845580747829696e-06, - "loss": 0.00017757418099790813, - "step": 21395 - }, - { - "epoch": 3.648763853367434, - "grad_norm": 0.00219578854739666, - "learning_rate": 1.4774286094161883e-06, - "loss": 0.00017139697447419167, - "step": 21400 - }, - { - "epoch": 3.649616368286445, - "grad_norm": 0.038314200937747955, - "learning_rate": 1.4703159605840218e-06, - "loss": 0.0002736913273110986, - "step": 21405 - }, - { - "epoch": 3.6504688832054564, - "grad_norm": 0.01319828350096941, - "learning_rate": 1.4632201316068806e-06, - "loss": 0.00023620841093361378, - "step": 21410 - }, - { - "epoch": 3.651321398124467, - "grad_norm": 0.039625514298677444, - "learning_rate": 1.456141125797332e-06, - "loss": 0.00044136070646345616, - "step": 21415 - }, - { - "epoch": 3.6521739130434785, - "grad_norm": 0.0016333634266629815, - "learning_rate": 1.4490789464601027e-06, - "loss": 0.000284887757152319, - "step": 21420 - }, - { - "epoch": 3.653026427962489, - "grad_norm": 0.007170053664594889, - "learning_rate": 1.4420335968920435e-06, - "loss": 0.00017265495844185353, - "step": 21425 - }, - { - "epoch": 3.6538789428815006, - "grad_norm": 0.008500473573803902, - "learning_rate": 1.4350050803821608e-06, - "loss": 0.00042344643734395506, - "step": 21430 - }, - { - "epoch": 3.6547314578005117, - "grad_norm": 0.02188137173652649, - "learning_rate": 1.4279934002115968e-06, - "loss": 0.0008881168439984321, - "step": 21435 - }, - { - "epoch": 3.6555839727195227, - "grad_norm": 0.011452939361333847, - "learning_rate": 1.420998559653637e-06, - "loss": 0.00039616567082703116, - "step": 21440 - }, - { - "epoch": 3.656436487638534, - "grad_norm": 0.07035384327173233, - "learning_rate": 1.4140205619737068e-06, - "loss": 0.0011622272431850433, - "step": 21445 - }, - { - "epoch": 3.657289002557545, - "grad_norm": 0.007866617292165756, - "learning_rate": 1.4070594104293758e-06, - "loss": 0.0007314439862966537, - "step": 21450 - }, - { - "epoch": 3.658141517476556, - "grad_norm": 0.020861556753516197, - "learning_rate": 1.400115108270332e-06, - "loss": 0.0006033163517713547, - "step": 21455 - }, - { - "epoch": 3.658994032395567, - "grad_norm": 0.10272221267223358, - "learning_rate": 1.3931876587384024e-06, - "loss": 0.00074980896897614, - "step": 21460 - }, - { - "epoch": 3.659846547314578, - "grad_norm": 0.03701607510447502, - "learning_rate": 1.3862770650675675e-06, - "loss": 0.00043788314796984197, - "step": 21465 - }, - { - "epoch": 3.660699062233589, - "grad_norm": 0.01912214234471321, - "learning_rate": 1.3793833304839088e-06, - "loss": 0.00025298474356532096, - "step": 21470 - }, - { - "epoch": 3.6615515771526, - "grad_norm": 0.09408050775527954, - "learning_rate": 1.3725064582056563e-06, - "loss": 0.0005287495441734791, - "step": 21475 - }, - { - "epoch": 3.662404092071611, - "grad_norm": 0.04015975072979927, - "learning_rate": 1.3656464514431587e-06, - "loss": 0.0005934454500675201, - "step": 21480 - }, - { - "epoch": 3.6632566069906223, - "grad_norm": 0.06787826120853424, - "learning_rate": 1.3588033133988916e-06, - "loss": 0.0005741545930504799, - "step": 21485 - }, - { - "epoch": 3.6641091219096333, - "grad_norm": 0.011169936507940292, - "learning_rate": 1.3519770472674663e-06, - "loss": 0.000489132758229971, - "step": 21490 - }, - { - "epoch": 3.6649616368286444, - "grad_norm": 0.08029177039861679, - "learning_rate": 1.345167656235617e-06, - "loss": 0.00028758635744452477, - "step": 21495 - }, - { - "epoch": 3.6658141517476555, - "grad_norm": 0.014910605736076832, - "learning_rate": 1.3383751434821844e-06, - "loss": 0.0004812784492969513, - "step": 21500 - }, - { - "epoch": 3.6666666666666665, - "grad_norm": 0.010188158601522446, - "learning_rate": 1.3315995121781442e-06, - "loss": 0.00021801250986754894, - "step": 21505 - }, - { - "epoch": 3.6675191815856776, - "grad_norm": 0.009267419576644897, - "learning_rate": 1.3248407654865747e-06, - "loss": 0.00018663164228200914, - "step": 21510 - }, - { - "epoch": 3.668371696504689, - "grad_norm": 0.008017940446734428, - "learning_rate": 1.318098906562702e-06, - "loss": 0.00021431564819067716, - "step": 21515 - }, - { - "epoch": 3.6692242114236997, - "grad_norm": 0.059672970324754715, - "learning_rate": 1.3113739385538418e-06, - "loss": 0.002246948331594467, - "step": 21520 - }, - { - "epoch": 3.670076726342711, - "grad_norm": 0.040056753903627396, - "learning_rate": 1.3046658645994287e-06, - "loss": 0.000770262023434043, - "step": 21525 - }, - { - "epoch": 3.670929241261722, - "grad_norm": 0.01101162564009428, - "learning_rate": 1.297974687831016e-06, - "loss": 0.0002761463634669781, - "step": 21530 - }, - { - "epoch": 3.6717817561807333, - "grad_norm": 0.02171594463288784, - "learning_rate": 1.2913004113722675e-06, - "loss": 0.002316083945333958, - "step": 21535 - }, - { - "epoch": 3.6726342710997444, - "grad_norm": 0.10676748305559158, - "learning_rate": 1.2846430383389699e-06, - "loss": 0.0004832268226891756, - "step": 21540 - }, - { - "epoch": 3.6734867860187554, - "grad_norm": 0.009993486106395721, - "learning_rate": 1.2780025718389916e-06, - "loss": 0.0002966204658150673, - "step": 21545 - }, - { - "epoch": 3.6743393009377665, - "grad_norm": 0.015029081143438816, - "learning_rate": 1.2713790149723277e-06, - "loss": 0.00016313637606799603, - "step": 21550 - }, - { - "epoch": 3.6751918158567776, - "grad_norm": 0.004780618939548731, - "learning_rate": 1.2647723708310713e-06, - "loss": 0.0002024576999247074, - "step": 21555 - }, - { - "epoch": 3.6760443307757886, - "grad_norm": 0.03506084531545639, - "learning_rate": 1.2581826424994348e-06, - "loss": 0.00036899447441101073, - "step": 21560 - }, - { - "epoch": 3.6768968456947997, - "grad_norm": 0.03333313763141632, - "learning_rate": 1.2516098330537154e-06, - "loss": 0.0009617398492991925, - "step": 21565 - }, - { - "epoch": 3.6777493606138107, - "grad_norm": 0.032154183834791183, - "learning_rate": 1.2450539455623173e-06, - "loss": 0.0003954335581511259, - "step": 21570 - }, - { - "epoch": 3.678601875532822, - "grad_norm": 0.029868626967072487, - "learning_rate": 1.238514983085742e-06, - "loss": 0.0002000307198613882, - "step": 21575 - }, - { - "epoch": 3.679454390451833, - "grad_norm": 0.007650670595467091, - "learning_rate": 1.2319929486766106e-06, - "loss": 0.00048509505577385423, - "step": 21580 - }, - { - "epoch": 3.680306905370844, - "grad_norm": 0.09073834866285324, - "learning_rate": 1.225487845379608e-06, - "loss": 0.0009519712999463082, - "step": 21585 - }, - { - "epoch": 3.681159420289855, - "grad_norm": 0.02642376720905304, - "learning_rate": 1.2189996762315468e-06, - "loss": 0.0002688464941456914, - "step": 21590 - }, - { - "epoch": 3.682011935208866, - "grad_norm": 0.013354654423892498, - "learning_rate": 1.2125284442613167e-06, - "loss": 0.0005553624592721462, - "step": 21595 - }, - { - "epoch": 3.682864450127877, - "grad_norm": 0.010057013481855392, - "learning_rate": 1.206074152489897e-06, - "loss": 0.00021782214753329754, - "step": 21600 - }, - { - "epoch": 3.683716965046888, - "grad_norm": 0.01647094078361988, - "learning_rate": 1.1996368039303733e-06, - "loss": 0.0002732400316745043, - "step": 21605 - }, - { - "epoch": 3.6845694799658997, - "grad_norm": 0.04311537370085716, - "learning_rate": 1.1932164015879126e-06, - "loss": 0.0002665382344275713, - "step": 21610 - }, - { - "epoch": 3.6854219948849103, - "grad_norm": 0.007978399284183979, - "learning_rate": 1.1868129484597714e-06, - "loss": 0.0005598774179816246, - "step": 21615 - }, - { - "epoch": 3.686274509803922, - "grad_norm": 0.06534867733716965, - "learning_rate": 1.1804264475352916e-06, - "loss": 0.0006941860541701317, - "step": 21620 - }, - { - "epoch": 3.6871270247229324, - "grad_norm": 0.021297244355082512, - "learning_rate": 1.1740569017959098e-06, - "loss": 0.0014498828910291194, - "step": 21625 - }, - { - "epoch": 3.687979539641944, - "grad_norm": 0.015370003879070282, - "learning_rate": 1.1677043142151386e-06, - "loss": 0.0002328573726117611, - "step": 21630 - }, - { - "epoch": 3.688832054560955, - "grad_norm": 0.044275715947151184, - "learning_rate": 1.1613686877585765e-06, - "loss": 0.0004160989075899124, - "step": 21635 - }, - { - "epoch": 3.689684569479966, - "grad_norm": 0.012127033434808254, - "learning_rate": 1.155050025383912e-06, - "loss": 0.0004161412827670574, - "step": 21640 - }, - { - "epoch": 3.690537084398977, - "grad_norm": 0.03625606745481491, - "learning_rate": 1.1487483300408898e-06, - "loss": 0.0002486191689968109, - "step": 21645 - }, - { - "epoch": 3.691389599317988, - "grad_norm": 0.0037806867621839046, - "learning_rate": 1.1424636046713731e-06, - "loss": 0.0009839927777647972, - "step": 21650 - }, - { - "epoch": 3.692242114236999, - "grad_norm": 0.024629997089505196, - "learning_rate": 1.1361958522092652e-06, - "loss": 0.0005969745106995106, - "step": 21655 - }, - { - "epoch": 3.6930946291560103, - "grad_norm": 0.020410476252436638, - "learning_rate": 1.1299450755805669e-06, - "loss": 0.000520474137738347, - "step": 21660 - }, - { - "epoch": 3.6939471440750213, - "grad_norm": 0.061364348977804184, - "learning_rate": 1.1237112777033485e-06, - "loss": 0.0004287329036742449, - "step": 21665 - }, - { - "epoch": 3.6947996589940324, - "grad_norm": 0.050738945603370667, - "learning_rate": 1.1174944614877487e-06, - "loss": 0.0003132443642243743, - "step": 21670 - }, - { - "epoch": 3.6956521739130435, - "grad_norm": 0.01692713052034378, - "learning_rate": 1.1112946298359876e-06, - "loss": 0.000536510581150651, - "step": 21675 - }, - { - "epoch": 3.6965046888320545, - "grad_norm": 0.017278827726840973, - "learning_rate": 1.1051117856423583e-06, - "loss": 0.0004452986642718315, - "step": 21680 - }, - { - "epoch": 3.6973572037510656, - "grad_norm": 0.054739370942115784, - "learning_rate": 1.0989459317932187e-06, - "loss": 0.0005310873501002788, - "step": 21685 - }, - { - "epoch": 3.6982097186700766, - "grad_norm": 0.11146213859319687, - "learning_rate": 1.0927970711669786e-06, - "loss": 0.0014278174377977848, - "step": 21690 - }, - { - "epoch": 3.6990622335890877, - "grad_norm": 0.03265102952718735, - "learning_rate": 1.0866652066341506e-06, - "loss": 0.00036611778195947407, - "step": 21695 - }, - { - "epoch": 3.6999147485080988, - "grad_norm": 0.044467322528362274, - "learning_rate": 1.080550341057282e-06, - "loss": 0.000625171372666955, - "step": 21700 - }, - { - "epoch": 3.70076726342711, - "grad_norm": 0.03242240473628044, - "learning_rate": 1.0744524772909977e-06, - "loss": 0.0004086061846464872, - "step": 21705 - }, - { - "epoch": 3.701619778346121, - "grad_norm": 0.017928048968315125, - "learning_rate": 1.0683716181819873e-06, - "loss": 0.0003092557191848755, - "step": 21710 - }, - { - "epoch": 3.7024722932651324, - "grad_norm": 0.11531335115432739, - "learning_rate": 1.0623077665689844e-06, - "loss": 0.0016329120844602585, - "step": 21715 - }, - { - "epoch": 3.703324808184143, - "grad_norm": 0.008593913167715073, - "learning_rate": 1.056260925282812e-06, - "loss": 0.0001580311683937907, - "step": 21720 - }, - { - "epoch": 3.7041773231031545, - "grad_norm": 0.061547067016363144, - "learning_rate": 1.0502310971463285e-06, - "loss": 0.001316339522600174, - "step": 21725 - }, - { - "epoch": 3.705029838022165, - "grad_norm": 0.057052768766880035, - "learning_rate": 1.0442182849744656e-06, - "loss": 0.00038654208183288576, - "step": 21730 - }, - { - "epoch": 3.7058823529411766, - "grad_norm": 0.03813531622290611, - "learning_rate": 1.038222491574199e-06, - "loss": 0.0002339026890695095, - "step": 21735 - }, - { - "epoch": 3.7067348678601877, - "grad_norm": 0.12125222384929657, - "learning_rate": 1.0322437197445605e-06, - "loss": 0.0009862667880952357, - "step": 21740 - }, - { - "epoch": 3.7075873827791987, - "grad_norm": 0.05146624147891998, - "learning_rate": 1.0262819722766463e-06, - "loss": 0.00039231935515999795, - "step": 21745 - }, - { - "epoch": 3.70843989769821, - "grad_norm": 0.0025069713592529297, - "learning_rate": 1.020337251953597e-06, - "loss": 0.0004076188895851374, - "step": 21750 - }, - { - "epoch": 3.709292412617221, - "grad_norm": 0.041663553565740585, - "learning_rate": 1.0144095615506053e-06, - "loss": 0.0006343224085867405, - "step": 21755 - }, - { - "epoch": 3.710144927536232, - "grad_norm": 0.005100937094539404, - "learning_rate": 1.0084989038349118e-06, - "loss": 0.00014722023624926805, - "step": 21760 - }, - { - "epoch": 3.710997442455243, - "grad_norm": 0.003966887481510639, - "learning_rate": 1.0026052815658088e-06, - "loss": 0.00029324383940547704, - "step": 21765 - }, - { - "epoch": 3.711849957374254, - "grad_norm": 0.010124515742063522, - "learning_rate": 9.96728697494638e-07, - "loss": 0.00028116661123931406, - "step": 21770 - }, - { - "epoch": 3.712702472293265, - "grad_norm": 0.024313004687428474, - "learning_rate": 9.908691543647873e-07, - "loss": 0.0006351057440042496, - "step": 21775 - }, - { - "epoch": 3.713554987212276, - "grad_norm": 0.008684827014803886, - "learning_rate": 9.850266549116775e-07, - "loss": 0.0005675890017300845, - "step": 21780 - }, - { - "epoch": 3.7144075021312872, - "grad_norm": 0.02833733707666397, - "learning_rate": 9.792012018627851e-07, - "loss": 0.0004331086296588182, - "step": 21785 - }, - { - "epoch": 3.7152600170502983, - "grad_norm": 0.07219739258289337, - "learning_rate": 9.733927979376304e-07, - "loss": 0.000644698552787304, - "step": 21790 - }, - { - "epoch": 3.7161125319693094, - "grad_norm": 0.006920814514160156, - "learning_rate": 9.676014458477655e-07, - "loss": 0.0005529311019927263, - "step": 21795 - }, - { - "epoch": 3.7169650468883204, - "grad_norm": 0.033863846212625504, - "learning_rate": 9.618271482967858e-07, - "loss": 0.0004058407619595528, - "step": 21800 - }, - { - "epoch": 3.7178175618073315, - "grad_norm": 0.07452560216188431, - "learning_rate": 9.560699079803225e-07, - "loss": 0.00045907692983746526, - "step": 21805 - }, - { - "epoch": 3.718670076726343, - "grad_norm": 0.006375455763190985, - "learning_rate": 9.50329727586055e-07, - "loss": 0.0002314785495400429, - "step": 21810 - }, - { - "epoch": 3.7195225916453536, - "grad_norm": 0.009601259604096413, - "learning_rate": 9.446066097936814e-07, - "loss": 0.00022105807438492774, - "step": 21815 - }, - { - "epoch": 3.720375106564365, - "grad_norm": 0.005180593114346266, - "learning_rate": 9.389005572749519e-07, - "loss": 0.0005345941055566073, - "step": 21820 - }, - { - "epoch": 3.7212276214833757, - "grad_norm": 0.01795378513634205, - "learning_rate": 9.33211572693636e-07, - "loss": 0.0010281124152243138, - "step": 21825 - }, - { - "epoch": 3.722080136402387, - "grad_norm": 0.005491979885846376, - "learning_rate": 9.275396587055422e-07, - "loss": 0.0005115194246172905, - "step": 21830 - }, - { - "epoch": 3.7229326513213983, - "grad_norm": 0.015443346463143826, - "learning_rate": 9.218848179585112e-07, - "loss": 0.0002423166995868087, - "step": 21835 - }, - { - "epoch": 3.7237851662404093, - "grad_norm": 0.005140860099345446, - "learning_rate": 9.162470530924101e-07, - "loss": 0.0002986373612657189, - "step": 21840 - }, - { - "epoch": 3.7246376811594204, - "grad_norm": 0.039769724011421204, - "learning_rate": 9.106263667391298e-07, - "loss": 0.0006013393867760897, - "step": 21845 - }, - { - "epoch": 3.7254901960784315, - "grad_norm": 0.05602128803730011, - "learning_rate": 9.05022761522596e-07, - "loss": 0.0014486395753920077, - "step": 21850 - }, - { - "epoch": 3.7263427109974425, - "grad_norm": 0.0194423608481884, - "learning_rate": 8.994362400587624e-07, - "loss": 0.00029678286518901585, - "step": 21855 - }, - { - "epoch": 3.7271952259164536, - "grad_norm": 0.015371611341834068, - "learning_rate": 8.938668049555966e-07, - "loss": 0.0003891410538926721, - "step": 21860 - }, - { - "epoch": 3.7280477408354646, - "grad_norm": 0.03265884891152382, - "learning_rate": 8.883144588131066e-07, - "loss": 0.0002852254081517458, - "step": 21865 - }, - { - "epoch": 3.7289002557544757, - "grad_norm": 0.0016827320214360952, - "learning_rate": 8.827792042233023e-07, - "loss": 0.00022765444591641427, - "step": 21870 - }, - { - "epoch": 3.7297527706734868, - "grad_norm": 0.053975410759449005, - "learning_rate": 8.772610437702249e-07, - "loss": 0.0005050489213317633, - "step": 21875 - }, - { - "epoch": 3.730605285592498, - "grad_norm": 0.041164103895425797, - "learning_rate": 8.717599800299387e-07, - "loss": 0.0007206748705357313, - "step": 21880 - }, - { - "epoch": 3.731457800511509, - "grad_norm": 0.01276206225156784, - "learning_rate": 8.662760155705273e-07, - "loss": 0.0003082378301769495, - "step": 21885 - }, - { - "epoch": 3.73231031543052, - "grad_norm": 0.014569109305739403, - "learning_rate": 8.6080915295208e-07, - "loss": 0.002179015427827835, - "step": 21890 - }, - { - "epoch": 3.733162830349531, - "grad_norm": 0.03148796409368515, - "learning_rate": 8.553593947267137e-07, - "loss": 0.0005310860928148031, - "step": 21895 - }, - { - "epoch": 3.734015345268542, - "grad_norm": 0.002400216180831194, - "learning_rate": 8.499267434385559e-07, - "loss": 0.00037463915068656204, - "step": 21900 - }, - { - "epoch": 3.734867860187553, - "grad_norm": 0.07524898648262024, - "learning_rate": 8.445112016237446e-07, - "loss": 0.0002757473383098841, - "step": 21905 - }, - { - "epoch": 3.735720375106564, - "grad_norm": 0.03802500292658806, - "learning_rate": 8.391127718104448e-07, - "loss": 0.0003543847240507603, - "step": 21910 - }, - { - "epoch": 3.7365728900255757, - "grad_norm": 0.04768325388431549, - "learning_rate": 8.3373145651882e-07, - "loss": 0.0006353846751153469, - "step": 21915 - }, - { - "epoch": 3.7374254049445863, - "grad_norm": 0.030708076432347298, - "learning_rate": 8.283672582610396e-07, - "loss": 0.00034714918583631513, - "step": 21920 - }, - { - "epoch": 3.738277919863598, - "grad_norm": 0.08609063178300858, - "learning_rate": 8.230201795413005e-07, - "loss": 0.0007828005589544773, - "step": 21925 - }, - { - "epoch": 3.7391304347826084, - "grad_norm": 0.022040044888854027, - "learning_rate": 8.176902228557894e-07, - "loss": 0.0003338766284286976, - "step": 21930 - }, - { - "epoch": 3.73998294970162, - "grad_norm": 0.033182695508003235, - "learning_rate": 8.123773906927117e-07, - "loss": 0.00040285829454660415, - "step": 21935 - }, - { - "epoch": 3.740835464620631, - "grad_norm": 0.034886643290519714, - "learning_rate": 8.07081685532271e-07, - "loss": 0.0003746964270249009, - "step": 21940 - }, - { - "epoch": 3.741687979539642, - "grad_norm": 0.010563422925770283, - "learning_rate": 8.01803109846677e-07, - "loss": 0.00028156861662864685, - "step": 21945 - }, - { - "epoch": 3.742540494458653, - "grad_norm": 0.00919833779335022, - "learning_rate": 7.965416661001506e-07, - "loss": 0.00018601591000333428, - "step": 21950 - }, - { - "epoch": 3.743393009377664, - "grad_norm": 0.07408086955547333, - "learning_rate": 7.912973567489017e-07, - "loss": 0.0004837862215936184, - "step": 21955 - }, - { - "epoch": 3.7442455242966752, - "grad_norm": 0.012142200022935867, - "learning_rate": 7.860701842411596e-07, - "loss": 0.0005443771369755268, - "step": 21960 - }, - { - "epoch": 3.7450980392156863, - "grad_norm": 0.005506236106157303, - "learning_rate": 7.808601510171306e-07, - "loss": 0.0010629001073539257, - "step": 21965 - }, - { - "epoch": 3.7459505541346974, - "grad_norm": 0.01200301107019186, - "learning_rate": 7.756672595090316e-07, - "loss": 0.0003482490312308073, - "step": 21970 - }, - { - "epoch": 3.7468030690537084, - "grad_norm": 0.020502127707004547, - "learning_rate": 7.704915121410859e-07, - "loss": 0.0010052938014268875, - "step": 21975 - }, - { - "epoch": 3.7476555839727195, - "grad_norm": 0.010255628265440464, - "learning_rate": 7.653329113294984e-07, - "loss": 0.00015571712283417581, - "step": 21980 - }, - { - "epoch": 3.7485080988917305, - "grad_norm": 0.0257349144667387, - "learning_rate": 7.601914594824801e-07, - "loss": 0.0002349275629967451, - "step": 21985 - }, - { - "epoch": 3.7493606138107416, - "grad_norm": 0.007586009334772825, - "learning_rate": 7.550671590002236e-07, - "loss": 0.0009384787641465664, - "step": 21990 - }, - { - "epoch": 3.7493606138107416, - "eval_loss": 0.06301642954349518, - "eval_runtime": 3.7241, - "eval_samples_per_second": 67.667, - "eval_steps_per_second": 1.074, - "step": 21990 - }, - { - "eval_cer_subset": 0.013206963375510935, - "eval_cer_subset_edit_distance": 811, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 21990 - }, - { - "epoch": 3.7502131287297527, - "grad_norm": 0.01631186157464981, - "learning_rate": 7.499600122749277e-07, - "loss": 0.00024612166453152895, - "step": 21995 - }, - { - "epoch": 3.7510656436487637, - "grad_norm": 0.0041867573745548725, - "learning_rate": 7.448700216907814e-07, - "loss": 0.00025008111260831355, - "step": 22000 - }, - { - "epoch": 3.7519181585677748, - "grad_norm": 0.008313664235174656, - "learning_rate": 7.397971896239585e-07, - "loss": 0.0005951725412160158, - "step": 22005 - }, - { - "epoch": 3.7527706734867863, - "grad_norm": 0.04596323147416115, - "learning_rate": 7.347415184426275e-07, - "loss": 0.0003834041766822338, - "step": 22010 - }, - { - "epoch": 3.753623188405797, - "grad_norm": 0.0066893636249005795, - "learning_rate": 7.297030105069379e-07, - "loss": 0.00040263482369482515, - "step": 22015 - }, - { - "epoch": 3.7544757033248084, - "grad_norm": 0.05047721043229103, - "learning_rate": 7.246816681690415e-07, - "loss": 0.0008630319498479366, - "step": 22020 - }, - { - "epoch": 3.755328218243819, - "grad_norm": 0.045102309435606, - "learning_rate": 7.196774937730632e-07, - "loss": 0.0004944218788295984, - "step": 22025 - }, - { - "epoch": 3.7561807331628305, - "grad_norm": 0.035739608108997345, - "learning_rate": 7.146904896551175e-07, - "loss": 0.000390232028439641, - "step": 22030 - }, - { - "epoch": 3.7570332480818416, - "grad_norm": 0.07845264673233032, - "learning_rate": 7.097206581433049e-07, - "loss": 0.0006002193316817283, - "step": 22035 - }, - { - "epoch": 3.7578857630008526, - "grad_norm": 0.06677660346031189, - "learning_rate": 7.047680015577067e-07, - "loss": 0.0006055246107280254, - "step": 22040 - }, - { - "epoch": 3.7587382779198637, - "grad_norm": 0.005762454587966204, - "learning_rate": 6.998325222103904e-07, - "loss": 0.0002784677781164646, - "step": 22045 - }, - { - "epoch": 3.7595907928388748, - "grad_norm": 0.052431393414735794, - "learning_rate": 6.949142224054003e-07, - "loss": 0.0006639796774834394, - "step": 22050 - }, - { - "epoch": 3.760443307757886, - "grad_norm": 0.060557615011930466, - "learning_rate": 6.900131044387663e-07, - "loss": 0.0003828573739156127, - "step": 22055 - }, - { - "epoch": 3.761295822676897, - "grad_norm": 0.020646315068006516, - "learning_rate": 6.851291705984835e-07, - "loss": 0.0003116678912192583, - "step": 22060 - }, - { - "epoch": 3.762148337595908, - "grad_norm": 0.02366071753203869, - "learning_rate": 6.802624231645445e-07, - "loss": 0.00022359511349350213, - "step": 22065 - }, - { - "epoch": 3.763000852514919, - "grad_norm": 0.04398680850863457, - "learning_rate": 6.754128644089072e-07, - "loss": 0.0004183667246252298, - "step": 22070 - }, - { - "epoch": 3.76385336743393, - "grad_norm": 0.005853195209056139, - "learning_rate": 6.705804965955062e-07, - "loss": 0.0005386173259466886, - "step": 22075 - }, - { - "epoch": 3.764705882352941, - "grad_norm": 0.009862879291176796, - "learning_rate": 6.657653219802453e-07, - "loss": 0.00016693586949259042, - "step": 22080 - }, - { - "epoch": 3.765558397271952, - "grad_norm": 0.07206539809703827, - "learning_rate": 6.60967342811014e-07, - "loss": 0.00036818250082433225, - "step": 22085 - }, - { - "epoch": 3.7664109121909632, - "grad_norm": 0.014724505133926868, - "learning_rate": 6.561865613276665e-07, - "loss": 0.00038080462254583833, - "step": 22090 - }, - { - "epoch": 3.7672634271099743, - "grad_norm": 0.0687309056520462, - "learning_rate": 6.514229797620382e-07, - "loss": 0.0005503068678081036, - "step": 22095 - }, - { - "epoch": 3.7681159420289854, - "grad_norm": 0.07538152486085892, - "learning_rate": 6.46676600337917e-07, - "loss": 0.0005414205603301525, - "step": 22100 - }, - { - "epoch": 3.7689684569479964, - "grad_norm": 0.013810453936457634, - "learning_rate": 6.419474252710763e-07, - "loss": 0.000361010548658669, - "step": 22105 - }, - { - "epoch": 3.7698209718670075, - "grad_norm": 0.01780586875975132, - "learning_rate": 6.3723545676925e-07, - "loss": 0.0002679239492863417, - "step": 22110 - }, - { - "epoch": 3.770673486786019, - "grad_norm": 0.012996112927794456, - "learning_rate": 6.325406970321453e-07, - "loss": 0.00031234726775437595, - "step": 22115 - }, - { - "epoch": 3.7715260017050296, - "grad_norm": 0.0011625232873484492, - "learning_rate": 6.278631482514257e-07, - "loss": 0.00037115085870027543, - "step": 22120 - }, - { - "epoch": 3.772378516624041, - "grad_norm": 0.008648713119328022, - "learning_rate": 6.232028126107319e-07, - "loss": 0.0002120264805853367, - "step": 22125 - }, - { - "epoch": 3.7732310315430517, - "grad_norm": 0.008668708615005016, - "learning_rate": 6.185596922856611e-07, - "loss": 0.00017857172060757874, - "step": 22130 - }, - { - "epoch": 3.7740835464620632, - "grad_norm": 0.020673241466283798, - "learning_rate": 6.139337894437795e-07, - "loss": 0.00015971544198691844, - "step": 22135 - }, - { - "epoch": 3.7749360613810743, - "grad_norm": 0.06992864608764648, - "learning_rate": 6.093251062446097e-07, - "loss": 0.0013386713340878486, - "step": 22140 - }, - { - "epoch": 3.7757885763000854, - "grad_norm": 0.06895331293344498, - "learning_rate": 6.04733644839639e-07, - "loss": 0.00039470652118325236, - "step": 22145 - }, - { - "epoch": 3.7766410912190964, - "grad_norm": 0.03614363074302673, - "learning_rate": 6.001594073723151e-07, - "loss": 0.0002598386025056243, - "step": 22150 - }, - { - "epoch": 3.7774936061381075, - "grad_norm": 0.025721333920955658, - "learning_rate": 5.956023959780427e-07, - "loss": 0.0004882506560534239, - "step": 22155 - }, - { - "epoch": 3.7783461210571185, - "grad_norm": 0.16202254593372345, - "learning_rate": 5.910626127841863e-07, - "loss": 0.0007598648779094219, - "step": 22160 - }, - { - "epoch": 3.7791986359761296, - "grad_norm": 0.01326384861022234, - "learning_rate": 5.865400599100678e-07, - "loss": 0.00036332786548882724, - "step": 22165 - }, - { - "epoch": 3.7800511508951407, - "grad_norm": 0.023784616962075233, - "learning_rate": 5.820347394669689e-07, - "loss": 0.0005443296395242214, - "step": 22170 - }, - { - "epoch": 3.7809036658141517, - "grad_norm": 0.034594547003507614, - "learning_rate": 5.775466535581155e-07, - "loss": 0.0002668139757588506, - "step": 22175 - }, - { - "epoch": 3.7817561807331628, - "grad_norm": 0.010697634890675545, - "learning_rate": 5.730758042787026e-07, - "loss": 0.00031887323129922154, - "step": 22180 - }, - { - "epoch": 3.782608695652174, - "grad_norm": 0.01985642872750759, - "learning_rate": 5.686221937158688e-07, - "loss": 0.00031337011605501173, - "step": 22185 - }, - { - "epoch": 3.783461210571185, - "grad_norm": 0.021349545568227768, - "learning_rate": 5.641858239487096e-07, - "loss": 0.0012821664102375507, - "step": 22190 - }, - { - "epoch": 3.784313725490196, - "grad_norm": 0.023454997688531876, - "learning_rate": 5.597666970482681e-07, - "loss": 0.0005118092987686396, - "step": 22195 - }, - { - "epoch": 3.785166240409207, - "grad_norm": 0.020847661420702934, - "learning_rate": 5.553648150775359e-07, - "loss": 0.0006337463855743408, - "step": 22200 - }, - { - "epoch": 3.786018755328218, - "grad_norm": 0.02310790866613388, - "learning_rate": 5.509801800914648e-07, - "loss": 0.00020002322271466254, - "step": 22205 - }, - { - "epoch": 3.7868712702472296, - "grad_norm": 0.01310602854937315, - "learning_rate": 5.466127941369428e-07, - "loss": 0.0004617081955075264, - "step": 22210 - }, - { - "epoch": 3.78772378516624, - "grad_norm": 0.015786537900567055, - "learning_rate": 5.422626592528137e-07, - "loss": 0.00013349256478250026, - "step": 22215 - }, - { - "epoch": 3.7885763000852517, - "grad_norm": 0.06509364396333694, - "learning_rate": 5.379297774698657e-07, - "loss": 0.0006992133799940348, - "step": 22220 - }, - { - "epoch": 3.7894288150042623, - "grad_norm": 0.019068438559770584, - "learning_rate": 5.336141508108266e-07, - "loss": 0.0005609648767858743, - "step": 22225 - }, - { - "epoch": 3.790281329923274, - "grad_norm": 0.021051403135061264, - "learning_rate": 5.293157812903847e-07, - "loss": 0.0008666029199957848, - "step": 22230 - }, - { - "epoch": 3.791133844842285, - "grad_norm": 0.03582745045423508, - "learning_rate": 5.250346709151559e-07, - "loss": 0.0006231529172509908, - "step": 22235 - }, - { - "epoch": 3.791986359761296, - "grad_norm": 0.0056411512196063995, - "learning_rate": 5.207708216837039e-07, - "loss": 0.00033764943946152927, - "step": 22240 - }, - { - "epoch": 3.792838874680307, - "grad_norm": 0.016097834333777428, - "learning_rate": 5.165242355865365e-07, - "loss": 0.0003145002759993076, - "step": 22245 - }, - { - "epoch": 3.793691389599318, - "grad_norm": 0.001957574160769582, - "learning_rate": 5.122949146061011e-07, - "loss": 0.0005082461517304182, - "step": 22250 - }, - { - "epoch": 3.794543904518329, - "grad_norm": 0.021488793194293976, - "learning_rate": 5.080828607167895e-07, - "loss": 0.0002109076827764511, - "step": 22255 - }, - { - "epoch": 3.79539641943734, - "grad_norm": 0.00327285542152822, - "learning_rate": 5.038880758849242e-07, - "loss": 0.00029755146242678167, - "step": 22260 - }, - { - "epoch": 3.7962489343563512, - "grad_norm": 0.04071727767586708, - "learning_rate": 4.997105620687642e-07, - "loss": 0.00035189902409911157, - "step": 22265 - }, - { - "epoch": 3.7971014492753623, - "grad_norm": 0.043891459703445435, - "learning_rate": 4.955503212185241e-07, - "loss": 0.00030435039661824703, - "step": 22270 - }, - { - "epoch": 3.7979539641943734, - "grad_norm": 0.04061891511082649, - "learning_rate": 4.914073552763381e-07, - "loss": 0.0007196913473308086, - "step": 22275 - }, - { - "epoch": 3.7988064791133844, - "grad_norm": 0.006950880866497755, - "learning_rate": 4.872816661762799e-07, - "loss": 0.0005048359278589487, - "step": 22280 - }, - { - "epoch": 3.7996589940323955, - "grad_norm": 0.01075704861432314, - "learning_rate": 4.831732558443588e-07, - "loss": 0.00047706514596939086, - "step": 22285 - }, - { - "epoch": 3.8005115089514065, - "grad_norm": 0.05214826390147209, - "learning_rate": 4.790821261985114e-07, - "loss": 0.0003694279119372368, - "step": 22290 - }, - { - "epoch": 3.8013640238704176, - "grad_norm": 0.10015593469142914, - "learning_rate": 4.750082791486226e-07, - "loss": 0.0009528685361146927, - "step": 22295 - }, - { - "epoch": 3.8022165387894287, - "grad_norm": 0.014796345494687557, - "learning_rate": 4.709517165965002e-07, - "loss": 0.00034629302099347113, - "step": 22300 - }, - { - "epoch": 3.80306905370844, - "grad_norm": 0.08756010234355927, - "learning_rate": 4.669124404358709e-07, - "loss": 0.0015204890631139278, - "step": 22305 - }, - { - "epoch": 3.803921568627451, - "grad_norm": 0.011379587464034557, - "learning_rate": 4.6289045255241407e-07, - "loss": 0.00043293628841638564, - "step": 22310 - }, - { - "epoch": 3.8047740835464623, - "grad_norm": 0.006499307695776224, - "learning_rate": 4.588857548237193e-07, - "loss": 0.0001419330248609185, - "step": 22315 - }, - { - "epoch": 3.805626598465473, - "grad_norm": 0.010868554934859276, - "learning_rate": 4.5489834911932034e-07, - "loss": 0.0003131470642983913, - "step": 22320 - }, - { - "epoch": 3.8064791133844844, - "grad_norm": 0.1949165314435959, - "learning_rate": 4.509282373006698e-07, - "loss": 0.000589557969942689, - "step": 22325 - }, - { - "epoch": 3.8073316283034955, - "grad_norm": 0.062032103538513184, - "learning_rate": 4.4697542122114766e-07, - "loss": 0.0010706196539103986, - "step": 22330 - }, - { - "epoch": 3.8081841432225065, - "grad_norm": 0.03491361066699028, - "learning_rate": 4.430399027260528e-07, - "loss": 0.00013118325732648374, - "step": 22335 - }, - { - "epoch": 3.8090366581415176, - "grad_norm": 0.012687149457633495, - "learning_rate": 4.39121683652624e-07, - "loss": 0.0006070541683584451, - "step": 22340 - }, - { - "epoch": 3.8098891730605287, - "grad_norm": 0.009227769449353218, - "learning_rate": 4.352207658300105e-07, - "loss": 0.00013156197965145112, - "step": 22345 - }, - { - "epoch": 3.8107416879795397, - "grad_norm": 0.025756431743502617, - "learning_rate": 4.3133715107929736e-07, - "loss": 0.0002231092657893896, - "step": 22350 - }, - { - "epoch": 3.8115942028985508, - "grad_norm": 0.02315904013812542, - "learning_rate": 4.2747084121348e-07, - "loss": 0.0008578922599554062, - "step": 22355 - }, - { - "epoch": 3.812446717817562, - "grad_norm": 0.03867887705564499, - "learning_rate": 4.2362183803748145e-07, - "loss": 0.001257845014333725, - "step": 22360 - }, - { - "epoch": 3.813299232736573, - "grad_norm": 0.007496925536543131, - "learning_rate": 4.197901433481435e-07, - "loss": 0.00025641615502536297, - "step": 22365 - }, - { - "epoch": 3.814151747655584, - "grad_norm": 0.006181985139846802, - "learning_rate": 4.159757589342352e-07, - "loss": 0.000565656740218401, - "step": 22370 - }, - { - "epoch": 3.815004262574595, - "grad_norm": 0.0055122580379247665, - "learning_rate": 4.121786865764282e-07, - "loss": 0.0002753081964328885, - "step": 22375 - }, - { - "epoch": 3.815856777493606, - "grad_norm": 0.06405172497034073, - "learning_rate": 4.083989280473293e-07, - "loss": 0.00029685499612241983, - "step": 22380 - }, - { - "epoch": 3.816709292412617, - "grad_norm": 0.015038585290312767, - "learning_rate": 4.0463648511145223e-07, - "loss": 0.00019005691865459085, - "step": 22385 - }, - { - "epoch": 3.817561807331628, - "grad_norm": 0.006501917727291584, - "learning_rate": 4.008913595252336e-07, - "loss": 0.00047526941634714606, - "step": 22390 - }, - { - "epoch": 3.8184143222506393, - "grad_norm": 0.044223811477422714, - "learning_rate": 3.971635530370207e-07, - "loss": 0.00044773337431252, - "step": 22395 - }, - { - "epoch": 3.8192668371696503, - "grad_norm": 0.11778905987739563, - "learning_rate": 3.934530673870714e-07, - "loss": 0.0004794740118086338, - "step": 22400 - }, - { - "epoch": 3.8201193520886614, - "grad_norm": 0.04858732223510742, - "learning_rate": 3.897599043075753e-07, - "loss": 0.0005154036451131105, - "step": 22405 - }, - { - "epoch": 3.820971867007673, - "grad_norm": 0.047194644808769226, - "learning_rate": 3.860840655226155e-07, - "loss": 0.0005436111241579056, - "step": 22410 - }, - { - "epoch": 3.8218243819266835, - "grad_norm": 0.02568012662231922, - "learning_rate": 3.8242555274820287e-07, - "loss": 0.0006584774237126112, - "step": 22415 - }, - { - "epoch": 3.822676896845695, - "grad_norm": 0.006387351080775261, - "learning_rate": 3.787843676922461e-07, - "loss": 0.0005248990841209888, - "step": 22420 - }, - { - "epoch": 3.8235294117647056, - "grad_norm": 0.03069436363875866, - "learning_rate": 3.751605120545731e-07, - "loss": 0.00037630724254995586, - "step": 22425 - }, - { - "epoch": 3.824381926683717, - "grad_norm": 0.007291358429938555, - "learning_rate": 3.715539875269222e-07, - "loss": 0.0008363793604075909, - "step": 22430 - }, - { - "epoch": 3.825234441602728, - "grad_norm": 0.009530414827167988, - "learning_rate": 3.6796479579293824e-07, - "loss": 0.00019785722251981498, - "step": 22435 - }, - { - "epoch": 3.8260869565217392, - "grad_norm": 0.008785492740571499, - "learning_rate": 3.643929385281727e-07, - "loss": 0.000590974697843194, - "step": 22440 - }, - { - "epoch": 3.8269394714407503, - "grad_norm": 0.007667489815503359, - "learning_rate": 3.608384174000958e-07, - "loss": 0.00028850554954260586, - "step": 22445 - }, - { - "epoch": 3.8277919863597614, - "grad_norm": 0.0715310126543045, - "learning_rate": 3.573012340680637e-07, - "loss": 0.0009551153518259526, - "step": 22450 - }, - { - "epoch": 3.8286445012787724, - "grad_norm": 0.009573518298566341, - "learning_rate": 3.5378139018335526e-07, - "loss": 0.0001834099180996418, - "step": 22455 - }, - { - "epoch": 3.8294970161977835, - "grad_norm": 0.024146724492311478, - "learning_rate": 3.502788873891604e-07, - "loss": 0.0002805993659421802, - "step": 22460 - }, - { - "epoch": 3.8303495311167945, - "grad_norm": 0.034584276378154755, - "learning_rate": 3.4679372732055455e-07, - "loss": 0.00022452049888670443, - "step": 22465 - }, - { - "epoch": 3.8312020460358056, - "grad_norm": 0.01712440513074398, - "learning_rate": 3.433259116045278e-07, - "loss": 0.00025582562666386365, - "step": 22470 - }, - { - "epoch": 3.8320545609548167, - "grad_norm": 0.03746594488620758, - "learning_rate": 3.398754418599728e-07, - "loss": 0.0005911256186664105, - "step": 22475 - }, - { - "epoch": 3.8329070758738277, - "grad_norm": 0.010265517979860306, - "learning_rate": 3.3644231969768427e-07, - "loss": 0.0005279291886836291, - "step": 22480 - }, - { - "epoch": 3.833759590792839, - "grad_norm": 0.00778925372287631, - "learning_rate": 3.3302654672035523e-07, - "loss": 0.00015487722121179103, - "step": 22485 - }, - { - "epoch": 3.83461210571185, - "grad_norm": 0.07120466977357864, - "learning_rate": 3.296281245225851e-07, - "loss": 0.00033021410927176477, - "step": 22490 - }, - { - "epoch": 3.835464620630861, - "grad_norm": 0.019761493429541588, - "learning_rate": 3.2624705469086745e-07, - "loss": 0.0005753487348556519, - "step": 22495 - }, - { - "epoch": 3.836317135549872, - "grad_norm": 0.058154042810201645, - "learning_rate": 3.22883338803602e-07, - "loss": 0.0007553929463028908, - "step": 22500 - }, - { - "epoch": 3.8371696504688835, - "grad_norm": 0.04644978418946266, - "learning_rate": 3.1953697843107864e-07, - "loss": 0.0006953209638595581, - "step": 22505 - }, - { - "epoch": 3.838022165387894, - "grad_norm": 0.1766635626554489, - "learning_rate": 3.1620797513549347e-07, - "loss": 0.0003559787990525365, - "step": 22510 - }, - { - "epoch": 3.8388746803069056, - "grad_norm": 0.00365132512524724, - "learning_rate": 3.128963304709367e-07, - "loss": 0.0004078059922903776, - "step": 22515 - }, - { - "epoch": 3.839727195225916, - "grad_norm": 0.030486639589071274, - "learning_rate": 3.096020459833884e-07, - "loss": 0.0005455107428133487, - "step": 22520 - }, - { - "epoch": 3.8405797101449277, - "grad_norm": 0.01159575954079628, - "learning_rate": 3.0632512321073916e-07, - "loss": 0.0001546024577692151, - "step": 22525 - }, - { - "epoch": 3.8414322250639388, - "grad_norm": 0.016208885237574577, - "learning_rate": 3.0306556368275697e-07, - "loss": 0.00098867267370224, - "step": 22530 - }, - { - "epoch": 3.84228473998295, - "grad_norm": 0.03821619600057602, - "learning_rate": 2.998233689211163e-07, - "loss": 0.001981107145547867, - "step": 22535 - }, - { - "epoch": 3.843137254901961, - "grad_norm": 0.08347555249929428, - "learning_rate": 2.9659854043937726e-07, - "loss": 0.0005207284353673458, - "step": 22540 - }, - { - "epoch": 3.843989769820972, - "grad_norm": 0.12094799429178238, - "learning_rate": 2.933910797430064e-07, - "loss": 0.0014771541580557823, - "step": 22545 - }, - { - "epoch": 3.844842284739983, - "grad_norm": 0.009907519444823265, - "learning_rate": 2.9020098832934354e-07, - "loss": 0.00020951812621206045, - "step": 22550 - }, - { - "epoch": 3.845694799658994, - "grad_norm": 0.009634948335587978, - "learning_rate": 2.87028267687639e-07, - "loss": 0.0004478182177990675, - "step": 22555 - }, - { - "epoch": 3.846547314578005, - "grad_norm": 0.012202809564769268, - "learning_rate": 2.8387291929901636e-07, - "loss": 0.0007417299784719944, - "step": 22560 - }, - { - "epoch": 3.847399829497016, - "grad_norm": 0.0760713517665863, - "learning_rate": 2.8073494463649734e-07, - "loss": 0.0011769287288188935, - "step": 22565 - }, - { - "epoch": 3.8482523444160273, - "grad_norm": 0.005865536630153656, - "learning_rate": 2.7761434516499757e-07, - "loss": 0.0004877586383372545, - "step": 22570 - }, - { - "epoch": 3.8491048593350383, - "grad_norm": 0.07838205993175507, - "learning_rate": 2.7451112234131434e-07, - "loss": 0.0007841618731617928, - "step": 22575 - }, - { - "epoch": 3.8499573742540494, - "grad_norm": 0.043550923466682434, - "learning_rate": 2.714252776141346e-07, - "loss": 0.0004276952240616083, - "step": 22580 - }, - { - "epoch": 3.8508098891730604, - "grad_norm": 0.07113270461559296, - "learning_rate": 2.6835681242403097e-07, - "loss": 0.0004472899250686169, - "step": 22585 - }, - { - "epoch": 3.8516624040920715, - "grad_norm": 0.005593425128608942, - "learning_rate": 2.653057282034743e-07, - "loss": 0.0005118703935295344, - "step": 22590 - }, - { - "epoch": 3.8525149190110826, - "grad_norm": 0.0488508939743042, - "learning_rate": 2.6227202637680025e-07, - "loss": 0.0004050908610224724, - "step": 22595 - }, - { - "epoch": 3.8533674339300936, - "grad_norm": 0.006526235956698656, - "learning_rate": 2.592557083602509e-07, - "loss": 0.00033729532733559606, - "step": 22600 - }, - { - "epoch": 3.8542199488491047, - "grad_norm": 0.06794753670692444, - "learning_rate": 2.5625677556194156e-07, - "loss": 0.0005501693580299616, - "step": 22605 - }, - { - "epoch": 3.855072463768116, - "grad_norm": 0.01906239427626133, - "learning_rate": 2.532752293818732e-07, - "loss": 0.00029343762435019016, - "step": 22610 - }, - { - "epoch": 3.855924978687127, - "grad_norm": 0.018383421003818512, - "learning_rate": 2.5031107121192827e-07, - "loss": 0.00016756532713770866, - "step": 22615 - }, - { - "epoch": 3.8567774936061383, - "grad_norm": 0.024472814053297043, - "learning_rate": 2.4736430243587484e-07, - "loss": 0.0008003754541277885, - "step": 22620 - }, - { - "epoch": 3.857630008525149, - "grad_norm": 0.015008813701570034, - "learning_rate": 2.4443492442936666e-07, - "loss": 0.000387492123991251, - "step": 22625 - }, - { - "epoch": 3.8584825234441604, - "grad_norm": 0.00883245375007391, - "learning_rate": 2.4152293855993073e-07, - "loss": 0.0003626285819336772, - "step": 22630 - }, - { - "epoch": 3.8593350383631715, - "grad_norm": 0.09013792127370834, - "learning_rate": 2.386283461869837e-07, - "loss": 0.0009348958730697632, - "step": 22635 - }, - { - "epoch": 3.8601875532821825, - "grad_norm": 0.022092169150710106, - "learning_rate": 2.3575114866181134e-07, - "loss": 0.000852299015969038, - "step": 22640 - }, - { - "epoch": 3.8610400682011936, - "grad_norm": 0.041009481996297836, - "learning_rate": 2.3289134732758923e-07, - "loss": 0.0002561352448537946, - "step": 22645 - }, - { - "epoch": 3.8618925831202047, - "grad_norm": 0.01204719115048647, - "learning_rate": 2.300489435193703e-07, - "loss": 0.00041399816982448103, - "step": 22650 - }, - { - "epoch": 3.8627450980392157, - "grad_norm": 0.006769226398319006, - "learning_rate": 2.2722393856407644e-07, - "loss": 0.0007864254526793957, - "step": 22655 - }, - { - "epoch": 3.863597612958227, - "grad_norm": 0.03550838679075241, - "learning_rate": 2.2441633378051522e-07, - "loss": 0.0007944885641336441, - "step": 22660 - }, - { - "epoch": 3.864450127877238, - "grad_norm": 0.018093502148985863, - "learning_rate": 2.2162613047937567e-07, - "loss": 0.0004680437035858631, - "step": 22665 - }, - { - "epoch": 3.865302642796249, - "grad_norm": 0.052169136703014374, - "learning_rate": 2.188533299632117e-07, - "loss": 0.0005756160244345665, - "step": 22670 - }, - { - "epoch": 3.86615515771526, - "grad_norm": 0.0072258529253304005, - "learning_rate": 2.1609793352646288e-07, - "loss": 0.0004663677420467138, - "step": 22675 - }, - { - "epoch": 3.867007672634271, - "grad_norm": 0.005863961763679981, - "learning_rate": 2.1335994245543358e-07, - "loss": 0.0002789400052279234, - "step": 22680 - }, - { - "epoch": 3.867860187553282, - "grad_norm": 0.018820008262991905, - "learning_rate": 2.1063935802831804e-07, - "loss": 0.00045074778608977795, - "step": 22685 - }, - { - "epoch": 3.868712702472293, - "grad_norm": 0.012297754175961018, - "learning_rate": 2.07936181515167e-07, - "loss": 0.00035502421669661997, - "step": 22690 - }, - { - "epoch": 3.869565217391304, - "grad_norm": 0.016527209430933, - "learning_rate": 2.0525041417792097e-07, - "loss": 0.0003985234070569277, - "step": 22695 - }, - { - "epoch": 3.8704177323103153, - "grad_norm": 0.02526754140853882, - "learning_rate": 2.0258205727038123e-07, - "loss": 0.0005783138796687127, - "step": 22700 - }, - { - "epoch": 3.8712702472293268, - "grad_norm": 0.03327864035964012, - "learning_rate": 1.9993111203822215e-07, - "loss": 0.0004067671485245228, - "step": 22705 - }, - { - "epoch": 3.8721227621483374, - "grad_norm": 0.027886848896741867, - "learning_rate": 1.972975797190038e-07, - "loss": 0.00023027975112199783, - "step": 22710 - }, - { - "epoch": 3.872975277067349, - "grad_norm": 0.05536410212516785, - "learning_rate": 1.9468146154213438e-07, - "loss": 0.0006173822097480297, - "step": 22715 - }, - { - "epoch": 3.8738277919863595, - "grad_norm": 0.03576589748263359, - "learning_rate": 1.92082758728912e-07, - "loss": 0.000484804529696703, - "step": 22720 - }, - { - "epoch": 3.874680306905371, - "grad_norm": 0.00873229093849659, - "learning_rate": 1.8950147249249536e-07, - "loss": 0.0002321997657418251, - "step": 22725 - }, - { - "epoch": 3.875532821824382, - "grad_norm": 0.039964936673641205, - "learning_rate": 1.8693760403791642e-07, - "loss": 0.0005052187014371156, - "step": 22730 - }, - { - "epoch": 3.876385336743393, - "grad_norm": 0.08028698712587357, - "learning_rate": 1.8439115456207188e-07, - "loss": 0.0006155148148536682, - "step": 22735 - }, - { - "epoch": 3.877237851662404, - "grad_norm": 0.027846908196806908, - "learning_rate": 1.8186212525373168e-07, - "loss": 0.0013848446309566499, - "step": 22740 - }, - { - "epoch": 3.8780903665814153, - "grad_norm": 0.020065290853381157, - "learning_rate": 1.7935051729353056e-07, - "loss": 0.00031999857164919374, - "step": 22745 - }, - { - "epoch": 3.8789428815004263, - "grad_norm": 0.03264397382736206, - "learning_rate": 1.7685633185396812e-07, - "loss": 0.00050089699216187, - "step": 22750 - }, - { - "epoch": 3.8797953964194374, - "grad_norm": 0.015950864180922508, - "learning_rate": 1.74379570099413e-07, - "loss": 0.00018732347525656222, - "step": 22755 - }, - { - "epoch": 3.8806479113384484, - "grad_norm": 0.022178884595632553, - "learning_rate": 1.7192023318610277e-07, - "loss": 0.0004226424265652895, - "step": 22760 - }, - { - "epoch": 3.8815004262574595, - "grad_norm": 0.08079300820827484, - "learning_rate": 1.6947832226213987e-07, - "loss": 0.0008535554632544517, - "step": 22765 - }, - { - "epoch": 3.8823529411764706, - "grad_norm": 0.04649774730205536, - "learning_rate": 1.670538384674791e-07, - "loss": 0.001085586380213499, - "step": 22770 - }, - { - "epoch": 3.8832054560954816, - "grad_norm": 0.003438888816162944, - "learning_rate": 1.6464678293396093e-07, - "loss": 0.0002649814588949084, - "step": 22775 - }, - { - "epoch": 3.8840579710144927, - "grad_norm": 0.03992622345685959, - "learning_rate": 1.6225715678527816e-07, - "loss": 0.0005447682924568653, - "step": 22780 - }, - { - "epoch": 3.8849104859335037, - "grad_norm": 0.022190820425748825, - "learning_rate": 1.5988496113698013e-07, - "loss": 0.00076604881323874, - "step": 22785 - }, - { - "epoch": 3.885763000852515, - "grad_norm": 0.05946198105812073, - "learning_rate": 1.575301970964976e-07, - "loss": 0.0003323152428492904, - "step": 22790 - }, - { - "epoch": 3.886615515771526, - "grad_norm": 0.011684319004416466, - "learning_rate": 1.551928657631013e-07, - "loss": 0.0003173027187585831, - "step": 22795 - }, - { - "epoch": 3.887468030690537, - "grad_norm": 0.03443511947989464, - "learning_rate": 1.528729682279392e-07, - "loss": 0.000368134374730289, - "step": 22800 - }, - { - "epoch": 3.888320545609548, - "grad_norm": 0.0683477595448494, - "learning_rate": 1.5057050557402418e-07, - "loss": 0.0003201848594471812, - "step": 22805 - }, - { - "epoch": 3.8891730605285595, - "grad_norm": 0.06834128499031067, - "learning_rate": 1.4828547887621307e-07, - "loss": 0.0007571569178253413, - "step": 22810 - }, - { - "epoch": 3.89002557544757, - "grad_norm": 0.013952577486634254, - "learning_rate": 1.4601788920123176e-07, - "loss": 0.0007348908577114344, - "step": 22815 - }, - { - "epoch": 3.8908780903665816, - "grad_norm": 0.0736064463853836, - "learning_rate": 1.4376773760767511e-07, - "loss": 0.0007845867425203323, - "step": 22820 - }, - { - "epoch": 3.8917306052855922, - "grad_norm": 0.018666284158825874, - "learning_rate": 1.415350251459821e-07, - "loss": 0.00020006420090794562, - "step": 22825 - }, - { - "epoch": 3.8925831202046037, - "grad_norm": 0.11472959816455841, - "learning_rate": 1.3931975285845636e-07, - "loss": 0.0009698046371340752, - "step": 22830 - }, - { - "epoch": 3.893435635123615, - "grad_norm": 0.012865605764091015, - "learning_rate": 1.3712192177926656e-07, - "loss": 0.0010215624235570431, - "step": 22835 - }, - { - "epoch": 3.894288150042626, - "grad_norm": 0.0334518626332283, - "learning_rate": 1.349415329344336e-07, - "loss": 0.00037307373713701963, - "step": 22840 - }, - { - "epoch": 3.895140664961637, - "grad_norm": 0.02127443067729473, - "learning_rate": 1.3277858734182662e-07, - "loss": 0.00018035907996818423, - "step": 22845 - }, - { - "epoch": 3.895993179880648, - "grad_norm": 0.00875978171825409, - "learning_rate": 1.3063308601118372e-07, - "loss": 0.000528197456151247, - "step": 22850 - }, - { - "epoch": 3.896845694799659, - "grad_norm": 0.009711408987641335, - "learning_rate": 1.2850502994410371e-07, - "loss": 0.0004956468474119902, - "step": 22855 - }, - { - "epoch": 3.89769820971867, - "grad_norm": 0.012017211876809597, - "learning_rate": 1.2639442013402528e-07, - "loss": 0.00036720100324600936, - "step": 22860 - }, - { - "epoch": 3.898550724637681, - "grad_norm": 0.011994452215731144, - "learning_rate": 1.2430125756625609e-07, - "loss": 0.00036521924193948506, - "step": 22865 - }, - { - "epoch": 3.899403239556692, - "grad_norm": 0.015366890467703342, - "learning_rate": 1.2222554321795614e-07, - "loss": 0.001251001376658678, - "step": 22870 - }, - { - "epoch": 3.9002557544757033, - "grad_norm": 0.011636365205049515, - "learning_rate": 1.2016727805812948e-07, - "loss": 0.00030510048381984236, - "step": 22875 - }, - { - "epoch": 3.9011082693947143, - "grad_norm": 0.06454995274543762, - "learning_rate": 1.1812646304764917e-07, - "loss": 0.00047010909765958785, - "step": 22880 - }, - { - "epoch": 3.9019607843137254, - "grad_norm": 0.004893654957413673, - "learning_rate": 1.1610309913923643e-07, - "loss": 0.0002160596428439021, - "step": 22885 - }, - { - "epoch": 3.9028132992327365, - "grad_norm": 0.009556293487548828, - "learning_rate": 1.140971872774607e-07, - "loss": 0.0003103669732809067, - "step": 22890 - }, - { - "epoch": 3.9036658141517475, - "grad_norm": 0.013235462829470634, - "learning_rate": 1.1210872839875207e-07, - "loss": 0.000385861424729228, - "step": 22895 - }, - { - "epoch": 3.9045183290707586, - "grad_norm": 0.006167116109281778, - "learning_rate": 1.1013772343138466e-07, - "loss": 0.0004389645531773567, - "step": 22900 - }, - { - "epoch": 3.90537084398977, - "grad_norm": 0.012628387659788132, - "learning_rate": 1.0818417329549328e-07, - "loss": 0.0002759493188932538, - "step": 22905 - }, - { - "epoch": 3.9062233589087807, - "grad_norm": 0.020555347204208374, - "learning_rate": 1.0624807890305676e-07, - "loss": 0.00044609596952795985, - "step": 22910 - }, - { - "epoch": 3.907075873827792, - "grad_norm": 0.019547248259186745, - "learning_rate": 1.0432944115791043e-07, - "loss": 0.00040673622861504556, - "step": 22915 - }, - { - "epoch": 3.907928388746803, - "grad_norm": 0.005401125177741051, - "learning_rate": 1.0242826095574198e-07, - "loss": 0.0004734213929623365, - "step": 22920 - }, - { - "epoch": 3.9087809036658143, - "grad_norm": 0.004537303000688553, - "learning_rate": 1.0054453918407896e-07, - "loss": 0.00024205415975302457, - "step": 22925 - }, - { - "epoch": 3.9096334185848254, - "grad_norm": 0.01689390279352665, - "learning_rate": 9.86782767223096e-08, - "loss": 0.0002517271088436246, - "step": 22930 - }, - { - "epoch": 3.9104859335038364, - "grad_norm": 0.06417275965213776, - "learning_rate": 9.682947444166616e-08, - "loss": 0.0005528208799660206, - "step": 22935 - }, - { - "epoch": 3.9113384484228475, - "grad_norm": 0.022223379462957382, - "learning_rate": 9.499813320522909e-08, - "loss": 0.00022923222277313471, - "step": 22940 - }, - { - "epoch": 3.9121909633418586, - "grad_norm": 0.015348847955465317, - "learning_rate": 9.318425386793116e-08, - "loss": 0.0006318584084510804, - "step": 22945 - }, - { - "epoch": 3.9130434782608696, - "grad_norm": 0.04616772010922432, - "learning_rate": 9.138783727655336e-08, - "loss": 0.0006988701410591602, - "step": 22950 - }, - { - "epoch": 3.9138959931798807, - "grad_norm": 0.012819357216358185, - "learning_rate": 8.960888426972068e-08, - "loss": 0.0001819216297008097, - "step": 22955 - }, - { - "epoch": 3.9147485080988917, - "grad_norm": 0.020759694278240204, - "learning_rate": 8.784739567790632e-08, - "loss": 0.0006579568609595299, - "step": 22960 - }, - { - "epoch": 3.915601023017903, - "grad_norm": 0.17942465841770172, - "learning_rate": 8.610337232343167e-08, - "loss": 0.001371748000383377, - "step": 22965 - }, - { - "epoch": 3.916453537936914, - "grad_norm": 0.041743483394384384, - "learning_rate": 8.437681502047045e-08, - "loss": 0.001165113039314747, - "step": 22970 - }, - { - "epoch": 3.917306052855925, - "grad_norm": 0.04779570922255516, - "learning_rate": 8.26677245750279e-08, - "loss": 0.0006786032579839229, - "step": 22975 - }, - { - "epoch": 3.918158567774936, - "grad_norm": 0.06158490851521492, - "learning_rate": 8.097610178497416e-08, - "loss": 0.0007079535163939, - "step": 22980 - }, - { - "epoch": 3.919011082693947, - "grad_norm": 0.016627155244350433, - "learning_rate": 7.930194744000668e-08, - "loss": 0.0005602708086371421, - "step": 22985 - }, - { - "epoch": 3.919863597612958, - "grad_norm": 0.012588472105562687, - "learning_rate": 7.764526232167945e-08, - "loss": 0.0003641904331743717, - "step": 22990 - }, - { - "epoch": 3.920716112531969, - "grad_norm": 0.008864199742674828, - "learning_rate": 7.600604720339049e-08, - "loss": 0.0014302042312920094, - "step": 22995 - }, - { - "epoch": 3.9215686274509802, - "grad_norm": 0.06016424298286438, - "learning_rate": 7.438430285037767e-08, - "loss": 0.0005181442946195602, - "step": 23000 - }, - { - "epoch": 3.9224211423699913, - "grad_norm": 0.017198998481035233, - "learning_rate": 7.278003001972704e-08, - "loss": 0.0007852497510612011, - "step": 23005 - }, - { - "epoch": 3.923273657289003, - "grad_norm": 0.10172398388385773, - "learning_rate": 7.119322946036454e-08, - "loss": 0.0008896278217434883, - "step": 23010 - }, - { - "epoch": 3.9241261722080134, - "grad_norm": 0.021225174888968468, - "learning_rate": 6.96239019130601e-08, - "loss": 0.0006865154020488263, - "step": 23015 - }, - { - "epoch": 3.924978687127025, - "grad_norm": 0.039081476628780365, - "learning_rate": 6.807204811043187e-08, - "loss": 0.0003759567625820637, - "step": 23020 - }, - { - "epoch": 3.9258312020460355, - "grad_norm": 0.033924706280231476, - "learning_rate": 6.653766877693368e-08, - "loss": 0.0003636789973825216, - "step": 23025 - }, - { - "epoch": 3.926683716965047, - "grad_norm": 0.022344253957271576, - "learning_rate": 6.502076462886752e-08, - "loss": 0.00048628607764840127, - "step": 23030 - }, - { - "epoch": 3.927536231884058, - "grad_norm": 0.01214676909148693, - "learning_rate": 6.352133637437112e-08, - "loss": 0.0007959893904626369, - "step": 23035 - }, - { - "epoch": 3.928388746803069, - "grad_norm": 0.044509515166282654, - "learning_rate": 6.20393847134304e-08, - "loss": 0.000532484333962202, - "step": 23040 - }, - { - "epoch": 3.92924126172208, - "grad_norm": 0.030274951830506325, - "learning_rate": 6.05749103378711e-08, - "loss": 0.0006205241661518812, - "step": 23045 - }, - { - "epoch": 3.9300937766410913, - "grad_norm": 0.07182637602090836, - "learning_rate": 5.912791393135469e-08, - "loss": 0.0010498687624931335, - "step": 23050 - }, - { - "epoch": 3.9309462915601023, - "grad_norm": 0.08199719339609146, - "learning_rate": 5.769839616938665e-08, - "loss": 0.0007067734841257334, - "step": 23055 - }, - { - "epoch": 3.9317988064791134, - "grad_norm": 0.02759665437042713, - "learning_rate": 5.6286357719320664e-08, - "loss": 0.0006836862768977881, - "step": 23060 - }, - { - "epoch": 3.9326513213981245, - "grad_norm": 0.049859654158353806, - "learning_rate": 5.48917992403336e-08, - "loss": 0.0003887650091201067, - "step": 23065 - }, - { - "epoch": 3.9335038363171355, - "grad_norm": 0.020034313201904297, - "learning_rate": 5.3514721383458856e-08, - "loss": 0.0002620942890644073, - "step": 23070 - }, - { - "epoch": 3.9343563512361466, - "grad_norm": 0.02726505696773529, - "learning_rate": 5.21551247915572e-08, - "loss": 0.00019104636739939452, - "step": 23075 - }, - { - "epoch": 3.9352088661551576, - "grad_norm": 0.022711891680955887, - "learning_rate": 5.081301009933758e-08, - "loss": 0.00029650195501744746, - "step": 23080 - }, - { - "epoch": 3.9360613810741687, - "grad_norm": 0.09871876984834671, - "learning_rate": 4.948837793334465e-08, - "loss": 0.00047057950869202616, - "step": 23085 - }, - { - "epoch": 3.9369138959931798, - "grad_norm": 0.006549649406224489, - "learning_rate": 4.818122891195458e-08, - "loss": 0.00047056037001311777, - "step": 23090 - }, - { - "epoch": 3.937766410912191, - "grad_norm": 0.05611155182123184, - "learning_rate": 4.689156364539176e-08, - "loss": 0.0005085552111268044, - "step": 23095 - }, - { - "epoch": 3.938618925831202, - "grad_norm": 0.0015141203766688704, - "learning_rate": 4.561938273572041e-08, - "loss": 0.0005205009132623672, - "step": 23100 - }, - { - "epoch": 3.9394714407502134, - "grad_norm": 0.040545813739299774, - "learning_rate": 4.436468677682797e-08, - "loss": 0.0003895478090271354, - "step": 23105 - }, - { - "epoch": 3.940323955669224, - "grad_norm": 0.007932890206575394, - "learning_rate": 4.3127476354450075e-08, - "loss": 0.0003442693734541535, - "step": 23110 - }, - { - "epoch": 3.9411764705882355, - "grad_norm": 0.04718896746635437, - "learning_rate": 4.1907752046162205e-08, - "loss": 0.0009176202118396759, - "step": 23115 - }, - { - "epoch": 3.942028985507246, - "grad_norm": 0.005520604085177183, - "learning_rate": 4.070551442136305e-08, - "loss": 0.0008139912039041519, - "step": 23120 - }, - { - "epoch": 3.9428815004262576, - "grad_norm": 0.013835078105330467, - "learning_rate": 3.952076404131199e-08, - "loss": 0.0009749887511134147, - "step": 23125 - }, - { - "epoch": 3.9437340153452687, - "grad_norm": 0.14306026697158813, - "learning_rate": 3.8353501459074953e-08, - "loss": 0.0004838756285607815, - "step": 23130 - }, - { - "epoch": 3.9445865302642797, - "grad_norm": 0.0022388347424566746, - "learning_rate": 3.720372721957854e-08, - "loss": 0.00016013121930882334, - "step": 23135 - }, - { - "epoch": 3.945439045183291, - "grad_norm": 0.06415778398513794, - "learning_rate": 3.607144185957256e-08, - "loss": 0.0004399829544126987, - "step": 23140 - }, - { - "epoch": 3.946291560102302, - "grad_norm": 0.006063231732696295, - "learning_rate": 3.4956645907642525e-08, - "loss": 0.0003463166998699307, - "step": 23145 - }, - { - "epoch": 3.947144075021313, - "grad_norm": 0.058278266340494156, - "learning_rate": 3.385933988421796e-08, - "loss": 0.0006705883424729109, - "step": 23150 - }, - { - "epoch": 3.947996589940324, - "grad_norm": 0.007072574459016323, - "learning_rate": 3.277952430155162e-08, - "loss": 0.00027483000885695217, - "step": 23155 - }, - { - "epoch": 3.948849104859335, - "grad_norm": 0.06288672983646393, - "learning_rate": 3.171719966374442e-08, - "loss": 0.00044926153495907786, - "step": 23160 - }, - { - "epoch": 3.949701619778346, - "grad_norm": 0.11492919921875, - "learning_rate": 3.06723664667205e-08, - "loss": 0.0006402281112968921, - "step": 23165 - }, - { - "epoch": 3.950554134697357, - "grad_norm": 0.057387739419937134, - "learning_rate": 2.964502519823969e-08, - "loss": 0.0009132426232099534, - "step": 23170 - }, - { - "epoch": 3.9514066496163682, - "grad_norm": 0.02673800103366375, - "learning_rate": 2.8635176337905852e-08, - "loss": 0.00021783523261547088, - "step": 23175 - }, - { - "epoch": 3.9522591645353793, - "grad_norm": 0.005547195672988892, - "learning_rate": 2.7642820357146046e-08, - "loss": 0.00022417106665670872, - "step": 23180 - }, - { - "epoch": 3.9531116794543903, - "grad_norm": 0.004117009229958057, - "learning_rate": 2.6667957719227197e-08, - "loss": 0.0005245423410087824, - "step": 23185 - }, - { - "epoch": 3.9539641943734014, - "grad_norm": 0.027315596118569374, - "learning_rate": 2.5710588879243597e-08, - "loss": 0.000357617880217731, - "step": 23190 - }, - { - "epoch": 3.9548167092924125, - "grad_norm": 0.013029955327510834, - "learning_rate": 2.4770714284133575e-08, - "loss": 0.0009207891300320626, - "step": 23195 - }, - { - "epoch": 3.955669224211424, - "grad_norm": 0.0028237253427505493, - "learning_rate": 2.3848334372654488e-08, - "loss": 0.00036229838151484727, - "step": 23200 - }, - { - "epoch": 3.9565217391304346, - "grad_norm": 0.04396171122789383, - "learning_rate": 2.2943449575407725e-08, - "loss": 0.0002923567779362202, - "step": 23205 - }, - { - "epoch": 3.957374254049446, - "grad_norm": 0.0613851472735405, - "learning_rate": 2.2056060314822044e-08, - "loss": 0.001064283773303032, - "step": 23210 - }, - { - "epoch": 3.9582267689684567, - "grad_norm": 0.0072186607867479324, - "learning_rate": 2.1186167005166066e-08, - "loss": 0.00033982205204665663, - "step": 23215 - }, - { - "epoch": 3.959079283887468, - "grad_norm": 0.02121078409254551, - "learning_rate": 2.0333770052527453e-08, - "loss": 0.0003547267057001591, - "step": 23220 - }, - { - "epoch": 3.9599317988064793, - "grad_norm": 0.0026946449652314186, - "learning_rate": 1.9498869854833733e-08, - "loss": 0.0009029872715473175, - "step": 23225 - }, - { - "epoch": 3.9607843137254903, - "grad_norm": 0.029824867844581604, - "learning_rate": 1.8681466801852286e-08, - "loss": 0.0006612129043787717, - "step": 23230 - }, - { - "epoch": 3.9616368286445014, - "grad_norm": 0.021079210564494133, - "learning_rate": 1.7881561275161217e-08, - "loss": 0.00028204533737152815, - "step": 23235 - }, - { - "epoch": 3.9624893435635125, - "grad_norm": 0.04094521328806877, - "learning_rate": 1.709915364819514e-08, - "loss": 0.0006630297284573317, - "step": 23240 - }, - { - "epoch": 3.9633418584825235, - "grad_norm": 0.012560448609292507, - "learning_rate": 1.6334244286203556e-08, - "loss": 0.0004881520289927721, - "step": 23245 - }, - { - "epoch": 3.9641943734015346, - "grad_norm": 0.08076170831918716, - "learning_rate": 1.5586833546267498e-08, - "loss": 0.0008171131834387779, - "step": 23250 - }, - { - "epoch": 3.9650468883205456, - "grad_norm": 0.016555890440940857, - "learning_rate": 1.4856921777312014e-08, - "loss": 0.0002690091263502836, - "step": 23255 - }, - { - "epoch": 3.9658994032395567, - "grad_norm": 0.025087429210543633, - "learning_rate": 1.4144509320072884e-08, - "loss": 0.0002989970613270998, - "step": 23260 - }, - { - "epoch": 3.9667519181585678, - "grad_norm": 0.07274094223976135, - "learning_rate": 1.3449596507138228e-08, - "loss": 0.001411922462284565, - "step": 23265 - }, - { - "epoch": 3.967604433077579, - "grad_norm": 0.009768236428499222, - "learning_rate": 1.277218366291105e-08, - "loss": 0.00017849041614681482, - "step": 23270 - }, - { - "epoch": 3.96845694799659, - "grad_norm": 0.011777863837778568, - "learning_rate": 1.2112271103630056e-08, - "loss": 0.0003875581081956625, - "step": 23275 - }, - { - "epoch": 3.969309462915601, - "grad_norm": 0.10975488275289536, - "learning_rate": 1.1469859137369642e-08, - "loss": 0.001325076725333929, - "step": 23280 - }, - { - "epoch": 3.970161977834612, - "grad_norm": 0.04203944653272629, - "learning_rate": 1.0844948064019088e-08, - "loss": 0.000397085165604949, - "step": 23285 - }, - { - "epoch": 3.971014492753623, - "grad_norm": 0.022160891443490982, - "learning_rate": 1.0237538175320026e-08, - "loss": 0.0010200665332376958, - "step": 23290 - }, - { - "epoch": 3.971867007672634, - "grad_norm": 0.02673690766096115, - "learning_rate": 9.64762975482064e-09, - "loss": 0.0005900958087295294, - "step": 23295 - }, - { - "epoch": 3.972719522591645, - "grad_norm": 0.09556782245635986, - "learning_rate": 9.075223077917304e-09, - "loss": 0.000301313167437911, - "step": 23300 - }, - { - "epoch": 3.9735720375106567, - "grad_norm": 0.007306256797164679, - "learning_rate": 8.520318411825434e-09, - "loss": 0.00012424198212102056, - "step": 23305 - }, - { - "epoch": 3.9744245524296673, - "grad_norm": 0.03629566729068756, - "learning_rate": 7.982916015591978e-09, - "loss": 0.0005501963198184967, - "step": 23310 - }, - { - "epoch": 3.975277067348679, - "grad_norm": 0.009374035522341728, - "learning_rate": 7.463016140095423e-09, - "loss": 0.0009492671117186547, - "step": 23315 - }, - { - "epoch": 3.9761295822676894, - "grad_norm": 0.010084625333547592, - "learning_rate": 6.960619028041625e-09, - "loss": 0.00046687186695635317, - "step": 23320 - }, - { - "epoch": 3.976982097186701, - "grad_norm": 0.02581017091870308, - "learning_rate": 6.475724913967972e-09, - "loss": 0.000594175374135375, - "step": 23325 - }, - { - "epoch": 3.977834612105712, - "grad_norm": 0.045390889048576355, - "learning_rate": 6.0083340242392256e-09, - "loss": 0.00024255807511508465, - "step": 23330 - }, - { - "epoch": 3.978687127024723, - "grad_norm": 0.06592033803462982, - "learning_rate": 5.558446577047515e-09, - "loss": 0.0006300599779933691, - "step": 23335 - }, - { - "epoch": 3.979539641943734, - "grad_norm": 0.03145362809300423, - "learning_rate": 5.1260627824123436e-09, - "loss": 0.0004346088971942663, - "step": 23340 - }, - { - "epoch": 3.980392156862745, - "grad_norm": 0.008353454060852528, - "learning_rate": 4.711182842193073e-09, - "loss": 0.0009800062514841556, - "step": 23345 - }, - { - "epoch": 3.9812446717817562, - "grad_norm": 0.024524807929992676, - "learning_rate": 4.313806950063947e-09, - "loss": 0.0003803495317697525, - "step": 23350 - }, - { - "epoch": 3.9820971867007673, - "grad_norm": 0.018736131489276886, - "learning_rate": 3.933935291530743e-09, - "loss": 0.00024626741651445627, - "step": 23355 - }, - { - "epoch": 3.9829497016197783, - "grad_norm": 0.010309611447155476, - "learning_rate": 3.571568043934936e-09, - "loss": 0.0006795517634600401, - "step": 23360 - }, - { - "epoch": 3.9838022165387894, - "grad_norm": 0.04562285169959068, - "learning_rate": 3.2267053764412076e-09, - "loss": 0.0006199819035828114, - "step": 23365 - }, - { - "epoch": 3.9846547314578005, - "grad_norm": 0.043983470648527145, - "learning_rate": 2.8993474500416113e-09, - "loss": 0.00030286619439721105, - "step": 23370 - }, - { - "epoch": 3.9855072463768115, - "grad_norm": 0.012420141138136387, - "learning_rate": 2.589494417555571e-09, - "loss": 0.0005513324867933989, - "step": 23375 - }, - { - "epoch": 3.9863597612958226, - "grad_norm": 0.010290928184986115, - "learning_rate": 2.2971464236382074e-09, - "loss": 0.00047493595629930494, - "step": 23380 - }, - { - "epoch": 3.9872122762148337, - "grad_norm": 0.0191446952521801, - "learning_rate": 2.0223036047636867e-09, - "loss": 0.0007154576946049928, - "step": 23385 - }, - { - "epoch": 3.9880647911338447, - "grad_norm": 0.20886076986789703, - "learning_rate": 1.764966089237707e-09, - "loss": 0.002053941413760185, - "step": 23390 - }, - { - "epoch": 3.9889173060528558, - "grad_norm": 0.10748913139104843, - "learning_rate": 1.5251339971933397e-09, - "loss": 0.001036037877202034, - "step": 23395 - }, - { - "epoch": 3.9897698209718673, - "grad_norm": 0.004688949324190617, - "learning_rate": 1.3028074405951882e-09, - "loss": 0.0003380549373105168, - "step": 23400 - }, - { - "epoch": 3.990622335890878, - "grad_norm": 0.09298693388700485, - "learning_rate": 1.0979865232310647e-09, - "loss": 0.0003235136391595006, - "step": 23405 - }, - { - "epoch": 3.9914748508098894, - "grad_norm": 0.011672712862491608, - "learning_rate": 9.106713407161515e-10, - "loss": 0.0008779228664934635, - "step": 23410 - }, - { - "epoch": 3.9923273657289, - "grad_norm": 0.025006311014294624, - "learning_rate": 7.408619804971649e-10, - "loss": 0.0008599048480391503, - "step": 23415 - }, - { - "epoch": 3.9931798806479115, - "grad_norm": 0.02157057262957096, - "learning_rate": 5.885585218481925e-10, - "loss": 0.00027061502914875745, - "step": 23420 - }, - { - "epoch": 3.9940323955669226, - "grad_norm": 0.004747865721583366, - "learning_rate": 4.5376103587069223e-10, - "loss": 0.0004908301401883364, - "step": 23425 - }, - { - "epoch": 3.9948849104859336, - "grad_norm": 0.03755816072225571, - "learning_rate": 3.3646958548516623e-10, - "loss": 0.00035946685820817945, - "step": 23430 - }, - { - "epoch": 3.9957374254049447, - "grad_norm": 0.05361028388142586, - "learning_rate": 2.3668422545614074e-10, - "loss": 0.00037483936175704003, - "step": 23435 - }, - { - "epoch": 3.9965899403239558, - "grad_norm": 0.18934012949466705, - "learning_rate": 1.5440500235885945e-10, - "loss": 0.0011177632957696914, - "step": 23440 - }, - { - "epoch": 3.997442455242967, - "grad_norm": 0.013160571455955505, - "learning_rate": 8.963195461259009e-11, - "loss": 0.0003229744965210557, - "step": 23445 - }, - { - "epoch": 3.998294970161978, - "grad_norm": 0.03440895304083824, - "learning_rate": 4.2365112447317885e-11, - "loss": 0.0006907129660248757, - "step": 23450 - }, - { - "epoch": 3.999147485080989, - "grad_norm": 0.006319939624518156, - "learning_rate": 1.2604497932888757e-11, - "loss": 0.00044195097871124744, - "step": 23455 - }, - { - "epoch": 3.9993179880647913, - "eval_loss": 0.0636245608329773, - "eval_runtime": 3.6813, - "eval_samples_per_second": 68.454, - "eval_steps_per_second": 1.087, - "step": 23456 - }, - { - "eval_cer_subset": 0.014004918006090512, - "eval_cer_subset_edit_distance": 860, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 23456 - } - ], - "logging_steps": 5, - "max_steps": 23460, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 2932, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 7.907152945373725e+18, - "train_batch_size": 32, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/training_args.bin deleted file mode 100644 index f049a0b30d4abb921310cf007655d399147294f8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23456/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6adec376d54028ef57ef3dc856a5cba12bab9c0d580369637fa983b6072064f7 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/adapter_config.json deleted file mode 100644 index dcdb6b17c195950cd12709b48b32f4a0c173c74e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "down_proj", - "v_proj", - "gate_proj", - "up_proj", - "o_proj", - "q_proj", - "k_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/adapter_model.safetensors deleted file mode 100644 index cc05a1af047872bbfcca8fbf22ef1b85531df78c..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:55415ecd71a3b56b03e58c1f8d6775589a7d136f043dba7371edd9f9047db4d8 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/optimizer.pt deleted file mode 100644 index 1bec2bd011f6a41b540cc342c1b93ca03bfaf77f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:340b0be5f84a4aec6126f331756ed85a068bc85b189df4c49ad5e8d7ceff48e9 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/rng_state.pth deleted file mode 100644 index 4669d52b4df192bc91f2676391b1233756a07f32..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a2df993afae23610a148cbb90d0d2fcfd8b5e755873f80c30cdbe027d950c3c -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/scheduler.pt deleted file mode 100644 index ad799d8383af8a8ccc7ae8336f06e377caf87c58..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9d37c40ee1a09c0e2c666917b34c4c525a97c9a563d6ecb01e53ee96b1f9320f -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/trainer_state.json deleted file mode 100644 index d737869972d7b32c7eeea6b868aa58d3ffb9837b..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/trainer_state.json +++ /dev/null @@ -1,33134 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 4.0, - "eval_steps": 1466, - "global_step": 23460, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0008525149190110827, - "grad_norm": 1.6653118133544922, - "learning_rate": 6.382978723404255e-07, - "loss": 0.6878558158874511, - "step": 5 - }, - { - "epoch": 0.0017050298380221654, - "grad_norm": 1.6072094440460205, - "learning_rate": 1.4361702127659573e-06, - "loss": 0.7222867965698242, - "step": 10 - }, - { - "epoch": 0.0025575447570332483, - "grad_norm": 1.8327608108520508, - "learning_rate": 2.2340425531914894e-06, - "loss": 0.6977188110351562, - "step": 15 - }, - { - "epoch": 0.0034100596760443308, - "grad_norm": 1.7263766527175903, - "learning_rate": 3.0319148936170214e-06, - "loss": 0.6928215026855469, - "step": 20 - }, - { - "epoch": 0.004262574595055414, - "grad_norm": 1.8482989072799683, - "learning_rate": 3.829787234042553e-06, - "loss": 0.6887872695922852, - "step": 25 - }, - { - "epoch": 0.005115089514066497, - "grad_norm": 2.0272440910339355, - "learning_rate": 4.627659574468085e-06, - "loss": 0.6714544296264648, - "step": 30 - }, - { - "epoch": 0.005967604433077579, - "grad_norm": 2.4286959171295166, - "learning_rate": 5.425531914893616e-06, - "loss": 0.6025971412658692, - "step": 35 - }, - { - "epoch": 0.0068201193520886615, - "grad_norm": 2.26863694190979, - "learning_rate": 6.223404255319148e-06, - "loss": 0.5200423240661621, - "step": 40 - }, - { - "epoch": 0.0076726342710997444, - "grad_norm": 2.0572476387023926, - "learning_rate": 7.02127659574468e-06, - "loss": 0.3741515874862671, - "step": 45 - }, - { - "epoch": 0.008525149190110827, - "grad_norm": 0.9048103094100952, - "learning_rate": 7.819148936170211e-06, - "loss": 0.25923154354095457, - "step": 50 - }, - { - "epoch": 0.00937766410912191, - "grad_norm": 0.5942133069038391, - "learning_rate": 8.617021276595744e-06, - "loss": 0.18589640855789186, - "step": 55 - }, - { - "epoch": 0.010230179028132993, - "grad_norm": 0.40469691157341003, - "learning_rate": 9.414893617021275e-06, - "loss": 0.14508966207504273, - "step": 60 - }, - { - "epoch": 0.011082693947144074, - "grad_norm": 0.319180428981781, - "learning_rate": 1.0212765957446808e-05, - "loss": 0.1287898302078247, - "step": 65 - }, - { - "epoch": 0.011935208866155157, - "grad_norm": 0.2620982229709625, - "learning_rate": 1.101063829787234e-05, - "loss": 0.10825083255767823, - "step": 70 - }, - { - "epoch": 0.01278772378516624, - "grad_norm": 0.2702063024044037, - "learning_rate": 1.1808510638297872e-05, - "loss": 0.09140915870666504, - "step": 75 - }, - { - "epoch": 0.013640238704177323, - "grad_norm": 0.21019567549228668, - "learning_rate": 1.2606382978723403e-05, - "loss": 0.07847741842269898, - "step": 80 - }, - { - "epoch": 0.014492753623188406, - "grad_norm": 0.27865487337112427, - "learning_rate": 1.3404255319148936e-05, - "loss": 0.0681579053401947, - "step": 85 - }, - { - "epoch": 0.015345268542199489, - "grad_norm": 0.2561706006526947, - "learning_rate": 1.4202127659574466e-05, - "loss": 0.06208043098449707, - "step": 90 - }, - { - "epoch": 0.01619778346121057, - "grad_norm": 0.33415308594703674, - "learning_rate": 1.4999999999999999e-05, - "loss": 0.05436077117919922, - "step": 95 - }, - { - "epoch": 0.017050298380221655, - "grad_norm": 0.20705723762512207, - "learning_rate": 1.579787234042553e-05, - "loss": 0.061427068710327146, - "step": 100 - }, - { - "epoch": 0.017902813299232736, - "grad_norm": 0.26269420981407166, - "learning_rate": 1.659574468085106e-05, - "loss": 0.04944279193878174, - "step": 105 - }, - { - "epoch": 0.01875532821824382, - "grad_norm": 0.256533682346344, - "learning_rate": 1.7393617021276596e-05, - "loss": 0.0463131994009018, - "step": 110 - }, - { - "epoch": 0.0196078431372549, - "grad_norm": 0.25077348947525024, - "learning_rate": 1.8191489361702127e-05, - "loss": 0.036723154783248904, - "step": 115 - }, - { - "epoch": 0.020460358056265986, - "grad_norm": 0.2801378071308136, - "learning_rate": 1.8989361702127655e-05, - "loss": 0.04308137893676758, - "step": 120 - }, - { - "epoch": 0.021312872975277068, - "grad_norm": 0.23140908777713776, - "learning_rate": 1.978723404255319e-05, - "loss": 0.0391487717628479, - "step": 125 - }, - { - "epoch": 0.02216538789428815, - "grad_norm": 0.4250975251197815, - "learning_rate": 2.0585106382978724e-05, - "loss": 0.03745899498462677, - "step": 130 - }, - { - "epoch": 0.023017902813299233, - "grad_norm": 0.24533668160438538, - "learning_rate": 2.1382978723404255e-05, - "loss": 0.03649275898933411, - "step": 135 - }, - { - "epoch": 0.023870417732310314, - "grad_norm": 0.2309182733297348, - "learning_rate": 2.2180851063829783e-05, - "loss": 0.036090463399887085, - "step": 140 - }, - { - "epoch": 0.0247229326513214, - "grad_norm": 0.28087928891181946, - "learning_rate": 2.2978723404255317e-05, - "loss": 0.03179733753204346, - "step": 145 - }, - { - "epoch": 0.02557544757033248, - "grad_norm": 0.2785134017467499, - "learning_rate": 2.377659574468085e-05, - "loss": 0.02794267237186432, - "step": 150 - }, - { - "epoch": 0.026427962489343565, - "grad_norm": 0.20468176901340485, - "learning_rate": 2.457446808510638e-05, - "loss": 0.025854837894439698, - "step": 155 - }, - { - "epoch": 0.027280477408354646, - "grad_norm": 0.4069538116455078, - "learning_rate": 2.537234042553191e-05, - "loss": 0.03182780146598816, - "step": 160 - }, - { - "epoch": 0.028132992327365727, - "grad_norm": 0.3025060296058655, - "learning_rate": 2.6170212765957446e-05, - "loss": 0.027975103259086607, - "step": 165 - }, - { - "epoch": 0.028985507246376812, - "grad_norm": 0.22387781739234924, - "learning_rate": 2.6968085106382977e-05, - "loss": 0.025766396522521974, - "step": 170 - }, - { - "epoch": 0.029838022165387893, - "grad_norm": 0.15060213208198547, - "learning_rate": 2.7765957446808508e-05, - "loss": 0.025661858916282653, - "step": 175 - }, - { - "epoch": 0.030690537084398978, - "grad_norm": 0.2912445366382599, - "learning_rate": 2.856382978723404e-05, - "loss": 0.02397846579551697, - "step": 180 - }, - { - "epoch": 0.03154305200341006, - "grad_norm": 0.19390830397605896, - "learning_rate": 2.9361702127659574e-05, - "loss": 0.023744897544384004, - "step": 185 - }, - { - "epoch": 0.03239556692242114, - "grad_norm": 0.24036464095115662, - "learning_rate": 3.0159574468085105e-05, - "loss": 0.0238590270280838, - "step": 190 - }, - { - "epoch": 0.03324808184143223, - "grad_norm": 0.2793026566505432, - "learning_rate": 3.0957446808510636e-05, - "loss": 0.025464403629302978, - "step": 195 - }, - { - "epoch": 0.03410059676044331, - "grad_norm": 0.21164429187774658, - "learning_rate": 3.175531914893617e-05, - "loss": 0.020692554116249085, - "step": 200 - }, - { - "epoch": 0.03495311167945439, - "grad_norm": 0.3742266297340393, - "learning_rate": 3.25531914893617e-05, - "loss": 0.02315486818552017, - "step": 205 - }, - { - "epoch": 0.03580562659846547, - "grad_norm": 0.2690248191356659, - "learning_rate": 3.3351063829787226e-05, - "loss": 0.0174571692943573, - "step": 210 - }, - { - "epoch": 0.03665814151747655, - "grad_norm": 0.3727855980396271, - "learning_rate": 3.414893617021276e-05, - "loss": 0.021705827116966246, - "step": 215 - }, - { - "epoch": 0.03751065643648764, - "grad_norm": 0.27461397647857666, - "learning_rate": 3.4946808510638296e-05, - "loss": 0.028134092688560486, - "step": 220 - }, - { - "epoch": 0.03836317135549872, - "grad_norm": 0.1723722219467163, - "learning_rate": 3.574468085106383e-05, - "loss": 0.02093944847583771, - "step": 225 - }, - { - "epoch": 0.0392156862745098, - "grad_norm": 0.17161321640014648, - "learning_rate": 3.654255319148936e-05, - "loss": 0.021984823048114777, - "step": 230 - }, - { - "epoch": 0.040068201193520885, - "grad_norm": 0.203886479139328, - "learning_rate": 3.734042553191489e-05, - "loss": 0.02234097272157669, - "step": 235 - }, - { - "epoch": 0.04092071611253197, - "grad_norm": 0.2021923065185547, - "learning_rate": 3.813829787234042e-05, - "loss": 0.02671273946762085, - "step": 240 - }, - { - "epoch": 0.041773231031543054, - "grad_norm": 0.24510027468204498, - "learning_rate": 3.8936170212765955e-05, - "loss": 0.016925157606601716, - "step": 245 - }, - { - "epoch": 0.042625745950554135, - "grad_norm": 0.191350057721138, - "learning_rate": 3.973404255319149e-05, - "loss": 0.02067229151725769, - "step": 250 - }, - { - "epoch": 0.043478260869565216, - "grad_norm": 0.20248189568519592, - "learning_rate": 4.053191489361702e-05, - "loss": 0.01805101931095123, - "step": 255 - }, - { - "epoch": 0.0443307757885763, - "grad_norm": 0.22623269259929657, - "learning_rate": 4.1329787234042545e-05, - "loss": 0.019811727106571198, - "step": 260 - }, - { - "epoch": 0.045183290707587385, - "grad_norm": 0.2050849199295044, - "learning_rate": 4.2127659574468086e-05, - "loss": 0.017767781019210817, - "step": 265 - }, - { - "epoch": 0.04603580562659847, - "grad_norm": 0.24867363274097443, - "learning_rate": 4.2925531914893614e-05, - "loss": 0.02127009928226471, - "step": 270 - }, - { - "epoch": 0.04688832054560955, - "grad_norm": 0.20721694827079773, - "learning_rate": 4.372340425531914e-05, - "loss": 0.022770658135414124, - "step": 275 - }, - { - "epoch": 0.04774083546462063, - "grad_norm": 0.19511370360851288, - "learning_rate": 4.452127659574468e-05, - "loss": 0.02165296971797943, - "step": 280 - }, - { - "epoch": 0.04859335038363171, - "grad_norm": 0.19066740572452545, - "learning_rate": 4.5319148936170204e-05, - "loss": 0.020857495069503785, - "step": 285 - }, - { - "epoch": 0.0494458653026428, - "grad_norm": 0.14217901229858398, - "learning_rate": 4.6117021276595746e-05, - "loss": 0.016413891315460206, - "step": 290 - }, - { - "epoch": 0.05029838022165388, - "grad_norm": 0.24177870154380798, - "learning_rate": 4.6914893617021274e-05, - "loss": 0.01902117133140564, - "step": 295 - }, - { - "epoch": 0.05115089514066496, - "grad_norm": 0.2742858827114105, - "learning_rate": 4.77127659574468e-05, - "loss": 0.016554126143455507, - "step": 300 - }, - { - "epoch": 0.05200341005967604, - "grad_norm": 0.24519385397434235, - "learning_rate": 4.851063829787234e-05, - "loss": 0.022036357223987578, - "step": 305 - }, - { - "epoch": 0.05285592497868713, - "grad_norm": 0.296572208404541, - "learning_rate": 4.930851063829787e-05, - "loss": 0.0196581095457077, - "step": 310 - }, - { - "epoch": 0.05370843989769821, - "grad_norm": 0.17092008888721466, - "learning_rate": 5.01063829787234e-05, - "loss": 0.015453743934631347, - "step": 315 - }, - { - "epoch": 0.05456095481670929, - "grad_norm": 0.17074067890644073, - "learning_rate": 5.090425531914893e-05, - "loss": 0.013983796536922454, - "step": 320 - }, - { - "epoch": 0.05541346973572037, - "grad_norm": 0.27302470803260803, - "learning_rate": 5.170212765957446e-05, - "loss": 0.015387402474880218, - "step": 325 - }, - { - "epoch": 0.056265984654731455, - "grad_norm": 0.2603592872619629, - "learning_rate": 5.2499999999999995e-05, - "loss": 0.013557825982570649, - "step": 330 - }, - { - "epoch": 0.05711849957374254, - "grad_norm": 0.23079948127269745, - "learning_rate": 5.329787234042553e-05, - "loss": 0.014809322357177735, - "step": 335 - }, - { - "epoch": 0.057971014492753624, - "grad_norm": 0.15029627084732056, - "learning_rate": 5.409574468085106e-05, - "loss": 0.018204084038734435, - "step": 340 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 0.2930934429168701, - "learning_rate": 5.4893617021276586e-05, - "loss": 0.02035558819770813, - "step": 345 - }, - { - "epoch": 0.059676044330775786, - "grad_norm": 0.19710905849933624, - "learning_rate": 5.569148936170213e-05, - "loss": 0.015025021135807037, - "step": 350 - }, - { - "epoch": 0.060528559249786874, - "grad_norm": 0.13316969573497772, - "learning_rate": 5.6489361702127655e-05, - "loss": 0.01258893460035324, - "step": 355 - }, - { - "epoch": 0.061381074168797956, - "grad_norm": 0.30591729283332825, - "learning_rate": 5.728723404255319e-05, - "loss": 0.0187319278717041, - "step": 360 - }, - { - "epoch": 0.06223358908780904, - "grad_norm": 0.22696663439273834, - "learning_rate": 5.808510638297872e-05, - "loss": 0.015805599093437196, - "step": 365 - }, - { - "epoch": 0.06308610400682012, - "grad_norm": 0.2800130546092987, - "learning_rate": 5.8882978723404245e-05, - "loss": 0.01719983071088791, - "step": 370 - }, - { - "epoch": 0.0639386189258312, - "grad_norm": 0.20671235024929047, - "learning_rate": 5.9680851063829786e-05, - "loss": 0.014449423551559449, - "step": 375 - }, - { - "epoch": 0.06479113384484228, - "grad_norm": 0.1630883365869522, - "learning_rate": 6.0478723404255314e-05, - "loss": 0.017153808474540712, - "step": 380 - }, - { - "epoch": 0.06564364876385337, - "grad_norm": 0.18699344992637634, - "learning_rate": 6.127659574468084e-05, - "loss": 0.015558701753616334, - "step": 385 - }, - { - "epoch": 0.06649616368286446, - "grad_norm": 0.15257929265499115, - "learning_rate": 6.207446808510638e-05, - "loss": 0.018240168690681458, - "step": 390 - }, - { - "epoch": 0.06734867860187553, - "grad_norm": 0.19658304750919342, - "learning_rate": 6.287234042553191e-05, - "loss": 0.015907022356987, - "step": 395 - }, - { - "epoch": 0.06820119352088662, - "grad_norm": 0.3743384778499603, - "learning_rate": 6.367021276595743e-05, - "loss": 0.014921861886978149, - "step": 400 - }, - { - "epoch": 0.06905370843989769, - "grad_norm": 0.254724383354187, - "learning_rate": 6.446808510638298e-05, - "loss": 0.018739913403987885, - "step": 405 - }, - { - "epoch": 0.06990622335890878, - "grad_norm": 0.19365151226520538, - "learning_rate": 6.52659574468085e-05, - "loss": 0.018984711170196532, - "step": 410 - }, - { - "epoch": 0.07075873827791987, - "grad_norm": 0.20728597044944763, - "learning_rate": 6.606382978723404e-05, - "loss": 0.01524859368801117, - "step": 415 - }, - { - "epoch": 0.07161125319693094, - "grad_norm": 0.1415344476699829, - "learning_rate": 6.686170212765957e-05, - "loss": 0.014004582166671753, - "step": 420 - }, - { - "epoch": 0.07246376811594203, - "grad_norm": 0.14169375598430634, - "learning_rate": 6.765957446808509e-05, - "loss": 0.013503608107566834, - "step": 425 - }, - { - "epoch": 0.0733162830349531, - "grad_norm": 0.22518369555473328, - "learning_rate": 6.845744680851064e-05, - "loss": 0.017587104439735414, - "step": 430 - }, - { - "epoch": 0.0741687979539642, - "grad_norm": 0.1091267541050911, - "learning_rate": 6.925531914893616e-05, - "loss": 0.013890406489372254, - "step": 435 - }, - { - "epoch": 0.07502131287297528, - "grad_norm": 0.24982722103595734, - "learning_rate": 7.00531914893617e-05, - "loss": 0.01599075198173523, - "step": 440 - }, - { - "epoch": 0.07587382779198636, - "grad_norm": 0.22311007976531982, - "learning_rate": 7.085106382978723e-05, - "loss": 0.014870230853557587, - "step": 445 - }, - { - "epoch": 0.07672634271099744, - "grad_norm": 0.27064985036849976, - "learning_rate": 7.164893617021276e-05, - "loss": 0.019213163852691652, - "step": 450 - }, - { - "epoch": 0.07757885763000852, - "grad_norm": 0.16876882314682007, - "learning_rate": 7.244680851063829e-05, - "loss": 0.014935044944286347, - "step": 455 - }, - { - "epoch": 0.0784313725490196, - "grad_norm": 0.18644963204860687, - "learning_rate": 7.324468085106382e-05, - "loss": 0.013823372125625611, - "step": 460 - }, - { - "epoch": 0.0792838874680307, - "grad_norm": 0.13067972660064697, - "learning_rate": 7.404255319148935e-05, - "loss": 0.013839980959892273, - "step": 465 - }, - { - "epoch": 0.08013640238704177, - "grad_norm": 0.0976850613951683, - "learning_rate": 7.484042553191489e-05, - "loss": 0.012782379984855652, - "step": 470 - }, - { - "epoch": 0.08098891730605286, - "grad_norm": 0.2523496150970459, - "learning_rate": 7.499999439800074e-05, - "loss": 0.013075053691864014, - "step": 475 - }, - { - "epoch": 0.08184143222506395, - "grad_norm": 0.18349343538284302, - "learning_rate": 7.499997163988164e-05, - "loss": 0.01753528565168381, - "step": 480 - }, - { - "epoch": 0.08269394714407502, - "grad_norm": 0.16528834402561188, - "learning_rate": 7.499993137552834e-05, - "loss": 0.012987489998340606, - "step": 485 - }, - { - "epoch": 0.08354646206308611, - "grad_norm": 0.1918938159942627, - "learning_rate": 7.499987360495964e-05, - "loss": 0.018496687710285186, - "step": 490 - }, - { - "epoch": 0.08439897698209718, - "grad_norm": 0.11452502012252808, - "learning_rate": 7.499979832820255e-05, - "loss": 0.015578755736351013, - "step": 495 - }, - { - "epoch": 0.08525149190110827, - "grad_norm": 0.2084985226392746, - "learning_rate": 7.499970554529216e-05, - "loss": 0.014264023303985596, - "step": 500 - }, - { - "epoch": 0.08610400682011936, - "grad_norm": 0.13777600228786469, - "learning_rate": 7.49995952562718e-05, - "loss": 0.014527246356010437, - "step": 505 - }, - { - "epoch": 0.08695652173913043, - "grad_norm": 0.2515336275100708, - "learning_rate": 7.499946746119296e-05, - "loss": 0.019042417407035828, - "step": 510 - }, - { - "epoch": 0.08780903665814152, - "grad_norm": 0.12859591841697693, - "learning_rate": 7.499932216011531e-05, - "loss": 0.01340993344783783, - "step": 515 - }, - { - "epoch": 0.0886615515771526, - "grad_norm": 0.15603038668632507, - "learning_rate": 7.499915935310667e-05, - "loss": 0.01645509898662567, - "step": 520 - }, - { - "epoch": 0.08951406649616368, - "grad_norm": 0.1493784785270691, - "learning_rate": 7.499897904024303e-05, - "loss": 0.016503919661045075, - "step": 525 - }, - { - "epoch": 0.09036658141517477, - "grad_norm": 0.14551150798797607, - "learning_rate": 7.499878122160858e-05, - "loss": 0.013891169428825378, - "step": 530 - }, - { - "epoch": 0.09121909633418585, - "grad_norm": 0.12197990715503693, - "learning_rate": 7.499856589729569e-05, - "loss": 0.01531532108783722, - "step": 535 - }, - { - "epoch": 0.09207161125319693, - "grad_norm": 0.24787496030330658, - "learning_rate": 7.499833306740485e-05, - "loss": 0.016374409198760986, - "step": 540 - }, - { - "epoch": 0.09292412617220801, - "grad_norm": 0.10902808606624603, - "learning_rate": 7.499808273204476e-05, - "loss": 0.013505718111991883, - "step": 545 - }, - { - "epoch": 0.0937766410912191, - "grad_norm": 0.16540755331516266, - "learning_rate": 7.499781489133228e-05, - "loss": 0.016257329285144805, - "step": 550 - }, - { - "epoch": 0.09462915601023018, - "grad_norm": 0.1228717565536499, - "learning_rate": 7.499752954539245e-05, - "loss": 0.011345921456813813, - "step": 555 - }, - { - "epoch": 0.09548167092924126, - "grad_norm": 0.12704797089099884, - "learning_rate": 7.49972266943585e-05, - "loss": 0.014449910819530487, - "step": 560 - }, - { - "epoch": 0.09633418584825235, - "grad_norm": 0.2014201581478119, - "learning_rate": 7.499690633837178e-05, - "loss": 0.01456935852766037, - "step": 565 - }, - { - "epoch": 0.09718670076726342, - "grad_norm": 0.17480657994747162, - "learning_rate": 7.499656847758187e-05, - "loss": 0.014413098990917205, - "step": 570 - }, - { - "epoch": 0.09803921568627451, - "grad_norm": 0.18946893513202667, - "learning_rate": 7.499621311214646e-05, - "loss": 0.0139508917927742, - "step": 575 - }, - { - "epoch": 0.0988917306052856, - "grad_norm": 0.15367820858955383, - "learning_rate": 7.499584024223149e-05, - "loss": 0.014091435074806213, - "step": 580 - }, - { - "epoch": 0.09974424552429667, - "grad_norm": 0.15018688142299652, - "learning_rate": 7.499544986801099e-05, - "loss": 0.016737687587738036, - "step": 585 - }, - { - "epoch": 0.10059676044330776, - "grad_norm": 0.17522579431533813, - "learning_rate": 7.499504198966722e-05, - "loss": 0.015822765231132508, - "step": 590 - }, - { - "epoch": 0.10144927536231885, - "grad_norm": 0.12634818255901337, - "learning_rate": 7.499461660739059e-05, - "loss": 0.015363042056560517, - "step": 595 - }, - { - "epoch": 0.10230179028132992, - "grad_norm": 0.12466495484113693, - "learning_rate": 7.499417372137968e-05, - "loss": 0.013208043575286866, - "step": 600 - }, - { - "epoch": 0.10315430520034101, - "grad_norm": 0.18877951800823212, - "learning_rate": 7.499371333184125e-05, - "loss": 0.016261917352676392, - "step": 605 - }, - { - "epoch": 0.10400682011935208, - "grad_norm": 0.21521399915218353, - "learning_rate": 7.49932354389902e-05, - "loss": 0.013861049711704255, - "step": 610 - }, - { - "epoch": 0.10485933503836317, - "grad_norm": 0.1375613659620285, - "learning_rate": 7.499274004304964e-05, - "loss": 0.015597744286060334, - "step": 615 - }, - { - "epoch": 0.10571184995737426, - "grad_norm": 0.16078200936317444, - "learning_rate": 7.499222714425087e-05, - "loss": 0.018308353424072266, - "step": 620 - }, - { - "epoch": 0.10656436487638533, - "grad_norm": 0.15485632419586182, - "learning_rate": 7.499169674283328e-05, - "loss": 0.015836401283740996, - "step": 625 - }, - { - "epoch": 0.10741687979539642, - "grad_norm": 0.14019452035427094, - "learning_rate": 7.499114883904451e-05, - "loss": 0.014591602981090546, - "step": 630 - }, - { - "epoch": 0.1082693947144075, - "grad_norm": 0.2042212039232254, - "learning_rate": 7.499058343314031e-05, - "loss": 0.01718664914369583, - "step": 635 - }, - { - "epoch": 0.10912190963341858, - "grad_norm": 0.13801656663417816, - "learning_rate": 7.499000052538467e-05, - "loss": 0.015579727292060853, - "step": 640 - }, - { - "epoch": 0.10997442455242967, - "grad_norm": 0.12787523865699768, - "learning_rate": 7.498940011604968e-05, - "loss": 0.012525486946105956, - "step": 645 - }, - { - "epoch": 0.11082693947144075, - "grad_norm": 0.1752539724111557, - "learning_rate": 7.498878220541564e-05, - "loss": 0.014286568760871888, - "step": 650 - }, - { - "epoch": 0.11167945439045183, - "grad_norm": 0.12485212087631226, - "learning_rate": 7.498814679377101e-05, - "loss": 0.01482405811548233, - "step": 655 - }, - { - "epoch": 0.11253196930946291, - "grad_norm": 0.10980220139026642, - "learning_rate": 7.498749388141243e-05, - "loss": 0.01597980558872223, - "step": 660 - }, - { - "epoch": 0.113384484228474, - "grad_norm": 0.09144977480173111, - "learning_rate": 7.498682346864469e-05, - "loss": 0.011583130061626434, - "step": 665 - }, - { - "epoch": 0.11423699914748509, - "grad_norm": 0.11955336481332779, - "learning_rate": 7.498613555578076e-05, - "loss": 0.013009116053581238, - "step": 670 - }, - { - "epoch": 0.11508951406649616, - "grad_norm": 0.14505070447921753, - "learning_rate": 7.49854301431418e-05, - "loss": 0.013878281414508819, - "step": 675 - }, - { - "epoch": 0.11594202898550725, - "grad_norm": 0.15674598515033722, - "learning_rate": 7.49847072310571e-05, - "loss": 0.014717698097229004, - "step": 680 - }, - { - "epoch": 0.11679454390451834, - "grad_norm": 0.16684003174304962, - "learning_rate": 7.498396681986413e-05, - "loss": 0.015567027032375336, - "step": 685 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 0.13314439356327057, - "learning_rate": 7.498320890990857e-05, - "loss": 0.017679129540920258, - "step": 690 - }, - { - "epoch": 0.1184995737425405, - "grad_norm": 0.1099700927734375, - "learning_rate": 7.498243350154423e-05, - "loss": 0.015660777688026428, - "step": 695 - }, - { - "epoch": 0.11935208866155157, - "grad_norm": 0.09305288642644882, - "learning_rate": 7.498164059513307e-05, - "loss": 0.011864218115806579, - "step": 700 - }, - { - "epoch": 0.12020460358056266, - "grad_norm": 0.1284978687763214, - "learning_rate": 7.498083019104527e-05, - "loss": 0.013128276169300079, - "step": 705 - }, - { - "epoch": 0.12105711849957375, - "grad_norm": 0.19205476343631744, - "learning_rate": 7.498000228965913e-05, - "loss": 0.014518238604068756, - "step": 710 - }, - { - "epoch": 0.12190963341858482, - "grad_norm": 0.25064221024513245, - "learning_rate": 7.497915689136119e-05, - "loss": 0.014235696196556092, - "step": 715 - }, - { - "epoch": 0.12276214833759591, - "grad_norm": 0.11866044998168945, - "learning_rate": 7.497829399654607e-05, - "loss": 0.013622967898845673, - "step": 720 - }, - { - "epoch": 0.12361466325660699, - "grad_norm": 0.1366252452135086, - "learning_rate": 7.49774136056166e-05, - "loss": 0.013592693209648132, - "step": 725 - }, - { - "epoch": 0.12446717817561807, - "grad_norm": 0.12722773849964142, - "learning_rate": 7.497651571898379e-05, - "loss": 0.02055167257785797, - "step": 730 - }, - { - "epoch": 0.12531969309462915, - "grad_norm": 0.07723517715930939, - "learning_rate": 7.49756003370668e-05, - "loss": 0.012377069890499115, - "step": 735 - }, - { - "epoch": 0.12617220801364024, - "grad_norm": 0.11081967502832413, - "learning_rate": 7.497466746029293e-05, - "loss": 0.013797640800476074, - "step": 740 - }, - { - "epoch": 0.12702472293265132, - "grad_norm": 0.1117677390575409, - "learning_rate": 7.497371708909771e-05, - "loss": 0.01338990479707718, - "step": 745 - }, - { - "epoch": 0.1278772378516624, - "grad_norm": 0.24659112095832825, - "learning_rate": 7.497274922392483e-05, - "loss": 0.012844511866569519, - "step": 750 - }, - { - "epoch": 0.1287297527706735, - "grad_norm": 0.24900244176387787, - "learning_rate": 7.497176386522606e-05, - "loss": 0.015656352043151855, - "step": 755 - }, - { - "epoch": 0.12958226768968456, - "grad_norm": 0.1331390142440796, - "learning_rate": 7.497076101346144e-05, - "loss": 0.013722085952758789, - "step": 760 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 0.1224697008728981, - "learning_rate": 7.496974066909913e-05, - "loss": 0.011186304688453674, - "step": 765 - }, - { - "epoch": 0.13128729752770674, - "grad_norm": 0.11053820699453354, - "learning_rate": 7.496870283261546e-05, - "loss": 0.012894454598426818, - "step": 770 - }, - { - "epoch": 0.13213981244671782, - "grad_norm": 0.09663277864456177, - "learning_rate": 7.49676475044949e-05, - "loss": 0.015331995487213135, - "step": 775 - }, - { - "epoch": 0.1329923273657289, - "grad_norm": 0.1483219712972641, - "learning_rate": 7.496657468523014e-05, - "loss": 0.015070399641990662, - "step": 780 - }, - { - "epoch": 0.13384484228473997, - "grad_norm": 0.12375539541244507, - "learning_rate": 7.496548437532202e-05, - "loss": 0.013722005486488342, - "step": 785 - }, - { - "epoch": 0.13469735720375106, - "grad_norm": 0.14662130177021027, - "learning_rate": 7.496437657527949e-05, - "loss": 0.012852996587753296, - "step": 790 - }, - { - "epoch": 0.13554987212276215, - "grad_norm": 0.1740300953388214, - "learning_rate": 7.496325128561975e-05, - "loss": 0.014796209335327149, - "step": 795 - }, - { - "epoch": 0.13640238704177324, - "grad_norm": 0.10670243203639984, - "learning_rate": 7.496210850686809e-05, - "loss": 0.013983005285263061, - "step": 800 - }, - { - "epoch": 0.13725490196078433, - "grad_norm": 0.1362515240907669, - "learning_rate": 7.496094823955801e-05, - "loss": 0.014472618699073792, - "step": 805 - }, - { - "epoch": 0.13810741687979539, - "grad_norm": 0.11403689533472061, - "learning_rate": 7.495977048423117e-05, - "loss": 0.01294848918914795, - "step": 810 - }, - { - "epoch": 0.13895993179880647, - "grad_norm": 0.11532565206289291, - "learning_rate": 7.495857524143738e-05, - "loss": 0.011346425861120224, - "step": 815 - }, - { - "epoch": 0.13981244671781756, - "grad_norm": 0.1506761759519577, - "learning_rate": 7.495736251173463e-05, - "loss": 0.016532811522483825, - "step": 820 - }, - { - "epoch": 0.14066496163682865, - "grad_norm": 0.10915899276733398, - "learning_rate": 7.495613229568903e-05, - "loss": 0.01278679072856903, - "step": 825 - }, - { - "epoch": 0.14151747655583974, - "grad_norm": 0.10394130647182465, - "learning_rate": 7.49548845938749e-05, - "loss": 0.011449626088142395, - "step": 830 - }, - { - "epoch": 0.1423699914748508, - "grad_norm": 0.08730677515268326, - "learning_rate": 7.495361940687475e-05, - "loss": 0.011916645616292954, - "step": 835 - }, - { - "epoch": 0.1432225063938619, - "grad_norm": 0.08257348835468292, - "learning_rate": 7.495233673527914e-05, - "loss": 0.013689276576042176, - "step": 840 - }, - { - "epoch": 0.14407502131287298, - "grad_norm": 0.182078555226326, - "learning_rate": 7.495103657968692e-05, - "loss": 0.019141729176044463, - "step": 845 - }, - { - "epoch": 0.14492753623188406, - "grad_norm": 0.105568528175354, - "learning_rate": 7.494971894070501e-05, - "loss": 0.015522226691246033, - "step": 850 - }, - { - "epoch": 0.14578005115089515, - "grad_norm": 0.09030122309923172, - "learning_rate": 7.494838381894856e-05, - "loss": 0.012900182604789734, - "step": 855 - }, - { - "epoch": 0.1466325660699062, - "grad_norm": 0.11199921369552612, - "learning_rate": 7.494703121504082e-05, - "loss": 0.013011230528354645, - "step": 860 - }, - { - "epoch": 0.1474850809889173, - "grad_norm": 0.11342430859804153, - "learning_rate": 7.494566112961325e-05, - "loss": 0.015477502346038818, - "step": 865 - }, - { - "epoch": 0.1483375959079284, - "grad_norm": 0.07956613600254059, - "learning_rate": 7.494427356330544e-05, - "loss": 0.011270551383495331, - "step": 870 - }, - { - "epoch": 0.14919011082693948, - "grad_norm": 0.13356897234916687, - "learning_rate": 7.494286851676515e-05, - "loss": 0.012481572479009629, - "step": 875 - }, - { - "epoch": 0.15004262574595056, - "grad_norm": 0.14493827521800995, - "learning_rate": 7.494144599064833e-05, - "loss": 0.015318951010704041, - "step": 880 - }, - { - "epoch": 0.15089514066496162, - "grad_norm": 0.13254614174365997, - "learning_rate": 7.494000598561902e-05, - "loss": 0.012697519361972808, - "step": 885 - }, - { - "epoch": 0.1517476555839727, - "grad_norm": 0.12164720892906189, - "learning_rate": 7.49385485023495e-05, - "loss": 0.011361779272556305, - "step": 890 - }, - { - "epoch": 0.1526001705029838, - "grad_norm": 0.10743863880634308, - "learning_rate": 7.493707354152015e-05, - "loss": 0.012824115157127381, - "step": 895 - }, - { - "epoch": 0.1534526854219949, - "grad_norm": 0.0933203473687172, - "learning_rate": 7.493558110381954e-05, - "loss": 0.012234293669462205, - "step": 900 - }, - { - "epoch": 0.15430520034100598, - "grad_norm": 0.10252948850393295, - "learning_rate": 7.493407118994437e-05, - "loss": 0.01874733567237854, - "step": 905 - }, - { - "epoch": 0.15515771526001704, - "grad_norm": 0.1184248998761177, - "learning_rate": 7.493254380059954e-05, - "loss": 0.012014241516590118, - "step": 910 - }, - { - "epoch": 0.15601023017902813, - "grad_norm": 0.10140799731016159, - "learning_rate": 7.49309989364981e-05, - "loss": 0.013288506865501403, - "step": 915 - }, - { - "epoch": 0.1568627450980392, - "grad_norm": 0.15704941749572754, - "learning_rate": 7.492943659836121e-05, - "loss": 0.012907981872558594, - "step": 920 - }, - { - "epoch": 0.1577152600170503, - "grad_norm": 0.10368761420249939, - "learning_rate": 7.492785678691822e-05, - "loss": 0.01384827345609665, - "step": 925 - }, - { - "epoch": 0.1585677749360614, - "grad_norm": 0.13517284393310547, - "learning_rate": 7.492625950290668e-05, - "loss": 0.014644764363765717, - "step": 930 - }, - { - "epoch": 0.15942028985507245, - "grad_norm": 0.08189263194799423, - "learning_rate": 7.492464474707222e-05, - "loss": 0.011974713951349258, - "step": 935 - }, - { - "epoch": 0.16027280477408354, - "grad_norm": 0.11737149208784103, - "learning_rate": 7.492301252016867e-05, - "loss": 0.014023615419864655, - "step": 940 - }, - { - "epoch": 0.16112531969309463, - "grad_norm": 0.15778031945228577, - "learning_rate": 7.492136282295801e-05, - "loss": 0.01533726304769516, - "step": 945 - }, - { - "epoch": 0.16197783461210571, - "grad_norm": 0.14194118976593018, - "learning_rate": 7.491969565621036e-05, - "loss": 0.01569782793521881, - "step": 950 - }, - { - "epoch": 0.1628303495311168, - "grad_norm": 0.12579558789730072, - "learning_rate": 7.491801102070403e-05, - "loss": 0.012844063341617584, - "step": 955 - }, - { - "epoch": 0.1636828644501279, - "grad_norm": 0.10578597337007523, - "learning_rate": 7.491630891722547e-05, - "loss": 0.011186198890209198, - "step": 960 - }, - { - "epoch": 0.16453537936913895, - "grad_norm": 0.12398207187652588, - "learning_rate": 7.491458934656925e-05, - "loss": 0.014328254759311676, - "step": 965 - }, - { - "epoch": 0.16538789428815004, - "grad_norm": 0.09390626102685928, - "learning_rate": 7.491285230953814e-05, - "loss": 0.013881708681583404, - "step": 970 - }, - { - "epoch": 0.16624040920716113, - "grad_norm": 0.17196106910705566, - "learning_rate": 7.491109780694303e-05, - "loss": 0.014424234628677368, - "step": 975 - }, - { - "epoch": 0.16709292412617222, - "grad_norm": 0.06994624435901642, - "learning_rate": 7.490932583960302e-05, - "loss": 0.010434426367282867, - "step": 980 - }, - { - "epoch": 0.1679454390451833, - "grad_norm": 0.09414499998092651, - "learning_rate": 7.490753640834527e-05, - "loss": 0.010483803600072861, - "step": 985 - }, - { - "epoch": 0.16879795396419436, - "grad_norm": 0.11002875864505768, - "learning_rate": 7.490572951400518e-05, - "loss": 0.009266073256731034, - "step": 990 - }, - { - "epoch": 0.16965046888320545, - "grad_norm": 0.14956022799015045, - "learning_rate": 7.490390515742626e-05, - "loss": 0.015529079735279084, - "step": 995 - }, - { - "epoch": 0.17050298380221654, - "grad_norm": 0.13828666508197784, - "learning_rate": 7.490206333946019e-05, - "loss": 0.011511271446943283, - "step": 1000 - }, - { - "epoch": 0.17135549872122763, - "grad_norm": 0.1265997439622879, - "learning_rate": 7.490020406096678e-05, - "loss": 0.010465707629919052, - "step": 1005 - }, - { - "epoch": 0.17220801364023872, - "grad_norm": 0.07772056013345718, - "learning_rate": 7.489832732281401e-05, - "loss": 0.013828210532665253, - "step": 1010 - }, - { - "epoch": 0.17306052855924978, - "grad_norm": 0.08055376261472702, - "learning_rate": 7.489643312587799e-05, - "loss": 0.013010218739509583, - "step": 1015 - }, - { - "epoch": 0.17391304347826086, - "grad_norm": 0.09059367328882217, - "learning_rate": 7.489452147104301e-05, - "loss": 0.013864235579967498, - "step": 1020 - }, - { - "epoch": 0.17476555839727195, - "grad_norm": 0.15273553133010864, - "learning_rate": 7.489259235920149e-05, - "loss": 0.013432112336158753, - "step": 1025 - }, - { - "epoch": 0.17561807331628304, - "grad_norm": 0.11343793570995331, - "learning_rate": 7.489064579125399e-05, - "loss": 0.01213686615228653, - "step": 1030 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 0.11880069226026535, - "learning_rate": 7.488868176810926e-05, - "loss": 0.012188264727592468, - "step": 1035 - }, - { - "epoch": 0.1773231031543052, - "grad_norm": 0.16270127892494202, - "learning_rate": 7.488670029068415e-05, - "loss": 0.015294317901134492, - "step": 1040 - }, - { - "epoch": 0.17817561807331628, - "grad_norm": 0.11542797088623047, - "learning_rate": 7.488470135990369e-05, - "loss": 0.013500772416591644, - "step": 1045 - }, - { - "epoch": 0.17902813299232737, - "grad_norm": 0.09579882025718689, - "learning_rate": 7.488268497670103e-05, - "loss": 0.013453315198421478, - "step": 1050 - }, - { - "epoch": 0.17988064791133845, - "grad_norm": 0.08847666531801224, - "learning_rate": 7.488065114201752e-05, - "loss": 0.0153861403465271, - "step": 1055 - }, - { - "epoch": 0.18073316283034954, - "grad_norm": 0.11167823523283005, - "learning_rate": 7.487859985680257e-05, - "loss": 0.011502107977867127, - "step": 1060 - }, - { - "epoch": 0.1815856777493606, - "grad_norm": 0.11255413293838501, - "learning_rate": 7.487653112201385e-05, - "loss": 0.012194579839706421, - "step": 1065 - }, - { - "epoch": 0.1824381926683717, - "grad_norm": 0.10929680615663528, - "learning_rate": 7.487444493861705e-05, - "loss": 0.015874122083187104, - "step": 1070 - }, - { - "epoch": 0.18329070758738278, - "grad_norm": 0.10753592848777771, - "learning_rate": 7.487234130758613e-05, - "loss": 0.009445396810770034, - "step": 1075 - }, - { - "epoch": 0.18414322250639387, - "grad_norm": 0.11368737369775772, - "learning_rate": 7.487022022990309e-05, - "loss": 0.011674505472183228, - "step": 1080 - }, - { - "epoch": 0.18499573742540495, - "grad_norm": 0.12944836914539337, - "learning_rate": 7.486808170655813e-05, - "loss": 0.011856313049793243, - "step": 1085 - }, - { - "epoch": 0.18584825234441602, - "grad_norm": 0.0833214819431305, - "learning_rate": 7.48659257385496e-05, - "loss": 0.014119544625282287, - "step": 1090 - }, - { - "epoch": 0.1867007672634271, - "grad_norm": 0.11955790221691132, - "learning_rate": 7.486375232688397e-05, - "loss": 0.012977911531925202, - "step": 1095 - }, - { - "epoch": 0.1875532821824382, - "grad_norm": 0.1452455222606659, - "learning_rate": 7.486156147257584e-05, - "loss": 0.015410827100276947, - "step": 1100 - }, - { - "epoch": 0.18840579710144928, - "grad_norm": 0.09630092978477478, - "learning_rate": 7.485935317664801e-05, - "loss": 0.013698874413967133, - "step": 1105 - }, - { - "epoch": 0.18925831202046037, - "grad_norm": 0.11687257140874863, - "learning_rate": 7.485712744013137e-05, - "loss": 0.013196484744548797, - "step": 1110 - }, - { - "epoch": 0.19011082693947143, - "grad_norm": 0.08894632011651993, - "learning_rate": 7.485488426406495e-05, - "loss": 0.01540881097316742, - "step": 1115 - }, - { - "epoch": 0.19096334185848252, - "grad_norm": 0.09196458756923676, - "learning_rate": 7.485262364949597e-05, - "loss": 0.012018527090549468, - "step": 1120 - }, - { - "epoch": 0.1918158567774936, - "grad_norm": 0.12328553944826126, - "learning_rate": 7.485034559747974e-05, - "loss": 0.014925773441791534, - "step": 1125 - }, - { - "epoch": 0.1926683716965047, - "grad_norm": 0.12566202878952026, - "learning_rate": 7.484805010907975e-05, - "loss": 0.01104198843240738, - "step": 1130 - }, - { - "epoch": 0.19352088661551578, - "grad_norm": 0.1022765263915062, - "learning_rate": 7.484573718536758e-05, - "loss": 0.0118367500603199, - "step": 1135 - }, - { - "epoch": 0.19437340153452684, - "grad_norm": 0.09682224690914154, - "learning_rate": 7.4843406827423e-05, - "loss": 0.011423001438379288, - "step": 1140 - }, - { - "epoch": 0.19522591645353793, - "grad_norm": 0.15166254341602325, - "learning_rate": 7.484105903633388e-05, - "loss": 0.014048118889331818, - "step": 1145 - }, - { - "epoch": 0.19607843137254902, - "grad_norm": 0.09285032004117966, - "learning_rate": 7.483869381319627e-05, - "loss": 0.012882034480571746, - "step": 1150 - }, - { - "epoch": 0.1969309462915601, - "grad_norm": 0.09011214971542358, - "learning_rate": 7.483631115911434e-05, - "loss": 0.01419239491224289, - "step": 1155 - }, - { - "epoch": 0.1977834612105712, - "grad_norm": 0.14540383219718933, - "learning_rate": 7.483391107520037e-05, - "loss": 0.014623096585273743, - "step": 1160 - }, - { - "epoch": 0.19863597612958228, - "grad_norm": 0.12326448410749435, - "learning_rate": 7.483149356257479e-05, - "loss": 0.013109591603279114, - "step": 1165 - }, - { - "epoch": 0.19948849104859334, - "grad_norm": 0.07067535817623138, - "learning_rate": 7.482905862236622e-05, - "loss": 0.013740380108356477, - "step": 1170 - }, - { - "epoch": 0.20034100596760443, - "grad_norm": 0.10170529782772064, - "learning_rate": 7.482660625571134e-05, - "loss": 0.011151721328496933, - "step": 1175 - }, - { - "epoch": 0.20119352088661552, - "grad_norm": 0.10074683278799057, - "learning_rate": 7.482413646375498e-05, - "loss": 0.01180294007062912, - "step": 1180 - }, - { - "epoch": 0.2020460358056266, - "grad_norm": 0.047992780804634094, - "learning_rate": 7.482164924765016e-05, - "loss": 0.01065710186958313, - "step": 1185 - }, - { - "epoch": 0.2028985507246377, - "grad_norm": 0.1279197484254837, - "learning_rate": 7.481914460855796e-05, - "loss": 0.012219739705324173, - "step": 1190 - }, - { - "epoch": 0.20375106564364875, - "grad_norm": 0.11339649558067322, - "learning_rate": 7.481662254764765e-05, - "loss": 0.012664712965488434, - "step": 1195 - }, - { - "epoch": 0.20460358056265984, - "grad_norm": 0.10576959699392319, - "learning_rate": 7.481408306609662e-05, - "loss": 0.010009048134088516, - "step": 1200 - }, - { - "epoch": 0.20545609548167093, - "grad_norm": 0.08073283731937408, - "learning_rate": 7.481152616509037e-05, - "loss": 0.011126396805047989, - "step": 1205 - }, - { - "epoch": 0.20630861040068202, - "grad_norm": 0.09888558834791183, - "learning_rate": 7.480895184582253e-05, - "loss": 0.013096305727958679, - "step": 1210 - }, - { - "epoch": 0.2071611253196931, - "grad_norm": 0.09703118354082108, - "learning_rate": 7.48063601094949e-05, - "loss": 0.010653502494096755, - "step": 1215 - }, - { - "epoch": 0.20801364023870417, - "grad_norm": 0.10693266987800598, - "learning_rate": 7.48037509573174e-05, - "loss": 0.012283077836036682, - "step": 1220 - }, - { - "epoch": 0.20886615515771526, - "grad_norm": 0.1024889275431633, - "learning_rate": 7.480112439050804e-05, - "loss": 0.012971282005310059, - "step": 1225 - }, - { - "epoch": 0.20971867007672634, - "grad_norm": 0.10043203830718994, - "learning_rate": 7.4798480410293e-05, - "loss": 0.011451539397239686, - "step": 1230 - }, - { - "epoch": 0.21057118499573743, - "grad_norm": 0.11248672753572464, - "learning_rate": 7.47958190179066e-05, - "loss": 0.012805460393428803, - "step": 1235 - }, - { - "epoch": 0.21142369991474852, - "grad_norm": 0.08651525527238846, - "learning_rate": 7.479314021459123e-05, - "loss": 0.013016811013221741, - "step": 1240 - }, - { - "epoch": 0.21227621483375958, - "grad_norm": 0.06062095984816551, - "learning_rate": 7.479044400159746e-05, - "loss": 0.01299230456352234, - "step": 1245 - }, - { - "epoch": 0.21312872975277067, - "grad_norm": 0.1589215248823166, - "learning_rate": 7.478773038018397e-05, - "loss": 0.012607543170452118, - "step": 1250 - }, - { - "epoch": 0.21398124467178176, - "grad_norm": 0.12105076014995575, - "learning_rate": 7.478499935161758e-05, - "loss": 0.012772174179553985, - "step": 1255 - }, - { - "epoch": 0.21483375959079284, - "grad_norm": 0.0846070721745491, - "learning_rate": 7.478225091717323e-05, - "loss": 0.009387130290269852, - "step": 1260 - }, - { - "epoch": 0.21568627450980393, - "grad_norm": 0.054560381919145584, - "learning_rate": 7.477948507813396e-05, - "loss": 0.013299009203910828, - "step": 1265 - }, - { - "epoch": 0.216538789428815, - "grad_norm": 0.10125566273927689, - "learning_rate": 7.477670183579094e-05, - "loss": 0.013010343909263611, - "step": 1270 - }, - { - "epoch": 0.21739130434782608, - "grad_norm": 0.10493458807468414, - "learning_rate": 7.477390119144353e-05, - "loss": 0.013531042635440827, - "step": 1275 - }, - { - "epoch": 0.21824381926683717, - "grad_norm": 0.07453935593366623, - "learning_rate": 7.477108314639913e-05, - "loss": 0.01330820918083191, - "step": 1280 - }, - { - "epoch": 0.21909633418584826, - "grad_norm": 0.14430643618106842, - "learning_rate": 7.47682477019733e-05, - "loss": 0.015061555802822113, - "step": 1285 - }, - { - "epoch": 0.21994884910485935, - "grad_norm": 0.1073407456278801, - "learning_rate": 7.476539485948973e-05, - "loss": 0.011137319356203079, - "step": 1290 - }, - { - "epoch": 0.2208013640238704, - "grad_norm": 0.16794899106025696, - "learning_rate": 7.476252462028021e-05, - "loss": 0.013223762810230254, - "step": 1295 - }, - { - "epoch": 0.2216538789428815, - "grad_norm": 0.11509175598621368, - "learning_rate": 7.475963698568468e-05, - "loss": 0.012790821492671967, - "step": 1300 - }, - { - "epoch": 0.22250639386189258, - "grad_norm": 0.08615118265151978, - "learning_rate": 7.475673195705116e-05, - "loss": 0.011050455272197723, - "step": 1305 - }, - { - "epoch": 0.22335890878090367, - "grad_norm": 0.1186874732375145, - "learning_rate": 7.475380953573583e-05, - "loss": 0.011253353953361512, - "step": 1310 - }, - { - "epoch": 0.22421142369991476, - "grad_norm": 0.08680208027362823, - "learning_rate": 7.475086972310297e-05, - "loss": 0.010736887156963349, - "step": 1315 - }, - { - "epoch": 0.22506393861892582, - "grad_norm": 0.08204677700996399, - "learning_rate": 7.474791252052498e-05, - "loss": 0.011695361882448196, - "step": 1320 - }, - { - "epoch": 0.2259164535379369, - "grad_norm": 0.08945680409669876, - "learning_rate": 7.47449379293824e-05, - "loss": 0.01684057414531708, - "step": 1325 - }, - { - "epoch": 0.226768968456948, - "grad_norm": 0.12350911647081375, - "learning_rate": 7.474194595106384e-05, - "loss": 0.012560060620307923, - "step": 1330 - }, - { - "epoch": 0.22762148337595908, - "grad_norm": 0.09201455116271973, - "learning_rate": 7.473893658696605e-05, - "loss": 0.01128845140337944, - "step": 1335 - }, - { - "epoch": 0.22847399829497017, - "grad_norm": 0.12938448786735535, - "learning_rate": 7.473590983849396e-05, - "loss": 0.011510471999645232, - "step": 1340 - }, - { - "epoch": 0.22932651321398123, - "grad_norm": 0.0837291032075882, - "learning_rate": 7.473286570706047e-05, - "loss": 0.011677465587854385, - "step": 1345 - }, - { - "epoch": 0.23017902813299232, - "grad_norm": 0.12514546513557434, - "learning_rate": 7.472980419408675e-05, - "loss": 0.01308433711528778, - "step": 1350 - }, - { - "epoch": 0.2310315430520034, - "grad_norm": 0.05483829975128174, - "learning_rate": 7.472672530100199e-05, - "loss": 0.007203156501054764, - "step": 1355 - }, - { - "epoch": 0.2318840579710145, - "grad_norm": 0.13903425633907318, - "learning_rate": 7.472362902924352e-05, - "loss": 0.013231572508811951, - "step": 1360 - }, - { - "epoch": 0.23273657289002558, - "grad_norm": 0.09622041881084442, - "learning_rate": 7.472051538025678e-05, - "loss": 0.013325902819633483, - "step": 1365 - }, - { - "epoch": 0.23358908780903667, - "grad_norm": 0.0792275071144104, - "learning_rate": 7.471738435549533e-05, - "loss": 0.011098403483629227, - "step": 1370 - }, - { - "epoch": 0.23444160272804773, - "grad_norm": 0.11833506077528, - "learning_rate": 7.471423595642084e-05, - "loss": 0.014845246076583862, - "step": 1375 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 0.07531571388244629, - "learning_rate": 7.471107018450309e-05, - "loss": 0.011498528718948364, - "step": 1380 - }, - { - "epoch": 0.2361466325660699, - "grad_norm": 0.08739249408245087, - "learning_rate": 7.470788704121995e-05, - "loss": 0.013241058588027954, - "step": 1385 - }, - { - "epoch": 0.236999147485081, - "grad_norm": 0.07113732397556305, - "learning_rate": 7.470468652805743e-05, - "loss": 0.009632241725921632, - "step": 1390 - }, - { - "epoch": 0.23785166240409208, - "grad_norm": 0.07838430255651474, - "learning_rate": 7.470146864650965e-05, - "loss": 0.009344339370727539, - "step": 1395 - }, - { - "epoch": 0.23870417732310314, - "grad_norm": 0.1086372509598732, - "learning_rate": 7.46982333980788e-05, - "loss": 0.014708395302295684, - "step": 1400 - }, - { - "epoch": 0.23955669224211423, - "grad_norm": 0.06628046184778214, - "learning_rate": 7.469498078427519e-05, - "loss": 0.011472882330417633, - "step": 1405 - }, - { - "epoch": 0.24040920716112532, - "grad_norm": 0.13003386557102203, - "learning_rate": 7.46917108066173e-05, - "loss": 0.011933800578117371, - "step": 1410 - }, - { - "epoch": 0.2412617220801364, - "grad_norm": 0.07365831732749939, - "learning_rate": 7.468842346663162e-05, - "loss": 0.0102902352809906, - "step": 1415 - }, - { - "epoch": 0.2421142369991475, - "grad_norm": 0.11540158838033676, - "learning_rate": 7.468511876585279e-05, - "loss": 0.012016136944293977, - "step": 1420 - }, - { - "epoch": 0.24296675191815856, - "grad_norm": 0.11687944084405899, - "learning_rate": 7.468179670582359e-05, - "loss": 0.01773642897605896, - "step": 1425 - }, - { - "epoch": 0.24381926683716965, - "grad_norm": 0.10464506596326828, - "learning_rate": 7.467845728809483e-05, - "loss": 0.009476778656244278, - "step": 1430 - }, - { - "epoch": 0.24467178175618073, - "grad_norm": 0.11479093879461288, - "learning_rate": 7.46751005142255e-05, - "loss": 0.014100676774978638, - "step": 1435 - }, - { - "epoch": 0.24552429667519182, - "grad_norm": 0.1388610154390335, - "learning_rate": 7.46717263857826e-05, - "loss": 0.012735754251480103, - "step": 1440 - }, - { - "epoch": 0.2463768115942029, - "grad_norm": 0.08022485673427582, - "learning_rate": 7.466833490434132e-05, - "loss": 0.014391189813613892, - "step": 1445 - }, - { - "epoch": 0.24722932651321397, - "grad_norm": 0.12174725532531738, - "learning_rate": 7.466492607148492e-05, - "loss": 0.013387931883335114, - "step": 1450 - }, - { - "epoch": 0.24808184143222506, - "grad_norm": 0.1102161779999733, - "learning_rate": 7.466149988880474e-05, - "loss": 0.01143224760890007, - "step": 1455 - }, - { - "epoch": 0.24893435635123615, - "grad_norm": 0.14878468215465546, - "learning_rate": 7.465805635790024e-05, - "loss": 0.010428830236196517, - "step": 1460 - }, - { - "epoch": 0.24978687127024723, - "grad_norm": 0.3046579658985138, - "learning_rate": 7.4654595480379e-05, - "loss": 0.012648795545101166, - "step": 1465 - }, - { - "epoch": 0.24995737425404946, - "eval_loss": 0.036103956401348114, - "eval_runtime": 3.6524, - "eval_samples_per_second": 68.995, - "eval_steps_per_second": 1.095, - "step": 1466 - }, - { - "eval_cer_subset": 0.01393977885257381, - "eval_cer_subset_edit_distance": 856, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 1466 - }, - { - "epoch": 0.2506393861892583, - "grad_norm": 0.10660448670387268, - "learning_rate": 7.465111725785664e-05, - "loss": 0.013486798107624053, - "step": 1470 - }, - { - "epoch": 0.2514919011082694, - "grad_norm": 0.13888458907604218, - "learning_rate": 7.464762169195693e-05, - "loss": 0.015365575253963471, - "step": 1475 - }, - { - "epoch": 0.25234441602728047, - "grad_norm": 0.14945067465305328, - "learning_rate": 7.464410878431169e-05, - "loss": 0.01226709708571434, - "step": 1480 - }, - { - "epoch": 0.2531969309462916, - "grad_norm": 0.09638198465108871, - "learning_rate": 7.464057853656089e-05, - "loss": 0.012688608467578888, - "step": 1485 - }, - { - "epoch": 0.25404944586530265, - "grad_norm": 0.05725576728582382, - "learning_rate": 7.463703095035256e-05, - "loss": 0.011445847153663636, - "step": 1490 - }, - { - "epoch": 0.2549019607843137, - "grad_norm": 0.08474720269441605, - "learning_rate": 7.463346602734283e-05, - "loss": 0.01112249493598938, - "step": 1495 - }, - { - "epoch": 0.2557544757033248, - "grad_norm": 0.08283067494630814, - "learning_rate": 7.462988376919592e-05, - "loss": 0.01144670695066452, - "step": 1500 - }, - { - "epoch": 0.2566069906223359, - "grad_norm": 0.13687758147716522, - "learning_rate": 7.462628417758415e-05, - "loss": 0.012893360853195191, - "step": 1505 - }, - { - "epoch": 0.257459505541347, - "grad_norm": 0.16319195926189423, - "learning_rate": 7.462266725418793e-05, - "loss": 0.014364737272262573, - "step": 1510 - }, - { - "epoch": 0.25831202046035806, - "grad_norm": 0.0693240761756897, - "learning_rate": 7.461903300069576e-05, - "loss": 0.011550360918045044, - "step": 1515 - }, - { - "epoch": 0.2591645353793691, - "grad_norm": 0.0994478389620781, - "learning_rate": 7.461538141880423e-05, - "loss": 0.011711706221103669, - "step": 1520 - }, - { - "epoch": 0.26001705029838024, - "grad_norm": 0.20310325920581818, - "learning_rate": 7.461171251021802e-05, - "loss": 0.013178233802318574, - "step": 1525 - }, - { - "epoch": 0.2608695652173913, - "grad_norm": 0.07798318564891815, - "learning_rate": 7.460802627664991e-05, - "loss": 0.011273499578237534, - "step": 1530 - }, - { - "epoch": 0.2617220801364024, - "grad_norm": 0.1308072805404663, - "learning_rate": 7.460432271982073e-05, - "loss": 0.008084958046674728, - "step": 1535 - }, - { - "epoch": 0.2625745950554135, - "grad_norm": 0.08926808834075928, - "learning_rate": 7.460060184145944e-05, - "loss": 0.011974562704563142, - "step": 1540 - }, - { - "epoch": 0.26342710997442453, - "grad_norm": 0.07462260872125626, - "learning_rate": 7.459686364330307e-05, - "loss": 0.007739155739545822, - "step": 1545 - }, - { - "epoch": 0.26427962489343565, - "grad_norm": 0.10904734581708908, - "learning_rate": 7.459310812709675e-05, - "loss": 0.012024204432964324, - "step": 1550 - }, - { - "epoch": 0.2651321398124467, - "grad_norm": 0.11935116350650787, - "learning_rate": 7.458933529459364e-05, - "loss": 0.012462839484214783, - "step": 1555 - }, - { - "epoch": 0.2659846547314578, - "grad_norm": 0.08920887112617493, - "learning_rate": 7.458554514755506e-05, - "loss": 0.01472131609916687, - "step": 1560 - }, - { - "epoch": 0.2668371696504689, - "grad_norm": 0.12231490015983582, - "learning_rate": 7.458173768775036e-05, - "loss": 0.014967297017574311, - "step": 1565 - }, - { - "epoch": 0.26768968456947995, - "grad_norm": 0.10691904276609421, - "learning_rate": 7.4577912916957e-05, - "loss": 0.013200350105762482, - "step": 1570 - }, - { - "epoch": 0.26854219948849106, - "grad_norm": 0.06267247349023819, - "learning_rate": 7.457407083696049e-05, - "loss": 0.011946959048509597, - "step": 1575 - }, - { - "epoch": 0.2693947144075021, - "grad_norm": 0.10732340067625046, - "learning_rate": 7.457021144955448e-05, - "loss": 0.012722471356391906, - "step": 1580 - }, - { - "epoch": 0.27024722932651324, - "grad_norm": 0.08628841489553452, - "learning_rate": 7.456633475654061e-05, - "loss": 0.010444843024015427, - "step": 1585 - }, - { - "epoch": 0.2710997442455243, - "grad_norm": 0.1017296314239502, - "learning_rate": 7.456244075972866e-05, - "loss": 0.017299896478652953, - "step": 1590 - }, - { - "epoch": 0.27195225916453536, - "grad_norm": 0.07065381854772568, - "learning_rate": 7.455852946093652e-05, - "loss": 0.01379164457321167, - "step": 1595 - }, - { - "epoch": 0.2728047740835465, - "grad_norm": 0.08550920337438583, - "learning_rate": 7.455460086199008e-05, - "loss": 0.011976235359907151, - "step": 1600 - }, - { - "epoch": 0.27365728900255754, - "grad_norm": 0.08075132966041565, - "learning_rate": 7.455065496472335e-05, - "loss": 0.012481977045536042, - "step": 1605 - }, - { - "epoch": 0.27450980392156865, - "grad_norm": 0.08838896453380585, - "learning_rate": 7.454669177097839e-05, - "loss": 0.011825743317604064, - "step": 1610 - }, - { - "epoch": 0.2753623188405797, - "grad_norm": 0.06823412328958511, - "learning_rate": 7.454271128260537e-05, - "loss": 0.014278222620487214, - "step": 1615 - }, - { - "epoch": 0.27621483375959077, - "grad_norm": 0.09612765908241272, - "learning_rate": 7.45387135014625e-05, - "loss": 0.009220580756664275, - "step": 1620 - }, - { - "epoch": 0.2770673486786019, - "grad_norm": 0.08564051240682602, - "learning_rate": 7.45346984294161e-05, - "loss": 0.015146958827972411, - "step": 1625 - }, - { - "epoch": 0.27791986359761295, - "grad_norm": 0.0729006826877594, - "learning_rate": 7.453066606834052e-05, - "loss": 0.012136349081993103, - "step": 1630 - }, - { - "epoch": 0.27877237851662406, - "grad_norm": 0.10457300394773483, - "learning_rate": 7.452661642011818e-05, - "loss": 0.014803081750869751, - "step": 1635 - }, - { - "epoch": 0.2796248934356351, - "grad_norm": 0.09881619364023209, - "learning_rate": 7.452254948663964e-05, - "loss": 0.012653107941150665, - "step": 1640 - }, - { - "epoch": 0.2804774083546462, - "grad_norm": 0.12094103544950485, - "learning_rate": 7.451846526980343e-05, - "loss": 0.011742380261421204, - "step": 1645 - }, - { - "epoch": 0.2813299232736573, - "grad_norm": 0.06668030470609665, - "learning_rate": 7.451436377151624e-05, - "loss": 0.01095641851425171, - "step": 1650 - }, - { - "epoch": 0.28218243819266836, - "grad_norm": 0.06907116621732712, - "learning_rate": 7.451024499369278e-05, - "loss": 0.01093050017952919, - "step": 1655 - }, - { - "epoch": 0.2830349531116795, - "grad_norm": 0.13372033834457397, - "learning_rate": 7.45061089382558e-05, - "loss": 0.012350015342235565, - "step": 1660 - }, - { - "epoch": 0.28388746803069054, - "grad_norm": 0.06432037055492401, - "learning_rate": 7.450195560713617e-05, - "loss": 0.010150979459285735, - "step": 1665 - }, - { - "epoch": 0.2847399829497016, - "grad_norm": 0.10098759829998016, - "learning_rate": 7.449778500227281e-05, - "loss": 0.01070861890912056, - "step": 1670 - }, - { - "epoch": 0.2855924978687127, - "grad_norm": 0.1708894968032837, - "learning_rate": 7.449359712561269e-05, - "loss": 0.01218695342540741, - "step": 1675 - }, - { - "epoch": 0.2864450127877238, - "grad_norm": 0.15045367181301117, - "learning_rate": 7.448939197911084e-05, - "loss": 0.012416082620620727, - "step": 1680 - }, - { - "epoch": 0.2872975277067349, - "grad_norm": 0.08867572993040085, - "learning_rate": 7.44851695647304e-05, - "loss": 0.011927373707294464, - "step": 1685 - }, - { - "epoch": 0.28815004262574595, - "grad_norm": 0.1402040272951126, - "learning_rate": 7.448092988444247e-05, - "loss": 0.011733450740575791, - "step": 1690 - }, - { - "epoch": 0.289002557544757, - "grad_norm": 0.10436082631349564, - "learning_rate": 7.447667294022631e-05, - "loss": 0.013171072304248809, - "step": 1695 - }, - { - "epoch": 0.2898550724637681, - "grad_norm": 0.10628762096166611, - "learning_rate": 7.447239873406923e-05, - "loss": 0.012366896122694015, - "step": 1700 - }, - { - "epoch": 0.2907075873827792, - "grad_norm": 0.09782184660434723, - "learning_rate": 7.446810726796653e-05, - "loss": 0.011275313794612885, - "step": 1705 - }, - { - "epoch": 0.2915601023017903, - "grad_norm": 0.08403825014829636, - "learning_rate": 7.446379854392162e-05, - "loss": 0.010051032900810242, - "step": 1710 - }, - { - "epoch": 0.29241261722080136, - "grad_norm": 0.07938918471336365, - "learning_rate": 7.445947256394596e-05, - "loss": 0.00972949042916298, - "step": 1715 - }, - { - "epoch": 0.2932651321398124, - "grad_norm": 0.09250234067440033, - "learning_rate": 7.445512933005906e-05, - "loss": 0.009316288679838181, - "step": 1720 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 0.08939237147569656, - "learning_rate": 7.445076884428848e-05, - "loss": 0.007942373305559159, - "step": 1725 - }, - { - "epoch": 0.2949701619778346, - "grad_norm": 0.06440749019384384, - "learning_rate": 7.444639110866985e-05, - "loss": 0.008772502094507218, - "step": 1730 - }, - { - "epoch": 0.2958226768968457, - "grad_norm": 0.0980759784579277, - "learning_rate": 7.444199612524684e-05, - "loss": 0.0127939835190773, - "step": 1735 - }, - { - "epoch": 0.2966751918158568, - "grad_norm": 0.133849635720253, - "learning_rate": 7.443758389607117e-05, - "loss": 0.011026865988969802, - "step": 1740 - }, - { - "epoch": 0.29752770673486784, - "grad_norm": 0.08664857596158981, - "learning_rate": 7.443315442320263e-05, - "loss": 0.010273561626672745, - "step": 1745 - }, - { - "epoch": 0.29838022165387895, - "grad_norm": 0.11462656408548355, - "learning_rate": 7.442870770870902e-05, - "loss": 0.012825533747673035, - "step": 1750 - }, - { - "epoch": 0.29923273657289, - "grad_norm": 0.12586012482643127, - "learning_rate": 7.442424375466624e-05, - "loss": 0.01315489411354065, - "step": 1755 - }, - { - "epoch": 0.30008525149190113, - "grad_norm": 0.07139981538057327, - "learning_rate": 7.441976256315819e-05, - "loss": 0.010728174448013305, - "step": 1760 - }, - { - "epoch": 0.3009377664109122, - "grad_norm": 0.06837856024503708, - "learning_rate": 7.441526413627685e-05, - "loss": 0.012408022582530976, - "step": 1765 - }, - { - "epoch": 0.30179028132992325, - "grad_norm": 0.05851417034864426, - "learning_rate": 7.441074847612224e-05, - "loss": 0.009401807188987732, - "step": 1770 - }, - { - "epoch": 0.30264279624893436, - "grad_norm": 0.09595180302858353, - "learning_rate": 7.44062155848024e-05, - "loss": 0.010888323932886124, - "step": 1775 - }, - { - "epoch": 0.3034953111679454, - "grad_norm": 0.0811101421713829, - "learning_rate": 7.440166546443347e-05, - "loss": 0.00998341292142868, - "step": 1780 - }, - { - "epoch": 0.30434782608695654, - "grad_norm": 0.13257169723510742, - "learning_rate": 7.439709811713958e-05, - "loss": 0.014603719115257263, - "step": 1785 - }, - { - "epoch": 0.3052003410059676, - "grad_norm": 0.1428811252117157, - "learning_rate": 7.439251354505289e-05, - "loss": 0.01388871967792511, - "step": 1790 - }, - { - "epoch": 0.30605285592497866, - "grad_norm": 0.08253402262926102, - "learning_rate": 7.438791175031367e-05, - "loss": 0.010171836614608765, - "step": 1795 - }, - { - "epoch": 0.3069053708439898, - "grad_norm": 0.05991052836179733, - "learning_rate": 7.438329273507019e-05, - "loss": 0.01470649391412735, - "step": 1800 - }, - { - "epoch": 0.30775788576300084, - "grad_norm": 0.10785503685474396, - "learning_rate": 7.437865650147873e-05, - "loss": 0.012740308046340942, - "step": 1805 - }, - { - "epoch": 0.30861040068201195, - "grad_norm": 0.093068428337574, - "learning_rate": 7.437400305170367e-05, - "loss": 0.01183861643075943, - "step": 1810 - }, - { - "epoch": 0.309462915601023, - "grad_norm": 0.08662707358598709, - "learning_rate": 7.436933238791737e-05, - "loss": 0.011762722581624984, - "step": 1815 - }, - { - "epoch": 0.3103154305200341, - "grad_norm": 0.07826617360115051, - "learning_rate": 7.436464451230027e-05, - "loss": 0.009368828684091567, - "step": 1820 - }, - { - "epoch": 0.3111679454390452, - "grad_norm": 0.1295643448829651, - "learning_rate": 7.435993942704082e-05, - "loss": 0.010699732601642609, - "step": 1825 - }, - { - "epoch": 0.31202046035805625, - "grad_norm": 0.1412370651960373, - "learning_rate": 7.43552171343355e-05, - "loss": 0.0124404676258564, - "step": 1830 - }, - { - "epoch": 0.31287297527706737, - "grad_norm": 0.07793306559324265, - "learning_rate": 7.435047763638885e-05, - "loss": 0.010793016105890275, - "step": 1835 - }, - { - "epoch": 0.3137254901960784, - "grad_norm": 0.1273961067199707, - "learning_rate": 7.434572093541341e-05, - "loss": 0.012959575653076172, - "step": 1840 - }, - { - "epoch": 0.3145780051150895, - "grad_norm": 0.10340052098035812, - "learning_rate": 7.434094703362978e-05, - "loss": 0.011804693937301635, - "step": 1845 - }, - { - "epoch": 0.3154305200341006, - "grad_norm": 0.07878883183002472, - "learning_rate": 7.433615593326657e-05, - "loss": 0.011087532341480254, - "step": 1850 - }, - { - "epoch": 0.31628303495311166, - "grad_norm": 0.08166638761758804, - "learning_rate": 7.433134763656042e-05, - "loss": 0.010111966729164123, - "step": 1855 - }, - { - "epoch": 0.3171355498721228, - "grad_norm": 0.12048157304525375, - "learning_rate": 7.432652214575603e-05, - "loss": 0.013003784418106078, - "step": 1860 - }, - { - "epoch": 0.31798806479113384, - "grad_norm": 0.08009333908557892, - "learning_rate": 7.432167946310605e-05, - "loss": 0.01212536245584488, - "step": 1865 - }, - { - "epoch": 0.3188405797101449, - "grad_norm": 0.07344945520162582, - "learning_rate": 7.431681959087126e-05, - "loss": 0.011613032221794129, - "step": 1870 - }, - { - "epoch": 0.319693094629156, - "grad_norm": 0.09358638525009155, - "learning_rate": 7.431194253132037e-05, - "loss": 0.011946377158164979, - "step": 1875 - }, - { - "epoch": 0.3205456095481671, - "grad_norm": 0.14091502130031586, - "learning_rate": 7.430704828673016e-05, - "loss": 0.012845572829246522, - "step": 1880 - }, - { - "epoch": 0.3213981244671782, - "grad_norm": 0.0754130631685257, - "learning_rate": 7.430213685938543e-05, - "loss": 0.011171463876962662, - "step": 1885 - }, - { - "epoch": 0.32225063938618925, - "grad_norm": 0.10210556536912918, - "learning_rate": 7.429720825157901e-05, - "loss": 0.010276605188846589, - "step": 1890 - }, - { - "epoch": 0.32310315430520037, - "grad_norm": 0.10094697028398514, - "learning_rate": 7.429226246561173e-05, - "loss": 0.01233583763241768, - "step": 1895 - }, - { - "epoch": 0.32395566922421143, - "grad_norm": 0.0673881471157074, - "learning_rate": 7.428729950379244e-05, - "loss": 0.008631937205791473, - "step": 1900 - }, - { - "epoch": 0.3248081841432225, - "grad_norm": 0.11807650327682495, - "learning_rate": 7.428231936843803e-05, - "loss": 0.012879209220409393, - "step": 1905 - }, - { - "epoch": 0.3256606990622336, - "grad_norm": 0.0627446100115776, - "learning_rate": 7.427732206187338e-05, - "loss": 0.011548225581645966, - "step": 1910 - }, - { - "epoch": 0.32651321398124467, - "grad_norm": 0.09312627464532852, - "learning_rate": 7.427230758643139e-05, - "loss": 0.012763653695583344, - "step": 1915 - }, - { - "epoch": 0.3273657289002558, - "grad_norm": 0.12694048881530762, - "learning_rate": 7.426727594445302e-05, - "loss": 0.014219759404659272, - "step": 1920 - }, - { - "epoch": 0.32821824381926684, - "grad_norm": 0.09415233880281448, - "learning_rate": 7.426222713828717e-05, - "loss": 0.01088135689496994, - "step": 1925 - }, - { - "epoch": 0.3290707587382779, - "grad_norm": 0.1079363226890564, - "learning_rate": 7.425716117029082e-05, - "loss": 0.013090427219867706, - "step": 1930 - }, - { - "epoch": 0.329923273657289, - "grad_norm": 0.10847736895084381, - "learning_rate": 7.42520780428289e-05, - "loss": 0.011184506118297577, - "step": 1935 - }, - { - "epoch": 0.3307757885763001, - "grad_norm": 0.12416253983974457, - "learning_rate": 7.424697775827442e-05, - "loss": 0.012871085107326508, - "step": 1940 - }, - { - "epoch": 0.3316283034953112, - "grad_norm": 0.08419755846261978, - "learning_rate": 7.424186031900833e-05, - "loss": 0.01026538610458374, - "step": 1945 - }, - { - "epoch": 0.33248081841432225, - "grad_norm": 0.06923236697912216, - "learning_rate": 7.423672572741965e-05, - "loss": 0.012079264223575591, - "step": 1950 - }, - { - "epoch": 0.3333333333333333, - "grad_norm": 0.08516070991754532, - "learning_rate": 7.423157398590534e-05, - "loss": 0.011150284111499787, - "step": 1955 - }, - { - "epoch": 0.33418584825234443, - "grad_norm": 0.054969049990177155, - "learning_rate": 7.422640509687045e-05, - "loss": 0.008261225372552871, - "step": 1960 - }, - { - "epoch": 0.3350383631713555, - "grad_norm": 0.09037495404481888, - "learning_rate": 7.422121906272795e-05, - "loss": 0.015576986968517304, - "step": 1965 - }, - { - "epoch": 0.3358908780903666, - "grad_norm": 0.08676491677761078, - "learning_rate": 7.421601588589889e-05, - "loss": 0.01942193806171417, - "step": 1970 - }, - { - "epoch": 0.33674339300937767, - "grad_norm": 0.09090764820575714, - "learning_rate": 7.421079556881224e-05, - "loss": 0.012568703293800354, - "step": 1975 - }, - { - "epoch": 0.3375959079283887, - "grad_norm": 0.07859542965888977, - "learning_rate": 7.420555811390505e-05, - "loss": 0.011662108451128006, - "step": 1980 - }, - { - "epoch": 0.33844842284739984, - "grad_norm": 0.06368016451597214, - "learning_rate": 7.420030352362235e-05, - "loss": 0.010762494802474976, - "step": 1985 - }, - { - "epoch": 0.3393009377664109, - "grad_norm": 0.10950745642185211, - "learning_rate": 7.419503180041712e-05, - "loss": 0.012577894330024719, - "step": 1990 - }, - { - "epoch": 0.340153452685422, - "grad_norm": 0.07888182997703552, - "learning_rate": 7.41897429467504e-05, - "loss": 0.009134671837091445, - "step": 1995 - }, - { - "epoch": 0.3410059676044331, - "grad_norm": 0.08978903293609619, - "learning_rate": 7.41844369650912e-05, - "loss": 0.011774566024541855, - "step": 2000 - }, - { - "epoch": 0.34185848252344414, - "grad_norm": 0.07103633135557175, - "learning_rate": 7.417911385791653e-05, - "loss": 0.011116493493318558, - "step": 2005 - }, - { - "epoch": 0.34271099744245526, - "grad_norm": 0.07445147633552551, - "learning_rate": 7.417377362771138e-05, - "loss": 0.012135914713144302, - "step": 2010 - }, - { - "epoch": 0.3435635123614663, - "grad_norm": 0.09372841566801071, - "learning_rate": 7.416841627696876e-05, - "loss": 0.014101208746433258, - "step": 2015 - }, - { - "epoch": 0.34441602728047743, - "grad_norm": 0.10181085020303726, - "learning_rate": 7.416304180818966e-05, - "loss": 0.010917666554450988, - "step": 2020 - }, - { - "epoch": 0.3452685421994885, - "grad_norm": 0.08702226728200912, - "learning_rate": 7.415765022388305e-05, - "loss": 0.012508213520050049, - "step": 2025 - }, - { - "epoch": 0.34612105711849955, - "grad_norm": 0.11725348234176636, - "learning_rate": 7.415224152656591e-05, - "loss": 0.012123394012451171, - "step": 2030 - }, - { - "epoch": 0.34697357203751067, - "grad_norm": 0.10797812044620514, - "learning_rate": 7.414681571876321e-05, - "loss": 0.011308898031711579, - "step": 2035 - }, - { - "epoch": 0.34782608695652173, - "grad_norm": 0.07944193482398987, - "learning_rate": 7.414137280300787e-05, - "loss": 0.008828282356262207, - "step": 2040 - }, - { - "epoch": 0.34867860187553285, - "grad_norm": 0.09413408488035202, - "learning_rate": 7.413591278184086e-05, - "loss": 0.010974615067243575, - "step": 2045 - }, - { - "epoch": 0.3495311167945439, - "grad_norm": 0.13984905183315277, - "learning_rate": 7.413043565781107e-05, - "loss": 0.013567428290843963, - "step": 2050 - }, - { - "epoch": 0.35038363171355497, - "grad_norm": 0.09445049613714218, - "learning_rate": 7.41249414334754e-05, - "loss": 0.011122822761535645, - "step": 2055 - }, - { - "epoch": 0.3512361466325661, - "grad_norm": 0.07995510846376419, - "learning_rate": 7.411943011139877e-05, - "loss": 0.009908045828342437, - "step": 2060 - }, - { - "epoch": 0.35208866155157714, - "grad_norm": 0.1185273677110672, - "learning_rate": 7.411390169415402e-05, - "loss": 0.012709785997867585, - "step": 2065 - }, - { - "epoch": 0.35294117647058826, - "grad_norm": 0.11713512986898422, - "learning_rate": 7.4108356184322e-05, - "loss": 0.009765231609344482, - "step": 2070 - }, - { - "epoch": 0.3537936913895993, - "grad_norm": 0.06523539125919342, - "learning_rate": 7.410279358449155e-05, - "loss": 0.0113253653049469, - "step": 2075 - }, - { - "epoch": 0.3546462063086104, - "grad_norm": 0.07587762176990509, - "learning_rate": 7.409721389725948e-05, - "loss": 0.009385265409946442, - "step": 2080 - }, - { - "epoch": 0.3554987212276215, - "grad_norm": 0.05211614444851875, - "learning_rate": 7.409161712523056e-05, - "loss": 0.012498895078897477, - "step": 2085 - }, - { - "epoch": 0.35635123614663256, - "grad_norm": 0.12545894086360931, - "learning_rate": 7.408600327101755e-05, - "loss": 0.012212803959846497, - "step": 2090 - }, - { - "epoch": 0.35720375106564367, - "grad_norm": 0.10047369450330734, - "learning_rate": 7.40803723372412e-05, - "loss": 0.012341489642858505, - "step": 2095 - }, - { - "epoch": 0.35805626598465473, - "grad_norm": 0.13728737831115723, - "learning_rate": 7.40747243265302e-05, - "loss": 0.011351624131202697, - "step": 2100 - }, - { - "epoch": 0.3589087809036658, - "grad_norm": 0.1251213699579239, - "learning_rate": 7.406905924152125e-05, - "loss": 0.013545188307762145, - "step": 2105 - }, - { - "epoch": 0.3597612958226769, - "grad_norm": 0.07805601507425308, - "learning_rate": 7.406337708485897e-05, - "loss": 0.010711775720119476, - "step": 2110 - }, - { - "epoch": 0.36061381074168797, - "grad_norm": 0.08311845362186432, - "learning_rate": 7.405767785919598e-05, - "loss": 0.01128876730799675, - "step": 2115 - }, - { - "epoch": 0.3614663256606991, - "grad_norm": 0.09670841693878174, - "learning_rate": 7.405196156719291e-05, - "loss": 0.013085599243640899, - "step": 2120 - }, - { - "epoch": 0.36231884057971014, - "grad_norm": 0.10827390104532242, - "learning_rate": 7.404622821151829e-05, - "loss": 0.011315967142581939, - "step": 2125 - }, - { - "epoch": 0.3631713554987212, - "grad_norm": 0.08578862994909286, - "learning_rate": 7.404047779484862e-05, - "loss": 0.01172153502702713, - "step": 2130 - }, - { - "epoch": 0.3640238704177323, - "grad_norm": 0.08786064386367798, - "learning_rate": 7.403471031986841e-05, - "loss": 0.010834509134292602, - "step": 2135 - }, - { - "epoch": 0.3648763853367434, - "grad_norm": 0.07956185191869736, - "learning_rate": 7.402892578927012e-05, - "loss": 0.01222250759601593, - "step": 2140 - }, - { - "epoch": 0.3657289002557545, - "grad_norm": 0.10179547220468521, - "learning_rate": 7.402312420575414e-05, - "loss": 0.010667824000120164, - "step": 2145 - }, - { - "epoch": 0.36658141517476556, - "grad_norm": 0.10311263799667358, - "learning_rate": 7.401730557202884e-05, - "loss": 0.014463961124420166, - "step": 2150 - }, - { - "epoch": 0.3674339300937766, - "grad_norm": 0.0935206189751625, - "learning_rate": 7.401146989081058e-05, - "loss": 0.010451390594244003, - "step": 2155 - }, - { - "epoch": 0.36828644501278773, - "grad_norm": 0.08164738863706589, - "learning_rate": 7.400561716482362e-05, - "loss": 0.013009518384933472, - "step": 2160 - }, - { - "epoch": 0.3691389599317988, - "grad_norm": 0.0638088807463646, - "learning_rate": 7.399974739680022e-05, - "loss": 0.0109320767223835, - "step": 2165 - }, - { - "epoch": 0.3699914748508099, - "grad_norm": 0.09591665863990784, - "learning_rate": 7.399386058948057e-05, - "loss": 0.01293652206659317, - "step": 2170 - }, - { - "epoch": 0.37084398976982097, - "grad_norm": 0.08929681777954102, - "learning_rate": 7.398795674561285e-05, - "loss": 0.011034403741359711, - "step": 2175 - }, - { - "epoch": 0.37169650468883203, - "grad_norm": 0.07356081902980804, - "learning_rate": 7.398203586795315e-05, - "loss": 0.010477699339389801, - "step": 2180 - }, - { - "epoch": 0.37254901960784315, - "grad_norm": 0.1117938682436943, - "learning_rate": 7.397609795926555e-05, - "loss": 0.008920109272003174, - "step": 2185 - }, - { - "epoch": 0.3734015345268542, - "grad_norm": 0.10849595069885254, - "learning_rate": 7.397014302232204e-05, - "loss": 0.01170756369829178, - "step": 2190 - }, - { - "epoch": 0.3742540494458653, - "grad_norm": 0.08509895205497742, - "learning_rate": 7.396417105990261e-05, - "loss": 0.010042114555835724, - "step": 2195 - }, - { - "epoch": 0.3751065643648764, - "grad_norm": 0.10500915348529816, - "learning_rate": 7.395818207479515e-05, - "loss": 0.011792914569377899, - "step": 2200 - }, - { - "epoch": 0.37595907928388744, - "grad_norm": 0.06618086993694305, - "learning_rate": 7.395217606979553e-05, - "loss": 0.011084456741809846, - "step": 2205 - }, - { - "epoch": 0.37681159420289856, - "grad_norm": 0.08622384816408157, - "learning_rate": 7.394615304770756e-05, - "loss": 0.010955430567264557, - "step": 2210 - }, - { - "epoch": 0.3776641091219096, - "grad_norm": 0.10002898424863815, - "learning_rate": 7.394011301134296e-05, - "loss": 0.011026810109615325, - "step": 2215 - }, - { - "epoch": 0.37851662404092073, - "grad_norm": 0.16406390070915222, - "learning_rate": 7.393405596352144e-05, - "loss": 0.010511884093284607, - "step": 2220 - }, - { - "epoch": 0.3793691389599318, - "grad_norm": 0.077234648168087, - "learning_rate": 7.392798190707062e-05, - "loss": 0.011723069101572036, - "step": 2225 - }, - { - "epoch": 0.38022165387894286, - "grad_norm": 0.09080372750759125, - "learning_rate": 7.392189084482609e-05, - "loss": 0.010011065006256103, - "step": 2230 - }, - { - "epoch": 0.38107416879795397, - "grad_norm": 0.08161097019910812, - "learning_rate": 7.391578277963134e-05, - "loss": 0.012426529079675674, - "step": 2235 - }, - { - "epoch": 0.38192668371696503, - "grad_norm": 0.09220891445875168, - "learning_rate": 7.390965771433783e-05, - "loss": 0.011983324587345124, - "step": 2240 - }, - { - "epoch": 0.38277919863597615, - "grad_norm": 0.10752015560865402, - "learning_rate": 7.390351565180495e-05, - "loss": 0.014156198501586914, - "step": 2245 - }, - { - "epoch": 0.3836317135549872, - "grad_norm": 0.05059373378753662, - "learning_rate": 7.38973565949e-05, - "loss": 0.00998034030199051, - "step": 2250 - }, - { - "epoch": 0.38448422847399827, - "grad_norm": 0.11214456707239151, - "learning_rate": 7.389118054649824e-05, - "loss": 0.01075390875339508, - "step": 2255 - }, - { - "epoch": 0.3853367433930094, - "grad_norm": 0.07631754130125046, - "learning_rate": 7.388498750948286e-05, - "loss": 0.014638753235340118, - "step": 2260 - }, - { - "epoch": 0.38618925831202044, - "grad_norm": 0.07249671965837479, - "learning_rate": 7.387877748674499e-05, - "loss": 0.011368723213672638, - "step": 2265 - }, - { - "epoch": 0.38704177323103156, - "grad_norm": 0.11984748393297195, - "learning_rate": 7.387255048118364e-05, - "loss": 0.011021500825881958, - "step": 2270 - }, - { - "epoch": 0.3878942881500426, - "grad_norm": 0.08478229492902756, - "learning_rate": 7.386630649570581e-05, - "loss": 0.009952519088983536, - "step": 2275 - }, - { - "epoch": 0.3887468030690537, - "grad_norm": 0.11780049651861191, - "learning_rate": 7.386004553322639e-05, - "loss": 0.009453963488340378, - "step": 2280 - }, - { - "epoch": 0.3895993179880648, - "grad_norm": 0.06949981302022934, - "learning_rate": 7.38537675966682e-05, - "loss": 0.009042493999004364, - "step": 2285 - }, - { - "epoch": 0.39045183290707586, - "grad_norm": 0.11411654949188232, - "learning_rate": 7.3847472688962e-05, - "loss": 0.013985235989093781, - "step": 2290 - }, - { - "epoch": 0.391304347826087, - "grad_norm": 0.11030828952789307, - "learning_rate": 7.384116081304647e-05, - "loss": 0.01135389506816864, - "step": 2295 - }, - { - "epoch": 0.39215686274509803, - "grad_norm": 0.0808996856212616, - "learning_rate": 7.38348319718682e-05, - "loss": 0.011089587211608886, - "step": 2300 - }, - { - "epoch": 0.39300937766410915, - "grad_norm": 0.11319196224212646, - "learning_rate": 7.382848616838167e-05, - "loss": 0.01407056450843811, - "step": 2305 - }, - { - "epoch": 0.3938618925831202, - "grad_norm": 0.09301812201738358, - "learning_rate": 7.382212340554937e-05, - "loss": 0.012283433228731155, - "step": 2310 - }, - { - "epoch": 0.39471440750213127, - "grad_norm": 0.08611076325178146, - "learning_rate": 7.381574368634159e-05, - "loss": 0.012206315249204635, - "step": 2315 - }, - { - "epoch": 0.3955669224211424, - "grad_norm": 0.08104816824197769, - "learning_rate": 7.380934701373665e-05, - "loss": 0.01059889942407608, - "step": 2320 - }, - { - "epoch": 0.39641943734015345, - "grad_norm": 0.09607693552970886, - "learning_rate": 7.380293339072067e-05, - "loss": 0.010189100354909896, - "step": 2325 - }, - { - "epoch": 0.39727195225916456, - "grad_norm": 0.08985438197851181, - "learning_rate": 7.37965028202878e-05, - "loss": 0.01145355924963951, - "step": 2330 - }, - { - "epoch": 0.3981244671781756, - "grad_norm": 0.0767461284995079, - "learning_rate": 7.379005530544e-05, - "loss": 0.012533161044120788, - "step": 2335 - }, - { - "epoch": 0.3989769820971867, - "grad_norm": 0.17541736364364624, - "learning_rate": 7.378359084918724e-05, - "loss": 0.011619434505701066, - "step": 2340 - }, - { - "epoch": 0.3998294970161978, - "grad_norm": 0.07870234549045563, - "learning_rate": 7.377710945454728e-05, - "loss": 0.013362208008766174, - "step": 2345 - }, - { - "epoch": 0.40068201193520886, - "grad_norm": 0.08661636710166931, - "learning_rate": 7.377061112454589e-05, - "loss": 0.011086350679397583, - "step": 2350 - }, - { - "epoch": 0.40153452685422, - "grad_norm": 0.08467904478311539, - "learning_rate": 7.376409586221668e-05, - "loss": 0.008972878754138946, - "step": 2355 - }, - { - "epoch": 0.40238704177323104, - "grad_norm": 0.09345834702253342, - "learning_rate": 7.375756367060121e-05, - "loss": 0.01281469464302063, - "step": 2360 - }, - { - "epoch": 0.4032395566922421, - "grad_norm": 0.10789518058300018, - "learning_rate": 7.375101455274893e-05, - "loss": 0.012343473732471466, - "step": 2365 - }, - { - "epoch": 0.4040920716112532, - "grad_norm": 0.06546701490879059, - "learning_rate": 7.374444851171716e-05, - "loss": 0.012971158325672149, - "step": 2370 - }, - { - "epoch": 0.40494458653026427, - "grad_norm": 0.08968871831893921, - "learning_rate": 7.373786555057117e-05, - "loss": 0.012170027941465378, - "step": 2375 - }, - { - "epoch": 0.4057971014492754, - "grad_norm": 0.058557040989398956, - "learning_rate": 7.373126567238412e-05, - "loss": 0.009915658086538316, - "step": 2380 - }, - { - "epoch": 0.40664961636828645, - "grad_norm": 0.08734243363142014, - "learning_rate": 7.3724648880237e-05, - "loss": 0.009043127298355103, - "step": 2385 - }, - { - "epoch": 0.4075021312872975, - "grad_norm": 0.09249505400657654, - "learning_rate": 7.371801517721879e-05, - "loss": 0.008064758032560349, - "step": 2390 - }, - { - "epoch": 0.4083546462063086, - "grad_norm": 0.09015105664730072, - "learning_rate": 7.371136456642631e-05, - "loss": 0.007721304893493652, - "step": 2395 - }, - { - "epoch": 0.4092071611253197, - "grad_norm": 0.08557724207639694, - "learning_rate": 7.37046970509643e-05, - "loss": 0.010766822844743729, - "step": 2400 - }, - { - "epoch": 0.4100596760443308, - "grad_norm": 0.08001160621643066, - "learning_rate": 7.369801263394536e-05, - "loss": 0.00953015759587288, - "step": 2405 - }, - { - "epoch": 0.41091219096334186, - "grad_norm": 0.08470463752746582, - "learning_rate": 7.369131131849e-05, - "loss": 0.010154610127210617, - "step": 2410 - }, - { - "epoch": 0.4117647058823529, - "grad_norm": 0.07110592722892761, - "learning_rate": 7.368459310772664e-05, - "loss": 0.010146965831518173, - "step": 2415 - }, - { - "epoch": 0.41261722080136404, - "grad_norm": 0.06808072328567505, - "learning_rate": 7.367785800479152e-05, - "loss": 0.01043560653924942, - "step": 2420 - }, - { - "epoch": 0.4134697357203751, - "grad_norm": 0.09226541966199875, - "learning_rate": 7.367110601282884e-05, - "loss": 0.011138775944709777, - "step": 2425 - }, - { - "epoch": 0.4143222506393862, - "grad_norm": 0.08650510013103485, - "learning_rate": 7.366433713499067e-05, - "loss": 0.011451859772205353, - "step": 2430 - }, - { - "epoch": 0.4151747655583973, - "grad_norm": 0.11477349698543549, - "learning_rate": 7.365755137443691e-05, - "loss": 0.013105396926403046, - "step": 2435 - }, - { - "epoch": 0.41602728047740833, - "grad_norm": 0.1117088794708252, - "learning_rate": 7.365074873433541e-05, - "loss": 0.01190647780895233, - "step": 2440 - }, - { - "epoch": 0.41687979539641945, - "grad_norm": 0.058514054864645004, - "learning_rate": 7.364392921786185e-05, - "loss": 0.011006749421358108, - "step": 2445 - }, - { - "epoch": 0.4177323103154305, - "grad_norm": 0.0925084576010704, - "learning_rate": 7.363709282819981e-05, - "loss": 0.011449025571346283, - "step": 2450 - }, - { - "epoch": 0.4185848252344416, - "grad_norm": 0.10087555646896362, - "learning_rate": 7.363023956854074e-05, - "loss": 0.011715477705001831, - "step": 2455 - }, - { - "epoch": 0.4194373401534527, - "grad_norm": 0.08760760724544525, - "learning_rate": 7.362336944208399e-05, - "loss": 0.011089532822370528, - "step": 2460 - }, - { - "epoch": 0.42028985507246375, - "grad_norm": 0.09802501648664474, - "learning_rate": 7.361648245203674e-05, - "loss": 0.012181267142295837, - "step": 2465 - }, - { - "epoch": 0.42114236999147486, - "grad_norm": 0.06908553838729858, - "learning_rate": 7.36095786016141e-05, - "loss": 0.010319410264492035, - "step": 2470 - }, - { - "epoch": 0.4219948849104859, - "grad_norm": 0.07190519571304321, - "learning_rate": 7.360265789403896e-05, - "loss": 0.013445201516151428, - "step": 2475 - }, - { - "epoch": 0.42284739982949704, - "grad_norm": 0.06683836877346039, - "learning_rate": 7.359572033254219e-05, - "loss": 0.008111725002527237, - "step": 2480 - }, - { - "epoch": 0.4236999147485081, - "grad_norm": 0.07094739377498627, - "learning_rate": 7.358876592036245e-05, - "loss": 0.012130254507064819, - "step": 2485 - }, - { - "epoch": 0.42455242966751916, - "grad_norm": 0.11974254250526428, - "learning_rate": 7.358179466074629e-05, - "loss": 0.011426160484552384, - "step": 2490 - }, - { - "epoch": 0.4254049445865303, - "grad_norm": 0.07710634917020798, - "learning_rate": 7.357480655694814e-05, - "loss": 0.010044369101524352, - "step": 2495 - }, - { - "epoch": 0.42625745950554134, - "grad_norm": 0.08417962491512299, - "learning_rate": 7.356780161223026e-05, - "loss": 0.010821688175201415, - "step": 2500 - }, - { - "epoch": 0.42710997442455245, - "grad_norm": 0.11058598011732101, - "learning_rate": 7.35607798298628e-05, - "loss": 0.012949730455875396, - "step": 2505 - }, - { - "epoch": 0.4279624893435635, - "grad_norm": 0.08686384558677673, - "learning_rate": 7.355374121312377e-05, - "loss": 0.009096769988536835, - "step": 2510 - }, - { - "epoch": 0.4288150042625746, - "grad_norm": 0.11153281480073929, - "learning_rate": 7.354668576529903e-05, - "loss": 0.010433172434568405, - "step": 2515 - }, - { - "epoch": 0.4296675191815857, - "grad_norm": 0.08490245044231415, - "learning_rate": 7.353961348968229e-05, - "loss": 0.008478586375713349, - "step": 2520 - }, - { - "epoch": 0.43052003410059675, - "grad_norm": 0.06651579588651657, - "learning_rate": 7.353252438957511e-05, - "loss": 0.012342555820941925, - "step": 2525 - }, - { - "epoch": 0.43137254901960786, - "grad_norm": 0.08961665630340576, - "learning_rate": 7.352541846828694e-05, - "loss": 0.010387994349002838, - "step": 2530 - }, - { - "epoch": 0.4322250639386189, - "grad_norm": 0.08726584166288376, - "learning_rate": 7.351829572913505e-05, - "loss": 0.009760166704654693, - "step": 2535 - }, - { - "epoch": 0.43307757885763, - "grad_norm": 0.06280151754617691, - "learning_rate": 7.351115617544459e-05, - "loss": 0.01087048500776291, - "step": 2540 - }, - { - "epoch": 0.4339300937766411, - "grad_norm": 0.09519831836223602, - "learning_rate": 7.350399981054851e-05, - "loss": 0.011516393721103668, - "step": 2545 - }, - { - "epoch": 0.43478260869565216, - "grad_norm": 0.09179427474737167, - "learning_rate": 7.349682663778766e-05, - "loss": 0.013757939636707305, - "step": 2550 - }, - { - "epoch": 0.4356351236146633, - "grad_norm": 0.10378465801477432, - "learning_rate": 7.34896366605107e-05, - "loss": 0.011337973177433014, - "step": 2555 - }, - { - "epoch": 0.43648763853367434, - "grad_norm": 0.14043129980564117, - "learning_rate": 7.348242988207418e-05, - "loss": 0.01203509122133255, - "step": 2560 - }, - { - "epoch": 0.4373401534526854, - "grad_norm": 0.06442756950855255, - "learning_rate": 7.347520630584243e-05, - "loss": 0.007210708409547806, - "step": 2565 - }, - { - "epoch": 0.4381926683716965, - "grad_norm": 0.05981998145580292, - "learning_rate": 7.346796593518768e-05, - "loss": 0.009825873374938964, - "step": 2570 - }, - { - "epoch": 0.4390451832907076, - "grad_norm": 0.10198855400085449, - "learning_rate": 7.346070877348996e-05, - "loss": 0.013066151738166809, - "step": 2575 - }, - { - "epoch": 0.4398976982097187, - "grad_norm": 0.12545716762542725, - "learning_rate": 7.345343482413716e-05, - "loss": 0.008229418843984603, - "step": 2580 - }, - { - "epoch": 0.44075021312872975, - "grad_norm": 0.1352240890264511, - "learning_rate": 7.344614409052501e-05, - "loss": 0.013183671236038207, - "step": 2585 - }, - { - "epoch": 0.4416027280477408, - "grad_norm": 0.07198570668697357, - "learning_rate": 7.343883657605704e-05, - "loss": 0.010311058908700942, - "step": 2590 - }, - { - "epoch": 0.4424552429667519, - "grad_norm": 0.08454001694917679, - "learning_rate": 7.343151228414469e-05, - "loss": 0.009928110986948013, - "step": 2595 - }, - { - "epoch": 0.443307757885763, - "grad_norm": 0.07289708405733109, - "learning_rate": 7.342417121820714e-05, - "loss": 0.011071844398975373, - "step": 2600 - }, - { - "epoch": 0.4441602728047741, - "grad_norm": 0.12291301786899567, - "learning_rate": 7.341681338167145e-05, - "loss": 0.011248499900102616, - "step": 2605 - }, - { - "epoch": 0.44501278772378516, - "grad_norm": 0.14277565479278564, - "learning_rate": 7.340943877797252e-05, - "loss": 0.010025183856487273, - "step": 2610 - }, - { - "epoch": 0.4458653026427962, - "grad_norm": 0.07569251209497452, - "learning_rate": 7.340204741055304e-05, - "loss": 0.009996208548545837, - "step": 2615 - }, - { - "epoch": 0.44671781756180734, - "grad_norm": 0.10494589060544968, - "learning_rate": 7.339463928286357e-05, - "loss": 0.01392391324043274, - "step": 2620 - }, - { - "epoch": 0.4475703324808184, - "grad_norm": 0.14377856254577637, - "learning_rate": 7.338721439836245e-05, - "loss": 0.012823046743869781, - "step": 2625 - }, - { - "epoch": 0.4484228473998295, - "grad_norm": 0.06943785399198532, - "learning_rate": 7.337977276051586e-05, - "loss": 0.009452010691165923, - "step": 2630 - }, - { - "epoch": 0.4492753623188406, - "grad_norm": 0.09933419525623322, - "learning_rate": 7.337231437279783e-05, - "loss": 0.008945996314287186, - "step": 2635 - }, - { - "epoch": 0.45012787723785164, - "grad_norm": 0.09861225634813309, - "learning_rate": 7.336483923869016e-05, - "loss": 0.010671885311603546, - "step": 2640 - }, - { - "epoch": 0.45098039215686275, - "grad_norm": 0.08303772658109665, - "learning_rate": 7.335734736168249e-05, - "loss": 0.009589634835720062, - "step": 2645 - }, - { - "epoch": 0.4518329070758738, - "grad_norm": 0.08657588064670563, - "learning_rate": 7.334983874527231e-05, - "loss": 0.008064036071300507, - "step": 2650 - }, - { - "epoch": 0.45268542199488493, - "grad_norm": 0.10513710975646973, - "learning_rate": 7.334231339296485e-05, - "loss": 0.01647743284702301, - "step": 2655 - }, - { - "epoch": 0.453537936913896, - "grad_norm": 0.10341943055391312, - "learning_rate": 7.333477130827322e-05, - "loss": 0.009101226180791854, - "step": 2660 - }, - { - "epoch": 0.45439045183290705, - "grad_norm": 0.09740681946277618, - "learning_rate": 7.33272124947183e-05, - "loss": 0.011460770666599274, - "step": 2665 - }, - { - "epoch": 0.45524296675191817, - "grad_norm": 0.06477998197078705, - "learning_rate": 7.331963695582881e-05, - "loss": 0.011711791157722473, - "step": 2670 - }, - { - "epoch": 0.4560954816709292, - "grad_norm": 0.0881948322057724, - "learning_rate": 7.331204469514127e-05, - "loss": 0.009621420502662658, - "step": 2675 - }, - { - "epoch": 0.45694799658994034, - "grad_norm": 0.09553391486406326, - "learning_rate": 7.330443571619998e-05, - "loss": 0.011725078523159026, - "step": 2680 - }, - { - "epoch": 0.4578005115089514, - "grad_norm": 0.10480209439992905, - "learning_rate": 7.329681002255706e-05, - "loss": 0.012353558838367463, - "step": 2685 - }, - { - "epoch": 0.45865302642796246, - "grad_norm": 0.08409439772367477, - "learning_rate": 7.328916761777247e-05, - "loss": 0.01114615797996521, - "step": 2690 - }, - { - "epoch": 0.4595055413469736, - "grad_norm": 0.07166923582553864, - "learning_rate": 7.32815085054139e-05, - "loss": 0.008672221004962921, - "step": 2695 - }, - { - "epoch": 0.46035805626598464, - "grad_norm": 0.07308658212423325, - "learning_rate": 7.327383268905691e-05, - "loss": 0.012448658794164657, - "step": 2700 - }, - { - "epoch": 0.46121057118499575, - "grad_norm": 0.14019793272018433, - "learning_rate": 7.32661401722848e-05, - "loss": 0.013477186858654022, - "step": 2705 - }, - { - "epoch": 0.4620630861040068, - "grad_norm": 0.0753963515162468, - "learning_rate": 7.325843095868872e-05, - "loss": 0.011373884975910187, - "step": 2710 - }, - { - "epoch": 0.4629156010230179, - "grad_norm": 0.07312130182981491, - "learning_rate": 7.325070505186756e-05, - "loss": 0.012329152971506118, - "step": 2715 - }, - { - "epoch": 0.463768115942029, - "grad_norm": 0.06200556829571724, - "learning_rate": 7.324296245542806e-05, - "loss": 0.008847354352474213, - "step": 2720 - }, - { - "epoch": 0.46462063086104005, - "grad_norm": 0.11015846580266953, - "learning_rate": 7.32352031729847e-05, - "loss": 0.013304698467254638, - "step": 2725 - }, - { - "epoch": 0.46547314578005117, - "grad_norm": 0.05926821380853653, - "learning_rate": 7.322742720815978e-05, - "loss": 0.011919337511062621, - "step": 2730 - }, - { - "epoch": 0.4663256606990622, - "grad_norm": 0.102846160531044, - "learning_rate": 7.321963456458337e-05, - "loss": 0.010952814668416976, - "step": 2735 - }, - { - "epoch": 0.46717817561807334, - "grad_norm": 0.10767021775245667, - "learning_rate": 7.321182524589334e-05, - "loss": 0.012438956648111343, - "step": 2740 - }, - { - "epoch": 0.4680306905370844, - "grad_norm": 0.08611919730901718, - "learning_rate": 7.320399925573534e-05, - "loss": 0.008686845004558564, - "step": 2745 - }, - { - "epoch": 0.46888320545609546, - "grad_norm": 0.07483147829771042, - "learning_rate": 7.31961565977628e-05, - "loss": 0.011065713316202163, - "step": 2750 - }, - { - "epoch": 0.4697357203751066, - "grad_norm": 0.08029857277870178, - "learning_rate": 7.318829727563696e-05, - "loss": 0.012208929657936097, - "step": 2755 - }, - { - "epoch": 0.47058823529411764, - "grad_norm": 0.09076030552387238, - "learning_rate": 7.318042129302676e-05, - "loss": 0.010283030569553375, - "step": 2760 - }, - { - "epoch": 0.47144075021312876, - "grad_norm": 0.07009804993867874, - "learning_rate": 7.317252865360902e-05, - "loss": 0.010625988245010376, - "step": 2765 - }, - { - "epoch": 0.4722932651321398, - "grad_norm": 0.07213665544986725, - "learning_rate": 7.316461936106826e-05, - "loss": 0.010299822688102723, - "step": 2770 - }, - { - "epoch": 0.4731457800511509, - "grad_norm": 0.08464398980140686, - "learning_rate": 7.315669341909679e-05, - "loss": 0.010440715402364732, - "step": 2775 - }, - { - "epoch": 0.473998294970162, - "grad_norm": 0.08878160268068314, - "learning_rate": 7.314875083139475e-05, - "loss": 0.01015128344297409, - "step": 2780 - }, - { - "epoch": 0.47485080988917305, - "grad_norm": 0.05885029211640358, - "learning_rate": 7.314079160166996e-05, - "loss": 0.00943310335278511, - "step": 2785 - }, - { - "epoch": 0.47570332480818417, - "grad_norm": 0.07288813591003418, - "learning_rate": 7.313281573363809e-05, - "loss": 0.009116576611995697, - "step": 2790 - }, - { - "epoch": 0.47655583972719523, - "grad_norm": 0.09088344126939774, - "learning_rate": 7.31248232310225e-05, - "loss": 0.010344403237104416, - "step": 2795 - }, - { - "epoch": 0.4774083546462063, - "grad_norm": 0.08182916790246964, - "learning_rate": 7.311681409755437e-05, - "loss": 0.010874876379966735, - "step": 2800 - }, - { - "epoch": 0.4782608695652174, - "grad_norm": 0.08280645310878754, - "learning_rate": 7.310878833697264e-05, - "loss": 0.007568147033452988, - "step": 2805 - }, - { - "epoch": 0.47911338448422847, - "grad_norm": 0.10462478548288345, - "learning_rate": 7.3100745953024e-05, - "loss": 0.011740683764219283, - "step": 2810 - }, - { - "epoch": 0.4799658994032396, - "grad_norm": 0.07685881853103638, - "learning_rate": 7.30926869494629e-05, - "loss": 0.009284010529518128, - "step": 2815 - }, - { - "epoch": 0.48081841432225064, - "grad_norm": 0.05211766064167023, - "learning_rate": 7.308461133005156e-05, - "loss": 0.009633362293243408, - "step": 2820 - }, - { - "epoch": 0.4816709292412617, - "grad_norm": 0.07862114161252975, - "learning_rate": 7.307651909855993e-05, - "loss": 0.012355846166610718, - "step": 2825 - }, - { - "epoch": 0.4825234441602728, - "grad_norm": 0.09950421750545502, - "learning_rate": 7.306841025876573e-05, - "loss": 0.010842062532901764, - "step": 2830 - }, - { - "epoch": 0.4833759590792839, - "grad_norm": 0.08446205407381058, - "learning_rate": 7.306028481445446e-05, - "loss": 0.008424797654151916, - "step": 2835 - }, - { - "epoch": 0.484228473998295, - "grad_norm": 0.1424778699874878, - "learning_rate": 7.305214276941934e-05, - "loss": 0.01177324503660202, - "step": 2840 - }, - { - "epoch": 0.48508098891730606, - "grad_norm": 0.07312945276498795, - "learning_rate": 7.304398412746134e-05, - "loss": 0.010038022696971894, - "step": 2845 - }, - { - "epoch": 0.4859335038363171, - "grad_norm": 0.07043888419866562, - "learning_rate": 7.303580889238917e-05, - "loss": 0.008848214149475097, - "step": 2850 - }, - { - "epoch": 0.48678601875532823, - "grad_norm": 0.09851706773042679, - "learning_rate": 7.302761706801934e-05, - "loss": 0.011452250182628632, - "step": 2855 - }, - { - "epoch": 0.4876385336743393, - "grad_norm": 0.07379815727472305, - "learning_rate": 7.301940865817604e-05, - "loss": 0.010087071359157563, - "step": 2860 - }, - { - "epoch": 0.4884910485933504, - "grad_norm": 0.12832187116146088, - "learning_rate": 7.301118366669123e-05, - "loss": 0.013372799754142762, - "step": 2865 - }, - { - "epoch": 0.48934356351236147, - "grad_norm": 0.06776788830757141, - "learning_rate": 7.300294209740462e-05, - "loss": 0.010031795501708985, - "step": 2870 - }, - { - "epoch": 0.49019607843137253, - "grad_norm": 0.06495808809995651, - "learning_rate": 7.299468395416364e-05, - "loss": 0.011152566224336625, - "step": 2875 - }, - { - "epoch": 0.49104859335038364, - "grad_norm": 0.06433792412281036, - "learning_rate": 7.298640924082346e-05, - "loss": 0.012774203717708588, - "step": 2880 - }, - { - "epoch": 0.4919011082693947, - "grad_norm": 0.066926009953022, - "learning_rate": 7.2978117961247e-05, - "loss": 0.011111211776733399, - "step": 2885 - }, - { - "epoch": 0.4927536231884058, - "grad_norm": 0.08211687207221985, - "learning_rate": 7.296981011930493e-05, - "loss": 0.009508269280195237, - "step": 2890 - }, - { - "epoch": 0.4936061381074169, - "grad_norm": 0.09815993160009384, - "learning_rate": 7.296148571887558e-05, - "loss": 0.0117066890001297, - "step": 2895 - }, - { - "epoch": 0.49445865302642794, - "grad_norm": 0.07543535530567169, - "learning_rate": 7.295314476384508e-05, - "loss": 0.008867967873811722, - "step": 2900 - }, - { - "epoch": 0.49531116794543906, - "grad_norm": 0.07558202743530273, - "learning_rate": 7.294478725810728e-05, - "loss": 0.01093400940299034, - "step": 2905 - }, - { - "epoch": 0.4961636828644501, - "grad_norm": 0.06642191112041473, - "learning_rate": 7.293641320556371e-05, - "loss": 0.008366364240646362, - "step": 2910 - }, - { - "epoch": 0.49701619778346123, - "grad_norm": 0.07226760685443878, - "learning_rate": 7.292802261012368e-05, - "loss": 0.012197307497262954, - "step": 2915 - }, - { - "epoch": 0.4978687127024723, - "grad_norm": 0.08546584844589233, - "learning_rate": 7.29196154757042e-05, - "loss": 0.010272269695997238, - "step": 2920 - }, - { - "epoch": 0.49872122762148335, - "grad_norm": 0.0559270940721035, - "learning_rate": 7.291119180622998e-05, - "loss": 0.009690707921981812, - "step": 2925 - }, - { - "epoch": 0.49957374254049447, - "grad_norm": 0.11211635917425156, - "learning_rate": 7.290275160563349e-05, - "loss": 0.01505405604839325, - "step": 2930 - }, - { - "epoch": 0.4999147485080989, - "eval_loss": 0.035044603049755096, - "eval_runtime": 3.5861, - "eval_samples_per_second": 70.272, - "eval_steps_per_second": 1.115, - "step": 2932 - }, - { - "eval_cer_subset": 0.01374436139202371, - "eval_cer_subset_edit_distance": 844, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 2932 - }, - { - "epoch": 0.5004262574595055, - "grad_norm": 0.08485773205757141, - "learning_rate": 7.289429487785488e-05, - "loss": 0.01260426789522171, - "step": 2935 - }, - { - "epoch": 0.5012787723785166, - "grad_norm": 0.08039058744907379, - "learning_rate": 7.288582162684203e-05, - "loss": 0.012322144955396653, - "step": 2940 - }, - { - "epoch": 0.5021312872975278, - "grad_norm": 0.16017615795135498, - "learning_rate": 7.287733185655057e-05, - "loss": 0.009620364010334014, - "step": 2945 - }, - { - "epoch": 0.5029838022165388, - "grad_norm": 0.06721053272485733, - "learning_rate": 7.286882557094376e-05, - "loss": 0.009893904626369476, - "step": 2950 - }, - { - "epoch": 0.5038363171355499, - "grad_norm": 0.08132930099964142, - "learning_rate": 7.286030277399264e-05, - "loss": 0.012833705544471741, - "step": 2955 - }, - { - "epoch": 0.5046888320545609, - "grad_norm": 0.09076893329620361, - "learning_rate": 7.285176346967595e-05, - "loss": 0.011492121219635009, - "step": 2960 - }, - { - "epoch": 0.505541346973572, - "grad_norm": 0.1023377999663353, - "learning_rate": 7.284320766198008e-05, - "loss": 0.01216188371181488, - "step": 2965 - }, - { - "epoch": 0.5063938618925832, - "grad_norm": 0.07568195462226868, - "learning_rate": 7.283463535489921e-05, - "loss": 0.014794780313968659, - "step": 2970 - }, - { - "epoch": 0.5072463768115942, - "grad_norm": 0.11283870786428452, - "learning_rate": 7.282604655243515e-05, - "loss": 0.012774300575256348, - "step": 2975 - }, - { - "epoch": 0.5080988917306053, - "grad_norm": 0.07101167738437653, - "learning_rate": 7.281744125859746e-05, - "loss": 0.010759322345256806, - "step": 2980 - }, - { - "epoch": 0.5089514066496164, - "grad_norm": 0.07677409052848816, - "learning_rate": 7.280881947740336e-05, - "loss": 0.010482230037450791, - "step": 2985 - }, - { - "epoch": 0.5098039215686274, - "grad_norm": 0.08568017184734344, - "learning_rate": 7.280018121287777e-05, - "loss": 0.012674462795257569, - "step": 2990 - }, - { - "epoch": 0.5106564364876386, - "grad_norm": 0.07830876111984253, - "learning_rate": 7.279152646905336e-05, - "loss": 0.009349775314331055, - "step": 2995 - }, - { - "epoch": 0.5115089514066496, - "grad_norm": 0.07408280670642853, - "learning_rate": 7.278285524997044e-05, - "loss": 0.010303238779306412, - "step": 3000 - }, - { - "epoch": 0.5123614663256607, - "grad_norm": 0.09053376317024231, - "learning_rate": 7.277416755967698e-05, - "loss": 0.012187518179416656, - "step": 3005 - }, - { - "epoch": 0.5132139812446718, - "grad_norm": 0.07432437688112259, - "learning_rate": 7.276546340222875e-05, - "loss": 0.009504207968711853, - "step": 3010 - }, - { - "epoch": 0.5140664961636828, - "grad_norm": 0.09075863659381866, - "learning_rate": 7.275674278168908e-05, - "loss": 0.010764679312705994, - "step": 3015 - }, - { - "epoch": 0.514919011082694, - "grad_norm": 0.08363319933414459, - "learning_rate": 7.274800570212909e-05, - "loss": 0.011034657061100007, - "step": 3020 - }, - { - "epoch": 0.5157715260017051, - "grad_norm": 0.08179081231355667, - "learning_rate": 7.273925216762753e-05, - "loss": 0.012276624888181686, - "step": 3025 - }, - { - "epoch": 0.5166240409207161, - "grad_norm": 0.10797501355409622, - "learning_rate": 7.273048218227083e-05, - "loss": 0.008887678384780884, - "step": 3030 - }, - { - "epoch": 0.5174765558397272, - "grad_norm": 0.08237873017787933, - "learning_rate": 7.27216957501531e-05, - "loss": 0.010879174619913102, - "step": 3035 - }, - { - "epoch": 0.5183290707587382, - "grad_norm": 0.10010047256946564, - "learning_rate": 7.271289287537616e-05, - "loss": 0.0103249654173851, - "step": 3040 - }, - { - "epoch": 0.5191815856777494, - "grad_norm": 0.06411991268396378, - "learning_rate": 7.270407356204948e-05, - "loss": 0.006414853036403656, - "step": 3045 - }, - { - "epoch": 0.5200341005967605, - "grad_norm": 0.09925824403762817, - "learning_rate": 7.26952378142902e-05, - "loss": 0.010811964422464371, - "step": 3050 - }, - { - "epoch": 0.5208866155157715, - "grad_norm": 0.07986702769994736, - "learning_rate": 7.268638563622317e-05, - "loss": 0.011965467780828475, - "step": 3055 - }, - { - "epoch": 0.5217391304347826, - "grad_norm": 0.07426656037569046, - "learning_rate": 7.267751703198082e-05, - "loss": 0.0093523807823658, - "step": 3060 - }, - { - "epoch": 0.5225916453537937, - "grad_norm": 0.11460934579372406, - "learning_rate": 7.266863200570338e-05, - "loss": 0.01224176660180092, - "step": 3065 - }, - { - "epoch": 0.5234441602728048, - "grad_norm": 0.10174648463726044, - "learning_rate": 7.265973056153864e-05, - "loss": 0.011203842610120774, - "step": 3070 - }, - { - "epoch": 0.5242966751918159, - "grad_norm": 0.06445316970348358, - "learning_rate": 7.265081270364209e-05, - "loss": 0.010346656292676925, - "step": 3075 - }, - { - "epoch": 0.525149190110827, - "grad_norm": 0.08397547155618668, - "learning_rate": 7.264187843617688e-05, - "loss": 0.011372068524360656, - "step": 3080 - }, - { - "epoch": 0.526001705029838, - "grad_norm": 0.07325135916471481, - "learning_rate": 7.263292776331384e-05, - "loss": 0.01116851419210434, - "step": 3085 - }, - { - "epoch": 0.5268542199488491, - "grad_norm": 0.1034390926361084, - "learning_rate": 7.262396068923144e-05, - "loss": 0.011953853815793992, - "step": 3090 - }, - { - "epoch": 0.5277067348678602, - "grad_norm": 0.08395690470933914, - "learning_rate": 7.26149772181158e-05, - "loss": 0.011437299847602844, - "step": 3095 - }, - { - "epoch": 0.5285592497868713, - "grad_norm": 0.09495387226343155, - "learning_rate": 7.260597735416068e-05, - "loss": 0.009634804725646973, - "step": 3100 - }, - { - "epoch": 0.5294117647058824, - "grad_norm": 0.07444775849580765, - "learning_rate": 7.259696110156756e-05, - "loss": 0.009771790355443954, - "step": 3105 - }, - { - "epoch": 0.5302642796248934, - "grad_norm": 0.061964571475982666, - "learning_rate": 7.258792846454551e-05, - "loss": 0.007979755848646164, - "step": 3110 - }, - { - "epoch": 0.5311167945439045, - "grad_norm": 0.11025935411453247, - "learning_rate": 7.257887944731125e-05, - "loss": 0.012162110209465027, - "step": 3115 - }, - { - "epoch": 0.5319693094629157, - "grad_norm": 0.07793140411376953, - "learning_rate": 7.256981405408918e-05, - "loss": 0.00897146388888359, - "step": 3120 - }, - { - "epoch": 0.5328218243819267, - "grad_norm": 0.0773436427116394, - "learning_rate": 7.256073228911132e-05, - "loss": 0.009621264040470123, - "step": 3125 - }, - { - "epoch": 0.5336743393009378, - "grad_norm": 0.07340693473815918, - "learning_rate": 7.255163415661735e-05, - "loss": 0.01072111278772354, - "step": 3130 - }, - { - "epoch": 0.5345268542199488, - "grad_norm": 0.0971943810582161, - "learning_rate": 7.254251966085455e-05, - "loss": 0.009457825869321822, - "step": 3135 - }, - { - "epoch": 0.5353793691389599, - "grad_norm": 0.08840794116258621, - "learning_rate": 7.25333888060779e-05, - "loss": 0.015866565704345702, - "step": 3140 - }, - { - "epoch": 0.5362318840579711, - "grad_norm": 0.07126007229089737, - "learning_rate": 7.252424159654999e-05, - "loss": 0.012925322353839874, - "step": 3145 - }, - { - "epoch": 0.5370843989769821, - "grad_norm": 0.05989958345890045, - "learning_rate": 7.251507803654103e-05, - "loss": 0.007374878972768784, - "step": 3150 - }, - { - "epoch": 0.5379369138959932, - "grad_norm": 0.0661931037902832, - "learning_rate": 7.250589813032885e-05, - "loss": 0.009713394194841385, - "step": 3155 - }, - { - "epoch": 0.5387894288150042, - "grad_norm": 0.0813523456454277, - "learning_rate": 7.2496701882199e-05, - "loss": 0.007980254292488099, - "step": 3160 - }, - { - "epoch": 0.5396419437340153, - "grad_norm": 0.0565156452357769, - "learning_rate": 7.248748929644453e-05, - "loss": 0.010806798934936523, - "step": 3165 - }, - { - "epoch": 0.5404944586530265, - "grad_norm": 0.045107364654541016, - "learning_rate": 7.247826037736621e-05, - "loss": 0.013011330366134643, - "step": 3170 - }, - { - "epoch": 0.5413469735720375, - "grad_norm": 0.0623495988547802, - "learning_rate": 7.246901512927241e-05, - "loss": 0.012109772115945817, - "step": 3175 - }, - { - "epoch": 0.5421994884910486, - "grad_norm": 0.09943851083517075, - "learning_rate": 7.24597535564791e-05, - "loss": 0.011384092271327972, - "step": 3180 - }, - { - "epoch": 0.5430520034100597, - "grad_norm": 0.12090208381414413, - "learning_rate": 7.245047566330991e-05, - "loss": 0.011156149953603745, - "step": 3185 - }, - { - "epoch": 0.5439045183290707, - "grad_norm": 0.10226333141326904, - "learning_rate": 7.244118145409607e-05, - "loss": 0.01164291426539421, - "step": 3190 - }, - { - "epoch": 0.5447570332480819, - "grad_norm": 0.09011051058769226, - "learning_rate": 7.24318709331764e-05, - "loss": 0.009608177840709687, - "step": 3195 - }, - { - "epoch": 0.545609548167093, - "grad_norm": 0.08180241286754608, - "learning_rate": 7.24225441048974e-05, - "loss": 0.010098953545093537, - "step": 3200 - }, - { - "epoch": 0.546462063086104, - "grad_norm": 0.08325407654047012, - "learning_rate": 7.241320097361312e-05, - "loss": 0.012687146663665771, - "step": 3205 - }, - { - "epoch": 0.5473145780051151, - "grad_norm": 0.11662351340055466, - "learning_rate": 7.240384154368523e-05, - "loss": 0.012003959715366363, - "step": 3210 - }, - { - "epoch": 0.5481670929241261, - "grad_norm": 0.05904731899499893, - "learning_rate": 7.239446581948306e-05, - "loss": 0.012311330437660218, - "step": 3215 - }, - { - "epoch": 0.5490196078431373, - "grad_norm": 0.12498651444911957, - "learning_rate": 7.238507380538347e-05, - "loss": 0.011272794008255005, - "step": 3220 - }, - { - "epoch": 0.5498721227621484, - "grad_norm": 0.06047634035348892, - "learning_rate": 7.2375665505771e-05, - "loss": 0.010353527963161469, - "step": 3225 - }, - { - "epoch": 0.5507246376811594, - "grad_norm": 0.07596508413553238, - "learning_rate": 7.236624092503774e-05, - "loss": 0.011058451980352402, - "step": 3230 - }, - { - "epoch": 0.5515771526001705, - "grad_norm": 0.10000273585319519, - "learning_rate": 7.235680006758339e-05, - "loss": 0.012288159132003785, - "step": 3235 - }, - { - "epoch": 0.5524296675191815, - "grad_norm": 0.08154033869504929, - "learning_rate": 7.234734293781527e-05, - "loss": 0.015510989725589753, - "step": 3240 - }, - { - "epoch": 0.5532821824381927, - "grad_norm": 0.10024677217006683, - "learning_rate": 7.233786954014828e-05, - "loss": 0.010542219877243042, - "step": 3245 - }, - { - "epoch": 0.5541346973572038, - "grad_norm": 0.08001844584941864, - "learning_rate": 7.232837987900492e-05, - "loss": 0.009433221817016602, - "step": 3250 - }, - { - "epoch": 0.5549872122762148, - "grad_norm": 0.05274324119091034, - "learning_rate": 7.231887395881528e-05, - "loss": 0.010475738346576691, - "step": 3255 - }, - { - "epoch": 0.5558397271952259, - "grad_norm": 0.08753672242164612, - "learning_rate": 7.230935178401703e-05, - "loss": 0.007628431916236878, - "step": 3260 - }, - { - "epoch": 0.556692242114237, - "grad_norm": 0.10221699625253677, - "learning_rate": 7.229981335905545e-05, - "loss": 0.011822684109210968, - "step": 3265 - }, - { - "epoch": 0.5575447570332481, - "grad_norm": 0.07665866613388062, - "learning_rate": 7.229025868838336e-05, - "loss": 0.010916930437088013, - "step": 3270 - }, - { - "epoch": 0.5583972719522592, - "grad_norm": 0.08861260861158371, - "learning_rate": 7.228068777646125e-05, - "loss": 0.008925830572843551, - "step": 3275 - }, - { - "epoch": 0.5592497868712702, - "grad_norm": 0.08963657170534134, - "learning_rate": 7.227110062775712e-05, - "loss": 0.014812195301055908, - "step": 3280 - }, - { - "epoch": 0.5601023017902813, - "grad_norm": 0.28550851345062256, - "learning_rate": 7.226149724674655e-05, - "loss": 0.009522277861833572, - "step": 3285 - }, - { - "epoch": 0.5609548167092924, - "grad_norm": 0.057680875062942505, - "learning_rate": 7.225187763791273e-05, - "loss": 0.012893497943878174, - "step": 3290 - }, - { - "epoch": 0.5618073316283035, - "grad_norm": 0.08956284821033478, - "learning_rate": 7.224224180574642e-05, - "loss": 0.012499228864908219, - "step": 3295 - }, - { - "epoch": 0.5626598465473146, - "grad_norm": 0.11929965764284134, - "learning_rate": 7.223258975474596e-05, - "loss": 0.010640453547239304, - "step": 3300 - }, - { - "epoch": 0.5635123614663257, - "grad_norm": 0.09788426756858826, - "learning_rate": 7.222292148941722e-05, - "loss": 0.014677588641643525, - "step": 3305 - }, - { - "epoch": 0.5643648763853367, - "grad_norm": 0.08845673501491547, - "learning_rate": 7.221323701427368e-05, - "loss": 0.009266233444213868, - "step": 3310 - }, - { - "epoch": 0.5652173913043478, - "grad_norm": 0.07864493131637573, - "learning_rate": 7.220353633383636e-05, - "loss": 0.01019999384880066, - "step": 3315 - }, - { - "epoch": 0.566069906223359, - "grad_norm": 0.07658441364765167, - "learning_rate": 7.21938194526339e-05, - "loss": 0.010098284482955933, - "step": 3320 - }, - { - "epoch": 0.56692242114237, - "grad_norm": 0.058863960206508636, - "learning_rate": 7.218408637520243e-05, - "loss": 0.01043831706047058, - "step": 3325 - }, - { - "epoch": 0.5677749360613811, - "grad_norm": 0.05992535129189491, - "learning_rate": 7.217433710608567e-05, - "loss": 0.010804108530282974, - "step": 3330 - }, - { - "epoch": 0.5686274509803921, - "grad_norm": 0.10607994347810745, - "learning_rate": 7.216457164983494e-05, - "loss": 0.01115414798259735, - "step": 3335 - }, - { - "epoch": 0.5694799658994032, - "grad_norm": 0.07557345181703568, - "learning_rate": 7.215479001100904e-05, - "loss": 0.01279982328414917, - "step": 3340 - }, - { - "epoch": 0.5703324808184144, - "grad_norm": 0.064768947660923, - "learning_rate": 7.214499219417439e-05, - "loss": 0.01112583726644516, - "step": 3345 - }, - { - "epoch": 0.5711849957374254, - "grad_norm": 0.08013112843036652, - "learning_rate": 7.213517820390492e-05, - "loss": 0.01265912652015686, - "step": 3350 - }, - { - "epoch": 0.5720375106564365, - "grad_norm": 0.06619428843259811, - "learning_rate": 7.212534804478214e-05, - "loss": 0.01231289878487587, - "step": 3355 - }, - { - "epoch": 0.5728900255754475, - "grad_norm": 0.06123036891222, - "learning_rate": 7.211550172139507e-05, - "loss": 0.012096628546714783, - "step": 3360 - }, - { - "epoch": 0.5737425404944586, - "grad_norm": 0.10050475597381592, - "learning_rate": 7.210563923834034e-05, - "loss": 0.014050082862377166, - "step": 3365 - }, - { - "epoch": 0.5745950554134698, - "grad_norm": 0.05243556201457977, - "learning_rate": 7.209576060022207e-05, - "loss": 0.009351913630962373, - "step": 3370 - }, - { - "epoch": 0.5754475703324808, - "grad_norm": 0.12591946125030518, - "learning_rate": 7.208586581165192e-05, - "loss": 0.012423963844776153, - "step": 3375 - }, - { - "epoch": 0.5763000852514919, - "grad_norm": 0.11871001869440079, - "learning_rate": 7.207595487724912e-05, - "loss": 0.014398403465747833, - "step": 3380 - }, - { - "epoch": 0.577152600170503, - "grad_norm": 0.09194283187389374, - "learning_rate": 7.206602780164044e-05, - "loss": 0.009020231664180756, - "step": 3385 - }, - { - "epoch": 0.578005115089514, - "grad_norm": 0.1465149074792862, - "learning_rate": 7.205608458946013e-05, - "loss": 0.009870749711990357, - "step": 3390 - }, - { - "epoch": 0.5788576300085252, - "grad_norm": 0.07948209345340729, - "learning_rate": 7.204612524535006e-05, - "loss": 0.013135011494159698, - "step": 3395 - }, - { - "epoch": 0.5797101449275363, - "grad_norm": 0.07187635451555252, - "learning_rate": 7.203614977395952e-05, - "loss": 0.010598786920309067, - "step": 3400 - }, - { - "epoch": 0.5805626598465473, - "grad_norm": 0.05511854961514473, - "learning_rate": 7.202615817994545e-05, - "loss": 0.009227041155099869, - "step": 3405 - }, - { - "epoch": 0.5814151747655584, - "grad_norm": 0.05830230563879013, - "learning_rate": 7.201615046797224e-05, - "loss": 0.008167321979999542, - "step": 3410 - }, - { - "epoch": 0.5822676896845694, - "grad_norm": 0.08624587953090668, - "learning_rate": 7.200612664271184e-05, - "loss": 0.012134125083684921, - "step": 3415 - }, - { - "epoch": 0.5831202046035806, - "grad_norm": 0.0744808092713356, - "learning_rate": 7.199608670884366e-05, - "loss": 0.012493259459733962, - "step": 3420 - }, - { - "epoch": 0.5839727195225917, - "grad_norm": 0.07272766530513763, - "learning_rate": 7.19860306710547e-05, - "loss": 0.00806736946105957, - "step": 3425 - }, - { - "epoch": 0.5848252344416027, - "grad_norm": 0.0804983377456665, - "learning_rate": 7.197595853403946e-05, - "loss": 0.01102890819311142, - "step": 3430 - }, - { - "epoch": 0.5856777493606138, - "grad_norm": 0.05326579511165619, - "learning_rate": 7.196587030249994e-05, - "loss": 0.009381016343832016, - "step": 3435 - }, - { - "epoch": 0.5865302642796248, - "grad_norm": 0.07588013261556625, - "learning_rate": 7.195576598114567e-05, - "loss": 0.010961712896823883, - "step": 3440 - }, - { - "epoch": 0.587382779198636, - "grad_norm": 0.09725244343280792, - "learning_rate": 7.194564557469368e-05, - "loss": 0.012034715712070465, - "step": 3445 - }, - { - "epoch": 0.5882352941176471, - "grad_norm": 0.0938539057970047, - "learning_rate": 7.193550908786851e-05, - "loss": 0.012069541215896606, - "step": 3450 - }, - { - "epoch": 0.5890878090366581, - "grad_norm": 0.052410729229450226, - "learning_rate": 7.19253565254022e-05, - "loss": 0.011174223572015762, - "step": 3455 - }, - { - "epoch": 0.5899403239556692, - "grad_norm": 0.08317258954048157, - "learning_rate": 7.191518789203432e-05, - "loss": 0.014452503621578216, - "step": 3460 - }, - { - "epoch": 0.5907928388746803, - "grad_norm": 0.062619149684906, - "learning_rate": 7.190500319251193e-05, - "loss": 0.012830793857574463, - "step": 3465 - }, - { - "epoch": 0.5916453537936914, - "grad_norm": 0.06287284195423126, - "learning_rate": 7.189480243158956e-05, - "loss": 0.013282649219036102, - "step": 3470 - }, - { - "epoch": 0.5924978687127025, - "grad_norm": 0.07136182487010956, - "learning_rate": 7.188458561402928e-05, - "loss": 0.009024892747402192, - "step": 3475 - }, - { - "epoch": 0.5933503836317136, - "grad_norm": 0.09081269055604935, - "learning_rate": 7.187435274460064e-05, - "loss": 0.012044035643339158, - "step": 3480 - }, - { - "epoch": 0.5942028985507246, - "grad_norm": 0.08475978672504425, - "learning_rate": 7.18641038280807e-05, - "loss": 0.010785829275846481, - "step": 3485 - }, - { - "epoch": 0.5950554134697357, - "grad_norm": 0.06322979927062988, - "learning_rate": 7.185383886925397e-05, - "loss": 0.011609486490488052, - "step": 3490 - }, - { - "epoch": 0.5959079283887468, - "grad_norm": 0.07065978646278381, - "learning_rate": 7.18435578729125e-05, - "loss": 0.01128239706158638, - "step": 3495 - }, - { - "epoch": 0.5967604433077579, - "grad_norm": 0.057962607592344284, - "learning_rate": 7.183326084385577e-05, - "loss": 0.009382489323616027, - "step": 3500 - }, - { - "epoch": 0.597612958226769, - "grad_norm": 0.05717672407627106, - "learning_rate": 7.182294778689079e-05, - "loss": 0.010072773694992066, - "step": 3505 - }, - { - "epoch": 0.59846547314578, - "grad_norm": 0.07161569595336914, - "learning_rate": 7.181261870683205e-05, - "loss": 0.011324245482683182, - "step": 3510 - }, - { - "epoch": 0.5993179880647911, - "grad_norm": 0.07468906790018082, - "learning_rate": 7.180227360850148e-05, - "loss": 0.00984283909201622, - "step": 3515 - }, - { - "epoch": 0.6001705029838023, - "grad_norm": 0.071560800075531, - "learning_rate": 7.179191249672855e-05, - "loss": 0.011276674270629884, - "step": 3520 - }, - { - "epoch": 0.6010230179028133, - "grad_norm": 0.05588390305638313, - "learning_rate": 7.178153537635014e-05, - "loss": 0.008921106159687043, - "step": 3525 - }, - { - "epoch": 0.6018755328218244, - "grad_norm": 0.11065732687711716, - "learning_rate": 7.177114225221066e-05, - "loss": 0.0122377447783947, - "step": 3530 - }, - { - "epoch": 0.6027280477408354, - "grad_norm": 0.10121116787195206, - "learning_rate": 7.176073312916194e-05, - "loss": 0.007999545335769654, - "step": 3535 - }, - { - "epoch": 0.6035805626598465, - "grad_norm": 0.06102030724287033, - "learning_rate": 7.175030801206335e-05, - "loss": 0.008767658472061157, - "step": 3540 - }, - { - "epoch": 0.6044330775788577, - "grad_norm": 0.08233699947595596, - "learning_rate": 7.173986690578164e-05, - "loss": 0.010089017450809479, - "step": 3545 - }, - { - "epoch": 0.6052855924978687, - "grad_norm": 0.1655152440071106, - "learning_rate": 7.172940981519108e-05, - "loss": 0.012077460438013077, - "step": 3550 - }, - { - "epoch": 0.6061381074168798, - "grad_norm": 0.11178915202617645, - "learning_rate": 7.171893674517337e-05, - "loss": 0.009319285303354264, - "step": 3555 - }, - { - "epoch": 0.6069906223358909, - "grad_norm": 0.0778600424528122, - "learning_rate": 7.170844770061772e-05, - "loss": 0.012114962190389633, - "step": 3560 - }, - { - "epoch": 0.6078431372549019, - "grad_norm": 0.08708171546459198, - "learning_rate": 7.169794268642075e-05, - "loss": 0.011569589376449585, - "step": 3565 - }, - { - "epoch": 0.6086956521739131, - "grad_norm": 0.06438080966472626, - "learning_rate": 7.168742170748654e-05, - "loss": 0.010296766459941865, - "step": 3570 - }, - { - "epoch": 0.6095481670929241, - "grad_norm": 0.10569975525140762, - "learning_rate": 7.167688476872664e-05, - "loss": 0.008922196924686432, - "step": 3575 - }, - { - "epoch": 0.6104006820119352, - "grad_norm": 0.07466918975114822, - "learning_rate": 7.166633187506004e-05, - "loss": 0.009365256130695342, - "step": 3580 - }, - { - "epoch": 0.6112531969309463, - "grad_norm": 0.1070641577243805, - "learning_rate": 7.16557630314132e-05, - "loss": 0.011525402963161468, - "step": 3585 - }, - { - "epoch": 0.6121057118499573, - "grad_norm": 0.09534542262554169, - "learning_rate": 7.164517824271999e-05, - "loss": 0.010068083554506302, - "step": 3590 - }, - { - "epoch": 0.6129582267689685, - "grad_norm": 0.0643506869673729, - "learning_rate": 7.163457751392175e-05, - "loss": 0.010679592937231063, - "step": 3595 - }, - { - "epoch": 0.6138107416879796, - "grad_norm": 0.11610018461942673, - "learning_rate": 7.162396084996723e-05, - "loss": 0.010074391961097717, - "step": 3600 - }, - { - "epoch": 0.6146632566069906, - "grad_norm": 0.07560709863901138, - "learning_rate": 7.161332825581269e-05, - "loss": 0.013245916366577149, - "step": 3605 - }, - { - "epoch": 0.6155157715260017, - "grad_norm": 0.06540799885988235, - "learning_rate": 7.160267973642173e-05, - "loss": 0.01055695340037346, - "step": 3610 - }, - { - "epoch": 0.6163682864450127, - "grad_norm": 0.05610837787389755, - "learning_rate": 7.159201529676546e-05, - "loss": 0.010231484472751618, - "step": 3615 - }, - { - "epoch": 0.6172208013640239, - "grad_norm": 0.11630856245756149, - "learning_rate": 7.158133494182237e-05, - "loss": 0.01117742881178856, - "step": 3620 - }, - { - "epoch": 0.618073316283035, - "grad_norm": 0.08508500456809998, - "learning_rate": 7.157063867657844e-05, - "loss": 0.010253986716270447, - "step": 3625 - }, - { - "epoch": 0.618925831202046, - "grad_norm": 0.067935511469841, - "learning_rate": 7.155992650602702e-05, - "loss": 0.009731527417898178, - "step": 3630 - }, - { - "epoch": 0.6197783461210571, - "grad_norm": 0.0784364566206932, - "learning_rate": 7.154919843516892e-05, - "loss": 0.009552852809429168, - "step": 3635 - }, - { - "epoch": 0.6206308610400681, - "grad_norm": 0.10788855701684952, - "learning_rate": 7.153845446901234e-05, - "loss": 0.011269643902778625, - "step": 3640 - }, - { - "epoch": 0.6214833759590793, - "grad_norm": 0.08664087951183319, - "learning_rate": 7.152769461257294e-05, - "loss": 0.010251335799694061, - "step": 3645 - }, - { - "epoch": 0.6223358908780904, - "grad_norm": 0.06885403394699097, - "learning_rate": 7.151691887087377e-05, - "loss": 0.008078257739543914, - "step": 3650 - }, - { - "epoch": 0.6231884057971014, - "grad_norm": 0.09345501661300659, - "learning_rate": 7.150612724894531e-05, - "loss": 0.012022207677364349, - "step": 3655 - }, - { - "epoch": 0.6240409207161125, - "grad_norm": 0.08502865582704544, - "learning_rate": 7.149531975182543e-05, - "loss": 0.00932946428656578, - "step": 3660 - }, - { - "epoch": 0.6248934356351236, - "grad_norm": 0.06249995157122612, - "learning_rate": 7.148449638455947e-05, - "loss": 0.011525212973356246, - "step": 3665 - }, - { - "epoch": 0.6257459505541347, - "grad_norm": 0.04836896434426308, - "learning_rate": 7.14736571522001e-05, - "loss": 0.010347714275121688, - "step": 3670 - }, - { - "epoch": 0.6265984654731458, - "grad_norm": 0.06358285248279572, - "learning_rate": 7.146280205980745e-05, - "loss": 0.009779715538024902, - "step": 3675 - }, - { - "epoch": 0.6274509803921569, - "grad_norm": 0.07596850395202637, - "learning_rate": 7.145193111244903e-05, - "loss": 0.010838811099529267, - "step": 3680 - }, - { - "epoch": 0.6283034953111679, - "grad_norm": 0.05986448749899864, - "learning_rate": 7.144104431519977e-05, - "loss": 0.009671849757432937, - "step": 3685 - }, - { - "epoch": 0.629156010230179, - "grad_norm": 0.047881439328193665, - "learning_rate": 7.143014167314197e-05, - "loss": 0.007660867273807525, - "step": 3690 - }, - { - "epoch": 0.6300085251491901, - "grad_norm": 0.06409293413162231, - "learning_rate": 7.141922319136537e-05, - "loss": 0.013374905288219451, - "step": 3695 - }, - { - "epoch": 0.6308610400682012, - "grad_norm": 0.0767306461930275, - "learning_rate": 7.140828887496707e-05, - "loss": 0.006885652989149093, - "step": 3700 - }, - { - "epoch": 0.6317135549872123, - "grad_norm": 0.08192065358161926, - "learning_rate": 7.139733872905158e-05, - "loss": 0.013760556280612946, - "step": 3705 - }, - { - "epoch": 0.6325660699062233, - "grad_norm": 0.09693574160337448, - "learning_rate": 7.138637275873078e-05, - "loss": 0.009739194065332413, - "step": 3710 - }, - { - "epoch": 0.6334185848252344, - "grad_norm": 0.08232755959033966, - "learning_rate": 7.137539096912395e-05, - "loss": 0.010294197499752045, - "step": 3715 - }, - { - "epoch": 0.6342710997442456, - "grad_norm": 0.06582340598106384, - "learning_rate": 7.136439336535776e-05, - "loss": 0.010686574131250381, - "step": 3720 - }, - { - "epoch": 0.6351236146632566, - "grad_norm": 0.07385887205600739, - "learning_rate": 7.135337995256626e-05, - "loss": 0.011403677612543106, - "step": 3725 - }, - { - "epoch": 0.6359761295822677, - "grad_norm": 0.11854248493909836, - "learning_rate": 7.134235073589087e-05, - "loss": 0.01180308759212494, - "step": 3730 - }, - { - "epoch": 0.6368286445012787, - "grad_norm": 0.076481893658638, - "learning_rate": 7.133130572048041e-05, - "loss": 0.011076596379280091, - "step": 3735 - }, - { - "epoch": 0.6376811594202898, - "grad_norm": 0.09552651643753052, - "learning_rate": 7.132024491149103e-05, - "loss": 0.014420199394226074, - "step": 3740 - }, - { - "epoch": 0.638533674339301, - "grad_norm": 0.04855124279856682, - "learning_rate": 7.130916831408633e-05, - "loss": 0.008350960910320282, - "step": 3745 - }, - { - "epoch": 0.639386189258312, - "grad_norm": 0.0796368345618248, - "learning_rate": 7.12980759334372e-05, - "loss": 0.010764746367931366, - "step": 3750 - }, - { - "epoch": 0.6402387041773231, - "grad_norm": 0.07030697911977768, - "learning_rate": 7.128696777472193e-05, - "loss": 0.010386807471513748, - "step": 3755 - }, - { - "epoch": 0.6410912190963342, - "grad_norm": 0.05930609628558159, - "learning_rate": 7.127584384312619e-05, - "loss": 0.008884093910455703, - "step": 3760 - }, - { - "epoch": 0.6419437340153452, - "grad_norm": 0.07495228201150894, - "learning_rate": 7.126470414384299e-05, - "loss": 0.010249865800142288, - "step": 3765 - }, - { - "epoch": 0.6427962489343564, - "grad_norm": 0.12954963743686676, - "learning_rate": 7.125354868207275e-05, - "loss": 0.013017497956752777, - "step": 3770 - }, - { - "epoch": 0.6436487638533674, - "grad_norm": 0.08893310278654099, - "learning_rate": 7.124237746302317e-05, - "loss": 0.010649867355823517, - "step": 3775 - }, - { - "epoch": 0.6445012787723785, - "grad_norm": 0.08650866150856018, - "learning_rate": 7.123119049190935e-05, - "loss": 0.012544044852256775, - "step": 3780 - }, - { - "epoch": 0.6453537936913896, - "grad_norm": 0.06374052166938782, - "learning_rate": 7.121998777395375e-05, - "loss": 0.007669864594936371, - "step": 3785 - }, - { - "epoch": 0.6462063086104007, - "grad_norm": 0.08226713538169861, - "learning_rate": 7.120876931438618e-05, - "loss": 0.007969621568918228, - "step": 3790 - }, - { - "epoch": 0.6470588235294118, - "grad_norm": 0.10450884699821472, - "learning_rate": 7.119753511844377e-05, - "loss": 0.013088032603263855, - "step": 3795 - }, - { - "epoch": 0.6479113384484229, - "grad_norm": 0.08459076285362244, - "learning_rate": 7.118628519137104e-05, - "loss": 0.01125529408454895, - "step": 3800 - }, - { - "epoch": 0.6487638533674339, - "grad_norm": 0.09018636494874954, - "learning_rate": 7.11750195384198e-05, - "loss": 0.008683501929044723, - "step": 3805 - }, - { - "epoch": 0.649616368286445, - "grad_norm": 0.07949680835008621, - "learning_rate": 7.116373816484927e-05, - "loss": 0.008904790878295899, - "step": 3810 - }, - { - "epoch": 0.6504688832054561, - "grad_norm": 0.14297716319561005, - "learning_rate": 7.115244107592593e-05, - "loss": 0.01503775417804718, - "step": 3815 - }, - { - "epoch": 0.6513213981244672, - "grad_norm": 0.051478032022714615, - "learning_rate": 7.114112827692367e-05, - "loss": 0.011145923286676407, - "step": 3820 - }, - { - "epoch": 0.6521739130434783, - "grad_norm": 0.0686139240860939, - "learning_rate": 7.112979977312365e-05, - "loss": 0.009445450454950332, - "step": 3825 - }, - { - "epoch": 0.6530264279624893, - "grad_norm": 0.08674909919500351, - "learning_rate": 7.111845556981444e-05, - "loss": 0.009345399588346482, - "step": 3830 - }, - { - "epoch": 0.6538789428815004, - "grad_norm": 0.07799270749092102, - "learning_rate": 7.110709567229182e-05, - "loss": 0.009722919762134552, - "step": 3835 - }, - { - "epoch": 0.6547314578005116, - "grad_norm": 0.07891912013292313, - "learning_rate": 7.109572008585905e-05, - "loss": 0.009985177218914032, - "step": 3840 - }, - { - "epoch": 0.6555839727195226, - "grad_norm": 0.07315738499164581, - "learning_rate": 7.108432881582656e-05, - "loss": 0.011729113757610321, - "step": 3845 - }, - { - "epoch": 0.6564364876385337, - "grad_norm": 0.04961124807596207, - "learning_rate": 7.107292186751222e-05, - "loss": 0.008087723702192306, - "step": 3850 - }, - { - "epoch": 0.6572890025575447, - "grad_norm": 0.0745200589299202, - "learning_rate": 7.106149924624115e-05, - "loss": 0.010474404692649842, - "step": 3855 - }, - { - "epoch": 0.6581415174765558, - "grad_norm": 0.06290512531995773, - "learning_rate": 7.105006095734581e-05, - "loss": 0.009356130659580231, - "step": 3860 - }, - { - "epoch": 0.658994032395567, - "grad_norm": 0.058479905128479004, - "learning_rate": 7.1038607006166e-05, - "loss": 0.008637580275535583, - "step": 3865 - }, - { - "epoch": 0.659846547314578, - "grad_norm": 0.07301484048366547, - "learning_rate": 7.102713739804879e-05, - "loss": 0.015610474348068237, - "step": 3870 - }, - { - "epoch": 0.6606990622335891, - "grad_norm": 0.07421465218067169, - "learning_rate": 7.101565213834855e-05, - "loss": 0.011201824992895126, - "step": 3875 - }, - { - "epoch": 0.6615515771526002, - "grad_norm": 0.06928746402263641, - "learning_rate": 7.100415123242701e-05, - "loss": 0.007224821299314499, - "step": 3880 - }, - { - "epoch": 0.6624040920716112, - "grad_norm": 0.0669165551662445, - "learning_rate": 7.099263468565317e-05, - "loss": 0.007274401932954788, - "step": 3885 - }, - { - "epoch": 0.6632566069906224, - "grad_norm": 0.09326919168233871, - "learning_rate": 7.098110250340334e-05, - "loss": 0.008258016407489776, - "step": 3890 - }, - { - "epoch": 0.6641091219096334, - "grad_norm": 0.07563190162181854, - "learning_rate": 7.096955469106111e-05, - "loss": 0.01005811095237732, - "step": 3895 - }, - { - "epoch": 0.6649616368286445, - "grad_norm": 0.10135438293218613, - "learning_rate": 7.09579912540174e-05, - "loss": 0.009129725396633148, - "step": 3900 - }, - { - "epoch": 0.6658141517476556, - "grad_norm": 0.07946127653121948, - "learning_rate": 7.094641219767041e-05, - "loss": 0.013300496339797973, - "step": 3905 - }, - { - "epoch": 0.6666666666666666, - "grad_norm": 0.0727713331580162, - "learning_rate": 7.093481752742561e-05, - "loss": 0.01028701215982437, - "step": 3910 - }, - { - "epoch": 0.6675191815856778, - "grad_norm": 0.0718616396188736, - "learning_rate": 7.092320724869578e-05, - "loss": 0.009694813191890717, - "step": 3915 - }, - { - "epoch": 0.6683716965046889, - "grad_norm": 0.07789818942546844, - "learning_rate": 7.091158136690102e-05, - "loss": 0.009028838574886322, - "step": 3920 - }, - { - "epoch": 0.6692242114236999, - "grad_norm": 0.07319378852844238, - "learning_rate": 7.089993988746862e-05, - "loss": 0.008582034707069397, - "step": 3925 - }, - { - "epoch": 0.670076726342711, - "grad_norm": 0.086976557970047, - "learning_rate": 7.088828281583326e-05, - "loss": 0.013991822302341462, - "step": 3930 - }, - { - "epoch": 0.670929241261722, - "grad_norm": 0.07413294911384583, - "learning_rate": 7.087661015743681e-05, - "loss": 0.010896880924701691, - "step": 3935 - }, - { - "epoch": 0.6717817561807332, - "grad_norm": 0.12066303938627243, - "learning_rate": 7.08649219177285e-05, - "loss": 0.011574408411979676, - "step": 3940 - }, - { - "epoch": 0.6726342710997443, - "grad_norm": 0.11789914965629578, - "learning_rate": 7.085321810216474e-05, - "loss": 0.011523760855197906, - "step": 3945 - }, - { - "epoch": 0.6734867860187553, - "grad_norm": 0.07654725015163422, - "learning_rate": 7.084149871620929e-05, - "loss": 0.010388451814651489, - "step": 3950 - }, - { - "epoch": 0.6743393009377664, - "grad_norm": 0.05072671175003052, - "learning_rate": 7.082976376533315e-05, - "loss": 0.009674163907766343, - "step": 3955 - }, - { - "epoch": 0.6751918158567775, - "grad_norm": 0.08331634104251862, - "learning_rate": 7.081801325501458e-05, - "loss": 0.01096268892288208, - "step": 3960 - }, - { - "epoch": 0.6760443307757886, - "grad_norm": 0.03134739026427269, - "learning_rate": 7.08062471907391e-05, - "loss": 0.009522407501935958, - "step": 3965 - }, - { - "epoch": 0.6768968456947997, - "grad_norm": 0.09123755246400833, - "learning_rate": 7.079446557799951e-05, - "loss": 0.011530914902687072, - "step": 3970 - }, - { - "epoch": 0.6777493606138107, - "grad_norm": 0.04438139498233795, - "learning_rate": 7.078266842229585e-05, - "loss": 0.007757561653852463, - "step": 3975 - }, - { - "epoch": 0.6786018755328218, - "grad_norm": 0.06562457978725433, - "learning_rate": 7.077085572913543e-05, - "loss": 0.010243573784828186, - "step": 3980 - }, - { - "epoch": 0.6794543904518329, - "grad_norm": 0.08872365206480026, - "learning_rate": 7.075902750403283e-05, - "loss": 0.009365381300449371, - "step": 3985 - }, - { - "epoch": 0.680306905370844, - "grad_norm": 0.06977558881044388, - "learning_rate": 7.074718375250982e-05, - "loss": 0.010138784348964692, - "step": 3990 - }, - { - "epoch": 0.6811594202898551, - "grad_norm": 0.08196771889925003, - "learning_rate": 7.073532448009547e-05, - "loss": 0.011172623187303544, - "step": 3995 - }, - { - "epoch": 0.6820119352088662, - "grad_norm": 0.09635947644710541, - "learning_rate": 7.072344969232611e-05, - "loss": 0.011570926010608672, - "step": 4000 - }, - { - "epoch": 0.6828644501278772, - "grad_norm": 0.10703961551189423, - "learning_rate": 7.071155939474525e-05, - "loss": 0.010987398028373719, - "step": 4005 - }, - { - "epoch": 0.6837169650468883, - "grad_norm": 0.08124027401208878, - "learning_rate": 7.06996535929037e-05, - "loss": 0.009500280767679215, - "step": 4010 - }, - { - "epoch": 0.6845694799658995, - "grad_norm": 0.1272915005683899, - "learning_rate": 7.068773229235946e-05, - "loss": 0.011316341906785965, - "step": 4015 - }, - { - "epoch": 0.6854219948849105, - "grad_norm": 0.05040539428591728, - "learning_rate": 7.067579549867782e-05, - "loss": 0.009714095294475556, - "step": 4020 - }, - { - "epoch": 0.6862745098039216, - "grad_norm": 0.0954902172088623, - "learning_rate": 7.066384321743125e-05, - "loss": 0.01280902624130249, - "step": 4025 - }, - { - "epoch": 0.6871270247229326, - "grad_norm": 0.06131720915436745, - "learning_rate": 7.065187545419947e-05, - "loss": 0.00962383598089218, - "step": 4030 - }, - { - "epoch": 0.6879795396419437, - "grad_norm": 0.09720136225223541, - "learning_rate": 7.063989221456946e-05, - "loss": 0.00951121300458908, - "step": 4035 - }, - { - "epoch": 0.6888320545609549, - "grad_norm": 0.09116765856742859, - "learning_rate": 7.062789350413536e-05, - "loss": 0.012013505399227142, - "step": 4040 - }, - { - "epoch": 0.6896845694799659, - "grad_norm": 0.15665945410728455, - "learning_rate": 7.061587932849858e-05, - "loss": 0.012792985141277313, - "step": 4045 - }, - { - "epoch": 0.690537084398977, - "grad_norm": 0.05531725287437439, - "learning_rate": 7.060384969326775e-05, - "loss": 0.009135130047798156, - "step": 4050 - }, - { - "epoch": 0.691389599317988, - "grad_norm": 0.05050938203930855, - "learning_rate": 7.059180460405869e-05, - "loss": 0.01005997508764267, - "step": 4055 - }, - { - "epoch": 0.6922421142369991, - "grad_norm": 0.08903607726097107, - "learning_rate": 7.057974406649444e-05, - "loss": 0.008456438779830933, - "step": 4060 - }, - { - "epoch": 0.6930946291560103, - "grad_norm": 0.1395196169614792, - "learning_rate": 7.056766808620529e-05, - "loss": 0.012946255505084991, - "step": 4065 - }, - { - "epoch": 0.6939471440750213, - "grad_norm": 0.13844923675060272, - "learning_rate": 7.055557666882866e-05, - "loss": 0.008691005408763885, - "step": 4070 - }, - { - "epoch": 0.6947996589940324, - "grad_norm": 0.0776091143488884, - "learning_rate": 7.054346982000928e-05, - "loss": 0.011200450360774994, - "step": 4075 - }, - { - "epoch": 0.6956521739130435, - "grad_norm": 0.06444083154201508, - "learning_rate": 7.0531347545399e-05, - "loss": 0.010937537997961044, - "step": 4080 - }, - { - "epoch": 0.6965046888320545, - "grad_norm": 0.07561453431844711, - "learning_rate": 7.05192098506569e-05, - "loss": 0.00827426165342331, - "step": 4085 - }, - { - "epoch": 0.6973572037510657, - "grad_norm": 0.06595294177532196, - "learning_rate": 7.050705674144927e-05, - "loss": 0.007974696159362794, - "step": 4090 - }, - { - "epoch": 0.6982097186700768, - "grad_norm": 0.08887284994125366, - "learning_rate": 7.049488822344959e-05, - "loss": 0.009547770768404008, - "step": 4095 - }, - { - "epoch": 0.6990622335890878, - "grad_norm": 0.06858290731906891, - "learning_rate": 7.04827043023385e-05, - "loss": 0.012419018894433975, - "step": 4100 - }, - { - "epoch": 0.6999147485080989, - "grad_norm": 0.09107037633657455, - "learning_rate": 7.047050498380391e-05, - "loss": 0.008159243315458298, - "step": 4105 - }, - { - "epoch": 0.7007672634271099, - "grad_norm": 0.062287479639053345, - "learning_rate": 7.045829027354082e-05, - "loss": 0.00995248556137085, - "step": 4110 - }, - { - "epoch": 0.7016197783461211, - "grad_norm": 0.11668206751346588, - "learning_rate": 7.044606017725148e-05, - "loss": 0.012902414798736573, - "step": 4115 - }, - { - "epoch": 0.7024722932651322, - "grad_norm": 0.08674585819244385, - "learning_rate": 7.043381470064532e-05, - "loss": 0.010076310485601425, - "step": 4120 - }, - { - "epoch": 0.7033248081841432, - "grad_norm": 0.09690031409263611, - "learning_rate": 7.042155384943892e-05, - "loss": 0.011086124181747436, - "step": 4125 - }, - { - "epoch": 0.7041773231031543, - "grad_norm": 0.09527027606964111, - "learning_rate": 7.040927762935605e-05, - "loss": 0.010631310194730759, - "step": 4130 - }, - { - "epoch": 0.7050298380221653, - "grad_norm": 0.07089316844940186, - "learning_rate": 7.039698604612765e-05, - "loss": 0.010472215712070465, - "step": 4135 - }, - { - "epoch": 0.7058823529411765, - "grad_norm": 0.07314343005418777, - "learning_rate": 7.038467910549188e-05, - "loss": 0.011205179244279861, - "step": 4140 - }, - { - "epoch": 0.7067348678601876, - "grad_norm": 0.10004976391792297, - "learning_rate": 7.037235681319399e-05, - "loss": 0.011671188473701476, - "step": 4145 - }, - { - "epoch": 0.7075873827791986, - "grad_norm": 0.06655722856521606, - "learning_rate": 7.036001917498645e-05, - "loss": 0.008725546300411224, - "step": 4150 - }, - { - "epoch": 0.7084398976982097, - "grad_norm": 0.0563860684633255, - "learning_rate": 7.034766619662888e-05, - "loss": 0.009952855855226516, - "step": 4155 - }, - { - "epoch": 0.7092924126172208, - "grad_norm": 0.09032288193702698, - "learning_rate": 7.033529788388806e-05, - "loss": 0.010940121859312058, - "step": 4160 - }, - { - "epoch": 0.7101449275362319, - "grad_norm": 0.10090665519237518, - "learning_rate": 7.032291424253793e-05, - "loss": 0.0093452550470829, - "step": 4165 - }, - { - "epoch": 0.710997442455243, - "grad_norm": 0.08737532049417496, - "learning_rate": 7.03105152783596e-05, - "loss": 0.011567962169647217, - "step": 4170 - }, - { - "epoch": 0.711849957374254, - "grad_norm": 0.08184633404016495, - "learning_rate": 7.029810099714128e-05, - "loss": 0.011243235319852829, - "step": 4175 - }, - { - "epoch": 0.7127024722932651, - "grad_norm": 0.10227608680725098, - "learning_rate": 7.028567140467842e-05, - "loss": 0.01062348037958145, - "step": 4180 - }, - { - "epoch": 0.7135549872122762, - "grad_norm": 0.08998764306306839, - "learning_rate": 7.027322650677353e-05, - "loss": 0.01058843582868576, - "step": 4185 - }, - { - "epoch": 0.7144075021312873, - "grad_norm": 0.06969588994979858, - "learning_rate": 7.02607663092363e-05, - "loss": 0.009745591133832932, - "step": 4190 - }, - { - "epoch": 0.7152600170502984, - "grad_norm": 0.08256277441978455, - "learning_rate": 7.024829081788359e-05, - "loss": 0.009450466185808182, - "step": 4195 - }, - { - "epoch": 0.7161125319693095, - "grad_norm": 0.06720574200153351, - "learning_rate": 7.023580003853937e-05, - "loss": 0.006700781732797622, - "step": 4200 - }, - { - "epoch": 0.7169650468883205, - "grad_norm": 0.1428842693567276, - "learning_rate": 7.022329397703474e-05, - "loss": 0.009295140206813813, - "step": 4205 - }, - { - "epoch": 0.7178175618073316, - "grad_norm": 0.11677515506744385, - "learning_rate": 7.021077263920794e-05, - "loss": 0.011417123675346374, - "step": 4210 - }, - { - "epoch": 0.7186700767263428, - "grad_norm": 0.06874742358922958, - "learning_rate": 7.019823603090437e-05, - "loss": 0.013518881797790528, - "step": 4215 - }, - { - "epoch": 0.7195225916453538, - "grad_norm": 0.06695922464132309, - "learning_rate": 7.018568415797651e-05, - "loss": 0.008886832743883133, - "step": 4220 - }, - { - "epoch": 0.7203751065643649, - "grad_norm": 0.09428033232688904, - "learning_rate": 7.017311702628402e-05, - "loss": 0.009926854819059371, - "step": 4225 - }, - { - "epoch": 0.7212276214833759, - "grad_norm": 0.08420582115650177, - "learning_rate": 7.016053464169362e-05, - "loss": 0.011952979117631912, - "step": 4230 - }, - { - "epoch": 0.722080136402387, - "grad_norm": 0.07804932445287704, - "learning_rate": 7.014793701007922e-05, - "loss": 0.009345601499080657, - "step": 4235 - }, - { - "epoch": 0.7229326513213982, - "grad_norm": 0.10204415768384933, - "learning_rate": 7.013532413732179e-05, - "loss": 0.009596188366413117, - "step": 4240 - }, - { - "epoch": 0.7237851662404092, - "grad_norm": 0.06207561865448952, - "learning_rate": 7.012269602930946e-05, - "loss": 0.010864783823490144, - "step": 4245 - }, - { - "epoch": 0.7246376811594203, - "grad_norm": 0.07258995622396469, - "learning_rate": 7.011005269193743e-05, - "loss": 0.010417935252189637, - "step": 4250 - }, - { - "epoch": 0.7254901960784313, - "grad_norm": 0.09797866642475128, - "learning_rate": 7.009739413110803e-05, - "loss": 0.009975450485944748, - "step": 4255 - }, - { - "epoch": 0.7263427109974424, - "grad_norm": 0.14229723811149597, - "learning_rate": 7.008472035273071e-05, - "loss": 0.013093425333499909, - "step": 4260 - }, - { - "epoch": 0.7271952259164536, - "grad_norm": 0.10052912682294846, - "learning_rate": 7.007203136272202e-05, - "loss": 0.008044174313545227, - "step": 4265 - }, - { - "epoch": 0.7280477408354646, - "grad_norm": 0.06391840428113937, - "learning_rate": 7.005932716700558e-05, - "loss": 0.009895801544189453, - "step": 4270 - }, - { - "epoch": 0.7289002557544757, - "grad_norm": 0.08301703631877899, - "learning_rate": 7.004660777151213e-05, - "loss": 0.008399789780378341, - "step": 4275 - }, - { - "epoch": 0.7297527706734868, - "grad_norm": 0.09191301465034485, - "learning_rate": 7.003387318217954e-05, - "loss": 0.010163726657629013, - "step": 4280 - }, - { - "epoch": 0.7306052855924978, - "grad_norm": 0.06292443722486496, - "learning_rate": 7.00211234049527e-05, - "loss": 0.010847686976194381, - "step": 4285 - }, - { - "epoch": 0.731457800511509, - "grad_norm": 0.08020442724227905, - "learning_rate": 7.000835844578365e-05, - "loss": 0.010198664665222169, - "step": 4290 - }, - { - "epoch": 0.73231031543052, - "grad_norm": 0.08008337765932083, - "learning_rate": 6.999557831063152e-05, - "loss": 0.010532062500715256, - "step": 4295 - }, - { - "epoch": 0.7331628303495311, - "grad_norm": 0.09209048002958298, - "learning_rate": 6.998278300546245e-05, - "loss": 0.012655872106552123, - "step": 4300 - }, - { - "epoch": 0.7340153452685422, - "grad_norm": 0.1040966734290123, - "learning_rate": 6.996997253624974e-05, - "loss": 0.009482499212026596, - "step": 4305 - }, - { - "epoch": 0.7348678601875532, - "grad_norm": 0.06724270433187485, - "learning_rate": 6.995714690897376e-05, - "loss": 0.008470554649829865, - "step": 4310 - }, - { - "epoch": 0.7357203751065644, - "grad_norm": 0.050487734377384186, - "learning_rate": 6.994430612962192e-05, - "loss": 0.009740649163722992, - "step": 4315 - }, - { - "epoch": 0.7365728900255755, - "grad_norm": 0.07633031159639359, - "learning_rate": 6.993145020418873e-05, - "loss": 0.009455478191375733, - "step": 4320 - }, - { - "epoch": 0.7374254049445865, - "grad_norm": 0.11053632944822311, - "learning_rate": 6.991857913867575e-05, - "loss": 0.0111383855342865, - "step": 4325 - }, - { - "epoch": 0.7382779198635976, - "grad_norm": 0.07932359725236893, - "learning_rate": 6.990569293909165e-05, - "loss": 0.010170862078666687, - "step": 4330 - }, - { - "epoch": 0.7391304347826086, - "grad_norm": 0.06205928325653076, - "learning_rate": 6.98927916114521e-05, - "loss": 0.009742221236228943, - "step": 4335 - }, - { - "epoch": 0.7399829497016198, - "grad_norm": 0.07431669533252716, - "learning_rate": 6.987987516177989e-05, - "loss": 0.009026934206485749, - "step": 4340 - }, - { - "epoch": 0.7408354646206309, - "grad_norm": 0.06495700776576996, - "learning_rate": 6.986694359610486e-05, - "loss": 0.010289526730775832, - "step": 4345 - }, - { - "epoch": 0.7416879795396419, - "grad_norm": 0.07561559230089188, - "learning_rate": 6.985399692046387e-05, - "loss": 0.012499828636646271, - "step": 4350 - }, - { - "epoch": 0.742540494458653, - "grad_norm": 0.07708913832902908, - "learning_rate": 6.984103514090087e-05, - "loss": 0.01143759787082672, - "step": 4355 - }, - { - "epoch": 0.7433930093776641, - "grad_norm": 0.1315995305776596, - "learning_rate": 6.982805826346687e-05, - "loss": 0.010377982258796692, - "step": 4360 - }, - { - "epoch": 0.7442455242966752, - "grad_norm": 0.08346904814243317, - "learning_rate": 6.981506629421986e-05, - "loss": 0.008995984494686127, - "step": 4365 - }, - { - "epoch": 0.7450980392156863, - "grad_norm": 0.0814853310585022, - "learning_rate": 6.980205923922497e-05, - "loss": 0.009719532728195191, - "step": 4370 - }, - { - "epoch": 0.7459505541346974, - "grad_norm": 0.06594623625278473, - "learning_rate": 6.978903710455431e-05, - "loss": 0.008998245745897294, - "step": 4375 - }, - { - "epoch": 0.7468030690537084, - "grad_norm": 0.09526190906763077, - "learning_rate": 6.977599989628704e-05, - "loss": 0.010040522366762162, - "step": 4380 - }, - { - "epoch": 0.7476555839727195, - "grad_norm": 0.0938214361667633, - "learning_rate": 6.976294762050935e-05, - "loss": 0.010504753142595292, - "step": 4385 - }, - { - "epoch": 0.7485080988917306, - "grad_norm": 0.09816118329763412, - "learning_rate": 6.97498802833145e-05, - "loss": 0.011645899713039398, - "step": 4390 - }, - { - "epoch": 0.7493606138107417, - "grad_norm": 0.0780767872929573, - "learning_rate": 6.973679789080276e-05, - "loss": 0.011689887195825577, - "step": 4395 - }, - { - "epoch": 0.7498721227621483, - "eval_loss": 0.03396161273121834, - "eval_runtime": 3.6324, - "eval_samples_per_second": 69.376, - "eval_steps_per_second": 1.101, - "step": 4398 - }, - { - "eval_cer_subset": 0.01302783070334001, - "eval_cer_subset_edit_distance": 800, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 4398 - }, - { - "epoch": 0.7502131287297528, - "grad_norm": 0.061287231743335724, - "learning_rate": 6.972370044908141e-05, - "loss": 0.011720015108585358, - "step": 4400 - }, - { - "epoch": 0.7510656436487638, - "grad_norm": 0.0668778270483017, - "learning_rate": 6.971058796426478e-05, - "loss": 0.012064819037914277, - "step": 4405 - }, - { - "epoch": 0.7519181585677749, - "grad_norm": 0.07897942513227463, - "learning_rate": 6.969746044247421e-05, - "loss": 0.010592888295650481, - "step": 4410 - }, - { - "epoch": 0.7527706734867861, - "grad_norm": 0.09090534597635269, - "learning_rate": 6.968431788983806e-05, - "loss": 0.011600010097026825, - "step": 4415 - }, - { - "epoch": 0.7536231884057971, - "grad_norm": 0.080506332218647, - "learning_rate": 6.967116031249172e-05, - "loss": 0.013006125390529633, - "step": 4420 - }, - { - "epoch": 0.7544757033248082, - "grad_norm": 0.04851040989160538, - "learning_rate": 6.965798771657758e-05, - "loss": 0.010165790468454361, - "step": 4425 - }, - { - "epoch": 0.7553282182438192, - "grad_norm": 0.06298603117465973, - "learning_rate": 6.964480010824505e-05, - "loss": 0.007800602912902832, - "step": 4430 - }, - { - "epoch": 0.7561807331628303, - "grad_norm": 0.09919694811105728, - "learning_rate": 6.963159749365053e-05, - "loss": 0.010877586901187897, - "step": 4435 - }, - { - "epoch": 0.7570332480818415, - "grad_norm": 0.083896704018116, - "learning_rate": 6.961837987895747e-05, - "loss": 0.011114003509283066, - "step": 4440 - }, - { - "epoch": 0.7578857630008525, - "grad_norm": 0.0452699176967144, - "learning_rate": 6.960514727033626e-05, - "loss": 0.008609171956777573, - "step": 4445 - }, - { - "epoch": 0.7587382779198636, - "grad_norm": 0.08951374143362045, - "learning_rate": 6.959189967396435e-05, - "loss": 0.01193360835313797, - "step": 4450 - }, - { - "epoch": 0.7595907928388747, - "grad_norm": 0.08749551326036453, - "learning_rate": 6.957863709602611e-05, - "loss": 0.009163837879896164, - "step": 4455 - }, - { - "epoch": 0.7604433077578857, - "grad_norm": 0.09304409474134445, - "learning_rate": 6.956535954271301e-05, - "loss": 0.01038273349404335, - "step": 4460 - }, - { - "epoch": 0.7612958226768969, - "grad_norm": 0.06662629544734955, - "learning_rate": 6.955206702022342e-05, - "loss": 0.010570932179689407, - "step": 4465 - }, - { - "epoch": 0.7621483375959079, - "grad_norm": 0.07736595720052719, - "learning_rate": 6.953875953476276e-05, - "loss": 0.009856238961219788, - "step": 4470 - }, - { - "epoch": 0.763000852514919, - "grad_norm": 0.04692552238702774, - "learning_rate": 6.952543709254338e-05, - "loss": 0.006967573612928391, - "step": 4475 - }, - { - "epoch": 0.7638533674339301, - "grad_norm": 0.06901179254055023, - "learning_rate": 6.951209969978464e-05, - "loss": 0.008588603138923645, - "step": 4480 - }, - { - "epoch": 0.7647058823529411, - "grad_norm": 0.07733304053544998, - "learning_rate": 6.949874736271289e-05, - "loss": 0.012579981982707978, - "step": 4485 - }, - { - "epoch": 0.7655583972719523, - "grad_norm": 0.0693301409482956, - "learning_rate": 6.948538008756144e-05, - "loss": 0.009747470915317535, - "step": 4490 - }, - { - "epoch": 0.7664109121909634, - "grad_norm": 0.07054253667593002, - "learning_rate": 6.947199788057059e-05, - "loss": 0.008387601375579834, - "step": 4495 - }, - { - "epoch": 0.7672634271099744, - "grad_norm": 0.06526053696870804, - "learning_rate": 6.945860074798757e-05, - "loss": 0.008389735966920853, - "step": 4500 - }, - { - "epoch": 0.7681159420289855, - "grad_norm": 0.06862380355596542, - "learning_rate": 6.944518869606662e-05, - "loss": 0.008736115694046021, - "step": 4505 - }, - { - "epoch": 0.7689684569479965, - "grad_norm": 0.06233246996998787, - "learning_rate": 6.943176173106897e-05, - "loss": 0.008519527316093446, - "step": 4510 - }, - { - "epoch": 0.7698209718670077, - "grad_norm": 0.07696249336004257, - "learning_rate": 6.941831985926273e-05, - "loss": 0.011381441354751587, - "step": 4515 - }, - { - "epoch": 0.7706734867860188, - "grad_norm": 0.11450641602277756, - "learning_rate": 6.940486308692302e-05, - "loss": 0.012895810604095458, - "step": 4520 - }, - { - "epoch": 0.7715260017050298, - "grad_norm": 0.09141158312559128, - "learning_rate": 6.939139142033191e-05, - "loss": 0.009508632868528367, - "step": 4525 - }, - { - "epoch": 0.7723785166240409, - "grad_norm": 0.09469986706972122, - "learning_rate": 6.937790486577844e-05, - "loss": 0.014812557399272919, - "step": 4530 - }, - { - "epoch": 0.773231031543052, - "grad_norm": 0.08494299650192261, - "learning_rate": 6.936440342955855e-05, - "loss": 0.01355334222316742, - "step": 4535 - }, - { - "epoch": 0.7740835464620631, - "grad_norm": 0.09328251332044601, - "learning_rate": 6.93508871179752e-05, - "loss": 0.011529977619647979, - "step": 4540 - }, - { - "epoch": 0.7749360613810742, - "grad_norm": 0.06696850806474686, - "learning_rate": 6.933735593733821e-05, - "loss": 0.010230815410614014, - "step": 4545 - }, - { - "epoch": 0.7757885763000852, - "grad_norm": 0.07968153059482574, - "learning_rate": 6.932380989396442e-05, - "loss": 0.012129776924848557, - "step": 4550 - }, - { - "epoch": 0.7766410912190963, - "grad_norm": 0.07916650176048279, - "learning_rate": 6.931024899417756e-05, - "loss": 0.009455519914627075, - "step": 4555 - }, - { - "epoch": 0.7774936061381074, - "grad_norm": 0.05229945108294487, - "learning_rate": 6.92966732443083e-05, - "loss": 0.008516684174537659, - "step": 4560 - }, - { - "epoch": 0.7783461210571185, - "grad_norm": 0.08314234018325806, - "learning_rate": 6.928308265069428e-05, - "loss": 0.010914114117622376, - "step": 4565 - }, - { - "epoch": 0.7791986359761296, - "grad_norm": 0.05075672268867493, - "learning_rate": 6.926947721968001e-05, - "loss": 0.008188208192586898, - "step": 4570 - }, - { - "epoch": 0.7800511508951407, - "grad_norm": 0.09465362876653671, - "learning_rate": 6.925585695761697e-05, - "loss": 0.009074468165636063, - "step": 4575 - }, - { - "epoch": 0.7809036658141517, - "grad_norm": 0.09024044126272202, - "learning_rate": 6.924222187086356e-05, - "loss": 0.006571034342050553, - "step": 4580 - }, - { - "epoch": 0.7817561807331628, - "grad_norm": 0.050575681030750275, - "learning_rate": 6.922857196578507e-05, - "loss": 0.008829852938652039, - "step": 4585 - }, - { - "epoch": 0.782608695652174, - "grad_norm": 0.09888230264186859, - "learning_rate": 6.921490724875376e-05, - "loss": 0.01053793728351593, - "step": 4590 - }, - { - "epoch": 0.783461210571185, - "grad_norm": 0.042567264288663864, - "learning_rate": 6.920122772614875e-05, - "loss": 0.009682109951972962, - "step": 4595 - }, - { - "epoch": 0.7843137254901961, - "grad_norm": 0.10021623969078064, - "learning_rate": 6.91875334043561e-05, - "loss": 0.012160807102918624, - "step": 4600 - }, - { - "epoch": 0.7851662404092071, - "grad_norm": 0.07016255706548691, - "learning_rate": 6.917382428976878e-05, - "loss": 0.008590599894523621, - "step": 4605 - }, - { - "epoch": 0.7860187553282183, - "grad_norm": 0.06567320227622986, - "learning_rate": 6.916010038878667e-05, - "loss": 0.00809207409620285, - "step": 4610 - }, - { - "epoch": 0.7868712702472294, - "grad_norm": 0.05032164603471756, - "learning_rate": 6.914636170781652e-05, - "loss": 0.009291460365056991, - "step": 4615 - }, - { - "epoch": 0.7877237851662404, - "grad_norm": 0.06303273886442184, - "learning_rate": 6.913260825327204e-05, - "loss": 0.00837131291627884, - "step": 4620 - }, - { - "epoch": 0.7885763000852515, - "grad_norm": 0.05837355926632881, - "learning_rate": 6.911884003157376e-05, - "loss": 0.007800968736410141, - "step": 4625 - }, - { - "epoch": 0.7894288150042625, - "grad_norm": 0.07052712142467499, - "learning_rate": 6.910505704914916e-05, - "loss": 0.010577390342950821, - "step": 4630 - }, - { - "epoch": 0.7902813299232737, - "grad_norm": 0.08550997078418732, - "learning_rate": 6.909125931243259e-05, - "loss": 0.012821859121322632, - "step": 4635 - }, - { - "epoch": 0.7911338448422848, - "grad_norm": 0.060406558215618134, - "learning_rate": 6.90774468278653e-05, - "loss": 0.0065113060176372525, - "step": 4640 - }, - { - "epoch": 0.7919863597612958, - "grad_norm": 0.13999445736408234, - "learning_rate": 6.906361960189542e-05, - "loss": 0.012699820101261139, - "step": 4645 - }, - { - "epoch": 0.7928388746803069, - "grad_norm": 0.07585978507995605, - "learning_rate": 6.904977764097797e-05, - "loss": 0.008435635268688202, - "step": 4650 - }, - { - "epoch": 0.793691389599318, - "grad_norm": 0.07488108426332474, - "learning_rate": 6.90359209515748e-05, - "loss": 0.011925875395536422, - "step": 4655 - }, - { - "epoch": 0.7945439045183291, - "grad_norm": 0.12476535886526108, - "learning_rate": 6.902204954015471e-05, - "loss": 0.009086847305297852, - "step": 4660 - }, - { - "epoch": 0.7953964194373402, - "grad_norm": 0.08779732137918472, - "learning_rate": 6.900816341319331e-05, - "loss": 0.00962812826037407, - "step": 4665 - }, - { - "epoch": 0.7962489343563512, - "grad_norm": 0.15791405737400055, - "learning_rate": 6.899426257717312e-05, - "loss": 0.011767397075891495, - "step": 4670 - }, - { - "epoch": 0.7971014492753623, - "grad_norm": 0.11228909343481064, - "learning_rate": 6.898034703858352e-05, - "loss": 0.008271434903144836, - "step": 4675 - }, - { - "epoch": 0.7979539641943734, - "grad_norm": 0.07288003712892532, - "learning_rate": 6.896641680392073e-05, - "loss": 0.009384474158287049, - "step": 4680 - }, - { - "epoch": 0.7988064791133845, - "grad_norm": 0.08285173773765564, - "learning_rate": 6.895247187968784e-05, - "loss": 0.012600034475326538, - "step": 4685 - }, - { - "epoch": 0.7996589940323956, - "grad_norm": 0.07812397927045822, - "learning_rate": 6.893851227239484e-05, - "loss": 0.008935874700546265, - "step": 4690 - }, - { - "epoch": 0.8005115089514067, - "grad_norm": 0.07500546425580978, - "learning_rate": 6.892453798855852e-05, - "loss": 0.010619471222162247, - "step": 4695 - }, - { - "epoch": 0.8013640238704177, - "grad_norm": 0.05211177095770836, - "learning_rate": 6.891054903470251e-05, - "loss": 0.008601508289575576, - "step": 4700 - }, - { - "epoch": 0.8022165387894288, - "grad_norm": 0.029616642743349075, - "learning_rate": 6.889654541735738e-05, - "loss": 0.007921247184276581, - "step": 4705 - }, - { - "epoch": 0.80306905370844, - "grad_norm": 0.06894131749868393, - "learning_rate": 6.888252714306044e-05, - "loss": 0.010935742408037186, - "step": 4710 - }, - { - "epoch": 0.803921568627451, - "grad_norm": 0.0766182467341423, - "learning_rate": 6.886849421835587e-05, - "loss": 0.010556706041097642, - "step": 4715 - }, - { - "epoch": 0.8047740835464621, - "grad_norm": 0.09164462238550186, - "learning_rate": 6.885444664979477e-05, - "loss": 0.010812586545944214, - "step": 4720 - }, - { - "epoch": 0.8056265984654731, - "grad_norm": 0.06463408470153809, - "learning_rate": 6.884038444393496e-05, - "loss": 0.009179002791643142, - "step": 4725 - }, - { - "epoch": 0.8064791133844842, - "grad_norm": 0.06639672070741653, - "learning_rate": 6.882630760734118e-05, - "loss": 0.012755092978477479, - "step": 4730 - }, - { - "epoch": 0.8073316283034954, - "grad_norm": 0.062024496495723724, - "learning_rate": 6.881221614658493e-05, - "loss": 0.009655499458312988, - "step": 4735 - }, - { - "epoch": 0.8081841432225064, - "grad_norm": 0.06751494854688644, - "learning_rate": 6.879811006824459e-05, - "loss": 0.010860173404216767, - "step": 4740 - }, - { - "epoch": 0.8090366581415175, - "grad_norm": 0.05535218119621277, - "learning_rate": 6.878398937890535e-05, - "loss": 0.011440058052539826, - "step": 4745 - }, - { - "epoch": 0.8098891730605285, - "grad_norm": 0.08365204185247421, - "learning_rate": 6.876985408515922e-05, - "loss": 0.011058357357978821, - "step": 4750 - }, - { - "epoch": 0.8107416879795396, - "grad_norm": 0.06450537592172623, - "learning_rate": 6.875570419360501e-05, - "loss": 0.01046149879693985, - "step": 4755 - }, - { - "epoch": 0.8115942028985508, - "grad_norm": 0.08542726188898087, - "learning_rate": 6.874153971084837e-05, - "loss": 0.009869573265314102, - "step": 4760 - }, - { - "epoch": 0.8124467178175618, - "grad_norm": 0.08184531331062317, - "learning_rate": 6.872736064350176e-05, - "loss": 0.01054040789604187, - "step": 4765 - }, - { - "epoch": 0.8132992327365729, - "grad_norm": 0.07068512588739395, - "learning_rate": 6.871316699818442e-05, - "loss": 0.009573462605476379, - "step": 4770 - }, - { - "epoch": 0.814151747655584, - "grad_norm": 0.08866564929485321, - "learning_rate": 6.869895878152244e-05, - "loss": 0.008078956604003906, - "step": 4775 - }, - { - "epoch": 0.815004262574595, - "grad_norm": 0.08215270191431046, - "learning_rate": 6.868473600014867e-05, - "loss": 0.010586659610271453, - "step": 4780 - }, - { - "epoch": 0.8158567774936062, - "grad_norm": 0.0449003241956234, - "learning_rate": 6.867049866070278e-05, - "loss": 0.008572281152009965, - "step": 4785 - }, - { - "epoch": 0.8167092924126172, - "grad_norm": 0.0766722783446312, - "learning_rate": 6.865624676983124e-05, - "loss": 0.009015947580337524, - "step": 4790 - }, - { - "epoch": 0.8175618073316283, - "grad_norm": 0.07404733449220657, - "learning_rate": 6.864198033418732e-05, - "loss": 0.014639028906822204, - "step": 4795 - }, - { - "epoch": 0.8184143222506394, - "grad_norm": 0.10437514632940292, - "learning_rate": 6.862769936043102e-05, - "loss": 0.009333716332912445, - "step": 4800 - }, - { - "epoch": 0.8192668371696504, - "grad_norm": 0.06732609868049622, - "learning_rate": 6.861340385522921e-05, - "loss": 0.007169592380523682, - "step": 4805 - }, - { - "epoch": 0.8201193520886616, - "grad_norm": 0.06016068905591965, - "learning_rate": 6.859909382525552e-05, - "loss": 0.009211564064025879, - "step": 4810 - }, - { - "epoch": 0.8209718670076727, - "grad_norm": 0.07302942126989365, - "learning_rate": 6.858476927719031e-05, - "loss": 0.009643231332302094, - "step": 4815 - }, - { - "epoch": 0.8218243819266837, - "grad_norm": 0.07511111348867416, - "learning_rate": 6.857043021772079e-05, - "loss": 0.010751830041408538, - "step": 4820 - }, - { - "epoch": 0.8226768968456948, - "grad_norm": 0.04791528359055519, - "learning_rate": 6.855607665354088e-05, - "loss": 0.008413314074277877, - "step": 4825 - }, - { - "epoch": 0.8235294117647058, - "grad_norm": 0.08279003202915192, - "learning_rate": 6.854170859135132e-05, - "loss": 0.009260118752717972, - "step": 4830 - }, - { - "epoch": 0.824381926683717, - "grad_norm": 0.06907783448696136, - "learning_rate": 6.85273260378596e-05, - "loss": 0.009681220352649688, - "step": 4835 - }, - { - "epoch": 0.8252344416027281, - "grad_norm": 0.09847953170537949, - "learning_rate": 6.851292899977997e-05, - "loss": 0.009847448766231537, - "step": 4840 - }, - { - "epoch": 0.8260869565217391, - "grad_norm": 0.0683966800570488, - "learning_rate": 6.849851748383343e-05, - "loss": 0.007601346075534821, - "step": 4845 - }, - { - "epoch": 0.8269394714407502, - "grad_norm": 0.0523662269115448, - "learning_rate": 6.848409149674779e-05, - "loss": 0.00900915488600731, - "step": 4850 - }, - { - "epoch": 0.8277919863597613, - "grad_norm": 0.07112257927656174, - "learning_rate": 6.846965104525757e-05, - "loss": 0.011230588704347611, - "step": 4855 - }, - { - "epoch": 0.8286445012787724, - "grad_norm": 0.09305348247289658, - "learning_rate": 6.845519613610402e-05, - "loss": 0.01087992861866951, - "step": 4860 - }, - { - "epoch": 0.8294970161977835, - "grad_norm": 0.062347084283828735, - "learning_rate": 6.84407267760352e-05, - "loss": 0.00806276947259903, - "step": 4865 - }, - { - "epoch": 0.8303495311167945, - "grad_norm": 0.09091926366090775, - "learning_rate": 6.84262429718059e-05, - "loss": 0.010562103241682053, - "step": 4870 - }, - { - "epoch": 0.8312020460358056, - "grad_norm": 0.05807443708181381, - "learning_rate": 6.841174473017762e-05, - "loss": 0.010788433253765106, - "step": 4875 - }, - { - "epoch": 0.8320545609548167, - "grad_norm": 0.06664252281188965, - "learning_rate": 6.839723205791863e-05, - "loss": 0.01030244082212448, - "step": 4880 - }, - { - "epoch": 0.8329070758738278, - "grad_norm": 0.07469561696052551, - "learning_rate": 6.838270496180392e-05, - "loss": 0.01250479370355606, - "step": 4885 - }, - { - "epoch": 0.8337595907928389, - "grad_norm": 0.0469096302986145, - "learning_rate": 6.836816344861523e-05, - "loss": 0.010546717792749405, - "step": 4890 - }, - { - "epoch": 0.83461210571185, - "grad_norm": 0.0646355077624321, - "learning_rate": 6.835360752514104e-05, - "loss": 0.008491561561822892, - "step": 4895 - }, - { - "epoch": 0.835464620630861, - "grad_norm": 0.06006006523966789, - "learning_rate": 6.83390371981765e-05, - "loss": 0.010175065696239471, - "step": 4900 - }, - { - "epoch": 0.8363171355498721, - "grad_norm": 0.0595518983900547, - "learning_rate": 6.832445247452355e-05, - "loss": 0.009471315890550613, - "step": 4905 - }, - { - "epoch": 0.8371696504688833, - "grad_norm": 0.0722845196723938, - "learning_rate": 6.830985336099081e-05, - "loss": 0.011656039208173753, - "step": 4910 - }, - { - "epoch": 0.8380221653878943, - "grad_norm": 0.09830670058727264, - "learning_rate": 6.829523986439366e-05, - "loss": 0.0106172576546669, - "step": 4915 - }, - { - "epoch": 0.8388746803069054, - "grad_norm": 0.0725899264216423, - "learning_rate": 6.828061199155413e-05, - "loss": 0.00935768336057663, - "step": 4920 - }, - { - "epoch": 0.8397271952259164, - "grad_norm": 0.06721889227628708, - "learning_rate": 6.826596974930101e-05, - "loss": 0.010951700061559677, - "step": 4925 - }, - { - "epoch": 0.8405797101449275, - "grad_norm": 0.09289079904556274, - "learning_rate": 6.82513131444698e-05, - "loss": 0.010057362169027329, - "step": 4930 - }, - { - "epoch": 0.8414322250639387, - "grad_norm": 0.07667957991361618, - "learning_rate": 6.823664218390267e-05, - "loss": 0.012943412363529205, - "step": 4935 - }, - { - "epoch": 0.8422847399829497, - "grad_norm": 0.041785743087530136, - "learning_rate": 6.822195687444853e-05, - "loss": 0.009845246374607087, - "step": 4940 - }, - { - "epoch": 0.8431372549019608, - "grad_norm": 0.062134817242622375, - "learning_rate": 6.820725722296295e-05, - "loss": 0.010506168007850647, - "step": 4945 - }, - { - "epoch": 0.8439897698209718, - "grad_norm": 0.0870882049202919, - "learning_rate": 6.819254323630825e-05, - "loss": 0.008953387290239334, - "step": 4950 - }, - { - "epoch": 0.8448422847399829, - "grad_norm": 0.06943989545106888, - "learning_rate": 6.817781492135337e-05, - "loss": 0.009975537657737732, - "step": 4955 - }, - { - "epoch": 0.8456947996589941, - "grad_norm": 0.0961625948548317, - "learning_rate": 6.8163072284974e-05, - "loss": 0.010299818217754364, - "step": 4960 - }, - { - "epoch": 0.8465473145780051, - "grad_norm": 0.0685473084449768, - "learning_rate": 6.814831533405249e-05, - "loss": 0.007892660051584243, - "step": 4965 - }, - { - "epoch": 0.8473998294970162, - "grad_norm": 0.06370922178030014, - "learning_rate": 6.813354407547787e-05, - "loss": 0.011673354357481004, - "step": 4970 - }, - { - "epoch": 0.8482523444160273, - "grad_norm": 0.1212867870926857, - "learning_rate": 6.811875851614586e-05, - "loss": 0.01049395203590393, - "step": 4975 - }, - { - "epoch": 0.8491048593350383, - "grad_norm": 0.07590476423501968, - "learning_rate": 6.810395866295885e-05, - "loss": 0.011261900514364242, - "step": 4980 - }, - { - "epoch": 0.8499573742540495, - "grad_norm": 0.06342966109514236, - "learning_rate": 6.808914452282592e-05, - "loss": 0.011414043605327606, - "step": 4985 - }, - { - "epoch": 0.8508098891730606, - "grad_norm": 0.08939556777477264, - "learning_rate": 6.807431610266278e-05, - "loss": 0.008719882369041443, - "step": 4990 - }, - { - "epoch": 0.8516624040920716, - "grad_norm": 0.09418119490146637, - "learning_rate": 6.805947340939183e-05, - "loss": 0.011406099796295166, - "step": 4995 - }, - { - "epoch": 0.8525149190110827, - "grad_norm": 0.09113836288452148, - "learning_rate": 6.804461644994213e-05, - "loss": 0.011070792376995087, - "step": 5000 - }, - { - "epoch": 0.8533674339300937, - "grad_norm": 0.06545080989599228, - "learning_rate": 6.802974523124941e-05, - "loss": 0.009602059423923493, - "step": 5005 - }, - { - "epoch": 0.8542199488491049, - "grad_norm": 0.07779792696237564, - "learning_rate": 6.801485976025607e-05, - "loss": 0.008207190036773681, - "step": 5010 - }, - { - "epoch": 0.855072463768116, - "grad_norm": 0.038606271147727966, - "learning_rate": 6.799996004391113e-05, - "loss": 0.008772189915180206, - "step": 5015 - }, - { - "epoch": 0.855924978687127, - "grad_norm": 0.07329479604959488, - "learning_rate": 6.798504608917025e-05, - "loss": 0.011226999759674072, - "step": 5020 - }, - { - "epoch": 0.8567774936061381, - "grad_norm": 0.08631903678178787, - "learning_rate": 6.797011790299579e-05, - "loss": 0.012361649423837662, - "step": 5025 - }, - { - "epoch": 0.8576300085251491, - "grad_norm": 0.09041957557201385, - "learning_rate": 6.79551754923567e-05, - "loss": 0.010576151311397552, - "step": 5030 - }, - { - "epoch": 0.8584825234441603, - "grad_norm": 0.07003892213106155, - "learning_rate": 6.794021886422861e-05, - "loss": 0.008748160302639007, - "step": 5035 - }, - { - "epoch": 0.8593350383631714, - "grad_norm": 0.06976106762886047, - "learning_rate": 6.792524802559378e-05, - "loss": 0.010539846867322922, - "step": 5040 - }, - { - "epoch": 0.8601875532821824, - "grad_norm": 0.05501266196370125, - "learning_rate": 6.791026298344107e-05, - "loss": 0.01045292615890503, - "step": 5045 - }, - { - "epoch": 0.8610400682011935, - "grad_norm": 0.051503781229257584, - "learning_rate": 6.789526374476602e-05, - "loss": 0.009410140663385391, - "step": 5050 - }, - { - "epoch": 0.8618925831202046, - "grad_norm": 0.05674072727560997, - "learning_rate": 6.788025031657076e-05, - "loss": 0.00899135023355484, - "step": 5055 - }, - { - "epoch": 0.8627450980392157, - "grad_norm": 0.09688259661197662, - "learning_rate": 6.786522270586406e-05, - "loss": 0.010228607058525085, - "step": 5060 - }, - { - "epoch": 0.8635976129582268, - "grad_norm": 0.08542542159557343, - "learning_rate": 6.785018091966131e-05, - "loss": 0.010819461196660995, - "step": 5065 - }, - { - "epoch": 0.8644501278772379, - "grad_norm": 0.05703757330775261, - "learning_rate": 6.783512496498452e-05, - "loss": 0.011345957219600678, - "step": 5070 - }, - { - "epoch": 0.8653026427962489, - "grad_norm": 0.08096349984407425, - "learning_rate": 6.782005484886231e-05, - "loss": 0.011220332235097885, - "step": 5075 - }, - { - "epoch": 0.86615515771526, - "grad_norm": 0.07224266976118088, - "learning_rate": 6.780497057832988e-05, - "loss": 0.012557309865951539, - "step": 5080 - }, - { - "epoch": 0.8670076726342711, - "grad_norm": 0.08718731999397278, - "learning_rate": 6.778987216042912e-05, - "loss": 0.009770408272743225, - "step": 5085 - }, - { - "epoch": 0.8678601875532822, - "grad_norm": 0.05731170251965523, - "learning_rate": 6.777475960220846e-05, - "loss": 0.008567098528146744, - "step": 5090 - }, - { - "epoch": 0.8687127024722933, - "grad_norm": 0.07938708364963531, - "learning_rate": 6.775963291072292e-05, - "loss": 0.010664292424917222, - "step": 5095 - }, - { - "epoch": 0.8695652173913043, - "grad_norm": 0.028421485796570778, - "learning_rate": 6.774449209303416e-05, - "loss": 0.010194088518619537, - "step": 5100 - }, - { - "epoch": 0.8704177323103154, - "grad_norm": 0.06094631180167198, - "learning_rate": 6.772933715621042e-05, - "loss": 0.009481045603752136, - "step": 5105 - }, - { - "epoch": 0.8712702472293266, - "grad_norm": 0.08887558430433273, - "learning_rate": 6.771416810732653e-05, - "loss": 0.012845474481582641, - "step": 5110 - }, - { - "epoch": 0.8721227621483376, - "grad_norm": 0.07431238144636154, - "learning_rate": 6.76989849534639e-05, - "loss": 0.00815560668706894, - "step": 5115 - }, - { - "epoch": 0.8729752770673487, - "grad_norm": 0.09380137175321579, - "learning_rate": 6.768378770171052e-05, - "loss": 0.009580246359109878, - "step": 5120 - }, - { - "epoch": 0.8738277919863597, - "grad_norm": 0.08001488447189331, - "learning_rate": 6.766857635916099e-05, - "loss": 0.00857289507985115, - "step": 5125 - }, - { - "epoch": 0.8746803069053708, - "grad_norm": 0.049355555325746536, - "learning_rate": 6.765335093291647e-05, - "loss": 0.009263276308774947, - "step": 5130 - }, - { - "epoch": 0.875532821824382, - "grad_norm": 0.05873994901776314, - "learning_rate": 6.763811143008469e-05, - "loss": 0.008233514428138734, - "step": 5135 - }, - { - "epoch": 0.876385336743393, - "grad_norm": 0.10190756618976593, - "learning_rate": 6.762285785777995e-05, - "loss": 0.01529676467180252, - "step": 5140 - }, - { - "epoch": 0.8772378516624041, - "grad_norm": 0.08395158499479294, - "learning_rate": 6.760759022312313e-05, - "loss": 0.00961325541138649, - "step": 5145 - }, - { - "epoch": 0.8780903665814151, - "grad_norm": 0.07476748526096344, - "learning_rate": 6.759230853324169e-05, - "loss": 0.010477164387702942, - "step": 5150 - }, - { - "epoch": 0.8789428815004262, - "grad_norm": 0.07773051410913467, - "learning_rate": 6.757701279526961e-05, - "loss": 0.010389962792396545, - "step": 5155 - }, - { - "epoch": 0.8797953964194374, - "grad_norm": 0.07345708459615707, - "learning_rate": 6.756170301634745e-05, - "loss": 0.009174319356679917, - "step": 5160 - }, - { - "epoch": 0.8806479113384484, - "grad_norm": 0.07917368412017822, - "learning_rate": 6.754637920362233e-05, - "loss": 0.012756256759166718, - "step": 5165 - }, - { - "epoch": 0.8815004262574595, - "grad_norm": 0.06016271933913231, - "learning_rate": 6.75310413642479e-05, - "loss": 0.011058077961206437, - "step": 5170 - }, - { - "epoch": 0.8823529411764706, - "grad_norm": 0.06637005507946014, - "learning_rate": 6.751568950538441e-05, - "loss": 0.013590328395366669, - "step": 5175 - }, - { - "epoch": 0.8832054560954816, - "grad_norm": 0.06486016511917114, - "learning_rate": 6.750032363419857e-05, - "loss": 0.010195261240005494, - "step": 5180 - }, - { - "epoch": 0.8840579710144928, - "grad_norm": 0.09800687432289124, - "learning_rate": 6.748494375786372e-05, - "loss": 0.011106249690055848, - "step": 5185 - }, - { - "epoch": 0.8849104859335039, - "grad_norm": 0.04665162041783333, - "learning_rate": 6.746954988355967e-05, - "loss": 0.009880972653627395, - "step": 5190 - }, - { - "epoch": 0.8857630008525149, - "grad_norm": 0.05554487928748131, - "learning_rate": 6.745414201847282e-05, - "loss": 0.009480565786361694, - "step": 5195 - }, - { - "epoch": 0.886615515771526, - "grad_norm": 0.059967316687107086, - "learning_rate": 6.743872016979605e-05, - "loss": 0.010072766989469528, - "step": 5200 - }, - { - "epoch": 0.887468030690537, - "grad_norm": 0.08275031298398972, - "learning_rate": 6.74232843447288e-05, - "loss": 0.010208947211503982, - "step": 5205 - }, - { - "epoch": 0.8883205456095482, - "grad_norm": 0.07091715931892395, - "learning_rate": 6.740783455047704e-05, - "loss": 0.011881709098815918, - "step": 5210 - }, - { - "epoch": 0.8891730605285593, - "grad_norm": 0.1465480923652649, - "learning_rate": 6.739237079425322e-05, - "loss": 0.010970161855220794, - "step": 5215 - }, - { - "epoch": 0.8900255754475703, - "grad_norm": 0.07571437954902649, - "learning_rate": 6.737689308327636e-05, - "loss": 0.010722124576568603, - "step": 5220 - }, - { - "epoch": 0.8908780903665814, - "grad_norm": 0.06671100109815598, - "learning_rate": 6.736140142477194e-05, - "loss": 0.010463282465934753, - "step": 5225 - }, - { - "epoch": 0.8917306052855924, - "grad_norm": 0.05676295980811119, - "learning_rate": 6.734589582597204e-05, - "loss": 0.00933043509721756, - "step": 5230 - }, - { - "epoch": 0.8925831202046036, - "grad_norm": 0.09708777070045471, - "learning_rate": 6.733037629411514e-05, - "loss": 0.011712780594825745, - "step": 5235 - }, - { - "epoch": 0.8934356351236147, - "grad_norm": 0.07073090970516205, - "learning_rate": 6.731484283644626e-05, - "loss": 0.008112293481826783, - "step": 5240 - }, - { - "epoch": 0.8942881500426257, - "grad_norm": 0.08821752667427063, - "learning_rate": 6.7299295460217e-05, - "loss": 0.01026250645518303, - "step": 5245 - }, - { - "epoch": 0.8951406649616368, - "grad_norm": 0.05165687948465347, - "learning_rate": 6.728373417268533e-05, - "loss": 0.008761890232563019, - "step": 5250 - }, - { - "epoch": 0.8959931798806479, - "grad_norm": 0.06872246414422989, - "learning_rate": 6.726815898111581e-05, - "loss": 0.012551462650299073, - "step": 5255 - }, - { - "epoch": 0.896845694799659, - "grad_norm": 0.07550673931837082, - "learning_rate": 6.725256989277944e-05, - "loss": 0.010165071487426758, - "step": 5260 - }, - { - "epoch": 0.8976982097186701, - "grad_norm": 0.05931933969259262, - "learning_rate": 6.723696691495373e-05, - "loss": 0.009457996487617493, - "step": 5265 - }, - { - "epoch": 0.8985507246376812, - "grad_norm": 0.10838861763477325, - "learning_rate": 6.722135005492268e-05, - "loss": 0.012053199112415314, - "step": 5270 - }, - { - "epoch": 0.8994032395566922, - "grad_norm": 0.062102265655994415, - "learning_rate": 6.720571931997676e-05, - "loss": 0.007582514733076096, - "step": 5275 - }, - { - "epoch": 0.9002557544757033, - "grad_norm": 0.05997858941555023, - "learning_rate": 6.719007471741289e-05, - "loss": 0.009348342567682267, - "step": 5280 - }, - { - "epoch": 0.9011082693947144, - "grad_norm": 0.05405129864811897, - "learning_rate": 6.717441625453451e-05, - "loss": 0.009910254925489425, - "step": 5285 - }, - { - "epoch": 0.9019607843137255, - "grad_norm": 0.08071329444646835, - "learning_rate": 6.715874393865152e-05, - "loss": 0.010672248899936676, - "step": 5290 - }, - { - "epoch": 0.9028132992327366, - "grad_norm": 0.06863300502300262, - "learning_rate": 6.714305777708027e-05, - "loss": 0.008570954948663712, - "step": 5295 - }, - { - "epoch": 0.9036658141517476, - "grad_norm": 0.07818135619163513, - "learning_rate": 6.712735777714357e-05, - "loss": 0.008647527545690536, - "step": 5300 - }, - { - "epoch": 0.9045183290707587, - "grad_norm": 0.14757376909255981, - "learning_rate": 6.711164394617072e-05, - "loss": 0.010825049877166749, - "step": 5305 - }, - { - "epoch": 0.9053708439897699, - "grad_norm": 0.07376445829868317, - "learning_rate": 6.709591629149746e-05, - "loss": 0.01299697458744049, - "step": 5310 - }, - { - "epoch": 0.9062233589087809, - "grad_norm": 0.05860469490289688, - "learning_rate": 6.708017482046597e-05, - "loss": 0.009549598395824432, - "step": 5315 - }, - { - "epoch": 0.907075873827792, - "grad_norm": 0.0799872875213623, - "learning_rate": 6.706441954042488e-05, - "loss": 0.009733843803405761, - "step": 5320 - }, - { - "epoch": 0.907928388746803, - "grad_norm": 0.05245954543352127, - "learning_rate": 6.704865045872932e-05, - "loss": 0.009799794852733612, - "step": 5325 - }, - { - "epoch": 0.9087809036658141, - "grad_norm": 0.05515241622924805, - "learning_rate": 6.703286758274079e-05, - "loss": 0.007391643524169922, - "step": 5330 - }, - { - "epoch": 0.9096334185848253, - "grad_norm": 0.05900256708264351, - "learning_rate": 6.701707091982726e-05, - "loss": 0.009107303619384766, - "step": 5335 - }, - { - "epoch": 0.9104859335038363, - "grad_norm": 0.09559495002031326, - "learning_rate": 6.700126047736317e-05, - "loss": 0.009052158147096635, - "step": 5340 - }, - { - "epoch": 0.9113384484228474, - "grad_norm": 0.11189334839582443, - "learning_rate": 6.698543626272932e-05, - "loss": 0.011292549222707749, - "step": 5345 - }, - { - "epoch": 0.9121909633418585, - "grad_norm": 0.07031659781932831, - "learning_rate": 6.6969598283313e-05, - "loss": 0.008589480072259903, - "step": 5350 - }, - { - "epoch": 0.9130434782608695, - "grad_norm": 0.1652907431125641, - "learning_rate": 6.69537465465079e-05, - "loss": 0.010865563899278641, - "step": 5355 - }, - { - "epoch": 0.9138959931798807, - "grad_norm": 0.06157436594367027, - "learning_rate": 6.693788105971413e-05, - "loss": 0.012611952424049378, - "step": 5360 - }, - { - "epoch": 0.9147485080988917, - "grad_norm": 0.03928734362125397, - "learning_rate": 6.692200183033826e-05, - "loss": 0.009115418046712875, - "step": 5365 - }, - { - "epoch": 0.9156010230179028, - "grad_norm": 0.06604880094528198, - "learning_rate": 6.690610886579321e-05, - "loss": 0.010015038400888443, - "step": 5370 - }, - { - "epoch": 0.9164535379369139, - "grad_norm": 0.07625336199998856, - "learning_rate": 6.689020217349835e-05, - "loss": 0.010416677594184876, - "step": 5375 - }, - { - "epoch": 0.9173060528559249, - "grad_norm": 0.07674526423215866, - "learning_rate": 6.687428176087946e-05, - "loss": 0.01016802191734314, - "step": 5380 - }, - { - "epoch": 0.9181585677749361, - "grad_norm": 0.08422617614269257, - "learning_rate": 6.685834763536872e-05, - "loss": 0.011127004027366638, - "step": 5385 - }, - { - "epoch": 0.9190110826939472, - "grad_norm": 0.057719554752111435, - "learning_rate": 6.684239980440472e-05, - "loss": 0.008915853500366212, - "step": 5390 - }, - { - "epoch": 0.9198635976129582, - "grad_norm": 0.056555263698101044, - "learning_rate": 6.682643827543241e-05, - "loss": 0.0095272496342659, - "step": 5395 - }, - { - "epoch": 0.9207161125319693, - "grad_norm": 0.07605638355016708, - "learning_rate": 6.681046305590317e-05, - "loss": 0.010731159895658492, - "step": 5400 - }, - { - "epoch": 0.9215686274509803, - "grad_norm": 0.07499220222234726, - "learning_rate": 6.679447415327479e-05, - "loss": 0.010919998586177825, - "step": 5405 - }, - { - "epoch": 0.9224211423699915, - "grad_norm": 0.052863143384456635, - "learning_rate": 6.677847157501137e-05, - "loss": 0.011300939321517944, - "step": 5410 - }, - { - "epoch": 0.9232736572890026, - "grad_norm": 0.09210597723722458, - "learning_rate": 6.676245532858351e-05, - "loss": 0.013997772336006164, - "step": 5415 - }, - { - "epoch": 0.9241261722080136, - "grad_norm": 0.0746840238571167, - "learning_rate": 6.674642542146807e-05, - "loss": 0.012542533874511718, - "step": 5420 - }, - { - "epoch": 0.9249786871270247, - "grad_norm": 0.06137506663799286, - "learning_rate": 6.67303818611484e-05, - "loss": 0.008029398322105408, - "step": 5425 - }, - { - "epoch": 0.9258312020460358, - "grad_norm": 0.03867131471633911, - "learning_rate": 6.671432465511411e-05, - "loss": 0.009305672347545623, - "step": 5430 - }, - { - "epoch": 0.9266837169650469, - "grad_norm": 0.08291540294885635, - "learning_rate": 6.669825381086128e-05, - "loss": 0.011059926450252533, - "step": 5435 - }, - { - "epoch": 0.927536231884058, - "grad_norm": 0.0689411610364914, - "learning_rate": 6.668216933589228e-05, - "loss": 0.008350597321987152, - "step": 5440 - }, - { - "epoch": 0.928388746803069, - "grad_norm": 0.051505669951438904, - "learning_rate": 6.666607123771591e-05, - "loss": 0.011102759093046189, - "step": 5445 - }, - { - "epoch": 0.9292412617220801, - "grad_norm": 0.08774327486753464, - "learning_rate": 6.664995952384729e-05, - "loss": 0.009498609602451325, - "step": 5450 - }, - { - "epoch": 0.9300937766410913, - "grad_norm": 0.060566093772649765, - "learning_rate": 6.663383420180789e-05, - "loss": 0.007811173051595688, - "step": 5455 - }, - { - "epoch": 0.9309462915601023, - "grad_norm": 0.09952156245708466, - "learning_rate": 6.661769527912555e-05, - "loss": 0.010514630377292633, - "step": 5460 - }, - { - "epoch": 0.9317988064791134, - "grad_norm": 0.04871741309762001, - "learning_rate": 6.660154276333446e-05, - "loss": 0.006630983203649521, - "step": 5465 - }, - { - "epoch": 0.9326513213981245, - "grad_norm": 0.06254981458187103, - "learning_rate": 6.658537666197517e-05, - "loss": 0.009895097464323044, - "step": 5470 - }, - { - "epoch": 0.9335038363171355, - "grad_norm": 0.08351470530033112, - "learning_rate": 6.656919698259452e-05, - "loss": 0.010659988969564438, - "step": 5475 - }, - { - "epoch": 0.9343563512361467, - "grad_norm": 0.07085305452346802, - "learning_rate": 6.655300373274575e-05, - "loss": 0.008971457183361054, - "step": 5480 - }, - { - "epoch": 0.9352088661551577, - "grad_norm": 0.06461923569440842, - "learning_rate": 6.653679691998839e-05, - "loss": 0.009138958156108856, - "step": 5485 - }, - { - "epoch": 0.9360613810741688, - "grad_norm": 0.11675399541854858, - "learning_rate": 6.652057655188832e-05, - "loss": 0.008388948440551759, - "step": 5490 - }, - { - "epoch": 0.9369138959931799, - "grad_norm": 0.09698229283094406, - "learning_rate": 6.650434263601777e-05, - "loss": 0.011885351687669753, - "step": 5495 - }, - { - "epoch": 0.9377664109121909, - "grad_norm": 0.06786464154720306, - "learning_rate": 6.648809517995524e-05, - "loss": 0.012351768463850022, - "step": 5500 - }, - { - "epoch": 0.9386189258312021, - "grad_norm": 0.09192351251840591, - "learning_rate": 6.647183419128561e-05, - "loss": 0.010940471291542053, - "step": 5505 - }, - { - "epoch": 0.9394714407502132, - "grad_norm": 0.05660499259829521, - "learning_rate": 6.645555967760003e-05, - "loss": 0.01160380095243454, - "step": 5510 - }, - { - "epoch": 0.9403239556692242, - "grad_norm": 0.0829106792807579, - "learning_rate": 6.6439271646496e-05, - "loss": 0.009475469589233398, - "step": 5515 - }, - { - "epoch": 0.9411764705882353, - "grad_norm": 0.08007021248340607, - "learning_rate": 6.642297010557733e-05, - "loss": 0.010524801164865493, - "step": 5520 - }, - { - "epoch": 0.9420289855072463, - "grad_norm": 0.07578855752944946, - "learning_rate": 6.640665506245406e-05, - "loss": 0.008864742517471314, - "step": 5525 - }, - { - "epoch": 0.9428815004262575, - "grad_norm": 0.064674511551857, - "learning_rate": 6.639032652474265e-05, - "loss": 0.010619612783193589, - "step": 5530 - }, - { - "epoch": 0.9437340153452686, - "grad_norm": 0.07176528871059418, - "learning_rate": 6.637398450006579e-05, - "loss": 0.011696039140224457, - "step": 5535 - }, - { - "epoch": 0.9445865302642796, - "grad_norm": 0.16578713059425354, - "learning_rate": 6.635762899605248e-05, - "loss": 0.009867334365844726, - "step": 5540 - }, - { - "epoch": 0.9454390451832907, - "grad_norm": 0.069394052028656, - "learning_rate": 6.634126002033802e-05, - "loss": 0.012951886653900147, - "step": 5545 - }, - { - "epoch": 0.9462915601023018, - "grad_norm": 0.07247213274240494, - "learning_rate": 6.632487758056397e-05, - "loss": 0.009127721190452576, - "step": 5550 - }, - { - "epoch": 0.9471440750213129, - "grad_norm": 0.11084317415952682, - "learning_rate": 6.630848168437822e-05, - "loss": 0.009197863936424255, - "step": 5555 - }, - { - "epoch": 0.947996589940324, - "grad_norm": 0.08719248324632645, - "learning_rate": 6.629207233943492e-05, - "loss": 0.010768509656190871, - "step": 5560 - }, - { - "epoch": 0.948849104859335, - "grad_norm": 0.0857851505279541, - "learning_rate": 6.62756495533945e-05, - "loss": 0.009163270145654679, - "step": 5565 - }, - { - "epoch": 0.9497016197783461, - "grad_norm": 0.09808778762817383, - "learning_rate": 6.625921333392362e-05, - "loss": 0.01005362868309021, - "step": 5570 - }, - { - "epoch": 0.9505541346973572, - "grad_norm": 0.08482059836387634, - "learning_rate": 6.624276368869532e-05, - "loss": 0.008303509652614593, - "step": 5575 - }, - { - "epoch": 0.9514066496163683, - "grad_norm": 0.07460886240005493, - "learning_rate": 6.62263006253888e-05, - "loss": 0.00857923850417137, - "step": 5580 - }, - { - "epoch": 0.9522591645353794, - "grad_norm": 0.09494256228208542, - "learning_rate": 6.620982415168956e-05, - "loss": 0.009573552012443542, - "step": 5585 - }, - { - "epoch": 0.9531116794543905, - "grad_norm": 0.08753519505262375, - "learning_rate": 6.61933342752894e-05, - "loss": 0.010430536419153213, - "step": 5590 - }, - { - "epoch": 0.9539641943734015, - "grad_norm": 0.05132949724793434, - "learning_rate": 6.617683100388632e-05, - "loss": 0.009080658107995987, - "step": 5595 - }, - { - "epoch": 0.9548167092924126, - "grad_norm": 0.07516856491565704, - "learning_rate": 6.61603143451846e-05, - "loss": 0.009718524664640427, - "step": 5600 - }, - { - "epoch": 0.9556692242114238, - "grad_norm": 0.12723733484745026, - "learning_rate": 6.614378430689477e-05, - "loss": 0.01136334240436554, - "step": 5605 - }, - { - "epoch": 0.9565217391304348, - "grad_norm": 0.0905863493680954, - "learning_rate": 6.612724089673359e-05, - "loss": 0.013209307193756103, - "step": 5610 - }, - { - "epoch": 0.9573742540494459, - "grad_norm": 0.06084009259939194, - "learning_rate": 6.611068412242409e-05, - "loss": 0.01001257449388504, - "step": 5615 - }, - { - "epoch": 0.9582267689684569, - "grad_norm": 0.11799532175064087, - "learning_rate": 6.60941139916955e-05, - "loss": 0.008610795438289642, - "step": 5620 - }, - { - "epoch": 0.959079283887468, - "grad_norm": 0.08627504110336304, - "learning_rate": 6.607753051228333e-05, - "loss": 0.009049218893051148, - "step": 5625 - }, - { - "epoch": 0.9599317988064792, - "grad_norm": 0.0910186693072319, - "learning_rate": 6.60609336919293e-05, - "loss": 0.01068672090768814, - "step": 5630 - }, - { - "epoch": 0.9607843137254902, - "grad_norm": 0.06503022462129593, - "learning_rate": 6.604432353838134e-05, - "loss": 0.010604655742645264, - "step": 5635 - }, - { - "epoch": 0.9616368286445013, - "grad_norm": 0.07681523263454437, - "learning_rate": 6.602770005939363e-05, - "loss": 0.010527564585208893, - "step": 5640 - }, - { - "epoch": 0.9624893435635123, - "grad_norm": 0.0680806040763855, - "learning_rate": 6.601106326272659e-05, - "loss": 0.009375665336847305, - "step": 5645 - }, - { - "epoch": 0.9633418584825234, - "grad_norm": 0.06601905822753906, - "learning_rate": 6.599441315614678e-05, - "loss": 0.009470004588365555, - "step": 5650 - }, - { - "epoch": 0.9641943734015346, - "grad_norm": 0.06291890889406204, - "learning_rate": 6.597774974742706e-05, - "loss": 0.012320800870656966, - "step": 5655 - }, - { - "epoch": 0.9650468883205456, - "grad_norm": 0.0956176221370697, - "learning_rate": 6.596107304434645e-05, - "loss": 0.01018187329173088, - "step": 5660 - }, - { - "epoch": 0.9658994032395567, - "grad_norm": 0.06642715632915497, - "learning_rate": 6.59443830546902e-05, - "loss": 0.010271859169006348, - "step": 5665 - }, - { - "epoch": 0.9667519181585678, - "grad_norm": 0.06783592700958252, - "learning_rate": 6.592767978624973e-05, - "loss": 0.00986798033118248, - "step": 5670 - }, - { - "epoch": 0.9676044330775788, - "grad_norm": 0.05877846106886864, - "learning_rate": 6.591096324682272e-05, - "loss": 0.009708859026432037, - "step": 5675 - }, - { - "epoch": 0.96845694799659, - "grad_norm": 0.049258604645729065, - "learning_rate": 6.589423344421297e-05, - "loss": 0.008615868538618088, - "step": 5680 - }, - { - "epoch": 0.969309462915601, - "grad_norm": 0.05622515454888344, - "learning_rate": 6.587749038623052e-05, - "loss": 0.010138686001300811, - "step": 5685 - }, - { - "epoch": 0.9701619778346121, - "grad_norm": 0.05719893425703049, - "learning_rate": 6.586073408069159e-05, - "loss": 0.00869678258895874, - "step": 5690 - }, - { - "epoch": 0.9710144927536232, - "grad_norm": 0.07675095647573471, - "learning_rate": 6.584396453541856e-05, - "loss": 0.010557885468006133, - "step": 5695 - }, - { - "epoch": 0.9718670076726342, - "grad_norm": 0.0999779924750328, - "learning_rate": 6.582718175824006e-05, - "loss": 0.010667790472507478, - "step": 5700 - }, - { - "epoch": 0.9727195225916454, - "grad_norm": 0.04620293527841568, - "learning_rate": 6.58103857569908e-05, - "loss": 0.0078192800283432, - "step": 5705 - }, - { - "epoch": 0.9735720375106565, - "grad_norm": 0.06725125759840012, - "learning_rate": 6.579357653951174e-05, - "loss": 0.010441574454307555, - "step": 5710 - }, - { - "epoch": 0.9744245524296675, - "grad_norm": 0.09062530100345612, - "learning_rate": 6.577675411364997e-05, - "loss": 0.011757946014404297, - "step": 5715 - }, - { - "epoch": 0.9752770673486786, - "grad_norm": 0.050651032477617264, - "learning_rate": 6.575991848725876e-05, - "loss": 0.009817829728126526, - "step": 5720 - }, - { - "epoch": 0.9761295822676896, - "grad_norm": 0.06951560825109482, - "learning_rate": 6.574306966819755e-05, - "loss": 0.008903174102306366, - "step": 5725 - }, - { - "epoch": 0.9769820971867008, - "grad_norm": 0.0733589306473732, - "learning_rate": 6.57262076643319e-05, - "loss": 0.009786784648895264, - "step": 5730 - }, - { - "epoch": 0.9778346121057119, - "grad_norm": 0.0736282467842102, - "learning_rate": 6.570933248353359e-05, - "loss": 0.012176553905010223, - "step": 5735 - }, - { - "epoch": 0.9786871270247229, - "grad_norm": 0.069704569876194, - "learning_rate": 6.56924441336805e-05, - "loss": 0.008654942363500595, - "step": 5740 - }, - { - "epoch": 0.979539641943734, - "grad_norm": 0.07497496902942657, - "learning_rate": 6.567554262265668e-05, - "loss": 0.010902392119169236, - "step": 5745 - }, - { - "epoch": 0.9803921568627451, - "grad_norm": 0.07559038698673248, - "learning_rate": 6.56586279583523e-05, - "loss": 0.00921270027756691, - "step": 5750 - }, - { - "epoch": 0.9812446717817562, - "grad_norm": 0.05894545465707779, - "learning_rate": 6.56417001486637e-05, - "loss": 0.009073206037282944, - "step": 5755 - }, - { - "epoch": 0.9820971867007673, - "grad_norm": 0.06555377691984177, - "learning_rate": 6.562475920149335e-05, - "loss": 0.010872729122638702, - "step": 5760 - }, - { - "epoch": 0.9829497016197783, - "grad_norm": 0.15036429464817047, - "learning_rate": 6.560780512474984e-05, - "loss": 0.009879975020885468, - "step": 5765 - }, - { - "epoch": 0.9838022165387894, - "grad_norm": 0.06842299550771713, - "learning_rate": 6.559083792634791e-05, - "loss": 0.00965554341673851, - "step": 5770 - }, - { - "epoch": 0.9846547314578005, - "grad_norm": 0.0486510805785656, - "learning_rate": 6.557385761420839e-05, - "loss": 0.00872802734375, - "step": 5775 - }, - { - "epoch": 0.9855072463768116, - "grad_norm": 0.059796739369630814, - "learning_rate": 6.555686419625826e-05, - "loss": 0.009720289707183838, - "step": 5780 - }, - { - "epoch": 0.9863597612958227, - "grad_norm": 0.07971934229135513, - "learning_rate": 6.553985768043062e-05, - "loss": 0.008043316006660462, - "step": 5785 - }, - { - "epoch": 0.9872122762148338, - "grad_norm": 0.09556971490383148, - "learning_rate": 6.552283807466468e-05, - "loss": 0.009030704945325851, - "step": 5790 - }, - { - "epoch": 0.9880647911338448, - "grad_norm": 0.04631726071238518, - "learning_rate": 6.550580538690577e-05, - "loss": 0.007388583570718765, - "step": 5795 - }, - { - "epoch": 0.9889173060528559, - "grad_norm": 0.08172665536403656, - "learning_rate": 6.548875962510528e-05, - "loss": 0.007863265275955201, - "step": 5800 - }, - { - "epoch": 0.989769820971867, - "grad_norm": 0.11129096895456314, - "learning_rate": 6.547170079722076e-05, - "loss": 0.012218999862670898, - "step": 5805 - }, - { - "epoch": 0.9906223358908781, - "grad_norm": 0.06619804352521896, - "learning_rate": 6.545462891121584e-05, - "loss": 0.007535400986671448, - "step": 5810 - }, - { - "epoch": 0.9914748508098892, - "grad_norm": 0.11495351046323776, - "learning_rate": 6.543754397506025e-05, - "loss": 0.0121284119784832, - "step": 5815 - }, - { - "epoch": 0.9923273657289002, - "grad_norm": 0.06017669290304184, - "learning_rate": 6.542044599672978e-05, - "loss": 0.008776353299617767, - "step": 5820 - }, - { - "epoch": 0.9931798806479113, - "grad_norm": 0.08049561828374863, - "learning_rate": 6.540333498420637e-05, - "loss": 0.010460492223501205, - "step": 5825 - }, - { - "epoch": 0.9940323955669225, - "grad_norm": 0.07041274011135101, - "learning_rate": 6.538621094547798e-05, - "loss": 0.008290639519691468, - "step": 5830 - }, - { - "epoch": 0.9948849104859335, - "grad_norm": 0.061981480568647385, - "learning_rate": 6.53690738885387e-05, - "loss": 0.007011125236749649, - "step": 5835 - }, - { - "epoch": 0.9957374254049446, - "grad_norm": 0.09022640436887741, - "learning_rate": 6.535192382138867e-05, - "loss": 0.012455084919929504, - "step": 5840 - }, - { - "epoch": 0.9965899403239556, - "grad_norm": 0.05652628839015961, - "learning_rate": 6.53347607520341e-05, - "loss": 0.011704784631729127, - "step": 5845 - }, - { - "epoch": 0.9974424552429667, - "grad_norm": 0.0717577114701271, - "learning_rate": 6.531758468848732e-05, - "loss": 0.007738448679447174, - "step": 5850 - }, - { - "epoch": 0.9982949701619779, - "grad_norm": 0.04797588661313057, - "learning_rate": 6.530039563876665e-05, - "loss": 0.00894927978515625, - "step": 5855 - }, - { - "epoch": 0.9991474850809889, - "grad_norm": 0.06541015207767487, - "learning_rate": 6.528319361089651e-05, - "loss": 0.00731588676571846, - "step": 5860 - }, - { - "epoch": 0.9998294970161978, - "eval_loss": 0.03369956836104393, - "eval_runtime": 3.5892, - "eval_samples_per_second": 70.21, - "eval_steps_per_second": 1.114, - "step": 5864 - }, - { - "eval_cer_subset": 0.014444607292328236, - "eval_cer_subset_edit_distance": 887, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 5864 - }, - { - "epoch": 1.0, - "grad_norm": 0.06960473209619522, - "learning_rate": 6.52659786129074e-05, - "loss": 0.009423434734344482, - "step": 5865 - }, - { - "epoch": 1.000852514919011, - "grad_norm": 0.08438396453857422, - "learning_rate": 6.524875065283587e-05, - "loss": 0.009560108184814453, - "step": 5870 - }, - { - "epoch": 1.0017050298380221, - "grad_norm": 0.06281089037656784, - "learning_rate": 6.523150973872446e-05, - "loss": 0.007503298670053482, - "step": 5875 - }, - { - "epoch": 1.0025575447570332, - "grad_norm": 0.11766793578863144, - "learning_rate": 6.52142558786218e-05, - "loss": 0.008890827000141144, - "step": 5880 - }, - { - "epoch": 1.0034100596760442, - "grad_norm": 0.058115314692258835, - "learning_rate": 6.519698908058262e-05, - "loss": 0.006190531700849533, - "step": 5885 - }, - { - "epoch": 1.0042625745950555, - "grad_norm": 0.06857501715421677, - "learning_rate": 6.51797093526676e-05, - "loss": 0.007162582129240036, - "step": 5890 - }, - { - "epoch": 1.0051150895140666, - "grad_norm": 0.03517467528581619, - "learning_rate": 6.51624167029435e-05, - "loss": 0.0060476396232843396, - "step": 5895 - }, - { - "epoch": 1.0059676044330776, - "grad_norm": 0.10047292709350586, - "learning_rate": 6.514511113948307e-05, - "loss": 0.006416718661785126, - "step": 5900 - }, - { - "epoch": 1.0068201193520887, - "grad_norm": 0.07266796380281448, - "learning_rate": 6.512779267036518e-05, - "loss": 0.005519292503595352, - "step": 5905 - }, - { - "epoch": 1.0076726342710998, - "grad_norm": 0.05385264754295349, - "learning_rate": 6.511046130367464e-05, - "loss": 0.006731215119361878, - "step": 5910 - }, - { - "epoch": 1.0085251491901108, - "grad_norm": 0.0927869975566864, - "learning_rate": 6.50931170475023e-05, - "loss": 0.0073065564036369325, - "step": 5915 - }, - { - "epoch": 1.0093776641091219, - "grad_norm": 0.08416371792554855, - "learning_rate": 6.507575990994504e-05, - "loss": 0.005843915045261383, - "step": 5920 - }, - { - "epoch": 1.010230179028133, - "grad_norm": 0.06585095822811127, - "learning_rate": 6.505838989910576e-05, - "loss": 0.006345044076442719, - "step": 5925 - }, - { - "epoch": 1.011082693947144, - "grad_norm": 0.06341785192489624, - "learning_rate": 6.504100702309336e-05, - "loss": 0.005391617119312286, - "step": 5930 - }, - { - "epoch": 1.011935208866155, - "grad_norm": 0.08260001242160797, - "learning_rate": 6.502361129002273e-05, - "loss": 0.008031262457370758, - "step": 5935 - }, - { - "epoch": 1.0127877237851663, - "grad_norm": 0.08805666118860245, - "learning_rate": 6.500620270801478e-05, - "loss": 0.006408621370792389, - "step": 5940 - }, - { - "epoch": 1.0136402387041774, - "grad_norm": 0.0704861581325531, - "learning_rate": 6.498878128519642e-05, - "loss": 0.006208440661430359, - "step": 5945 - }, - { - "epoch": 1.0144927536231885, - "grad_norm": 0.07539117336273193, - "learning_rate": 6.497134702970055e-05, - "loss": 0.005263582617044449, - "step": 5950 - }, - { - "epoch": 1.0153452685421995, - "grad_norm": 0.022507963702082634, - "learning_rate": 6.495389994966606e-05, - "loss": 0.005692056566476822, - "step": 5955 - }, - { - "epoch": 1.0161977834612106, - "grad_norm": 0.05641510710120201, - "learning_rate": 6.493644005323783e-05, - "loss": 0.007954449951648712, - "step": 5960 - }, - { - "epoch": 1.0170502983802217, - "grad_norm": 0.04853788763284683, - "learning_rate": 6.49189673485667e-05, - "loss": 0.006910678744316101, - "step": 5965 - }, - { - "epoch": 1.0179028132992327, - "grad_norm": 0.07868898659944534, - "learning_rate": 6.490148184380956e-05, - "loss": 0.007678037136793136, - "step": 5970 - }, - { - "epoch": 1.0187553282182438, - "grad_norm": 0.08481275290250778, - "learning_rate": 6.488398354712917e-05, - "loss": 0.0060794509947299956, - "step": 5975 - }, - { - "epoch": 1.0196078431372548, - "grad_norm": 0.05573422089219093, - "learning_rate": 6.486647246669435e-05, - "loss": 0.0050107244402170185, - "step": 5980 - }, - { - "epoch": 1.020460358056266, - "grad_norm": 0.10777781158685684, - "learning_rate": 6.484894861067983e-05, - "loss": 0.006611569225788117, - "step": 5985 - }, - { - "epoch": 1.0213128729752772, - "grad_norm": 0.041842151433229446, - "learning_rate": 6.483141198726635e-05, - "loss": 0.0060344856232404705, - "step": 5990 - }, - { - "epoch": 1.0221653878942882, - "grad_norm": 0.05765567347407341, - "learning_rate": 6.48138626046406e-05, - "loss": 0.005772604793310166, - "step": 5995 - }, - { - "epoch": 1.0230179028132993, - "grad_norm": 0.05987582355737686, - "learning_rate": 6.479630047099517e-05, - "loss": 0.006899695098400116, - "step": 6000 - }, - { - "epoch": 1.0238704177323104, - "grad_norm": 0.046085257083177567, - "learning_rate": 6.477872559452867e-05, - "loss": 0.006151453405618667, - "step": 6005 - }, - { - "epoch": 1.0247229326513214, - "grad_norm": 0.05994739755988121, - "learning_rate": 6.476113798344566e-05, - "loss": 0.007787984609603882, - "step": 6010 - }, - { - "epoch": 1.0255754475703325, - "grad_norm": 0.08866287767887115, - "learning_rate": 6.47435376459566e-05, - "loss": 0.007754974067211151, - "step": 6015 - }, - { - "epoch": 1.0264279624893435, - "grad_norm": 0.07492240518331528, - "learning_rate": 6.472592459027793e-05, - "loss": 0.005562775582075119, - "step": 6020 - }, - { - "epoch": 1.0272804774083546, - "grad_norm": 0.058771468698978424, - "learning_rate": 6.470829882463198e-05, - "loss": 0.008101420104503631, - "step": 6025 - }, - { - "epoch": 1.0281329923273657, - "grad_norm": 0.08099868148565292, - "learning_rate": 6.469066035724708e-05, - "loss": 0.007585109025239944, - "step": 6030 - }, - { - "epoch": 1.0289855072463767, - "grad_norm": 0.09368649870157242, - "learning_rate": 6.467300919635743e-05, - "loss": 0.007342393696308136, - "step": 6035 - }, - { - "epoch": 1.029838022165388, - "grad_norm": 0.07358572632074356, - "learning_rate": 6.465534535020317e-05, - "loss": 0.007179292291402817, - "step": 6040 - }, - { - "epoch": 1.030690537084399, - "grad_norm": 0.0542459636926651, - "learning_rate": 6.46376688270304e-05, - "loss": 0.007178785651922226, - "step": 6045 - }, - { - "epoch": 1.0315430520034101, - "grad_norm": 0.04534808546304703, - "learning_rate": 6.461997963509109e-05, - "loss": 0.005939013883471489, - "step": 6050 - }, - { - "epoch": 1.0323955669224212, - "grad_norm": 0.04498334974050522, - "learning_rate": 6.460227778264314e-05, - "loss": 0.007932021468877792, - "step": 6055 - }, - { - "epoch": 1.0332480818414322, - "grad_norm": 0.09503943473100662, - "learning_rate": 6.458456327795038e-05, - "loss": 0.006005316227674484, - "step": 6060 - }, - { - "epoch": 1.0341005967604433, - "grad_norm": 0.06634567677974701, - "learning_rate": 6.456683612928252e-05, - "loss": 0.00472346730530262, - "step": 6065 - }, - { - "epoch": 1.0349531116794544, - "grad_norm": 0.06090138852596283, - "learning_rate": 6.454909634491518e-05, - "loss": 0.0071956045925617215, - "step": 6070 - }, - { - "epoch": 1.0358056265984654, - "grad_norm": 0.09833965450525284, - "learning_rate": 6.453134393312988e-05, - "loss": 0.00738539919257164, - "step": 6075 - }, - { - "epoch": 1.0366581415174765, - "grad_norm": 0.07924133539199829, - "learning_rate": 6.451357890221406e-05, - "loss": 0.008464773744344711, - "step": 6080 - }, - { - "epoch": 1.0375106564364875, - "grad_norm": 0.04132373258471489, - "learning_rate": 6.4495801260461e-05, - "loss": 0.005705388635396958, - "step": 6085 - }, - { - "epoch": 1.0383631713554988, - "grad_norm": 0.08653424680233002, - "learning_rate": 6.44780110161699e-05, - "loss": 0.00777137503027916, - "step": 6090 - }, - { - "epoch": 1.0392156862745099, - "grad_norm": 0.08147025108337402, - "learning_rate": 6.446020817764583e-05, - "loss": 0.005003783106803894, - "step": 6095 - }, - { - "epoch": 1.040068201193521, - "grad_norm": 0.07091398537158966, - "learning_rate": 6.444239275319977e-05, - "loss": 0.005957254022359848, - "step": 6100 - }, - { - "epoch": 1.040920716112532, - "grad_norm": 0.06259306520223618, - "learning_rate": 6.442456475114855e-05, - "loss": 0.005096634104847908, - "step": 6105 - }, - { - "epoch": 1.041773231031543, - "grad_norm": 0.07044103741645813, - "learning_rate": 6.440672417981485e-05, - "loss": 0.00557241328060627, - "step": 6110 - }, - { - "epoch": 1.0426257459505541, - "grad_norm": 0.05029159039258957, - "learning_rate": 6.438887104752726e-05, - "loss": 0.0056043524295091626, - "step": 6115 - }, - { - "epoch": 1.0434782608695652, - "grad_norm": 0.04778699576854706, - "learning_rate": 6.437100536262022e-05, - "loss": 0.00855453684926033, - "step": 6120 - }, - { - "epoch": 1.0443307757885762, - "grad_norm": 0.07467184215784073, - "learning_rate": 6.435312713343401e-05, - "loss": 0.006690071523189544, - "step": 6125 - }, - { - "epoch": 1.0451832907075873, - "grad_norm": 0.07189153879880905, - "learning_rate": 6.433523636831481e-05, - "loss": 0.007009527087211609, - "step": 6130 - }, - { - "epoch": 1.0460358056265984, - "grad_norm": 0.08000020682811737, - "learning_rate": 6.431733307561459e-05, - "loss": 0.007411211729049683, - "step": 6135 - }, - { - "epoch": 1.0468883205456097, - "grad_norm": 0.06737730652093887, - "learning_rate": 6.429941726369124e-05, - "loss": 0.006843548268079758, - "step": 6140 - }, - { - "epoch": 1.0477408354646207, - "grad_norm": 0.09834714978933334, - "learning_rate": 6.428148894090841e-05, - "loss": 0.007167841494083405, - "step": 6145 - }, - { - "epoch": 1.0485933503836318, - "grad_norm": 0.06415695697069168, - "learning_rate": 6.426354811563567e-05, - "loss": 0.005131457373499871, - "step": 6150 - }, - { - "epoch": 1.0494458653026428, - "grad_norm": 0.07823871076107025, - "learning_rate": 6.424559479624839e-05, - "loss": 0.004797356575727463, - "step": 6155 - }, - { - "epoch": 1.050298380221654, - "grad_norm": 0.07165013998746872, - "learning_rate": 6.422762899112777e-05, - "loss": 0.006430945545434952, - "step": 6160 - }, - { - "epoch": 1.051150895140665, - "grad_norm": 0.10924427956342697, - "learning_rate": 6.420965070866086e-05, - "loss": 0.008151047676801682, - "step": 6165 - }, - { - "epoch": 1.052003410059676, - "grad_norm": 0.10381831228733063, - "learning_rate": 6.41916599572405e-05, - "loss": 0.009056917577981948, - "step": 6170 - }, - { - "epoch": 1.052855924978687, - "grad_norm": 0.05251248553395271, - "learning_rate": 6.417365674526539e-05, - "loss": 0.004240944981575012, - "step": 6175 - }, - { - "epoch": 1.0537084398976981, - "grad_norm": 0.0812104344367981, - "learning_rate": 6.415564108114001e-05, - "loss": 0.008805926889181137, - "step": 6180 - }, - { - "epoch": 1.0545609548167092, - "grad_norm": 0.05640942230820656, - "learning_rate": 6.413761297327469e-05, - "loss": 0.005727213248610497, - "step": 6185 - }, - { - "epoch": 1.0554134697357205, - "grad_norm": 0.10114334523677826, - "learning_rate": 6.411957243008552e-05, - "loss": 0.008660107105970382, - "step": 6190 - }, - { - "epoch": 1.0562659846547315, - "grad_norm": 0.06809760630130768, - "learning_rate": 6.410151945999447e-05, - "loss": 0.006786180287599563, - "step": 6195 - }, - { - "epoch": 1.0571184995737426, - "grad_norm": 0.08121974021196365, - "learning_rate": 6.408345407142924e-05, - "loss": 0.004730105027556419, - "step": 6200 - }, - { - "epoch": 1.0579710144927537, - "grad_norm": 0.0630379393696785, - "learning_rate": 6.406537627282336e-05, - "loss": 0.006532897800207138, - "step": 6205 - }, - { - "epoch": 1.0588235294117647, - "grad_norm": 0.09354323893785477, - "learning_rate": 6.404728607261612e-05, - "loss": 0.008165966719388962, - "step": 6210 - }, - { - "epoch": 1.0596760443307758, - "grad_norm": 0.0509798526763916, - "learning_rate": 6.402918347925267e-05, - "loss": 0.006781977415084839, - "step": 6215 - }, - { - "epoch": 1.0605285592497868, - "grad_norm": 0.09830603748559952, - "learning_rate": 6.401106850118389e-05, - "loss": 0.00675075501203537, - "step": 6220 - }, - { - "epoch": 1.061381074168798, - "grad_norm": 0.08417326211929321, - "learning_rate": 6.399294114686645e-05, - "loss": 0.005759935826063156, - "step": 6225 - }, - { - "epoch": 1.062233589087809, - "grad_norm": 0.04999511316418648, - "learning_rate": 6.39748014247628e-05, - "loss": 0.0059943776577711105, - "step": 6230 - }, - { - "epoch": 1.06308610400682, - "grad_norm": 0.0355304591357708, - "learning_rate": 6.395664934334116e-05, - "loss": 0.003978967294096946, - "step": 6235 - }, - { - "epoch": 1.0639386189258313, - "grad_norm": 0.09096778929233551, - "learning_rate": 6.393848491107554e-05, - "loss": 0.006428928673267364, - "step": 6240 - }, - { - "epoch": 1.0647911338448424, - "grad_norm": 0.09047707170248032, - "learning_rate": 6.392030813644569e-05, - "loss": 0.005584535002708435, - "step": 6245 - }, - { - "epoch": 1.0656436487638534, - "grad_norm": 0.07133036106824875, - "learning_rate": 6.390211902793714e-05, - "loss": 0.00610351674258709, - "step": 6250 - }, - { - "epoch": 1.0664961636828645, - "grad_norm": 0.1025620549917221, - "learning_rate": 6.388391759404117e-05, - "loss": 0.006316560506820679, - "step": 6255 - }, - { - "epoch": 1.0673486786018755, - "grad_norm": 0.0922650694847107, - "learning_rate": 6.386570384325482e-05, - "loss": 0.008717238903045654, - "step": 6260 - }, - { - "epoch": 1.0682011935208866, - "grad_norm": 0.094338558614254, - "learning_rate": 6.384747778408085e-05, - "loss": 0.0067199327051639555, - "step": 6265 - }, - { - "epoch": 1.0690537084398977, - "grad_norm": 0.07260075211524963, - "learning_rate": 6.382923942502782e-05, - "loss": 0.007249505072832107, - "step": 6270 - }, - { - "epoch": 1.0699062233589087, - "grad_norm": 0.06572386622428894, - "learning_rate": 6.381098877460999e-05, - "loss": 0.007879015803337098, - "step": 6275 - }, - { - "epoch": 1.0707587382779198, - "grad_norm": 0.11646077036857605, - "learning_rate": 6.379272584134737e-05, - "loss": 0.006477512419223785, - "step": 6280 - }, - { - "epoch": 1.0716112531969308, - "grad_norm": 0.14154180884361267, - "learning_rate": 6.37744506337657e-05, - "loss": 0.0069471016526222226, - "step": 6285 - }, - { - "epoch": 1.0724637681159421, - "grad_norm": 0.113606296479702, - "learning_rate": 6.375616316039647e-05, - "loss": 0.010210946947336198, - "step": 6290 - }, - { - "epoch": 1.0733162830349532, - "grad_norm": 0.07193166017532349, - "learning_rate": 6.373786342977687e-05, - "loss": 0.00820360854268074, - "step": 6295 - }, - { - "epoch": 1.0741687979539642, - "grad_norm": 0.06180251017212868, - "learning_rate": 6.371955145044983e-05, - "loss": 0.006048502773046494, - "step": 6300 - }, - { - "epoch": 1.0750213128729753, - "grad_norm": 0.06956778466701508, - "learning_rate": 6.370122723096398e-05, - "loss": 0.005345676839351654, - "step": 6305 - }, - { - "epoch": 1.0758738277919864, - "grad_norm": 0.09170625358819962, - "learning_rate": 6.368289077987368e-05, - "loss": 0.0068355493247509004, - "step": 6310 - }, - { - "epoch": 1.0767263427109974, - "grad_norm": 0.07023731619119644, - "learning_rate": 6.366454210573901e-05, - "loss": 0.004600600153207779, - "step": 6315 - }, - { - "epoch": 1.0775788576300085, - "grad_norm": 0.07429320365190506, - "learning_rate": 6.36461812171257e-05, - "loss": 0.006272794306278228, - "step": 6320 - }, - { - "epoch": 1.0784313725490196, - "grad_norm": 0.11356805264949799, - "learning_rate": 6.362780812260528e-05, - "loss": 0.0048342026770114895, - "step": 6325 - }, - { - "epoch": 1.0792838874680306, - "grad_norm": 0.11231013387441635, - "learning_rate": 6.360942283075489e-05, - "loss": 0.00653451681137085, - "step": 6330 - }, - { - "epoch": 1.0801364023870417, - "grad_norm": 0.09655431658029556, - "learning_rate": 6.359102535015739e-05, - "loss": 0.008280844241380692, - "step": 6335 - }, - { - "epoch": 1.080988917306053, - "grad_norm": 0.10172779113054276, - "learning_rate": 6.357261568940135e-05, - "loss": 0.007757744938135147, - "step": 6340 - }, - { - "epoch": 1.081841432225064, - "grad_norm": 0.06417235732078552, - "learning_rate": 6.3554193857081e-05, - "loss": 0.007309675216674805, - "step": 6345 - }, - { - "epoch": 1.082693947144075, - "grad_norm": 0.053178418427705765, - "learning_rate": 6.35357598617963e-05, - "loss": 0.007162143290042877, - "step": 6350 - }, - { - "epoch": 1.0835464620630861, - "grad_norm": 0.07408315688371658, - "learning_rate": 6.351731371215278e-05, - "loss": 0.008392173796892166, - "step": 6355 - }, - { - "epoch": 1.0843989769820972, - "grad_norm": 0.03643275052309036, - "learning_rate": 6.349885541676179e-05, - "loss": 0.00610513798892498, - "step": 6360 - }, - { - "epoch": 1.0852514919011083, - "grad_norm": 0.04701307415962219, - "learning_rate": 6.348038498424023e-05, - "loss": 0.00612705871462822, - "step": 6365 - }, - { - "epoch": 1.0861040068201193, - "grad_norm": 0.05050053820014, - "learning_rate": 6.346190242321075e-05, - "loss": 0.005640604719519615, - "step": 6370 - }, - { - "epoch": 1.0869565217391304, - "grad_norm": 0.05203640088438988, - "learning_rate": 6.344340774230159e-05, - "loss": 0.005340654775500298, - "step": 6375 - }, - { - "epoch": 1.0878090366581414, - "grad_norm": 0.07451866567134857, - "learning_rate": 6.342490095014669e-05, - "loss": 0.006459225714206695, - "step": 6380 - }, - { - "epoch": 1.0886615515771525, - "grad_norm": 0.09951499849557877, - "learning_rate": 6.340638205538566e-05, - "loss": 0.008529558777809143, - "step": 6385 - }, - { - "epoch": 1.0895140664961638, - "grad_norm": 0.06064416840672493, - "learning_rate": 6.33878510666637e-05, - "loss": 0.007885071635246276, - "step": 6390 - }, - { - "epoch": 1.0903665814151748, - "grad_norm": 0.09382321685552597, - "learning_rate": 6.33693079926317e-05, - "loss": 0.007992906123399734, - "step": 6395 - }, - { - "epoch": 1.091219096334186, - "grad_norm": 0.054066915065050125, - "learning_rate": 6.335075284194621e-05, - "loss": 0.007473263889551163, - "step": 6400 - }, - { - "epoch": 1.092071611253197, - "grad_norm": 0.06763065606355667, - "learning_rate": 6.333218562326937e-05, - "loss": 0.006374929845333099, - "step": 6405 - }, - { - "epoch": 1.092924126172208, - "grad_norm": 0.0656818076968193, - "learning_rate": 6.331360634526899e-05, - "loss": 0.006085469573736191, - "step": 6410 - }, - { - "epoch": 1.093776641091219, - "grad_norm": 0.060463279485702515, - "learning_rate": 6.329501501661848e-05, - "loss": 0.005605050176382065, - "step": 6415 - }, - { - "epoch": 1.0946291560102301, - "grad_norm": 0.05734890326857567, - "learning_rate": 6.32764116459969e-05, - "loss": 0.00563613623380661, - "step": 6420 - }, - { - "epoch": 1.0954816709292412, - "grad_norm": 0.0502542182803154, - "learning_rate": 6.32577962420889e-05, - "loss": 0.004675766825675965, - "step": 6425 - }, - { - "epoch": 1.0963341858482523, - "grad_norm": 0.06550677120685577, - "learning_rate": 6.32391688135848e-05, - "loss": 0.006265480071306229, - "step": 6430 - }, - { - "epoch": 1.0971867007672633, - "grad_norm": 0.1298699826002121, - "learning_rate": 6.322052936918048e-05, - "loss": 0.008352620899677277, - "step": 6435 - }, - { - "epoch": 1.0980392156862746, - "grad_norm": 0.08422241359949112, - "learning_rate": 6.320187791757748e-05, - "loss": 0.005868597701191902, - "step": 6440 - }, - { - "epoch": 1.0988917306052857, - "grad_norm": 0.07807652652263641, - "learning_rate": 6.318321446748291e-05, - "loss": 0.010353461652994157, - "step": 6445 - }, - { - "epoch": 1.0997442455242967, - "grad_norm": 0.0663999617099762, - "learning_rate": 6.316453902760946e-05, - "loss": 0.00667201578617096, - "step": 6450 - }, - { - "epoch": 1.1005967604433078, - "grad_norm": 0.06362646073102951, - "learning_rate": 6.314585160667547e-05, - "loss": 0.006539353728294372, - "step": 6455 - }, - { - "epoch": 1.1014492753623188, - "grad_norm": 0.04160219058394432, - "learning_rate": 6.312715221340485e-05, - "loss": 0.004082740843296051, - "step": 6460 - }, - { - "epoch": 1.10230179028133, - "grad_norm": 0.05449013039469719, - "learning_rate": 6.31084408565271e-05, - "loss": 0.007537595182657242, - "step": 6465 - }, - { - "epoch": 1.103154305200341, - "grad_norm": 0.06819169223308563, - "learning_rate": 6.308971754477729e-05, - "loss": 0.006866573542356491, - "step": 6470 - }, - { - "epoch": 1.104006820119352, - "grad_norm": 0.06622573733329773, - "learning_rate": 6.307098228689611e-05, - "loss": 0.009055091440677643, - "step": 6475 - }, - { - "epoch": 1.104859335038363, - "grad_norm": 0.05732693895697594, - "learning_rate": 6.305223509162978e-05, - "loss": 0.006077280640602112, - "step": 6480 - }, - { - "epoch": 1.1057118499573741, - "grad_norm": 0.06630431115627289, - "learning_rate": 6.303347596773012e-05, - "loss": 0.0064442440867424015, - "step": 6485 - }, - { - "epoch": 1.1065643648763854, - "grad_norm": 0.06782745569944382, - "learning_rate": 6.301470492395451e-05, - "loss": 0.005072608217597007, - "step": 6490 - }, - { - "epoch": 1.1074168797953965, - "grad_norm": 0.05796601250767708, - "learning_rate": 6.299592196906591e-05, - "loss": 0.0074319586157798765, - "step": 6495 - }, - { - "epoch": 1.1082693947144076, - "grad_norm": 0.04627149552106857, - "learning_rate": 6.297712711183282e-05, - "loss": 0.00512685589492321, - "step": 6500 - }, - { - "epoch": 1.1091219096334186, - "grad_norm": 0.08210720866918564, - "learning_rate": 6.295832036102929e-05, - "loss": 0.006917678564786911, - "step": 6505 - }, - { - "epoch": 1.1099744245524297, - "grad_norm": 0.08367052674293518, - "learning_rate": 6.293950172543496e-05, - "loss": 0.0054212499409914015, - "step": 6510 - }, - { - "epoch": 1.1108269394714407, - "grad_norm": 0.08192700892686844, - "learning_rate": 6.292067121383499e-05, - "loss": 0.00791442021727562, - "step": 6515 - }, - { - "epoch": 1.1116794543904518, - "grad_norm": 0.05766206234693527, - "learning_rate": 6.290182883502008e-05, - "loss": 0.006540960818529129, - "step": 6520 - }, - { - "epoch": 1.1125319693094629, - "grad_norm": 0.07752664387226105, - "learning_rate": 6.28829745977865e-05, - "loss": 0.009196925908327103, - "step": 6525 - }, - { - "epoch": 1.113384484228474, - "grad_norm": 0.07419038563966751, - "learning_rate": 6.2864108510936e-05, - "loss": 0.007524489611387253, - "step": 6530 - }, - { - "epoch": 1.1142369991474852, - "grad_norm": 0.04851066321134567, - "learning_rate": 6.284523058327593e-05, - "loss": 0.004060015082359314, - "step": 6535 - }, - { - "epoch": 1.1150895140664963, - "grad_norm": 0.0764140635728836, - "learning_rate": 6.282634082361911e-05, - "loss": 0.006797254830598831, - "step": 6540 - }, - { - "epoch": 1.1159420289855073, - "grad_norm": 0.06873292475938797, - "learning_rate": 6.280743924078392e-05, - "loss": 0.007637844234704971, - "step": 6545 - }, - { - "epoch": 1.1167945439045184, - "grad_norm": 0.047832686454057693, - "learning_rate": 6.278852584359425e-05, - "loss": 0.00542646199464798, - "step": 6550 - }, - { - "epoch": 1.1176470588235294, - "grad_norm": 0.10061443597078323, - "learning_rate": 6.27696006408795e-05, - "loss": 0.008591605722904206, - "step": 6555 - }, - { - "epoch": 1.1184995737425405, - "grad_norm": 0.09729041159152985, - "learning_rate": 6.27506636414746e-05, - "loss": 0.0064585842192173, - "step": 6560 - }, - { - "epoch": 1.1193520886615516, - "grad_norm": 0.04427873343229294, - "learning_rate": 6.273171485421992e-05, - "loss": 0.004846593365073204, - "step": 6565 - }, - { - "epoch": 1.1202046035805626, - "grad_norm": 0.07813888788223267, - "learning_rate": 6.271275428796146e-05, - "loss": 0.007345958054065705, - "step": 6570 - }, - { - "epoch": 1.1210571184995737, - "grad_norm": 0.12500733137130737, - "learning_rate": 6.269378195155058e-05, - "loss": 0.006376177072525024, - "step": 6575 - }, - { - "epoch": 1.1219096334185847, - "grad_norm": 0.09636004269123077, - "learning_rate": 6.267479785384422e-05, - "loss": 0.0069282323122024534, - "step": 6580 - }, - { - "epoch": 1.1227621483375958, - "grad_norm": 0.06236017122864723, - "learning_rate": 6.265580200370478e-05, - "loss": 0.0050656192004680635, - "step": 6585 - }, - { - "epoch": 1.123614663256607, - "grad_norm": 0.0596463568508625, - "learning_rate": 6.263679441000019e-05, - "loss": 0.006198804825544357, - "step": 6590 - }, - { - "epoch": 1.1244671781756181, - "grad_norm": 0.0846855491399765, - "learning_rate": 6.261777508160378e-05, - "loss": 0.0073812372982501985, - "step": 6595 - }, - { - "epoch": 1.1253196930946292, - "grad_norm": 0.05228402093052864, - "learning_rate": 6.259874402739442e-05, - "loss": 0.006196213513612747, - "step": 6600 - }, - { - "epoch": 1.1261722080136403, - "grad_norm": 0.08395595103502274, - "learning_rate": 6.257970125625647e-05, - "loss": 0.0060448311269283295, - "step": 6605 - }, - { - "epoch": 1.1270247229326513, - "grad_norm": 0.055274344980716705, - "learning_rate": 6.25606467770797e-05, - "loss": 0.006578336656093598, - "step": 6610 - }, - { - "epoch": 1.1278772378516624, - "grad_norm": 0.054609380662441254, - "learning_rate": 6.254158059875936e-05, - "loss": 0.008093905448913575, - "step": 6615 - }, - { - "epoch": 1.1287297527706734, - "grad_norm": 0.05168715491890907, - "learning_rate": 6.25225027301962e-05, - "loss": 0.006086795404553413, - "step": 6620 - }, - { - "epoch": 1.1295822676896845, - "grad_norm": 0.06260590255260468, - "learning_rate": 6.250341318029641e-05, - "loss": 0.007288631051778793, - "step": 6625 - }, - { - "epoch": 1.1304347826086956, - "grad_norm": 0.06585957854986191, - "learning_rate": 6.24843119579716e-05, - "loss": 0.005779954791069031, - "step": 6630 - }, - { - "epoch": 1.1312872975277068, - "grad_norm": 0.05828391760587692, - "learning_rate": 6.246519907213888e-05, - "loss": 0.006320308148860932, - "step": 6635 - }, - { - "epoch": 1.132139812446718, - "grad_norm": 0.08400154113769531, - "learning_rate": 6.244607453172078e-05, - "loss": 0.00452205128967762, - "step": 6640 - }, - { - "epoch": 1.132992327365729, - "grad_norm": 0.059920940548181534, - "learning_rate": 6.242693834564525e-05, - "loss": 0.00620727390050888, - "step": 6645 - }, - { - "epoch": 1.13384484228474, - "grad_norm": 0.1100456491112709, - "learning_rate": 6.240779052284571e-05, - "loss": 0.006768511235713958, - "step": 6650 - }, - { - "epoch": 1.134697357203751, - "grad_norm": 0.07722730189561844, - "learning_rate": 6.238863107226102e-05, - "loss": 0.008207496255636215, - "step": 6655 - }, - { - "epoch": 1.1355498721227621, - "grad_norm": 0.05468403548002243, - "learning_rate": 6.236946000283542e-05, - "loss": 0.005272969231009483, - "step": 6660 - }, - { - "epoch": 1.1364023870417732, - "grad_norm": 0.0685155913233757, - "learning_rate": 6.235027732351863e-05, - "loss": 0.008074409514665603, - "step": 6665 - }, - { - "epoch": 1.1372549019607843, - "grad_norm": 0.07667854428291321, - "learning_rate": 6.233108304326571e-05, - "loss": 0.00863628089427948, - "step": 6670 - }, - { - "epoch": 1.1381074168797953, - "grad_norm": 0.0727204978466034, - "learning_rate": 6.231187717103727e-05, - "loss": 0.004446333646774292, - "step": 6675 - }, - { - "epoch": 1.1389599317988064, - "grad_norm": 0.06465403735637665, - "learning_rate": 6.229265971579918e-05, - "loss": 0.007380707561969757, - "step": 6680 - }, - { - "epoch": 1.1398124467178175, - "grad_norm": 0.04102586954832077, - "learning_rate": 6.227343068652281e-05, - "loss": 0.006103607639670372, - "step": 6685 - }, - { - "epoch": 1.1406649616368287, - "grad_norm": 0.06988929212093353, - "learning_rate": 6.225419009218493e-05, - "loss": 0.007358456403017044, - "step": 6690 - }, - { - "epoch": 1.1415174765558398, - "grad_norm": 0.07802724838256836, - "learning_rate": 6.223493794176767e-05, - "loss": 0.007887010276317597, - "step": 6695 - }, - { - "epoch": 1.1423699914748509, - "grad_norm": 0.10777991265058517, - "learning_rate": 6.221567424425857e-05, - "loss": 0.007653985172510147, - "step": 6700 - }, - { - "epoch": 1.143222506393862, - "grad_norm": 0.0922352522611618, - "learning_rate": 6.219639900865058e-05, - "loss": 0.007459624856710434, - "step": 6705 - }, - { - "epoch": 1.144075021312873, - "grad_norm": 0.06321967393159866, - "learning_rate": 6.217711224394202e-05, - "loss": 0.00540911853313446, - "step": 6710 - }, - { - "epoch": 1.144927536231884, - "grad_norm": 0.09344825148582458, - "learning_rate": 6.215781395913656e-05, - "loss": 0.0053936421871185304, - "step": 6715 - }, - { - "epoch": 1.145780051150895, - "grad_norm": 0.03457584232091904, - "learning_rate": 6.213850416324333e-05, - "loss": 0.006388355046510696, - "step": 6720 - }, - { - "epoch": 1.1466325660699062, - "grad_norm": 0.06931985914707184, - "learning_rate": 6.211918286527676e-05, - "loss": 0.005831810832023621, - "step": 6725 - }, - { - "epoch": 1.1474850809889172, - "grad_norm": 0.05588890612125397, - "learning_rate": 6.209985007425668e-05, - "loss": 0.0041655078530311584, - "step": 6730 - }, - { - "epoch": 1.1483375959079285, - "grad_norm": 0.07582694292068481, - "learning_rate": 6.208050579920826e-05, - "loss": 0.006521198153495789, - "step": 6735 - }, - { - "epoch": 1.1491901108269396, - "grad_norm": 0.07055433094501495, - "learning_rate": 6.206115004916209e-05, - "loss": 0.0066129244863986966, - "step": 6740 - }, - { - "epoch": 1.1500426257459506, - "grad_norm": 0.07039172202348709, - "learning_rate": 6.204178283315405e-05, - "loss": 0.005633739382028579, - "step": 6745 - }, - { - "epoch": 1.1508951406649617, - "grad_norm": 0.07833350449800491, - "learning_rate": 6.202240416022541e-05, - "loss": 0.006761975586414337, - "step": 6750 - }, - { - "epoch": 1.1517476555839727, - "grad_norm": 0.05553733557462692, - "learning_rate": 6.200301403942278e-05, - "loss": 0.006545065343379975, - "step": 6755 - }, - { - "epoch": 1.1526001705029838, - "grad_norm": 0.07306832075119019, - "learning_rate": 6.198361247979809e-05, - "loss": 0.008323725312948227, - "step": 6760 - }, - { - "epoch": 1.1534526854219949, - "grad_norm": 0.04787914454936981, - "learning_rate": 6.196419949040867e-05, - "loss": 0.004425797611474991, - "step": 6765 - }, - { - "epoch": 1.154305200341006, - "grad_norm": 0.08021930605173111, - "learning_rate": 6.194477508031712e-05, - "loss": 0.005103312805294991, - "step": 6770 - }, - { - "epoch": 1.155157715260017, - "grad_norm": 0.0821428894996643, - "learning_rate": 6.192533925859144e-05, - "loss": 0.009274877607822418, - "step": 6775 - }, - { - "epoch": 1.156010230179028, - "grad_norm": 0.09880609810352325, - "learning_rate": 6.190589203430486e-05, - "loss": 0.007863005250692367, - "step": 6780 - }, - { - "epoch": 1.156862745098039, - "grad_norm": 0.08075276762247086, - "learning_rate": 6.188643341653604e-05, - "loss": 0.004675677418708802, - "step": 6785 - }, - { - "epoch": 1.1577152600170504, - "grad_norm": 0.0633573830127716, - "learning_rate": 6.186696341436889e-05, - "loss": 0.007359890639781952, - "step": 6790 - }, - { - "epoch": 1.1585677749360614, - "grad_norm": 0.03828895092010498, - "learning_rate": 6.184748203689265e-05, - "loss": 0.004494070634245872, - "step": 6795 - }, - { - "epoch": 1.1594202898550725, - "grad_norm": 0.07907325774431229, - "learning_rate": 6.18279892932019e-05, - "loss": 0.006256269663572312, - "step": 6800 - }, - { - "epoch": 1.1602728047740836, - "grad_norm": 0.055188342928886414, - "learning_rate": 6.180848519239647e-05, - "loss": 0.009548474848270417, - "step": 6805 - }, - { - "epoch": 1.1611253196930946, - "grad_norm": 0.05850991606712341, - "learning_rate": 6.178896974358154e-05, - "loss": 0.0056076571345329285, - "step": 6810 - }, - { - "epoch": 1.1619778346121057, - "grad_norm": 0.0626932755112648, - "learning_rate": 6.176944295586757e-05, - "loss": 0.005667714029550552, - "step": 6815 - }, - { - "epoch": 1.1628303495311167, - "grad_norm": 0.06506946682929993, - "learning_rate": 6.174990483837031e-05, - "loss": 0.006154880672693252, - "step": 6820 - }, - { - "epoch": 1.1636828644501278, - "grad_norm": 0.0535859651863575, - "learning_rate": 6.17303554002108e-05, - "loss": 0.0042555928230285645, - "step": 6825 - }, - { - "epoch": 1.1645353793691389, - "grad_norm": 0.05577898398041725, - "learning_rate": 6.171079465051538e-05, - "loss": 0.006060104072093964, - "step": 6830 - }, - { - "epoch": 1.1653878942881502, - "grad_norm": 0.05574663355946541, - "learning_rate": 6.169122259841566e-05, - "loss": 0.008667515218257904, - "step": 6835 - }, - { - "epoch": 1.1662404092071612, - "grad_norm": 0.09157130867242813, - "learning_rate": 6.16716392530485e-05, - "loss": 0.007259176671504974, - "step": 6840 - }, - { - "epoch": 1.1670929241261723, - "grad_norm": 0.06404415518045425, - "learning_rate": 6.165204462355608e-05, - "loss": 0.007140593230724334, - "step": 6845 - }, - { - "epoch": 1.1679454390451833, - "grad_norm": 0.0713329091668129, - "learning_rate": 6.163243871908581e-05, - "loss": 0.006118748337030411, - "step": 6850 - }, - { - "epoch": 1.1687979539641944, - "grad_norm": 0.04038231074810028, - "learning_rate": 6.16128215487904e-05, - "loss": 0.006028918176889419, - "step": 6855 - }, - { - "epoch": 1.1696504688832055, - "grad_norm": 0.07793593406677246, - "learning_rate": 6.159319312182777e-05, - "loss": 0.006851163506507873, - "step": 6860 - }, - { - "epoch": 1.1705029838022165, - "grad_norm": 0.07543511688709259, - "learning_rate": 6.157355344736114e-05, - "loss": 0.006878272444009781, - "step": 6865 - }, - { - "epoch": 1.1713554987212276, - "grad_norm": 0.06332696974277496, - "learning_rate": 6.155390253455897e-05, - "loss": 0.006324999034404755, - "step": 6870 - }, - { - "epoch": 1.1722080136402386, - "grad_norm": 0.06920734792947769, - "learning_rate": 6.153424039259495e-05, - "loss": 0.00536310225725174, - "step": 6875 - }, - { - "epoch": 1.1730605285592497, - "grad_norm": 0.09313163161277771, - "learning_rate": 6.151456703064802e-05, - "loss": 0.00795424059033394, - "step": 6880 - }, - { - "epoch": 1.1739130434782608, - "grad_norm": 0.08588451147079468, - "learning_rate": 6.149488245790234e-05, - "loss": 0.006889034807682037, - "step": 6885 - }, - { - "epoch": 1.174765558397272, - "grad_norm": 0.09814538061618805, - "learning_rate": 6.147518668354737e-05, - "loss": 0.007332245260477066, - "step": 6890 - }, - { - "epoch": 1.175618073316283, - "grad_norm": 0.05141104385256767, - "learning_rate": 6.145547971677772e-05, - "loss": 0.00333656407892704, - "step": 6895 - }, - { - "epoch": 1.1764705882352942, - "grad_norm": 0.05575519800186157, - "learning_rate": 6.143576156679327e-05, - "loss": 0.005542768910527229, - "step": 6900 - }, - { - "epoch": 1.1773231031543052, - "grad_norm": 0.04917008429765701, - "learning_rate": 6.14160322427991e-05, - "loss": 0.007007633149623871, - "step": 6905 - }, - { - "epoch": 1.1781756180733163, - "grad_norm": 0.06385336071252823, - "learning_rate": 6.139629175400552e-05, - "loss": 0.007495941221714019, - "step": 6910 - }, - { - "epoch": 1.1790281329923273, - "grad_norm": 0.08664151281118393, - "learning_rate": 6.137654010962805e-05, - "loss": 0.0075534448027610775, - "step": 6915 - }, - { - "epoch": 1.1798806479113384, - "grad_norm": 0.08881189674139023, - "learning_rate": 6.13567773188874e-05, - "loss": 0.0059935558587312695, - "step": 6920 - }, - { - "epoch": 1.1807331628303495, - "grad_norm": 0.07297934591770172, - "learning_rate": 6.133700339100952e-05, - "loss": 0.006142234057188034, - "step": 6925 - }, - { - "epoch": 1.1815856777493605, - "grad_norm": 0.053351663053035736, - "learning_rate": 6.131721833522552e-05, - "loss": 0.006038139387965202, - "step": 6930 - }, - { - "epoch": 1.1824381926683718, - "grad_norm": 0.12923622131347656, - "learning_rate": 6.129742216077172e-05, - "loss": 0.008645947277545928, - "step": 6935 - }, - { - "epoch": 1.1832907075873829, - "grad_norm": 0.095455601811409, - "learning_rate": 6.127761487688964e-05, - "loss": 0.004740688577294349, - "step": 6940 - }, - { - "epoch": 1.184143222506394, - "grad_norm": 0.11498606204986572, - "learning_rate": 6.125779649282599e-05, - "loss": 0.00805070549249649, - "step": 6945 - }, - { - "epoch": 1.184995737425405, - "grad_norm": 0.07489286363124847, - "learning_rate": 6.123796701783264e-05, - "loss": 0.0060746859759092334, - "step": 6950 - }, - { - "epoch": 1.185848252344416, - "grad_norm": 0.07027438282966614, - "learning_rate": 6.121812646116663e-05, - "loss": 0.006816025823354721, - "step": 6955 - }, - { - "epoch": 1.186700767263427, - "grad_norm": 0.08542973548173904, - "learning_rate": 6.119827483209024e-05, - "loss": 0.005315831303596497, - "step": 6960 - }, - { - "epoch": 1.1875532821824382, - "grad_norm": 0.08409032970666885, - "learning_rate": 6.117841213987082e-05, - "loss": 0.0061601437628269196, - "step": 6965 - }, - { - "epoch": 1.1884057971014492, - "grad_norm": 0.10387974232435226, - "learning_rate": 6.115853839378096e-05, - "loss": 0.0071022816002368925, - "step": 6970 - }, - { - "epoch": 1.1892583120204603, - "grad_norm": 0.056441329419612885, - "learning_rate": 6.113865360309838e-05, - "loss": 0.004539218544960022, - "step": 6975 - }, - { - "epoch": 1.1901108269394713, - "grad_norm": 0.10132234543561935, - "learning_rate": 6.111875777710598e-05, - "loss": 0.0060778014361858364, - "step": 6980 - }, - { - "epoch": 1.1909633418584824, - "grad_norm": 0.07129226624965668, - "learning_rate": 6.109885092509177e-05, - "loss": 0.007794113457202911, - "step": 6985 - }, - { - "epoch": 1.1918158567774937, - "grad_norm": 0.09267892688512802, - "learning_rate": 6.107893305634893e-05, - "loss": 0.006867295503616333, - "step": 6990 - }, - { - "epoch": 1.1926683716965047, - "grad_norm": 0.0739888921380043, - "learning_rate": 6.10590041801758e-05, - "loss": 0.006263263523578644, - "step": 6995 - }, - { - "epoch": 1.1935208866155158, - "grad_norm": 0.10201311856508255, - "learning_rate": 6.103906430587583e-05, - "loss": 0.006245525926351548, - "step": 7000 - }, - { - "epoch": 1.1943734015345269, - "grad_norm": 0.08561360090970993, - "learning_rate": 6.1019113442757636e-05, - "loss": 0.005739351361989975, - "step": 7005 - }, - { - "epoch": 1.195225916453538, - "grad_norm": 0.06410367786884308, - "learning_rate": 6.099915160013491e-05, - "loss": 0.00598936527967453, - "step": 7010 - }, - { - "epoch": 1.196078431372549, - "grad_norm": 0.11765716224908829, - "learning_rate": 6.0979178787326544e-05, - "loss": 0.010477253794670105, - "step": 7015 - }, - { - "epoch": 1.19693094629156, - "grad_norm": 0.06659694015979767, - "learning_rate": 6.095919501365648e-05, - "loss": 0.0072447523474693295, - "step": 7020 - }, - { - "epoch": 1.197783461210571, - "grad_norm": 0.05651358142495155, - "learning_rate": 6.093920028845381e-05, - "loss": 0.004644358158111572, - "step": 7025 - }, - { - "epoch": 1.1986359761295824, - "grad_norm": 0.07198809087276459, - "learning_rate": 6.0919194621052764e-05, - "loss": 0.00517328754067421, - "step": 7030 - }, - { - "epoch": 1.1994884910485935, - "grad_norm": 0.06188420578837395, - "learning_rate": 6.0899178020792614e-05, - "loss": 0.005182894691824913, - "step": 7035 - }, - { - "epoch": 1.2003410059676045, - "grad_norm": 0.07731341570615768, - "learning_rate": 6.087915049701783e-05, - "loss": 0.006863371282815933, - "step": 7040 - }, - { - "epoch": 1.2011935208866156, - "grad_norm": 0.07693833857774734, - "learning_rate": 6.0859112059077866e-05, - "loss": 0.008634812384843826, - "step": 7045 - }, - { - "epoch": 1.2020460358056266, - "grad_norm": 0.08118413388729095, - "learning_rate": 6.083906271632736e-05, - "loss": 0.008003174513578414, - "step": 7050 - }, - { - "epoch": 1.2028985507246377, - "grad_norm": 0.07794218510389328, - "learning_rate": 6.0819002478126016e-05, - "loss": 0.005899757146835327, - "step": 7055 - }, - { - "epoch": 1.2037510656436488, - "grad_norm": 0.08107218146324158, - "learning_rate": 6.079893135383861e-05, - "loss": 0.007581057399511338, - "step": 7060 - }, - { - "epoch": 1.2046035805626598, - "grad_norm": 0.06910198926925659, - "learning_rate": 6.077884935283502e-05, - "loss": 0.00794234573841095, - "step": 7065 - }, - { - "epoch": 1.2054560954816709, - "grad_norm": 0.08426421135663986, - "learning_rate": 6.0758756484490186e-05, - "loss": 0.0057635679841041565, - "step": 7070 - }, - { - "epoch": 1.206308610400682, - "grad_norm": 0.08670961856842041, - "learning_rate": 6.073865275818413e-05, - "loss": 0.006369538605213165, - "step": 7075 - }, - { - "epoch": 1.207161125319693, - "grad_norm": 0.04245399683713913, - "learning_rate": 6.071853818330193e-05, - "loss": 0.0067548036575317385, - "step": 7080 - }, - { - "epoch": 1.208013640238704, - "grad_norm": 0.10600235313177109, - "learning_rate": 6.069841276923376e-05, - "loss": 0.006923867762088776, - "step": 7085 - }, - { - "epoch": 1.2088661551577153, - "grad_norm": 0.07093790173530579, - "learning_rate": 6.0678276525374814e-05, - "loss": 0.005083417519927025, - "step": 7090 - }, - { - "epoch": 1.2097186700767264, - "grad_norm": 0.04997126758098602, - "learning_rate": 6.065812946112537e-05, - "loss": 0.006207586824893951, - "step": 7095 - }, - { - "epoch": 1.2105711849957375, - "grad_norm": 0.04425262287259102, - "learning_rate": 6.063797158589074e-05, - "loss": 0.0046977601945400235, - "step": 7100 - }, - { - "epoch": 1.2114236999147485, - "grad_norm": 0.07300136238336563, - "learning_rate": 6.0617802909081295e-05, - "loss": 0.005589437484741211, - "step": 7105 - }, - { - "epoch": 1.2122762148337596, - "grad_norm": 0.0878889262676239, - "learning_rate": 6.0597623440112445e-05, - "loss": 0.005844222381711006, - "step": 7110 - }, - { - "epoch": 1.2131287297527706, - "grad_norm": 0.09079992771148682, - "learning_rate": 6.0577433188404635e-05, - "loss": 0.007329034805297852, - "step": 7115 - }, - { - "epoch": 1.2139812446717817, - "grad_norm": 0.07165387272834778, - "learning_rate": 6.055723216338336e-05, - "loss": 0.006642927974462509, - "step": 7120 - }, - { - "epoch": 1.2148337595907928, - "grad_norm": 0.07113955169916153, - "learning_rate": 6.053702037447911e-05, - "loss": 0.006992670893669129, - "step": 7125 - }, - { - "epoch": 1.215686274509804, - "grad_norm": 0.08737215399742126, - "learning_rate": 6.0516797831127425e-05, - "loss": 0.006702055037021637, - "step": 7130 - }, - { - "epoch": 1.216538789428815, - "grad_norm": 0.07276564091444016, - "learning_rate": 6.049656454276887e-05, - "loss": 0.005692619457840919, - "step": 7135 - }, - { - "epoch": 1.2173913043478262, - "grad_norm": 0.09203831106424332, - "learning_rate": 6.0476320518849006e-05, - "loss": 0.006464710086584091, - "step": 7140 - }, - { - "epoch": 1.2182438192668372, - "grad_norm": 0.07749375700950623, - "learning_rate": 6.045606576881843e-05, - "loss": 0.008598372340202332, - "step": 7145 - }, - { - "epoch": 1.2190963341858483, - "grad_norm": 0.04338040575385094, - "learning_rate": 6.043580030213272e-05, - "loss": 0.006892016530036927, - "step": 7150 - }, - { - "epoch": 1.2199488491048593, - "grad_norm": 0.06691034138202667, - "learning_rate": 6.0415524128252474e-05, - "loss": 0.007622670382261276, - "step": 7155 - }, - { - "epoch": 1.2208013640238704, - "grad_norm": 0.07532396167516708, - "learning_rate": 6.039523725664329e-05, - "loss": 0.00698106437921524, - "step": 7160 - }, - { - "epoch": 1.2216538789428815, - "grad_norm": 0.0408058688044548, - "learning_rate": 6.037493969677575e-05, - "loss": 0.005919446796178817, - "step": 7165 - }, - { - "epoch": 1.2225063938618925, - "grad_norm": 0.07306578010320663, - "learning_rate": 6.0354631458125425e-05, - "loss": 0.008953345566987991, - "step": 7170 - }, - { - "epoch": 1.2233589087809036, - "grad_norm": 0.08269601315259933, - "learning_rate": 6.033431255017289e-05, - "loss": 0.007224951684474945, - "step": 7175 - }, - { - "epoch": 1.2242114236999146, - "grad_norm": 0.045140884816646576, - "learning_rate": 6.0313982982403676e-05, - "loss": 0.006175454705953598, - "step": 7180 - }, - { - "epoch": 1.2250639386189257, - "grad_norm": 0.0721440315246582, - "learning_rate": 6.0293642764308304e-05, - "loss": 0.007267911732196808, - "step": 7185 - }, - { - "epoch": 1.225916453537937, - "grad_norm": 0.081813283264637, - "learning_rate": 6.027329190538227e-05, - "loss": 0.006872846186161042, - "step": 7190 - }, - { - "epoch": 1.226768968456948, - "grad_norm": 0.05662613734602928, - "learning_rate": 6.025293041512602e-05, - "loss": 0.004837202653288841, - "step": 7195 - }, - { - "epoch": 1.227621483375959, - "grad_norm": 0.10023492574691772, - "learning_rate": 6.023255830304498e-05, - "loss": 0.0060194481164217, - "step": 7200 - }, - { - "epoch": 1.2284739982949702, - "grad_norm": 0.06398235261440277, - "learning_rate": 6.021217557864954e-05, - "loss": 0.007653398066759109, - "step": 7205 - }, - { - "epoch": 1.2293265132139812, - "grad_norm": 0.09494475275278091, - "learning_rate": 6.019178225145503e-05, - "loss": 0.007547302544116974, - "step": 7210 - }, - { - "epoch": 1.2301790281329923, - "grad_norm": 0.05356467142701149, - "learning_rate": 6.017137833098171e-05, - "loss": 0.007133310288190841, - "step": 7215 - }, - { - "epoch": 1.2310315430520034, - "grad_norm": 0.09225092083215714, - "learning_rate": 6.0150963826754836e-05, - "loss": 0.006320309638977051, - "step": 7220 - }, - { - "epoch": 1.2318840579710144, - "grad_norm": 0.07072161883115768, - "learning_rate": 6.013053874830458e-05, - "loss": 0.007313568145036697, - "step": 7225 - }, - { - "epoch": 1.2327365728900257, - "grad_norm": 0.07206818461418152, - "learning_rate": 6.0110103105166026e-05, - "loss": 0.0054031949490308765, - "step": 7230 - }, - { - "epoch": 1.2335890878090368, - "grad_norm": 0.08611681312322617, - "learning_rate": 6.008965690687922e-05, - "loss": 0.00670153945684433, - "step": 7235 - }, - { - "epoch": 1.2344416027280478, - "grad_norm": 0.07864221930503845, - "learning_rate": 6.0069200162989154e-05, - "loss": 0.0069690033793449405, - "step": 7240 - }, - { - "epoch": 1.2352941176470589, - "grad_norm": 0.06847227364778519, - "learning_rate": 6.0048732883045665e-05, - "loss": 0.006755173206329346, - "step": 7245 - }, - { - "epoch": 1.23614663256607, - "grad_norm": 0.06264699995517731, - "learning_rate": 6.0028255076603606e-05, - "loss": 0.00519348569214344, - "step": 7250 - }, - { - "epoch": 1.236999147485081, - "grad_norm": 0.04114431515336037, - "learning_rate": 6.0007766753222665e-05, - "loss": 0.006071234866976738, - "step": 7255 - }, - { - "epoch": 1.237851662404092, - "grad_norm": 0.03660140559077263, - "learning_rate": 5.998726792246751e-05, - "loss": 0.007517064362764359, - "step": 7260 - }, - { - "epoch": 1.2387041773231031, - "grad_norm": 0.10343052446842194, - "learning_rate": 5.9966758593907636e-05, - "loss": 0.0070131182670593265, - "step": 7265 - }, - { - "epoch": 1.2395566922421142, - "grad_norm": 0.08169959485530853, - "learning_rate": 5.994623877711751e-05, - "loss": 0.006279528886079788, - "step": 7270 - }, - { - "epoch": 1.2404092071611252, - "grad_norm": 0.06312677264213562, - "learning_rate": 5.992570848167645e-05, - "loss": 0.0041657909750938416, - "step": 7275 - }, - { - "epoch": 1.2412617220801363, - "grad_norm": 0.08725713193416595, - "learning_rate": 5.990516771716869e-05, - "loss": 0.007908149063587189, - "step": 7280 - }, - { - "epoch": 1.2421142369991476, - "grad_norm": 0.05857875198125839, - "learning_rate": 5.988461649318333e-05, - "loss": 0.005137740075588227, - "step": 7285 - }, - { - "epoch": 1.2429667519181586, - "grad_norm": 0.04836168512701988, - "learning_rate": 5.986405481931438e-05, - "loss": 0.005033157765865326, - "step": 7290 - }, - { - "epoch": 1.2438192668371697, - "grad_norm": 0.09514568001031876, - "learning_rate": 5.98434827051607e-05, - "loss": 0.007460397481918335, - "step": 7295 - }, - { - "epoch": 1.2446717817561808, - "grad_norm": 0.049415748566389084, - "learning_rate": 5.982290016032604e-05, - "loss": 0.0049881644546985624, - "step": 7300 - }, - { - "epoch": 1.2455242966751918, - "grad_norm": 0.1069302037358284, - "learning_rate": 5.980230719441903e-05, - "loss": 0.005356961116194725, - "step": 7305 - }, - { - "epoch": 1.2463768115942029, - "grad_norm": 0.1031380444765091, - "learning_rate": 5.9781703817053136e-05, - "loss": 0.0067513369023799895, - "step": 7310 - }, - { - "epoch": 1.247229326513214, - "grad_norm": 0.05909884348511696, - "learning_rate": 5.976109003784671e-05, - "loss": 0.005357486009597778, - "step": 7315 - }, - { - "epoch": 1.248081841432225, - "grad_norm": 0.09306607395410538, - "learning_rate": 5.974046586642295e-05, - "loss": 0.005747456848621368, - "step": 7320 - }, - { - "epoch": 1.248934356351236, - "grad_norm": 0.0688752606511116, - "learning_rate": 5.971983131240988e-05, - "loss": 0.0073902375996112825, - "step": 7325 - }, - { - "epoch": 1.2497868712702473, - "grad_norm": 0.06592141836881638, - "learning_rate": 5.969918638544044e-05, - "loss": 0.008268805593252182, - "step": 7330 - }, - { - "epoch": 1.2497868712702473, - "eval_loss": 0.037298671901226044, - "eval_runtime": 3.6917, - "eval_samples_per_second": 68.261, - "eval_steps_per_second": 1.084, - "step": 7330 - }, - { - "eval_cer_subset": 0.01283241324278991, - "eval_cer_subset_edit_distance": 788, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 7330 - }, - { - "epoch": 1.2506393861892584, - "grad_norm": 0.06094380095601082, - "learning_rate": 5.9678531095152326e-05, - "loss": 0.005528298765420913, - "step": 7335 - }, - { - "epoch": 1.2514919011082695, - "grad_norm": 0.1417030543088913, - "learning_rate": 5.965786545118815e-05, - "loss": 0.00984017476439476, - "step": 7340 - }, - { - "epoch": 1.2523444160272805, - "grad_norm": 0.08209668844938278, - "learning_rate": 5.963718946319529e-05, - "loss": 0.007516486942768097, - "step": 7345 - }, - { - "epoch": 1.2531969309462916, - "grad_norm": 0.06825494766235352, - "learning_rate": 5.9616503140826006e-05, - "loss": 0.005924524366855621, - "step": 7350 - }, - { - "epoch": 1.2540494458653026, - "grad_norm": 0.11229037493467331, - "learning_rate": 5.959580649373736e-05, - "loss": 0.006495627760887146, - "step": 7355 - }, - { - "epoch": 1.2549019607843137, - "grad_norm": 0.13235078752040863, - "learning_rate": 5.957509953159123e-05, - "loss": 0.00942063182592392, - "step": 7360 - }, - { - "epoch": 1.2557544757033248, - "grad_norm": 0.04514055699110031, - "learning_rate": 5.955438226405432e-05, - "loss": 0.006601292639970779, - "step": 7365 - }, - { - "epoch": 1.2566069906223358, - "grad_norm": 0.08192043751478195, - "learning_rate": 5.9533654700798126e-05, - "loss": 0.007403627783060074, - "step": 7370 - }, - { - "epoch": 1.257459505541347, - "grad_norm": 0.07101254910230637, - "learning_rate": 5.951291685149898e-05, - "loss": 0.006301522254943848, - "step": 7375 - }, - { - "epoch": 1.258312020460358, - "grad_norm": 0.05598035827279091, - "learning_rate": 5.949216872583799e-05, - "loss": 0.006812388449907303, - "step": 7380 - }, - { - "epoch": 1.259164535379369, - "grad_norm": 0.06444506347179413, - "learning_rate": 5.9471410333501085e-05, - "loss": 0.005891536176204681, - "step": 7385 - }, - { - "epoch": 1.2600170502983803, - "grad_norm": 0.04921717569231987, - "learning_rate": 5.945064168417895e-05, - "loss": 0.004649973660707474, - "step": 7390 - }, - { - "epoch": 1.2608695652173914, - "grad_norm": 0.09095602482557297, - "learning_rate": 5.94298627875671e-05, - "loss": 0.007515725493431091, - "step": 7395 - }, - { - "epoch": 1.2617220801364024, - "grad_norm": 0.09932803362607956, - "learning_rate": 5.9409073653365816e-05, - "loss": 0.006223166733980179, - "step": 7400 - }, - { - "epoch": 1.2625745950554135, - "grad_norm": 0.08616010844707489, - "learning_rate": 5.938827429128014e-05, - "loss": 0.006999516487121582, - "step": 7405 - }, - { - "epoch": 1.2634271099744245, - "grad_norm": 0.11979297548532486, - "learning_rate": 5.936746471101993e-05, - "loss": 0.00812242105603218, - "step": 7410 - }, - { - "epoch": 1.2642796248934356, - "grad_norm": 0.12872007489204407, - "learning_rate": 5.934664492229976e-05, - "loss": 0.006246988475322723, - "step": 7415 - }, - { - "epoch": 1.2651321398124467, - "grad_norm": 0.0831044539809227, - "learning_rate": 5.932581493483903e-05, - "loss": 0.00590248554944992, - "step": 7420 - }, - { - "epoch": 1.265984654731458, - "grad_norm": 0.09913221001625061, - "learning_rate": 5.9304974758361857e-05, - "loss": 0.007224322855472564, - "step": 7425 - }, - { - "epoch": 1.266837169650469, - "grad_norm": 0.08654595911502838, - "learning_rate": 5.928412440259713e-05, - "loss": 0.007056090980768204, - "step": 7430 - }, - { - "epoch": 1.26768968456948, - "grad_norm": 0.07882801443338394, - "learning_rate": 5.926326387727849e-05, - "loss": 0.00572751946747303, - "step": 7435 - }, - { - "epoch": 1.2685421994884911, - "grad_norm": 0.12886428833007812, - "learning_rate": 5.924239319214432e-05, - "loss": 0.0106881283223629, - "step": 7440 - }, - { - "epoch": 1.2693947144075022, - "grad_norm": 0.05597686767578125, - "learning_rate": 5.922151235693775e-05, - "loss": 0.005041084438562393, - "step": 7445 - }, - { - "epoch": 1.2702472293265132, - "grad_norm": 0.10719682276248932, - "learning_rate": 5.920062138140665e-05, - "loss": 0.007724158465862274, - "step": 7450 - }, - { - "epoch": 1.2710997442455243, - "grad_norm": 0.045485325157642365, - "learning_rate": 5.917972027530363e-05, - "loss": 0.003246675431728363, - "step": 7455 - }, - { - "epoch": 1.2719522591645354, - "grad_norm": 0.09602563083171844, - "learning_rate": 5.9158809048386017e-05, - "loss": 0.006592199206352234, - "step": 7460 - }, - { - "epoch": 1.2728047740835464, - "grad_norm": 0.0555407889187336, - "learning_rate": 5.913788771041586e-05, - "loss": 0.00537751168012619, - "step": 7465 - }, - { - "epoch": 1.2736572890025575, - "grad_norm": 0.15820109844207764, - "learning_rate": 5.911695627115994e-05, - "loss": 0.005968114733695984, - "step": 7470 - }, - { - "epoch": 1.2745098039215685, - "grad_norm": 0.05781199410557747, - "learning_rate": 5.9096014740389754e-05, - "loss": 0.00887204110622406, - "step": 7475 - }, - { - "epoch": 1.2753623188405796, - "grad_norm": 0.07927337288856506, - "learning_rate": 5.90750631278815e-05, - "loss": 0.006439142674207687, - "step": 7480 - }, - { - "epoch": 1.2762148337595907, - "grad_norm": 0.03843824937939644, - "learning_rate": 5.905410144341609e-05, - "loss": 0.007792883366346359, - "step": 7485 - }, - { - "epoch": 1.277067348678602, - "grad_norm": 0.0692640095949173, - "learning_rate": 5.903312969677914e-05, - "loss": 0.006274447590112686, - "step": 7490 - }, - { - "epoch": 1.277919863597613, - "grad_norm": 0.07501527667045593, - "learning_rate": 5.901214789776094e-05, - "loss": 0.007496471703052521, - "step": 7495 - }, - { - "epoch": 1.278772378516624, - "grad_norm": 0.10271260142326355, - "learning_rate": 5.8991156056156514e-05, - "loss": 0.008766942471265794, - "step": 7500 - }, - { - "epoch": 1.2796248934356351, - "grad_norm": 0.03995242714881897, - "learning_rate": 5.897015418176555e-05, - "loss": 0.0055749226361513134, - "step": 7505 - }, - { - "epoch": 1.2804774083546462, - "grad_norm": 0.09215585142374039, - "learning_rate": 5.8949142284392406e-05, - "loss": 0.005763960257172585, - "step": 7510 - }, - { - "epoch": 1.2813299232736572, - "grad_norm": 0.07763402909040451, - "learning_rate": 5.892812037384615e-05, - "loss": 0.006439389288425445, - "step": 7515 - }, - { - "epoch": 1.2821824381926683, - "grad_norm": 0.04945438355207443, - "learning_rate": 5.890708845994049e-05, - "loss": 0.006960665434598922, - "step": 7520 - }, - { - "epoch": 1.2830349531116796, - "grad_norm": 0.05348283797502518, - "learning_rate": 5.888604655249384e-05, - "loss": 0.0061422914266586305, - "step": 7525 - }, - { - "epoch": 1.2838874680306906, - "grad_norm": 0.10389877110719681, - "learning_rate": 5.886499466132926e-05, - "loss": 0.009247081726789475, - "step": 7530 - }, - { - "epoch": 1.2847399829497017, - "grad_norm": 0.07753872126340866, - "learning_rate": 5.884393279627448e-05, - "loss": 0.004902977123856544, - "step": 7535 - }, - { - "epoch": 1.2855924978687128, - "grad_norm": 0.10553103685379028, - "learning_rate": 5.8822860967161856e-05, - "loss": 0.004547145590186119, - "step": 7540 - }, - { - "epoch": 1.2864450127877238, - "grad_norm": 0.08235067129135132, - "learning_rate": 5.880177918382844e-05, - "loss": 0.005282455682754516, - "step": 7545 - }, - { - "epoch": 1.287297527706735, - "grad_norm": 0.08135014772415161, - "learning_rate": 5.878068745611591e-05, - "loss": 0.006127358600497246, - "step": 7550 - }, - { - "epoch": 1.288150042625746, - "grad_norm": 0.04027952626347542, - "learning_rate": 5.875958579387056e-05, - "loss": 0.008251778036355972, - "step": 7555 - }, - { - "epoch": 1.289002557544757, - "grad_norm": 0.1060953438282013, - "learning_rate": 5.8738474206943385e-05, - "loss": 0.008290941268205643, - "step": 7560 - }, - { - "epoch": 1.289855072463768, - "grad_norm": 0.06716421991586685, - "learning_rate": 5.871735270518995e-05, - "loss": 0.004932524263858795, - "step": 7565 - }, - { - "epoch": 1.2907075873827791, - "grad_norm": 0.07644582539796829, - "learning_rate": 5.869622129847048e-05, - "loss": 0.006172410026192665, - "step": 7570 - }, - { - "epoch": 1.2915601023017902, - "grad_norm": 0.06018557399511337, - "learning_rate": 5.867507999664983e-05, - "loss": 0.005532362312078476, - "step": 7575 - }, - { - "epoch": 1.2924126172208013, - "grad_norm": 0.06454342603683472, - "learning_rate": 5.865392880959745e-05, - "loss": 0.005053167790174484, - "step": 7580 - }, - { - "epoch": 1.2932651321398123, - "grad_norm": 0.07618142664432526, - "learning_rate": 5.863276774718742e-05, - "loss": 0.005658206716179848, - "step": 7585 - }, - { - "epoch": 1.2941176470588236, - "grad_norm": 0.05649973824620247, - "learning_rate": 5.8611596819298434e-05, - "loss": 0.007477214187383651, - "step": 7590 - }, - { - "epoch": 1.2949701619778347, - "grad_norm": 0.09222351759672165, - "learning_rate": 5.859041603581377e-05, - "loss": 0.006974493712186813, - "step": 7595 - }, - { - "epoch": 1.2958226768968457, - "grad_norm": 0.07462326437234879, - "learning_rate": 5.856922540662134e-05, - "loss": 0.008175718039274216, - "step": 7600 - }, - { - "epoch": 1.2966751918158568, - "grad_norm": 0.10593193024396896, - "learning_rate": 5.854802494161364e-05, - "loss": 0.006635700166225433, - "step": 7605 - }, - { - "epoch": 1.2975277067348678, - "grad_norm": 0.08673358708620071, - "learning_rate": 5.8526814650687724e-05, - "loss": 0.007347754389047623, - "step": 7610 - }, - { - "epoch": 1.298380221653879, - "grad_norm": 0.10450063645839691, - "learning_rate": 5.850559454374528e-05, - "loss": 0.008085139095783234, - "step": 7615 - }, - { - "epoch": 1.29923273657289, - "grad_norm": 0.04219435900449753, - "learning_rate": 5.848436463069257e-05, - "loss": 0.006296204030513763, - "step": 7620 - }, - { - "epoch": 1.3000852514919012, - "grad_norm": 0.08187524974346161, - "learning_rate": 5.84631249214404e-05, - "loss": 0.007680010050535202, - "step": 7625 - }, - { - "epoch": 1.3009377664109123, - "grad_norm": 0.21044164896011353, - "learning_rate": 5.844187542590418e-05, - "loss": 0.008709554374217988, - "step": 7630 - }, - { - "epoch": 1.3017902813299234, - "grad_norm": 0.09822215139865875, - "learning_rate": 5.842061615400389e-05, - "loss": 0.007412384450435639, - "step": 7635 - }, - { - "epoch": 1.3026427962489344, - "grad_norm": 0.05957398563623428, - "learning_rate": 5.8399347115664053e-05, - "loss": 0.0062717020511627196, - "step": 7640 - }, - { - "epoch": 1.3034953111679455, - "grad_norm": 0.07013436406850815, - "learning_rate": 5.837806832081378e-05, - "loss": 0.005471421033143997, - "step": 7645 - }, - { - "epoch": 1.3043478260869565, - "grad_norm": 0.09616916626691818, - "learning_rate": 5.835677977938671e-05, - "loss": 0.008985907584428788, - "step": 7650 - }, - { - "epoch": 1.3052003410059676, - "grad_norm": 0.07946161180734634, - "learning_rate": 5.833548150132105e-05, - "loss": 0.00563003197312355, - "step": 7655 - }, - { - "epoch": 1.3060528559249787, - "grad_norm": 0.0630686804652214, - "learning_rate": 5.831417349655953e-05, - "loss": 0.007591667026281357, - "step": 7660 - }, - { - "epoch": 1.3069053708439897, - "grad_norm": 0.08530164510011673, - "learning_rate": 5.829285577504944e-05, - "loss": 0.006751708686351776, - "step": 7665 - }, - { - "epoch": 1.3077578857630008, - "grad_norm": 0.045148320496082306, - "learning_rate": 5.8271528346742616e-05, - "loss": 0.0052963607013225555, - "step": 7670 - }, - { - "epoch": 1.3086104006820118, - "grad_norm": 0.07147885859012604, - "learning_rate": 5.82501912215954e-05, - "loss": 0.005282463133335113, - "step": 7675 - }, - { - "epoch": 1.309462915601023, - "grad_norm": 0.0933302789926529, - "learning_rate": 5.8228844409568654e-05, - "loss": 0.0073209434747695925, - "step": 7680 - }, - { - "epoch": 1.310315430520034, - "grad_norm": 0.07449645549058914, - "learning_rate": 5.820748792062781e-05, - "loss": 0.007801111787557602, - "step": 7685 - }, - { - "epoch": 1.3111679454390452, - "grad_norm": 0.04569214582443237, - "learning_rate": 5.8186121764742774e-05, - "loss": 0.006659354269504547, - "step": 7690 - }, - { - "epoch": 1.3120204603580563, - "grad_norm": 0.07046396285295486, - "learning_rate": 5.8164745951887995e-05, - "loss": 0.006448440253734589, - "step": 7695 - }, - { - "epoch": 1.3128729752770674, - "grad_norm": 0.09704319387674332, - "learning_rate": 5.814336049204239e-05, - "loss": 0.008210816234350205, - "step": 7700 - }, - { - "epoch": 1.3137254901960784, - "grad_norm": 0.06477776169776917, - "learning_rate": 5.81219653951894e-05, - "loss": 0.005369330942630768, - "step": 7705 - }, - { - "epoch": 1.3145780051150895, - "grad_norm": 0.11657397449016571, - "learning_rate": 5.810056067131698e-05, - "loss": 0.010190412402153015, - "step": 7710 - }, - { - "epoch": 1.3154305200341005, - "grad_norm": 0.06578268110752106, - "learning_rate": 5.8079146330417575e-05, - "loss": 0.006289052963256836, - "step": 7715 - }, - { - "epoch": 1.3162830349531116, - "grad_norm": 0.06296945363283157, - "learning_rate": 5.80577223824881e-05, - "loss": 0.008120459318161011, - "step": 7720 - }, - { - "epoch": 1.317135549872123, - "grad_norm": 0.08275634050369263, - "learning_rate": 5.803628883752996e-05, - "loss": 0.006926379352807999, - "step": 7725 - }, - { - "epoch": 1.317988064791134, - "grad_norm": 0.0693436712026596, - "learning_rate": 5.8014845705549086e-05, - "loss": 0.006521113961935043, - "step": 7730 - }, - { - "epoch": 1.318840579710145, - "grad_norm": 0.05845775827765465, - "learning_rate": 5.799339299655579e-05, - "loss": 0.00762510895729065, - "step": 7735 - }, - { - "epoch": 1.319693094629156, - "grad_norm": 0.08802217245101929, - "learning_rate": 5.7971930720564947e-05, - "loss": 0.008071760833263397, - "step": 7740 - }, - { - "epoch": 1.3205456095481671, - "grad_norm": 0.08866037428379059, - "learning_rate": 5.795045888759585e-05, - "loss": 0.006111105903983116, - "step": 7745 - }, - { - "epoch": 1.3213981244671782, - "grad_norm": 0.0844360888004303, - "learning_rate": 5.792897750767225e-05, - "loss": 0.005196729302406311, - "step": 7750 - }, - { - "epoch": 1.3222506393861893, - "grad_norm": 0.06763950735330582, - "learning_rate": 5.79074865908224e-05, - "loss": 0.006462454050779343, - "step": 7755 - }, - { - "epoch": 1.3231031543052003, - "grad_norm": 0.06333937495946884, - "learning_rate": 5.7885986147078946e-05, - "loss": 0.0068017512559890745, - "step": 7760 - }, - { - "epoch": 1.3239556692242114, - "grad_norm": 0.05730217695236206, - "learning_rate": 5.786447618647904e-05, - "loss": 0.0065845087170600895, - "step": 7765 - }, - { - "epoch": 1.3248081841432224, - "grad_norm": 0.06838720291852951, - "learning_rate": 5.784295671906422e-05, - "loss": 0.0059626404196023945, - "step": 7770 - }, - { - "epoch": 1.3256606990622335, - "grad_norm": 0.06693503260612488, - "learning_rate": 5.782142775488051e-05, - "loss": 0.008056168258190156, - "step": 7775 - }, - { - "epoch": 1.3265132139812446, - "grad_norm": 0.07886708527803421, - "learning_rate": 5.7799889303978324e-05, - "loss": 0.006670922040939331, - "step": 7780 - }, - { - "epoch": 1.3273657289002558, - "grad_norm": 0.06932322680950165, - "learning_rate": 5.777834137641255e-05, - "loss": 0.006734507530927658, - "step": 7785 - }, - { - "epoch": 1.328218243819267, - "grad_norm": 0.08057818561792374, - "learning_rate": 5.775678398224247e-05, - "loss": 0.005952415242791176, - "step": 7790 - }, - { - "epoch": 1.329070758738278, - "grad_norm": 0.06614059209823608, - "learning_rate": 5.7735217131531785e-05, - "loss": 0.007471600174903869, - "step": 7795 - }, - { - "epoch": 1.329923273657289, - "grad_norm": 0.06335467845201492, - "learning_rate": 5.771364083434862e-05, - "loss": 0.007279399782419205, - "step": 7800 - }, - { - "epoch": 1.3307757885763, - "grad_norm": 0.11745526641607285, - "learning_rate": 5.769205510076552e-05, - "loss": 0.006242561340332031, - "step": 7805 - }, - { - "epoch": 1.3316283034953111, - "grad_norm": 0.0590963289141655, - "learning_rate": 5.7670459940859414e-05, - "loss": 0.006263129413127899, - "step": 7810 - }, - { - "epoch": 1.3324808184143222, - "grad_norm": 0.05416800454258919, - "learning_rate": 5.764885536471164e-05, - "loss": 0.00531160868704319, - "step": 7815 - }, - { - "epoch": 1.3333333333333333, - "grad_norm": 0.05527244135737419, - "learning_rate": 5.7627241382407933e-05, - "loss": 0.005747637152671814, - "step": 7820 - }, - { - "epoch": 1.3341858482523445, - "grad_norm": 0.057753629982471466, - "learning_rate": 5.760561800403844e-05, - "loss": 0.004979781061410904, - "step": 7825 - }, - { - "epoch": 1.3350383631713556, - "grad_norm": 0.10882547497749329, - "learning_rate": 5.758398523969763e-05, - "loss": 0.00589316263794899, - "step": 7830 - }, - { - "epoch": 1.3358908780903667, - "grad_norm": 0.08053787797689438, - "learning_rate": 5.756234309948443e-05, - "loss": 0.004465704411268234, - "step": 7835 - }, - { - "epoch": 1.3367433930093777, - "grad_norm": 0.09168808907270432, - "learning_rate": 5.75406915935021e-05, - "loss": 0.004735191911458969, - "step": 7840 - }, - { - "epoch": 1.3375959079283888, - "grad_norm": 0.0956537052989006, - "learning_rate": 5.751903073185829e-05, - "loss": 0.005519610643386841, - "step": 7845 - }, - { - "epoch": 1.3384484228473998, - "grad_norm": 0.05775619298219681, - "learning_rate": 5.749736052466501e-05, - "loss": 0.005525605380535125, - "step": 7850 - }, - { - "epoch": 1.339300937766411, - "grad_norm": 0.08594895154237747, - "learning_rate": 5.7475680982038616e-05, - "loss": 0.005280618742108345, - "step": 7855 - }, - { - "epoch": 1.340153452685422, - "grad_norm": 0.10326153039932251, - "learning_rate": 5.745399211409987e-05, - "loss": 0.005818159133195877, - "step": 7860 - }, - { - "epoch": 1.341005967604433, - "grad_norm": 0.053448133170604706, - "learning_rate": 5.743229393097384e-05, - "loss": 0.008255011588335037, - "step": 7865 - }, - { - "epoch": 1.341858482523444, - "grad_norm": 0.05307561904191971, - "learning_rate": 5.741058644278995e-05, - "loss": 0.006851959228515625, - "step": 7870 - }, - { - "epoch": 1.3427109974424551, - "grad_norm": 0.050789013504981995, - "learning_rate": 5.738886965968199e-05, - "loss": 0.005396667867898941, - "step": 7875 - }, - { - "epoch": 1.3435635123614662, - "grad_norm": 0.06762190908193588, - "learning_rate": 5.736714359178808e-05, - "loss": 0.005661940947175026, - "step": 7880 - }, - { - "epoch": 1.3444160272804775, - "grad_norm": 0.06955094635486603, - "learning_rate": 5.734540824925066e-05, - "loss": 0.0065834902226924895, - "step": 7885 - }, - { - "epoch": 1.3452685421994885, - "grad_norm": 0.09844111651182175, - "learning_rate": 5.7323663642216525e-05, - "loss": 0.006687184423208236, - "step": 7890 - }, - { - "epoch": 1.3461210571184996, - "grad_norm": 0.05249316990375519, - "learning_rate": 5.7301909780836766e-05, - "loss": 0.00670531764626503, - "step": 7895 - }, - { - "epoch": 1.3469735720375107, - "grad_norm": 0.06578750163316727, - "learning_rate": 5.7280146675266815e-05, - "loss": 0.0063153237104415895, - "step": 7900 - }, - { - "epoch": 1.3478260869565217, - "grad_norm": 0.10460260510444641, - "learning_rate": 5.725837433566643e-05, - "loss": 0.008820119500160217, - "step": 7905 - }, - { - "epoch": 1.3486786018755328, - "grad_norm": 0.06620552390813828, - "learning_rate": 5.7236592772199624e-05, - "loss": 0.006502580642700195, - "step": 7910 - }, - { - "epoch": 1.3495311167945439, - "grad_norm": 0.1033373698592186, - "learning_rate": 5.72148019950348e-05, - "loss": 0.008503454178571701, - "step": 7915 - }, - { - "epoch": 1.350383631713555, - "grad_norm": 0.05790281295776367, - "learning_rate": 5.719300201434458e-05, - "loss": 0.006304294615983963, - "step": 7920 - }, - { - "epoch": 1.3512361466325662, - "grad_norm": 0.06094033271074295, - "learning_rate": 5.717119284030595e-05, - "loss": 0.006775079667568207, - "step": 7925 - }, - { - "epoch": 1.3520886615515773, - "grad_norm": 0.08011666685342789, - "learning_rate": 5.714937448310015e-05, - "loss": 0.0064566083252429966, - "step": 7930 - }, - { - "epoch": 1.3529411764705883, - "grad_norm": 0.06395548582077026, - "learning_rate": 5.7127546952912686e-05, - "loss": 0.009279583394527436, - "step": 7935 - }, - { - "epoch": 1.3537936913895994, - "grad_norm": 0.06697574257850647, - "learning_rate": 5.710571025993342e-05, - "loss": 0.005718713253736496, - "step": 7940 - }, - { - "epoch": 1.3546462063086104, - "grad_norm": 0.08821829408407211, - "learning_rate": 5.7083864414356414e-05, - "loss": 0.008157726377248764, - "step": 7945 - }, - { - "epoch": 1.3554987212276215, - "grad_norm": 0.07097669690847397, - "learning_rate": 5.706200942638006e-05, - "loss": 0.004782359302043915, - "step": 7950 - }, - { - "epoch": 1.3563512361466326, - "grad_norm": 0.05015713721513748, - "learning_rate": 5.7040145306206963e-05, - "loss": 0.004204710572957992, - "step": 7955 - }, - { - "epoch": 1.3572037510656436, - "grad_norm": 0.054049719125032425, - "learning_rate": 5.701827206404406e-05, - "loss": 0.00606432780623436, - "step": 7960 - }, - { - "epoch": 1.3580562659846547, - "grad_norm": 0.0878557488322258, - "learning_rate": 5.6996389710102474e-05, - "loss": 0.008037066459655762, - "step": 7965 - }, - { - "epoch": 1.3589087809036657, - "grad_norm": 0.10873926430940628, - "learning_rate": 5.697449825459762e-05, - "loss": 0.007864821702241898, - "step": 7970 - }, - { - "epoch": 1.3597612958226768, - "grad_norm": 0.05823246389627457, - "learning_rate": 5.695259770774919e-05, - "loss": 0.00715988278388977, - "step": 7975 - }, - { - "epoch": 1.3606138107416879, - "grad_norm": 0.06690117716789246, - "learning_rate": 5.693068807978106e-05, - "loss": 0.006888707727193832, - "step": 7980 - }, - { - "epoch": 1.3614663256606991, - "grad_norm": 0.07290884852409363, - "learning_rate": 5.6908769380921363e-05, - "loss": 0.005684115365147591, - "step": 7985 - }, - { - "epoch": 1.3623188405797102, - "grad_norm": 0.07930465042591095, - "learning_rate": 5.6886841621402504e-05, - "loss": 0.0077220767736434935, - "step": 7990 - }, - { - "epoch": 1.3631713554987213, - "grad_norm": 0.08893048763275146, - "learning_rate": 5.686490481146107e-05, - "loss": 0.007824088633060455, - "step": 7995 - }, - { - "epoch": 1.3640238704177323, - "grad_norm": 0.09335844218730927, - "learning_rate": 5.6842958961337905e-05, - "loss": 0.006522499769926071, - "step": 8000 - }, - { - "epoch": 1.3648763853367434, - "grad_norm": 0.07194571942090988, - "learning_rate": 5.682100408127806e-05, - "loss": 0.008011893928050995, - "step": 8005 - }, - { - "epoch": 1.3657289002557544, - "grad_norm": 0.053429413586854935, - "learning_rate": 5.6799040181530794e-05, - "loss": 0.006260050833225251, - "step": 8010 - }, - { - "epoch": 1.3665814151747655, - "grad_norm": 0.11974478513002396, - "learning_rate": 5.677706727234959e-05, - "loss": 0.006692723929882049, - "step": 8015 - }, - { - "epoch": 1.3674339300937766, - "grad_norm": 0.07810027152299881, - "learning_rate": 5.6755085363992155e-05, - "loss": 0.007429388910531997, - "step": 8020 - }, - { - "epoch": 1.3682864450127878, - "grad_norm": 0.10204190760850906, - "learning_rate": 5.673309446672034e-05, - "loss": 0.005550343170762062, - "step": 8025 - }, - { - "epoch": 1.369138959931799, - "grad_norm": 0.07640541344881058, - "learning_rate": 5.671109459080026e-05, - "loss": 0.006840181350708008, - "step": 8030 - }, - { - "epoch": 1.36999147485081, - "grad_norm": 0.06644181162118912, - "learning_rate": 5.668908574650216e-05, - "loss": 0.005395495146512985, - "step": 8035 - }, - { - "epoch": 1.370843989769821, - "grad_norm": 0.09630967676639557, - "learning_rate": 5.6667067944100526e-05, - "loss": 0.005423872545361519, - "step": 8040 - }, - { - "epoch": 1.371696504688832, - "grad_norm": 0.07114128023386002, - "learning_rate": 5.664504119387398e-05, - "loss": 0.007013414800167084, - "step": 8045 - }, - { - "epoch": 1.3725490196078431, - "grad_norm": 0.07324981689453125, - "learning_rate": 5.662300550610535e-05, - "loss": 0.008274464309215546, - "step": 8050 - }, - { - "epoch": 1.3734015345268542, - "grad_norm": 0.06012870743870735, - "learning_rate": 5.660096089108163e-05, - "loss": 0.00520169697701931, - "step": 8055 - }, - { - "epoch": 1.3742540494458653, - "grad_norm": 0.07458557933568954, - "learning_rate": 5.657890735909397e-05, - "loss": 0.006112886965274811, - "step": 8060 - }, - { - "epoch": 1.3751065643648763, - "grad_norm": 0.0470297709107399, - "learning_rate": 5.655684492043771e-05, - "loss": 0.004435106366872788, - "step": 8065 - }, - { - "epoch": 1.3759590792838874, - "grad_norm": 0.05244847387075424, - "learning_rate": 5.653477358541231e-05, - "loss": 0.006484140455722809, - "step": 8070 - }, - { - "epoch": 1.3768115942028984, - "grad_norm": 0.10809201747179031, - "learning_rate": 5.651269336432142e-05, - "loss": 0.006385499238967895, - "step": 8075 - }, - { - "epoch": 1.3776641091219095, - "grad_norm": 0.11761374026536942, - "learning_rate": 5.649060426747281e-05, - "loss": 0.0056259695440530775, - "step": 8080 - }, - { - "epoch": 1.3785166240409208, - "grad_norm": 0.06250949203968048, - "learning_rate": 5.646850630517842e-05, - "loss": 0.005127568915486336, - "step": 8085 - }, - { - "epoch": 1.3793691389599319, - "grad_norm": 0.07686682790517807, - "learning_rate": 5.6446399487754307e-05, - "loss": 0.006484859436750412, - "step": 8090 - }, - { - "epoch": 1.380221653878943, - "grad_norm": 0.10453952848911285, - "learning_rate": 5.6424283825520656e-05, - "loss": 0.007125881314277649, - "step": 8095 - }, - { - "epoch": 1.381074168797954, - "grad_norm": 0.08170976489782333, - "learning_rate": 5.640215932880181e-05, - "loss": 0.007152590900659561, - "step": 8100 - }, - { - "epoch": 1.381926683716965, - "grad_norm": 0.08639637380838394, - "learning_rate": 5.638002600792621e-05, - "loss": 0.006862475723028183, - "step": 8105 - }, - { - "epoch": 1.382779198635976, - "grad_norm": 0.061349738389253616, - "learning_rate": 5.635788387322642e-05, - "loss": 0.006520121544599533, - "step": 8110 - }, - { - "epoch": 1.3836317135549872, - "grad_norm": 0.09568873792886734, - "learning_rate": 5.633573293503915e-05, - "loss": 0.00690893828868866, - "step": 8115 - }, - { - "epoch": 1.3844842284739982, - "grad_norm": 0.05280910059809685, - "learning_rate": 5.631357320370518e-05, - "loss": 0.0068241022527217865, - "step": 8120 - }, - { - "epoch": 1.3853367433930095, - "grad_norm": 0.08307540416717529, - "learning_rate": 5.6291404689569406e-05, - "loss": 0.009796305000782013, - "step": 8125 - }, - { - "epoch": 1.3861892583120206, - "grad_norm": 0.06511564552783966, - "learning_rate": 5.6269227402980824e-05, - "loss": 0.00675605982542038, - "step": 8130 - }, - { - "epoch": 1.3870417732310316, - "grad_norm": 0.09521665424108505, - "learning_rate": 5.624704135429255e-05, - "loss": 0.00661565363407135, - "step": 8135 - }, - { - "epoch": 1.3878942881500427, - "grad_norm": 0.06467590481042862, - "learning_rate": 5.622484655386175e-05, - "loss": 0.007056808471679688, - "step": 8140 - }, - { - "epoch": 1.3887468030690537, - "grad_norm": 0.04240449517965317, - "learning_rate": 5.62026430120497e-05, - "loss": 0.005277678743004799, - "step": 8145 - }, - { - "epoch": 1.3895993179880648, - "grad_norm": 0.08462672680616379, - "learning_rate": 5.618043073922176e-05, - "loss": 0.005951377004384995, - "step": 8150 - }, - { - "epoch": 1.3904518329070759, - "grad_norm": 0.08304573595523834, - "learning_rate": 5.615820974574735e-05, - "loss": 0.006729351729154587, - "step": 8155 - }, - { - "epoch": 1.391304347826087, - "grad_norm": 0.04584382846951485, - "learning_rate": 5.6135980041999964e-05, - "loss": 0.00490913912653923, - "step": 8160 - }, - { - "epoch": 1.392156862745098, - "grad_norm": 0.06771710515022278, - "learning_rate": 5.6113741638357175e-05, - "loss": 0.007046511024236679, - "step": 8165 - }, - { - "epoch": 1.393009377664109, - "grad_norm": 0.06334209442138672, - "learning_rate": 5.609149454520062e-05, - "loss": 0.006314977258443833, - "step": 8170 - }, - { - "epoch": 1.39386189258312, - "grad_norm": 0.06783269345760345, - "learning_rate": 5.606923877291595e-05, - "loss": 0.006176649779081345, - "step": 8175 - }, - { - "epoch": 1.3947144075021312, - "grad_norm": 0.10245220363140106, - "learning_rate": 5.604697433189293e-05, - "loss": 0.006309907138347626, - "step": 8180 - }, - { - "epoch": 1.3955669224211424, - "grad_norm": 0.07151709496974945, - "learning_rate": 5.6024701232525325e-05, - "loss": 0.005038458108901978, - "step": 8185 - }, - { - "epoch": 1.3964194373401535, - "grad_norm": 0.08811933547258377, - "learning_rate": 5.600241948521099e-05, - "loss": 0.006065644696354866, - "step": 8190 - }, - { - "epoch": 1.3972719522591646, - "grad_norm": 0.07598903775215149, - "learning_rate": 5.5980129100351736e-05, - "loss": 0.006201237812638283, - "step": 8195 - }, - { - "epoch": 1.3981244671781756, - "grad_norm": 0.058092061430215836, - "learning_rate": 5.5957830088353475e-05, - "loss": 0.006383272260427475, - "step": 8200 - }, - { - "epoch": 1.3989769820971867, - "grad_norm": 0.18196560442447662, - "learning_rate": 5.593552245962616e-05, - "loss": 0.004768157005310058, - "step": 8205 - }, - { - "epoch": 1.3998294970161977, - "grad_norm": 0.09071574360132217, - "learning_rate": 5.591320622458369e-05, - "loss": 0.007671289891004562, - "step": 8210 - }, - { - "epoch": 1.4006820119352088, - "grad_norm": 0.09470858424901962, - "learning_rate": 5.589088139364405e-05, - "loss": 0.006691985577344894, - "step": 8215 - }, - { - "epoch": 1.40153452685422, - "grad_norm": 0.05345157906413078, - "learning_rate": 5.58685479772292e-05, - "loss": 0.005832263827323913, - "step": 8220 - }, - { - "epoch": 1.4023870417732311, - "grad_norm": 0.08154480904340744, - "learning_rate": 5.584620598576514e-05, - "loss": 0.00907905399799347, - "step": 8225 - }, - { - "epoch": 1.4032395566922422, - "grad_norm": 0.06621691584587097, - "learning_rate": 5.582385542968185e-05, - "loss": 0.005938088893890381, - "step": 8230 - }, - { - "epoch": 1.4040920716112533, - "grad_norm": 0.0557720884680748, - "learning_rate": 5.580149631941329e-05, - "loss": 0.005079039558768273, - "step": 8235 - }, - { - "epoch": 1.4049445865302643, - "grad_norm": 0.07839027792215347, - "learning_rate": 5.577912866539746e-05, - "loss": 0.006307472288608551, - "step": 8240 - }, - { - "epoch": 1.4057971014492754, - "grad_norm": 0.05926419049501419, - "learning_rate": 5.575675247807632e-05, - "loss": 0.0072102643549442295, - "step": 8245 - }, - { - "epoch": 1.4066496163682864, - "grad_norm": 0.0570182129740715, - "learning_rate": 5.5734367767895814e-05, - "loss": 0.0066485337913036345, - "step": 8250 - }, - { - "epoch": 1.4075021312872975, - "grad_norm": 0.0930657833814621, - "learning_rate": 5.571197454530588e-05, - "loss": 0.005854785442352295, - "step": 8255 - }, - { - "epoch": 1.4083546462063086, - "grad_norm": 0.06018427759408951, - "learning_rate": 5.568957282076041e-05, - "loss": 0.0049718767404556274, - "step": 8260 - }, - { - "epoch": 1.4092071611253196, - "grad_norm": 0.0889105498790741, - "learning_rate": 5.566716260471726e-05, - "loss": 0.005993577092885971, - "step": 8265 - }, - { - "epoch": 1.4100596760443307, - "grad_norm": 0.04429823160171509, - "learning_rate": 5.5644743907638294e-05, - "loss": 0.005357314646244049, - "step": 8270 - }, - { - "epoch": 1.4109121909633418, - "grad_norm": 0.054813142865896225, - "learning_rate": 5.5622316739989296e-05, - "loss": 0.005666692927479744, - "step": 8275 - }, - { - "epoch": 1.4117647058823528, - "grad_norm": 0.06909197568893433, - "learning_rate": 5.559988111224001e-05, - "loss": 0.005058525875210762, - "step": 8280 - }, - { - "epoch": 1.412617220801364, - "grad_norm": 0.10060004889965057, - "learning_rate": 5.557743703486413e-05, - "loss": 0.0070966087281703946, - "step": 8285 - }, - { - "epoch": 1.4134697357203752, - "grad_norm": 0.052008096128702164, - "learning_rate": 5.555498451833929e-05, - "loss": 0.006671085208654404, - "step": 8290 - }, - { - "epoch": 1.4143222506393862, - "grad_norm": 0.06272758543491364, - "learning_rate": 5.5532523573147094e-05, - "loss": 0.006071484088897705, - "step": 8295 - }, - { - "epoch": 1.4151747655583973, - "grad_norm": 0.08005380630493164, - "learning_rate": 5.551005420977304e-05, - "loss": 0.006429645419120789, - "step": 8300 - }, - { - "epoch": 1.4160272804774083, - "grad_norm": 0.08246695250272751, - "learning_rate": 5.548757643870659e-05, - "loss": 0.004599097743630409, - "step": 8305 - }, - { - "epoch": 1.4168797953964194, - "grad_norm": 0.1190599724650383, - "learning_rate": 5.54650902704411e-05, - "loss": 0.00652359127998352, - "step": 8310 - }, - { - "epoch": 1.4177323103154305, - "grad_norm": 0.042393747717142105, - "learning_rate": 5.5442595715473867e-05, - "loss": 0.004551848769187928, - "step": 8315 - }, - { - "epoch": 1.4185848252344417, - "grad_norm": 0.0809137374162674, - "learning_rate": 5.54200927843061e-05, - "loss": 0.0062880381941795346, - "step": 8320 - }, - { - "epoch": 1.4194373401534528, - "grad_norm": 0.09030820429325104, - "learning_rate": 5.5397581487442905e-05, - "loss": 0.007365265488624572, - "step": 8325 - }, - { - "epoch": 1.4202898550724639, - "grad_norm": 0.060766976326704025, - "learning_rate": 5.537506183539333e-05, - "loss": 0.0047208376228809355, - "step": 8330 - }, - { - "epoch": 1.421142369991475, - "grad_norm": 0.0763862356543541, - "learning_rate": 5.535253383867027e-05, - "loss": 0.006549081206321717, - "step": 8335 - }, - { - "epoch": 1.421994884910486, - "grad_norm": 0.13493886590003967, - "learning_rate": 5.532999750779056e-05, - "loss": 0.0075773999094963075, - "step": 8340 - }, - { - "epoch": 1.422847399829497, - "grad_norm": 0.07888541370630264, - "learning_rate": 5.53074528532749e-05, - "loss": 0.007893601059913635, - "step": 8345 - }, - { - "epoch": 1.423699914748508, - "grad_norm": 0.04488302394747734, - "learning_rate": 5.528489988564789e-05, - "loss": 0.006052879989147187, - "step": 8350 - }, - { - "epoch": 1.4245524296675192, - "grad_norm": 0.09534436464309692, - "learning_rate": 5.5262338615438e-05, - "loss": 0.006183170899748802, - "step": 8355 - }, - { - "epoch": 1.4254049445865302, - "grad_norm": 0.0796227753162384, - "learning_rate": 5.523976905317758e-05, - "loss": 0.006368820369243622, - "step": 8360 - }, - { - "epoch": 1.4262574595055413, - "grad_norm": 0.039230771362781525, - "learning_rate": 5.521719120940287e-05, - "loss": 0.005282421037554741, - "step": 8365 - }, - { - "epoch": 1.4271099744245523, - "grad_norm": 0.12020807713270187, - "learning_rate": 5.5194605094653935e-05, - "loss": 0.00718868374824524, - "step": 8370 - }, - { - "epoch": 1.4279624893435634, - "grad_norm": 0.07434894144535065, - "learning_rate": 5.5172010719474746e-05, - "loss": 0.007591472566127777, - "step": 8375 - }, - { - "epoch": 1.4288150042625745, - "grad_norm": 0.0722346156835556, - "learning_rate": 5.514940809441309e-05, - "loss": 0.005854631587862968, - "step": 8380 - }, - { - "epoch": 1.4296675191815857, - "grad_norm": 0.07834078371524811, - "learning_rate": 5.5126797230020634e-05, - "loss": 0.007415357977151871, - "step": 8385 - }, - { - "epoch": 1.4305200341005968, - "grad_norm": 0.08718696236610413, - "learning_rate": 5.5104178136852884e-05, - "loss": 0.007089633494615555, - "step": 8390 - }, - { - "epoch": 1.4313725490196079, - "grad_norm": 0.08823035657405853, - "learning_rate": 5.508155082546918e-05, - "loss": 0.007153714448213578, - "step": 8395 - }, - { - "epoch": 1.432225063938619, - "grad_norm": 0.07249119132757187, - "learning_rate": 5.505891530643269e-05, - "loss": 0.007651906460523605, - "step": 8400 - }, - { - "epoch": 1.43307757885763, - "grad_norm": 0.06284502893686295, - "learning_rate": 5.503627159031046e-05, - "loss": 0.007741397619247437, - "step": 8405 - }, - { - "epoch": 1.433930093776641, - "grad_norm": 0.06557357311248779, - "learning_rate": 5.501361968767331e-05, - "loss": 0.007656902819871902, - "step": 8410 - }, - { - "epoch": 1.434782608695652, - "grad_norm": 0.0775318294763565, - "learning_rate": 5.49909596090959e-05, - "loss": 0.006714560091495514, - "step": 8415 - }, - { - "epoch": 1.4356351236146634, - "grad_norm": 0.05347822234034538, - "learning_rate": 5.496829136515672e-05, - "loss": 0.0048537302762269975, - "step": 8420 - }, - { - "epoch": 1.4364876385336744, - "grad_norm": 0.07040467113256454, - "learning_rate": 5.4945614966438046e-05, - "loss": 0.005411979556083679, - "step": 8425 - }, - { - "epoch": 1.4373401534526855, - "grad_norm": 0.09473410993814468, - "learning_rate": 5.492293042352598e-05, - "loss": 0.008005911856889725, - "step": 8430 - }, - { - "epoch": 1.4381926683716966, - "grad_norm": 0.037446580827236176, - "learning_rate": 5.4900237747010426e-05, - "loss": 0.006237779557704925, - "step": 8435 - }, - { - "epoch": 1.4390451832907076, - "grad_norm": 0.11029476672410965, - "learning_rate": 5.4877536947485074e-05, - "loss": 0.008190502226352692, - "step": 8440 - }, - { - "epoch": 1.4398976982097187, - "grad_norm": 0.0514204315841198, - "learning_rate": 5.4854828035547424e-05, - "loss": 0.006500741839408875, - "step": 8445 - }, - { - "epoch": 1.4407502131287298, - "grad_norm": 0.08411483466625214, - "learning_rate": 5.483211102179873e-05, - "loss": 0.0053235463798046116, - "step": 8450 - }, - { - "epoch": 1.4416027280477408, - "grad_norm": 0.09279052913188934, - "learning_rate": 5.480938591684407e-05, - "loss": 0.006842000037431717, - "step": 8455 - }, - { - "epoch": 1.4424552429667519, - "grad_norm": 0.09881046414375305, - "learning_rate": 5.478665273129228e-05, - "loss": 0.007064050436019898, - "step": 8460 - }, - { - "epoch": 1.443307757885763, - "grad_norm": 0.09018172323703766, - "learning_rate": 5.476391147575595e-05, - "loss": 0.005222787708044052, - "step": 8465 - }, - { - "epoch": 1.444160272804774, - "grad_norm": 0.11489493399858475, - "learning_rate": 5.4741162160851455e-05, - "loss": 0.004823528230190277, - "step": 8470 - }, - { - "epoch": 1.445012787723785, - "grad_norm": 0.11010619252920151, - "learning_rate": 5.4718404797198955e-05, - "loss": 0.007554465532302856, - "step": 8475 - }, - { - "epoch": 1.4458653026427961, - "grad_norm": 0.10469060391187668, - "learning_rate": 5.469563939542233e-05, - "loss": 0.005817038565874099, - "step": 8480 - }, - { - "epoch": 1.4467178175618074, - "grad_norm": 0.06776002794504166, - "learning_rate": 5.467286596614922e-05, - "loss": 0.00899386927485466, - "step": 8485 - }, - { - "epoch": 1.4475703324808185, - "grad_norm": 0.08414942771196365, - "learning_rate": 5.4650084520011026e-05, - "loss": 0.00611347034573555, - "step": 8490 - }, - { - "epoch": 1.4484228473998295, - "grad_norm": 0.09625279158353806, - "learning_rate": 5.462729506764289e-05, - "loss": 0.005645812302827835, - "step": 8495 - }, - { - "epoch": 1.4492753623188406, - "grad_norm": 0.10020645707845688, - "learning_rate": 5.4604497619683674e-05, - "loss": 0.0058198563754558565, - "step": 8500 - }, - { - "epoch": 1.4501278772378516, - "grad_norm": 0.08466780185699463, - "learning_rate": 5.4581692186776e-05, - "loss": 0.005098164081573486, - "step": 8505 - }, - { - "epoch": 1.4509803921568627, - "grad_norm": 0.058955296874046326, - "learning_rate": 5.4558878779566194e-05, - "loss": 0.004072735831141472, - "step": 8510 - }, - { - "epoch": 1.4518329070758738, - "grad_norm": 0.14849397540092468, - "learning_rate": 5.4536057408704304e-05, - "loss": 0.011097650229930877, - "step": 8515 - }, - { - "epoch": 1.452685421994885, - "grad_norm": 0.08641809970140457, - "learning_rate": 5.451322808484413e-05, - "loss": 0.006210784614086151, - "step": 8520 - }, - { - "epoch": 1.453537936913896, - "grad_norm": 0.07506752014160156, - "learning_rate": 5.4490390818643136e-05, - "loss": 0.006071462482213974, - "step": 8525 - }, - { - "epoch": 1.4543904518329072, - "grad_norm": 0.10383405536413193, - "learning_rate": 5.4467545620762545e-05, - "loss": 0.008749781548976899, - "step": 8530 - }, - { - "epoch": 1.4552429667519182, - "grad_norm": 0.08180487155914307, - "learning_rate": 5.444469250186721e-05, - "loss": 0.00613279715180397, - "step": 8535 - }, - { - "epoch": 1.4560954816709293, - "grad_norm": 0.07797367125749588, - "learning_rate": 5.442183147262577e-05, - "loss": 0.005885690450668335, - "step": 8540 - }, - { - "epoch": 1.4569479965899403, - "grad_norm": 0.0780278891324997, - "learning_rate": 5.439896254371049e-05, - "loss": 0.007259850949048996, - "step": 8545 - }, - { - "epoch": 1.4578005115089514, - "grad_norm": 0.10005395114421844, - "learning_rate": 5.437608572579737e-05, - "loss": 0.0061523888260126116, - "step": 8550 - }, - { - "epoch": 1.4586530264279625, - "grad_norm": 0.10141763836145401, - "learning_rate": 5.435320102956604e-05, - "loss": 0.006501191109418869, - "step": 8555 - }, - { - "epoch": 1.4595055413469735, - "grad_norm": 0.0938732773065567, - "learning_rate": 5.4330308465699865e-05, - "loss": 0.008337517827749252, - "step": 8560 - }, - { - "epoch": 1.4603580562659846, - "grad_norm": 0.1085699051618576, - "learning_rate": 5.430740804488582e-05, - "loss": 0.005681714415550232, - "step": 8565 - }, - { - "epoch": 1.4612105711849956, - "grad_norm": 0.07967904955148697, - "learning_rate": 5.428449977781463e-05, - "loss": 0.006299185007810593, - "step": 8570 - }, - { - "epoch": 1.4620630861040067, - "grad_norm": 0.090158611536026, - "learning_rate": 5.426158367518061e-05, - "loss": 0.007821831852197647, - "step": 8575 - }, - { - "epoch": 1.4629156010230178, - "grad_norm": 0.12222256511449814, - "learning_rate": 5.4238659747681736e-05, - "loss": 0.0065193742513656614, - "step": 8580 - }, - { - "epoch": 1.463768115942029, - "grad_norm": 0.07724417746067047, - "learning_rate": 5.421572800601971e-05, - "loss": 0.00850745365023613, - "step": 8585 - }, - { - "epoch": 1.46462063086104, - "grad_norm": 0.07322543114423752, - "learning_rate": 5.4192788460899786e-05, - "loss": 0.006478501856327057, - "step": 8590 - }, - { - "epoch": 1.4654731457800512, - "grad_norm": 0.07086360454559326, - "learning_rate": 5.416984112303095e-05, - "loss": 0.007459370046854019, - "step": 8595 - }, - { - "epoch": 1.4663256606990622, - "grad_norm": 0.08460366725921631, - "learning_rate": 5.414688600312575e-05, - "loss": 0.006461035460233688, - "step": 8600 - }, - { - "epoch": 1.4671781756180733, - "grad_norm": 0.06856394559144974, - "learning_rate": 5.412392311190041e-05, - "loss": 0.007420676201581955, - "step": 8605 - }, - { - "epoch": 1.4680306905370843, - "grad_norm": 0.06801126897335052, - "learning_rate": 5.4100952460074766e-05, - "loss": 0.006456401199102402, - "step": 8610 - }, - { - "epoch": 1.4688832054560954, - "grad_norm": 0.06273184716701508, - "learning_rate": 5.4077974058372295e-05, - "loss": 0.00508052185177803, - "step": 8615 - }, - { - "epoch": 1.4697357203751067, - "grad_norm": 0.07751575112342834, - "learning_rate": 5.405498791752007e-05, - "loss": 0.006596812605857849, - "step": 8620 - }, - { - "epoch": 1.4705882352941178, - "grad_norm": 0.10850238054990768, - "learning_rate": 5.4031994048248776e-05, - "loss": 0.006385332345962525, - "step": 8625 - }, - { - "epoch": 1.4714407502131288, - "grad_norm": 0.07195930927991867, - "learning_rate": 5.4008992461292736e-05, - "loss": 0.007354143261909485, - "step": 8630 - }, - { - "epoch": 1.4722932651321399, - "grad_norm": 0.061606891453266144, - "learning_rate": 5.3985983167389846e-05, - "loss": 0.007285259664058685, - "step": 8635 - }, - { - "epoch": 1.473145780051151, - "grad_norm": 0.059549275785684586, - "learning_rate": 5.3962966177281616e-05, - "loss": 0.005211231112480163, - "step": 8640 - }, - { - "epoch": 1.473998294970162, - "grad_norm": 0.04548822343349457, - "learning_rate": 5.3939941501713146e-05, - "loss": 0.00805831179022789, - "step": 8645 - }, - { - "epoch": 1.474850809889173, - "grad_norm": 0.046682652086019516, - "learning_rate": 5.3916909151433096e-05, - "loss": 0.005787956342101097, - "step": 8650 - }, - { - "epoch": 1.4757033248081841, - "grad_norm": 0.06405246257781982, - "learning_rate": 5.3893869137193755e-05, - "loss": 0.005377359688282013, - "step": 8655 - }, - { - "epoch": 1.4765558397271952, - "grad_norm": 0.09410709887742996, - "learning_rate": 5.3870821469750964e-05, - "loss": 0.006961540877819061, - "step": 8660 - }, - { - "epoch": 1.4774083546462062, - "grad_norm": 0.0637243241071701, - "learning_rate": 5.384776615986414e-05, - "loss": 0.0060172989964485165, - "step": 8665 - }, - { - "epoch": 1.4782608695652173, - "grad_norm": 0.07082457840442657, - "learning_rate": 5.382470321829627e-05, - "loss": 0.005960140377283096, - "step": 8670 - }, - { - "epoch": 1.4791133844842284, - "grad_norm": 0.06502280384302139, - "learning_rate": 5.380163265581391e-05, - "loss": 0.005596417188644409, - "step": 8675 - }, - { - "epoch": 1.4799658994032396, - "grad_norm": 0.07504235208034515, - "learning_rate": 5.3778554483187134e-05, - "loss": 0.007427094876766205, - "step": 8680 - }, - { - "epoch": 1.4808184143222507, - "grad_norm": 0.08005198836326599, - "learning_rate": 5.375546871118964e-05, - "loss": 0.006888572126626968, - "step": 8685 - }, - { - "epoch": 1.4816709292412618, - "grad_norm": 0.1083201915025711, - "learning_rate": 5.373237535059861e-05, - "loss": 0.007253114879131317, - "step": 8690 - }, - { - "epoch": 1.4825234441602728, - "grad_norm": 0.060000013560056686, - "learning_rate": 5.37092744121948e-05, - "loss": 0.005570416525006294, - "step": 8695 - }, - { - "epoch": 1.4833759590792839, - "grad_norm": 0.04832584038376808, - "learning_rate": 5.3686165906762504e-05, - "loss": 0.005356843769550324, - "step": 8700 - }, - { - "epoch": 1.484228473998295, - "grad_norm": 0.061586812138557434, - "learning_rate": 5.3663049845089534e-05, - "loss": 0.005226074159145356, - "step": 8705 - }, - { - "epoch": 1.485080988917306, - "grad_norm": 0.08486256003379822, - "learning_rate": 5.363992623796724e-05, - "loss": 0.007083939760923386, - "step": 8710 - }, - { - "epoch": 1.485933503836317, - "grad_norm": 0.09085836261510849, - "learning_rate": 5.361679509619048e-05, - "loss": 0.005988218262791634, - "step": 8715 - }, - { - "epoch": 1.4867860187553283, - "grad_norm": 0.06301745027303696, - "learning_rate": 5.359365643055765e-05, - "loss": 0.00595020055770874, - "step": 8720 - }, - { - "epoch": 1.4876385336743394, - "grad_norm": 0.07939866930246353, - "learning_rate": 5.3570510251870646e-05, - "loss": 0.006101110950112343, - "step": 8725 - }, - { - "epoch": 1.4884910485933505, - "grad_norm": 0.10560661554336548, - "learning_rate": 5.354735657093487e-05, - "loss": 0.006781180202960968, - "step": 8730 - }, - { - "epoch": 1.4893435635123615, - "grad_norm": 0.10549639165401459, - "learning_rate": 5.352419539855925e-05, - "loss": 0.006455187499523163, - "step": 8735 - }, - { - "epoch": 1.4901960784313726, - "grad_norm": 0.06474289298057556, - "learning_rate": 5.3501026745556157e-05, - "loss": 0.0078111283481121065, - "step": 8740 - }, - { - "epoch": 1.4910485933503836, - "grad_norm": 0.11109986901283264, - "learning_rate": 5.3477850622741525e-05, - "loss": 0.00798504576086998, - "step": 8745 - }, - { - "epoch": 1.4919011082693947, - "grad_norm": 0.0787222608923912, - "learning_rate": 5.3454667040934715e-05, - "loss": 0.007222773879766465, - "step": 8750 - }, - { - "epoch": 1.4927536231884058, - "grad_norm": 0.06622221320867538, - "learning_rate": 5.3431476010958613e-05, - "loss": 0.0064462460577487946, - "step": 8755 - }, - { - "epoch": 1.4936061381074168, - "grad_norm": 0.07526405900716782, - "learning_rate": 5.340827754363955e-05, - "loss": 0.005344667285680771, - "step": 8760 - }, - { - "epoch": 1.4944586530264279, - "grad_norm": 0.08911366015672684, - "learning_rate": 5.338507164980734e-05, - "loss": 0.006722994893789291, - "step": 8765 - }, - { - "epoch": 1.495311167945439, - "grad_norm": 0.03749583289027214, - "learning_rate": 5.336185834029527e-05, - "loss": 0.006120331957936287, - "step": 8770 - }, - { - "epoch": 1.49616368286445, - "grad_norm": 0.08167645335197449, - "learning_rate": 5.333863762594008e-05, - "loss": 0.007496768981218338, - "step": 8775 - }, - { - "epoch": 1.4970161977834613, - "grad_norm": 0.09045904129743576, - "learning_rate": 5.3315409517581996e-05, - "loss": 0.007222528755664826, - "step": 8780 - }, - { - "epoch": 1.4978687127024723, - "grad_norm": 0.06064090132713318, - "learning_rate": 5.329217402606464e-05, - "loss": 0.0044986031949520115, - "step": 8785 - }, - { - "epoch": 1.4987212276214834, - "grad_norm": 0.07282263785600662, - "learning_rate": 5.3268931162235126e-05, - "loss": 0.005251912400126457, - "step": 8790 - }, - { - "epoch": 1.4995737425404945, - "grad_norm": 0.0674249604344368, - "learning_rate": 5.324568093694401e-05, - "loss": 0.006289477646350861, - "step": 8795 - }, - { - "epoch": 1.4997442455242966, - "eval_loss": 0.03760311380028725, - "eval_runtime": 3.668, - "eval_samples_per_second": 68.702, - "eval_steps_per_second": 1.091, - "step": 8796 - }, - { - "eval_cer_subset": 0.014184050678261437, - "eval_cer_subset_edit_distance": 871, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 8796 - }, - { - "epoch": 1.5004262574595055, - "grad_norm": 0.06114037334918976, - "learning_rate": 5.322242336104525e-05, - "loss": 0.005809751898050308, - "step": 8800 - }, - { - "epoch": 1.5012787723785166, - "grad_norm": 0.08830825984477997, - "learning_rate": 5.319915844539626e-05, - "loss": 0.006921032071113586, - "step": 8805 - }, - { - "epoch": 1.5021312872975279, - "grad_norm": 0.10813544690608978, - "learning_rate": 5.3175886200857873e-05, - "loss": 0.007966426759958267, - "step": 8810 - }, - { - "epoch": 1.502983802216539, - "grad_norm": 0.08357173204421997, - "learning_rate": 5.3152606638294355e-05, - "loss": 0.006943506002426147, - "step": 8815 - }, - { - "epoch": 1.50383631713555, - "grad_norm": 0.08059901744127274, - "learning_rate": 5.312931976857339e-05, - "loss": 0.0047626100480556485, - "step": 8820 - }, - { - "epoch": 1.504688832054561, - "grad_norm": 0.07412680238485336, - "learning_rate": 5.310602560256604e-05, - "loss": 0.00709492564201355, - "step": 8825 - }, - { - "epoch": 1.5055413469735721, - "grad_norm": 0.046478480100631714, - "learning_rate": 5.3082724151146814e-05, - "loss": 0.006465598940849304, - "step": 8830 - }, - { - "epoch": 1.5063938618925832, - "grad_norm": 0.11122216284275055, - "learning_rate": 5.30594154251936e-05, - "loss": 0.00888531506061554, - "step": 8835 - }, - { - "epoch": 1.5072463768115942, - "grad_norm": 0.06441432982683182, - "learning_rate": 5.3036099435587685e-05, - "loss": 0.005882937833666802, - "step": 8840 - }, - { - "epoch": 1.5080988917306053, - "grad_norm": 0.05722307041287422, - "learning_rate": 5.301277619321374e-05, - "loss": 0.0059202808886766435, - "step": 8845 - }, - { - "epoch": 1.5089514066496164, - "grad_norm": 0.06677310913801193, - "learning_rate": 5.2989445708959856e-05, - "loss": 0.0064939349889755246, - "step": 8850 - }, - { - "epoch": 1.5098039215686274, - "grad_norm": 0.08854222297668457, - "learning_rate": 5.296610799371745e-05, - "loss": 0.007034827768802643, - "step": 8855 - }, - { - "epoch": 1.5106564364876385, - "grad_norm": 0.059711627662181854, - "learning_rate": 5.2942763058381356e-05, - "loss": 0.007557753473520279, - "step": 8860 - }, - { - "epoch": 1.5115089514066495, - "grad_norm": 0.06355257332324982, - "learning_rate": 5.291941091384977e-05, - "loss": 0.006534597277641297, - "step": 8865 - }, - { - "epoch": 1.5123614663256606, - "grad_norm": 0.05741631239652634, - "learning_rate": 5.2896051571024255e-05, - "loss": 0.006453331559896469, - "step": 8870 - }, - { - "epoch": 1.5132139812446717, - "grad_norm": 0.05809224396944046, - "learning_rate": 5.287268504080972e-05, - "loss": 0.006065556779503822, - "step": 8875 - }, - { - "epoch": 1.5140664961636827, - "grad_norm": 0.04522582143545151, - "learning_rate": 5.284931133411443e-05, - "loss": 0.004097414761781692, - "step": 8880 - }, - { - "epoch": 1.514919011082694, - "grad_norm": 0.09349111467599869, - "learning_rate": 5.2825930461850014e-05, - "loss": 0.005707831308245659, - "step": 8885 - }, - { - "epoch": 1.515771526001705, - "grad_norm": 0.08951391279697418, - "learning_rate": 5.280254243493145e-05, - "loss": 0.00725678950548172, - "step": 8890 - }, - { - "epoch": 1.5166240409207161, - "grad_norm": 0.07826244086027145, - "learning_rate": 5.277914726427705e-05, - "loss": 0.008086606860160828, - "step": 8895 - }, - { - "epoch": 1.5174765558397272, - "grad_norm": 0.0619954876601696, - "learning_rate": 5.2755744960808446e-05, - "loss": 0.005462165176868439, - "step": 8900 - }, - { - "epoch": 1.5183290707587382, - "grad_norm": 0.04414132609963417, - "learning_rate": 5.273233553545062e-05, - "loss": 0.005678927898406983, - "step": 8905 - }, - { - "epoch": 1.5191815856777495, - "grad_norm": 0.07183931767940521, - "learning_rate": 5.2708918999131864e-05, - "loss": 0.007184042781591416, - "step": 8910 - }, - { - "epoch": 1.5200341005967606, - "grad_norm": 0.10447251796722412, - "learning_rate": 5.26854953627838e-05, - "loss": 0.009831231832504273, - "step": 8915 - }, - { - "epoch": 1.5208866155157716, - "grad_norm": 0.04392845183610916, - "learning_rate": 5.266206463734135e-05, - "loss": 0.006517301499843598, - "step": 8920 - }, - { - "epoch": 1.5217391304347827, - "grad_norm": 0.06292697787284851, - "learning_rate": 5.2638626833742776e-05, - "loss": 0.005328541249036789, - "step": 8925 - }, - { - "epoch": 1.5225916453537938, - "grad_norm": 0.06425110250711441, - "learning_rate": 5.2615181962929605e-05, - "loss": 0.006298693269491196, - "step": 8930 - }, - { - "epoch": 1.5234441602728048, - "grad_norm": 0.08059051632881165, - "learning_rate": 5.259173003584669e-05, - "loss": 0.008097793161869048, - "step": 8935 - }, - { - "epoch": 1.5242966751918159, - "grad_norm": 0.0625302791595459, - "learning_rate": 5.256827106344218e-05, - "loss": 0.006664089858531952, - "step": 8940 - }, - { - "epoch": 1.525149190110827, - "grad_norm": 0.06092630326747894, - "learning_rate": 5.254480505666749e-05, - "loss": 0.006084204837679863, - "step": 8945 - }, - { - "epoch": 1.526001705029838, - "grad_norm": 0.07297338545322418, - "learning_rate": 5.2521332026477344e-05, - "loss": 0.006405481696128845, - "step": 8950 - }, - { - "epoch": 1.526854219948849, - "grad_norm": 0.05876631662249565, - "learning_rate": 5.249785198382973e-05, - "loss": 0.006670171767473221, - "step": 8955 - }, - { - "epoch": 1.5277067348678601, - "grad_norm": 0.0633542388677597, - "learning_rate": 5.247436493968589e-05, - "loss": 0.004565924406051636, - "step": 8960 - }, - { - "epoch": 1.5285592497868712, - "grad_norm": 0.09164717048406601, - "learning_rate": 5.2450870905010395e-05, - "loss": 0.005662925541400909, - "step": 8965 - }, - { - "epoch": 1.5294117647058822, - "grad_norm": 0.06646572798490524, - "learning_rate": 5.2427369890771026e-05, - "loss": 0.006319984793663025, - "step": 8970 - }, - { - "epoch": 1.5302642796248933, - "grad_norm": 0.08518269658088684, - "learning_rate": 5.2403861907938826e-05, - "loss": 0.0066184550523757935, - "step": 8975 - }, - { - "epoch": 1.5311167945439044, - "grad_norm": 0.08369076251983643, - "learning_rate": 5.238034696748811e-05, - "loss": 0.005069610476493835, - "step": 8980 - }, - { - "epoch": 1.5319693094629157, - "grad_norm": 0.05607258528470993, - "learning_rate": 5.235682508039646e-05, - "loss": 0.007457223534584045, - "step": 8985 - }, - { - "epoch": 1.5328218243819267, - "grad_norm": 0.0828152522444725, - "learning_rate": 5.2333296257644646e-05, - "loss": 0.007727481424808502, - "step": 8990 - }, - { - "epoch": 1.5336743393009378, - "grad_norm": 0.09770844876766205, - "learning_rate": 5.230976051021671e-05, - "loss": 0.007591258734464645, - "step": 8995 - }, - { - "epoch": 1.5345268542199488, - "grad_norm": 0.05906900763511658, - "learning_rate": 5.2286217849099925e-05, - "loss": 0.008510296791791916, - "step": 9000 - }, - { - "epoch": 1.53537936913896, - "grad_norm": 0.07594765722751617, - "learning_rate": 5.2262668285284785e-05, - "loss": 0.005943647772073746, - "step": 9005 - }, - { - "epoch": 1.5362318840579712, - "grad_norm": 0.056658126413822174, - "learning_rate": 5.223911182976502e-05, - "loss": 0.004702667891979218, - "step": 9010 - }, - { - "epoch": 1.5370843989769822, - "grad_norm": 0.060573313385248184, - "learning_rate": 5.2215548493537556e-05, - "loss": 0.006530648469924927, - "step": 9015 - }, - { - "epoch": 1.5379369138959933, - "grad_norm": 0.06876473873853683, - "learning_rate": 5.219197828760254e-05, - "loss": 0.0070976391434669495, - "step": 9020 - }, - { - "epoch": 1.5387894288150044, - "grad_norm": 0.05402369797229767, - "learning_rate": 5.2168401222963354e-05, - "loss": 0.005997032299637795, - "step": 9025 - }, - { - "epoch": 1.5396419437340154, - "grad_norm": 0.0907805860042572, - "learning_rate": 5.214481731062652e-05, - "loss": 0.007357357442378998, - "step": 9030 - }, - { - "epoch": 1.5404944586530265, - "grad_norm": 0.07572564482688904, - "learning_rate": 5.212122656160182e-05, - "loss": 0.004879472404718399, - "step": 9035 - }, - { - "epoch": 1.5413469735720375, - "grad_norm": 0.05684768036007881, - "learning_rate": 5.209762898690218e-05, - "loss": 0.006248699128627777, - "step": 9040 - }, - { - "epoch": 1.5421994884910486, - "grad_norm": 0.070293128490448, - "learning_rate": 5.2074024597543745e-05, - "loss": 0.005055962502956391, - "step": 9045 - }, - { - "epoch": 1.5430520034100597, - "grad_norm": 0.06611300259828568, - "learning_rate": 5.2050413404545823e-05, - "loss": 0.0048581909388303755, - "step": 9050 - }, - { - "epoch": 1.5439045183290707, - "grad_norm": 0.06960003823041916, - "learning_rate": 5.202679541893092e-05, - "loss": 0.006258350610733032, - "step": 9055 - }, - { - "epoch": 1.5447570332480818, - "grad_norm": 0.059757016599178314, - "learning_rate": 5.2003170651724675e-05, - "loss": 0.006347355991601944, - "step": 9060 - }, - { - "epoch": 1.5456095481670928, - "grad_norm": 0.06531284749507904, - "learning_rate": 5.1979539113955936e-05, - "loss": 0.00543224960565567, - "step": 9065 - }, - { - "epoch": 1.546462063086104, - "grad_norm": 0.08068390935659409, - "learning_rate": 5.195590081665667e-05, - "loss": 0.004933612793684006, - "step": 9070 - }, - { - "epoch": 1.547314578005115, - "grad_norm": 0.06198716536164284, - "learning_rate": 5.193225577086203e-05, - "loss": 0.00523824393749237, - "step": 9075 - }, - { - "epoch": 1.548167092924126, - "grad_norm": 0.07734926789999008, - "learning_rate": 5.190860398761032e-05, - "loss": 0.005699950456619263, - "step": 9080 - }, - { - "epoch": 1.5490196078431373, - "grad_norm": 0.058083925396203995, - "learning_rate": 5.188494547794297e-05, - "loss": 0.006147466972470284, - "step": 9085 - }, - { - "epoch": 1.5498721227621484, - "grad_norm": 0.0675162672996521, - "learning_rate": 5.1861280252904546e-05, - "loss": 0.0059716224670410155, - "step": 9090 - }, - { - "epoch": 1.5507246376811594, - "grad_norm": 0.05415274575352669, - "learning_rate": 5.183760832354278e-05, - "loss": 0.0058246061205863954, - "step": 9095 - }, - { - "epoch": 1.5515771526001705, - "grad_norm": 0.05826190859079361, - "learning_rate": 5.1813929700908523e-05, - "loss": 0.005409573763608932, - "step": 9100 - }, - { - "epoch": 1.5524296675191815, - "grad_norm": 0.07188098877668381, - "learning_rate": 5.179024439605573e-05, - "loss": 0.00541839525103569, - "step": 9105 - }, - { - "epoch": 1.5532821824381928, - "grad_norm": 0.07955330610275269, - "learning_rate": 5.176655242004149e-05, - "loss": 0.007760365307331085, - "step": 9110 - }, - { - "epoch": 1.5541346973572039, - "grad_norm": 0.07923565059900284, - "learning_rate": 5.1742853783926e-05, - "loss": 0.00563618317246437, - "step": 9115 - }, - { - "epoch": 1.554987212276215, - "grad_norm": 0.08301008492708206, - "learning_rate": 5.171914849877258e-05, - "loss": 0.006948529183864594, - "step": 9120 - }, - { - "epoch": 1.555839727195226, - "grad_norm": 0.10905841737985611, - "learning_rate": 5.1695436575647655e-05, - "loss": 0.005861887335777282, - "step": 9125 - }, - { - "epoch": 1.556692242114237, - "grad_norm": 0.06157204881310463, - "learning_rate": 5.167171802562072e-05, - "loss": 0.005052468553185463, - "step": 9130 - }, - { - "epoch": 1.5575447570332481, - "grad_norm": 0.08309191465377808, - "learning_rate": 5.164799285976438e-05, - "loss": 0.006937308609485627, - "step": 9135 - }, - { - "epoch": 1.5583972719522592, - "grad_norm": 0.07454490661621094, - "learning_rate": 5.162426108915437e-05, - "loss": 0.00504121258854866, - "step": 9140 - }, - { - "epoch": 1.5592497868712702, - "grad_norm": 0.07217807322740555, - "learning_rate": 5.160052272486943e-05, - "loss": 0.004582167789340019, - "step": 9145 - }, - { - "epoch": 1.5601023017902813, - "grad_norm": 0.07113789767026901, - "learning_rate": 5.157677777799145e-05, - "loss": 0.0055323362350463865, - "step": 9150 - }, - { - "epoch": 1.5609548167092924, - "grad_norm": 0.10281748324632645, - "learning_rate": 5.1553026259605316e-05, - "loss": 0.006342601776123047, - "step": 9155 - }, - { - "epoch": 1.5618073316283034, - "grad_norm": 0.09731876850128174, - "learning_rate": 5.152926818079906e-05, - "loss": 0.0054936733096838, - "step": 9160 - }, - { - "epoch": 1.5626598465473145, - "grad_norm": 0.09631586819887161, - "learning_rate": 5.1505503552663734e-05, - "loss": 0.0064162641763687136, - "step": 9165 - }, - { - "epoch": 1.5635123614663256, - "grad_norm": 0.07588718831539154, - "learning_rate": 5.148173238629348e-05, - "loss": 0.0069232374429702755, - "step": 9170 - }, - { - "epoch": 1.5643648763853366, - "grad_norm": 0.10357257723808289, - "learning_rate": 5.145795469278544e-05, - "loss": 0.007076382637023926, - "step": 9175 - }, - { - "epoch": 1.5652173913043477, - "grad_norm": 0.07249122112989426, - "learning_rate": 5.1434170483239826e-05, - "loss": 0.005868781358003616, - "step": 9180 - }, - { - "epoch": 1.566069906223359, - "grad_norm": 0.06878417730331421, - "learning_rate": 5.1410379768759934e-05, - "loss": 0.006841042637825012, - "step": 9185 - }, - { - "epoch": 1.56692242114237, - "grad_norm": 0.1096004843711853, - "learning_rate": 5.138658256045203e-05, - "loss": 0.00807877779006958, - "step": 9190 - }, - { - "epoch": 1.567774936061381, - "grad_norm": 0.07194329053163528, - "learning_rate": 5.136277886942547e-05, - "loss": 0.005923056975007057, - "step": 9195 - }, - { - "epoch": 1.5686274509803921, - "grad_norm": 0.08904275298118591, - "learning_rate": 5.133896870679257e-05, - "loss": 0.006372517347335816, - "step": 9200 - }, - { - "epoch": 1.5694799658994032, - "grad_norm": 0.05133598670363426, - "learning_rate": 5.131515208366873e-05, - "loss": 0.00692460760474205, - "step": 9205 - }, - { - "epoch": 1.5703324808184145, - "grad_norm": 0.047151900827884674, - "learning_rate": 5.1291329011172345e-05, - "loss": 0.006545543670654297, - "step": 9210 - }, - { - "epoch": 1.5711849957374255, - "grad_norm": 0.07102219760417938, - "learning_rate": 5.126749950042482e-05, - "loss": 0.006531259417533875, - "step": 9215 - }, - { - "epoch": 1.5720375106564366, - "grad_norm": 0.09585709124803543, - "learning_rate": 5.124366356255056e-05, - "loss": 0.005086017400026321, - "step": 9220 - }, - { - "epoch": 1.5728900255754477, - "grad_norm": 0.06898393481969833, - "learning_rate": 5.121982120867695e-05, - "loss": 0.004247477650642395, - "step": 9225 - }, - { - "epoch": 1.5737425404944587, - "grad_norm": 0.10513560473918915, - "learning_rate": 5.119597244993443e-05, - "loss": 0.006501986831426621, - "step": 9230 - }, - { - "epoch": 1.5745950554134698, - "grad_norm": 0.06671630591154099, - "learning_rate": 5.1172117297456366e-05, - "loss": 0.007658005505800247, - "step": 9235 - }, - { - "epoch": 1.5754475703324808, - "grad_norm": 0.09480880945920944, - "learning_rate": 5.1148255762379156e-05, - "loss": 0.006366011500358581, - "step": 9240 - }, - { - "epoch": 1.576300085251492, - "grad_norm": 0.06769633293151855, - "learning_rate": 5.112438785584215e-05, - "loss": 0.00625738725066185, - "step": 9245 - }, - { - "epoch": 1.577152600170503, - "grad_norm": 0.03695152327418327, - "learning_rate": 5.1100513588987665e-05, - "loss": 0.006924654543399811, - "step": 9250 - }, - { - "epoch": 1.578005115089514, - "grad_norm": 0.05657009407877922, - "learning_rate": 5.107663297296104e-05, - "loss": 0.005848415940999985, - "step": 9255 - }, - { - "epoch": 1.578857630008525, - "grad_norm": 0.11228469014167786, - "learning_rate": 5.105274601891051e-05, - "loss": 0.005637861788272858, - "step": 9260 - }, - { - "epoch": 1.5797101449275361, - "grad_norm": 0.06454899162054062, - "learning_rate": 5.102885273798732e-05, - "loss": 0.0066472023725509645, - "step": 9265 - }, - { - "epoch": 1.5805626598465472, - "grad_norm": 0.05328953638672829, - "learning_rate": 5.1004953141345637e-05, - "loss": 0.008773463219404221, - "step": 9270 - }, - { - "epoch": 1.5814151747655583, - "grad_norm": 0.05827401205897331, - "learning_rate": 5.0981047240142576e-05, - "loss": 0.0075307883322238926, - "step": 9275 - }, - { - "epoch": 1.5822676896845693, - "grad_norm": 0.0719359889626503, - "learning_rate": 5.095713504553822e-05, - "loss": 0.007532978057861328, - "step": 9280 - }, - { - "epoch": 1.5831202046035806, - "grad_norm": 0.08982953429222107, - "learning_rate": 5.0933216568695596e-05, - "loss": 0.007915425300598144, - "step": 9285 - }, - { - "epoch": 1.5839727195225917, - "grad_norm": 0.0919221043586731, - "learning_rate": 5.090929182078061e-05, - "loss": 0.005685590207576752, - "step": 9290 - }, - { - "epoch": 1.5848252344416027, - "grad_norm": 0.0840388685464859, - "learning_rate": 5.088536081296215e-05, - "loss": 0.0070190995931625364, - "step": 9295 - }, - { - "epoch": 1.5856777493606138, - "grad_norm": 0.08340579271316528, - "learning_rate": 5.086142355641199e-05, - "loss": 0.005871276929974556, - "step": 9300 - }, - { - "epoch": 1.5865302642796248, - "grad_norm": 0.0840516984462738, - "learning_rate": 5.0837480062304865e-05, - "loss": 0.007803326845169068, - "step": 9305 - }, - { - "epoch": 1.5873827791986361, - "grad_norm": 0.08378542214632034, - "learning_rate": 5.0813530341818377e-05, - "loss": 0.005085055530071258, - "step": 9310 - }, - { - "epoch": 1.5882352941176472, - "grad_norm": 0.10764650255441666, - "learning_rate": 5.078957440613305e-05, - "loss": 0.007959616929292678, - "step": 9315 - }, - { - "epoch": 1.5890878090366582, - "grad_norm": 0.07483979314565659, - "learning_rate": 5.076561226643231e-05, - "loss": 0.004332176968455314, - "step": 9320 - }, - { - "epoch": 1.5899403239556693, - "grad_norm": 0.06658382713794708, - "learning_rate": 5.074164393390249e-05, - "loss": 0.006168607249855995, - "step": 9325 - }, - { - "epoch": 1.5907928388746804, - "grad_norm": 0.09388890862464905, - "learning_rate": 5.071766941973282e-05, - "loss": 0.006460639089345932, - "step": 9330 - }, - { - "epoch": 1.5916453537936914, - "grad_norm": 0.051856543868780136, - "learning_rate": 5.0693688735115364e-05, - "loss": 0.005657953023910522, - "step": 9335 - }, - { - "epoch": 1.5924978687127025, - "grad_norm": 0.0785013884305954, - "learning_rate": 5.066970189124513e-05, - "loss": 0.008378601819276809, - "step": 9340 - }, - { - "epoch": 1.5933503836317136, - "grad_norm": 0.0653534010052681, - "learning_rate": 5.0645708899319956e-05, - "loss": 0.006928309798240662, - "step": 9345 - }, - { - "epoch": 1.5942028985507246, - "grad_norm": 0.047050826251506805, - "learning_rate": 5.062170977054058e-05, - "loss": 0.005722399801015854, - "step": 9350 - }, - { - "epoch": 1.5950554134697357, - "grad_norm": 0.10868531465530396, - "learning_rate": 5.059770451611061e-05, - "loss": 0.009898315370082855, - "step": 9355 - }, - { - "epoch": 1.5959079283887467, - "grad_norm": 0.0615832693874836, - "learning_rate": 5.0573693147236465e-05, - "loss": 0.007755370438098907, - "step": 9360 - }, - { - "epoch": 1.5967604433077578, - "grad_norm": 0.10720556974411011, - "learning_rate": 5.054967567512747e-05, - "loss": 0.006318587809801102, - "step": 9365 - }, - { - "epoch": 1.5976129582267689, - "grad_norm": 0.06587128341197968, - "learning_rate": 5.052565211099578e-05, - "loss": 0.004849371314048767, - "step": 9370 - }, - { - "epoch": 1.59846547314578, - "grad_norm": 0.07305008918046951, - "learning_rate": 5.050162246605638e-05, - "loss": 0.005983927100896835, - "step": 9375 - }, - { - "epoch": 1.599317988064791, - "grad_norm": 0.06641892343759537, - "learning_rate": 5.0477586751527124e-05, - "loss": 0.007008136063814163, - "step": 9380 - }, - { - "epoch": 1.6001705029838023, - "grad_norm": 0.06871581077575684, - "learning_rate": 5.045354497862868e-05, - "loss": 0.0066993959248065945, - "step": 9385 - }, - { - "epoch": 1.6010230179028133, - "grad_norm": 0.07417753338813782, - "learning_rate": 5.042949715858453e-05, - "loss": 0.006360804289579391, - "step": 9390 - }, - { - "epoch": 1.6018755328218244, - "grad_norm": 0.09202401340007782, - "learning_rate": 5.040544330262102e-05, - "loss": 0.006207296252250671, - "step": 9395 - }, - { - "epoch": 1.6027280477408354, - "grad_norm": 0.06747353821992874, - "learning_rate": 5.0381383421967276e-05, - "loss": 0.006196716427803039, - "step": 9400 - }, - { - "epoch": 1.6035805626598465, - "grad_norm": 0.06609310954809189, - "learning_rate": 5.0357317527855266e-05, - "loss": 0.005642791092395782, - "step": 9405 - }, - { - "epoch": 1.6044330775788578, - "grad_norm": 0.039614174515008926, - "learning_rate": 5.0333245631519716e-05, - "loss": 0.005146804824471473, - "step": 9410 - }, - { - "epoch": 1.6052855924978688, - "grad_norm": 0.0902944952249527, - "learning_rate": 5.0309167744198234e-05, - "loss": 0.005218298360705376, - "step": 9415 - }, - { - "epoch": 1.60613810741688, - "grad_norm": 0.06527641415596008, - "learning_rate": 5.028508387713114e-05, - "loss": 0.006157718971371651, - "step": 9420 - }, - { - "epoch": 1.606990622335891, - "grad_norm": 0.10824134200811386, - "learning_rate": 5.026099404156161e-05, - "loss": 0.00577687993645668, - "step": 9425 - }, - { - "epoch": 1.607843137254902, - "grad_norm": 0.091335728764534, - "learning_rate": 5.023689824873556e-05, - "loss": 0.005114461481571198, - "step": 9430 - }, - { - "epoch": 1.608695652173913, - "grad_norm": 0.047340504825115204, - "learning_rate": 5.021279650990173e-05, - "loss": 0.005150845646858216, - "step": 9435 - }, - { - "epoch": 1.6095481670929241, - "grad_norm": 0.05847655236721039, - "learning_rate": 5.01886888363116e-05, - "loss": 0.006019642949104309, - "step": 9440 - }, - { - "epoch": 1.6104006820119352, - "grad_norm": 0.10413257032632828, - "learning_rate": 5.016457523921943e-05, - "loss": 0.0097243569791317, - "step": 9445 - }, - { - "epoch": 1.6112531969309463, - "grad_norm": 0.06559625267982483, - "learning_rate": 5.014045572988226e-05, - "loss": 0.006743426620960236, - "step": 9450 - }, - { - "epoch": 1.6121057118499573, - "grad_norm": 0.07541610300540924, - "learning_rate": 5.0116330319559865e-05, - "loss": 0.004393500834703445, - "step": 9455 - }, - { - "epoch": 1.6129582267689684, - "grad_norm": 0.04757530242204666, - "learning_rate": 5.00921990195148e-05, - "loss": 0.004641738906502724, - "step": 9460 - }, - { - "epoch": 1.6138107416879794, - "grad_norm": 0.10010012239217758, - "learning_rate": 5.0068061841012355e-05, - "loss": 0.005677872523665428, - "step": 9465 - }, - { - "epoch": 1.6146632566069905, - "grad_norm": 0.08248613774776459, - "learning_rate": 5.0043918795320576e-05, - "loss": 0.006557486951351166, - "step": 9470 - }, - { - "epoch": 1.6155157715260016, - "grad_norm": 0.06300318241119385, - "learning_rate": 5.001976989371023e-05, - "loss": 0.0052742622792720795, - "step": 9475 - }, - { - "epoch": 1.6163682864450126, - "grad_norm": 0.06455430388450623, - "learning_rate": 4.999561514745482e-05, - "loss": 0.0061374582350254055, - "step": 9480 - }, - { - "epoch": 1.617220801364024, - "grad_norm": 0.04623732715845108, - "learning_rate": 4.997145456783062e-05, - "loss": 0.007861848175525665, - "step": 9485 - }, - { - "epoch": 1.618073316283035, - "grad_norm": 0.05294455960392952, - "learning_rate": 4.994728816611655e-05, - "loss": 0.005468960478901863, - "step": 9490 - }, - { - "epoch": 1.618925831202046, - "grad_norm": 0.04539628326892853, - "learning_rate": 4.992311595359431e-05, - "loss": 0.005490221083164215, - "step": 9495 - }, - { - "epoch": 1.619778346121057, - "grad_norm": 0.04033574461936951, - "learning_rate": 4.98989379415483e-05, - "loss": 0.005296828970313072, - "step": 9500 - }, - { - "epoch": 1.6206308610400681, - "grad_norm": 0.10801003128290176, - "learning_rate": 4.98747541412656e-05, - "loss": 0.007847490906715392, - "step": 9505 - }, - { - "epoch": 1.6214833759590794, - "grad_norm": 0.05979831889271736, - "learning_rate": 4.985056456403603e-05, - "loss": 0.005352787673473358, - "step": 9510 - }, - { - "epoch": 1.6223358908780905, - "grad_norm": 0.07628990709781647, - "learning_rate": 4.9826369221152086e-05, - "loss": 0.005436672642827034, - "step": 9515 - }, - { - "epoch": 1.6231884057971016, - "grad_norm": 0.0654626339673996, - "learning_rate": 4.9802168123908955e-05, - "loss": 0.004777481406927108, - "step": 9520 - }, - { - "epoch": 1.6240409207161126, - "grad_norm": 0.08487557619810104, - "learning_rate": 4.97779612836045e-05, - "loss": 0.006834116578102112, - "step": 9525 - }, - { - "epoch": 1.6248934356351237, - "grad_norm": 0.09151525050401688, - "learning_rate": 4.9753748711539316e-05, - "loss": 0.006389729678630829, - "step": 9530 - }, - { - "epoch": 1.6257459505541347, - "grad_norm": 0.10458851605653763, - "learning_rate": 4.972953041901661e-05, - "loss": 0.005984527617692947, - "step": 9535 - }, - { - "epoch": 1.6265984654731458, - "grad_norm": 0.08780983090400696, - "learning_rate": 4.970530641734229e-05, - "loss": 0.0068392202258110045, - "step": 9540 - }, - { - "epoch": 1.6274509803921569, - "grad_norm": 0.04871044307947159, - "learning_rate": 4.968107671782493e-05, - "loss": 0.005444938316941261, - "step": 9545 - }, - { - "epoch": 1.628303495311168, - "grad_norm": 0.05514970421791077, - "learning_rate": 4.9656841331775745e-05, - "loss": 0.005353255197405815, - "step": 9550 - }, - { - "epoch": 1.629156010230179, - "grad_norm": 0.057791441679000854, - "learning_rate": 4.9632600270508655e-05, - "loss": 0.005117457732558251, - "step": 9555 - }, - { - "epoch": 1.63000852514919, - "grad_norm": 0.0816815048456192, - "learning_rate": 4.960835354534015e-05, - "loss": 0.005405401438474655, - "step": 9560 - }, - { - "epoch": 1.630861040068201, - "grad_norm": 0.087788425385952, - "learning_rate": 4.958410116758945e-05, - "loss": 0.006124432012438774, - "step": 9565 - }, - { - "epoch": 1.6317135549872122, - "grad_norm": 0.08500470966100693, - "learning_rate": 4.955984314857832e-05, - "loss": 0.00581449456512928, - "step": 9570 - }, - { - "epoch": 1.6325660699062232, - "grad_norm": 0.042804375290870667, - "learning_rate": 4.9535579499631264e-05, - "loss": 0.007793295383453369, - "step": 9575 - }, - { - "epoch": 1.6334185848252343, - "grad_norm": 0.08767658472061157, - "learning_rate": 4.951131023207533e-05, - "loss": 0.006432226300239563, - "step": 9580 - }, - { - "epoch": 1.6342710997442456, - "grad_norm": 0.0693424716591835, - "learning_rate": 4.948703535724023e-05, - "loss": 0.006517377495765686, - "step": 9585 - }, - { - "epoch": 1.6351236146632566, - "grad_norm": 0.08574991673231125, - "learning_rate": 4.9462754886458276e-05, - "loss": 0.009532185643911362, - "step": 9590 - }, - { - "epoch": 1.6359761295822677, - "grad_norm": 0.04135733097791672, - "learning_rate": 4.94384688310644e-05, - "loss": 0.005358002707362175, - "step": 9595 - }, - { - "epoch": 1.6368286445012787, - "grad_norm": 0.09947369992733002, - "learning_rate": 4.941417720239616e-05, - "loss": 0.005965238064527511, - "step": 9600 - }, - { - "epoch": 1.6376811594202898, - "grad_norm": 0.038376711308956146, - "learning_rate": 4.9389880011793665e-05, - "loss": 0.00521450936794281, - "step": 9605 - }, - { - "epoch": 1.638533674339301, - "grad_norm": 0.05022123083472252, - "learning_rate": 4.9365577270599675e-05, - "loss": 0.006678921729326248, - "step": 9610 - }, - { - "epoch": 1.6393861892583121, - "grad_norm": 0.06687050312757492, - "learning_rate": 4.93412689901595e-05, - "loss": 0.006315051764249802, - "step": 9615 - }, - { - "epoch": 1.6402387041773232, - "grad_norm": 0.08563709259033203, - "learning_rate": 4.931695518182107e-05, - "loss": 0.005977614223957062, - "step": 9620 - }, - { - "epoch": 1.6410912190963343, - "grad_norm": 0.07901418209075928, - "learning_rate": 4.929263585693486e-05, - "loss": 0.004367914795875549, - "step": 9625 - }, - { - "epoch": 1.6419437340153453, - "grad_norm": 0.05929172784090042, - "learning_rate": 4.9268311026853974e-05, - "loss": 0.00466451421380043, - "step": 9630 - }, - { - "epoch": 1.6427962489343564, - "grad_norm": 0.09167131781578064, - "learning_rate": 4.924398070293403e-05, - "loss": 0.0063233010470867155, - "step": 9635 - }, - { - "epoch": 1.6436487638533674, - "grad_norm": 0.053217221051454544, - "learning_rate": 4.921964489653321e-05, - "loss": 0.005829869210720063, - "step": 9640 - }, - { - "epoch": 1.6445012787723785, - "grad_norm": 0.05341719463467598, - "learning_rate": 4.919530361901232e-05, - "loss": 0.005165425688028335, - "step": 9645 - }, - { - "epoch": 1.6453537936913896, - "grad_norm": 0.0763968899846077, - "learning_rate": 4.917095688173466e-05, - "loss": 0.008034119009971618, - "step": 9650 - }, - { - "epoch": 1.6462063086104006, - "grad_norm": 0.07722017168998718, - "learning_rate": 4.9146604696066095e-05, - "loss": 0.008911440521478653, - "step": 9655 - }, - { - "epoch": 1.6470588235294117, - "grad_norm": 0.0639941543340683, - "learning_rate": 4.912224707337504e-05, - "loss": 0.0066375695168972015, - "step": 9660 - }, - { - "epoch": 1.6479113384484227, - "grad_norm": 0.05451088026165962, - "learning_rate": 4.9097884025032425e-05, - "loss": 0.004018183052539826, - "step": 9665 - }, - { - "epoch": 1.6487638533674338, - "grad_norm": 0.06928657740354538, - "learning_rate": 4.907351556241176e-05, - "loss": 0.0061560459434986115, - "step": 9670 - }, - { - "epoch": 1.6496163682864449, - "grad_norm": 0.0672740638256073, - "learning_rate": 4.904914169688903e-05, - "loss": 0.005010559782385826, - "step": 9675 - }, - { - "epoch": 1.6504688832054561, - "grad_norm": 0.05115605145692825, - "learning_rate": 4.902476243984279e-05, - "loss": 0.005690005421638489, - "step": 9680 - }, - { - "epoch": 1.6513213981244672, - "grad_norm": 0.08852645754814148, - "learning_rate": 4.9000377802654055e-05, - "loss": 0.0067652732133865355, - "step": 9685 - }, - { - "epoch": 1.6521739130434783, - "grad_norm": 0.08289605379104614, - "learning_rate": 4.897598779670643e-05, - "loss": 0.005946322903037071, - "step": 9690 - }, - { - "epoch": 1.6530264279624893, - "grad_norm": 0.08343428373336792, - "learning_rate": 4.895159243338594e-05, - "loss": 0.006231371313333511, - "step": 9695 - }, - { - "epoch": 1.6538789428815004, - "grad_norm": 0.08138900995254517, - "learning_rate": 4.892719172408117e-05, - "loss": 0.006785771995782852, - "step": 9700 - }, - { - "epoch": 1.6547314578005117, - "grad_norm": 0.07599585503339767, - "learning_rate": 4.890278568018318e-05, - "loss": 0.00609181635081768, - "step": 9705 - }, - { - "epoch": 1.6555839727195227, - "grad_norm": 0.07918383926153183, - "learning_rate": 4.887837431308552e-05, - "loss": 0.006991502642631531, - "step": 9710 - }, - { - "epoch": 1.6564364876385338, - "grad_norm": 0.048750922083854675, - "learning_rate": 4.8853957634184246e-05, - "loss": 0.00639684796333313, - "step": 9715 - }, - { - "epoch": 1.6572890025575449, - "grad_norm": 0.07931654155254364, - "learning_rate": 4.882953565487785e-05, - "loss": 0.004780232906341553, - "step": 9720 - }, - { - "epoch": 1.658141517476556, - "grad_norm": 0.07394375652074814, - "learning_rate": 4.8805108386567345e-05, - "loss": 0.005560039728879929, - "step": 9725 - }, - { - "epoch": 1.658994032395567, - "grad_norm": 0.07906223088502884, - "learning_rate": 4.8780675840656175e-05, - "loss": 0.006233107298612595, - "step": 9730 - }, - { - "epoch": 1.659846547314578, - "grad_norm": 0.05145291984081268, - "learning_rate": 4.875623802855027e-05, - "loss": 0.0049663417041301726, - "step": 9735 - }, - { - "epoch": 1.660699062233589, - "grad_norm": 0.06227492541074753, - "learning_rate": 4.873179496165802e-05, - "loss": 0.006139815598726272, - "step": 9740 - }, - { - "epoch": 1.6615515771526002, - "grad_norm": 0.08176816254854202, - "learning_rate": 4.870734665139028e-05, - "loss": 0.007625886052846908, - "step": 9745 - }, - { - "epoch": 1.6624040920716112, - "grad_norm": 0.06774444133043289, - "learning_rate": 4.868289310916029e-05, - "loss": 0.006510105729103088, - "step": 9750 - }, - { - "epoch": 1.6632566069906223, - "grad_norm": 0.07336006313562393, - "learning_rate": 4.8658434346383805e-05, - "loss": 0.0068834669888019565, - "step": 9755 - }, - { - "epoch": 1.6641091219096333, - "grad_norm": 0.07233051210641861, - "learning_rate": 4.863397037447899e-05, - "loss": 0.005505643784999847, - "step": 9760 - }, - { - "epoch": 1.6649616368286444, - "grad_norm": 0.037355873733758926, - "learning_rate": 4.860950120486643e-05, - "loss": 0.005151794478297234, - "step": 9765 - }, - { - "epoch": 1.6658141517476555, - "grad_norm": 0.10907282680273056, - "learning_rate": 4.8585026848969164e-05, - "loss": 0.007589263468980789, - "step": 9770 - }, - { - "epoch": 1.6666666666666665, - "grad_norm": 0.10357582569122314, - "learning_rate": 4.856054731821261e-05, - "loss": 0.006011854484677314, - "step": 9775 - }, - { - "epoch": 1.6675191815856778, - "grad_norm": 0.0713953971862793, - "learning_rate": 4.853606262402465e-05, - "loss": 0.006342334300279617, - "step": 9780 - }, - { - "epoch": 1.6683716965046889, - "grad_norm": 0.07772944122552872, - "learning_rate": 4.851157277783555e-05, - "loss": 0.005903373658657074, - "step": 9785 - }, - { - "epoch": 1.6692242114237, - "grad_norm": 0.1249493658542633, - "learning_rate": 4.848707779107797e-05, - "loss": 0.006542833894491196, - "step": 9790 - }, - { - "epoch": 1.670076726342711, - "grad_norm": 0.05137734115123749, - "learning_rate": 4.8462577675187e-05, - "loss": 0.004380676150321961, - "step": 9795 - }, - { - "epoch": 1.670929241261722, - "grad_norm": 0.09491576999425888, - "learning_rate": 4.8438072441600095e-05, - "loss": 0.005311820283532142, - "step": 9800 - }, - { - "epoch": 1.6717817561807333, - "grad_norm": 0.09257746487855911, - "learning_rate": 4.8413562101757134e-05, - "loss": 0.006033014133572578, - "step": 9805 - }, - { - "epoch": 1.6726342710997444, - "grad_norm": 0.045860812067985535, - "learning_rate": 4.838904666710034e-05, - "loss": 0.008368080109357834, - "step": 9810 - }, - { - "epoch": 1.6734867860187554, - "grad_norm": 0.033777810633182526, - "learning_rate": 4.836452614907435e-05, - "loss": 0.0045743979513645176, - "step": 9815 - }, - { - "epoch": 1.6743393009377665, - "grad_norm": 0.12888991832733154, - "learning_rate": 4.834000055912614e-05, - "loss": 0.005997149646282196, - "step": 9820 - }, - { - "epoch": 1.6751918158567776, - "grad_norm": 0.08622048050165176, - "learning_rate": 4.8315469908705074e-05, - "loss": 0.007002732157707215, - "step": 9825 - }, - { - "epoch": 1.6760443307757886, - "grad_norm": 0.04722774773836136, - "learning_rate": 4.82909342092629e-05, - "loss": 0.005374876409769058, - "step": 9830 - }, - { - "epoch": 1.6768968456947997, - "grad_norm": 0.08596520870923996, - "learning_rate": 4.826639347225366e-05, - "loss": 0.0066084228456020355, - "step": 9835 - }, - { - "epoch": 1.6777493606138107, - "grad_norm": 0.09831524640321732, - "learning_rate": 4.824184770913381e-05, - "loss": 0.004402932524681091, - "step": 9840 - }, - { - "epoch": 1.6786018755328218, - "grad_norm": 0.10586824268102646, - "learning_rate": 4.821729693136214e-05, - "loss": 0.006442143023014069, - "step": 9845 - }, - { - "epoch": 1.6794543904518329, - "grad_norm": 0.11845403164625168, - "learning_rate": 4.8192741150399735e-05, - "loss": 0.006300021708011627, - "step": 9850 - }, - { - "epoch": 1.680306905370844, - "grad_norm": 0.08749356120824814, - "learning_rate": 4.816818037771007e-05, - "loss": 0.0060168147087097164, - "step": 9855 - }, - { - "epoch": 1.681159420289855, - "grad_norm": 0.06483060121536255, - "learning_rate": 4.814361462475895e-05, - "loss": 0.00717247799038887, - "step": 9860 - }, - { - "epoch": 1.682011935208866, - "grad_norm": 0.09276239573955536, - "learning_rate": 4.811904390301444e-05, - "loss": 0.006788758933544159, - "step": 9865 - }, - { - "epoch": 1.682864450127877, - "grad_norm": 0.05662832781672478, - "learning_rate": 4.809446822394701e-05, - "loss": 0.0068000413477420805, - "step": 9870 - }, - { - "epoch": 1.6837169650468882, - "grad_norm": 0.07508451491594315, - "learning_rate": 4.80698875990294e-05, - "loss": 0.006339512765407562, - "step": 9875 - }, - { - "epoch": 1.6845694799658995, - "grad_norm": 0.06525320559740067, - "learning_rate": 4.804530203973664e-05, - "loss": 0.010082229971885681, - "step": 9880 - }, - { - "epoch": 1.6854219948849105, - "grad_norm": 0.07791458070278168, - "learning_rate": 4.8020711557546104e-05, - "loss": 0.006830710172653198, - "step": 9885 - }, - { - "epoch": 1.6862745098039216, - "grad_norm": 0.05997749790549278, - "learning_rate": 4.799611616393745e-05, - "loss": 0.00666801705956459, - "step": 9890 - }, - { - "epoch": 1.6871270247229326, - "grad_norm": 0.07050258666276932, - "learning_rate": 4.797151587039261e-05, - "loss": 0.0059244450181722644, - "step": 9895 - }, - { - "epoch": 1.6879795396419437, - "grad_norm": 0.06760186702013016, - "learning_rate": 4.794691068839585e-05, - "loss": 0.006415641307830811, - "step": 9900 - }, - { - "epoch": 1.688832054560955, - "grad_norm": 0.07285474240779877, - "learning_rate": 4.792230062943364e-05, - "loss": 0.004972729086875916, - "step": 9905 - }, - { - "epoch": 1.689684569479966, - "grad_norm": 0.02914854884147644, - "learning_rate": 4.789768570499481e-05, - "loss": 0.004819701239466667, - "step": 9910 - }, - { - "epoch": 1.690537084398977, - "grad_norm": 0.058768294751644135, - "learning_rate": 4.787306592657042e-05, - "loss": 0.00581958070397377, - "step": 9915 - }, - { - "epoch": 1.6913895993179882, - "grad_norm": 0.08694405853748322, - "learning_rate": 4.7848441305653804e-05, - "loss": 0.004998849332332611, - "step": 9920 - }, - { - "epoch": 1.6922421142369992, - "grad_norm": 0.10194200277328491, - "learning_rate": 4.782381185374054e-05, - "loss": 0.00809016153216362, - "step": 9925 - }, - { - "epoch": 1.6930946291560103, - "grad_norm": 0.04976386949419975, - "learning_rate": 4.779917758232849e-05, - "loss": 0.00392133817076683, - "step": 9930 - }, - { - "epoch": 1.6939471440750213, - "grad_norm": 0.04324428364634514, - "learning_rate": 4.777453850291774e-05, - "loss": 0.005488916113972664, - "step": 9935 - }, - { - "epoch": 1.6947996589940324, - "grad_norm": 0.128068745136261, - "learning_rate": 4.774989462701063e-05, - "loss": 0.008696570992469788, - "step": 9940 - }, - { - "epoch": 1.6956521739130435, - "grad_norm": 0.06357335299253464, - "learning_rate": 4.7725245966111764e-05, - "loss": 0.00657767504453659, - "step": 9945 - }, - { - "epoch": 1.6965046888320545, - "grad_norm": 0.09200388938188553, - "learning_rate": 4.770059253172793e-05, - "loss": 0.00511985532939434, - "step": 9950 - }, - { - "epoch": 1.6973572037510656, - "grad_norm": 0.0898200049996376, - "learning_rate": 4.767593433536819e-05, - "loss": 0.005805553123354912, - "step": 9955 - }, - { - "epoch": 1.6982097186700766, - "grad_norm": 0.06495708227157593, - "learning_rate": 4.765127138854379e-05, - "loss": 0.005122709274291992, - "step": 9960 - }, - { - "epoch": 1.6990622335890877, - "grad_norm": 0.06079862266778946, - "learning_rate": 4.762660370276824e-05, - "loss": 0.005829216912388802, - "step": 9965 - }, - { - "epoch": 1.6999147485080988, - "grad_norm": 0.07300638407468796, - "learning_rate": 4.760193128955721e-05, - "loss": 0.0057421475648880005, - "step": 9970 - }, - { - "epoch": 1.7007672634271098, - "grad_norm": 0.09826004505157471, - "learning_rate": 4.757725416042863e-05, - "loss": 0.007709302753210068, - "step": 9975 - }, - { - "epoch": 1.701619778346121, - "grad_norm": 0.08353756368160248, - "learning_rate": 4.755257232690258e-05, - "loss": 0.007458946853876114, - "step": 9980 - }, - { - "epoch": 1.7024722932651322, - "grad_norm": 0.057993657886981964, - "learning_rate": 4.752788580050137e-05, - "loss": 0.0048107530921697615, - "step": 9985 - }, - { - "epoch": 1.7033248081841432, - "grad_norm": 0.08480621874332428, - "learning_rate": 4.750319459274951e-05, - "loss": 0.007556724548339844, - "step": 9990 - }, - { - "epoch": 1.7041773231031543, - "grad_norm": 0.06563637405633926, - "learning_rate": 4.747849871517364e-05, - "loss": 0.00476250983774662, - "step": 9995 - }, - { - "epoch": 1.7050298380221653, - "grad_norm": 0.06217886507511139, - "learning_rate": 4.7453798179302656e-05, - "loss": 0.008565887063741683, - "step": 10000 - }, - { - "epoch": 1.7058823529411766, - "grad_norm": 0.07285669445991516, - "learning_rate": 4.742909299666756e-05, - "loss": 0.0062899492681026455, - "step": 10005 - }, - { - "epoch": 1.7067348678601877, - "grad_norm": 0.043275732547044754, - "learning_rate": 4.7404383178801564e-05, - "loss": 0.005467301979660988, - "step": 10010 - }, - { - "epoch": 1.7075873827791987, - "grad_norm": 0.09345486015081406, - "learning_rate": 4.7379668737240044e-05, - "loss": 0.007198603451251983, - "step": 10015 - }, - { - "epoch": 1.7084398976982098, - "grad_norm": 0.09792933613061905, - "learning_rate": 4.735494968352049e-05, - "loss": 0.009155672788619996, - "step": 10020 - }, - { - "epoch": 1.7092924126172209, - "grad_norm": 0.03888144716620445, - "learning_rate": 4.733022602918263e-05, - "loss": 0.00484597384929657, - "step": 10025 - }, - { - "epoch": 1.710144927536232, - "grad_norm": 0.050344232469797134, - "learning_rate": 4.7305497785768235e-05, - "loss": 0.00478862039744854, - "step": 10030 - }, - { - "epoch": 1.710997442455243, - "grad_norm": 0.0724092647433281, - "learning_rate": 4.728076496482131e-05, - "loss": 0.005028426647186279, - "step": 10035 - }, - { - "epoch": 1.711849957374254, - "grad_norm": 0.10781413316726685, - "learning_rate": 4.725602757788794e-05, - "loss": 0.00789962187409401, - "step": 10040 - }, - { - "epoch": 1.712702472293265, - "grad_norm": 0.0828569084405899, - "learning_rate": 4.723128563651637e-05, - "loss": 0.006212035566568375, - "step": 10045 - }, - { - "epoch": 1.7135549872122762, - "grad_norm": 0.06634854525327682, - "learning_rate": 4.720653915225695e-05, - "loss": 0.00550018809735775, - "step": 10050 - }, - { - "epoch": 1.7144075021312872, - "grad_norm": 0.07699137926101685, - "learning_rate": 4.718178813666217e-05, - "loss": 0.007427608966827393, - "step": 10055 - }, - { - "epoch": 1.7152600170502983, - "grad_norm": 0.08237455785274506, - "learning_rate": 4.715703260128663e-05, - "loss": 0.0049440376460552216, - "step": 10060 - }, - { - "epoch": 1.7161125319693094, - "grad_norm": 0.0423310324549675, - "learning_rate": 4.7132272557687034e-05, - "loss": 0.005643930658698082, - "step": 10065 - }, - { - "epoch": 1.7169650468883204, - "grad_norm": 0.08052363246679306, - "learning_rate": 4.71075080174222e-05, - "loss": 0.005594046413898468, - "step": 10070 - }, - { - "epoch": 1.7178175618073315, - "grad_norm": 0.05388827249407768, - "learning_rate": 4.7082738992053004e-05, - "loss": 0.005239073187112808, - "step": 10075 - }, - { - "epoch": 1.7186700767263428, - "grad_norm": 0.0699780210852623, - "learning_rate": 4.70579654931425e-05, - "loss": 0.004442551359534264, - "step": 10080 - }, - { - "epoch": 1.7195225916453538, - "grad_norm": 0.07259970158338547, - "learning_rate": 4.7033187532255765e-05, - "loss": 0.004775180667638779, - "step": 10085 - }, - { - "epoch": 1.7203751065643649, - "grad_norm": 0.10291304439306259, - "learning_rate": 4.700840512095995e-05, - "loss": 0.009148158878087998, - "step": 10090 - }, - { - "epoch": 1.721227621483376, - "grad_norm": 0.09639768302440643, - "learning_rate": 4.698361827082435e-05, - "loss": 0.008357913047075272, - "step": 10095 - }, - { - "epoch": 1.722080136402387, - "grad_norm": 0.08128193765878677, - "learning_rate": 4.695882699342026e-05, - "loss": 0.006467945128679276, - "step": 10100 - }, - { - "epoch": 1.7229326513213983, - "grad_norm": 0.0678371787071228, - "learning_rate": 4.6934031300321094e-05, - "loss": 0.005760467797517777, - "step": 10105 - }, - { - "epoch": 1.7237851662404093, - "grad_norm": 0.0766267478466034, - "learning_rate": 4.6909231203102285e-05, - "loss": 0.0068340465426445, - "step": 10110 - }, - { - "epoch": 1.7246376811594204, - "grad_norm": 0.04263419657945633, - "learning_rate": 4.6884426713341366e-05, - "loss": 0.005921339616179466, - "step": 10115 - }, - { - "epoch": 1.7254901960784315, - "grad_norm": 0.10168195515871048, - "learning_rate": 4.6859617842617874e-05, - "loss": 0.006926319003105164, - "step": 10120 - }, - { - "epoch": 1.7263427109974425, - "grad_norm": 0.07910803705453873, - "learning_rate": 4.683480460251343e-05, - "loss": 0.006997878849506378, - "step": 10125 - }, - { - "epoch": 1.7271952259164536, - "grad_norm": 0.045049965381622314, - "learning_rate": 4.680998700461169e-05, - "loss": 0.005594813078641891, - "step": 10130 - }, - { - "epoch": 1.7280477408354646, - "grad_norm": 0.07185275852680206, - "learning_rate": 4.678516506049832e-05, - "loss": 0.006092778965830803, - "step": 10135 - }, - { - "epoch": 1.7289002557544757, - "grad_norm": 0.07003147900104523, - "learning_rate": 4.676033878176102e-05, - "loss": 0.007595886290073395, - "step": 10140 - }, - { - "epoch": 1.7297527706734868, - "grad_norm": 0.06360077112913132, - "learning_rate": 4.6735508179989536e-05, - "loss": 0.00546439029276371, - "step": 10145 - }, - { - "epoch": 1.7306052855924978, - "grad_norm": 0.07347442954778671, - "learning_rate": 4.671067326677563e-05, - "loss": 0.004961185902357101, - "step": 10150 - }, - { - "epoch": 1.7314578005115089, - "grad_norm": 0.056153345853090286, - "learning_rate": 4.6685834053713035e-05, - "loss": 0.006820976734161377, - "step": 10155 - }, - { - "epoch": 1.73231031543052, - "grad_norm": 0.09868444502353668, - "learning_rate": 4.666099055239755e-05, - "loss": 0.004829689115285874, - "step": 10160 - }, - { - "epoch": 1.733162830349531, - "grad_norm": 0.07029838860034943, - "learning_rate": 4.663614277442694e-05, - "loss": 0.006708820164203644, - "step": 10165 - }, - { - "epoch": 1.734015345268542, - "grad_norm": 0.0785607323050499, - "learning_rate": 4.661129073140096e-05, - "loss": 0.0093411885201931, - "step": 10170 - }, - { - "epoch": 1.7348678601875531, - "grad_norm": 0.05867304652929306, - "learning_rate": 4.658643443492139e-05, - "loss": 0.004420546442270279, - "step": 10175 - }, - { - "epoch": 1.7357203751065644, - "grad_norm": 0.08736653625965118, - "learning_rate": 4.656157389659196e-05, - "loss": 0.0049125440418720245, - "step": 10180 - }, - { - "epoch": 1.7365728900255755, - "grad_norm": 0.10769468545913696, - "learning_rate": 4.653670912801842e-05, - "loss": 0.006663528829813003, - "step": 10185 - }, - { - "epoch": 1.7374254049445865, - "grad_norm": 0.054130490869283676, - "learning_rate": 4.651184014080843e-05, - "loss": 0.005649637803435326, - "step": 10190 - }, - { - "epoch": 1.7382779198635976, - "grad_norm": 0.0760764479637146, - "learning_rate": 4.648696694657171e-05, - "loss": 0.00803508386015892, - "step": 10195 - }, - { - "epoch": 1.7391304347826086, - "grad_norm": 0.08103618025779724, - "learning_rate": 4.646208955691987e-05, - "loss": 0.005645860359072686, - "step": 10200 - }, - { - "epoch": 1.73998294970162, - "grad_norm": 0.060226406902074814, - "learning_rate": 4.643720798346649e-05, - "loss": 0.005114502459764481, - "step": 10205 - }, - { - "epoch": 1.740835464620631, - "grad_norm": 0.08842508494853973, - "learning_rate": 4.641232223782713e-05, - "loss": 0.004128537327051163, - "step": 10210 - }, - { - "epoch": 1.741687979539642, - "grad_norm": 0.03715536370873451, - "learning_rate": 4.6387432331619284e-05, - "loss": 0.005640536174178123, - "step": 10215 - }, - { - "epoch": 1.742540494458653, - "grad_norm": 0.09130766242742538, - "learning_rate": 4.636253827646239e-05, - "loss": 0.0074319176375865935, - "step": 10220 - }, - { - "epoch": 1.7433930093776642, - "grad_norm": 0.08204436302185059, - "learning_rate": 4.6337640083977826e-05, - "loss": 0.006443107873201371, - "step": 10225 - }, - { - "epoch": 1.7442455242966752, - "grad_norm": 0.09834989905357361, - "learning_rate": 4.6312737765788883e-05, - "loss": 0.00825996845960617, - "step": 10230 - }, - { - "epoch": 1.7450980392156863, - "grad_norm": 0.07453756034374237, - "learning_rate": 4.628783133352078e-05, - "loss": 0.005153121426701546, - "step": 10235 - }, - { - "epoch": 1.7459505541346974, - "grad_norm": 0.0658891350030899, - "learning_rate": 4.626292079880071e-05, - "loss": 0.005568725615739822, - "step": 10240 - }, - { - "epoch": 1.7468030690537084, - "grad_norm": 0.08673261851072311, - "learning_rate": 4.623800617325772e-05, - "loss": 0.00687919333577156, - "step": 10245 - }, - { - "epoch": 1.7476555839727195, - "grad_norm": 0.08707419037818909, - "learning_rate": 4.621308746852276e-05, - "loss": 0.009814801812171935, - "step": 10250 - }, - { - "epoch": 1.7485080988917305, - "grad_norm": 0.07168986648321152, - "learning_rate": 4.618816469622874e-05, - "loss": 0.004722443222999573, - "step": 10255 - }, - { - "epoch": 1.7493606138107416, - "grad_norm": 0.07987508177757263, - "learning_rate": 4.616323786801042e-05, - "loss": 0.006749927252531052, - "step": 10260 - }, - { - "epoch": 1.7497016197783462, - "eval_loss": 0.03619376942515373, - "eval_runtime": 3.6854, - "eval_samples_per_second": 68.379, - "eval_steps_per_second": 1.085, - "step": 10262 - }, - { - "eval_cer_subset": 0.014314328985294836, - "eval_cer_subset_edit_distance": 879, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 10262 - }, - { - "epoch": 1.7502131287297527, - "grad_norm": 0.10899413377046585, - "learning_rate": 4.6138306995504495e-05, - "loss": 0.006938809901475907, - "step": 10265 - }, - { - "epoch": 1.7510656436487637, - "grad_norm": 0.10073213279247284, - "learning_rate": 4.6113372090349516e-05, - "loss": 0.00795048326253891, - "step": 10270 - }, - { - "epoch": 1.7519181585677748, - "grad_norm": 0.04800979420542717, - "learning_rate": 4.608843316418592e-05, - "loss": 0.007616385817527771, - "step": 10275 - }, - { - "epoch": 1.752770673486786, - "grad_norm": 0.09020161628723145, - "learning_rate": 4.6063490228656025e-05, - "loss": 0.005228221416473389, - "step": 10280 - }, - { - "epoch": 1.7536231884057971, - "grad_norm": 0.083438441157341, - "learning_rate": 4.603854329540403e-05, - "loss": 0.00726160854101181, - "step": 10285 - }, - { - "epoch": 1.7544757033248082, - "grad_norm": 0.07851024717092514, - "learning_rate": 4.6013592376076e-05, - "loss": 0.006890790909528733, - "step": 10290 - }, - { - "epoch": 1.7553282182438192, - "grad_norm": 0.09015098959207535, - "learning_rate": 4.598863748231985e-05, - "loss": 0.007083073258399963, - "step": 10295 - }, - { - "epoch": 1.7561807331628303, - "grad_norm": 0.04751535877585411, - "learning_rate": 4.596367862578534e-05, - "loss": 0.005376371741294861, - "step": 10300 - }, - { - "epoch": 1.7570332480818416, - "grad_norm": 0.07547739148139954, - "learning_rate": 4.5938715818124094e-05, - "loss": 0.008766484260559083, - "step": 10305 - }, - { - "epoch": 1.7578857630008526, - "grad_norm": 0.052052512764930725, - "learning_rate": 4.5913749070989616e-05, - "loss": 0.005375667661428452, - "step": 10310 - }, - { - "epoch": 1.7587382779198637, - "grad_norm": 0.11575129628181458, - "learning_rate": 4.5888778396037187e-05, - "loss": 0.006675881892442703, - "step": 10315 - }, - { - "epoch": 1.7595907928388748, - "grad_norm": 0.05995294824242592, - "learning_rate": 4.586380380492394e-05, - "loss": 0.007097356766462326, - "step": 10320 - }, - { - "epoch": 1.7604433077578858, - "grad_norm": 0.049236129969358444, - "learning_rate": 4.583882530930887e-05, - "loss": 0.004433324560523033, - "step": 10325 - }, - { - "epoch": 1.7612958226768969, - "grad_norm": 0.048296503722667694, - "learning_rate": 4.581384292085274e-05, - "loss": 0.0051886774599552155, - "step": 10330 - }, - { - "epoch": 1.762148337595908, - "grad_norm": 0.09939385205507278, - "learning_rate": 4.57888566512182e-05, - "loss": 0.006426715105772018, - "step": 10335 - }, - { - "epoch": 1.763000852514919, - "grad_norm": 0.08810277283191681, - "learning_rate": 4.5763866512069626e-05, - "loss": 0.00727301687002182, - "step": 10340 - }, - { - "epoch": 1.76385336743393, - "grad_norm": 0.05262129753828049, - "learning_rate": 4.573887251507328e-05, - "loss": 0.004860313236713409, - "step": 10345 - }, - { - "epoch": 1.7647058823529411, - "grad_norm": 0.09755868464708328, - "learning_rate": 4.571387467189718e-05, - "loss": 0.00684543177485466, - "step": 10350 - }, - { - "epoch": 1.7655583972719522, - "grad_norm": 0.08306272327899933, - "learning_rate": 4.568887299421115e-05, - "loss": 0.005363506823778152, - "step": 10355 - }, - { - "epoch": 1.7664109121909632, - "grad_norm": 0.06304962188005447, - "learning_rate": 4.566386749368681e-05, - "loss": 0.006262359023094177, - "step": 10360 - }, - { - "epoch": 1.7672634271099743, - "grad_norm": 0.099216029047966, - "learning_rate": 4.5638858181997544e-05, - "loss": 0.005263365060091019, - "step": 10365 - }, - { - "epoch": 1.7681159420289854, - "grad_norm": 0.06316341459751129, - "learning_rate": 4.5613845070818544e-05, - "loss": 0.0053974583745002745, - "step": 10370 - }, - { - "epoch": 1.7689684569479964, - "grad_norm": 0.08523725718259811, - "learning_rate": 4.5588828171826755e-05, - "loss": 0.006064000725746155, - "step": 10375 - }, - { - "epoch": 1.7698209718670077, - "grad_norm": 0.0663699060678482, - "learning_rate": 4.5563807496700925e-05, - "loss": 0.00665600374341011, - "step": 10380 - }, - { - "epoch": 1.7706734867860188, - "grad_norm": 0.10673311352729797, - "learning_rate": 4.55387830571215e-05, - "loss": 0.006540966033935547, - "step": 10385 - }, - { - "epoch": 1.7715260017050298, - "grad_norm": 0.08779574930667877, - "learning_rate": 4.551375486477074e-05, - "loss": 0.00547558106482029, - "step": 10390 - }, - { - "epoch": 1.772378516624041, - "grad_norm": 0.07451514899730682, - "learning_rate": 4.5488722931332625e-05, - "loss": 0.008499838411808014, - "step": 10395 - }, - { - "epoch": 1.773231031543052, - "grad_norm": 0.06014202535152435, - "learning_rate": 4.5463687268492904e-05, - "loss": 0.006278771907091141, - "step": 10400 - }, - { - "epoch": 1.7740835464620632, - "grad_norm": 0.039256151765584946, - "learning_rate": 4.543864788793907e-05, - "loss": 0.0037193533033132555, - "step": 10405 - }, - { - "epoch": 1.7749360613810743, - "grad_norm": 0.09449942409992218, - "learning_rate": 4.541360480136031e-05, - "loss": 0.006574592739343643, - "step": 10410 - }, - { - "epoch": 1.7757885763000854, - "grad_norm": 0.07616980373859406, - "learning_rate": 4.53885580204476e-05, - "loss": 0.006042734161019326, - "step": 10415 - }, - { - "epoch": 1.7766410912190964, - "grad_norm": 0.07019155472517014, - "learning_rate": 4.5363507556893574e-05, - "loss": 0.006044945493340492, - "step": 10420 - }, - { - "epoch": 1.7774936061381075, - "grad_norm": 0.0616939477622509, - "learning_rate": 4.533845342239266e-05, - "loss": 0.004315405339002609, - "step": 10425 - }, - { - "epoch": 1.7783461210571185, - "grad_norm": 0.09354502707719803, - "learning_rate": 4.5313395628640943e-05, - "loss": 0.005719271302223205, - "step": 10430 - }, - { - "epoch": 1.7791986359761296, - "grad_norm": 0.08747732639312744, - "learning_rate": 4.528833418733623e-05, - "loss": 0.00472431555390358, - "step": 10435 - }, - { - "epoch": 1.7800511508951407, - "grad_norm": 0.09513017535209656, - "learning_rate": 4.5263269110178034e-05, - "loss": 0.006968998908996582, - "step": 10440 - }, - { - "epoch": 1.7809036658141517, - "grad_norm": 0.09208676964044571, - "learning_rate": 4.523820040886759e-05, - "loss": 0.006609047204256058, - "step": 10445 - }, - { - "epoch": 1.7817561807331628, - "grad_norm": 0.09964144974946976, - "learning_rate": 4.521312809510778e-05, - "loss": 0.0056272163987159726, - "step": 10450 - }, - { - "epoch": 1.7826086956521738, - "grad_norm": 0.06850367784500122, - "learning_rate": 4.51880521806032e-05, - "loss": 0.005562498047947883, - "step": 10455 - }, - { - "epoch": 1.783461210571185, - "grad_norm": 0.0654430240392685, - "learning_rate": 4.5162972677060124e-05, - "loss": 0.0059367924928665165, - "step": 10460 - }, - { - "epoch": 1.784313725490196, - "grad_norm": 0.0449560284614563, - "learning_rate": 4.513788959618649e-05, - "loss": 0.005458919331431389, - "step": 10465 - }, - { - "epoch": 1.785166240409207, - "grad_norm": 0.14256814122200012, - "learning_rate": 4.511280294969192e-05, - "loss": 0.0066184431314468386, - "step": 10470 - }, - { - "epoch": 1.7860187553282183, - "grad_norm": 0.08284557610750198, - "learning_rate": 4.508771274928771e-05, - "loss": 0.007388219982385635, - "step": 10475 - }, - { - "epoch": 1.7868712702472294, - "grad_norm": 0.05675457417964935, - "learning_rate": 4.506261900668676e-05, - "loss": 0.005572458356618881, - "step": 10480 - }, - { - "epoch": 1.7877237851662404, - "grad_norm": 0.05767322704195976, - "learning_rate": 4.50375217336037e-05, - "loss": 0.0058133058249950405, - "step": 10485 - }, - { - "epoch": 1.7885763000852515, - "grad_norm": 0.03421638533473015, - "learning_rate": 4.501242094175476e-05, - "loss": 0.005268872529268265, - "step": 10490 - }, - { - "epoch": 1.7894288150042625, - "grad_norm": 0.07319685071706772, - "learning_rate": 4.4987316642857836e-05, - "loss": 0.008701664954423904, - "step": 10495 - }, - { - "epoch": 1.7902813299232738, - "grad_norm": 0.04271615296602249, - "learning_rate": 4.4962208848632426e-05, - "loss": 0.005680259317159653, - "step": 10500 - }, - { - "epoch": 1.7911338448422849, - "grad_norm": 0.05916997417807579, - "learning_rate": 4.493709757079971e-05, - "loss": 0.004779224097728729, - "step": 10505 - }, - { - "epoch": 1.791986359761296, - "grad_norm": 0.04994066804647446, - "learning_rate": 4.491198282108244e-05, - "loss": 0.00443916954100132, - "step": 10510 - }, - { - "epoch": 1.792838874680307, - "grad_norm": 0.09032617509365082, - "learning_rate": 4.488686461120504e-05, - "loss": 0.007850547134876252, - "step": 10515 - }, - { - "epoch": 1.793691389599318, - "grad_norm": 0.05055975914001465, - "learning_rate": 4.4861742952893525e-05, - "loss": 0.005925046652555466, - "step": 10520 - }, - { - "epoch": 1.7945439045183291, - "grad_norm": 0.07521310448646545, - "learning_rate": 4.48366178578755e-05, - "loss": 0.006785632669925689, - "step": 10525 - }, - { - "epoch": 1.7953964194373402, - "grad_norm": 0.06577371805906296, - "learning_rate": 4.4811489337880216e-05, - "loss": 0.005300462618470192, - "step": 10530 - }, - { - "epoch": 1.7962489343563512, - "grad_norm": 0.0451020710170269, - "learning_rate": 4.4786357404638485e-05, - "loss": 0.00612550750374794, - "step": 10535 - }, - { - "epoch": 1.7971014492753623, - "grad_norm": 0.08968023955821991, - "learning_rate": 4.4761222069882754e-05, - "loss": 0.00558510459959507, - "step": 10540 - }, - { - "epoch": 1.7979539641943734, - "grad_norm": 0.0945729911327362, - "learning_rate": 4.4736083345347015e-05, - "loss": 0.007513274252414703, - "step": 10545 - }, - { - "epoch": 1.7988064791133844, - "grad_norm": 0.10392102599143982, - "learning_rate": 4.4710941242766844e-05, - "loss": 0.006224355846643448, - "step": 10550 - }, - { - "epoch": 1.7996589940323955, - "grad_norm": 0.10485874116420746, - "learning_rate": 4.4685795773879446e-05, - "loss": 0.005821261927485466, - "step": 10555 - }, - { - "epoch": 1.8005115089514065, - "grad_norm": 0.0689731314778328, - "learning_rate": 4.466064695042355e-05, - "loss": 0.0062000565230846405, - "step": 10560 - }, - { - "epoch": 1.8013640238704176, - "grad_norm": 0.07008705288171768, - "learning_rate": 4.4635494784139463e-05, - "loss": 0.006286797672510147, - "step": 10565 - }, - { - "epoch": 1.8022165387894287, - "grad_norm": 0.07595150172710419, - "learning_rate": 4.461033928676904e-05, - "loss": 0.006704485416412354, - "step": 10570 - }, - { - "epoch": 1.80306905370844, - "grad_norm": 0.07564863562583923, - "learning_rate": 4.458518047005572e-05, - "loss": 0.005777762830257415, - "step": 10575 - }, - { - "epoch": 1.803921568627451, - "grad_norm": 0.07202555984258652, - "learning_rate": 4.4560018345744466e-05, - "loss": 0.00602865107357502, - "step": 10580 - }, - { - "epoch": 1.804774083546462, - "grad_norm": 0.10462740063667297, - "learning_rate": 4.453485292558179e-05, - "loss": 0.007622111588716507, - "step": 10585 - }, - { - "epoch": 1.8056265984654731, - "grad_norm": 0.05587150529026985, - "learning_rate": 4.450968422131578e-05, - "loss": 0.00641121193766594, - "step": 10590 - }, - { - "epoch": 1.8064791133844842, - "grad_norm": 0.0603446289896965, - "learning_rate": 4.448451224469598e-05, - "loss": 0.0073586970567703245, - "step": 10595 - }, - { - "epoch": 1.8073316283034955, - "grad_norm": 0.04228143393993378, - "learning_rate": 4.445933700747353e-05, - "loss": 0.005406339466571808, - "step": 10600 - }, - { - "epoch": 1.8081841432225065, - "grad_norm": 0.04840795323252678, - "learning_rate": 4.4434158521401065e-05, - "loss": 0.0041844088584184645, - "step": 10605 - }, - { - "epoch": 1.8090366581415176, - "grad_norm": 0.08334027975797653, - "learning_rate": 4.440897679823275e-05, - "loss": 0.008376862108707427, - "step": 10610 - }, - { - "epoch": 1.8098891730605287, - "grad_norm": 0.07879523187875748, - "learning_rate": 4.438379184972423e-05, - "loss": 0.0053595036268234255, - "step": 10615 - }, - { - "epoch": 1.8107416879795397, - "grad_norm": 0.0689932182431221, - "learning_rate": 4.435860368763269e-05, - "loss": 0.005961846932768822, - "step": 10620 - }, - { - "epoch": 1.8115942028985508, - "grad_norm": 0.07035796344280243, - "learning_rate": 4.43334123237168e-05, - "loss": 0.005833951756358147, - "step": 10625 - }, - { - "epoch": 1.8124467178175618, - "grad_norm": 0.06488184630870819, - "learning_rate": 4.4308217769736715e-05, - "loss": 0.006380685418844223, - "step": 10630 - }, - { - "epoch": 1.813299232736573, - "grad_norm": 0.1095893532037735, - "learning_rate": 4.428302003745412e-05, - "loss": 0.006500106304883957, - "step": 10635 - }, - { - "epoch": 1.814151747655584, - "grad_norm": 0.07402926683425903, - "learning_rate": 4.425781913863212e-05, - "loss": 0.010839180648326873, - "step": 10640 - }, - { - "epoch": 1.815004262574595, - "grad_norm": 0.07752810418605804, - "learning_rate": 4.4232615085035354e-05, - "loss": 0.0053322531282901766, - "step": 10645 - }, - { - "epoch": 1.815856777493606, - "grad_norm": 0.06572280824184418, - "learning_rate": 4.420740788842991e-05, - "loss": 0.0072415158152580265, - "step": 10650 - }, - { - "epoch": 1.8167092924126171, - "grad_norm": 0.07175682485103607, - "learning_rate": 4.418219756058335e-05, - "loss": 0.007061149924993515, - "step": 10655 - }, - { - "epoch": 1.8175618073316282, - "grad_norm": 0.0702451840043068, - "learning_rate": 4.4156984113264684e-05, - "loss": 0.0050024140626192095, - "step": 10660 - }, - { - "epoch": 1.8184143222506393, - "grad_norm": 0.05054900422692299, - "learning_rate": 4.4131767558244375e-05, - "loss": 0.004906433075666428, - "step": 10665 - }, - { - "epoch": 1.8192668371696503, - "grad_norm": 0.07256589829921722, - "learning_rate": 4.410654790729438e-05, - "loss": 0.006986310333013534, - "step": 10670 - }, - { - "epoch": 1.8201193520886616, - "grad_norm": 0.06617925316095352, - "learning_rate": 4.408132517218805e-05, - "loss": 0.007973263412714005, - "step": 10675 - }, - { - "epoch": 1.8209718670076727, - "grad_norm": 0.09039802104234695, - "learning_rate": 4.405609936470022e-05, - "loss": 0.007263268530368805, - "step": 10680 - }, - { - "epoch": 1.8218243819266837, - "grad_norm": 0.03763730078935623, - "learning_rate": 4.40308704966071e-05, - "loss": 0.005709199234843254, - "step": 10685 - }, - { - "epoch": 1.8226768968456948, - "grad_norm": 0.09264735877513885, - "learning_rate": 4.400563857968639e-05, - "loss": 0.006996266543865204, - "step": 10690 - }, - { - "epoch": 1.8235294117647058, - "grad_norm": 0.0882507711648941, - "learning_rate": 4.398040362571719e-05, - "loss": 0.007461686432361603, - "step": 10695 - }, - { - "epoch": 1.8243819266837171, - "grad_norm": 0.07662846893072128, - "learning_rate": 4.395516564648e-05, - "loss": 0.006977429986000061, - "step": 10700 - }, - { - "epoch": 1.8252344416027282, - "grad_norm": 0.07431378960609436, - "learning_rate": 4.392992465375676e-05, - "loss": 0.004957346618175507, - "step": 10705 - }, - { - "epoch": 1.8260869565217392, - "grad_norm": 0.06182624027132988, - "learning_rate": 4.39046806593308e-05, - "loss": 0.006677946448326111, - "step": 10710 - }, - { - "epoch": 1.8269394714407503, - "grad_norm": 0.06389910727739334, - "learning_rate": 4.3879433674986856e-05, - "loss": 0.006449097394943237, - "step": 10715 - }, - { - "epoch": 1.8277919863597614, - "grad_norm": 0.06772691756486893, - "learning_rate": 4.385418371251107e-05, - "loss": 0.004998251050710678, - "step": 10720 - }, - { - "epoch": 1.8286445012787724, - "grad_norm": 0.07048022747039795, - "learning_rate": 4.3828930783690955e-05, - "loss": 0.006418389827013015, - "step": 10725 - }, - { - "epoch": 1.8294970161977835, - "grad_norm": 0.09442687779664993, - "learning_rate": 4.3803674900315424e-05, - "loss": 0.006921603530645371, - "step": 10730 - }, - { - "epoch": 1.8303495311167945, - "grad_norm": 0.0578981414437294, - "learning_rate": 4.377841607417475e-05, - "loss": 0.007038000971078873, - "step": 10735 - }, - { - "epoch": 1.8312020460358056, - "grad_norm": 0.06990659236907959, - "learning_rate": 4.37531543170606e-05, - "loss": 0.005136258527636528, - "step": 10740 - }, - { - "epoch": 1.8320545609548167, - "grad_norm": 0.05566668137907982, - "learning_rate": 4.372788964076601e-05, - "loss": 0.005333118140697479, - "step": 10745 - }, - { - "epoch": 1.8329070758738277, - "grad_norm": 0.09198274463415146, - "learning_rate": 4.3702622057085376e-05, - "loss": 0.005783502757549286, - "step": 10750 - }, - { - "epoch": 1.8337595907928388, - "grad_norm": 0.12995415925979614, - "learning_rate": 4.3677351577814423e-05, - "loss": 0.005794361606240273, - "step": 10755 - }, - { - "epoch": 1.8346121057118498, - "grad_norm": 0.0827256515622139, - "learning_rate": 4.3652078214750264e-05, - "loss": 0.00593951866030693, - "step": 10760 - }, - { - "epoch": 1.835464620630861, - "grad_norm": 0.09131235629320145, - "learning_rate": 4.362680197969136e-05, - "loss": 0.006387272477149963, - "step": 10765 - }, - { - "epoch": 1.836317135549872, - "grad_norm": 0.06061462685465813, - "learning_rate": 4.360152288443748e-05, - "loss": 0.006085103005170822, - "step": 10770 - }, - { - "epoch": 1.8371696504688833, - "grad_norm": 0.05650132894515991, - "learning_rate": 4.357624094078976e-05, - "loss": 0.004817041009664536, - "step": 10775 - }, - { - "epoch": 1.8380221653878943, - "grad_norm": 0.09250559657812119, - "learning_rate": 4.355095616055063e-05, - "loss": 0.006116693839430809, - "step": 10780 - }, - { - "epoch": 1.8388746803069054, - "grad_norm": 0.06575264036655426, - "learning_rate": 4.352566855552389e-05, - "loss": 0.006027846410870552, - "step": 10785 - }, - { - "epoch": 1.8397271952259164, - "grad_norm": 0.07538174092769623, - "learning_rate": 4.350037813751462e-05, - "loss": 0.006624206900596619, - "step": 10790 - }, - { - "epoch": 1.8405797101449275, - "grad_norm": 0.06000296771526337, - "learning_rate": 4.347508491832924e-05, - "loss": 0.006386204063892365, - "step": 10795 - }, - { - "epoch": 1.8414322250639388, - "grad_norm": 0.058621276170015335, - "learning_rate": 4.3449788909775455e-05, - "loss": 0.006246517226099968, - "step": 10800 - }, - { - "epoch": 1.8422847399829498, - "grad_norm": 0.10082551836967468, - "learning_rate": 4.34244901236623e-05, - "loss": 0.006916524469852447, - "step": 10805 - }, - { - "epoch": 1.843137254901961, - "grad_norm": 0.07926804572343826, - "learning_rate": 4.3399188571800064e-05, - "loss": 0.006270130723714828, - "step": 10810 - }, - { - "epoch": 1.843989769820972, - "grad_norm": 0.14256511628627777, - "learning_rate": 4.3373884266000375e-05, - "loss": 0.008555002510547638, - "step": 10815 - }, - { - "epoch": 1.844842284739983, - "grad_norm": 0.0711030438542366, - "learning_rate": 4.334857721807612e-05, - "loss": 0.004097539931535721, - "step": 10820 - }, - { - "epoch": 1.845694799658994, - "grad_norm": 0.05918106436729431, - "learning_rate": 4.3323267439841464e-05, - "loss": 0.006263938546180725, - "step": 10825 - }, - { - "epoch": 1.8465473145780051, - "grad_norm": 0.06577462702989578, - "learning_rate": 4.329795494311186e-05, - "loss": 0.004532983154058456, - "step": 10830 - }, - { - "epoch": 1.8473998294970162, - "grad_norm": 0.07599867880344391, - "learning_rate": 4.327263973970401e-05, - "loss": 0.006951173394918441, - "step": 10835 - }, - { - "epoch": 1.8482523444160273, - "grad_norm": 0.055239275097846985, - "learning_rate": 4.324732184143592e-05, - "loss": 0.00514591783285141, - "step": 10840 - }, - { - "epoch": 1.8491048593350383, - "grad_norm": 0.10522980988025665, - "learning_rate": 4.322200126012681e-05, - "loss": 0.00747048556804657, - "step": 10845 - }, - { - "epoch": 1.8499573742540494, - "grad_norm": 0.08132579177618027, - "learning_rate": 4.319667800759716e-05, - "loss": 0.005432958528399467, - "step": 10850 - }, - { - "epoch": 1.8508098891730604, - "grad_norm": 0.04027591645717621, - "learning_rate": 4.3171352095668726e-05, - "loss": 0.004450181499123573, - "step": 10855 - }, - { - "epoch": 1.8516624040920715, - "grad_norm": 0.0873839259147644, - "learning_rate": 4.314602353616446e-05, - "loss": 0.006079509109258652, - "step": 10860 - }, - { - "epoch": 1.8525149190110826, - "grad_norm": 0.04989013075828552, - "learning_rate": 4.312069234090862e-05, - "loss": 0.003988634794950485, - "step": 10865 - }, - { - "epoch": 1.8533674339300936, - "grad_norm": 0.061433590948581696, - "learning_rate": 4.309535852172661e-05, - "loss": 0.0056050091981887816, - "step": 10870 - }, - { - "epoch": 1.854219948849105, - "grad_norm": 0.07007768750190735, - "learning_rate": 4.3070022090445114e-05, - "loss": 0.006938119232654571, - "step": 10875 - }, - { - "epoch": 1.855072463768116, - "grad_norm": 0.03557104617357254, - "learning_rate": 4.3044683058892024e-05, - "loss": 0.0061099715530872345, - "step": 10880 - }, - { - "epoch": 1.855924978687127, - "grad_norm": 0.07706935703754425, - "learning_rate": 4.3019341438896446e-05, - "loss": 0.0050103053450584415, - "step": 10885 - }, - { - "epoch": 1.856777493606138, - "grad_norm": 0.06719083338975906, - "learning_rate": 4.2993997242288686e-05, - "loss": 0.005047342553734779, - "step": 10890 - }, - { - "epoch": 1.8576300085251491, - "grad_norm": 0.05179615691304207, - "learning_rate": 4.296865048090024e-05, - "loss": 0.004692831635475158, - "step": 10895 - }, - { - "epoch": 1.8584825234441604, - "grad_norm": 0.08594074845314026, - "learning_rate": 4.294330116656385e-05, - "loss": 0.006039778143167496, - "step": 10900 - }, - { - "epoch": 1.8593350383631715, - "grad_norm": 0.11285590380430222, - "learning_rate": 4.291794931111339e-05, - "loss": 0.005857323482632637, - "step": 10905 - }, - { - "epoch": 1.8601875532821825, - "grad_norm": 0.056068334728479385, - "learning_rate": 4.289259492638399e-05, - "loss": 0.006339801102876663, - "step": 10910 - }, - { - "epoch": 1.8610400682011936, - "grad_norm": 0.1027015820145607, - "learning_rate": 4.2867238024211873e-05, - "loss": 0.007628202438354492, - "step": 10915 - }, - { - "epoch": 1.8618925831202047, - "grad_norm": 0.06938920170068741, - "learning_rate": 4.2841878616434516e-05, - "loss": 0.005421775206923485, - "step": 10920 - }, - { - "epoch": 1.8627450980392157, - "grad_norm": 0.06613876670598984, - "learning_rate": 4.2816516714890525e-05, - "loss": 0.00747835859656334, - "step": 10925 - }, - { - "epoch": 1.8635976129582268, - "grad_norm": 0.07735379040241241, - "learning_rate": 4.279115233141967e-05, - "loss": 0.006907149404287338, - "step": 10930 - }, - { - "epoch": 1.8644501278772379, - "grad_norm": 0.06285069137811661, - "learning_rate": 4.276578547786291e-05, - "loss": 0.005340246856212616, - "step": 10935 - }, - { - "epoch": 1.865302642796249, - "grad_norm": 0.0670136883854866, - "learning_rate": 4.274041616606232e-05, - "loss": 0.0067828245460987095, - "step": 10940 - }, - { - "epoch": 1.86615515771526, - "grad_norm": 0.07944425195455551, - "learning_rate": 4.2715044407861144e-05, - "loss": 0.006403806060552597, - "step": 10945 - }, - { - "epoch": 1.867007672634271, - "grad_norm": 0.07202634960412979, - "learning_rate": 4.268967021510375e-05, - "loss": 0.004467373341321945, - "step": 10950 - }, - { - "epoch": 1.867860187553282, - "grad_norm": 0.08753371983766556, - "learning_rate": 4.266429359963568e-05, - "loss": 0.006740668416023254, - "step": 10955 - }, - { - "epoch": 1.8687127024722932, - "grad_norm": 0.0920538380742073, - "learning_rate": 4.263891457330357e-05, - "loss": 0.007489701360464096, - "step": 10960 - }, - { - "epoch": 1.8695652173913042, - "grad_norm": 0.11196473985910416, - "learning_rate": 4.261353314795519e-05, - "loss": 0.007533909380435943, - "step": 10965 - }, - { - "epoch": 1.8704177323103153, - "grad_norm": 0.08394299447536469, - "learning_rate": 4.258814933543943e-05, - "loss": 0.005159291997551918, - "step": 10970 - }, - { - "epoch": 1.8712702472293266, - "grad_norm": 0.08024156838655472, - "learning_rate": 4.25627631476063e-05, - "loss": 0.00543709248304367, - "step": 10975 - }, - { - "epoch": 1.8721227621483376, - "grad_norm": 0.052640948444604874, - "learning_rate": 4.253737459630694e-05, - "loss": 0.004067758470773697, - "step": 10980 - }, - { - "epoch": 1.8729752770673487, - "grad_norm": 0.08472926914691925, - "learning_rate": 4.251198369339353e-05, - "loss": 0.0077335178852081295, - "step": 10985 - }, - { - "epoch": 1.8738277919863597, - "grad_norm": 0.09794485569000244, - "learning_rate": 4.248659045071942e-05, - "loss": 0.0055429480969905855, - "step": 10990 - }, - { - "epoch": 1.8746803069053708, - "grad_norm": 0.07767575234174728, - "learning_rate": 4.2461194880139016e-05, - "loss": 0.008025288581848145, - "step": 10995 - }, - { - "epoch": 1.875532821824382, - "grad_norm": 0.07427361607551575, - "learning_rate": 4.2435796993507794e-05, - "loss": 0.006437119096517563, - "step": 11000 - }, - { - "epoch": 1.8763853367433931, - "grad_norm": 0.07420040667057037, - "learning_rate": 4.241039680268237e-05, - "loss": 0.0051200386136770245, - "step": 11005 - }, - { - "epoch": 1.8772378516624042, - "grad_norm": 0.09004204720258713, - "learning_rate": 4.2384994319520355e-05, - "loss": 0.007488063722848892, - "step": 11010 - }, - { - "epoch": 1.8780903665814153, - "grad_norm": 0.060929473489522934, - "learning_rate": 4.235958955588049e-05, - "loss": 0.00483398288488388, - "step": 11015 - }, - { - "epoch": 1.8789428815004263, - "grad_norm": 0.08116185665130615, - "learning_rate": 4.2334182523622584e-05, - "loss": 0.007078002393245697, - "step": 11020 - }, - { - "epoch": 1.8797953964194374, - "grad_norm": 0.0908491238951683, - "learning_rate": 4.230877323460746e-05, - "loss": 0.007228228449821472, - "step": 11025 - }, - { - "epoch": 1.8806479113384484, - "grad_norm": 0.08618480712175369, - "learning_rate": 4.228336170069703e-05, - "loss": 0.005402455478906632, - "step": 11030 - }, - { - "epoch": 1.8815004262574595, - "grad_norm": 0.06869816035032272, - "learning_rate": 4.2257947933754236e-05, - "loss": 0.006180650368332863, - "step": 11035 - }, - { - "epoch": 1.8823529411764706, - "grad_norm": 0.0904744416475296, - "learning_rate": 4.223253194564309e-05, - "loss": 0.00636049136519432, - "step": 11040 - }, - { - "epoch": 1.8832054560954816, - "grad_norm": 0.04902644082903862, - "learning_rate": 4.220711374822859e-05, - "loss": 0.0062784947454929355, - "step": 11045 - }, - { - "epoch": 1.8840579710144927, - "grad_norm": 0.060081589967012405, - "learning_rate": 4.2181693353376817e-05, - "loss": 0.005494052171707153, - "step": 11050 - }, - { - "epoch": 1.8849104859335037, - "grad_norm": 0.058530837297439575, - "learning_rate": 4.215627077295485e-05, - "loss": 0.005457080900669098, - "step": 11055 - }, - { - "epoch": 1.8857630008525148, - "grad_norm": 0.15006953477859497, - "learning_rate": 4.2130846018830795e-05, - "loss": 0.0062985971570014955, - "step": 11060 - }, - { - "epoch": 1.8866155157715259, - "grad_norm": 0.04498155787587166, - "learning_rate": 4.210541910287377e-05, - "loss": 0.004242038726806641, - "step": 11065 - }, - { - "epoch": 1.887468030690537, - "grad_norm": 0.09093966335058212, - "learning_rate": 4.207999003695392e-05, - "loss": 0.00554364025592804, - "step": 11070 - }, - { - "epoch": 1.8883205456095482, - "grad_norm": 0.06531018018722534, - "learning_rate": 4.2054558832942365e-05, - "loss": 0.0063869751989841465, - "step": 11075 - }, - { - "epoch": 1.8891730605285593, - "grad_norm": 0.059213872998952866, - "learning_rate": 4.202912550271124e-05, - "loss": 0.004836349189281464, - "step": 11080 - }, - { - "epoch": 1.8900255754475703, - "grad_norm": 0.11074823886156082, - "learning_rate": 4.200369005813367e-05, - "loss": 0.00584055446088314, - "step": 11085 - }, - { - "epoch": 1.8908780903665814, - "grad_norm": 0.09352346509695053, - "learning_rate": 4.197825251108376e-05, - "loss": 0.006423837691545487, - "step": 11090 - }, - { - "epoch": 1.8917306052855924, - "grad_norm": 0.10930176079273224, - "learning_rate": 4.195281287343662e-05, - "loss": 0.007819923013448716, - "step": 11095 - }, - { - "epoch": 1.8925831202046037, - "grad_norm": 0.10935486853122711, - "learning_rate": 4.19273711570683e-05, - "loss": 0.008524692058563233, - "step": 11100 - }, - { - "epoch": 1.8934356351236148, - "grad_norm": 0.07407546788454056, - "learning_rate": 4.190192737385586e-05, - "loss": 0.006353407353162766, - "step": 11105 - }, - { - "epoch": 1.8942881500426259, - "grad_norm": 0.11030165106058121, - "learning_rate": 4.187648153567729e-05, - "loss": 0.007683426141738892, - "step": 11110 - }, - { - "epoch": 1.895140664961637, - "grad_norm": 0.09419413655996323, - "learning_rate": 4.185103365441155e-05, - "loss": 0.005654521286487579, - "step": 11115 - }, - { - "epoch": 1.895993179880648, - "grad_norm": 0.06284896284341812, - "learning_rate": 4.1825583741938576e-05, - "loss": 0.0048633765429258345, - "step": 11120 - }, - { - "epoch": 1.896845694799659, - "grad_norm": 0.06429705023765564, - "learning_rate": 4.180013181013921e-05, - "loss": 0.006907754391431808, - "step": 11125 - }, - { - "epoch": 1.89769820971867, - "grad_norm": 0.1234050914645195, - "learning_rate": 4.177467787089527e-05, - "loss": 0.008531783521175385, - "step": 11130 - }, - { - "epoch": 1.8985507246376812, - "grad_norm": 0.04056263715028763, - "learning_rate": 4.174922193608951e-05, - "loss": 0.006784418225288391, - "step": 11135 - }, - { - "epoch": 1.8994032395566922, - "grad_norm": 0.048422425985336304, - "learning_rate": 4.172376401760561e-05, - "loss": 0.006587067246437072, - "step": 11140 - }, - { - "epoch": 1.9002557544757033, - "grad_norm": 0.10680951178073883, - "learning_rate": 4.169830412732815e-05, - "loss": 0.005700337141752243, - "step": 11145 - }, - { - "epoch": 1.9011082693947143, - "grad_norm": 0.09418217837810516, - "learning_rate": 4.167284227714267e-05, - "loss": 0.0059782925993204115, - "step": 11150 - }, - { - "epoch": 1.9019607843137254, - "grad_norm": 0.12511073052883148, - "learning_rate": 4.1647378478935614e-05, - "loss": 0.006256800889968872, - "step": 11155 - }, - { - "epoch": 1.9028132992327365, - "grad_norm": 0.06957859545946121, - "learning_rate": 4.1621912744594316e-05, - "loss": 0.008690094202756881, - "step": 11160 - }, - { - "epoch": 1.9036658141517475, - "grad_norm": 0.10859719663858414, - "learning_rate": 4.159644508600704e-05, - "loss": 0.008262380957603455, - "step": 11165 - }, - { - "epoch": 1.9045183290707586, - "grad_norm": 0.08408714830875397, - "learning_rate": 4.157097551506292e-05, - "loss": 0.005347007513046264, - "step": 11170 - }, - { - "epoch": 1.9053708439897699, - "grad_norm": 0.05623621866106987, - "learning_rate": 4.1545504043652014e-05, - "loss": 0.005091758817434311, - "step": 11175 - }, - { - "epoch": 1.906223358908781, - "grad_norm": 0.06791777908802032, - "learning_rate": 4.1520030683665246e-05, - "loss": 0.006755101680755615, - "step": 11180 - }, - { - "epoch": 1.907075873827792, - "grad_norm": 0.039112675935029984, - "learning_rate": 4.149455544699444e-05, - "loss": 0.0063312210142612456, - "step": 11185 - }, - { - "epoch": 1.907928388746803, - "grad_norm": 0.05682097375392914, - "learning_rate": 4.146907834553227e-05, - "loss": 0.005028403550386429, - "step": 11190 - }, - { - "epoch": 1.908780903665814, - "grad_norm": 0.07670710980892181, - "learning_rate": 4.144359939117229e-05, - "loss": 0.006438900530338287, - "step": 11195 - }, - { - "epoch": 1.9096334185848254, - "grad_norm": 0.06266012787818909, - "learning_rate": 4.141811859580894e-05, - "loss": 0.006153284758329392, - "step": 11200 - }, - { - "epoch": 1.9104859335038364, - "grad_norm": 0.06892232596874237, - "learning_rate": 4.139263597133749e-05, - "loss": 0.0042446799576282505, - "step": 11205 - }, - { - "epoch": 1.9113384484228475, - "grad_norm": 0.08733050525188446, - "learning_rate": 4.136715152965409e-05, - "loss": 0.0048094093799591064, - "step": 11210 - }, - { - "epoch": 1.9121909633418586, - "grad_norm": 0.06578327715396881, - "learning_rate": 4.13416652826557e-05, - "loss": 0.0047289058566093445, - "step": 11215 - }, - { - "epoch": 1.9130434782608696, - "grad_norm": 0.06382749229669571, - "learning_rate": 4.1316177242240174e-05, - "loss": 0.004200926423072815, - "step": 11220 - }, - { - "epoch": 1.9138959931798807, - "grad_norm": 0.07368794828653336, - "learning_rate": 4.129068742030617e-05, - "loss": 0.0063028551638126375, - "step": 11225 - }, - { - "epoch": 1.9147485080988917, - "grad_norm": 0.09302657842636108, - "learning_rate": 4.1265195828753176e-05, - "loss": 0.008124063909053802, - "step": 11230 - }, - { - "epoch": 1.9156010230179028, - "grad_norm": 0.08030751347541809, - "learning_rate": 4.123970247948153e-05, - "loss": 0.009628574550151824, - "step": 11235 - }, - { - "epoch": 1.9164535379369139, - "grad_norm": 0.08395590633153915, - "learning_rate": 4.1214207384392356e-05, - "loss": 0.007773591578006745, - "step": 11240 - }, - { - "epoch": 1.917306052855925, - "grad_norm": 0.09472183138132095, - "learning_rate": 4.118871055538762e-05, - "loss": 0.005461954325437546, - "step": 11245 - }, - { - "epoch": 1.918158567774936, - "grad_norm": 0.095457524061203, - "learning_rate": 4.11632120043701e-05, - "loss": 0.005725187063217163, - "step": 11250 - }, - { - "epoch": 1.919011082693947, - "grad_norm": 0.10508730262517929, - "learning_rate": 4.113771174324336e-05, - "loss": 0.006902433931827545, - "step": 11255 - }, - { - "epoch": 1.919863597612958, - "grad_norm": 0.08675665408372879, - "learning_rate": 4.111220978391176e-05, - "loss": 0.007470531016588211, - "step": 11260 - }, - { - "epoch": 1.9207161125319692, - "grad_norm": 0.08215013146400452, - "learning_rate": 4.108670613828049e-05, - "loss": 0.005732448399066925, - "step": 11265 - }, - { - "epoch": 1.9215686274509802, - "grad_norm": 0.054156310856342316, - "learning_rate": 4.1061200818255476e-05, - "loss": 0.005808809399604797, - "step": 11270 - }, - { - "epoch": 1.9224211423699915, - "grad_norm": 0.09332830458879471, - "learning_rate": 4.103569383574346e-05, - "loss": 0.005646481737494468, - "step": 11275 - }, - { - "epoch": 1.9232736572890026, - "grad_norm": 0.05589313432574272, - "learning_rate": 4.101018520265195e-05, - "loss": 0.005581434443593025, - "step": 11280 - }, - { - "epoch": 1.9241261722080136, - "grad_norm": 0.0465618334710598, - "learning_rate": 4.098467493088922e-05, - "loss": 0.005028170347213745, - "step": 11285 - }, - { - "epoch": 1.9249786871270247, - "grad_norm": 0.07304909080266953, - "learning_rate": 4.095916303236431e-05, - "loss": 0.007494028657674789, - "step": 11290 - }, - { - "epoch": 1.9258312020460358, - "grad_norm": 0.09532103687524796, - "learning_rate": 4.0933649518987025e-05, - "loss": 0.006374432146549225, - "step": 11295 - }, - { - "epoch": 1.926683716965047, - "grad_norm": 0.07364784181118011, - "learning_rate": 4.090813440266794e-05, - "loss": 0.0053088821470737456, - "step": 11300 - }, - { - "epoch": 1.927536231884058, - "grad_norm": 0.0804903507232666, - "learning_rate": 4.088261769531834e-05, - "loss": 0.0069495439529418945, - "step": 11305 - }, - { - "epoch": 1.9283887468030692, - "grad_norm": 0.07125549763441086, - "learning_rate": 4.0857099408850264e-05, - "loss": 0.005846098065376282, - "step": 11310 - }, - { - "epoch": 1.9292412617220802, - "grad_norm": 0.017375558614730835, - "learning_rate": 4.083157955517653e-05, - "loss": 0.004308582097291946, - "step": 11315 - }, - { - "epoch": 1.9300937766410913, - "grad_norm": 0.07655836641788483, - "learning_rate": 4.080605814621063e-05, - "loss": 0.006030111759901047, - "step": 11320 - }, - { - "epoch": 1.9309462915601023, - "grad_norm": 0.05411117896437645, - "learning_rate": 4.078053519386681e-05, - "loss": 0.0069768443703651425, - "step": 11325 - }, - { - "epoch": 1.9317988064791134, - "grad_norm": 0.08431188017129898, - "learning_rate": 4.0755010710060035e-05, - "loss": 0.006973695755004883, - "step": 11330 - }, - { - "epoch": 1.9326513213981245, - "grad_norm": 0.08480583131313324, - "learning_rate": 4.072948470670598e-05, - "loss": 0.006525547057390213, - "step": 11335 - }, - { - "epoch": 1.9335038363171355, - "grad_norm": 0.073171466588974, - "learning_rate": 4.070395719572104e-05, - "loss": 0.0054599311202764515, - "step": 11340 - }, - { - "epoch": 1.9343563512361466, - "grad_norm": 0.06951522827148438, - "learning_rate": 4.0678428189022304e-05, - "loss": 0.008897364884614945, - "step": 11345 - }, - { - "epoch": 1.9352088661551576, - "grad_norm": 0.08654197305440903, - "learning_rate": 4.0652897698527557e-05, - "loss": 0.005458325147628784, - "step": 11350 - }, - { - "epoch": 1.9360613810741687, - "grad_norm": 0.07929553836584091, - "learning_rate": 4.0627365736155285e-05, - "loss": 0.00710543841123581, - "step": 11355 - }, - { - "epoch": 1.9369138959931798, - "grad_norm": 0.12434503436088562, - "learning_rate": 4.060183231382466e-05, - "loss": 0.0071723200380802155, - "step": 11360 - }, - { - "epoch": 1.9377664109121908, - "grad_norm": 0.06440022587776184, - "learning_rate": 4.057629744345551e-05, - "loss": 0.006010268628597259, - "step": 11365 - }, - { - "epoch": 1.938618925831202, - "grad_norm": 0.09477414190769196, - "learning_rate": 4.0550761136968404e-05, - "loss": 0.007152469456195831, - "step": 11370 - }, - { - "epoch": 1.9394714407502132, - "grad_norm": 0.06758873164653778, - "learning_rate": 4.0525223406284516e-05, - "loss": 0.004493400454521179, - "step": 11375 - }, - { - "epoch": 1.9403239556692242, - "grad_norm": 0.06823158264160156, - "learning_rate": 4.0499684263325695e-05, - "loss": 0.0058505676686763765, - "step": 11380 - }, - { - "epoch": 1.9411764705882353, - "grad_norm": 0.10731697082519531, - "learning_rate": 4.0474143720014485e-05, - "loss": 0.00592585802078247, - "step": 11385 - }, - { - "epoch": 1.9420289855072463, - "grad_norm": 0.09786538779735565, - "learning_rate": 4.044860178827405e-05, - "loss": 0.008860854804515839, - "step": 11390 - }, - { - "epoch": 1.9428815004262576, - "grad_norm": 0.08662491291761398, - "learning_rate": 4.042305848002822e-05, - "loss": 0.00579673945903778, - "step": 11395 - }, - { - "epoch": 1.9437340153452687, - "grad_norm": 0.08446741849184036, - "learning_rate": 4.039751380720145e-05, - "loss": 0.0067916139960289, - "step": 11400 - }, - { - "epoch": 1.9445865302642797, - "grad_norm": 0.08059567958116531, - "learning_rate": 4.037196778171885e-05, - "loss": 0.007273902744054794, - "step": 11405 - }, - { - "epoch": 1.9454390451832908, - "grad_norm": 0.067914679646492, - "learning_rate": 4.0346420415506156e-05, - "loss": 0.00854090303182602, - "step": 11410 - }, - { - "epoch": 1.9462915601023019, - "grad_norm": 0.06519316136837006, - "learning_rate": 4.032087172048973e-05, - "loss": 0.006127477809786797, - "step": 11415 - }, - { - "epoch": 1.947144075021313, - "grad_norm": 0.10216967016458511, - "learning_rate": 4.029532170859655e-05, - "loss": 0.007330343872308731, - "step": 11420 - }, - { - "epoch": 1.947996589940324, - "grad_norm": 0.07684756815433502, - "learning_rate": 4.02697703917542e-05, - "loss": 0.006121716648340225, - "step": 11425 - }, - { - "epoch": 1.948849104859335, - "grad_norm": 0.08026126027107239, - "learning_rate": 4.0244217781890906e-05, - "loss": 0.006386417150497437, - "step": 11430 - }, - { - "epoch": 1.949701619778346, - "grad_norm": 0.09047527611255646, - "learning_rate": 4.021866389093546e-05, - "loss": 0.004208286106586456, - "step": 11435 - }, - { - "epoch": 1.9505541346973572, - "grad_norm": 0.047482747584581375, - "learning_rate": 4.0193108730817284e-05, - "loss": 0.005754061415791512, - "step": 11440 - }, - { - "epoch": 1.9514066496163682, - "grad_norm": 0.054364416748285294, - "learning_rate": 4.0167552313466355e-05, - "loss": 0.004412830248475075, - "step": 11445 - }, - { - "epoch": 1.9522591645353793, - "grad_norm": 0.07640549540519714, - "learning_rate": 4.014199465081327e-05, - "loss": 0.005214530602097511, - "step": 11450 - }, - { - "epoch": 1.9531116794543903, - "grad_norm": 0.07241252809762955, - "learning_rate": 4.0116435754789206e-05, - "loss": 0.005129393562674523, - "step": 11455 - }, - { - "epoch": 1.9539641943734014, - "grad_norm": 0.048170432448387146, - "learning_rate": 4.009087563732589e-05, - "loss": 0.005180074647068977, - "step": 11460 - }, - { - "epoch": 1.9548167092924125, - "grad_norm": 0.07336216419935226, - "learning_rate": 4.006531431035566e-05, - "loss": 0.009098170697689057, - "step": 11465 - }, - { - "epoch": 1.9556692242114238, - "grad_norm": 0.04934614151716232, - "learning_rate": 4.0039751785811346e-05, - "loss": 0.005307629331946373, - "step": 11470 - }, - { - "epoch": 1.9565217391304348, - "grad_norm": 0.08941303193569183, - "learning_rate": 4.001418807562643e-05, - "loss": 0.0069742932915687565, - "step": 11475 - }, - { - "epoch": 1.9573742540494459, - "grad_norm": 0.05791569501161575, - "learning_rate": 3.998862319173488e-05, - "loss": 0.0050424404442310335, - "step": 11480 - }, - { - "epoch": 1.958226768968457, - "grad_norm": 0.04596787318587303, - "learning_rate": 3.996305714607125e-05, - "loss": 0.004805172979831696, - "step": 11485 - }, - { - "epoch": 1.959079283887468, - "grad_norm": 0.07698309421539307, - "learning_rate": 3.993748995057061e-05, - "loss": 0.006605527549982071, - "step": 11490 - }, - { - "epoch": 1.9599317988064793, - "grad_norm": 0.08400565385818481, - "learning_rate": 3.9911921617168565e-05, - "loss": 0.0085490882396698, - "step": 11495 - }, - { - "epoch": 1.9607843137254903, - "grad_norm": 0.1446380764245987, - "learning_rate": 3.9886352157801296e-05, - "loss": 0.005958027392625809, - "step": 11500 - }, - { - "epoch": 1.9616368286445014, - "grad_norm": 0.06108809635043144, - "learning_rate": 3.986078158440544e-05, - "loss": 0.0054461218416690825, - "step": 11505 - }, - { - "epoch": 1.9624893435635125, - "grad_norm": 0.1163720190525055, - "learning_rate": 3.983520990891823e-05, - "loss": 0.0065662160515785216, - "step": 11510 - }, - { - "epoch": 1.9633418584825235, - "grad_norm": 0.08339548110961914, - "learning_rate": 3.980963714327734e-05, - "loss": 0.007503192871809006, - "step": 11515 - }, - { - "epoch": 1.9641943734015346, - "grad_norm": 0.07774331420660019, - "learning_rate": 3.9784063299421e-05, - "loss": 0.005831217020750045, - "step": 11520 - }, - { - "epoch": 1.9650468883205456, - "grad_norm": 0.08897018432617188, - "learning_rate": 3.9758488389287936e-05, - "loss": 0.006972354650497436, - "step": 11525 - }, - { - "epoch": 1.9658994032395567, - "grad_norm": 0.07708834111690521, - "learning_rate": 3.9732912424817374e-05, - "loss": 0.0059847764670848845, - "step": 11530 - }, - { - "epoch": 1.9667519181585678, - "grad_norm": 0.133201003074646, - "learning_rate": 3.9707335417949015e-05, - "loss": 0.005828146636486053, - "step": 11535 - }, - { - "epoch": 1.9676044330775788, - "grad_norm": 0.05620214343070984, - "learning_rate": 3.968175738062303e-05, - "loss": 0.004607116058468819, - "step": 11540 - }, - { - "epoch": 1.9684569479965899, - "grad_norm": 0.05371567979454994, - "learning_rate": 3.965617832478015e-05, - "loss": 0.004455961659550667, - "step": 11545 - }, - { - "epoch": 1.969309462915601, - "grad_norm": 0.10317978262901306, - "learning_rate": 3.96305982623615e-05, - "loss": 0.004697806015610695, - "step": 11550 - }, - { - "epoch": 1.970161977834612, - "grad_norm": 0.08786958456039429, - "learning_rate": 3.96050172053087e-05, - "loss": 0.005183818191289902, - "step": 11555 - }, - { - "epoch": 1.971014492753623, - "grad_norm": 0.07750507444143295, - "learning_rate": 3.957943516556385e-05, - "loss": 0.005475999787449837, - "step": 11560 - }, - { - "epoch": 1.9718670076726341, - "grad_norm": 0.07066313922405243, - "learning_rate": 3.955385215506949e-05, - "loss": 0.005772550404071808, - "step": 11565 - }, - { - "epoch": 1.9727195225916454, - "grad_norm": 0.08183038979768753, - "learning_rate": 3.952826818576863e-05, - "loss": 0.005305550992488861, - "step": 11570 - }, - { - "epoch": 1.9735720375106565, - "grad_norm": 0.075381800532341, - "learning_rate": 3.95026832696047e-05, - "loss": 0.00803310126066208, - "step": 11575 - }, - { - "epoch": 1.9744245524296675, - "grad_norm": 0.09064166992902756, - "learning_rate": 3.9477097418521616e-05, - "loss": 0.006380292773246765, - "step": 11580 - }, - { - "epoch": 1.9752770673486786, - "grad_norm": 0.09140465408563614, - "learning_rate": 3.945151064446367e-05, - "loss": 0.00863645225763321, - "step": 11585 - }, - { - "epoch": 1.9761295822676896, - "grad_norm": 0.09985008090734482, - "learning_rate": 3.942592295937565e-05, - "loss": 0.005205995962023735, - "step": 11590 - }, - { - "epoch": 1.976982097186701, - "grad_norm": 0.07968702167272568, - "learning_rate": 3.940033437520273e-05, - "loss": 0.006467466801404953, - "step": 11595 - }, - { - "epoch": 1.977834612105712, - "grad_norm": 0.0925409123301506, - "learning_rate": 3.937474490389051e-05, - "loss": 0.006804432719945908, - "step": 11600 - }, - { - "epoch": 1.978687127024723, - "grad_norm": 0.053421750664711, - "learning_rate": 3.9349154557385e-05, - "loss": 0.0067677564918994905, - "step": 11605 - }, - { - "epoch": 1.979539641943734, - "grad_norm": 0.07791347801685333, - "learning_rate": 3.9323563347632624e-05, - "loss": 0.006826826930046081, - "step": 11610 - }, - { - "epoch": 1.9803921568627452, - "grad_norm": 0.08627293258905411, - "learning_rate": 3.929797128658024e-05, - "loss": 0.00804663747549057, - "step": 11615 - }, - { - "epoch": 1.9812446717817562, - "grad_norm": 0.06506595015525818, - "learning_rate": 3.927237838617503e-05, - "loss": 0.005456966534256935, - "step": 11620 - }, - { - "epoch": 1.9820971867007673, - "grad_norm": 0.09555826336145401, - "learning_rate": 3.924678465836465e-05, - "loss": 0.005365721881389618, - "step": 11625 - }, - { - "epoch": 1.9829497016197783, - "grad_norm": 0.09176401793956757, - "learning_rate": 3.922119011509706e-05, - "loss": 0.006210924685001373, - "step": 11630 - }, - { - "epoch": 1.9838022165387894, - "grad_norm": 0.05260130763053894, - "learning_rate": 3.919559476832069e-05, - "loss": 0.004408955946564675, - "step": 11635 - }, - { - "epoch": 1.9846547314578005, - "grad_norm": 0.0875319391489029, - "learning_rate": 3.916999862998427e-05, - "loss": 0.005069036781787872, - "step": 11640 - }, - { - "epoch": 1.9855072463768115, - "grad_norm": 0.10335614532232285, - "learning_rate": 3.9144401712036936e-05, - "loss": 0.007199827581644058, - "step": 11645 - }, - { - "epoch": 1.9863597612958226, - "grad_norm": 0.09518889337778091, - "learning_rate": 3.9118804026428194e-05, - "loss": 0.00541754923760891, - "step": 11650 - }, - { - "epoch": 1.9872122762148337, - "grad_norm": 0.06707368791103363, - "learning_rate": 3.9093205585107863e-05, - "loss": 0.00641927570104599, - "step": 11655 - }, - { - "epoch": 1.9880647911338447, - "grad_norm": 0.10102292895317078, - "learning_rate": 3.906760640002618e-05, - "loss": 0.007096148282289505, - "step": 11660 - }, - { - "epoch": 1.9889173060528558, - "grad_norm": 0.0690481886267662, - "learning_rate": 3.904200648313368e-05, - "loss": 0.0063364550471305845, - "step": 11665 - }, - { - "epoch": 1.989769820971867, - "grad_norm": 0.1051480695605278, - "learning_rate": 3.901640584638126e-05, - "loss": 0.009133437275886535, - "step": 11670 - }, - { - "epoch": 1.9906223358908781, - "grad_norm": 0.0857042595744133, - "learning_rate": 3.899080450172015e-05, - "loss": 0.007245839387178421, - "step": 11675 - }, - { - "epoch": 1.9914748508098892, - "grad_norm": 0.04038793221116066, - "learning_rate": 3.8965202461101904e-05, - "loss": 0.005575920641422272, - "step": 11680 - }, - { - "epoch": 1.9923273657289002, - "grad_norm": 0.06331093609333038, - "learning_rate": 3.893959973647842e-05, - "loss": 0.004866635054349899, - "step": 11685 - }, - { - "epoch": 1.9931798806479113, - "grad_norm": 0.08694019168615341, - "learning_rate": 3.891399633980188e-05, - "loss": 0.004249059408903122, - "step": 11690 - }, - { - "epoch": 1.9940323955669226, - "grad_norm": 0.06739087402820587, - "learning_rate": 3.888839228302482e-05, - "loss": 0.006520142406225204, - "step": 11695 - }, - { - "epoch": 1.9948849104859336, - "grad_norm": 0.09432726353406906, - "learning_rate": 3.886278757810005e-05, - "loss": 0.006377060711383819, - "step": 11700 - }, - { - "epoch": 1.9957374254049447, - "grad_norm": 0.040565814822912216, - "learning_rate": 3.883718223698071e-05, - "loss": 0.0062430910766124725, - "step": 11705 - }, - { - "epoch": 1.9965899403239558, - "grad_norm": 0.09249477833509445, - "learning_rate": 3.881157627162022e-05, - "loss": 0.005447167158126831, - "step": 11710 - }, - { - "epoch": 1.9974424552429668, - "grad_norm": 0.08561582118272781, - "learning_rate": 3.87859696939723e-05, - "loss": 0.0067646786570549015, - "step": 11715 - }, - { - "epoch": 1.9982949701619779, - "grad_norm": 0.0771077573299408, - "learning_rate": 3.876036251599094e-05, - "loss": 0.006473222374916076, - "step": 11720 - }, - { - "epoch": 1.999147485080989, - "grad_norm": 0.047942496836185455, - "learning_rate": 3.873475474963044e-05, - "loss": 0.004876254498958588, - "step": 11725 - }, - { - "epoch": 1.9996589940323957, - "eval_loss": 0.03507082909345627, - "eval_runtime": 3.6311, - "eval_samples_per_second": 69.4, - "eval_steps_per_second": 1.102, - "step": 11728 - }, - { - "eval_cer_subset": 0.01172504763300601, - "eval_cer_subset_edit_distance": 720, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 11728 - }, - { - "epoch": 2.0, - "grad_norm": 0.09595198184251785, - "learning_rate": 3.8709146406845345e-05, - "loss": 0.005297505855560302, - "step": 11730 - }, - { - "epoch": 2.000852514919011, - "grad_norm": 0.0500548854470253, - "learning_rate": 3.8683537499590486e-05, - "loss": 0.0029773740097880363, - "step": 11735 - }, - { - "epoch": 2.001705029838022, - "grad_norm": 0.043145813047885895, - "learning_rate": 3.865792803982097e-05, - "loss": 0.0026956576853990554, - "step": 11740 - }, - { - "epoch": 2.002557544757033, - "grad_norm": 0.06828423589468002, - "learning_rate": 3.86323180394921e-05, - "loss": 0.0032785605639219285, - "step": 11745 - }, - { - "epoch": 2.0034100596760442, - "grad_norm": 0.05070719122886658, - "learning_rate": 3.8606707510559514e-05, - "loss": 0.0025875838473439217, - "step": 11750 - }, - { - "epoch": 2.0042625745950553, - "grad_norm": 0.03793288394808769, - "learning_rate": 3.8581096464979046e-05, - "loss": 0.003196726739406586, - "step": 11755 - }, - { - "epoch": 2.0051150895140664, - "grad_norm": 0.058611899614334106, - "learning_rate": 3.8555484914706783e-05, - "loss": 0.0025842227041721344, - "step": 11760 - }, - { - "epoch": 2.0059676044330774, - "grad_norm": 0.05239633843302727, - "learning_rate": 3.8529872871699064e-05, - "loss": 0.0033856891095638275, - "step": 11765 - }, - { - "epoch": 2.0068201193520885, - "grad_norm": 0.0694168210029602, - "learning_rate": 3.8504260347912414e-05, - "loss": 0.0036750052124261854, - "step": 11770 - }, - { - "epoch": 2.0076726342710995, - "grad_norm": 0.05401293560862541, - "learning_rate": 3.847864735530364e-05, - "loss": 0.0020644858479499815, - "step": 11775 - }, - { - "epoch": 2.008525149190111, - "grad_norm": 0.024288944900035858, - "learning_rate": 3.8453033905829715e-05, - "loss": 0.0030498920008540154, - "step": 11780 - }, - { - "epoch": 2.009377664109122, - "grad_norm": 0.07617825269699097, - "learning_rate": 3.842742001144787e-05, - "loss": 0.002012002095580101, - "step": 11785 - }, - { - "epoch": 2.010230179028133, - "grad_norm": 0.05394979938864708, - "learning_rate": 3.8401805684115514e-05, - "loss": 0.0013803424313664435, - "step": 11790 - }, - { - "epoch": 2.0110826939471442, - "grad_norm": 0.04705117642879486, - "learning_rate": 3.837619093579025e-05, - "loss": 0.0019010987132787704, - "step": 11795 - }, - { - "epoch": 2.0119352088661553, - "grad_norm": 0.04174034297466278, - "learning_rate": 3.835057577842993e-05, - "loss": 0.00339580737054348, - "step": 11800 - }, - { - "epoch": 2.0127877237851663, - "grad_norm": 0.05027524381875992, - "learning_rate": 3.832496022399254e-05, - "loss": 0.003779648244380951, - "step": 11805 - }, - { - "epoch": 2.0136402387041774, - "grad_norm": 0.06344325840473175, - "learning_rate": 3.829934428443627e-05, - "loss": 0.003120606765151024, - "step": 11810 - }, - { - "epoch": 2.0144927536231885, - "grad_norm": 0.04142893850803375, - "learning_rate": 3.827372797171949e-05, - "loss": 0.001541936956346035, - "step": 11815 - }, - { - "epoch": 2.0153452685421995, - "grad_norm": 0.05739682540297508, - "learning_rate": 3.8248111297800766e-05, - "loss": 0.002022813446819782, - "step": 11820 - }, - { - "epoch": 2.0161977834612106, - "grad_norm": 0.05701421573758125, - "learning_rate": 3.82224942746388e-05, - "loss": 0.0032159242779016494, - "step": 11825 - }, - { - "epoch": 2.0170502983802217, - "grad_norm": 0.05839217081665993, - "learning_rate": 3.8196876914192476e-05, - "loss": 0.0019759060814976692, - "step": 11830 - }, - { - "epoch": 2.0179028132992327, - "grad_norm": 0.04104325920343399, - "learning_rate": 3.8171259228420824e-05, - "loss": 0.0030811641365289686, - "step": 11835 - }, - { - "epoch": 2.0187553282182438, - "grad_norm": 0.05367572233080864, - "learning_rate": 3.814564122928303e-05, - "loss": 0.0024660680443048476, - "step": 11840 - }, - { - "epoch": 2.019607843137255, - "grad_norm": 0.07062884420156479, - "learning_rate": 3.8120022928738444e-05, - "loss": 0.0028638459742069246, - "step": 11845 - }, - { - "epoch": 2.020460358056266, - "grad_norm": 0.1098889485001564, - "learning_rate": 3.809440433874652e-05, - "loss": 0.002245648391544819, - "step": 11850 - }, - { - "epoch": 2.021312872975277, - "grad_norm": 0.11214791238307953, - "learning_rate": 3.806878547126689e-05, - "loss": 0.0025152696296572687, - "step": 11855 - }, - { - "epoch": 2.022165387894288, - "grad_norm": 0.0809246301651001, - "learning_rate": 3.804316633825926e-05, - "loss": 0.0030847292393445967, - "step": 11860 - }, - { - "epoch": 2.023017902813299, - "grad_norm": 0.05590866506099701, - "learning_rate": 3.801754695168353e-05, - "loss": 0.002259066328406334, - "step": 11865 - }, - { - "epoch": 2.02387041773231, - "grad_norm": 0.061719413846731186, - "learning_rate": 3.799192732349967e-05, - "loss": 0.003117647022008896, - "step": 11870 - }, - { - "epoch": 2.024722932651321, - "grad_norm": 0.05439555272459984, - "learning_rate": 3.796630746566777e-05, - "loss": 0.00280950702726841, - "step": 11875 - }, - { - "epoch": 2.0255754475703327, - "grad_norm": 0.07110737264156342, - "learning_rate": 3.794068739014802e-05, - "loss": 0.0022924147546291352, - "step": 11880 - }, - { - "epoch": 2.0264279624893438, - "grad_norm": 0.035215508192777634, - "learning_rate": 3.791506710890075e-05, - "loss": 0.0014670810662209988, - "step": 11885 - }, - { - "epoch": 2.027280477408355, - "grad_norm": 0.04196110740303993, - "learning_rate": 3.7889446633886345e-05, - "loss": 0.002128283493220806, - "step": 11890 - }, - { - "epoch": 2.028132992327366, - "grad_norm": 0.02117479033768177, - "learning_rate": 3.7863825977065295e-05, - "loss": 0.002085634134709835, - "step": 11895 - }, - { - "epoch": 2.028985507246377, - "grad_norm": 0.137400820851326, - "learning_rate": 3.783820515039818e-05, - "loss": 0.003697726130485535, - "step": 11900 - }, - { - "epoch": 2.029838022165388, - "grad_norm": 0.05773406848311424, - "learning_rate": 3.781258416584565e-05, - "loss": 0.0020811671391129495, - "step": 11905 - }, - { - "epoch": 2.030690537084399, - "grad_norm": 0.02017928846180439, - "learning_rate": 3.7786963035368425e-05, - "loss": 0.002070310711860657, - "step": 11910 - }, - { - "epoch": 2.03154305200341, - "grad_norm": 0.023989839479327202, - "learning_rate": 3.7761341770927314e-05, - "loss": 0.0035201530903577805, - "step": 11915 - }, - { - "epoch": 2.032395566922421, - "grad_norm": 0.05773681029677391, - "learning_rate": 3.7735720384483176e-05, - "loss": 0.002326706610620022, - "step": 11920 - }, - { - "epoch": 2.0332480818414322, - "grad_norm": 0.06733391433954239, - "learning_rate": 3.771009888799692e-05, - "loss": 0.0019989268854260444, - "step": 11925 - }, - { - "epoch": 2.0341005967604433, - "grad_norm": 0.09590540081262589, - "learning_rate": 3.76844772934295e-05, - "loss": 0.0024355117231607435, - "step": 11930 - }, - { - "epoch": 2.0349531116794544, - "grad_norm": 0.027501709759235382, - "learning_rate": 3.765885561274196e-05, - "loss": 0.0011640249751508236, - "step": 11935 - }, - { - "epoch": 2.0358056265984654, - "grad_norm": 0.031739719212055206, - "learning_rate": 3.7633233857895326e-05, - "loss": 0.0022482817992568018, - "step": 11940 - }, - { - "epoch": 2.0366581415174765, - "grad_norm": 0.027232196182012558, - "learning_rate": 3.760761204085071e-05, - "loss": 0.0018043683841824532, - "step": 11945 - }, - { - "epoch": 2.0375106564364875, - "grad_norm": 0.08634094148874283, - "learning_rate": 3.75819901735692e-05, - "loss": 0.0024119339883327483, - "step": 11950 - }, - { - "epoch": 2.0383631713554986, - "grad_norm": 0.05877285450696945, - "learning_rate": 3.755636826801194e-05, - "loss": 0.0009346712380647659, - "step": 11955 - }, - { - "epoch": 2.0392156862745097, - "grad_norm": 0.034714680165052414, - "learning_rate": 3.7530746336140115e-05, - "loss": 0.0021316718310117723, - "step": 11960 - }, - { - "epoch": 2.0400682011935207, - "grad_norm": 0.05897806957364082, - "learning_rate": 3.750512438991487e-05, - "loss": 0.0029691245406866075, - "step": 11965 - }, - { - "epoch": 2.040920716112532, - "grad_norm": 0.07302019745111465, - "learning_rate": 3.747950244129739e-05, - "loss": 0.0023437861353158953, - "step": 11970 - }, - { - "epoch": 2.041773231031543, - "grad_norm": 0.07187193632125854, - "learning_rate": 3.745388050224885e-05, - "loss": 0.0016971008852124214, - "step": 11975 - }, - { - "epoch": 2.0426257459505544, - "grad_norm": 0.05619340017437935, - "learning_rate": 3.742825858473044e-05, - "loss": 0.0021343151107430457, - "step": 11980 - }, - { - "epoch": 2.0434782608695654, - "grad_norm": 0.08814098685979843, - "learning_rate": 3.7402636700703295e-05, - "loss": 0.0026463912799954414, - "step": 11985 - }, - { - "epoch": 2.0443307757885765, - "grad_norm": 0.10130181908607483, - "learning_rate": 3.737701486212859e-05, - "loss": 0.0020437544211745263, - "step": 11990 - }, - { - "epoch": 2.0451832907075875, - "grad_norm": 0.06105076149106026, - "learning_rate": 3.7351393080967416e-05, - "loss": 0.002344959042966366, - "step": 11995 - }, - { - "epoch": 2.0460358056265986, - "grad_norm": 0.052822742611169815, - "learning_rate": 3.732577136918091e-05, - "loss": 0.0020566854625940325, - "step": 12000 - }, - { - "epoch": 2.0468883205456097, - "grad_norm": 0.10074899345636368, - "learning_rate": 3.730014973873013e-05, - "loss": 0.0026124339550733567, - "step": 12005 - }, - { - "epoch": 2.0477408354646207, - "grad_norm": 0.025597436353564262, - "learning_rate": 3.7274528201576095e-05, - "loss": 0.001905813068151474, - "step": 12010 - }, - { - "epoch": 2.0485933503836318, - "grad_norm": 0.05437781289219856, - "learning_rate": 3.7248906769679776e-05, - "loss": 0.0025912046432495115, - "step": 12015 - }, - { - "epoch": 2.049445865302643, - "grad_norm": 0.07095912098884583, - "learning_rate": 3.722328545500215e-05, - "loss": 0.002769463881850243, - "step": 12020 - }, - { - "epoch": 2.050298380221654, - "grad_norm": 0.14383850991725922, - "learning_rate": 3.719766426950408e-05, - "loss": 0.0038499854505062102, - "step": 12025 - }, - { - "epoch": 2.051150895140665, - "grad_norm": 0.06089269369840622, - "learning_rate": 3.7172043225146386e-05, - "loss": 0.002288899011909962, - "step": 12030 - }, - { - "epoch": 2.052003410059676, - "grad_norm": 0.05808301270008087, - "learning_rate": 3.7146422333889824e-05, - "loss": 0.0028305932879447936, - "step": 12035 - }, - { - "epoch": 2.052855924978687, - "grad_norm": 0.13334520161151886, - "learning_rate": 3.712080160769506e-05, - "loss": 0.00331525094807148, - "step": 12040 - }, - { - "epoch": 2.053708439897698, - "grad_norm": 0.03266080096364021, - "learning_rate": 3.709518105852273e-05, - "loss": 0.0020869884639978407, - "step": 12045 - }, - { - "epoch": 2.054560954816709, - "grad_norm": 0.07307332009077072, - "learning_rate": 3.706956069833336e-05, - "loss": 0.0023028414696455004, - "step": 12050 - }, - { - "epoch": 2.0554134697357203, - "grad_norm": 0.06093568354845047, - "learning_rate": 3.7043940539087366e-05, - "loss": 0.0022027945145964623, - "step": 12055 - }, - { - "epoch": 2.0562659846547313, - "grad_norm": 0.04207700863480568, - "learning_rate": 3.70183205927451e-05, - "loss": 0.0016738155856728554, - "step": 12060 - }, - { - "epoch": 2.0571184995737424, - "grad_norm": 0.046319641172885895, - "learning_rate": 3.699270087126679e-05, - "loss": 0.002480871044099331, - "step": 12065 - }, - { - "epoch": 2.0579710144927534, - "grad_norm": 0.042888909578323364, - "learning_rate": 3.69670813866126e-05, - "loss": 0.0020912257954478265, - "step": 12070 - }, - { - "epoch": 2.0588235294117645, - "grad_norm": 0.05530136078596115, - "learning_rate": 3.694146215074256e-05, - "loss": 0.0021427463740110396, - "step": 12075 - }, - { - "epoch": 2.059676044330776, - "grad_norm": 0.04992877319455147, - "learning_rate": 3.6915843175616555e-05, - "loss": 0.001970967650413513, - "step": 12080 - }, - { - "epoch": 2.060528559249787, - "grad_norm": 0.07341081649065018, - "learning_rate": 3.6890224473194373e-05, - "loss": 0.003120069019496441, - "step": 12085 - }, - { - "epoch": 2.061381074168798, - "grad_norm": 0.05361134931445122, - "learning_rate": 3.686460605543571e-05, - "loss": 0.0030833475291728975, - "step": 12090 - }, - { - "epoch": 2.062233589087809, - "grad_norm": 0.0904894769191742, - "learning_rate": 3.683898793430008e-05, - "loss": 0.0020733945071697234, - "step": 12095 - }, - { - "epoch": 2.0630861040068202, - "grad_norm": 0.03312591835856438, - "learning_rate": 3.681337012174686e-05, - "loss": 0.002308916300535202, - "step": 12100 - }, - { - "epoch": 2.0639386189258313, - "grad_norm": 0.05372268706560135, - "learning_rate": 3.6787752629735314e-05, - "loss": 0.0024915780872106553, - "step": 12105 - }, - { - "epoch": 2.0647911338448424, - "grad_norm": 0.08257800340652466, - "learning_rate": 3.676213547022452e-05, - "loss": 0.001413002610206604, - "step": 12110 - }, - { - "epoch": 2.0656436487638534, - "grad_norm": 0.037859030067920685, - "learning_rate": 3.673651865517344e-05, - "loss": 0.002315748296678066, - "step": 12115 - }, - { - "epoch": 2.0664961636828645, - "grad_norm": 0.04125140607357025, - "learning_rate": 3.6710902196540856e-05, - "loss": 0.0022393757477402686, - "step": 12120 - }, - { - "epoch": 2.0673486786018755, - "grad_norm": 0.09325335919857025, - "learning_rate": 3.668528610628538e-05, - "loss": 0.003246062248945236, - "step": 12125 - }, - { - "epoch": 2.0682011935208866, - "grad_norm": 0.09278098493814468, - "learning_rate": 3.665967039636543e-05, - "loss": 0.0027722738683223723, - "step": 12130 - }, - { - "epoch": 2.0690537084398977, - "grad_norm": 0.07906672358512878, - "learning_rate": 3.663405507873931e-05, - "loss": 0.0035691894590854645, - "step": 12135 - }, - { - "epoch": 2.0699062233589087, - "grad_norm": 0.04077119752764702, - "learning_rate": 3.660844016536507e-05, - "loss": 0.0018417894840240478, - "step": 12140 - }, - { - "epoch": 2.07075873827792, - "grad_norm": 0.08916836231946945, - "learning_rate": 3.6582825668200636e-05, - "loss": 0.0019499020650982856, - "step": 12145 - }, - { - "epoch": 2.071611253196931, - "grad_norm": 0.017643144354224205, - "learning_rate": 3.655721159920368e-05, - "loss": 0.0018016694113612175, - "step": 12150 - }, - { - "epoch": 2.072463768115942, - "grad_norm": 0.046675924211740494, - "learning_rate": 3.6531597970331704e-05, - "loss": 0.0023558875545859337, - "step": 12155 - }, - { - "epoch": 2.073316283034953, - "grad_norm": 0.06159510463476181, - "learning_rate": 3.650598479354202e-05, - "loss": 0.003485919535160065, - "step": 12160 - }, - { - "epoch": 2.074168797953964, - "grad_norm": 0.10620608925819397, - "learning_rate": 3.64803720807917e-05, - "loss": 0.0021355047821998594, - "step": 12165 - }, - { - "epoch": 2.075021312872975, - "grad_norm": 0.03321434184908867, - "learning_rate": 3.645475984403761e-05, - "loss": 0.0027330033481121063, - "step": 12170 - }, - { - "epoch": 2.075873827791986, - "grad_norm": 0.05574263632297516, - "learning_rate": 3.642914809523639e-05, - "loss": 0.0017123395577073098, - "step": 12175 - }, - { - "epoch": 2.0767263427109977, - "grad_norm": 0.045334603637456894, - "learning_rate": 3.640353684634446e-05, - "loss": 0.001525832526385784, - "step": 12180 - }, - { - "epoch": 2.0775788576300087, - "grad_norm": 0.05117806792259216, - "learning_rate": 3.6377926109318005e-05, - "loss": 0.0022421007975935935, - "step": 12185 - }, - { - "epoch": 2.0784313725490198, - "grad_norm": 0.02836792916059494, - "learning_rate": 3.635231589611297e-05, - "loss": 0.003241851553320885, - "step": 12190 - }, - { - "epoch": 2.079283887468031, - "grad_norm": 0.13245631754398346, - "learning_rate": 3.632670621868506e-05, - "loss": 0.0028171174228191374, - "step": 12195 - }, - { - "epoch": 2.080136402387042, - "grad_norm": 0.04175787791609764, - "learning_rate": 3.63010970889897e-05, - "loss": 0.0026013338938355446, - "step": 12200 - }, - { - "epoch": 2.080988917306053, - "grad_norm": 0.022211721166968346, - "learning_rate": 3.6275488518982104e-05, - "loss": 0.0029422508552670477, - "step": 12205 - }, - { - "epoch": 2.081841432225064, - "grad_norm": 0.0889682024717331, - "learning_rate": 3.6249880520617205e-05, - "loss": 0.002521348185837269, - "step": 12210 - }, - { - "epoch": 2.082693947144075, - "grad_norm": 0.022678803652524948, - "learning_rate": 3.622427310584967e-05, - "loss": 0.0010427280329167842, - "step": 12215 - }, - { - "epoch": 2.083546462063086, - "grad_norm": 0.07812847197055817, - "learning_rate": 3.6198666286633886e-05, - "loss": 0.002325686253607273, - "step": 12220 - }, - { - "epoch": 2.084398976982097, - "grad_norm": 0.06912051141262054, - "learning_rate": 3.6173060074923945e-05, - "loss": 0.0022675972431898117, - "step": 12225 - }, - { - "epoch": 2.0852514919011083, - "grad_norm": 0.02951810136437416, - "learning_rate": 3.6147454482673715e-05, - "loss": 0.00159697774797678, - "step": 12230 - }, - { - "epoch": 2.0861040068201193, - "grad_norm": 0.11821833997964859, - "learning_rate": 3.6121849521836735e-05, - "loss": 0.002206057496368885, - "step": 12235 - }, - { - "epoch": 2.0869565217391304, - "grad_norm": 0.05461777001619339, - "learning_rate": 3.609624520436624e-05, - "loss": 0.0012241648510098457, - "step": 12240 - }, - { - "epoch": 2.0878090366581414, - "grad_norm": 0.05038715526461601, - "learning_rate": 3.607064154221516e-05, - "loss": 0.002225806750357151, - "step": 12245 - }, - { - "epoch": 2.0886615515771525, - "grad_norm": 0.03050738200545311, - "learning_rate": 3.604503854733617e-05, - "loss": 0.0020998189225792884, - "step": 12250 - }, - { - "epoch": 2.0895140664961636, - "grad_norm": 0.07000287622213364, - "learning_rate": 3.6019436231681585e-05, - "loss": 0.0022106122225522993, - "step": 12255 - }, - { - "epoch": 2.0903665814151746, - "grad_norm": 0.0332137756049633, - "learning_rate": 3.5993834607203416e-05, - "loss": 0.0020401908084750177, - "step": 12260 - }, - { - "epoch": 2.0912190963341857, - "grad_norm": 0.0996270552277565, - "learning_rate": 3.596823368585336e-05, - "loss": 0.002487153559923172, - "step": 12265 - }, - { - "epoch": 2.0920716112531967, - "grad_norm": 0.1305847465991974, - "learning_rate": 3.594263347958276e-05, - "loss": 0.0028627485036849974, - "step": 12270 - }, - { - "epoch": 2.092924126172208, - "grad_norm": 0.0762234702706337, - "learning_rate": 3.5917034000342664e-05, - "loss": 0.0020220713689923287, - "step": 12275 - }, - { - "epoch": 2.0937766410912193, - "grad_norm": 0.015480007976293564, - "learning_rate": 3.589143526008376e-05, - "loss": 0.00215108972042799, - "step": 12280 - }, - { - "epoch": 2.0946291560102304, - "grad_norm": 0.0862250104546547, - "learning_rate": 3.5865837270756385e-05, - "loss": 0.0020705640316009523, - "step": 12285 - }, - { - "epoch": 2.0954816709292414, - "grad_norm": 0.03390849754214287, - "learning_rate": 3.584024004431052e-05, - "loss": 0.002040168456733227, - "step": 12290 - }, - { - "epoch": 2.0963341858482525, - "grad_norm": 0.07754851132631302, - "learning_rate": 3.581464359269582e-05, - "loss": 0.0029265256598591805, - "step": 12295 - }, - { - "epoch": 2.0971867007672635, - "grad_norm": 0.0625162348151207, - "learning_rate": 3.578904792786155e-05, - "loss": 0.0020755715668201447, - "step": 12300 - }, - { - "epoch": 2.0980392156862746, - "grad_norm": 0.10999561101198196, - "learning_rate": 3.576345306175663e-05, - "loss": 0.0027062267065048216, - "step": 12305 - }, - { - "epoch": 2.0988917306052857, - "grad_norm": 0.03573682904243469, - "learning_rate": 3.573785900632959e-05, - "loss": 0.00178314708173275, - "step": 12310 - }, - { - "epoch": 2.0997442455242967, - "grad_norm": 0.07235981523990631, - "learning_rate": 3.5712265773528564e-05, - "loss": 0.00233871191740036, - "step": 12315 - }, - { - "epoch": 2.100596760443308, - "grad_norm": 0.054438747465610504, - "learning_rate": 3.568667337530135e-05, - "loss": 0.0031350374221801756, - "step": 12320 - }, - { - "epoch": 2.101449275362319, - "grad_norm": 0.07696446031332016, - "learning_rate": 3.566108182359533e-05, - "loss": 0.0019116310402750968, - "step": 12325 - }, - { - "epoch": 2.10230179028133, - "grad_norm": 0.0676850974559784, - "learning_rate": 3.563549113035749e-05, - "loss": 0.0011704936623573303, - "step": 12330 - }, - { - "epoch": 2.103154305200341, - "grad_norm": 0.07241418212652206, - "learning_rate": 3.5609901307534416e-05, - "loss": 0.002332131937146187, - "step": 12335 - }, - { - "epoch": 2.104006820119352, - "grad_norm": 0.0832296758890152, - "learning_rate": 3.558431236707227e-05, - "loss": 0.002539648115634918, - "step": 12340 - }, - { - "epoch": 2.104859335038363, - "grad_norm": 0.046911224722862244, - "learning_rate": 3.555872432091684e-05, - "loss": 0.0015112090855836867, - "step": 12345 - }, - { - "epoch": 2.105711849957374, - "grad_norm": 0.09462827444076538, - "learning_rate": 3.553313718101348e-05, - "loss": 0.0024237846955657005, - "step": 12350 - }, - { - "epoch": 2.106564364876385, - "grad_norm": 0.06934045255184174, - "learning_rate": 3.550755095930711e-05, - "loss": 0.0014186175540089607, - "step": 12355 - }, - { - "epoch": 2.1074168797953963, - "grad_norm": 0.05409622564911842, - "learning_rate": 3.5481965667742216e-05, - "loss": 0.0016573246568441391, - "step": 12360 - }, - { - "epoch": 2.1082693947144073, - "grad_norm": 0.05712766572833061, - "learning_rate": 3.545638131826289e-05, - "loss": 0.0029039720073342325, - "step": 12365 - }, - { - "epoch": 2.1091219096334184, - "grad_norm": 0.05685155466198921, - "learning_rate": 3.543079792281274e-05, - "loss": 0.0016390934586524963, - "step": 12370 - }, - { - "epoch": 2.10997442455243, - "grad_norm": 0.06140974909067154, - "learning_rate": 3.5405215493334966e-05, - "loss": 0.0038812048733234406, - "step": 12375 - }, - { - "epoch": 2.110826939471441, - "grad_norm": 0.0662747323513031, - "learning_rate": 3.537963404177227e-05, - "loss": 0.0029465768486261366, - "step": 12380 - }, - { - "epoch": 2.111679454390452, - "grad_norm": 0.05666056647896767, - "learning_rate": 3.535405358006694e-05, - "loss": 0.0028562054038047792, - "step": 12385 - }, - { - "epoch": 2.112531969309463, - "grad_norm": 0.02187039703130722, - "learning_rate": 3.532847412016077e-05, - "loss": 0.0017194624990224839, - "step": 12390 - }, - { - "epoch": 2.113384484228474, - "grad_norm": 0.040781840682029724, - "learning_rate": 3.530289567399513e-05, - "loss": 0.0026536308228969573, - "step": 12395 - }, - { - "epoch": 2.114236999147485, - "grad_norm": 0.05844609811902046, - "learning_rate": 3.527731825351088e-05, - "loss": 0.0018391696736216544, - "step": 12400 - }, - { - "epoch": 2.1150895140664963, - "grad_norm": 0.08661946654319763, - "learning_rate": 3.52517418706484e-05, - "loss": 0.0028108954429626465, - "step": 12405 - }, - { - "epoch": 2.1159420289855073, - "grad_norm": 0.05540858209133148, - "learning_rate": 3.52261665373476e-05, - "loss": 0.001869852840900421, - "step": 12410 - }, - { - "epoch": 2.1167945439045184, - "grad_norm": 0.05183592066168785, - "learning_rate": 3.520059226554789e-05, - "loss": 0.0038085319101810455, - "step": 12415 - }, - { - "epoch": 2.1176470588235294, - "grad_norm": 0.09019337594509125, - "learning_rate": 3.517501906718822e-05, - "loss": 0.0025485800579190254, - "step": 12420 - }, - { - "epoch": 2.1184995737425405, - "grad_norm": 0.05994381010532379, - "learning_rate": 3.514944695420698e-05, - "loss": 0.0023555709049105644, - "step": 12425 - }, - { - "epoch": 2.1193520886615516, - "grad_norm": 0.07013200968503952, - "learning_rate": 3.512387593854208e-05, - "loss": 0.0023415835574269296, - "step": 12430 - }, - { - "epoch": 2.1202046035805626, - "grad_norm": 0.0558604821562767, - "learning_rate": 3.509830603213094e-05, - "loss": 0.002999695762991905, - "step": 12435 - }, - { - "epoch": 2.1210571184995737, - "grad_norm": 0.054457131773233414, - "learning_rate": 3.507273724691045e-05, - "loss": 0.0022147590294480323, - "step": 12440 - }, - { - "epoch": 2.1219096334185847, - "grad_norm": 0.052365075796842575, - "learning_rate": 3.5047169594816955e-05, - "loss": 0.0023635342717170716, - "step": 12445 - }, - { - "epoch": 2.122762148337596, - "grad_norm": 0.047059565782547, - "learning_rate": 3.502160308778627e-05, - "loss": 0.0015694497153162957, - "step": 12450 - }, - { - "epoch": 2.123614663256607, - "grad_norm": 0.03100336343050003, - "learning_rate": 3.499603773775371e-05, - "loss": 0.0020049646496772765, - "step": 12455 - }, - { - "epoch": 2.124467178175618, - "grad_norm": 0.07436710596084595, - "learning_rate": 3.4970473556654027e-05, - "loss": 0.004277446493506432, - "step": 12460 - }, - { - "epoch": 2.125319693094629, - "grad_norm": 0.044698864221572876, - "learning_rate": 3.4944910556421444e-05, - "loss": 0.0032587334513664245, - "step": 12465 - }, - { - "epoch": 2.12617220801364, - "grad_norm": 0.04725298285484314, - "learning_rate": 3.491934874898961e-05, - "loss": 0.0018061451613903047, - "step": 12470 - }, - { - "epoch": 2.127024722932651, - "grad_norm": 0.04054245352745056, - "learning_rate": 3.4893788146291604e-05, - "loss": 0.0017766639590263366, - "step": 12475 - }, - { - "epoch": 2.1278772378516626, - "grad_norm": 0.06061461195349693, - "learning_rate": 3.486822876025999e-05, - "loss": 0.0025131702423095703, - "step": 12480 - }, - { - "epoch": 2.1287297527706737, - "grad_norm": 0.058438993990421295, - "learning_rate": 3.4842670602826744e-05, - "loss": 0.002218405343592167, - "step": 12485 - }, - { - "epoch": 2.1295822676896847, - "grad_norm": 0.057440634816884995, - "learning_rate": 3.481711368592327e-05, - "loss": 0.0015549706295132637, - "step": 12490 - }, - { - "epoch": 2.130434782608696, - "grad_norm": 0.06638845056295395, - "learning_rate": 3.4791558021480355e-05, - "loss": 0.002662469446659088, - "step": 12495 - }, - { - "epoch": 2.131287297527707, - "grad_norm": 0.06725790351629257, - "learning_rate": 3.476600362142824e-05, - "loss": 0.0024463947862386703, - "step": 12500 - }, - { - "epoch": 2.132139812446718, - "grad_norm": 0.07708985358476639, - "learning_rate": 3.474045049769659e-05, - "loss": 0.0034916583448648454, - "step": 12505 - }, - { - "epoch": 2.132992327365729, - "grad_norm": 0.06412148475646973, - "learning_rate": 3.4714898662214454e-05, - "loss": 0.002831364795565605, - "step": 12510 - }, - { - "epoch": 2.13384484228474, - "grad_norm": 0.04649505391716957, - "learning_rate": 3.468934812691027e-05, - "loss": 0.002048753574490547, - "step": 12515 - }, - { - "epoch": 2.134697357203751, - "grad_norm": 0.04807932674884796, - "learning_rate": 3.4663798903711865e-05, - "loss": 0.0018209950998425485, - "step": 12520 - }, - { - "epoch": 2.135549872122762, - "grad_norm": 0.043283116072416306, - "learning_rate": 3.4638251004546476e-05, - "loss": 0.001797056198120117, - "step": 12525 - }, - { - "epoch": 2.136402387041773, - "grad_norm": 0.015419692732393742, - "learning_rate": 3.4612704441340716e-05, - "loss": 0.002100854739546776, - "step": 12530 - }, - { - "epoch": 2.1372549019607843, - "grad_norm": 0.05244193226099014, - "learning_rate": 3.458715922602057e-05, - "loss": 0.002430478297173977, - "step": 12535 - }, - { - "epoch": 2.1381074168797953, - "grad_norm": 0.08995307981967926, - "learning_rate": 3.4561615370511394e-05, - "loss": 0.0023157089948654176, - "step": 12540 - }, - { - "epoch": 2.1389599317988064, - "grad_norm": 0.06513174623250961, - "learning_rate": 3.4536072886737894e-05, - "loss": 0.002109107933938503, - "step": 12545 - }, - { - "epoch": 2.1398124467178175, - "grad_norm": 0.12199243903160095, - "learning_rate": 3.4510531786624176e-05, - "loss": 0.0016247857362031936, - "step": 12550 - }, - { - "epoch": 2.1406649616368285, - "grad_norm": 0.06062543764710426, - "learning_rate": 3.4484992082093665e-05, - "loss": 0.0033494606614112854, - "step": 12555 - }, - { - "epoch": 2.1415174765558396, - "grad_norm": 0.08636222034692764, - "learning_rate": 3.445945378506915e-05, - "loss": 0.0037529505789279938, - "step": 12560 - }, - { - "epoch": 2.1423699914748506, - "grad_norm": 0.027961688116192818, - "learning_rate": 3.443391690747274e-05, - "loss": 0.0016466494649648666, - "step": 12565 - }, - { - "epoch": 2.1432225063938617, - "grad_norm": 0.033621031790971756, - "learning_rate": 3.440838146122591e-05, - "loss": 0.002477872557938099, - "step": 12570 - }, - { - "epoch": 2.144075021312873, - "grad_norm": 0.08104594051837921, - "learning_rate": 3.4382847458249453e-05, - "loss": 0.0031348835676908494, - "step": 12575 - }, - { - "epoch": 2.1449275362318843, - "grad_norm": 0.07412353157997131, - "learning_rate": 3.4357314910463506e-05, - "loss": 0.002509618178009987, - "step": 12580 - }, - { - "epoch": 2.1457800511508953, - "grad_norm": 0.04261288791894913, - "learning_rate": 3.43317838297875e-05, - "loss": 0.0021477997303009032, - "step": 12585 - }, - { - "epoch": 2.1466325660699064, - "grad_norm": 0.15133292973041534, - "learning_rate": 3.430625422814018e-05, - "loss": 0.0033604972064495086, - "step": 12590 - }, - { - "epoch": 2.1474850809889174, - "grad_norm": 0.08455967903137207, - "learning_rate": 3.428072611743962e-05, - "loss": 0.0035134248435497286, - "step": 12595 - }, - { - "epoch": 2.1483375959079285, - "grad_norm": 0.10830427706241608, - "learning_rate": 3.425519950960321e-05, - "loss": 0.003783620521426201, - "step": 12600 - }, - { - "epoch": 2.1491901108269396, - "grad_norm": 0.05701782926917076, - "learning_rate": 3.422967441654761e-05, - "loss": 0.0017763100564479827, - "step": 12605 - }, - { - "epoch": 2.1500426257459506, - "grad_norm": 0.058323513716459274, - "learning_rate": 3.420415085018878e-05, - "loss": 0.003765106201171875, - "step": 12610 - }, - { - "epoch": 2.1508951406649617, - "grad_norm": 0.08780697733163834, - "learning_rate": 3.417862882244195e-05, - "loss": 0.0021065909415483473, - "step": 12615 - }, - { - "epoch": 2.1517476555839727, - "grad_norm": 0.08741293847560883, - "learning_rate": 3.415310834522168e-05, - "loss": 0.0022673629224300384, - "step": 12620 - }, - { - "epoch": 2.152600170502984, - "grad_norm": 0.08681067824363708, - "learning_rate": 3.412758943044177e-05, - "loss": 0.0029561318457126617, - "step": 12625 - }, - { - "epoch": 2.153452685421995, - "grad_norm": 0.05104825645685196, - "learning_rate": 3.4102072090015306e-05, - "loss": 0.0028430519625544546, - "step": 12630 - }, - { - "epoch": 2.154305200341006, - "grad_norm": 0.05437494069337845, - "learning_rate": 3.4076556335854606e-05, - "loss": 0.0026259947568178176, - "step": 12635 - }, - { - "epoch": 2.155157715260017, - "grad_norm": 0.016572406515479088, - "learning_rate": 3.4051042179871286e-05, - "loss": 0.00198390893638134, - "step": 12640 - }, - { - "epoch": 2.156010230179028, - "grad_norm": 0.04134957864880562, - "learning_rate": 3.4025529633976216e-05, - "loss": 0.0017651205882430077, - "step": 12645 - }, - { - "epoch": 2.156862745098039, - "grad_norm": 0.04091856628656387, - "learning_rate": 3.400001871007949e-05, - "loss": 0.002631684020161629, - "step": 12650 - }, - { - "epoch": 2.15771526001705, - "grad_norm": 0.08851557224988937, - "learning_rate": 3.397450942009046e-05, - "loss": 0.004056418687105179, - "step": 12655 - }, - { - "epoch": 2.1585677749360612, - "grad_norm": 0.09870146960020065, - "learning_rate": 3.3949001775917686e-05, - "loss": 0.0017272619530558585, - "step": 12660 - }, - { - "epoch": 2.1594202898550723, - "grad_norm": 0.059828147292137146, - "learning_rate": 3.3923495789469016e-05, - "loss": 0.0018833462148904801, - "step": 12665 - }, - { - "epoch": 2.1602728047740833, - "grad_norm": 0.04078202694654465, - "learning_rate": 3.3897991472651495e-05, - "loss": 0.0015183920040726662, - "step": 12670 - }, - { - "epoch": 2.1611253196930944, - "grad_norm": 0.09713901579380035, - "learning_rate": 3.387248883737137e-05, - "loss": 0.002313835546374321, - "step": 12675 - }, - { - "epoch": 2.161977834612106, - "grad_norm": 0.13590694963932037, - "learning_rate": 3.3846987895534116e-05, - "loss": 0.002948279120028019, - "step": 12680 - }, - { - "epoch": 2.162830349531117, - "grad_norm": 0.05830051749944687, - "learning_rate": 3.3821488659044435e-05, - "loss": 0.002866750955581665, - "step": 12685 - }, - { - "epoch": 2.163682864450128, - "grad_norm": 0.08523424714803696, - "learning_rate": 3.3795991139806205e-05, - "loss": 0.001992848888039589, - "step": 12690 - }, - { - "epoch": 2.164535379369139, - "grad_norm": 0.07573958486318588, - "learning_rate": 3.3770495349722534e-05, - "loss": 0.003222312778234482, - "step": 12695 - }, - { - "epoch": 2.16538789428815, - "grad_norm": 0.1144784539937973, - "learning_rate": 3.374500130069569e-05, - "loss": 0.0023121457546949387, - "step": 12700 - }, - { - "epoch": 2.166240409207161, - "grad_norm": 0.037679724395275116, - "learning_rate": 3.371950900462716e-05, - "loss": 0.0022720521315932273, - "step": 12705 - }, - { - "epoch": 2.1670929241261723, - "grad_norm": 0.09523876011371613, - "learning_rate": 3.369401847341756e-05, - "loss": 0.0026744550094008447, - "step": 12710 - }, - { - "epoch": 2.1679454390451833, - "grad_norm": 0.08904188126325607, - "learning_rate": 3.3668529718966753e-05, - "loss": 0.0025367535650730132, - "step": 12715 - }, - { - "epoch": 2.1687979539641944, - "grad_norm": 0.065862737596035, - "learning_rate": 3.364304275317373e-05, - "loss": 0.0017513807862997055, - "step": 12720 - }, - { - "epoch": 2.1696504688832055, - "grad_norm": 0.03308388963341713, - "learning_rate": 3.361755758793665e-05, - "loss": 0.001534645166248083, - "step": 12725 - }, - { - "epoch": 2.1705029838022165, - "grad_norm": 0.11249089986085892, - "learning_rate": 3.359207423515283e-05, - "loss": 0.0012927086092531681, - "step": 12730 - }, - { - "epoch": 2.1713554987212276, - "grad_norm": 0.09918250143527985, - "learning_rate": 3.356659270671875e-05, - "loss": 0.0035567093640565873, - "step": 12735 - }, - { - "epoch": 2.1722080136402386, - "grad_norm": 0.008674295619130135, - "learning_rate": 3.354111301453005e-05, - "loss": 0.0013304737396538258, - "step": 12740 - }, - { - "epoch": 2.1730605285592497, - "grad_norm": 0.09038940817117691, - "learning_rate": 3.351563517048149e-05, - "loss": 0.0022449616342782976, - "step": 12745 - }, - { - "epoch": 2.1739130434782608, - "grad_norm": 0.11863812804222107, - "learning_rate": 3.349015918646695e-05, - "loss": 0.0029456689953804016, - "step": 12750 - }, - { - "epoch": 2.174765558397272, - "grad_norm": 0.055224135518074036, - "learning_rate": 3.34646850743795e-05, - "loss": 0.0021983785554766655, - "step": 12755 - }, - { - "epoch": 2.175618073316283, - "grad_norm": 0.05251838266849518, - "learning_rate": 3.34392128461113e-05, - "loss": 0.0018048876896500587, - "step": 12760 - }, - { - "epoch": 2.176470588235294, - "grad_norm": 0.07146445661783218, - "learning_rate": 3.341374251355361e-05, - "loss": 0.0030932359397411345, - "step": 12765 - }, - { - "epoch": 2.177323103154305, - "grad_norm": 0.03640792518854141, - "learning_rate": 3.338827408859686e-05, - "loss": 0.0016893571242690085, - "step": 12770 - }, - { - "epoch": 2.1781756180733165, - "grad_norm": 0.0680721327662468, - "learning_rate": 3.336280758313052e-05, - "loss": 0.0037735387682914733, - "step": 12775 - }, - { - "epoch": 2.1790281329923276, - "grad_norm": 0.047598470002412796, - "learning_rate": 3.333734300904322e-05, - "loss": 0.002026566304266453, - "step": 12780 - }, - { - "epoch": 2.1798806479113386, - "grad_norm": 0.08361580222845078, - "learning_rate": 3.3311880378222695e-05, - "loss": 0.002865005284547806, - "step": 12785 - }, - { - "epoch": 2.1807331628303497, - "grad_norm": 0.04869835823774338, - "learning_rate": 3.328641970255572e-05, - "loss": 0.0018146531656384468, - "step": 12790 - }, - { - "epoch": 2.1815856777493607, - "grad_norm": 0.06970708072185516, - "learning_rate": 3.326096099392819e-05, - "loss": 0.0022316936403512953, - "step": 12795 - }, - { - "epoch": 2.182438192668372, - "grad_norm": 0.07073621451854706, - "learning_rate": 3.323550426422508e-05, - "loss": 0.0021546846255660057, - "step": 12800 - }, - { - "epoch": 2.183290707587383, - "grad_norm": 0.0552116334438324, - "learning_rate": 3.3210049525330426e-05, - "loss": 0.0022750692442059517, - "step": 12805 - }, - { - "epoch": 2.184143222506394, - "grad_norm": 0.08244488388299942, - "learning_rate": 3.318459678912737e-05, - "loss": 0.0027180306613445284, - "step": 12810 - }, - { - "epoch": 2.184995737425405, - "grad_norm": 0.07275483757257462, - "learning_rate": 3.315914606749808e-05, - "loss": 0.002150987088680267, - "step": 12815 - }, - { - "epoch": 2.185848252344416, - "grad_norm": 0.06152818351984024, - "learning_rate": 3.3133697372323804e-05, - "loss": 0.002709987387061119, - "step": 12820 - }, - { - "epoch": 2.186700767263427, - "grad_norm": 0.07358045876026154, - "learning_rate": 3.310825071548483e-05, - "loss": 0.0029207577928900717, - "step": 12825 - }, - { - "epoch": 2.187553282182438, - "grad_norm": 0.07633842527866364, - "learning_rate": 3.3082806108860516e-05, - "loss": 0.0028854381293058396, - "step": 12830 - }, - { - "epoch": 2.1884057971014492, - "grad_norm": 0.0533052496612072, - "learning_rate": 3.305736356432926e-05, - "loss": 0.0023338528349995612, - "step": 12835 - }, - { - "epoch": 2.1892583120204603, - "grad_norm": 0.09400077164173126, - "learning_rate": 3.303192309376846e-05, - "loss": 0.00362023301422596, - "step": 12840 - }, - { - "epoch": 2.1901108269394713, - "grad_norm": 0.09847433120012283, - "learning_rate": 3.300648470905459e-05, - "loss": 0.003238249197602272, - "step": 12845 - }, - { - "epoch": 2.1909633418584824, - "grad_norm": 0.09695439040660858, - "learning_rate": 3.298104842206314e-05, - "loss": 0.002254056558012962, - "step": 12850 - }, - { - "epoch": 2.1918158567774935, - "grad_norm": 0.07510244101285934, - "learning_rate": 3.295561424466861e-05, - "loss": 0.002555438503623009, - "step": 12855 - }, - { - "epoch": 2.1926683716965045, - "grad_norm": 0.07085850089788437, - "learning_rate": 3.2930182188744524e-05, - "loss": 0.0029295925050973892, - "step": 12860 - }, - { - "epoch": 2.1935208866155156, - "grad_norm": 0.12662498652935028, - "learning_rate": 3.290475226616339e-05, - "loss": 0.0019443847239017486, - "step": 12865 - }, - { - "epoch": 2.1943734015345266, - "grad_norm": 0.08738470077514648, - "learning_rate": 3.2879324488796755e-05, - "loss": 0.002229847013950348, - "step": 12870 - }, - { - "epoch": 2.1952259164535377, - "grad_norm": 0.04957102984189987, - "learning_rate": 3.285389886851517e-05, - "loss": 0.0017434298992156983, - "step": 12875 - }, - { - "epoch": 2.196078431372549, - "grad_norm": 0.057968392968177795, - "learning_rate": 3.282847541718814e-05, - "loss": 0.003453432023525238, - "step": 12880 - }, - { - "epoch": 2.1969309462915603, - "grad_norm": 0.1128922700881958, - "learning_rate": 3.280305414668419e-05, - "loss": 0.0025962982326745987, - "step": 12885 - }, - { - "epoch": 2.1977834612105713, - "grad_norm": 0.0661446675658226, - "learning_rate": 3.2777635068870784e-05, - "loss": 0.002279244549572468, - "step": 12890 - }, - { - "epoch": 2.1986359761295824, - "grad_norm": 0.09260411560535431, - "learning_rate": 3.275221819561443e-05, - "loss": 0.002637815475463867, - "step": 12895 - }, - { - "epoch": 2.1994884910485935, - "grad_norm": 0.08168021589517593, - "learning_rate": 3.272680353878056e-05, - "loss": 0.0029386602342128753, - "step": 12900 - }, - { - "epoch": 2.2003410059676045, - "grad_norm": 0.06187237799167633, - "learning_rate": 3.270139111023358e-05, - "loss": 0.0018257955089211464, - "step": 12905 - }, - { - "epoch": 2.2011935208866156, - "grad_norm": 0.09450117498636246, - "learning_rate": 3.267598092183684e-05, - "loss": 0.0023655250668525698, - "step": 12910 - }, - { - "epoch": 2.2020460358056266, - "grad_norm": 0.060870688408613205, - "learning_rate": 3.2650572985452685e-05, - "loss": 0.001705418713390827, - "step": 12915 - }, - { - "epoch": 2.2028985507246377, - "grad_norm": 0.06867264956235886, - "learning_rate": 3.262516731294237e-05, - "loss": 0.00248488187789917, - "step": 12920 - }, - { - "epoch": 2.2037510656436488, - "grad_norm": 0.07654258608818054, - "learning_rate": 3.259976391616612e-05, - "loss": 0.002200855314731598, - "step": 12925 - }, - { - "epoch": 2.20460358056266, - "grad_norm": 0.06781245768070221, - "learning_rate": 3.257436280698308e-05, - "loss": 0.002006441354751587, - "step": 12930 - }, - { - "epoch": 2.205456095481671, - "grad_norm": 0.045858342200517654, - "learning_rate": 3.254896399725132e-05, - "loss": 0.0020667938515543938, - "step": 12935 - }, - { - "epoch": 2.206308610400682, - "grad_norm": 0.06805605441331863, - "learning_rate": 3.2523567498827865e-05, - "loss": 0.002215307205915451, - "step": 12940 - }, - { - "epoch": 2.207161125319693, - "grad_norm": 0.07554472237825394, - "learning_rate": 3.2498173323568645e-05, - "loss": 0.0021156981587409974, - "step": 12945 - }, - { - "epoch": 2.208013640238704, - "grad_norm": 0.049611154943704605, - "learning_rate": 3.2472781483328506e-05, - "loss": 0.0037985272705554963, - "step": 12950 - }, - { - "epoch": 2.208866155157715, - "grad_norm": 0.04867832362651825, - "learning_rate": 3.24473919899612e-05, - "loss": 0.0011579260230064393, - "step": 12955 - }, - { - "epoch": 2.209718670076726, - "grad_norm": 0.04439609497785568, - "learning_rate": 3.2422004855319376e-05, - "loss": 0.0033864513039588927, - "step": 12960 - }, - { - "epoch": 2.2105711849957372, - "grad_norm": 0.054114069789648056, - "learning_rate": 3.23966200912546e-05, - "loss": 0.0017186013981699943, - "step": 12965 - }, - { - "epoch": 2.2114236999147483, - "grad_norm": 0.03286417946219444, - "learning_rate": 3.237123770961735e-05, - "loss": 0.0013779066503047943, - "step": 12970 - }, - { - "epoch": 2.21227621483376, - "grad_norm": 0.05740232393145561, - "learning_rate": 3.234585772225694e-05, - "loss": 0.00376686155796051, - "step": 12975 - }, - { - "epoch": 2.213128729752771, - "grad_norm": 0.11821190267801285, - "learning_rate": 3.232048014102158e-05, - "loss": 0.003515421971678734, - "step": 12980 - }, - { - "epoch": 2.213981244671782, - "grad_norm": 0.06561318039894104, - "learning_rate": 3.229510497775838e-05, - "loss": 0.0034034676849842072, - "step": 12985 - }, - { - "epoch": 2.214833759590793, - "grad_norm": 0.06076068431138992, - "learning_rate": 3.226973224431333e-05, - "loss": 0.0018323207274079322, - "step": 12990 - }, - { - "epoch": 2.215686274509804, - "grad_norm": 0.05743642896413803, - "learning_rate": 3.2244361952531266e-05, - "loss": 0.002844391018152237, - "step": 12995 - }, - { - "epoch": 2.216538789428815, - "grad_norm": 0.0632607489824295, - "learning_rate": 3.221899411425586e-05, - "loss": 0.003329380601644516, - "step": 13000 - }, - { - "epoch": 2.217391304347826, - "grad_norm": 0.06082088127732277, - "learning_rate": 3.219362874132966e-05, - "loss": 0.0026398774236440657, - "step": 13005 - }, - { - "epoch": 2.2182438192668372, - "grad_norm": 0.07731121778488159, - "learning_rate": 3.2168265845594075e-05, - "loss": 0.00193992517888546, - "step": 13010 - }, - { - "epoch": 2.2190963341858483, - "grad_norm": 0.08783961087465286, - "learning_rate": 3.214290543888938e-05, - "loss": 0.0019096124917268753, - "step": 13015 - }, - { - "epoch": 2.2199488491048593, - "grad_norm": 0.07576426863670349, - "learning_rate": 3.211754753305461e-05, - "loss": 0.002824045717716217, - "step": 13020 - }, - { - "epoch": 2.2208013640238704, - "grad_norm": 0.0671941265463829, - "learning_rate": 3.20921921399277e-05, - "loss": 0.0025903450325131415, - "step": 13025 - }, - { - "epoch": 2.2216538789428815, - "grad_norm": 0.025313038378953934, - "learning_rate": 3.206683927134538e-05, - "loss": 0.001357127632945776, - "step": 13030 - }, - { - "epoch": 2.2225063938618925, - "grad_norm": 0.0281735397875309, - "learning_rate": 3.204148893914323e-05, - "loss": 0.0018472330644726752, - "step": 13035 - }, - { - "epoch": 2.2233589087809036, - "grad_norm": 0.027222834527492523, - "learning_rate": 3.2016141155155625e-05, - "loss": 0.0018411261960864067, - "step": 13040 - }, - { - "epoch": 2.2242114236999146, - "grad_norm": 0.04794001951813698, - "learning_rate": 3.199079593121574e-05, - "loss": 0.0015307093039155007, - "step": 13045 - }, - { - "epoch": 2.2250639386189257, - "grad_norm": 0.05856316536664963, - "learning_rate": 3.196545327915558e-05, - "loss": 0.001051103323698044, - "step": 13050 - }, - { - "epoch": 2.2259164535379368, - "grad_norm": 0.037851642817258835, - "learning_rate": 3.194011321080592e-05, - "loss": 0.0020413145422935484, - "step": 13055 - }, - { - "epoch": 2.226768968456948, - "grad_norm": 0.04197809472680092, - "learning_rate": 3.191477573799638e-05, - "loss": 0.0025324104353785515, - "step": 13060 - }, - { - "epoch": 2.227621483375959, - "grad_norm": 0.04126058518886566, - "learning_rate": 3.188944087255531e-05, - "loss": 0.001765124499797821, - "step": 13065 - }, - { - "epoch": 2.2284739982949704, - "grad_norm": 0.13436861336231232, - "learning_rate": 3.186410862630988e-05, - "loss": 0.003620542213320732, - "step": 13070 - }, - { - "epoch": 2.229326513213981, - "grad_norm": 0.05177616328001022, - "learning_rate": 3.183877901108601e-05, - "loss": 0.001679854467511177, - "step": 13075 - }, - { - "epoch": 2.2301790281329925, - "grad_norm": 0.03360729292035103, - "learning_rate": 3.1813452038708415e-05, - "loss": 0.002009689994156361, - "step": 13080 - }, - { - "epoch": 2.2310315430520036, - "grad_norm": 0.102437824010849, - "learning_rate": 3.178812772100058e-05, - "loss": 0.002533908933401108, - "step": 13085 - }, - { - "epoch": 2.2318840579710146, - "grad_norm": 0.045174695551395416, - "learning_rate": 3.176280606978473e-05, - "loss": 0.0023472383618354797, - "step": 13090 - }, - { - "epoch": 2.2327365728900257, - "grad_norm": 0.0679149329662323, - "learning_rate": 3.173748709688184e-05, - "loss": 0.00249241441488266, - "step": 13095 - }, - { - "epoch": 2.2335890878090368, - "grad_norm": 0.1367262750864029, - "learning_rate": 3.171217081411166e-05, - "loss": 0.002387053519487381, - "step": 13100 - }, - { - "epoch": 2.234441602728048, - "grad_norm": 0.06661707162857056, - "learning_rate": 3.168685723329269e-05, - "loss": 0.002376999333500862, - "step": 13105 - }, - { - "epoch": 2.235294117647059, - "grad_norm": 0.08916410058736801, - "learning_rate": 3.166154636624214e-05, - "loss": 0.0027421964332461357, - "step": 13110 - }, - { - "epoch": 2.23614663256607, - "grad_norm": 0.058119386434555054, - "learning_rate": 3.163623822477595e-05, - "loss": 0.0018962904810905456, - "step": 13115 - }, - { - "epoch": 2.236999147485081, - "grad_norm": 0.06457269936800003, - "learning_rate": 3.161093282070882e-05, - "loss": 0.001441392581909895, - "step": 13120 - }, - { - "epoch": 2.237851662404092, - "grad_norm": 0.1250019371509552, - "learning_rate": 3.158563016585412e-05, - "loss": 0.002274188958108425, - "step": 13125 - }, - { - "epoch": 2.238704177323103, - "grad_norm": 0.03324245661497116, - "learning_rate": 3.156033027202403e-05, - "loss": 0.002002820558845997, - "step": 13130 - }, - { - "epoch": 2.239556692242114, - "grad_norm": 0.01897227205336094, - "learning_rate": 3.153503315102934e-05, - "loss": 0.0016582176089286805, - "step": 13135 - }, - { - "epoch": 2.2404092071611252, - "grad_norm": 0.07142049074172974, - "learning_rate": 3.15097388146796e-05, - "loss": 0.002489439025521278, - "step": 13140 - }, - { - "epoch": 2.2412617220801363, - "grad_norm": 0.05619347095489502, - "learning_rate": 3.148444727478303e-05, - "loss": 0.0021767957136034966, - "step": 13145 - }, - { - "epoch": 2.2421142369991474, - "grad_norm": 0.0950259119272232, - "learning_rate": 3.14591585431466e-05, - "loss": 0.001732981950044632, - "step": 13150 - }, - { - "epoch": 2.2429667519181584, - "grad_norm": 0.06186724454164505, - "learning_rate": 3.143387263157591e-05, - "loss": 0.001604793407022953, - "step": 13155 - }, - { - "epoch": 2.2438192668371695, - "grad_norm": 0.0921434834599495, - "learning_rate": 3.1408589551875256e-05, - "loss": 0.001957142725586891, - "step": 13160 - }, - { - "epoch": 2.2446717817561805, - "grad_norm": 0.05556231364607811, - "learning_rate": 3.138330931584763e-05, - "loss": 0.002686610072851181, - "step": 13165 - }, - { - "epoch": 2.2455242966751916, - "grad_norm": 0.10184850543737411, - "learning_rate": 3.1358031935294666e-05, - "loss": 0.0019098062068223954, - "step": 13170 - }, - { - "epoch": 2.246376811594203, - "grad_norm": 0.08860436826944351, - "learning_rate": 3.133275742201673e-05, - "loss": 0.002402664348483086, - "step": 13175 - }, - { - "epoch": 2.247229326513214, - "grad_norm": 0.06324724107980728, - "learning_rate": 3.130748578781278e-05, - "loss": 0.0018930312246084214, - "step": 13180 - }, - { - "epoch": 2.2480818414322252, - "grad_norm": 0.07382629811763763, - "learning_rate": 3.128221704448045e-05, - "loss": 0.0026824956759810446, - "step": 13185 - }, - { - "epoch": 2.2489343563512363, - "grad_norm": 0.1002819687128067, - "learning_rate": 3.125695120381603e-05, - "loss": 0.0030449360609054567, - "step": 13190 - }, - { - "epoch": 2.249616368286445, - "eval_loss": 0.046705588698387146, - "eval_runtime": 3.7196, - "eval_samples_per_second": 67.748, - "eval_steps_per_second": 1.075, - "step": 13194 - }, - { - "eval_cer_subset": 0.013842070122298761, - "eval_cer_subset_edit_distance": 850, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 13194 - }, - { - "epoch": 2.2497868712702473, - "grad_norm": 0.030392520129680634, - "learning_rate": 3.123168827761447e-05, - "loss": 0.0015232504345476627, - "step": 13195 - }, - { - "epoch": 2.2506393861892584, - "grad_norm": 0.04160630702972412, - "learning_rate": 3.1206428277669336e-05, - "loss": 0.0026638204231858253, - "step": 13200 - }, - { - "epoch": 2.2514919011082695, - "grad_norm": 0.06140404939651489, - "learning_rate": 3.118117121577284e-05, - "loss": 0.003001154027879238, - "step": 13205 - }, - { - "epoch": 2.2523444160272805, - "grad_norm": 0.06974830478429794, - "learning_rate": 3.115591710371581e-05, - "loss": 0.0032261811196804047, - "step": 13210 - }, - { - "epoch": 2.2531969309462916, - "grad_norm": 0.09120716899633408, - "learning_rate": 3.1130665953287695e-05, - "loss": 0.001386938989162445, - "step": 13215 - }, - { - "epoch": 2.2540494458653026, - "grad_norm": 0.06130429729819298, - "learning_rate": 3.110541777627661e-05, - "loss": 0.0014743787236511708, - "step": 13220 - }, - { - "epoch": 2.2549019607843137, - "grad_norm": 0.07033205777406693, - "learning_rate": 3.108017258446921e-05, - "loss": 0.003749256581068039, - "step": 13225 - }, - { - "epoch": 2.2557544757033248, - "grad_norm": 0.08650046586990356, - "learning_rate": 3.1054930389650804e-05, - "loss": 0.0023554276674985887, - "step": 13230 - }, - { - "epoch": 2.256606990622336, - "grad_norm": 0.06045643612742424, - "learning_rate": 3.102969120360529e-05, - "loss": 0.0019686706364154816, - "step": 13235 - }, - { - "epoch": 2.257459505541347, - "grad_norm": 0.1004268005490303, - "learning_rate": 3.100445503811514e-05, - "loss": 0.003136196732521057, - "step": 13240 - }, - { - "epoch": 2.258312020460358, - "grad_norm": 0.08810209482908249, - "learning_rate": 3.097922190496146e-05, - "loss": 0.002239716053009033, - "step": 13245 - }, - { - "epoch": 2.259164535379369, - "grad_norm": 0.10518727451562881, - "learning_rate": 3.095399181592392e-05, - "loss": 0.002447150461375713, - "step": 13250 - }, - { - "epoch": 2.26001705029838, - "grad_norm": 0.049536559730768204, - "learning_rate": 3.092876478278074e-05, - "loss": 0.0023296492174267767, - "step": 13255 - }, - { - "epoch": 2.260869565217391, - "grad_norm": 0.057701822370290756, - "learning_rate": 3.0903540817308734e-05, - "loss": 0.0018970953300595284, - "step": 13260 - }, - { - "epoch": 2.261722080136402, - "grad_norm": 0.04391616955399513, - "learning_rate": 3.087831993128333e-05, - "loss": 0.0026229951530694962, - "step": 13265 - }, - { - "epoch": 2.2625745950554137, - "grad_norm": 0.048150911927223206, - "learning_rate": 3.0853102136478444e-05, - "loss": 0.0015288691036403179, - "step": 13270 - }, - { - "epoch": 2.2634271099744243, - "grad_norm": 0.12074416130781174, - "learning_rate": 3.082788744466659e-05, - "loss": 0.0025329213589429856, - "step": 13275 - }, - { - "epoch": 2.264279624893436, - "grad_norm": 0.05400107055902481, - "learning_rate": 3.080267586761881e-05, - "loss": 0.0017294475808739662, - "step": 13280 - }, - { - "epoch": 2.265132139812447, - "grad_norm": 0.07027488201856613, - "learning_rate": 3.0777467417104717e-05, - "loss": 0.0026237966492772104, - "step": 13285 - }, - { - "epoch": 2.265984654731458, - "grad_norm": 0.06868001073598862, - "learning_rate": 3.075226210489247e-05, - "loss": 0.0021411897614598274, - "step": 13290 - }, - { - "epoch": 2.266837169650469, - "grad_norm": 0.07447243481874466, - "learning_rate": 3.072705994274874e-05, - "loss": 0.002808676287531853, - "step": 13295 - }, - { - "epoch": 2.26768968456948, - "grad_norm": 0.04292432591319084, - "learning_rate": 3.070186094243872e-05, - "loss": 0.001994679495692253, - "step": 13300 - }, - { - "epoch": 2.268542199488491, - "grad_norm": 0.06083334609866142, - "learning_rate": 3.067666511572614e-05, - "loss": 0.001621294766664505, - "step": 13305 - }, - { - "epoch": 2.269394714407502, - "grad_norm": 0.04339296743273735, - "learning_rate": 3.065147247437327e-05, - "loss": 0.002122482657432556, - "step": 13310 - }, - { - "epoch": 2.2702472293265132, - "grad_norm": 0.07901404052972794, - "learning_rate": 3.062628303014087e-05, - "loss": 0.0030757525935769083, - "step": 13315 - }, - { - "epoch": 2.2710997442455243, - "grad_norm": 0.046554502099752426, - "learning_rate": 3.060109679478821e-05, - "loss": 0.0022816451266407965, - "step": 13320 - }, - { - "epoch": 2.2719522591645354, - "grad_norm": 0.03428821638226509, - "learning_rate": 3.0575913780073036e-05, - "loss": 0.002047870494425297, - "step": 13325 - }, - { - "epoch": 2.2728047740835464, - "grad_norm": 0.09298217296600342, - "learning_rate": 3.0550733997751634e-05, - "loss": 0.002046193927526474, - "step": 13330 - }, - { - "epoch": 2.2736572890025575, - "grad_norm": 0.08458553999662399, - "learning_rate": 3.0525557459578786e-05, - "loss": 0.002566727437078953, - "step": 13335 - }, - { - "epoch": 2.2745098039215685, - "grad_norm": 0.10309132188558578, - "learning_rate": 3.050038417730772e-05, - "loss": 0.00358976349234581, - "step": 13340 - }, - { - "epoch": 2.2753623188405796, - "grad_norm": 0.08116701990365982, - "learning_rate": 3.0475214162690144e-05, - "loss": 0.003372102603316307, - "step": 13345 - }, - { - "epoch": 2.2762148337595907, - "grad_norm": 0.09258918464183807, - "learning_rate": 3.0450047427476292e-05, - "loss": 0.0037133049219846724, - "step": 13350 - }, - { - "epoch": 2.2770673486786017, - "grad_norm": 0.09618882834911346, - "learning_rate": 3.0424883983414797e-05, - "loss": 0.0024330444633960725, - "step": 13355 - }, - { - "epoch": 2.277919863597613, - "grad_norm": 0.04637463390827179, - "learning_rate": 3.039972384225282e-05, - "loss": 0.0011583495885133742, - "step": 13360 - }, - { - "epoch": 2.2787723785166243, - "grad_norm": 0.04919019341468811, - "learning_rate": 3.0374567015735953e-05, - "loss": 0.0017433254048228263, - "step": 13365 - }, - { - "epoch": 2.279624893435635, - "grad_norm": 0.07092445343732834, - "learning_rate": 3.0349413515608213e-05, - "loss": 0.0010275598615407944, - "step": 13370 - }, - { - "epoch": 2.2804774083546464, - "grad_norm": 0.06819095462560654, - "learning_rate": 3.03242633536121e-05, - "loss": 0.0018655678257346153, - "step": 13375 - }, - { - "epoch": 2.2813299232736575, - "grad_norm": 0.13397860527038574, - "learning_rate": 3.029911654148857e-05, - "loss": 0.0029754094779491425, - "step": 13380 - }, - { - "epoch": 2.2821824381926685, - "grad_norm": 0.09142930805683136, - "learning_rate": 3.0273973090976974e-05, - "loss": 0.0027707524597644804, - "step": 13385 - }, - { - "epoch": 2.2830349531116796, - "grad_norm": 0.06282728165388107, - "learning_rate": 3.0248833013815112e-05, - "loss": 0.0018412042409181595, - "step": 13390 - }, - { - "epoch": 2.2838874680306906, - "grad_norm": 0.05533494055271149, - "learning_rate": 3.0223696321739196e-05, - "loss": 0.0025158364325761793, - "step": 13395 - }, - { - "epoch": 2.2847399829497017, - "grad_norm": 0.08349598199129105, - "learning_rate": 3.0198563026483876e-05, - "loss": 0.002777436375617981, - "step": 13400 - }, - { - "epoch": 2.2855924978687128, - "grad_norm": 0.07469198107719421, - "learning_rate": 3.0173433139782227e-05, - "loss": 0.001951916702091694, - "step": 13405 - }, - { - "epoch": 2.286445012787724, - "grad_norm": 0.07126526534557343, - "learning_rate": 3.0148306673365708e-05, - "loss": 0.0031182590872049333, - "step": 13410 - }, - { - "epoch": 2.287297527706735, - "grad_norm": 0.06499479711055756, - "learning_rate": 3.0123183638964183e-05, - "loss": 0.001717902161180973, - "step": 13415 - }, - { - "epoch": 2.288150042625746, - "grad_norm": 0.03133346140384674, - "learning_rate": 3.0098064048305917e-05, - "loss": 0.0015830917283892632, - "step": 13420 - }, - { - "epoch": 2.289002557544757, - "grad_norm": 0.06725561618804932, - "learning_rate": 3.0072947913117573e-05, - "loss": 0.004541714489459991, - "step": 13425 - }, - { - "epoch": 2.289855072463768, - "grad_norm": 0.13644525408744812, - "learning_rate": 3.0047835245124216e-05, - "loss": 0.0027179479598999023, - "step": 13430 - }, - { - "epoch": 2.290707587382779, - "grad_norm": 0.06966832280158997, - "learning_rate": 3.0022726056049262e-05, - "loss": 0.0026542846113443374, - "step": 13435 - }, - { - "epoch": 2.29156010230179, - "grad_norm": 0.04449222609400749, - "learning_rate": 2.999762035761451e-05, - "loss": 0.0014596210792660713, - "step": 13440 - }, - { - "epoch": 2.2924126172208013, - "grad_norm": 0.05453059822320938, - "learning_rate": 2.9972518161540124e-05, - "loss": 0.0024629242718219755, - "step": 13445 - }, - { - "epoch": 2.2932651321398123, - "grad_norm": 0.06370346248149872, - "learning_rate": 2.9947419479544677e-05, - "loss": 0.0018157381564378738, - "step": 13450 - }, - { - "epoch": 2.2941176470588234, - "grad_norm": 0.149154931306839, - "learning_rate": 2.992232432334505e-05, - "loss": 0.0038953136652708055, - "step": 13455 - }, - { - "epoch": 2.2949701619778344, - "grad_norm": 0.08758609741926193, - "learning_rate": 2.9897232704656494e-05, - "loss": 0.00197781715542078, - "step": 13460 - }, - { - "epoch": 2.2958226768968455, - "grad_norm": 0.04877983406186104, - "learning_rate": 2.9872144635192625e-05, - "loss": 0.0018029011785984038, - "step": 13465 - }, - { - "epoch": 2.296675191815857, - "grad_norm": 0.029492873698472977, - "learning_rate": 2.984706012666536e-05, - "loss": 0.00226336307823658, - "step": 13470 - }, - { - "epoch": 2.2975277067348676, - "grad_norm": 0.09038830548524857, - "learning_rate": 2.982197919078502e-05, - "loss": 0.0024063091725111006, - "step": 13475 - }, - { - "epoch": 2.298380221653879, - "grad_norm": 0.08629653602838516, - "learning_rate": 2.97969018392602e-05, - "loss": 0.0019390033558011055, - "step": 13480 - }, - { - "epoch": 2.29923273657289, - "grad_norm": 0.08667116612195969, - "learning_rate": 2.9771828083797832e-05, - "loss": 0.003171199932694435, - "step": 13485 - }, - { - "epoch": 2.3000852514919012, - "grad_norm": 0.07069036364555359, - "learning_rate": 2.974675793610318e-05, - "loss": 0.002098524570465088, - "step": 13490 - }, - { - "epoch": 2.3009377664109123, - "grad_norm": 0.0887150913476944, - "learning_rate": 2.972169140787985e-05, - "loss": 0.001710166409611702, - "step": 13495 - }, - { - "epoch": 2.3017902813299234, - "grad_norm": 0.08873872458934784, - "learning_rate": 2.969662851082972e-05, - "loss": 0.002029442973434925, - "step": 13500 - }, - { - "epoch": 2.3026427962489344, - "grad_norm": 0.09199293702840805, - "learning_rate": 2.9671569256652976e-05, - "loss": 0.0015904868021607399, - "step": 13505 - }, - { - "epoch": 2.3034953111679455, - "grad_norm": 0.07347019016742706, - "learning_rate": 2.9646513657048106e-05, - "loss": 0.002239963971078396, - "step": 13510 - }, - { - "epoch": 2.3043478260869565, - "grad_norm": 0.056011516600847244, - "learning_rate": 2.9621461723711897e-05, - "loss": 0.003089374490082264, - "step": 13515 - }, - { - "epoch": 2.3052003410059676, - "grad_norm": 0.05805368721485138, - "learning_rate": 2.9596413468339447e-05, - "loss": 0.0011475264094769956, - "step": 13520 - }, - { - "epoch": 2.3060528559249787, - "grad_norm": 0.08263146877288818, - "learning_rate": 2.95713689026241e-05, - "loss": 0.0027705669403076173, - "step": 13525 - }, - { - "epoch": 2.3069053708439897, - "grad_norm": 0.10079067945480347, - "learning_rate": 2.954632803825749e-05, - "loss": 0.0038317229598760607, - "step": 13530 - }, - { - "epoch": 2.307757885763001, - "grad_norm": 0.07248156517744064, - "learning_rate": 2.9521290886929514e-05, - "loss": 0.0017008930444717407, - "step": 13535 - }, - { - "epoch": 2.308610400682012, - "grad_norm": 0.09252380579710007, - "learning_rate": 2.949625746032838e-05, - "loss": 0.0021895600482821466, - "step": 13540 - }, - { - "epoch": 2.309462915601023, - "grad_norm": 0.03231853246688843, - "learning_rate": 2.947122777014051e-05, - "loss": 0.002471560053527355, - "step": 13545 - }, - { - "epoch": 2.310315430520034, - "grad_norm": 0.09625072032213211, - "learning_rate": 2.944620182805059e-05, - "loss": 0.002643503434956074, - "step": 13550 - }, - { - "epoch": 2.311167945439045, - "grad_norm": 0.11135435849428177, - "learning_rate": 2.9421179645741552e-05, - "loss": 0.0015677452087402345, - "step": 13555 - }, - { - "epoch": 2.312020460358056, - "grad_norm": 0.07239774614572525, - "learning_rate": 2.939616123489459e-05, - "loss": 0.0020940851420164107, - "step": 13560 - }, - { - "epoch": 2.3128729752770676, - "grad_norm": 0.0686500295996666, - "learning_rate": 2.937114660718915e-05, - "loss": 0.004896241426467896, - "step": 13565 - }, - { - "epoch": 2.313725490196078, - "grad_norm": 0.04634196311235428, - "learning_rate": 2.934613577430288e-05, - "loss": 0.0017542928457260133, - "step": 13570 - }, - { - "epoch": 2.3145780051150897, - "grad_norm": 0.08693452924489975, - "learning_rate": 2.9321128747911657e-05, - "loss": 0.003124900534749031, - "step": 13575 - }, - { - "epoch": 2.3154305200341008, - "grad_norm": 0.053911175578832626, - "learning_rate": 2.9296125539689615e-05, - "loss": 0.001699080690741539, - "step": 13580 - }, - { - "epoch": 2.316283034953112, - "grad_norm": 0.07346964627504349, - "learning_rate": 2.9271126161309052e-05, - "loss": 0.0027174966409802435, - "step": 13585 - }, - { - "epoch": 2.317135549872123, - "grad_norm": 0.07157005369663239, - "learning_rate": 2.9246130624440546e-05, - "loss": 0.0026199813932180406, - "step": 13590 - }, - { - "epoch": 2.317988064791134, - "grad_norm": 0.0852048397064209, - "learning_rate": 2.922113894075282e-05, - "loss": 0.002349478751420975, - "step": 13595 - }, - { - "epoch": 2.318840579710145, - "grad_norm": 0.069539375603199, - "learning_rate": 2.9196151121912828e-05, - "loss": 0.002428753860294819, - "step": 13600 - }, - { - "epoch": 2.319693094629156, - "grad_norm": 0.06993792951107025, - "learning_rate": 2.9171167179585712e-05, - "loss": 0.0025543162599205972, - "step": 13605 - }, - { - "epoch": 2.320545609548167, - "grad_norm": 0.09210001677274704, - "learning_rate": 2.9146187125434826e-05, - "loss": 0.004642657563090324, - "step": 13610 - }, - { - "epoch": 2.321398124467178, - "grad_norm": 0.03652270883321762, - "learning_rate": 2.9121210971121674e-05, - "loss": 0.0019740790128707887, - "step": 13615 - }, - { - "epoch": 2.3222506393861893, - "grad_norm": 0.032051410526037216, - "learning_rate": 2.9096238728305957e-05, - "loss": 0.0020309314131736755, - "step": 13620 - }, - { - "epoch": 2.3231031543052003, - "grad_norm": 0.08898582309484482, - "learning_rate": 2.907127040864556e-05, - "loss": 0.0012645654380321503, - "step": 13625 - }, - { - "epoch": 2.3239556692242114, - "grad_norm": 0.21863117814064026, - "learning_rate": 2.9046306023796493e-05, - "loss": 0.0025009674951434135, - "step": 13630 - }, - { - "epoch": 2.3248081841432224, - "grad_norm": 0.06401807814836502, - "learning_rate": 2.9021345585413004e-05, - "loss": 0.002794540859758854, - "step": 13635 - }, - { - "epoch": 2.3256606990622335, - "grad_norm": 0.049468256533145905, - "learning_rate": 2.8996389105147437e-05, - "loss": 0.0024725871160626413, - "step": 13640 - }, - { - "epoch": 2.3265132139812446, - "grad_norm": 0.0904751718044281, - "learning_rate": 2.8971436594650292e-05, - "loss": 0.0033982183784246446, - "step": 13645 - }, - { - "epoch": 2.3273657289002556, - "grad_norm": 0.11576029658317566, - "learning_rate": 2.8946488065570242e-05, - "loss": 0.004228492826223373, - "step": 13650 - }, - { - "epoch": 2.3282182438192667, - "grad_norm": 0.08191253244876862, - "learning_rate": 2.892154352955411e-05, - "loss": 0.0015400771982967854, - "step": 13655 - }, - { - "epoch": 2.3290707587382777, - "grad_norm": 0.03641185909509659, - "learning_rate": 2.8896602998246817e-05, - "loss": 0.002032958157360554, - "step": 13660 - }, - { - "epoch": 2.329923273657289, - "grad_norm": 0.09123575687408447, - "learning_rate": 2.8871666483291433e-05, - "loss": 0.00326089970767498, - "step": 13665 - }, - { - "epoch": 2.3307757885763003, - "grad_norm": 0.07897967845201492, - "learning_rate": 2.8846733996329148e-05, - "loss": 0.0022133901715278626, - "step": 13670 - }, - { - "epoch": 2.3316283034953114, - "grad_norm": 0.0802898034453392, - "learning_rate": 2.8821805548999275e-05, - "loss": 0.002646717242896557, - "step": 13675 - }, - { - "epoch": 2.3324808184143224, - "grad_norm": 0.05337275192141533, - "learning_rate": 2.879688115293926e-05, - "loss": 0.0022310430184006693, - "step": 13680 - }, - { - "epoch": 2.3333333333333335, - "grad_norm": 0.026133684441447258, - "learning_rate": 2.8771960819784635e-05, - "loss": 0.0013902435079216958, - "step": 13685 - }, - { - "epoch": 2.3341858482523445, - "grad_norm": 0.0701865404844284, - "learning_rate": 2.8747044561169026e-05, - "loss": 0.0030527923256158827, - "step": 13690 - }, - { - "epoch": 2.3350383631713556, - "grad_norm": 0.023815227672457695, - "learning_rate": 2.8722132388724187e-05, - "loss": 0.001688534766435623, - "step": 13695 - }, - { - "epoch": 2.3358908780903667, - "grad_norm": 0.0819278210401535, - "learning_rate": 2.8697224314079928e-05, - "loss": 0.0028546562418341635, - "step": 13700 - }, - { - "epoch": 2.3367433930093777, - "grad_norm": 0.03683038055896759, - "learning_rate": 2.86723203488642e-05, - "loss": 0.0024238623678684234, - "step": 13705 - }, - { - "epoch": 2.337595907928389, - "grad_norm": 0.050080958753824234, - "learning_rate": 2.8647420504702977e-05, - "loss": 0.001459009852260351, - "step": 13710 - }, - { - "epoch": 2.3384484228474, - "grad_norm": 0.04246260225772858, - "learning_rate": 2.8622524793220336e-05, - "loss": 0.0024909645318984984, - "step": 13715 - }, - { - "epoch": 2.339300937766411, - "grad_norm": 0.04298778250813484, - "learning_rate": 2.8597633226038422e-05, - "loss": 0.0017042815685272216, - "step": 13720 - }, - { - "epoch": 2.340153452685422, - "grad_norm": 0.08792980760335922, - "learning_rate": 2.857274581477747e-05, - "loss": 0.0021930102258920668, - "step": 13725 - }, - { - "epoch": 2.341005967604433, - "grad_norm": 0.030293628573417664, - "learning_rate": 2.854786257105573e-05, - "loss": 0.002472694218158722, - "step": 13730 - }, - { - "epoch": 2.341858482523444, - "grad_norm": 0.100398488342762, - "learning_rate": 2.852298350648953e-05, - "loss": 0.0016385417431592942, - "step": 13735 - }, - { - "epoch": 2.342710997442455, - "grad_norm": 0.056936830282211304, - "learning_rate": 2.849810863269325e-05, - "loss": 0.0014652124606072902, - "step": 13740 - }, - { - "epoch": 2.343563512361466, - "grad_norm": 0.04332558810710907, - "learning_rate": 2.8473237961279293e-05, - "loss": 0.0029267419129610063, - "step": 13745 - }, - { - "epoch": 2.3444160272804773, - "grad_norm": 0.051982469856739044, - "learning_rate": 2.8448371503858143e-05, - "loss": 0.001836571842432022, - "step": 13750 - }, - { - "epoch": 2.3452685421994883, - "grad_norm": 0.1215415671467781, - "learning_rate": 2.8423509272038276e-05, - "loss": 0.002749188058078289, - "step": 13755 - }, - { - "epoch": 2.3461210571184994, - "grad_norm": 0.044508881866931915, - "learning_rate": 2.8398651277426203e-05, - "loss": 0.0023854803293943405, - "step": 13760 - }, - { - "epoch": 2.346973572037511, - "grad_norm": 0.09419308602809906, - "learning_rate": 2.837379753162647e-05, - "loss": 0.00259498693048954, - "step": 13765 - }, - { - "epoch": 2.3478260869565215, - "grad_norm": 0.0996370017528534, - "learning_rate": 2.8348948046241616e-05, - "loss": 0.003275657445192337, - "step": 13770 - }, - { - "epoch": 2.348678601875533, - "grad_norm": 0.0585092268884182, - "learning_rate": 2.8324102832872238e-05, - "loss": 0.0023032236844301225, - "step": 13775 - }, - { - "epoch": 2.349531116794544, - "grad_norm": 0.06259947270154953, - "learning_rate": 2.829926190311689e-05, - "loss": 0.0022853843867778776, - "step": 13780 - }, - { - "epoch": 2.350383631713555, - "grad_norm": 0.1343093067407608, - "learning_rate": 2.827442526857214e-05, - "loss": 0.0019558047875761985, - "step": 13785 - }, - { - "epoch": 2.351236146632566, - "grad_norm": 0.03901712968945503, - "learning_rate": 2.8249592940832552e-05, - "loss": 0.0019383212551474572, - "step": 13790 - }, - { - "epoch": 2.3520886615515773, - "grad_norm": 0.08933644741773605, - "learning_rate": 2.8224764931490707e-05, - "loss": 0.0019501563161611556, - "step": 13795 - }, - { - "epoch": 2.3529411764705883, - "grad_norm": 0.06790988147258759, - "learning_rate": 2.819994125213713e-05, - "loss": 0.0018905265256762504, - "step": 13800 - }, - { - "epoch": 2.3537936913895994, - "grad_norm": 0.10576235502958298, - "learning_rate": 2.817512191436033e-05, - "loss": 0.0017807571217417716, - "step": 13805 - }, - { - "epoch": 2.3546462063086104, - "grad_norm": 0.07914351671934128, - "learning_rate": 2.8150306929746826e-05, - "loss": 0.002854841575026512, - "step": 13810 - }, - { - "epoch": 2.3554987212276215, - "grad_norm": 0.10912367701530457, - "learning_rate": 2.812549630988104e-05, - "loss": 0.0028494328260421755, - "step": 13815 - }, - { - "epoch": 2.3563512361466326, - "grad_norm": 0.07309834659099579, - "learning_rate": 2.8100690066345434e-05, - "loss": 0.001808878593146801, - "step": 13820 - }, - { - "epoch": 2.3572037510656436, - "grad_norm": 0.07053545117378235, - "learning_rate": 2.807588821072037e-05, - "loss": 0.0024722769856452944, - "step": 13825 - }, - { - "epoch": 2.3580562659846547, - "grad_norm": 0.06512318551540375, - "learning_rate": 2.8051090754584176e-05, - "loss": 0.0025828687474131586, - "step": 13830 - }, - { - "epoch": 2.3589087809036657, - "grad_norm": 0.06797149777412415, - "learning_rate": 2.8026297709513125e-05, - "loss": 0.0021874068304896356, - "step": 13835 - }, - { - "epoch": 2.359761295822677, - "grad_norm": 0.12261441349983215, - "learning_rate": 2.800150908708145e-05, - "loss": 0.00291924811899662, - "step": 13840 - }, - { - "epoch": 2.360613810741688, - "grad_norm": 0.05696386098861694, - "learning_rate": 2.797672489886131e-05, - "loss": 0.003488580882549286, - "step": 13845 - }, - { - "epoch": 2.361466325660699, - "grad_norm": 0.3340120315551758, - "learning_rate": 2.795194515642276e-05, - "loss": 0.0033275336027145386, - "step": 13850 - }, - { - "epoch": 2.36231884057971, - "grad_norm": 0.08209964632987976, - "learning_rate": 2.7927169871333836e-05, - "loss": 0.0020242417231202126, - "step": 13855 - }, - { - "epoch": 2.363171355498721, - "grad_norm": 0.04942183569073677, - "learning_rate": 2.7902399055160435e-05, - "loss": 0.0015470117330551147, - "step": 13860 - }, - { - "epoch": 2.364023870417732, - "grad_norm": 0.07711990922689438, - "learning_rate": 2.7877632719466438e-05, - "loss": 0.002402086555957794, - "step": 13865 - }, - { - "epoch": 2.3648763853367436, - "grad_norm": 0.06835886090993881, - "learning_rate": 2.7852870875813572e-05, - "loss": 0.002709807641804218, - "step": 13870 - }, - { - "epoch": 2.3657289002557547, - "grad_norm": 0.01572684571146965, - "learning_rate": 2.7828113535761476e-05, - "loss": 0.0037427868694067, - "step": 13875 - }, - { - "epoch": 2.3665814151747657, - "grad_norm": 0.03897464647889137, - "learning_rate": 2.7803360710867728e-05, - "loss": 0.0029004696756601334, - "step": 13880 - }, - { - "epoch": 2.367433930093777, - "grad_norm": 0.1281740814447403, - "learning_rate": 2.777861241268774e-05, - "loss": 0.0021549168974161147, - "step": 13885 - }, - { - "epoch": 2.368286445012788, - "grad_norm": 0.04390920698642731, - "learning_rate": 2.7753868652774873e-05, - "loss": 0.0019567809998989106, - "step": 13890 - }, - { - "epoch": 2.369138959931799, - "grad_norm": 0.09526315331459045, - "learning_rate": 2.7729129442680314e-05, - "loss": 0.001414876524358988, - "step": 13895 - }, - { - "epoch": 2.36999147485081, - "grad_norm": 0.041541386395692825, - "learning_rate": 2.7704394793953162e-05, - "loss": 0.0023986730724573136, - "step": 13900 - }, - { - "epoch": 2.370843989769821, - "grad_norm": 0.056684307754039764, - "learning_rate": 2.7679664718140354e-05, - "loss": 0.0023011576384305956, - "step": 13905 - }, - { - "epoch": 2.371696504688832, - "grad_norm": 0.04548821225762367, - "learning_rate": 2.765493922678674e-05, - "loss": 0.002776668407022953, - "step": 13910 - }, - { - "epoch": 2.372549019607843, - "grad_norm": 0.05635173246264458, - "learning_rate": 2.763021833143499e-05, - "loss": 0.0021549917757511137, - "step": 13915 - }, - { - "epoch": 2.373401534526854, - "grad_norm": 0.06744635850191116, - "learning_rate": 2.7605502043625636e-05, - "loss": 0.0014210479333996774, - "step": 13920 - }, - { - "epoch": 2.3742540494458653, - "grad_norm": 0.03131572902202606, - "learning_rate": 2.758079037489707e-05, - "loss": 0.002670668438076973, - "step": 13925 - }, - { - "epoch": 2.3751065643648763, - "grad_norm": 0.1132262721657753, - "learning_rate": 2.75560833367855e-05, - "loss": 0.004025829955935478, - "step": 13930 - }, - { - "epoch": 2.3759590792838874, - "grad_norm": 0.08719862997531891, - "learning_rate": 2.753138094082502e-05, - "loss": 0.0026264961808919905, - "step": 13935 - }, - { - "epoch": 2.3768115942028984, - "grad_norm": 0.045282550156116486, - "learning_rate": 2.7506683198547527e-05, - "loss": 0.0016890913248062134, - "step": 13940 - }, - { - "epoch": 2.3776641091219095, - "grad_norm": 0.03815371170639992, - "learning_rate": 2.7481990121482737e-05, - "loss": 0.0017980627715587616, - "step": 13945 - }, - { - "epoch": 2.3785166240409206, - "grad_norm": 0.05136419087648392, - "learning_rate": 2.745730172115819e-05, - "loss": 0.0017518583685159684, - "step": 13950 - }, - { - "epoch": 2.3793691389599316, - "grad_norm": 0.076651431620121, - "learning_rate": 2.743261800909929e-05, - "loss": 0.0021933792158961296, - "step": 13955 - }, - { - "epoch": 2.3802216538789427, - "grad_norm": 0.04328504204750061, - "learning_rate": 2.740793899682919e-05, - "loss": 0.0015049883164465427, - "step": 13960 - }, - { - "epoch": 2.381074168797954, - "grad_norm": 0.029004819691181183, - "learning_rate": 2.7383264695868863e-05, - "loss": 0.0023387337103486063, - "step": 13965 - }, - { - "epoch": 2.381926683716965, - "grad_norm": 0.11483976989984512, - "learning_rate": 2.7358595117737118e-05, - "loss": 0.00246519148349762, - "step": 13970 - }, - { - "epoch": 2.3827791986359763, - "grad_norm": 0.09073470532894135, - "learning_rate": 2.733393027395051e-05, - "loss": 0.0031791247427463533, - "step": 13975 - }, - { - "epoch": 2.3836317135549874, - "grad_norm": 0.12094864249229431, - "learning_rate": 2.7309270176023436e-05, - "loss": 0.0025795340538024903, - "step": 13980 - }, - { - "epoch": 2.3844842284739984, - "grad_norm": 0.13568098843097687, - "learning_rate": 2.7284614835468035e-05, - "loss": 0.0057578980922698975, - "step": 13985 - }, - { - "epoch": 2.3853367433930095, - "grad_norm": 0.06415567547082901, - "learning_rate": 2.725996426379423e-05, - "loss": 0.0024575673043727873, - "step": 13990 - }, - { - "epoch": 2.3861892583120206, - "grad_norm": 0.05898221582174301, - "learning_rate": 2.723531847250975e-05, - "loss": 0.0013358716852962971, - "step": 13995 - }, - { - "epoch": 2.3870417732310316, - "grad_norm": 0.019117049872875214, - "learning_rate": 2.721067747312004e-05, - "loss": 0.0016026780009269713, - "step": 14000 - }, - { - "epoch": 2.3878942881500427, - "grad_norm": 0.028591491281986237, - "learning_rate": 2.7186041277128383e-05, - "loss": 0.001663113385438919, - "step": 14005 - }, - { - "epoch": 2.3887468030690537, - "grad_norm": 0.03701665997505188, - "learning_rate": 2.7161409896035733e-05, - "loss": 0.0012899260967969895, - "step": 14010 - }, - { - "epoch": 2.389599317988065, - "grad_norm": 0.05777057632803917, - "learning_rate": 2.7136783341340862e-05, - "loss": 0.0018556809052824974, - "step": 14015 - }, - { - "epoch": 2.390451832907076, - "grad_norm": 0.04922354966402054, - "learning_rate": 2.711216162454024e-05, - "loss": 0.002131880074739456, - "step": 14020 - }, - { - "epoch": 2.391304347826087, - "grad_norm": 0.045851659029722214, - "learning_rate": 2.708754475712814e-05, - "loss": 0.001147150807082653, - "step": 14025 - }, - { - "epoch": 2.392156862745098, - "grad_norm": 0.11482678353786469, - "learning_rate": 2.7062932750596514e-05, - "loss": 0.0027298804372549055, - "step": 14030 - }, - { - "epoch": 2.393009377664109, - "grad_norm": 0.054821670055389404, - "learning_rate": 2.7038325616435058e-05, - "loss": 0.0018268844112753868, - "step": 14035 - }, - { - "epoch": 2.39386189258312, - "grad_norm": 0.09821441024541855, - "learning_rate": 2.701372336613122e-05, - "loss": 0.002109052799642086, - "step": 14040 - }, - { - "epoch": 2.394714407502131, - "grad_norm": 0.04923141747713089, - "learning_rate": 2.6989126011170115e-05, - "loss": 0.0021799976006150247, - "step": 14045 - }, - { - "epoch": 2.395566922421142, - "grad_norm": 0.1223372220993042, - "learning_rate": 2.6964533563034648e-05, - "loss": 0.00261150524020195, - "step": 14050 - }, - { - "epoch": 2.3964194373401533, - "grad_norm": 0.04964495450258255, - "learning_rate": 2.6939946033205374e-05, - "loss": 0.001747405156493187, - "step": 14055 - }, - { - "epoch": 2.397271952259165, - "grad_norm": 0.05354087054729462, - "learning_rate": 2.6915363433160562e-05, - "loss": 0.0017880409955978393, - "step": 14060 - }, - { - "epoch": 2.3981244671781754, - "grad_norm": 0.0796194076538086, - "learning_rate": 2.6890785774376188e-05, - "loss": 0.002280256152153015, - "step": 14065 - }, - { - "epoch": 2.398976982097187, - "grad_norm": 0.048979468643665314, - "learning_rate": 2.6866213068325942e-05, - "loss": 0.0034266695380210876, - "step": 14070 - }, - { - "epoch": 2.399829497016198, - "grad_norm": 0.11115774512290955, - "learning_rate": 2.6841645326481166e-05, - "loss": 0.0014098694548010827, - "step": 14075 - }, - { - "epoch": 2.400682011935209, - "grad_norm": 0.14144426584243774, - "learning_rate": 2.681708256031089e-05, - "loss": 0.0017399771139025688, - "step": 14080 - }, - { - "epoch": 2.40153452685422, - "grad_norm": 0.060562510043382645, - "learning_rate": 2.6792524781281846e-05, - "loss": 0.0031288094818592072, - "step": 14085 - }, - { - "epoch": 2.402387041773231, - "grad_norm": 0.08271291851997375, - "learning_rate": 2.6767972000858402e-05, - "loss": 0.002268883027136326, - "step": 14090 - }, - { - "epoch": 2.403239556692242, - "grad_norm": 0.08203598111867905, - "learning_rate": 2.674342423050264e-05, - "loss": 0.0017265897244215012, - "step": 14095 - }, - { - "epoch": 2.4040920716112533, - "grad_norm": 0.07809042930603027, - "learning_rate": 2.6718881481674265e-05, - "loss": 0.0032232727855443953, - "step": 14100 - }, - { - "epoch": 2.4049445865302643, - "grad_norm": 0.043053366243839264, - "learning_rate": 2.6694343765830633e-05, - "loss": 0.0014350255951285362, - "step": 14105 - }, - { - "epoch": 2.4057971014492754, - "grad_norm": 0.2139715999364853, - "learning_rate": 2.666981109442679e-05, - "loss": 0.002208554185926914, - "step": 14110 - }, - { - "epoch": 2.4066496163682864, - "grad_norm": 0.028433851897716522, - "learning_rate": 2.6645283478915373e-05, - "loss": 0.0033426061272621155, - "step": 14115 - }, - { - "epoch": 2.4075021312872975, - "grad_norm": 0.03152618184685707, - "learning_rate": 2.6620760930746726e-05, - "loss": 0.0017683111131191255, - "step": 14120 - }, - { - "epoch": 2.4083546462063086, - "grad_norm": 0.11559031158685684, - "learning_rate": 2.6596243461368762e-05, - "loss": 0.0027762461453676225, - "step": 14125 - }, - { - "epoch": 2.4092071611253196, - "grad_norm": 0.08188942819833755, - "learning_rate": 2.6571731082227068e-05, - "loss": 0.0029629599303007126, - "step": 14130 - }, - { - "epoch": 2.4100596760443307, - "grad_norm": 0.03179270401597023, - "learning_rate": 2.654722380476482e-05, - "loss": 0.001593652181327343, - "step": 14135 - }, - { - "epoch": 2.4109121909633418, - "grad_norm": 0.03763008117675781, - "learning_rate": 2.652272164042285e-05, - "loss": 0.003974568471312523, - "step": 14140 - }, - { - "epoch": 2.411764705882353, - "grad_norm": 0.06221388280391693, - "learning_rate": 2.649822460063958e-05, - "loss": 0.0021382227540016176, - "step": 14145 - }, - { - "epoch": 2.412617220801364, - "grad_norm": 0.13541199266910553, - "learning_rate": 2.6473732696851025e-05, - "loss": 0.0030446551740169526, - "step": 14150 - }, - { - "epoch": 2.413469735720375, - "grad_norm": 0.07515605539083481, - "learning_rate": 2.6449245940490843e-05, - "loss": 0.0023170780390501023, - "step": 14155 - }, - { - "epoch": 2.414322250639386, - "grad_norm": 0.029287993907928467, - "learning_rate": 2.6424764342990247e-05, - "loss": 0.002732834219932556, - "step": 14160 - }, - { - "epoch": 2.4151747655583975, - "grad_norm": 0.056158751249313354, - "learning_rate": 2.6400287915778073e-05, - "loss": 0.0026283055543899537, - "step": 14165 - }, - { - "epoch": 2.416027280477408, - "grad_norm": 0.05005735903978348, - "learning_rate": 2.6375816670280742e-05, - "loss": 0.0021377095952630045, - "step": 14170 - }, - { - "epoch": 2.4168797953964196, - "grad_norm": 0.039338257163763046, - "learning_rate": 2.6351350617922217e-05, - "loss": 0.0010171877220273018, - "step": 14175 - }, - { - "epoch": 2.4177323103154307, - "grad_norm": 0.054605189710855484, - "learning_rate": 2.6326889770124074e-05, - "loss": 0.0015358464792370797, - "step": 14180 - }, - { - "epoch": 2.4185848252344417, - "grad_norm": 0.05107913911342621, - "learning_rate": 2.630243413830547e-05, - "loss": 0.0014638695865869522, - "step": 14185 - }, - { - "epoch": 2.419437340153453, - "grad_norm": 0.14121516048908234, - "learning_rate": 2.62779837338831e-05, - "loss": 0.0018762655556201935, - "step": 14190 - }, - { - "epoch": 2.420289855072464, - "grad_norm": 0.13554073870182037, - "learning_rate": 2.625353856827121e-05, - "loss": 0.002315247431397438, - "step": 14195 - }, - { - "epoch": 2.421142369991475, - "grad_norm": 0.07378100603818893, - "learning_rate": 2.6229098652881636e-05, - "loss": 0.0017681105062365531, - "step": 14200 - }, - { - "epoch": 2.421994884910486, - "grad_norm": 0.0729142278432846, - "learning_rate": 2.6204663999123712e-05, - "loss": 0.0013508319854736329, - "step": 14205 - }, - { - "epoch": 2.422847399829497, - "grad_norm": 0.09028290957212448, - "learning_rate": 2.6180234618404393e-05, - "loss": 0.0023917261511087417, - "step": 14210 - }, - { - "epoch": 2.423699914748508, - "grad_norm": 0.042102012783288956, - "learning_rate": 2.6155810522128105e-05, - "loss": 0.001337253674864769, - "step": 14215 - }, - { - "epoch": 2.424552429667519, - "grad_norm": 0.126102477312088, - "learning_rate": 2.6131391721696812e-05, - "loss": 0.0030670080333948134, - "step": 14220 - }, - { - "epoch": 2.42540494458653, - "grad_norm": 0.08583983033895493, - "learning_rate": 2.6106978228510047e-05, - "loss": 0.0025723014026880265, - "step": 14225 - }, - { - "epoch": 2.4262574595055413, - "grad_norm": 0.0516071692109108, - "learning_rate": 2.608257005396482e-05, - "loss": 0.0020857708528637885, - "step": 14230 - }, - { - "epoch": 2.4271099744245523, - "grad_norm": 0.08321108669042587, - "learning_rate": 2.6058167209455697e-05, - "loss": 0.0023237552493810655, - "step": 14235 - }, - { - "epoch": 2.4279624893435634, - "grad_norm": 0.04344337806105614, - "learning_rate": 2.6033769706374727e-05, - "loss": 0.0016502588987350464, - "step": 14240 - }, - { - "epoch": 2.4288150042625745, - "grad_norm": 0.10716593265533447, - "learning_rate": 2.6009377556111488e-05, - "loss": 0.002213199995458126, - "step": 14245 - }, - { - "epoch": 2.4296675191815855, - "grad_norm": 0.08346270024776459, - "learning_rate": 2.598499077005302e-05, - "loss": 0.0023431163281202316, - "step": 14250 - }, - { - "epoch": 2.4305200341005966, - "grad_norm": 0.032770343124866486, - "learning_rate": 2.596060935958392e-05, - "loss": 0.0011562082916498184, - "step": 14255 - }, - { - "epoch": 2.431372549019608, - "grad_norm": 0.09246552735567093, - "learning_rate": 2.593623333608623e-05, - "loss": 0.002459176816046238, - "step": 14260 - }, - { - "epoch": 2.4322250639386187, - "grad_norm": 0.05482151731848717, - "learning_rate": 2.5911862710939474e-05, - "loss": 0.0019333874806761742, - "step": 14265 - }, - { - "epoch": 2.43307757885763, - "grad_norm": 0.03243163228034973, - "learning_rate": 2.588749749552069e-05, - "loss": 0.0017584215849637986, - "step": 14270 - }, - { - "epoch": 2.4339300937766413, - "grad_norm": 0.07286939024925232, - "learning_rate": 2.586313770120434e-05, - "loss": 0.002444162592291832, - "step": 14275 - }, - { - "epoch": 2.4347826086956523, - "grad_norm": 0.05575154721736908, - "learning_rate": 2.583878333936243e-05, - "loss": 0.0024999476969242095, - "step": 14280 - }, - { - "epoch": 2.4356351236146634, - "grad_norm": 0.10262400656938553, - "learning_rate": 2.5814434421364354e-05, - "loss": 0.0018360136076807977, - "step": 14285 - }, - { - "epoch": 2.4364876385336744, - "grad_norm": 0.023329658433794975, - "learning_rate": 2.5790090958577017e-05, - "loss": 0.002157992497086525, - "step": 14290 - }, - { - "epoch": 2.4373401534526855, - "grad_norm": 0.11155838519334793, - "learning_rate": 2.576575296236473e-05, - "loss": 0.002236923947930336, - "step": 14295 - }, - { - "epoch": 2.4381926683716966, - "grad_norm": 0.015751022845506668, - "learning_rate": 2.5741420444089317e-05, - "loss": 0.0023830370977520944, - "step": 14300 - }, - { - "epoch": 2.4390451832907076, - "grad_norm": 0.06451129168272018, - "learning_rate": 2.5717093415109982e-05, - "loss": 0.0012244164943695068, - "step": 14305 - }, - { - "epoch": 2.4398976982097187, - "grad_norm": 0.05141889676451683, - "learning_rate": 2.569277188678339e-05, - "loss": 0.0008386586792767048, - "step": 14310 - }, - { - "epoch": 2.4407502131287298, - "grad_norm": 0.07528503239154816, - "learning_rate": 2.5668455870463654e-05, - "loss": 0.0027780460193753244, - "step": 14315 - }, - { - "epoch": 2.441602728047741, - "grad_norm": 0.0676177367568016, - "learning_rate": 2.5644145377502277e-05, - "loss": 0.002171286940574646, - "step": 14320 - }, - { - "epoch": 2.442455242966752, - "grad_norm": 0.03209437057375908, - "learning_rate": 2.5619840419248228e-05, - "loss": 0.0011549444869160652, - "step": 14325 - }, - { - "epoch": 2.443307757885763, - "grad_norm": 0.0711345300078392, - "learning_rate": 2.559554100704787e-05, - "loss": 0.0029217278584837913, - "step": 14330 - }, - { - "epoch": 2.444160272804774, - "grad_norm": 0.07314640283584595, - "learning_rate": 2.5571247152244955e-05, - "loss": 0.0019763100892305372, - "step": 14335 - }, - { - "epoch": 2.445012787723785, - "grad_norm": 0.058573171496391296, - "learning_rate": 2.5546958866180686e-05, - "loss": 0.0023175042122602465, - "step": 14340 - }, - { - "epoch": 2.445865302642796, - "grad_norm": 0.06780791282653809, - "learning_rate": 2.552267616019362e-05, - "loss": 0.0022560084238648415, - "step": 14345 - }, - { - "epoch": 2.446717817561807, - "grad_norm": 0.0834873840212822, - "learning_rate": 2.5498399045619755e-05, - "loss": 0.0015980398282408714, - "step": 14350 - }, - { - "epoch": 2.4475703324808182, - "grad_norm": 0.06677491962909698, - "learning_rate": 2.5474127533792443e-05, - "loss": 0.002242721430957317, - "step": 14355 - }, - { - "epoch": 2.4484228473998293, - "grad_norm": 0.11220566183328629, - "learning_rate": 2.5449861636042443e-05, - "loss": 0.001862034946680069, - "step": 14360 - }, - { - "epoch": 2.449275362318841, - "grad_norm": 0.05493709817528725, - "learning_rate": 2.542560136369786e-05, - "loss": 0.0020324042066931724, - "step": 14365 - }, - { - "epoch": 2.4501278772378514, - "grad_norm": 0.09586431086063385, - "learning_rate": 2.5401346728084225e-05, - "loss": 0.001961209811270237, - "step": 14370 - }, - { - "epoch": 2.450980392156863, - "grad_norm": 0.06384766101837158, - "learning_rate": 2.5377097740524402e-05, - "loss": 0.002969523146748543, - "step": 14375 - }, - { - "epoch": 2.451832907075874, - "grad_norm": 0.11584383249282837, - "learning_rate": 2.5352854412338607e-05, - "loss": 0.0037360407412052156, - "step": 14380 - }, - { - "epoch": 2.452685421994885, - "grad_norm": 0.05268854275345802, - "learning_rate": 2.5328616754844447e-05, - "loss": 0.0024207277223467828, - "step": 14385 - }, - { - "epoch": 2.453537936913896, - "grad_norm": 0.10550973564386368, - "learning_rate": 2.5304384779356855e-05, - "loss": 0.002147519588470459, - "step": 14390 - }, - { - "epoch": 2.454390451832907, - "grad_norm": 0.11402281373739243, - "learning_rate": 2.5280158497188144e-05, - "loss": 0.0030479192733764648, - "step": 14395 - }, - { - "epoch": 2.455242966751918, - "grad_norm": 0.042928412556648254, - "learning_rate": 2.5255937919647928e-05, - "loss": 0.0009582490660250187, - "step": 14400 - }, - { - "epoch": 2.4560954816709293, - "grad_norm": 0.09466255456209183, - "learning_rate": 2.52317230580432e-05, - "loss": 0.0028877202421426773, - "step": 14405 - }, - { - "epoch": 2.4569479965899403, - "grad_norm": 0.0167491864413023, - "learning_rate": 2.5207513923678246e-05, - "loss": 0.002237674966454506, - "step": 14410 - }, - { - "epoch": 2.4578005115089514, - "grad_norm": 0.11767696589231491, - "learning_rate": 2.518331052785468e-05, - "loss": 0.00270021203905344, - "step": 14415 - }, - { - "epoch": 2.4586530264279625, - "grad_norm": 0.13400165736675262, - "learning_rate": 2.5159112881871494e-05, - "loss": 0.0025584336370229723, - "step": 14420 - }, - { - "epoch": 2.4595055413469735, - "grad_norm": 0.051460813730955124, - "learning_rate": 2.5134920997024915e-05, - "loss": 0.001182288955897093, - "step": 14425 - }, - { - "epoch": 2.4603580562659846, - "grad_norm": 0.05078651383519173, - "learning_rate": 2.511073488460855e-05, - "loss": 0.001340255793184042, - "step": 14430 - }, - { - "epoch": 2.4612105711849956, - "grad_norm": 0.06714113801717758, - "learning_rate": 2.5086554555913245e-05, - "loss": 0.0019190860912203789, - "step": 14435 - }, - { - "epoch": 2.4620630861040067, - "grad_norm": 0.05757109820842743, - "learning_rate": 2.5062380022227226e-05, - "loss": 0.0016031917184591293, - "step": 14440 - }, - { - "epoch": 2.4629156010230178, - "grad_norm": 0.045739807188510895, - "learning_rate": 2.5038211294835944e-05, - "loss": 0.0020723894238471987, - "step": 14445 - }, - { - "epoch": 2.463768115942029, - "grad_norm": 0.06381653994321823, - "learning_rate": 2.5014048385022156e-05, - "loss": 0.002237732522189617, - "step": 14450 - }, - { - "epoch": 2.46462063086104, - "grad_norm": 0.08096056431531906, - "learning_rate": 2.498989130406594e-05, - "loss": 0.0017275322228670121, - "step": 14455 - }, - { - "epoch": 2.4654731457800514, - "grad_norm": 0.04627775028347969, - "learning_rate": 2.4965740063244582e-05, - "loss": 0.0028135737404227255, - "step": 14460 - }, - { - "epoch": 2.466325660699062, - "grad_norm": 0.07789458334445953, - "learning_rate": 2.4941594673832737e-05, - "loss": 0.0017165482044219972, - "step": 14465 - }, - { - "epoch": 2.4671781756180735, - "grad_norm": 0.03633275255560875, - "learning_rate": 2.491745514710224e-05, - "loss": 0.003017013892531395, - "step": 14470 - }, - { - "epoch": 2.4680306905370846, - "grad_norm": 0.07425010204315186, - "learning_rate": 2.489332149432224e-05, - "loss": 0.002849162742495537, - "step": 14475 - }, - { - "epoch": 2.4688832054560956, - "grad_norm": 0.08738066256046295, - "learning_rate": 2.486919372675911e-05, - "loss": 0.003103286027908325, - "step": 14480 - }, - { - "epoch": 2.4697357203751067, - "grad_norm": 0.059462107717990875, - "learning_rate": 2.4845071855676526e-05, - "loss": 0.003129242733120918, - "step": 14485 - }, - { - "epoch": 2.4705882352941178, - "grad_norm": 0.12157633155584335, - "learning_rate": 2.4820955892335358e-05, - "loss": 0.00188961960375309, - "step": 14490 - }, - { - "epoch": 2.471440750213129, - "grad_norm": 0.04780135303735733, - "learning_rate": 2.4796845847993743e-05, - "loss": 0.001777658425271511, - "step": 14495 - }, - { - "epoch": 2.47229326513214, - "grad_norm": 0.08734847605228424, - "learning_rate": 2.477274173390706e-05, - "loss": 0.0025872459635138513, - "step": 14500 - }, - { - "epoch": 2.473145780051151, - "grad_norm": 0.08637238293886185, - "learning_rate": 2.4748643561327887e-05, - "loss": 0.0034623559564352035, - "step": 14505 - }, - { - "epoch": 2.473998294970162, - "grad_norm": 0.1351020187139511, - "learning_rate": 2.4724551341506083e-05, - "loss": 0.0025932226330041886, - "step": 14510 - }, - { - "epoch": 2.474850809889173, - "grad_norm": 0.0965266153216362, - "learning_rate": 2.4700465085688678e-05, - "loss": 0.0021650340408086778, - "step": 14515 - }, - { - "epoch": 2.475703324808184, - "grad_norm": 0.06353217363357544, - "learning_rate": 2.4676384805119954e-05, - "loss": 0.0017436511814594268, - "step": 14520 - }, - { - "epoch": 2.476555839727195, - "grad_norm": 0.09694099426269531, - "learning_rate": 2.4652310511041376e-05, - "loss": 0.002511733956634998, - "step": 14525 - }, - { - "epoch": 2.4774083546462062, - "grad_norm": 0.13362912833690643, - "learning_rate": 2.4628242214691614e-05, - "loss": 0.0020636413246393204, - "step": 14530 - }, - { - "epoch": 2.4782608695652173, - "grad_norm": 0.05283635854721069, - "learning_rate": 2.4604179927306575e-05, - "loss": 0.002218991331756115, - "step": 14535 - }, - { - "epoch": 2.4791133844842284, - "grad_norm": 0.062003809958696365, - "learning_rate": 2.4580123660119317e-05, - "loss": 0.0021969690918922425, - "step": 14540 - }, - { - "epoch": 2.4799658994032394, - "grad_norm": 0.1058121919631958, - "learning_rate": 2.4556073424360115e-05, - "loss": 0.002514044567942619, - "step": 14545 - }, - { - "epoch": 2.4808184143222505, - "grad_norm": 0.06746378540992737, - "learning_rate": 2.4532029231256397e-05, - "loss": 0.001485797483474016, - "step": 14550 - }, - { - "epoch": 2.4816709292412615, - "grad_norm": 0.043892405927181244, - "learning_rate": 2.4507991092032832e-05, - "loss": 0.0021189235150814055, - "step": 14555 - }, - { - "epoch": 2.4825234441602726, - "grad_norm": 0.04537670686841011, - "learning_rate": 2.4483959017911195e-05, - "loss": 0.0018616810441017151, - "step": 14560 - }, - { - "epoch": 2.483375959079284, - "grad_norm": 0.04895998165011406, - "learning_rate": 2.445993302011046e-05, - "loss": 0.0016737811267375946, - "step": 14565 - }, - { - "epoch": 2.484228473998295, - "grad_norm": 0.07096420228481293, - "learning_rate": 2.4435913109846773e-05, - "loss": 0.0032933827489614485, - "step": 14570 - }, - { - "epoch": 2.485080988917306, - "grad_norm": 0.07391496002674103, - "learning_rate": 2.4411899298333403e-05, - "loss": 0.0021815944463014604, - "step": 14575 - }, - { - "epoch": 2.4859335038363173, - "grad_norm": 0.12835897505283356, - "learning_rate": 2.438789159678083e-05, - "loss": 0.0032001670449972154, - "step": 14580 - }, - { - "epoch": 2.4867860187553283, - "grad_norm": 0.0947527140378952, - "learning_rate": 2.436389001639662e-05, - "loss": 0.002512381225824356, - "step": 14585 - }, - { - "epoch": 2.4876385336743394, - "grad_norm": 0.06699662655591965, - "learning_rate": 2.4339894568385526e-05, - "loss": 0.0014906782656908036, - "step": 14590 - }, - { - "epoch": 2.4884910485933505, - "grad_norm": 0.042523179203271866, - "learning_rate": 2.4315905263949404e-05, - "loss": 0.0012685291469097138, - "step": 14595 - }, - { - "epoch": 2.4893435635123615, - "grad_norm": 0.03687009960412979, - "learning_rate": 2.4291922114287286e-05, - "loss": 0.0016289660707116127, - "step": 14600 - }, - { - "epoch": 2.4901960784313726, - "grad_norm": 0.07698170840740204, - "learning_rate": 2.4267945130595287e-05, - "loss": 0.002090749144554138, - "step": 14605 - }, - { - "epoch": 2.4910485933503836, - "grad_norm": 0.08533983677625656, - "learning_rate": 2.4243974324066653e-05, - "loss": 0.002234157919883728, - "step": 14610 - }, - { - "epoch": 2.4919011082693947, - "grad_norm": 0.10050603002309799, - "learning_rate": 2.422000970589177e-05, - "loss": 0.002818283811211586, - "step": 14615 - }, - { - "epoch": 2.4927536231884058, - "grad_norm": 0.057129960507154465, - "learning_rate": 2.4196051287258095e-05, - "loss": 0.004226747527718544, - "step": 14620 - }, - { - "epoch": 2.493606138107417, - "grad_norm": 0.08218846470117569, - "learning_rate": 2.4172099079350256e-05, - "loss": 0.0016387354582548142, - "step": 14625 - }, - { - "epoch": 2.494458653026428, - "grad_norm": 0.07963220775127411, - "learning_rate": 2.4148153093349894e-05, - "loss": 0.002778450772166252, - "step": 14630 - }, - { - "epoch": 2.495311167945439, - "grad_norm": 0.058049995452165604, - "learning_rate": 2.4124213340435834e-05, - "loss": 0.0024016743525862696, - "step": 14635 - }, - { - "epoch": 2.49616368286445, - "grad_norm": 0.13127438724040985, - "learning_rate": 2.410027983178392e-05, - "loss": 0.0038317706435918807, - "step": 14640 - }, - { - "epoch": 2.497016197783461, - "grad_norm": 0.048698920756578445, - "learning_rate": 2.407635257856711e-05, - "loss": 0.00152621790766716, - "step": 14645 - }, - { - "epoch": 2.497868712702472, - "grad_norm": 0.02338201180100441, - "learning_rate": 2.405243159195546e-05, - "loss": 0.0027417311444878577, - "step": 14650 - }, - { - "epoch": 2.498721227621483, - "grad_norm": 0.07108049094676971, - "learning_rate": 2.402851688311607e-05, - "loss": 0.001716497913002968, - "step": 14655 - }, - { - "epoch": 2.4995737425404947, - "grad_norm": 0.028342491015791893, - "learning_rate": 2.4004608463213126e-05, - "loss": 0.0013954185880720616, - "step": 14660 - }, - { - "epoch": 2.4995737425404947, - "eval_loss": 0.04806143045425415, - "eval_runtime": 3.6619, - "eval_samples_per_second": 68.816, - "eval_steps_per_second": 1.092, - "step": 14660 - }, - { - "eval_cer_subset": 0.01446089208070741, - "eval_cer_subset_edit_distance": 888, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 14660 - }, - { - "epoch": 2.5004262574595053, - "grad_norm": 0.0640476793050766, - "learning_rate": 2.398070634340786e-05, - "loss": 0.002191193774342537, - "step": 14665 - }, - { - "epoch": 2.501278772378517, - "grad_norm": 0.034168582409620285, - "learning_rate": 2.3956810534858607e-05, - "loss": 0.0013371256180107593, - "step": 14670 - }, - { - "epoch": 2.502131287297528, - "grad_norm": 0.07181207835674286, - "learning_rate": 2.3932921048720698e-05, - "loss": 0.0019236713647842406, - "step": 14675 - }, - { - "epoch": 2.502983802216539, - "grad_norm": 0.0469810888171196, - "learning_rate": 2.3909037896146552e-05, - "loss": 0.0018267782405018806, - "step": 14680 - }, - { - "epoch": 2.50383631713555, - "grad_norm": 0.028826232999563217, - "learning_rate": 2.3885161088285617e-05, - "loss": 0.0010010387748479843, - "step": 14685 - }, - { - "epoch": 2.504688832054561, - "grad_norm": 0.10193654894828796, - "learning_rate": 2.386129063628437e-05, - "loss": 0.0024211697280406954, - "step": 14690 - }, - { - "epoch": 2.505541346973572, - "grad_norm": 0.14754284918308258, - "learning_rate": 2.3837426551286357e-05, - "loss": 0.0020253278315067293, - "step": 14695 - }, - { - "epoch": 2.506393861892583, - "grad_norm": 0.12155842036008835, - "learning_rate": 2.3813568844432103e-05, - "loss": 0.002546634152531624, - "step": 14700 - }, - { - "epoch": 2.5072463768115942, - "grad_norm": 0.07209643721580505, - "learning_rate": 2.378971752685919e-05, - "loss": 0.002657034806907177, - "step": 14705 - }, - { - "epoch": 2.5080988917306053, - "grad_norm": 0.1210934966802597, - "learning_rate": 2.3765872609702192e-05, - "loss": 0.002788655459880829, - "step": 14710 - }, - { - "epoch": 2.5089514066496164, - "grad_norm": 0.05647290125489235, - "learning_rate": 2.374203410409274e-05, - "loss": 0.0022313324734568596, - "step": 14715 - }, - { - "epoch": 2.5098039215686274, - "grad_norm": 0.18282383680343628, - "learning_rate": 2.371820202115942e-05, - "loss": 0.0019404273480176926, - "step": 14720 - }, - { - "epoch": 2.5106564364876385, - "grad_norm": 0.022936735302209854, - "learning_rate": 2.369437637202784e-05, - "loss": 0.0015677658841013909, - "step": 14725 - }, - { - "epoch": 2.5115089514066495, - "grad_norm": 0.023840973153710365, - "learning_rate": 2.3670557167820614e-05, - "loss": 0.0017029233276844025, - "step": 14730 - }, - { - "epoch": 2.5123614663256606, - "grad_norm": 0.01897628791630268, - "learning_rate": 2.3646744419657323e-05, - "loss": 0.001359708234667778, - "step": 14735 - }, - { - "epoch": 2.5132139812446717, - "grad_norm": 0.03326602280139923, - "learning_rate": 2.3622938138654575e-05, - "loss": 0.0006220267619937659, - "step": 14740 - }, - { - "epoch": 2.5140664961636827, - "grad_norm": 0.0619979090988636, - "learning_rate": 2.3599138335925914e-05, - "loss": 0.002030659094452858, - "step": 14745 - }, - { - "epoch": 2.514919011082694, - "grad_norm": 0.03451136127114296, - "learning_rate": 2.3575345022581896e-05, - "loss": 0.0016797656193375588, - "step": 14750 - }, - { - "epoch": 2.5157715260017053, - "grad_norm": 0.0554860420525074, - "learning_rate": 2.3551558209730018e-05, - "loss": 0.0016403241083025933, - "step": 14755 - }, - { - "epoch": 2.516624040920716, - "grad_norm": 0.08686158061027527, - "learning_rate": 2.3527777908474744e-05, - "loss": 0.003415975719690323, - "step": 14760 - }, - { - "epoch": 2.5174765558397274, - "grad_norm": 0.06636729836463928, - "learning_rate": 2.3504004129917542e-05, - "loss": 0.0018416630104184152, - "step": 14765 - }, - { - "epoch": 2.518329070758738, - "grad_norm": 0.08038193732500076, - "learning_rate": 2.3480236885156776e-05, - "loss": 0.002185085415840149, - "step": 14770 - }, - { - "epoch": 2.5191815856777495, - "grad_norm": 0.06370148062705994, - "learning_rate": 2.3456476185287802e-05, - "loss": 0.001394746359437704, - "step": 14775 - }, - { - "epoch": 2.5200341005967606, - "grad_norm": 0.05585980415344238, - "learning_rate": 2.3432722041402886e-05, - "loss": 0.003035778924822807, - "step": 14780 - }, - { - "epoch": 2.5208866155157716, - "grad_norm": 0.08676521480083466, - "learning_rate": 2.340897446459128e-05, - "loss": 0.002279702201485634, - "step": 14785 - }, - { - "epoch": 2.5217391304347827, - "grad_norm": 0.0421539731323719, - "learning_rate": 2.3385233465939124e-05, - "loss": 0.0015795350074768067, - "step": 14790 - }, - { - "epoch": 2.5225916453537938, - "grad_norm": 0.09380512684583664, - "learning_rate": 2.3361499056529516e-05, - "loss": 0.0024957180023193358, - "step": 14795 - }, - { - "epoch": 2.523444160272805, - "grad_norm": 0.05541060492396355, - "learning_rate": 2.3337771247442457e-05, - "loss": 0.0022170023992657663, - "step": 14800 - }, - { - "epoch": 2.524296675191816, - "grad_norm": 0.030795352533459663, - "learning_rate": 2.3314050049754872e-05, - "loss": 0.0015011204406619072, - "step": 14805 - }, - { - "epoch": 2.525149190110827, - "grad_norm": 0.040677715092897415, - "learning_rate": 2.329033547454063e-05, - "loss": 0.0023216739296913146, - "step": 14810 - }, - { - "epoch": 2.526001705029838, - "grad_norm": 0.036884501576423645, - "learning_rate": 2.3266627532870462e-05, - "loss": 0.0025786716490983964, - "step": 14815 - }, - { - "epoch": 2.526854219948849, - "grad_norm": 0.02398660033941269, - "learning_rate": 2.324292623581204e-05, - "loss": 0.0017933860421180726, - "step": 14820 - }, - { - "epoch": 2.52770673486786, - "grad_norm": 0.06601176410913467, - "learning_rate": 2.321923159442989e-05, - "loss": 0.002885073609650135, - "step": 14825 - }, - { - "epoch": 2.528559249786871, - "grad_norm": 0.08684834837913513, - "learning_rate": 2.3195543619785496e-05, - "loss": 0.0026486974209547045, - "step": 14830 - }, - { - "epoch": 2.5294117647058822, - "grad_norm": 0.04674920067191124, - "learning_rate": 2.3171862322937173e-05, - "loss": 0.0025192024186253548, - "step": 14835 - }, - { - "epoch": 2.5302642796248933, - "grad_norm": 0.059271425008773804, - "learning_rate": 2.314818771494013e-05, - "loss": 0.001517033763229847, - "step": 14840 - }, - { - "epoch": 2.5311167945439044, - "grad_norm": 0.03094577044248581, - "learning_rate": 2.312451980684648e-05, - "loss": 0.001731237769126892, - "step": 14845 - }, - { - "epoch": 2.531969309462916, - "grad_norm": 0.04043465852737427, - "learning_rate": 2.3100858609705167e-05, - "loss": 0.002348044328391552, - "step": 14850 - }, - { - "epoch": 2.5328218243819265, - "grad_norm": 0.05144953727722168, - "learning_rate": 2.3077204134562054e-05, - "loss": 0.0019841600209474564, - "step": 14855 - }, - { - "epoch": 2.533674339300938, - "grad_norm": 0.07220125198364258, - "learning_rate": 2.3053556392459813e-05, - "loss": 0.002818341739475727, - "step": 14860 - }, - { - "epoch": 2.5345268542199486, - "grad_norm": 0.08199959248304367, - "learning_rate": 2.302991539443801e-05, - "loss": 0.0024914808571338655, - "step": 14865 - }, - { - "epoch": 2.53537936913896, - "grad_norm": 0.07761549204587936, - "learning_rate": 2.3006281151533047e-05, - "loss": 0.003526497259736061, - "step": 14870 - }, - { - "epoch": 2.536231884057971, - "grad_norm": 0.1002337783575058, - "learning_rate": 2.298265367477816e-05, - "loss": 0.0022296305745840073, - "step": 14875 - }, - { - "epoch": 2.5370843989769822, - "grad_norm": 0.05918731540441513, - "learning_rate": 2.295903297520346e-05, - "loss": 0.0012512234039604663, - "step": 14880 - }, - { - "epoch": 2.5379369138959933, - "grad_norm": 0.053112782537937164, - "learning_rate": 2.2935419063835868e-05, - "loss": 0.0017477553337812424, - "step": 14885 - }, - { - "epoch": 2.5387894288150044, - "grad_norm": 0.061820488423109055, - "learning_rate": 2.2911811951699155e-05, - "loss": 0.0022626927122473715, - "step": 14890 - }, - { - "epoch": 2.5396419437340154, - "grad_norm": 0.11703728139400482, - "learning_rate": 2.288821164981387e-05, - "loss": 0.0025926090776920317, - "step": 14895 - }, - { - "epoch": 2.5404944586530265, - "grad_norm": 0.04635873809456825, - "learning_rate": 2.2864618169197468e-05, - "loss": 0.0017809070646762847, - "step": 14900 - }, - { - "epoch": 2.5413469735720375, - "grad_norm": 0.11287315934896469, - "learning_rate": 2.2841031520864136e-05, - "loss": 0.00238190982490778, - "step": 14905 - }, - { - "epoch": 2.5421994884910486, - "grad_norm": 0.026871057227253914, - "learning_rate": 2.2817451715824924e-05, - "loss": 0.0015336395241320132, - "step": 14910 - }, - { - "epoch": 2.5430520034100597, - "grad_norm": 0.06438672542572021, - "learning_rate": 2.279387876508766e-05, - "loss": 0.001995333656668663, - "step": 14915 - }, - { - "epoch": 2.5439045183290707, - "grad_norm": 0.06547024846076965, - "learning_rate": 2.277031267965697e-05, - "loss": 0.002060149237513542, - "step": 14920 - }, - { - "epoch": 2.544757033248082, - "grad_norm": 0.07563283294439316, - "learning_rate": 2.2746753470534322e-05, - "loss": 0.0017446789890527726, - "step": 14925 - }, - { - "epoch": 2.545609548167093, - "grad_norm": 0.028652798384428024, - "learning_rate": 2.27232011487179e-05, - "loss": 0.0022552181035280228, - "step": 14930 - }, - { - "epoch": 2.546462063086104, - "grad_norm": 0.0893654152750969, - "learning_rate": 2.269965572520274e-05, - "loss": 0.0029813185334205627, - "step": 14935 - }, - { - "epoch": 2.547314578005115, - "grad_norm": 0.04628995433449745, - "learning_rate": 2.26761172109806e-05, - "loss": 0.0025255372747778893, - "step": 14940 - }, - { - "epoch": 2.548167092924126, - "grad_norm": 0.09175702929496765, - "learning_rate": 2.2652585617040076e-05, - "loss": 0.004577648639678955, - "step": 14945 - }, - { - "epoch": 2.549019607843137, - "grad_norm": 0.041957542300224304, - "learning_rate": 2.262906095436648e-05, - "loss": 0.002292825095355511, - "step": 14950 - }, - { - "epoch": 2.5498721227621486, - "grad_norm": 0.061231136322021484, - "learning_rate": 2.2605543233941904e-05, - "loss": 0.002193107083439827, - "step": 14955 - }, - { - "epoch": 2.550724637681159, - "grad_norm": 0.08939548581838608, - "learning_rate": 2.2582032466745206e-05, - "loss": 0.0013388695195317268, - "step": 14960 - }, - { - "epoch": 2.5515771526001707, - "grad_norm": 0.10106560587882996, - "learning_rate": 2.255852866375199e-05, - "loss": 0.004271790385246277, - "step": 14965 - }, - { - "epoch": 2.5524296675191813, - "grad_norm": 0.04756918177008629, - "learning_rate": 2.253503183593463e-05, - "loss": 0.002253059670329094, - "step": 14970 - }, - { - "epoch": 2.553282182438193, - "grad_norm": 0.06336323171854019, - "learning_rate": 2.2511541994262203e-05, - "loss": 0.0019065763801336289, - "step": 14975 - }, - { - "epoch": 2.554134697357204, - "grad_norm": 0.021801188588142395, - "learning_rate": 2.2488059149700568e-05, - "loss": 0.001671123132109642, - "step": 14980 - }, - { - "epoch": 2.554987212276215, - "grad_norm": 0.07580327987670898, - "learning_rate": 2.2464583313212294e-05, - "loss": 0.0031477130949497225, - "step": 14985 - }, - { - "epoch": 2.555839727195226, - "grad_norm": 0.07757267355918884, - "learning_rate": 2.244111449575666e-05, - "loss": 0.0026445770636200905, - "step": 14990 - }, - { - "epoch": 2.556692242114237, - "grad_norm": 0.043020669370889664, - "learning_rate": 2.2417652708289726e-05, - "loss": 0.002764601819217205, - "step": 14995 - }, - { - "epoch": 2.557544757033248, - "grad_norm": 0.03915635868906975, - "learning_rate": 2.2394197961764212e-05, - "loss": 0.002291044779121876, - "step": 15000 - }, - { - "epoch": 2.558397271952259, - "grad_norm": 0.0665091872215271, - "learning_rate": 2.2370750267129586e-05, - "loss": 0.0017822932451963425, - "step": 15005 - }, - { - "epoch": 2.5592497868712702, - "grad_norm": 0.08525653183460236, - "learning_rate": 2.234730963533199e-05, - "loss": 0.0018473496660590173, - "step": 15010 - }, - { - "epoch": 2.5601023017902813, - "grad_norm": 0.05346886068582535, - "learning_rate": 2.2323876077314327e-05, - "loss": 0.002567983791232109, - "step": 15015 - }, - { - "epoch": 2.5609548167092924, - "grad_norm": 0.04240184277296066, - "learning_rate": 2.2300449604016123e-05, - "loss": 0.0021606752648949622, - "step": 15020 - }, - { - "epoch": 2.5618073316283034, - "grad_norm": 0.08507288247346878, - "learning_rate": 2.2277030226373667e-05, - "loss": 0.0023022485896945, - "step": 15025 - }, - { - "epoch": 2.5626598465473145, - "grad_norm": 0.07468844205141068, - "learning_rate": 2.225361795531989e-05, - "loss": 0.0030104584991931917, - "step": 15030 - }, - { - "epoch": 2.5635123614663256, - "grad_norm": 0.03731158375740051, - "learning_rate": 2.22302128017844e-05, - "loss": 0.0019535191357135774, - "step": 15035 - }, - { - "epoch": 2.5643648763853366, - "grad_norm": 0.09111307561397552, - "learning_rate": 2.2206814776693536e-05, - "loss": 0.0016553621739149094, - "step": 15040 - }, - { - "epoch": 2.5652173913043477, - "grad_norm": 0.04197632521390915, - "learning_rate": 2.2183423890970255e-05, - "loss": 0.0018846508115530013, - "step": 15045 - }, - { - "epoch": 2.566069906223359, - "grad_norm": 0.09259206801652908, - "learning_rate": 2.2160040155534206e-05, - "loss": 0.0028481241315603256, - "step": 15050 - }, - { - "epoch": 2.56692242114237, - "grad_norm": 0.07880257815122604, - "learning_rate": 2.2136663581301696e-05, - "loss": 0.002117951214313507, - "step": 15055 - }, - { - "epoch": 2.5677749360613813, - "grad_norm": 0.0969267189502716, - "learning_rate": 2.2113294179185667e-05, - "loss": 0.00240680705755949, - "step": 15060 - }, - { - "epoch": 2.568627450980392, - "grad_norm": 0.06295698881149292, - "learning_rate": 2.2089931960095754e-05, - "loss": 0.0012395468540489674, - "step": 15065 - }, - { - "epoch": 2.5694799658994034, - "grad_norm": 0.0716724842786789, - "learning_rate": 2.2066576934938224e-05, - "loss": 0.004721567407250405, - "step": 15070 - }, - { - "epoch": 2.5703324808184145, - "grad_norm": 0.04790467768907547, - "learning_rate": 2.2043229114615967e-05, - "loss": 0.0016566522419452668, - "step": 15075 - }, - { - "epoch": 2.5711849957374255, - "grad_norm": 0.014919254928827286, - "learning_rate": 2.2019888510028515e-05, - "loss": 0.00200834795832634, - "step": 15080 - }, - { - "epoch": 2.5720375106564366, - "grad_norm": 0.07281307876110077, - "learning_rate": 2.1996555132072063e-05, - "loss": 0.0021370718255639075, - "step": 15085 - }, - { - "epoch": 2.5728900255754477, - "grad_norm": 0.04918764904141426, - "learning_rate": 2.197322899163938e-05, - "loss": 0.002188747748732567, - "step": 15090 - }, - { - "epoch": 2.5737425404944587, - "grad_norm": 0.05246208980679512, - "learning_rate": 2.1949910099619913e-05, - "loss": 0.002106213942170143, - "step": 15095 - }, - { - "epoch": 2.57459505541347, - "grad_norm": 0.07900833338499069, - "learning_rate": 2.1926598466899674e-05, - "loss": 0.0014828240498900413, - "step": 15100 - }, - { - "epoch": 2.575447570332481, - "grad_norm": 0.1235758364200592, - "learning_rate": 2.19032941043613e-05, - "loss": 0.0033482640981674196, - "step": 15105 - }, - { - "epoch": 2.576300085251492, - "grad_norm": 0.06170985475182533, - "learning_rate": 2.187999702288408e-05, - "loss": 0.0019921788945794104, - "step": 15110 - }, - { - "epoch": 2.577152600170503, - "grad_norm": 0.1210661381483078, - "learning_rate": 2.185670723334384e-05, - "loss": 0.0019077232107520103, - "step": 15115 - }, - { - "epoch": 2.578005115089514, - "grad_norm": 0.06942020356655121, - "learning_rate": 2.1833424746613026e-05, - "loss": 0.0019503291696310043, - "step": 15120 - }, - { - "epoch": 2.578857630008525, - "grad_norm": 0.09329917281866074, - "learning_rate": 2.1810149573560693e-05, - "loss": 0.0026118636131286623, - "step": 15125 - }, - { - "epoch": 2.579710144927536, - "grad_norm": 0.1026659607887268, - "learning_rate": 2.1786881725052445e-05, - "loss": 0.002567945420742035, - "step": 15130 - }, - { - "epoch": 2.580562659846547, - "grad_norm": 0.06306809186935425, - "learning_rate": 2.1763621211950517e-05, - "loss": 0.001768135279417038, - "step": 15135 - }, - { - "epoch": 2.5814151747655583, - "grad_norm": 0.07647090405225754, - "learning_rate": 2.174036804511367e-05, - "loss": 0.0015752470120787621, - "step": 15140 - }, - { - "epoch": 2.5822676896845693, - "grad_norm": 0.045121923089027405, - "learning_rate": 2.171712223539726e-05, - "loss": 0.0025726621970534325, - "step": 15145 - }, - { - "epoch": 2.5831202046035804, - "grad_norm": 0.040667545050382614, - "learning_rate": 2.1693883793653188e-05, - "loss": 0.002222199738025665, - "step": 15150 - }, - { - "epoch": 2.583972719522592, - "grad_norm": 0.08505896478891373, - "learning_rate": 2.1670652730729968e-05, - "loss": 0.0030935727059841155, - "step": 15155 - }, - { - "epoch": 2.5848252344416025, - "grad_norm": 0.05064573138952255, - "learning_rate": 2.164742905747261e-05, - "loss": 0.002387380041182041, - "step": 15160 - }, - { - "epoch": 2.585677749360614, - "grad_norm": 0.0372583344578743, - "learning_rate": 2.1624212784722684e-05, - "loss": 0.0026363788172602655, - "step": 15165 - }, - { - "epoch": 2.5865302642796246, - "grad_norm": 0.06209828332066536, - "learning_rate": 2.1601003923318344e-05, - "loss": 0.0029974017292261125, - "step": 15170 - }, - { - "epoch": 2.587382779198636, - "grad_norm": 0.049798715859651566, - "learning_rate": 2.157780248409424e-05, - "loss": 0.0016345694661140443, - "step": 15175 - }, - { - "epoch": 2.588235294117647, - "grad_norm": 0.06752602010965347, - "learning_rate": 2.1554608477881597e-05, - "loss": 0.0025367341935634614, - "step": 15180 - }, - { - "epoch": 2.5890878090366582, - "grad_norm": 0.10456907004117966, - "learning_rate": 2.1531421915508137e-05, - "loss": 0.002495551109313965, - "step": 15185 - }, - { - "epoch": 2.5899403239556693, - "grad_norm": 0.0790029838681221, - "learning_rate": 2.1508242807798114e-05, - "loss": 0.0025735165923833846, - "step": 15190 - }, - { - "epoch": 2.5907928388746804, - "grad_norm": 0.030237069353461266, - "learning_rate": 2.1485071165572298e-05, - "loss": 0.0018124323338270187, - "step": 15195 - }, - { - "epoch": 2.5916453537936914, - "grad_norm": 0.06030745431780815, - "learning_rate": 2.1461906999648008e-05, - "loss": 0.002845403365790844, - "step": 15200 - }, - { - "epoch": 2.5924978687127025, - "grad_norm": 0.10071806609630585, - "learning_rate": 2.1438750320839037e-05, - "loss": 0.002326494827866554, - "step": 15205 - }, - { - "epoch": 2.5933503836317136, - "grad_norm": 0.050379570573568344, - "learning_rate": 2.1415601139955686e-05, - "loss": 0.0019888151437044144, - "step": 15210 - }, - { - "epoch": 2.5942028985507246, - "grad_norm": 0.09101511538028717, - "learning_rate": 2.1392459467804753e-05, - "loss": 0.003049125336110592, - "step": 15215 - }, - { - "epoch": 2.5950554134697357, - "grad_norm": 0.03804527968168259, - "learning_rate": 2.1369325315189553e-05, - "loss": 0.0016767382621765137, - "step": 15220 - }, - { - "epoch": 2.5959079283887467, - "grad_norm": 0.0779503807425499, - "learning_rate": 2.1346198692909895e-05, - "loss": 0.001964661478996277, - "step": 15225 - }, - { - "epoch": 2.596760443307758, - "grad_norm": 0.07922998070716858, - "learning_rate": 2.1323079611762033e-05, - "loss": 0.001821339875459671, - "step": 15230 - }, - { - "epoch": 2.597612958226769, - "grad_norm": 0.045152947306632996, - "learning_rate": 2.1299968082538734e-05, - "loss": 0.0011449499055743218, - "step": 15235 - }, - { - "epoch": 2.59846547314578, - "grad_norm": 0.026626303791999817, - "learning_rate": 2.1276864116029207e-05, - "loss": 0.0016753975301980971, - "step": 15240 - }, - { - "epoch": 2.599317988064791, - "grad_norm": 0.10935933142900467, - "learning_rate": 2.1253767723019188e-05, - "loss": 0.0026281427592039107, - "step": 15245 - }, - { - "epoch": 2.6001705029838025, - "grad_norm": 0.08133106678724289, - "learning_rate": 2.123067891429082e-05, - "loss": 0.001925770938396454, - "step": 15250 - }, - { - "epoch": 2.601023017902813, - "grad_norm": 0.04865674301981926, - "learning_rate": 2.1207597700622728e-05, - "loss": 0.0019936567172408105, - "step": 15255 - }, - { - "epoch": 2.6018755328218246, - "grad_norm": 0.11841622740030289, - "learning_rate": 2.1184524092789982e-05, - "loss": 0.00298205092549324, - "step": 15260 - }, - { - "epoch": 2.602728047740835, - "grad_norm": 0.04416264593601227, - "learning_rate": 2.1161458101564115e-05, - "loss": 0.0036853265017271044, - "step": 15265 - }, - { - "epoch": 2.6035805626598467, - "grad_norm": 0.08603575825691223, - "learning_rate": 2.1138399737713118e-05, - "loss": 0.004533383995294571, - "step": 15270 - }, - { - "epoch": 2.604433077578858, - "grad_norm": 0.0626961886882782, - "learning_rate": 2.1115349012001388e-05, - "loss": 0.0017330382019281388, - "step": 15275 - }, - { - "epoch": 2.605285592497869, - "grad_norm": 0.12894456088542938, - "learning_rate": 2.1092305935189773e-05, - "loss": 0.0037327542901039123, - "step": 15280 - }, - { - "epoch": 2.60613810741688, - "grad_norm": 0.10542263090610504, - "learning_rate": 2.106927051803554e-05, - "loss": 0.0026806583628058434, - "step": 15285 - }, - { - "epoch": 2.606990622335891, - "grad_norm": 0.05068397521972656, - "learning_rate": 2.1046242771292386e-05, - "loss": 0.0014822190627455712, - "step": 15290 - }, - { - "epoch": 2.607843137254902, - "grad_norm": 0.08927716314792633, - "learning_rate": 2.102322270571045e-05, - "loss": 0.003242380917072296, - "step": 15295 - }, - { - "epoch": 2.608695652173913, - "grad_norm": 0.05792883411049843, - "learning_rate": 2.1000210332036248e-05, - "loss": 0.0017583563923835755, - "step": 15300 - }, - { - "epoch": 2.609548167092924, - "grad_norm": 0.0648881196975708, - "learning_rate": 2.09772056610127e-05, - "loss": 0.002197427675127983, - "step": 15305 - }, - { - "epoch": 2.610400682011935, - "grad_norm": 0.060977645218372345, - "learning_rate": 2.095420870337919e-05, - "loss": 0.002055848389863968, - "step": 15310 - }, - { - "epoch": 2.6112531969309463, - "grad_norm": 0.04654461517930031, - "learning_rate": 2.093121946987146e-05, - "loss": 0.002073242887854576, - "step": 15315 - }, - { - "epoch": 2.6121057118499573, - "grad_norm": 0.04738753288984299, - "learning_rate": 2.0908237971221634e-05, - "loss": 0.0017290839925408364, - "step": 15320 - }, - { - "epoch": 2.6129582267689684, - "grad_norm": 0.07519782334566116, - "learning_rate": 2.0885264218158248e-05, - "loss": 0.0012821624055504798, - "step": 15325 - }, - { - "epoch": 2.6138107416879794, - "grad_norm": 0.06078832224011421, - "learning_rate": 2.0862298221406206e-05, - "loss": 0.0019888199865818025, - "step": 15330 - }, - { - "epoch": 2.6146632566069905, - "grad_norm": 0.04823920503258705, - "learning_rate": 2.083933999168679e-05, - "loss": 0.0015226650051772595, - "step": 15335 - }, - { - "epoch": 2.6155157715260016, - "grad_norm": 0.04050251096487045, - "learning_rate": 2.0816389539717694e-05, - "loss": 0.0024490740150213243, - "step": 15340 - }, - { - "epoch": 2.6163682864450126, - "grad_norm": 0.08443193882703781, - "learning_rate": 2.0793446876212937e-05, - "loss": 0.0027990926057100294, - "step": 15345 - }, - { - "epoch": 2.6172208013640237, - "grad_norm": 0.03322751075029373, - "learning_rate": 2.07705120118829e-05, - "loss": 0.0011861051432788372, - "step": 15350 - }, - { - "epoch": 2.618073316283035, - "grad_norm": 0.06874673068523407, - "learning_rate": 2.0747584957434375e-05, - "loss": 0.0018939610570669174, - "step": 15355 - }, - { - "epoch": 2.618925831202046, - "grad_norm": 0.04990018159151077, - "learning_rate": 2.0724665723570437e-05, - "loss": 0.0013890796341001987, - "step": 15360 - }, - { - "epoch": 2.6197783461210573, - "grad_norm": 0.06342940032482147, - "learning_rate": 2.0701754320990586e-05, - "loss": 0.0019270982593297958, - "step": 15365 - }, - { - "epoch": 2.620630861040068, - "grad_norm": 0.05647345632314682, - "learning_rate": 2.0678850760390607e-05, - "loss": 0.0019773844629526137, - "step": 15370 - }, - { - "epoch": 2.6214833759590794, - "grad_norm": 0.09767530113458633, - "learning_rate": 2.0655955052462643e-05, - "loss": 0.0025425378233194353, - "step": 15375 - }, - { - "epoch": 2.6223358908780905, - "grad_norm": 0.05659051984548569, - "learning_rate": 2.063306720789516e-05, - "loss": 0.0016861587762832641, - "step": 15380 - }, - { - "epoch": 2.6231884057971016, - "grad_norm": 0.07679109275341034, - "learning_rate": 2.061018723737299e-05, - "loss": 0.0012974600307643414, - "step": 15385 - }, - { - "epoch": 2.6240409207161126, - "grad_norm": 0.032084014266729355, - "learning_rate": 2.0587315151577257e-05, - "loss": 0.0009737671352922916, - "step": 15390 - }, - { - "epoch": 2.6248934356351237, - "grad_norm": 0.07588861882686615, - "learning_rate": 2.056445096118539e-05, - "loss": 0.002771071344614029, - "step": 15395 - }, - { - "epoch": 2.6257459505541347, - "grad_norm": 0.07706267386674881, - "learning_rate": 2.0541594676871188e-05, - "loss": 0.002124561369419098, - "step": 15400 - }, - { - "epoch": 2.626598465473146, - "grad_norm": 0.05501805245876312, - "learning_rate": 2.051874630930469e-05, - "loss": 0.001449206192046404, - "step": 15405 - }, - { - "epoch": 2.627450980392157, - "grad_norm": 0.07360731810331345, - "learning_rate": 2.0495905869152303e-05, - "loss": 0.0014007428660988807, - "step": 15410 - }, - { - "epoch": 2.628303495311168, - "grad_norm": 0.03651239722967148, - "learning_rate": 2.04730733670767e-05, - "loss": 0.0013091465458273887, - "step": 15415 - }, - { - "epoch": 2.629156010230179, - "grad_norm": 0.05154712125658989, - "learning_rate": 2.0450248813736842e-05, - "loss": 0.0017904775217175485, - "step": 15420 - }, - { - "epoch": 2.63000852514919, - "grad_norm": 0.03202452138066292, - "learning_rate": 2.0427432219787978e-05, - "loss": 0.002919047139585018, - "step": 15425 - }, - { - "epoch": 2.630861040068201, - "grad_norm": 0.08954522758722305, - "learning_rate": 2.040462359588169e-05, - "loss": 0.0031249357387423517, - "step": 15430 - }, - { - "epoch": 2.631713554987212, - "grad_norm": 0.09551462531089783, - "learning_rate": 2.038182295266577e-05, - "loss": 0.0016073914244771003, - "step": 15435 - }, - { - "epoch": 2.632566069906223, - "grad_norm": 0.0576794371008873, - "learning_rate": 2.035903030078432e-05, - "loss": 0.0028427325189113615, - "step": 15440 - }, - { - "epoch": 2.6334185848252343, - "grad_norm": 0.0879262238740921, - "learning_rate": 2.0336245650877728e-05, - "loss": 0.0012862576171755792, - "step": 15445 - }, - { - "epoch": 2.634271099744246, - "grad_norm": 0.1022641509771347, - "learning_rate": 2.03134690135826e-05, - "loss": 0.002296357229351997, - "step": 15450 - }, - { - "epoch": 2.6351236146632564, - "grad_norm": 0.07090801000595093, - "learning_rate": 2.029070039953186e-05, - "loss": 0.0032129865139722824, - "step": 15455 - }, - { - "epoch": 2.635976129582268, - "grad_norm": 0.06394338607788086, - "learning_rate": 2.026793981935463e-05, - "loss": 0.0022887293249368667, - "step": 15460 - }, - { - "epoch": 2.6368286445012785, - "grad_norm": 0.03828660771250725, - "learning_rate": 2.0245187283676316e-05, - "loss": 0.0023141488432884215, - "step": 15465 - }, - { - "epoch": 2.63768115942029, - "grad_norm": 0.0748148262500763, - "learning_rate": 2.0222442803118537e-05, - "loss": 0.002477791905403137, - "step": 15470 - }, - { - "epoch": 2.638533674339301, - "grad_norm": 0.02352295070886612, - "learning_rate": 2.019970638829921e-05, - "loss": 0.0021653104573488235, - "step": 15475 - }, - { - "epoch": 2.639386189258312, - "grad_norm": 0.019303878769278526, - "learning_rate": 2.017697804983243e-05, - "loss": 0.0014067382551729679, - "step": 15480 - }, - { - "epoch": 2.640238704177323, - "grad_norm": 0.036747269332408905, - "learning_rate": 2.015425779832854e-05, - "loss": 0.002145359478890896, - "step": 15485 - }, - { - "epoch": 2.6410912190963343, - "grad_norm": 0.04195109382271767, - "learning_rate": 2.0131545644394096e-05, - "loss": 0.0014138499274849892, - "step": 15490 - }, - { - "epoch": 2.6419437340153453, - "grad_norm": 0.07388610392808914, - "learning_rate": 2.0108841598631904e-05, - "loss": 0.0025294892489910125, - "step": 15495 - }, - { - "epoch": 2.6427962489343564, - "grad_norm": 0.0890735536813736, - "learning_rate": 2.0086145671640973e-05, - "loss": 0.0026762137189507485, - "step": 15500 - }, - { - "epoch": 2.6436487638533674, - "grad_norm": 0.07587535679340363, - "learning_rate": 2.006345787401652e-05, - "loss": 0.0031544029712677, - "step": 15505 - }, - { - "epoch": 2.6445012787723785, - "grad_norm": 0.10948733240365982, - "learning_rate": 2.004077821634995e-05, - "loss": 0.0023899499326944353, - "step": 15510 - }, - { - "epoch": 2.6453537936913896, - "grad_norm": 0.07914752513170242, - "learning_rate": 2.0018106709228886e-05, - "loss": 0.004097612574696541, - "step": 15515 - }, - { - "epoch": 2.6462063086104006, - "grad_norm": 0.07947845757007599, - "learning_rate": 1.9995443363237126e-05, - "loss": 0.0022834014147520066, - "step": 15520 - }, - { - "epoch": 2.6470588235294117, - "grad_norm": 0.05973362177610397, - "learning_rate": 1.9972788188954704e-05, - "loss": 0.001445610448718071, - "step": 15525 - }, - { - "epoch": 2.6479113384484227, - "grad_norm": 0.07292830944061279, - "learning_rate": 1.9950141196957792e-05, - "loss": 0.0023502418771386147, - "step": 15530 - }, - { - "epoch": 2.648763853367434, - "grad_norm": 0.09226574003696442, - "learning_rate": 1.9927502397818745e-05, - "loss": 0.002285385876893997, - "step": 15535 - }, - { - "epoch": 2.649616368286445, - "grad_norm": 0.08981240540742874, - "learning_rate": 1.9904871802106124e-05, - "loss": 0.0023617954924702645, - "step": 15540 - }, - { - "epoch": 2.6504688832054564, - "grad_norm": 0.07505398988723755, - "learning_rate": 1.988224942038466e-05, - "loss": 0.0016136666759848594, - "step": 15545 - }, - { - "epoch": 2.651321398124467, - "grad_norm": 0.06795456260442734, - "learning_rate": 1.9859635263215215e-05, - "loss": 0.0014020048081874847, - "step": 15550 - }, - { - "epoch": 2.6521739130434785, - "grad_norm": 0.07863990217447281, - "learning_rate": 1.983702934115483e-05, - "loss": 0.0016099724918603898, - "step": 15555 - }, - { - "epoch": 2.653026427962489, - "grad_norm": 0.15475937724113464, - "learning_rate": 1.9814431664756705e-05, - "loss": 0.0028660917654633523, - "step": 15560 - }, - { - "epoch": 2.6538789428815006, - "grad_norm": 0.09072619676589966, - "learning_rate": 1.979184224457017e-05, - "loss": 0.0038232788443565368, - "step": 15565 - }, - { - "epoch": 2.6547314578005117, - "grad_norm": 0.04944036900997162, - "learning_rate": 1.9769261091140746e-05, - "loss": 0.002762124501168728, - "step": 15570 - }, - { - "epoch": 2.6555839727195227, - "grad_norm": 0.07315114885568619, - "learning_rate": 1.974668821501005e-05, - "loss": 0.0018053753301501274, - "step": 15575 - }, - { - "epoch": 2.656436487638534, - "grad_norm": 0.03133604675531387, - "learning_rate": 1.972412362671584e-05, - "loss": 0.0012923687696456908, - "step": 15580 - }, - { - "epoch": 2.657289002557545, - "grad_norm": 0.07396573573350906, - "learning_rate": 1.9701567336792037e-05, - "loss": 0.004405549541115761, - "step": 15585 - }, - { - "epoch": 2.658141517476556, - "grad_norm": 0.05702332779765129, - "learning_rate": 1.967901935576867e-05, - "loss": 0.001864958368241787, - "step": 15590 - }, - { - "epoch": 2.658994032395567, - "grad_norm": 0.06003536656498909, - "learning_rate": 1.9656479694171882e-05, - "loss": 0.0025712646543979644, - "step": 15595 - }, - { - "epoch": 2.659846547314578, - "grad_norm": 0.06424745172262192, - "learning_rate": 1.963394836252393e-05, - "loss": 0.002156762033700943, - "step": 15600 - }, - { - "epoch": 2.660699062233589, - "grad_norm": 0.0703018307685852, - "learning_rate": 1.9611425371343193e-05, - "loss": 0.0034677576273679732, - "step": 15605 - }, - { - "epoch": 2.6615515771526, - "grad_norm": 0.06616941094398499, - "learning_rate": 1.958891073114414e-05, - "loss": 0.002393544837832451, - "step": 15610 - }, - { - "epoch": 2.662404092071611, - "grad_norm": 0.04988931491971016, - "learning_rate": 1.9566404452437374e-05, - "loss": 0.00223421361297369, - "step": 15615 - }, - { - "epoch": 2.6632566069906223, - "grad_norm": 0.04869920015335083, - "learning_rate": 1.9543906545729573e-05, - "loss": 0.0013234581798315047, - "step": 15620 - }, - { - "epoch": 2.6641091219096333, - "grad_norm": 0.06417152285575867, - "learning_rate": 1.9521417021523482e-05, - "loss": 0.002075556293129921, - "step": 15625 - }, - { - "epoch": 2.6649616368286444, - "grad_norm": 0.07077648490667343, - "learning_rate": 1.949893589031799e-05, - "loss": 0.002136031910777092, - "step": 15630 - }, - { - "epoch": 2.6658141517476555, - "grad_norm": 0.060446277260780334, - "learning_rate": 1.9476463162608016e-05, - "loss": 0.0025891490280628205, - "step": 15635 - }, - { - "epoch": 2.6666666666666665, - "grad_norm": 0.0396479107439518, - "learning_rate": 1.94539988488846e-05, - "loss": 0.0018554994836449622, - "step": 15640 - }, - { - "epoch": 2.6675191815856776, - "grad_norm": 0.06787905842065811, - "learning_rate": 1.9431542959634817e-05, - "loss": 0.0017763305455446242, - "step": 15645 - }, - { - "epoch": 2.668371696504689, - "grad_norm": 0.08236365765333176, - "learning_rate": 1.940909550534182e-05, - "loss": 0.0018430478870868684, - "step": 15650 - }, - { - "epoch": 2.6692242114236997, - "grad_norm": 0.10013429820537567, - "learning_rate": 1.9386656496484816e-05, - "loss": 0.0021154627203941343, - "step": 15655 - }, - { - "epoch": 2.670076726342711, - "grad_norm": 0.046891309320926666, - "learning_rate": 1.936422594353911e-05, - "loss": 0.0018745694309473039, - "step": 15660 - }, - { - "epoch": 2.670929241261722, - "grad_norm": 0.03863799199461937, - "learning_rate": 1.9341803856976016e-05, - "loss": 0.002184972912073135, - "step": 15665 - }, - { - "epoch": 2.6717817561807333, - "grad_norm": 0.05828642472624779, - "learning_rate": 1.9319390247262896e-05, - "loss": 0.0022127529606223106, - "step": 15670 - }, - { - "epoch": 2.6726342710997444, - "grad_norm": 0.08675269782543182, - "learning_rate": 1.9296985124863194e-05, - "loss": 0.002008732967078686, - "step": 15675 - }, - { - "epoch": 2.6734867860187554, - "grad_norm": 0.0700579434633255, - "learning_rate": 1.9274588500236345e-05, - "loss": 0.0024785833433270455, - "step": 15680 - }, - { - "epoch": 2.6743393009377665, - "grad_norm": 0.10120563209056854, - "learning_rate": 1.9252200383837854e-05, - "loss": 0.002890965715050697, - "step": 15685 - }, - { - "epoch": 2.6751918158567776, - "grad_norm": 0.10622604191303253, - "learning_rate": 1.9229820786119235e-05, - "loss": 0.002458018809556961, - "step": 15690 - }, - { - "epoch": 2.6760443307757886, - "grad_norm": 0.07292070984840393, - "learning_rate": 1.920744971752803e-05, - "loss": 0.0030672624707221985, - "step": 15695 - }, - { - "epoch": 2.6768968456947997, - "grad_norm": 0.030893906950950623, - "learning_rate": 1.918508718850779e-05, - "loss": 0.002947884239256382, - "step": 15700 - }, - { - "epoch": 2.6777493606138107, - "grad_norm": 0.07428406924009323, - "learning_rate": 1.9162733209498077e-05, - "loss": 0.003342418372631073, - "step": 15705 - }, - { - "epoch": 2.678601875532822, - "grad_norm": 0.014073869213461876, - "learning_rate": 1.9140387790934502e-05, - "loss": 0.0020058237016201018, - "step": 15710 - }, - { - "epoch": 2.679454390451833, - "grad_norm": 0.08076811581850052, - "learning_rate": 1.911805094324863e-05, - "loss": 0.0020218659192323685, - "step": 15715 - }, - { - "epoch": 2.680306905370844, - "grad_norm": 0.059959858655929565, - "learning_rate": 1.909572267686804e-05, - "loss": 0.0012911208905279636, - "step": 15720 - }, - { - "epoch": 2.681159420289855, - "grad_norm": 0.08688201755285263, - "learning_rate": 1.9073403002216313e-05, - "loss": 0.001681494526565075, - "step": 15725 - }, - { - "epoch": 2.682011935208866, - "grad_norm": 0.08083862066268921, - "learning_rate": 1.905109192971304e-05, - "loss": 0.002467956393957138, - "step": 15730 - }, - { - "epoch": 2.682864450127877, - "grad_norm": 0.06145070865750313, - "learning_rate": 1.902878946977375e-05, - "loss": 0.004355132207274437, - "step": 15735 - }, - { - "epoch": 2.683716965046888, - "grad_norm": 0.07410819083452225, - "learning_rate": 1.900649563280997e-05, - "loss": 0.0029904641211032867, - "step": 15740 - }, - { - "epoch": 2.6845694799658997, - "grad_norm": 0.03833797574043274, - "learning_rate": 1.8984210429229217e-05, - "loss": 0.0012984732165932655, - "step": 15745 - }, - { - "epoch": 2.6854219948849103, - "grad_norm": 0.023088248446583748, - "learning_rate": 1.896193386943494e-05, - "loss": 0.001397434249520302, - "step": 15750 - }, - { - "epoch": 2.686274509803922, - "grad_norm": 0.06918703764677048, - "learning_rate": 1.8939665963826616e-05, - "loss": 0.0015222997404634952, - "step": 15755 - }, - { - "epoch": 2.6871270247229324, - "grad_norm": 0.0286374781280756, - "learning_rate": 1.891740672279962e-05, - "loss": 0.0015881337225437165, - "step": 15760 - }, - { - "epoch": 2.687979539641944, - "grad_norm": 0.05485616624355316, - "learning_rate": 1.88951561567453e-05, - "loss": 0.0034288309514522554, - "step": 15765 - }, - { - "epoch": 2.688832054560955, - "grad_norm": 0.05021583288908005, - "learning_rate": 1.887291427605097e-05, - "loss": 0.0013944344595074654, - "step": 15770 - }, - { - "epoch": 2.689684569479966, - "grad_norm": 0.06752395629882812, - "learning_rate": 1.8850681091099895e-05, - "loss": 0.002590004727244377, - "step": 15775 - }, - { - "epoch": 2.690537084398977, - "grad_norm": 0.04273150861263275, - "learning_rate": 1.8828456612271255e-05, - "loss": 0.0019359454512596131, - "step": 15780 - }, - { - "epoch": 2.691389599317988, - "grad_norm": 0.0928453654050827, - "learning_rate": 1.8806240849940167e-05, - "loss": 0.003046049177646637, - "step": 15785 - }, - { - "epoch": 2.692242114236999, - "grad_norm": 0.025754287838935852, - "learning_rate": 1.8784033814477692e-05, - "loss": 0.0018295232206583024, - "step": 15790 - }, - { - "epoch": 2.6930946291560103, - "grad_norm": 0.07345419377088547, - "learning_rate": 1.8761835516250806e-05, - "loss": 0.0018985627219080925, - "step": 15795 - }, - { - "epoch": 2.6939471440750213, - "grad_norm": 0.08317514508962631, - "learning_rate": 1.873964596562243e-05, - "loss": 0.0030419353395700456, - "step": 15800 - }, - { - "epoch": 2.6947996589940324, - "grad_norm": 0.07300770282745361, - "learning_rate": 1.8717465172951377e-05, - "loss": 0.002040323428809643, - "step": 15805 - }, - { - "epoch": 2.6956521739130435, - "grad_norm": 0.07284363359212875, - "learning_rate": 1.8695293148592362e-05, - "loss": 0.001639954373240471, - "step": 15810 - }, - { - "epoch": 2.6965046888320545, - "grad_norm": 0.05817059800028801, - "learning_rate": 1.867312990289606e-05, - "loss": 0.0015234597958624363, - "step": 15815 - }, - { - "epoch": 2.6973572037510656, - "grad_norm": 0.11319714039564133, - "learning_rate": 1.865097544620897e-05, - "loss": 0.0018295228481292724, - "step": 15820 - }, - { - "epoch": 2.6982097186700766, - "grad_norm": 0.10493957251310349, - "learning_rate": 1.8628829788873567e-05, - "loss": 0.0025029994547367098, - "step": 15825 - }, - { - "epoch": 2.6990622335890877, - "grad_norm": 0.03161423280835152, - "learning_rate": 1.860669294122816e-05, - "loss": 0.0014271627180278302, - "step": 15830 - }, - { - "epoch": 2.6999147485080988, - "grad_norm": 0.03267689794301987, - "learning_rate": 1.858456491360697e-05, - "loss": 0.0012216478586196899, - "step": 15835 - }, - { - "epoch": 2.70076726342711, - "grad_norm": 0.07986247539520264, - "learning_rate": 1.856244571634008e-05, - "loss": 0.0018704459071159363, - "step": 15840 - }, - { - "epoch": 2.701619778346121, - "grad_norm": 0.10120461881160736, - "learning_rate": 1.85403353597535e-05, - "loss": 0.0020706810057163237, - "step": 15845 - }, - { - "epoch": 2.7024722932651324, - "grad_norm": 0.05339881405234337, - "learning_rate": 1.8518233854169056e-05, - "loss": 0.0017986055463552475, - "step": 15850 - }, - { - "epoch": 2.703324808184143, - "grad_norm": 0.11433786898851395, - "learning_rate": 1.8496141209904464e-05, - "loss": 0.0034054510295391084, - "step": 15855 - }, - { - "epoch": 2.7041773231031545, - "grad_norm": 0.061081189662218094, - "learning_rate": 1.8474057437273328e-05, - "loss": 0.002348882704973221, - "step": 15860 - }, - { - "epoch": 2.705029838022165, - "grad_norm": 0.055195316672325134, - "learning_rate": 1.8451982546585055e-05, - "loss": 0.0015221487730741501, - "step": 15865 - }, - { - "epoch": 2.7058823529411766, - "grad_norm": 0.06800514459609985, - "learning_rate": 1.8429916548144973e-05, - "loss": 0.0023088542744517325, - "step": 15870 - }, - { - "epoch": 2.7067348678601877, - "grad_norm": 0.05646739527583122, - "learning_rate": 1.8407859452254206e-05, - "loss": 0.0024141166359186172, - "step": 15875 - }, - { - "epoch": 2.7075873827791987, - "grad_norm": 0.10886628180742264, - "learning_rate": 1.8385811269209743e-05, - "loss": 0.0019476715475320815, - "step": 15880 - }, - { - "epoch": 2.70843989769821, - "grad_norm": 0.04279763624072075, - "learning_rate": 1.8363772009304395e-05, - "loss": 0.002021237276494503, - "step": 15885 - }, - { - "epoch": 2.709292412617221, - "grad_norm": 0.09583209455013275, - "learning_rate": 1.8341741682826852e-05, - "loss": 0.002025018632411957, - "step": 15890 - }, - { - "epoch": 2.710144927536232, - "grad_norm": 0.06695323437452316, - "learning_rate": 1.8319720300061582e-05, - "loss": 0.0026269391179084777, - "step": 15895 - }, - { - "epoch": 2.710997442455243, - "grad_norm": 0.07438764721155167, - "learning_rate": 1.829770787128889e-05, - "loss": 0.0014647828415036202, - "step": 15900 - }, - { - "epoch": 2.711849957374254, - "grad_norm": 0.05395448952913284, - "learning_rate": 1.8275704406784933e-05, - "loss": 0.0024559808894991874, - "step": 15905 - }, - { - "epoch": 2.712702472293265, - "grad_norm": 0.03163938969373703, - "learning_rate": 1.825370991682164e-05, - "loss": 0.0022430509328842164, - "step": 15910 - }, - { - "epoch": 2.713554987212276, - "grad_norm": 0.104282446205616, - "learning_rate": 1.8231724411666794e-05, - "loss": 0.001472956594079733, - "step": 15915 - }, - { - "epoch": 2.7144075021312872, - "grad_norm": 0.07355596870183945, - "learning_rate": 1.8209747901583944e-05, - "loss": 0.0023859225213527678, - "step": 15920 - }, - { - "epoch": 2.7152600170502983, - "grad_norm": 0.06525922566652298, - "learning_rate": 1.8187780396832463e-05, - "loss": 0.00265895314514637, - "step": 15925 - }, - { - "epoch": 2.7161125319693094, - "grad_norm": 0.09379115700721741, - "learning_rate": 1.8165821907667505e-05, - "loss": 0.002496413141489029, - "step": 15930 - }, - { - "epoch": 2.7169650468883204, - "grad_norm": 0.05254679545760155, - "learning_rate": 1.8143872444340017e-05, - "loss": 0.0022162407636642455, - "step": 15935 - }, - { - "epoch": 2.7178175618073315, - "grad_norm": 0.06203889846801758, - "learning_rate": 1.8121932017096758e-05, - "loss": 0.0016900423914194107, - "step": 15940 - }, - { - "epoch": 2.718670076726343, - "grad_norm": 0.08532653003931046, - "learning_rate": 1.810000063618023e-05, - "loss": 0.0028453752398490905, - "step": 15945 - }, - { - "epoch": 2.7195225916453536, - "grad_norm": 0.08361469209194183, - "learning_rate": 1.807807831182875e-05, - "loss": 0.0029737703502178193, - "step": 15950 - }, - { - "epoch": 2.720375106564365, - "grad_norm": 0.06439653784036636, - "learning_rate": 1.805616505427637e-05, - "loss": 0.002233676239848137, - "step": 15955 - }, - { - "epoch": 2.7212276214833757, - "grad_norm": 0.09197837114334106, - "learning_rate": 1.803426087375295e-05, - "loss": 0.0020749013870954513, - "step": 15960 - }, - { - "epoch": 2.722080136402387, - "grad_norm": 0.055145513266325, - "learning_rate": 1.8012365780484074e-05, - "loss": 0.0013141044415533542, - "step": 15965 - }, - { - "epoch": 2.7229326513213983, - "grad_norm": 0.06788767874240875, - "learning_rate": 1.7990479784691105e-05, - "loss": 0.0023008717224001886, - "step": 15970 - }, - { - "epoch": 2.7237851662404093, - "grad_norm": 0.10216987133026123, - "learning_rate": 1.7968602896591152e-05, - "loss": 0.002799564599990845, - "step": 15975 - }, - { - "epoch": 2.7246376811594204, - "grad_norm": 0.0995464101433754, - "learning_rate": 1.7946735126397056e-05, - "loss": 0.0023927824571728707, - "step": 15980 - }, - { - "epoch": 2.7254901960784315, - "grad_norm": 0.05936437472701073, - "learning_rate": 1.7924876484317453e-05, - "loss": 0.001253789383918047, - "step": 15985 - }, - { - "epoch": 2.7263427109974425, - "grad_norm": 0.06160435080528259, - "learning_rate": 1.7903026980556672e-05, - "loss": 0.00238239299505949, - "step": 15990 - }, - { - "epoch": 2.7271952259164536, - "grad_norm": 0.05691118910908699, - "learning_rate": 1.788118662531477e-05, - "loss": 0.0015995081514120102, - "step": 15995 - }, - { - "epoch": 2.7280477408354646, - "grad_norm": 0.0878402590751648, - "learning_rate": 1.7859355428787564e-05, - "loss": 0.002066444233059883, - "step": 16000 - }, - { - "epoch": 2.7289002557544757, - "grad_norm": 0.04065166413784027, - "learning_rate": 1.7837533401166598e-05, - "loss": 0.0022698283195495606, - "step": 16005 - }, - { - "epoch": 2.7297527706734868, - "grad_norm": 0.08980758488178253, - "learning_rate": 1.7815720552639105e-05, - "loss": 0.0016043156385421753, - "step": 16010 - }, - { - "epoch": 2.730605285592498, - "grad_norm": 0.05619784817099571, - "learning_rate": 1.7793916893388055e-05, - "loss": 0.0025583259761333466, - "step": 16015 - }, - { - "epoch": 2.731457800511509, - "grad_norm": 0.09853291511535645, - "learning_rate": 1.7772122433592116e-05, - "loss": 0.0025311170145869257, - "step": 16020 - }, - { - "epoch": 2.73231031543052, - "grad_norm": 0.044340990483760834, - "learning_rate": 1.7750337183425652e-05, - "loss": 0.0020809115841984747, - "step": 16025 - }, - { - "epoch": 2.733162830349531, - "grad_norm": 0.024994025006890297, - "learning_rate": 1.772856115305877e-05, - "loss": 0.001932576857507229, - "step": 16030 - }, - { - "epoch": 2.734015345268542, - "grad_norm": 0.07059352099895477, - "learning_rate": 1.770679435265724e-05, - "loss": 0.002347341552376747, - "step": 16035 - }, - { - "epoch": 2.734867860187553, - "grad_norm": 0.08121193200349808, - "learning_rate": 1.7685036792382506e-05, - "loss": 0.0015123223885893822, - "step": 16040 - }, - { - "epoch": 2.735720375106564, - "grad_norm": 0.07900503277778625, - "learning_rate": 1.766328848239175e-05, - "loss": 0.0028667191043496134, - "step": 16045 - }, - { - "epoch": 2.7365728900255757, - "grad_norm": 0.08574212342500687, - "learning_rate": 1.7641549432837778e-05, - "loss": 0.002038617432117462, - "step": 16050 - }, - { - "epoch": 2.7374254049445863, - "grad_norm": 0.06154071167111397, - "learning_rate": 1.7619819653869132e-05, - "loss": 0.0017743892967700958, - "step": 16055 - }, - { - "epoch": 2.738277919863598, - "grad_norm": 0.06745338439941406, - "learning_rate": 1.7598099155629982e-05, - "loss": 0.0018204674124717712, - "step": 16060 - }, - { - "epoch": 2.7391304347826084, - "grad_norm": 0.029756128787994385, - "learning_rate": 1.7576387948260175e-05, - "loss": 0.0020426372066140175, - "step": 16065 - }, - { - "epoch": 2.73998294970162, - "grad_norm": 0.13447973132133484, - "learning_rate": 1.7554686041895217e-05, - "loss": 0.0023698143661022185, - "step": 16070 - }, - { - "epoch": 2.740835464620631, - "grad_norm": 0.09888533502817154, - "learning_rate": 1.7532993446666298e-05, - "loss": 0.0024117348715662957, - "step": 16075 - }, - { - "epoch": 2.741687979539642, - "grad_norm": 0.05919703096151352, - "learning_rate": 1.751131017270024e-05, - "loss": 0.0027751058340072634, - "step": 16080 - }, - { - "epoch": 2.742540494458653, - "grad_norm": 0.04920949414372444, - "learning_rate": 1.74896362301195e-05, - "loss": 0.0022046850994229318, - "step": 16085 - }, - { - "epoch": 2.743393009377664, - "grad_norm": 0.028095668181777, - "learning_rate": 1.746797162904222e-05, - "loss": 0.001455264538526535, - "step": 16090 - }, - { - "epoch": 2.7442455242966752, - "grad_norm": 0.03558868542313576, - "learning_rate": 1.7446316379582125e-05, - "loss": 0.0023241037502884864, - "step": 16095 - }, - { - "epoch": 2.7450980392156863, - "grad_norm": 0.07124538719654083, - "learning_rate": 1.742467049184864e-05, - "loss": 0.0014614716172218322, - "step": 16100 - }, - { - "epoch": 2.7459505541346974, - "grad_norm": 0.07355284690856934, - "learning_rate": 1.7403033975946774e-05, - "loss": 0.0018932107836008073, - "step": 16105 - }, - { - "epoch": 2.7468030690537084, - "grad_norm": 0.06485545635223389, - "learning_rate": 1.738140684197717e-05, - "loss": 0.0021881703287363052, - "step": 16110 - }, - { - "epoch": 2.7476555839727195, - "grad_norm": 0.05748758837580681, - "learning_rate": 1.735978910003607e-05, - "loss": 0.0019190840423107148, - "step": 16115 - }, - { - "epoch": 2.7485080988917305, - "grad_norm": 0.04986255615949631, - "learning_rate": 1.7338180760215395e-05, - "loss": 0.001525167189538479, - "step": 16120 - }, - { - "epoch": 2.7493606138107416, - "grad_norm": 0.06383983045816422, - "learning_rate": 1.731658183260262e-05, - "loss": 0.0026792695745825766, - "step": 16125 - }, - { - "epoch": 2.749531116794544, - "eval_loss": 0.047858335077762604, - "eval_runtime": 3.7263, - "eval_samples_per_second": 67.627, - "eval_steps_per_second": 1.073, - "step": 16126 - }, - { - "eval_cer_subset": 0.01459117038774081, - "eval_cer_subset_edit_distance": 896, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 16126 - }, - { - "epoch": 2.7502131287297527, - "grad_norm": 0.10121899098157883, - "learning_rate": 1.7294992327280826e-05, - "loss": 0.0027876641601324082, - "step": 16130 - }, - { - "epoch": 2.7510656436487637, - "grad_norm": 0.029004251584410667, - "learning_rate": 1.7273412254328743e-05, - "loss": 0.0015729216858744622, - "step": 16135 - }, - { - "epoch": 2.7519181585677748, - "grad_norm": 0.06613736599683762, - "learning_rate": 1.7251841623820638e-05, - "loss": 0.0020701587200164795, - "step": 16140 - }, - { - "epoch": 2.7527706734867863, - "grad_norm": 0.09212189167737961, - "learning_rate": 1.7230280445826422e-05, - "loss": 0.0026726944372057913, - "step": 16145 - }, - { - "epoch": 2.753623188405797, - "grad_norm": 0.12724192440509796, - "learning_rate": 1.720872873041157e-05, - "loss": 0.002590762265026569, - "step": 16150 - }, - { - "epoch": 2.7544757033248084, - "grad_norm": 0.043855708092451096, - "learning_rate": 1.7187186487637124e-05, - "loss": 0.001780974492430687, - "step": 16155 - }, - { - "epoch": 2.755328218243819, - "grad_norm": 0.10562611371278763, - "learning_rate": 1.7165653727559725e-05, - "loss": 0.002336742728948593, - "step": 16160 - }, - { - "epoch": 2.7561807331628305, - "grad_norm": 0.05162282660603523, - "learning_rate": 1.7144130460231574e-05, - "loss": 0.0018106916919350623, - "step": 16165 - }, - { - "epoch": 2.7570332480818416, - "grad_norm": 0.020110471174120903, - "learning_rate": 1.7122616695700467e-05, - "loss": 0.0014431983232498168, - "step": 16170 - }, - { - "epoch": 2.7578857630008526, - "grad_norm": 0.15154017508029938, - "learning_rate": 1.7101112444009725e-05, - "loss": 0.0019074320793151856, - "step": 16175 - }, - { - "epoch": 2.7587382779198637, - "grad_norm": 0.03481750935316086, - "learning_rate": 1.7079617715198264e-05, - "loss": 0.0037923645228147506, - "step": 16180 - }, - { - "epoch": 2.7595907928388748, - "grad_norm": 0.024081731215119362, - "learning_rate": 1.7058132519300524e-05, - "loss": 0.002791491337120533, - "step": 16185 - }, - { - "epoch": 2.760443307757886, - "grad_norm": 0.07880852371454239, - "learning_rate": 1.703665686634653e-05, - "loss": 0.0028480572625994684, - "step": 16190 - }, - { - "epoch": 2.761295822676897, - "grad_norm": 0.06910362094640732, - "learning_rate": 1.701519076636182e-05, - "loss": 0.0018049828708171845, - "step": 16195 - }, - { - "epoch": 2.762148337595908, - "grad_norm": 0.09321995079517365, - "learning_rate": 1.699373422936748e-05, - "loss": 0.001952703855931759, - "step": 16200 - }, - { - "epoch": 2.763000852514919, - "grad_norm": 0.05871212109923363, - "learning_rate": 1.6972287265380137e-05, - "loss": 0.00121518075466156, - "step": 16205 - }, - { - "epoch": 2.76385336743393, - "grad_norm": 0.10542161762714386, - "learning_rate": 1.6950849884411936e-05, - "loss": 0.0024038642644882203, - "step": 16210 - }, - { - "epoch": 2.764705882352941, - "grad_norm": 0.0580933652818203, - "learning_rate": 1.6929422096470582e-05, - "loss": 0.0021081961691379546, - "step": 16215 - }, - { - "epoch": 2.765558397271952, - "grad_norm": 0.024878472089767456, - "learning_rate": 1.6908003911559256e-05, - "loss": 0.0022545790299773215, - "step": 16220 - }, - { - "epoch": 2.7664109121909632, - "grad_norm": 0.060553766787052155, - "learning_rate": 1.6886595339676703e-05, - "loss": 0.0015277018770575523, - "step": 16225 - }, - { - "epoch": 2.7672634271099743, - "grad_norm": 0.06857582181692123, - "learning_rate": 1.6865196390817137e-05, - "loss": 0.001996198855340481, - "step": 16230 - }, - { - "epoch": 2.7681159420289854, - "grad_norm": 0.06866193562746048, - "learning_rate": 1.6843807074970316e-05, - "loss": 0.0014093054458498954, - "step": 16235 - }, - { - "epoch": 2.7689684569479964, - "grad_norm": 0.12889426946640015, - "learning_rate": 1.6822427402121476e-05, - "loss": 0.0029415406286716463, - "step": 16240 - }, - { - "epoch": 2.7698209718670075, - "grad_norm": 0.05907638370990753, - "learning_rate": 1.6801057382251363e-05, - "loss": 0.0020356021821498873, - "step": 16245 - }, - { - "epoch": 2.770673486786019, - "grad_norm": 0.05899703502655029, - "learning_rate": 1.6779697025336205e-05, - "loss": 0.0010949989780783653, - "step": 16250 - }, - { - "epoch": 2.7715260017050296, - "grad_norm": 0.048360541462898254, - "learning_rate": 1.6758346341347716e-05, - "loss": 0.002375531755387783, - "step": 16255 - }, - { - "epoch": 2.772378516624041, - "grad_norm": 0.06712590157985687, - "learning_rate": 1.6737005340253134e-05, - "loss": 0.0016120089218020438, - "step": 16260 - }, - { - "epoch": 2.7732310315430517, - "grad_norm": 0.04694962501525879, - "learning_rate": 1.6715674032015137e-05, - "loss": 0.0010866542346775533, - "step": 16265 - }, - { - "epoch": 2.7740835464620632, - "grad_norm": 0.06813527643680573, - "learning_rate": 1.6694352426591873e-05, - "loss": 0.001494432892650366, - "step": 16270 - }, - { - "epoch": 2.7749360613810743, - "grad_norm": 0.12899814546108246, - "learning_rate": 1.6673040533937004e-05, - "loss": 0.003590015694499016, - "step": 16275 - }, - { - "epoch": 2.7757885763000854, - "grad_norm": 0.013963109813630581, - "learning_rate": 1.6651738363999604e-05, - "loss": 0.0019298167899250984, - "step": 16280 - }, - { - "epoch": 2.7766410912190964, - "grad_norm": 0.03605286777019501, - "learning_rate": 1.6630445926724262e-05, - "loss": 0.0031480703502893446, - "step": 16285 - }, - { - "epoch": 2.7774936061381075, - "grad_norm": 0.10986622422933578, - "learning_rate": 1.660916323205098e-05, - "loss": 0.002917572297155857, - "step": 16290 - }, - { - "epoch": 2.7783461210571185, - "grad_norm": 0.057930365204811096, - "learning_rate": 1.658789028991523e-05, - "loss": 0.0026299282908439636, - "step": 16295 - }, - { - "epoch": 2.7791986359761296, - "grad_norm": 0.029447276145219803, - "learning_rate": 1.6566627110247917e-05, - "loss": 0.0022400498390197756, - "step": 16300 - }, - { - "epoch": 2.7800511508951407, - "grad_norm": 0.045625604689121246, - "learning_rate": 1.6545373702975423e-05, - "loss": 0.0010993774980306626, - "step": 16305 - }, - { - "epoch": 2.7809036658141517, - "grad_norm": 0.03276116028428078, - "learning_rate": 1.6524130078019536e-05, - "loss": 0.0017030857503414153, - "step": 16310 - }, - { - "epoch": 2.7817561807331628, - "grad_norm": 0.07950260490179062, - "learning_rate": 1.650289624529747e-05, - "loss": 0.0029186248779296876, - "step": 16315 - }, - { - "epoch": 2.782608695652174, - "grad_norm": 0.03196907788515091, - "learning_rate": 1.6481672214721915e-05, - "loss": 0.0021285150200128556, - "step": 16320 - }, - { - "epoch": 2.783461210571185, - "grad_norm": 0.08347548544406891, - "learning_rate": 1.6460457996200926e-05, - "loss": 0.0018467068672180175, - "step": 16325 - }, - { - "epoch": 2.784313725490196, - "grad_norm": 0.062316033989191055, - "learning_rate": 1.643925359963803e-05, - "loss": 0.002080459892749786, - "step": 16330 - }, - { - "epoch": 2.785166240409207, - "grad_norm": 0.06067380681633949, - "learning_rate": 1.641805903493214e-05, - "loss": 0.0014378841035068036, - "step": 16335 - }, - { - "epoch": 2.786018755328218, - "grad_norm": 0.19668881595134735, - "learning_rate": 1.6396874311977574e-05, - "loss": 0.0018663834780454636, - "step": 16340 - }, - { - "epoch": 2.7868712702472296, - "grad_norm": 0.03857511281967163, - "learning_rate": 1.637569944066407e-05, - "loss": 0.0017508219927549363, - "step": 16345 - }, - { - "epoch": 2.78772378516624, - "grad_norm": 0.06684751063585281, - "learning_rate": 1.6354534430876746e-05, - "loss": 0.0021339647471904756, - "step": 16350 - }, - { - "epoch": 2.7885763000852517, - "grad_norm": 0.0722980946302414, - "learning_rate": 1.633337929249616e-05, - "loss": 0.002456018142402172, - "step": 16355 - }, - { - "epoch": 2.7894288150042623, - "grad_norm": 0.013861587271094322, - "learning_rate": 1.6312234035398214e-05, - "loss": 0.0013738014735281468, - "step": 16360 - }, - { - "epoch": 2.790281329923274, - "grad_norm": 0.05103524401783943, - "learning_rate": 1.6291098669454237e-05, - "loss": 0.0012777662836015225, - "step": 16365 - }, - { - "epoch": 2.791133844842285, - "grad_norm": 0.08019815385341644, - "learning_rate": 1.6269973204530896e-05, - "loss": 0.0021779144182801245, - "step": 16370 - }, - { - "epoch": 2.791986359761296, - "grad_norm": 0.11674029380083084, - "learning_rate": 1.6248857650490287e-05, - "loss": 0.003945905342698097, - "step": 16375 - }, - { - "epoch": 2.792838874680307, - "grad_norm": 0.10289142280817032, - "learning_rate": 1.622775201718984e-05, - "loss": 0.0033991221338510514, - "step": 16380 - }, - { - "epoch": 2.793691389599318, - "grad_norm": 0.08295715600252151, - "learning_rate": 1.6206656314482372e-05, - "loss": 0.0025476697832345963, - "step": 16385 - }, - { - "epoch": 2.794543904518329, - "grad_norm": 0.09820916503667831, - "learning_rate": 1.618557055221605e-05, - "loss": 0.002469751611351967, - "step": 16390 - }, - { - "epoch": 2.79539641943734, - "grad_norm": 0.04547690227627754, - "learning_rate": 1.61644947402344e-05, - "loss": 0.0017419423907995223, - "step": 16395 - }, - { - "epoch": 2.7962489343563512, - "grad_norm": 0.09098807722330093, - "learning_rate": 1.6143428888376336e-05, - "loss": 0.0025540072470903396, - "step": 16400 - }, - { - "epoch": 2.7971014492753623, - "grad_norm": 0.06253538280725479, - "learning_rate": 1.6122373006476078e-05, - "loss": 0.00161474347114563, - "step": 16405 - }, - { - "epoch": 2.7979539641943734, - "grad_norm": 0.10068398714065552, - "learning_rate": 1.6101327104363236e-05, - "loss": 0.0030464882031083105, - "step": 16410 - }, - { - "epoch": 2.7988064791133844, - "grad_norm": 0.04052518680691719, - "learning_rate": 1.6080291191862708e-05, - "loss": 0.001292982418090105, - "step": 16415 - }, - { - "epoch": 2.7996589940323955, - "grad_norm": 0.09480784833431244, - "learning_rate": 1.605926527879478e-05, - "loss": 0.002949811331927776, - "step": 16420 - }, - { - "epoch": 2.8005115089514065, - "grad_norm": 0.08064186573028564, - "learning_rate": 1.603824937497505e-05, - "loss": 0.001863202080130577, - "step": 16425 - }, - { - "epoch": 2.8013640238704176, - "grad_norm": 0.03577118366956711, - "learning_rate": 1.601724349021443e-05, - "loss": 0.0015472200699150561, - "step": 16430 - }, - { - "epoch": 2.8022165387894287, - "grad_norm": 0.04698857292532921, - "learning_rate": 1.5996247634319162e-05, - "loss": 0.002168430760502815, - "step": 16435 - }, - { - "epoch": 2.80306905370844, - "grad_norm": 0.09031593799591064, - "learning_rate": 1.5975261817090803e-05, - "loss": 0.0017798427492380143, - "step": 16440 - }, - { - "epoch": 2.803921568627451, - "grad_norm": 0.24021683633327484, - "learning_rate": 1.5954286048326258e-05, - "loss": 0.0024022582918405535, - "step": 16445 - }, - { - "epoch": 2.8047740835464623, - "grad_norm": 0.07379221171140671, - "learning_rate": 1.5933320337817685e-05, - "loss": 0.0016447069123387338, - "step": 16450 - }, - { - "epoch": 2.805626598465473, - "grad_norm": 0.07145442813634872, - "learning_rate": 1.59123646953526e-05, - "loss": 0.002100140042603016, - "step": 16455 - }, - { - "epoch": 2.8064791133844844, - "grad_norm": 0.06444204598665237, - "learning_rate": 1.5891419130713783e-05, - "loss": 0.0022544978186488152, - "step": 16460 - }, - { - "epoch": 2.8073316283034955, - "grad_norm": 0.07764707505702972, - "learning_rate": 1.5870483653679307e-05, - "loss": 0.002028309740126133, - "step": 16465 - }, - { - "epoch": 2.8081841432225065, - "grad_norm": 0.13890637457370758, - "learning_rate": 1.584955827402257e-05, - "loss": 0.001833663322031498, - "step": 16470 - }, - { - "epoch": 2.8090366581415176, - "grad_norm": 0.06412612646818161, - "learning_rate": 1.5828643001512236e-05, - "loss": 0.0017296869307756424, - "step": 16475 - }, - { - "epoch": 2.8098891730605287, - "grad_norm": 0.05978688597679138, - "learning_rate": 1.5807737845912234e-05, - "loss": 0.001933468133211136, - "step": 16480 - }, - { - "epoch": 2.8107416879795397, - "grad_norm": 0.1131395548582077, - "learning_rate": 1.5786842816981778e-05, - "loss": 0.003291580080986023, - "step": 16485 - }, - { - "epoch": 2.8115942028985508, - "grad_norm": 0.0549713559448719, - "learning_rate": 1.5765957924475394e-05, - "loss": 0.0019789932295680044, - "step": 16490 - }, - { - "epoch": 2.812446717817562, - "grad_norm": 0.08038460463285446, - "learning_rate": 1.5745083178142833e-05, - "loss": 0.002347235009074211, - "step": 16495 - }, - { - "epoch": 2.813299232736573, - "grad_norm": 0.05014783889055252, - "learning_rate": 1.5724218587729098e-05, - "loss": 0.0016623528674244881, - "step": 16500 - }, - { - "epoch": 2.814151747655584, - "grad_norm": 0.05042316019535065, - "learning_rate": 1.5703364162974503e-05, - "loss": 0.0018199939280748368, - "step": 16505 - }, - { - "epoch": 2.815004262574595, - "grad_norm": 0.056051138788461685, - "learning_rate": 1.5682519913614565e-05, - "loss": 0.0016215803101658822, - "step": 16510 - }, - { - "epoch": 2.815856777493606, - "grad_norm": 0.04295732453465462, - "learning_rate": 1.5661685849380098e-05, - "loss": 0.0020044256001710893, - "step": 16515 - }, - { - "epoch": 2.816709292412617, - "grad_norm": 0.02020161598920822, - "learning_rate": 1.564086197999712e-05, - "loss": 0.0018876813352108, - "step": 16520 - }, - { - "epoch": 2.817561807331628, - "grad_norm": 0.09220346808433533, - "learning_rate": 1.562004831518691e-05, - "loss": 0.0015535833314061164, - "step": 16525 - }, - { - "epoch": 2.8184143222506393, - "grad_norm": 0.09728234261274338, - "learning_rate": 1.5599244864665966e-05, - "loss": 0.0015536649152636528, - "step": 16530 - }, - { - "epoch": 2.8192668371696503, - "grad_norm": 0.17288024723529816, - "learning_rate": 1.5578451638146053e-05, - "loss": 0.0021170184016227724, - "step": 16535 - }, - { - "epoch": 2.8201193520886614, - "grad_norm": 0.056582558900117874, - "learning_rate": 1.5557668645334132e-05, - "loss": 0.0030540911480784415, - "step": 16540 - }, - { - "epoch": 2.820971867007673, - "grad_norm": 0.17674417793750763, - "learning_rate": 1.553689589593238e-05, - "loss": 0.001543693896383047, - "step": 16545 - }, - { - "epoch": 2.8218243819266835, - "grad_norm": 0.06186344474554062, - "learning_rate": 1.551613339963823e-05, - "loss": 0.001764528639614582, - "step": 16550 - }, - { - "epoch": 2.822676896845695, - "grad_norm": 0.13224560022354126, - "learning_rate": 1.5495381166144288e-05, - "loss": 0.004735496640205383, - "step": 16555 - }, - { - "epoch": 2.8235294117647056, - "grad_norm": 0.1427813619375229, - "learning_rate": 1.5474639205138406e-05, - "loss": 0.003041662834584713, - "step": 16560 - }, - { - "epoch": 2.824381926683717, - "grad_norm": 0.09970462322235107, - "learning_rate": 1.5453907526303614e-05, - "loss": 0.0025150768458843233, - "step": 16565 - }, - { - "epoch": 2.825234441602728, - "grad_norm": 0.02305634692311287, - "learning_rate": 1.5433186139318144e-05, - "loss": 0.001219399645924568, - "step": 16570 - }, - { - "epoch": 2.8260869565217392, - "grad_norm": 0.04805911332368851, - "learning_rate": 1.541247505385543e-05, - "loss": 0.0012801218777894973, - "step": 16575 - }, - { - "epoch": 2.8269394714407503, - "grad_norm": 0.08059800416231155, - "learning_rate": 1.539177427958408e-05, - "loss": 0.0031003907322883608, - "step": 16580 - }, - { - "epoch": 2.8277919863597614, - "grad_norm": 0.05763188377022743, - "learning_rate": 1.537108382616794e-05, - "loss": 0.002337191253900528, - "step": 16585 - }, - { - "epoch": 2.8286445012787724, - "grad_norm": 0.06821907311677933, - "learning_rate": 1.535040370326597e-05, - "loss": 0.0030008716508746146, - "step": 16590 - }, - { - "epoch": 2.8294970161977835, - "grad_norm": 0.12901924550533295, - "learning_rate": 1.5329733920532358e-05, - "loss": 0.0035179533064365388, - "step": 16595 - }, - { - "epoch": 2.8303495311167945, - "grad_norm": 0.040896832942962646, - "learning_rate": 1.5309074487616435e-05, - "loss": 0.0020170003175735475, - "step": 16600 - }, - { - "epoch": 2.8312020460358056, - "grad_norm": 0.06776095926761627, - "learning_rate": 1.5288425414162725e-05, - "loss": 0.0017662534490227699, - "step": 16605 - }, - { - "epoch": 2.8320545609548167, - "grad_norm": 0.08130808174610138, - "learning_rate": 1.5267786709810897e-05, - "loss": 0.0018257603049278259, - "step": 16610 - }, - { - "epoch": 2.8329070758738277, - "grad_norm": 0.05846976861357689, - "learning_rate": 1.5247158384195778e-05, - "loss": 0.0013240544125437737, - "step": 16615 - }, - { - "epoch": 2.833759590792839, - "grad_norm": 0.113974429666996, - "learning_rate": 1.522654044694736e-05, - "loss": 0.002671768143773079, - "step": 16620 - }, - { - "epoch": 2.83461210571185, - "grad_norm": 0.03519630804657936, - "learning_rate": 1.5205932907690771e-05, - "loss": 0.001667863130569458, - "step": 16625 - }, - { - "epoch": 2.835464620630861, - "grad_norm": 0.014673003926873207, - "learning_rate": 1.5185335776046322e-05, - "loss": 0.002035524509847164, - "step": 16630 - }, - { - "epoch": 2.836317135549872, - "grad_norm": 0.05683857575058937, - "learning_rate": 1.5164749061629407e-05, - "loss": 0.0021878147497773172, - "step": 16635 - }, - { - "epoch": 2.8371696504688835, - "grad_norm": 0.08671200275421143, - "learning_rate": 1.5144172774050623e-05, - "loss": 0.002064511738717556, - "step": 16640 - }, - { - "epoch": 2.838022165387894, - "grad_norm": 0.041581057012081146, - "learning_rate": 1.512360692291563e-05, - "loss": 0.0019536083564162254, - "step": 16645 - }, - { - "epoch": 2.8388746803069056, - "grad_norm": 0.10846979171037674, - "learning_rate": 1.5103051517825288e-05, - "loss": 0.0026564691215753555, - "step": 16650 - }, - { - "epoch": 2.839727195225916, - "grad_norm": 0.026884516701102257, - "learning_rate": 1.5082506568375526e-05, - "loss": 0.0026851309463381766, - "step": 16655 - }, - { - "epoch": 2.8405797101449277, - "grad_norm": 0.0613347552716732, - "learning_rate": 1.506197208415741e-05, - "loss": 0.0014739801175892354, - "step": 16660 - }, - { - "epoch": 2.8414322250639388, - "grad_norm": 0.06315013766288757, - "learning_rate": 1.504144807475712e-05, - "loss": 0.0026756677776575088, - "step": 16665 - }, - { - "epoch": 2.84228473998295, - "grad_norm": 0.04869166761636734, - "learning_rate": 1.5020934549755933e-05, - "loss": 0.0020816361531615256, - "step": 16670 - }, - { - "epoch": 2.843137254901961, - "grad_norm": 0.07282520830631256, - "learning_rate": 1.5000431518730273e-05, - "loss": 0.0008225045166909695, - "step": 16675 - }, - { - "epoch": 2.843989769820972, - "grad_norm": 0.051693812012672424, - "learning_rate": 1.4979938991251607e-05, - "loss": 0.002745438739657402, - "step": 16680 - }, - { - "epoch": 2.844842284739983, - "grad_norm": 0.1495431363582611, - "learning_rate": 1.4959456976886558e-05, - "loss": 0.001805400662124157, - "step": 16685 - }, - { - "epoch": 2.845694799658994, - "grad_norm": 0.05393834039568901, - "learning_rate": 1.4938985485196799e-05, - "loss": 0.0017135551199316979, - "step": 16690 - }, - { - "epoch": 2.846547314578005, - "grad_norm": 0.06205644831061363, - "learning_rate": 1.4918524525739088e-05, - "loss": 0.002358596958220005, - "step": 16695 - }, - { - "epoch": 2.847399829497016, - "grad_norm": 0.1177382543683052, - "learning_rate": 1.4898074108065306e-05, - "loss": 0.00382155142724514, - "step": 16700 - }, - { - "epoch": 2.8482523444160273, - "grad_norm": 0.06532850116491318, - "learning_rate": 1.487763424172238e-05, - "loss": 0.002384480834007263, - "step": 16705 - }, - { - "epoch": 2.8491048593350383, - "grad_norm": 0.05195530876517296, - "learning_rate": 1.4857204936252313e-05, - "loss": 0.0030395207926630975, - "step": 16710 - }, - { - "epoch": 2.8499573742540494, - "grad_norm": 0.06609994173049927, - "learning_rate": 1.4836786201192182e-05, - "loss": 0.002476612851023674, - "step": 16715 - }, - { - "epoch": 2.8508098891730604, - "grad_norm": 0.07928726077079773, - "learning_rate": 1.4816378046074146e-05, - "loss": 0.001881701312959194, - "step": 16720 - }, - { - "epoch": 2.8516624040920715, - "grad_norm": 0.08206343650817871, - "learning_rate": 1.4795980480425392e-05, - "loss": 0.0017553886398673057, - "step": 16725 - }, - { - "epoch": 2.8525149190110826, - "grad_norm": 0.08301947265863419, - "learning_rate": 1.4775593513768202e-05, - "loss": 0.0031315773725509644, - "step": 16730 - }, - { - "epoch": 2.8533674339300936, - "grad_norm": 0.034867819398641586, - "learning_rate": 1.4755217155619887e-05, - "loss": 0.0016052091494202613, - "step": 16735 - }, - { - "epoch": 2.8542199488491047, - "grad_norm": 0.03188352286815643, - "learning_rate": 1.4734851415492789e-05, - "loss": 0.002192831225693226, - "step": 16740 - }, - { - "epoch": 2.855072463768116, - "grad_norm": 0.07953578233718872, - "learning_rate": 1.4714496302894339e-05, - "loss": 0.002898801490664482, - "step": 16745 - }, - { - "epoch": 2.855924978687127, - "grad_norm": 0.06410107016563416, - "learning_rate": 1.4694151827326966e-05, - "loss": 0.0023399315774440765, - "step": 16750 - }, - { - "epoch": 2.8567774936061383, - "grad_norm": 0.09000501781702042, - "learning_rate": 1.4673817998288152e-05, - "loss": 0.003346502408385277, - "step": 16755 - }, - { - "epoch": 2.857630008525149, - "grad_norm": 0.07080121338367462, - "learning_rate": 1.465349482527039e-05, - "loss": 0.002062254026532173, - "step": 16760 - }, - { - "epoch": 2.8584825234441604, - "grad_norm": 0.03813991695642471, - "learning_rate": 1.4633182317761244e-05, - "loss": 0.0037174589931964876, - "step": 16765 - }, - { - "epoch": 2.8593350383631715, - "grad_norm": 0.035782843828201294, - "learning_rate": 1.4612880485243246e-05, - "loss": 0.0017096459865570067, - "step": 16770 - }, - { - "epoch": 2.8601875532821825, - "grad_norm": 0.058607637882232666, - "learning_rate": 1.4592589337193962e-05, - "loss": 0.0013915538787841798, - "step": 16775 - }, - { - "epoch": 2.8610400682011936, - "grad_norm": 0.06763444095849991, - "learning_rate": 1.4572308883085995e-05, - "loss": 0.0025088803842663763, - "step": 16780 - }, - { - "epoch": 2.8618925831202047, - "grad_norm": 0.08015233278274536, - "learning_rate": 1.4552039132386913e-05, - "loss": 0.001922524720430374, - "step": 16785 - }, - { - "epoch": 2.8627450980392157, - "grad_norm": 0.07501938939094543, - "learning_rate": 1.4531780094559332e-05, - "loss": 0.0023180417716503142, - "step": 16790 - }, - { - "epoch": 2.863597612958227, - "grad_norm": 0.1105467900633812, - "learning_rate": 1.4511531779060838e-05, - "loss": 0.0017500972375273705, - "step": 16795 - }, - { - "epoch": 2.864450127877238, - "grad_norm": 0.016127226874232292, - "learning_rate": 1.4491294195344016e-05, - "loss": 0.0029237957671284674, - "step": 16800 - }, - { - "epoch": 2.865302642796249, - "grad_norm": 0.06432373076677322, - "learning_rate": 1.447106735285644e-05, - "loss": 0.002439063973724842, - "step": 16805 - }, - { - "epoch": 2.86615515771526, - "grad_norm": 0.07629001885652542, - "learning_rate": 1.4450851261040664e-05, - "loss": 0.0021009005606174467, - "step": 16810 - }, - { - "epoch": 2.867007672634271, - "grad_norm": 0.05186440795660019, - "learning_rate": 1.4430645929334253e-05, - "loss": 0.0010275249369442463, - "step": 16815 - }, - { - "epoch": 2.867860187553282, - "grad_norm": 0.06517529487609863, - "learning_rate": 1.4410451367169705e-05, - "loss": 0.0022583767771720887, - "step": 16820 - }, - { - "epoch": 2.868712702472293, - "grad_norm": 0.03262385353446007, - "learning_rate": 1.4390267583974544e-05, - "loss": 0.002132249251008034, - "step": 16825 - }, - { - "epoch": 2.869565217391304, - "grad_norm": 0.04578368365764618, - "learning_rate": 1.4370094589171199e-05, - "loss": 0.0015474225394427777, - "step": 16830 - }, - { - "epoch": 2.8704177323103153, - "grad_norm": 0.11160826683044434, - "learning_rate": 1.4349932392177122e-05, - "loss": 0.001869696006178856, - "step": 16835 - }, - { - "epoch": 2.8712702472293268, - "grad_norm": 0.07949322462081909, - "learning_rate": 1.4329781002404687e-05, - "loss": 0.002716188505291939, - "step": 16840 - }, - { - "epoch": 2.8721227621483374, - "grad_norm": 0.12685050070285797, - "learning_rate": 1.430964042926123e-05, - "loss": 0.0026786208152770998, - "step": 16845 - }, - { - "epoch": 2.872975277067349, - "grad_norm": 0.03826960548758507, - "learning_rate": 1.428951068214904e-05, - "loss": 0.0015644762665033341, - "step": 16850 - }, - { - "epoch": 2.8738277919863595, - "grad_norm": 0.0909774899482727, - "learning_rate": 1.4269391770465346e-05, - "loss": 0.0020492007955908776, - "step": 16855 - }, - { - "epoch": 2.874680306905371, - "grad_norm": 0.09891391545534134, - "learning_rate": 1.4249283703602345e-05, - "loss": 0.0028120437636971474, - "step": 16860 - }, - { - "epoch": 2.875532821824382, - "grad_norm": 0.06281251460313797, - "learning_rate": 1.4229186490947126e-05, - "loss": 0.001888560503721237, - "step": 16865 - }, - { - "epoch": 2.876385336743393, - "grad_norm": 0.0330815464258194, - "learning_rate": 1.4209100141881763e-05, - "loss": 0.002112870290875435, - "step": 16870 - }, - { - "epoch": 2.877237851662404, - "grad_norm": 0.053650904446840286, - "learning_rate": 1.4189024665783207e-05, - "loss": 0.0012864695861935615, - "step": 16875 - }, - { - "epoch": 2.8780903665814153, - "grad_norm": 0.035941146314144135, - "learning_rate": 1.4168960072023384e-05, - "loss": 0.0028607305139303207, - "step": 16880 - }, - { - "epoch": 2.8789428815004263, - "grad_norm": 0.025085339322686195, - "learning_rate": 1.41489063699691e-05, - "loss": 0.001800362393260002, - "step": 16885 - }, - { - "epoch": 2.8797953964194374, - "grad_norm": 0.08627615869045258, - "learning_rate": 1.4128863568982088e-05, - "loss": 0.0023837506771087645, - "step": 16890 - }, - { - "epoch": 2.8806479113384484, - "grad_norm": 0.11542297154664993, - "learning_rate": 1.4108831678419e-05, - "loss": 0.003114992380142212, - "step": 16895 - }, - { - "epoch": 2.8815004262574595, - "grad_norm": 0.04762958735227585, - "learning_rate": 1.4088810707631375e-05, - "loss": 0.0020215384662151336, - "step": 16900 - }, - { - "epoch": 2.8823529411764706, - "grad_norm": 0.08232380449771881, - "learning_rate": 1.4068800665965687e-05, - "loss": 0.002120315283536911, - "step": 16905 - }, - { - "epoch": 2.8832054560954816, - "grad_norm": 0.04248562082648277, - "learning_rate": 1.4048801562763272e-05, - "loss": 0.001563185639679432, - "step": 16910 - }, - { - "epoch": 2.8840579710144927, - "grad_norm": 0.058416519314050674, - "learning_rate": 1.4028813407360393e-05, - "loss": 0.0017185319215059281, - "step": 16915 - }, - { - "epoch": 2.8849104859335037, - "grad_norm": 0.03542419150471687, - "learning_rate": 1.4008836209088185e-05, - "loss": 0.0017645543441176415, - "step": 16920 - }, - { - "epoch": 2.885763000852515, - "grad_norm": 0.055227622389793396, - "learning_rate": 1.3988869977272645e-05, - "loss": 0.002331301011145115, - "step": 16925 - }, - { - "epoch": 2.886615515771526, - "grad_norm": 0.02851465903222561, - "learning_rate": 1.3968914721234703e-05, - "loss": 0.00188722126185894, - "step": 16930 - }, - { - "epoch": 2.887468030690537, - "grad_norm": 0.10346336662769318, - "learning_rate": 1.3948970450290129e-05, - "loss": 0.003334081172943115, - "step": 16935 - }, - { - "epoch": 2.888320545609548, - "grad_norm": 0.040114935487508774, - "learning_rate": 1.3929037173749564e-05, - "loss": 0.002542957104742527, - "step": 16940 - }, - { - "epoch": 2.8891730605285595, - "grad_norm": 0.06734409183263779, - "learning_rate": 1.3909114900918517e-05, - "loss": 0.002022533863782883, - "step": 16945 - }, - { - "epoch": 2.89002557544757, - "grad_norm": 0.03672570362687111, - "learning_rate": 1.3889203641097392e-05, - "loss": 0.0017688646912574768, - "step": 16950 - }, - { - "epoch": 2.8908780903665816, - "grad_norm": 0.016099590808153152, - "learning_rate": 1.3869303403581397e-05, - "loss": 0.002179678343236446, - "step": 16955 - }, - { - "epoch": 2.8917306052855922, - "grad_norm": 0.0655573159456253, - "learning_rate": 1.384941419766066e-05, - "loss": 0.0020285720005631448, - "step": 16960 - }, - { - "epoch": 2.8925831202046037, - "grad_norm": 0.03573548421263695, - "learning_rate": 1.3829536032620105e-05, - "loss": 0.002248694933950901, - "step": 16965 - }, - { - "epoch": 2.893435635123615, - "grad_norm": 0.06182318180799484, - "learning_rate": 1.3809668917739507e-05, - "loss": 0.002159777097404003, - "step": 16970 - }, - { - "epoch": 2.894288150042626, - "grad_norm": 0.09492490440607071, - "learning_rate": 1.3789812862293527e-05, - "loss": 0.0027505803853273393, - "step": 16975 - }, - { - "epoch": 2.895140664961637, - "grad_norm": 0.043292637914419174, - "learning_rate": 1.3769967875551613e-05, - "loss": 0.0018307223916053772, - "step": 16980 - }, - { - "epoch": 2.895993179880648, - "grad_norm": 0.08455146849155426, - "learning_rate": 1.375013396677807e-05, - "loss": 0.0019843194633722304, - "step": 16985 - }, - { - "epoch": 2.896845694799659, - "grad_norm": 0.06926032900810242, - "learning_rate": 1.3730311145232023e-05, - "loss": 0.0024761717766523363, - "step": 16990 - }, - { - "epoch": 2.89769820971867, - "grad_norm": 0.0860179215669632, - "learning_rate": 1.3710499420167413e-05, - "loss": 0.002175389975309372, - "step": 16995 - }, - { - "epoch": 2.898550724637681, - "grad_norm": 0.10651890188455582, - "learning_rate": 1.3690698800833026e-05, - "loss": 0.0033860310912132265, - "step": 17000 - }, - { - "epoch": 2.899403239556692, - "grad_norm": 0.09691976010799408, - "learning_rate": 1.3670909296472464e-05, - "loss": 0.0021878845989704134, - "step": 17005 - }, - { - "epoch": 2.9002557544757033, - "grad_norm": 0.11704960465431213, - "learning_rate": 1.3651130916324107e-05, - "loss": 0.00286871287971735, - "step": 17010 - }, - { - "epoch": 2.9011082693947143, - "grad_norm": 0.09645909816026688, - "learning_rate": 1.3631363669621153e-05, - "loss": 0.001873398572206497, - "step": 17015 - }, - { - "epoch": 2.9019607843137254, - "grad_norm": 0.13174127042293549, - "learning_rate": 1.3611607565591639e-05, - "loss": 0.00285712368786335, - "step": 17020 - }, - { - "epoch": 2.9028132992327365, - "grad_norm": 0.07539260387420654, - "learning_rate": 1.359186261345835e-05, - "loss": 0.0027119526639580727, - "step": 17025 - }, - { - "epoch": 2.9036658141517475, - "grad_norm": 0.06165684387087822, - "learning_rate": 1.3572128822438892e-05, - "loss": 0.0018354985862970353, - "step": 17030 - }, - { - "epoch": 2.9045183290707586, - "grad_norm": 0.06021244078874588, - "learning_rate": 1.3552406201745654e-05, - "loss": 0.0016940701752901077, - "step": 17035 - }, - { - "epoch": 2.90537084398977, - "grad_norm": 0.09488464146852493, - "learning_rate": 1.3532694760585795e-05, - "loss": 0.0019129924476146698, - "step": 17040 - }, - { - "epoch": 2.9062233589087807, - "grad_norm": 0.04894041642546654, - "learning_rate": 1.3512994508161307e-05, - "loss": 0.002598444186151028, - "step": 17045 - }, - { - "epoch": 2.907075873827792, - "grad_norm": 0.045589860528707504, - "learning_rate": 1.349330545366889e-05, - "loss": 0.0018267668783664703, - "step": 17050 - }, - { - "epoch": 2.907928388746803, - "grad_norm": 0.04273771867156029, - "learning_rate": 1.3473627606300071e-05, - "loss": 0.0013479530811309815, - "step": 17055 - }, - { - "epoch": 2.9087809036658143, - "grad_norm": 0.050675440579652786, - "learning_rate": 1.345396097524111e-05, - "loss": 0.001664750650525093, - "step": 17060 - }, - { - "epoch": 2.9096334185848254, - "grad_norm": 0.07637523114681244, - "learning_rate": 1.3434305569673059e-05, - "loss": 0.001719363033771515, - "step": 17065 - }, - { - "epoch": 2.9104859335038364, - "grad_norm": 0.03540422394871712, - "learning_rate": 1.3414661398771711e-05, - "loss": 0.002338713780045509, - "step": 17070 - }, - { - "epoch": 2.9113384484228475, - "grad_norm": 0.09252000600099564, - "learning_rate": 1.3395028471707613e-05, - "loss": 0.0018722079694271088, - "step": 17075 - }, - { - "epoch": 2.9121909633418586, - "grad_norm": 0.08759574592113495, - "learning_rate": 1.3375406797646068e-05, - "loss": 0.003211042284965515, - "step": 17080 - }, - { - "epoch": 2.9130434782608696, - "grad_norm": 0.07291707396507263, - "learning_rate": 1.3355796385747121e-05, - "loss": 0.002141663059592247, - "step": 17085 - }, - { - "epoch": 2.9138959931798807, - "grad_norm": 0.03608965128660202, - "learning_rate": 1.3336197245165578e-05, - "loss": 0.0015133512206375599, - "step": 17090 - }, - { - "epoch": 2.9147485080988917, - "grad_norm": 0.0686686635017395, - "learning_rate": 1.3316609385050954e-05, - "loss": 0.0015084316954016685, - "step": 17095 - }, - { - "epoch": 2.915601023017903, - "grad_norm": 0.052468664944171906, - "learning_rate": 1.3297032814547539e-05, - "loss": 0.00120701240375638, - "step": 17100 - }, - { - "epoch": 2.916453537936914, - "grad_norm": 0.06129363924264908, - "learning_rate": 1.3277467542794304e-05, - "loss": 0.002575872652232647, - "step": 17105 - }, - { - "epoch": 2.917306052855925, - "grad_norm": 0.06045043095946312, - "learning_rate": 1.3257913578924969e-05, - "loss": 0.0022510627284646036, - "step": 17110 - }, - { - "epoch": 2.918158567774936, - "grad_norm": 0.09090365469455719, - "learning_rate": 1.3238370932067996e-05, - "loss": 0.002203880250453949, - "step": 17115 - }, - { - "epoch": 2.919011082693947, - "grad_norm": 0.03382663428783417, - "learning_rate": 1.3218839611346522e-05, - "loss": 0.0009420939721167087, - "step": 17120 - }, - { - "epoch": 2.919863597612958, - "grad_norm": 0.06900735199451447, - "learning_rate": 1.3199319625878431e-05, - "loss": 0.0021647622808814047, - "step": 17125 - }, - { - "epoch": 2.920716112531969, - "grad_norm": 0.04494655504822731, - "learning_rate": 1.3179810984776277e-05, - "loss": 0.0027208495885133743, - "step": 17130 - }, - { - "epoch": 2.9215686274509802, - "grad_norm": 0.05262625217437744, - "learning_rate": 1.3160313697147373e-05, - "loss": 0.0015311154536902904, - "step": 17135 - }, - { - "epoch": 2.9224211423699913, - "grad_norm": 0.025083297863602638, - "learning_rate": 1.314082777209368e-05, - "loss": 0.00193443913012743, - "step": 17140 - }, - { - "epoch": 2.923273657289003, - "grad_norm": 0.08246373385190964, - "learning_rate": 1.3121353218711892e-05, - "loss": 0.0019143052399158479, - "step": 17145 - }, - { - "epoch": 2.9241261722080134, - "grad_norm": 0.1049862802028656, - "learning_rate": 1.3101890046093376e-05, - "loss": 0.002230258658528328, - "step": 17150 - }, - { - "epoch": 2.924978687127025, - "grad_norm": 0.042054325342178345, - "learning_rate": 1.3082438263324169e-05, - "loss": 0.0011081861332058907, - "step": 17155 - }, - { - "epoch": 2.9258312020460355, - "grad_norm": 0.0713399276137352, - "learning_rate": 1.3062997879485033e-05, - "loss": 0.0015817128121852874, - "step": 17160 - }, - { - "epoch": 2.926683716965047, - "grad_norm": 0.07212921977043152, - "learning_rate": 1.3043568903651381e-05, - "loss": 0.002985073998570442, - "step": 17165 - }, - { - "epoch": 2.927536231884058, - "grad_norm": 0.14285585284233093, - "learning_rate": 1.3024151344893299e-05, - "loss": 0.0019961275160312653, - "step": 17170 - }, - { - "epoch": 2.928388746803069, - "grad_norm": 0.06164155155420303, - "learning_rate": 1.3004745212275543e-05, - "loss": 0.0017055023461580276, - "step": 17175 - }, - { - "epoch": 2.92924126172208, - "grad_norm": 0.02376371994614601, - "learning_rate": 1.298535051485756e-05, - "loss": 0.0013552471064031124, - "step": 17180 - }, - { - "epoch": 2.9300937766410913, - "grad_norm": 0.07454569637775421, - "learning_rate": 1.296596726169342e-05, - "loss": 0.002513031102716923, - "step": 17185 - }, - { - "epoch": 2.9309462915601023, - "grad_norm": 0.0765121579170227, - "learning_rate": 1.2946595461831892e-05, - "loss": 0.0019039563834667207, - "step": 17190 - }, - { - "epoch": 2.9317988064791134, - "grad_norm": 0.07360806316137314, - "learning_rate": 1.2927235124316362e-05, - "loss": 0.001339799538254738, - "step": 17195 - }, - { - "epoch": 2.9326513213981245, - "grad_norm": 0.18903285264968872, - "learning_rate": 1.2907886258184876e-05, - "loss": 0.003720489144325256, - "step": 17200 - }, - { - "epoch": 2.9335038363171355, - "grad_norm": 0.07760016620159149, - "learning_rate": 1.2888548872470143e-05, - "loss": 0.0015237806364893913, - "step": 17205 - }, - { - "epoch": 2.9343563512361466, - "grad_norm": 0.055864643305540085, - "learning_rate": 1.286922297619949e-05, - "loss": 0.0014091457240283489, - "step": 17210 - }, - { - "epoch": 2.9352088661551576, - "grad_norm": 0.08161517977714539, - "learning_rate": 1.2849908578394888e-05, - "loss": 0.002047298289835453, - "step": 17215 - }, - { - "epoch": 2.9360613810741687, - "grad_norm": 0.11219590902328491, - "learning_rate": 1.283060568807294e-05, - "loss": 0.0023268122225999833, - "step": 17220 - }, - { - "epoch": 2.9369138959931798, - "grad_norm": 0.10008323192596436, - "learning_rate": 1.2811314314244867e-05, - "loss": 0.002319963276386261, - "step": 17225 - }, - { - "epoch": 2.937766410912191, - "grad_norm": 0.077080138027668, - "learning_rate": 1.2792034465916536e-05, - "loss": 0.0020459359511733055, - "step": 17230 - }, - { - "epoch": 2.938618925831202, - "grad_norm": 0.09049349278211594, - "learning_rate": 1.2772766152088431e-05, - "loss": 0.0038630947470664977, - "step": 17235 - }, - { - "epoch": 2.9394714407502134, - "grad_norm": 0.09306768327951431, - "learning_rate": 1.275350938175563e-05, - "loss": 0.0017305316403508186, - "step": 17240 - }, - { - "epoch": 2.940323955669224, - "grad_norm": 0.061699800193309784, - "learning_rate": 1.2734264163907824e-05, - "loss": 0.00341113954782486, - "step": 17245 - }, - { - "epoch": 2.9411764705882355, - "grad_norm": 0.11029893159866333, - "learning_rate": 1.2715030507529347e-05, - "loss": 0.0023353056982159614, - "step": 17250 - }, - { - "epoch": 2.942028985507246, - "grad_norm": 0.06272252649068832, - "learning_rate": 1.2695808421599087e-05, - "loss": 0.0012727061286568642, - "step": 17255 - }, - { - "epoch": 2.9428815004262576, - "grad_norm": 0.02106044627726078, - "learning_rate": 1.2676597915090567e-05, - "loss": 0.0020675512030720712, - "step": 17260 - }, - { - "epoch": 2.9437340153452687, - "grad_norm": 0.08245997875928879, - "learning_rate": 1.2657398996971883e-05, - "loss": 0.002128716930747032, - "step": 17265 - }, - { - "epoch": 2.9445865302642797, - "grad_norm": 0.10804266482591629, - "learning_rate": 1.2638211676205718e-05, - "loss": 0.0012407343834638595, - "step": 17270 - }, - { - "epoch": 2.945439045183291, - "grad_norm": 0.0485721081495285, - "learning_rate": 1.2619035961749375e-05, - "loss": 0.0019056517630815506, - "step": 17275 - }, - { - "epoch": 2.946291560102302, - "grad_norm": 0.04094598814845085, - "learning_rate": 1.2599871862554694e-05, - "loss": 0.0014778503216803073, - "step": 17280 - }, - { - "epoch": 2.947144075021313, - "grad_norm": 0.08831547200679779, - "learning_rate": 1.2580719387568133e-05, - "loss": 0.002304557338356972, - "step": 17285 - }, - { - "epoch": 2.947996589940324, - "grad_norm": 0.02547610178589821, - "learning_rate": 1.2561578545730685e-05, - "loss": 0.0010631450451910496, - "step": 17290 - }, - { - "epoch": 2.948849104859335, - "grad_norm": 0.09562932699918747, - "learning_rate": 1.2542449345977952e-05, - "loss": 0.0021377883851528166, - "step": 17295 - }, - { - "epoch": 2.949701619778346, - "grad_norm": 0.02090577222406864, - "learning_rate": 1.2523331797240072e-05, - "loss": 0.001333952508866787, - "step": 17300 - }, - { - "epoch": 2.950554134697357, - "grad_norm": 0.12461904436349869, - "learning_rate": 1.2504225908441751e-05, - "loss": 0.0025647601112723352, - "step": 17305 - }, - { - "epoch": 2.9514066496163682, - "grad_norm": 0.047791410237550735, - "learning_rate": 1.2485131688502254e-05, - "loss": 0.0014650242403149605, - "step": 17310 - }, - { - "epoch": 2.9522591645353793, - "grad_norm": 0.055085547268390656, - "learning_rate": 1.2466049146335387e-05, - "loss": 0.002520528435707092, - "step": 17315 - }, - { - "epoch": 2.9531116794543903, - "grad_norm": 0.09370748698711395, - "learning_rate": 1.2446978290849538e-05, - "loss": 0.002327192947268486, - "step": 17320 - }, - { - "epoch": 2.9539641943734014, - "grad_norm": 0.06663045287132263, - "learning_rate": 1.242791913094759e-05, - "loss": 0.0025285203009843826, - "step": 17325 - }, - { - "epoch": 2.9548167092924125, - "grad_norm": 0.06620613485574722, - "learning_rate": 1.2408871675527022e-05, - "loss": 0.001520772185176611, - "step": 17330 - }, - { - "epoch": 2.955669224211424, - "grad_norm": 0.08397935330867767, - "learning_rate": 1.2389835933479805e-05, - "loss": 0.001917354017496109, - "step": 17335 - }, - { - "epoch": 2.9565217391304346, - "grad_norm": 0.037347212433815, - "learning_rate": 1.2370811913692447e-05, - "loss": 0.001991302520036697, - "step": 17340 - }, - { - "epoch": 2.957374254049446, - "grad_norm": 0.09309769421815872, - "learning_rate": 1.2351799625046013e-05, - "loss": 0.0028038494288921355, - "step": 17345 - }, - { - "epoch": 2.9582267689684567, - "grad_norm": 0.03684366121888161, - "learning_rate": 1.2332799076416064e-05, - "loss": 0.0017773956060409546, - "step": 17350 - }, - { - "epoch": 2.959079283887468, - "grad_norm": 0.05473257228732109, - "learning_rate": 1.2313810276672687e-05, - "loss": 0.0012853020802140237, - "step": 17355 - }, - { - "epoch": 2.9599317988064793, - "grad_norm": 0.042117465287446976, - "learning_rate": 1.2294833234680473e-05, - "loss": 0.001919369027018547, - "step": 17360 - }, - { - "epoch": 2.9607843137254903, - "grad_norm": 0.05097515508532524, - "learning_rate": 1.2275867959298559e-05, - "loss": 0.001891462691128254, - "step": 17365 - }, - { - "epoch": 2.9616368286445014, - "grad_norm": 0.09409259259700775, - "learning_rate": 1.2256914459380544e-05, - "loss": 0.0014902386814355851, - "step": 17370 - }, - { - "epoch": 2.9624893435635125, - "grad_norm": 0.09465356171131134, - "learning_rate": 1.2237972743774576e-05, - "loss": 0.002463678829371929, - "step": 17375 - }, - { - "epoch": 2.9633418584825235, - "grad_norm": 0.02534087561070919, - "learning_rate": 1.221904282132327e-05, - "loss": 0.0023292653262615205, - "step": 17380 - }, - { - "epoch": 2.9641943734015346, - "grad_norm": 0.1058032363653183, - "learning_rate": 1.2200124700863723e-05, - "loss": 0.002900855429470539, - "step": 17385 - }, - { - "epoch": 2.9650468883205456, - "grad_norm": 0.07726191729307175, - "learning_rate": 1.218121839122757e-05, - "loss": 0.0014870663173496724, - "step": 17390 - }, - { - "epoch": 2.9658994032395567, - "grad_norm": 0.0792614072561264, - "learning_rate": 1.21623239012409e-05, - "loss": 0.001744781993329525, - "step": 17395 - }, - { - "epoch": 2.9667519181585678, - "grad_norm": 0.07266564667224884, - "learning_rate": 1.214344123972428e-05, - "loss": 0.002622047811746597, - "step": 17400 - }, - { - "epoch": 2.967604433077579, - "grad_norm": 0.06203412637114525, - "learning_rate": 1.2124570415492758e-05, - "loss": 0.002504969388246536, - "step": 17405 - }, - { - "epoch": 2.96845694799659, - "grad_norm": 0.07259709388017654, - "learning_rate": 1.2105711437355884e-05, - "loss": 0.0018782744184136391, - "step": 17410 - }, - { - "epoch": 2.969309462915601, - "grad_norm": 0.05496470257639885, - "learning_rate": 1.2086864314117633e-05, - "loss": 0.0018179532140493392, - "step": 17415 - }, - { - "epoch": 2.970161977834612, - "grad_norm": 0.0235351100564003, - "learning_rate": 1.2068029054576496e-05, - "loss": 0.0015613840892910956, - "step": 17420 - }, - { - "epoch": 2.971014492753623, - "grad_norm": 0.046441882848739624, - "learning_rate": 1.2049205667525383e-05, - "loss": 0.0014228712767362594, - "step": 17425 - }, - { - "epoch": 2.971867007672634, - "grad_norm": 0.06290153414011002, - "learning_rate": 1.2030394161751664e-05, - "loss": 0.0011624433100223541, - "step": 17430 - }, - { - "epoch": 2.972719522591645, - "grad_norm": 0.0662989467382431, - "learning_rate": 1.2011594546037205e-05, - "loss": 0.002170179411768913, - "step": 17435 - }, - { - "epoch": 2.9735720375106567, - "grad_norm": 0.06470426172018051, - "learning_rate": 1.1992806829158275e-05, - "loss": 0.0010997526347637176, - "step": 17440 - }, - { - "epoch": 2.9744245524296673, - "grad_norm": 0.039091553539037704, - "learning_rate": 1.1974031019885612e-05, - "loss": 0.0014238604344427586, - "step": 17445 - }, - { - "epoch": 2.975277067348679, - "grad_norm": 0.03796529024839401, - "learning_rate": 1.1955267126984376e-05, - "loss": 0.002270728349685669, - "step": 17450 - }, - { - "epoch": 2.9761295822676894, - "grad_norm": 0.09608127176761627, - "learning_rate": 1.1936515159214177e-05, - "loss": 0.0030095497146248817, - "step": 17455 - }, - { - "epoch": 2.976982097186701, - "grad_norm": 0.09011568874120712, - "learning_rate": 1.1917775125329063e-05, - "loss": 0.0031840000301599503, - "step": 17460 - }, - { - "epoch": 2.977834612105712, - "grad_norm": 0.057273294776678085, - "learning_rate": 1.1899047034077522e-05, - "loss": 0.0011888986453413963, - "step": 17465 - }, - { - "epoch": 2.978687127024723, - "grad_norm": 0.14515799283981323, - "learning_rate": 1.1880330894202432e-05, - "loss": 0.001710682176053524, - "step": 17470 - }, - { - "epoch": 2.979539641943734, - "grad_norm": 0.09522838145494461, - "learning_rate": 1.1861626714441096e-05, - "loss": 0.002519896999001503, - "step": 17475 - }, - { - "epoch": 2.980392156862745, - "grad_norm": 0.08164853602647781, - "learning_rate": 1.1842934503525282e-05, - "loss": 0.002578527852892876, - "step": 17480 - }, - { - "epoch": 2.9812446717817562, - "grad_norm": 0.08428774774074554, - "learning_rate": 1.1824254270181112e-05, - "loss": 0.0012953916564583778, - "step": 17485 - }, - { - "epoch": 2.9820971867007673, - "grad_norm": 0.07469037175178528, - "learning_rate": 1.180558602312915e-05, - "loss": 0.0037867244333028792, - "step": 17490 - }, - { - "epoch": 2.9829497016197783, - "grad_norm": 0.08371725678443909, - "learning_rate": 1.1786929771084346e-05, - "loss": 0.002791520766913891, - "step": 17495 - }, - { - "epoch": 2.9838022165387894, - "grad_norm": 0.014852025546133518, - "learning_rate": 1.1768285522756056e-05, - "loss": 0.0014176778495311737, - "step": 17500 - }, - { - "epoch": 2.9846547314578005, - "grad_norm": 0.04576858505606651, - "learning_rate": 1.174965328684804e-05, - "loss": 0.002578184753656387, - "step": 17505 - }, - { - "epoch": 2.9855072463768115, - "grad_norm": 0.05726059526205063, - "learning_rate": 1.1731033072058464e-05, - "loss": 0.0016687212511897088, - "step": 17510 - }, - { - "epoch": 2.9863597612958226, - "grad_norm": 0.0770409107208252, - "learning_rate": 1.171242488707984e-05, - "loss": 0.0013428821228444576, - "step": 17515 - }, - { - "epoch": 2.9872122762148337, - "grad_norm": 0.10322020947933197, - "learning_rate": 1.1693828740599093e-05, - "loss": 0.0019340002909302711, - "step": 17520 - }, - { - "epoch": 2.9880647911338447, - "grad_norm": 0.08900497853755951, - "learning_rate": 1.1675244641297531e-05, - "loss": 0.002262430638074875, - "step": 17525 - }, - { - "epoch": 2.9889173060528558, - "grad_norm": 0.06439421325922012, - "learning_rate": 1.1656672597850828e-05, - "loss": 0.003663495182991028, - "step": 17530 - }, - { - "epoch": 2.9897698209718673, - "grad_norm": 0.032524604350328445, - "learning_rate": 1.1638112618929023e-05, - "loss": 0.00146266371011734, - "step": 17535 - }, - { - "epoch": 2.990622335890878, - "grad_norm": 0.09089723974466324, - "learning_rate": 1.1619564713196542e-05, - "loss": 0.002597668394446373, - "step": 17540 - }, - { - "epoch": 2.9914748508098894, - "grad_norm": 0.11931595206260681, - "learning_rate": 1.1601028889312144e-05, - "loss": 0.0025284418836236, - "step": 17545 - }, - { - "epoch": 2.9923273657289, - "grad_norm": 0.05474149063229561, - "learning_rate": 1.1582505155928994e-05, - "loss": 0.002077813073992729, - "step": 17550 - }, - { - "epoch": 2.9931798806479115, - "grad_norm": 0.060414139181375504, - "learning_rate": 1.1563993521694564e-05, - "loss": 0.0014027852565050125, - "step": 17555 - }, - { - "epoch": 2.9940323955669226, - "grad_norm": 0.03036579303443432, - "learning_rate": 1.1545493995250727e-05, - "loss": 0.0008949190378189087, - "step": 17560 - }, - { - "epoch": 2.9948849104859336, - "grad_norm": 0.030154328793287277, - "learning_rate": 1.1527006585233662e-05, - "loss": 0.002073490060865879, - "step": 17565 - }, - { - "epoch": 2.9957374254049447, - "grad_norm": 0.04413657262921333, - "learning_rate": 1.1508531300273893e-05, - "loss": 0.0018356587737798692, - "step": 17570 - }, - { - "epoch": 2.9965899403239558, - "grad_norm": 0.022916359826922417, - "learning_rate": 1.1490068148996329e-05, - "loss": 0.0018058544024825095, - "step": 17575 - }, - { - "epoch": 2.997442455242967, - "grad_norm": 0.059595149010419846, - "learning_rate": 1.1471617140020162e-05, - "loss": 0.0019177049398422241, - "step": 17580 - }, - { - "epoch": 2.998294970161978, - "grad_norm": 0.038439393043518066, - "learning_rate": 1.1453178281958944e-05, - "loss": 0.002159320004284382, - "step": 17585 - }, - { - "epoch": 2.999147485080989, - "grad_norm": 0.021210921928286552, - "learning_rate": 1.1434751583420536e-05, - "loss": 0.0014576959423720838, - "step": 17590 - }, - { - "epoch": 2.9994884910485933, - "eval_loss": 0.04721549153327942, - "eval_runtime": 3.7007, - "eval_samples_per_second": 68.095, - "eval_steps_per_second": 1.081, - "step": 17592 - }, - { - "eval_cer_subset": 0.014346898562053186, - "eval_cer_subset_edit_distance": 881, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 17592 - }, - { - "epoch": 3.0, - "grad_norm": 0.12229876965284348, - "learning_rate": 1.1416337053007148e-05, - "loss": 0.0023294053971767426, - "step": 17595 - }, - { - "epoch": 3.000852514919011, - "grad_norm": 0.00884711928665638, - "learning_rate": 1.1397934699315283e-05, - "loss": 0.0007601963356137275, - "step": 17600 - }, - { - "epoch": 3.001705029838022, - "grad_norm": 0.006744864396750927, - "learning_rate": 1.1379544530935788e-05, - "loss": 0.0009616459719836712, - "step": 17605 - }, - { - "epoch": 3.002557544757033, - "grad_norm": 0.015270856209099293, - "learning_rate": 1.1361166556453794e-05, - "loss": 0.0008831757120788097, - "step": 17610 - }, - { - "epoch": 3.0034100596760442, - "grad_norm": 0.03363358601927757, - "learning_rate": 1.1342800784448747e-05, - "loss": 0.000755470572039485, - "step": 17615 - }, - { - "epoch": 3.0042625745950553, - "grad_norm": 0.013082304038107395, - "learning_rate": 1.1324447223494415e-05, - "loss": 0.0005882583092898131, - "step": 17620 - }, - { - "epoch": 3.0051150895140664, - "grad_norm": 0.02833358384668827, - "learning_rate": 1.1306105882158842e-05, - "loss": 0.0011209994554519653, - "step": 17625 - }, - { - "epoch": 3.0059676044330774, - "grad_norm": 0.018923277035355568, - "learning_rate": 1.1287776769004374e-05, - "loss": 0.0006263695657253265, - "step": 17630 - }, - { - "epoch": 3.0068201193520885, - "grad_norm": 0.06693795323371887, - "learning_rate": 1.1269459892587659e-05, - "loss": 0.0004380833823233843, - "step": 17635 - }, - { - "epoch": 3.0076726342710995, - "grad_norm": 0.046298157423734665, - "learning_rate": 1.1251155261459601e-05, - "loss": 0.0008744291961193084, - "step": 17640 - }, - { - "epoch": 3.008525149190111, - "grad_norm": 0.01699778623878956, - "learning_rate": 1.1232862884165428e-05, - "loss": 0.0010823222808539867, - "step": 17645 - }, - { - "epoch": 3.009377664109122, - "grad_norm": 0.03985033556818962, - "learning_rate": 1.1214582769244643e-05, - "loss": 0.0010692596435546875, - "step": 17650 - }, - { - "epoch": 3.010230179028133, - "grad_norm": 0.03855466470122337, - "learning_rate": 1.1196314925231002e-05, - "loss": 0.0005569665227085352, - "step": 17655 - }, - { - "epoch": 3.0110826939471442, - "grad_norm": 0.04734903946518898, - "learning_rate": 1.1178059360652523e-05, - "loss": 0.0006342111155390739, - "step": 17660 - }, - { - "epoch": 3.0119352088661553, - "grad_norm": 0.016796378418803215, - "learning_rate": 1.115981608403154e-05, - "loss": 0.0004393692128360271, - "step": 17665 - }, - { - "epoch": 3.0127877237851663, - "grad_norm": 0.052275605499744415, - "learning_rate": 1.1141585103884607e-05, - "loss": 0.0007596808485686779, - "step": 17670 - }, - { - "epoch": 3.0136402387041774, - "grad_norm": 0.01877668686211109, - "learning_rate": 1.1123366428722558e-05, - "loss": 0.000605479022487998, - "step": 17675 - }, - { - "epoch": 3.0144927536231885, - "grad_norm": 0.03435613960027695, - "learning_rate": 1.1105160067050468e-05, - "loss": 0.0006241496652364731, - "step": 17680 - }, - { - "epoch": 3.0153452685421995, - "grad_norm": 0.017725376412272453, - "learning_rate": 1.1086966027367666e-05, - "loss": 0.0004620179533958435, - "step": 17685 - }, - { - "epoch": 3.0161977834612106, - "grad_norm": 0.08081609010696411, - "learning_rate": 1.1068784318167741e-05, - "loss": 0.0007450764998793602, - "step": 17690 - }, - { - "epoch": 3.0170502983802217, - "grad_norm": 0.01702817529439926, - "learning_rate": 1.105061494793854e-05, - "loss": 0.0004339885897934437, - "step": 17695 - }, - { - "epoch": 3.0179028132992327, - "grad_norm": 0.01536079403012991, - "learning_rate": 1.1032457925162112e-05, - "loss": 0.00040107620880007743, - "step": 17700 - }, - { - "epoch": 3.0187553282182438, - "grad_norm": 0.005985455587506294, - "learning_rate": 1.101431325831475e-05, - "loss": 0.0005834253039211035, - "step": 17705 - }, - { - "epoch": 3.019607843137255, - "grad_norm": 0.06888663023710251, - "learning_rate": 1.099618095586701e-05, - "loss": 0.00041420520283281804, - "step": 17710 - }, - { - "epoch": 3.020460358056266, - "grad_norm": 0.017603853717446327, - "learning_rate": 1.097806102628364e-05, - "loss": 0.001230797078460455, - "step": 17715 - }, - { - "epoch": 3.021312872975277, - "grad_norm": 0.08015599846839905, - "learning_rate": 1.0959953478023628e-05, - "loss": 0.0010655376128852368, - "step": 17720 - }, - { - "epoch": 3.022165387894288, - "grad_norm": 0.03606700897216797, - "learning_rate": 1.0941858319540184e-05, - "loss": 0.0005988342221826315, - "step": 17725 - }, - { - "epoch": 3.023017902813299, - "grad_norm": 0.01536708977073431, - "learning_rate": 1.0923775559280712e-05, - "loss": 0.0006389651913195849, - "step": 17730 - }, - { - "epoch": 3.02387041773231, - "grad_norm": 0.007655138149857521, - "learning_rate": 1.090570520568686e-05, - "loss": 0.00042916592210531237, - "step": 17735 - }, - { - "epoch": 3.024722932651321, - "grad_norm": 0.029535889625549316, - "learning_rate": 1.0887647267194479e-05, - "loss": 0.0005783494096249342, - "step": 17740 - }, - { - "epoch": 3.0255754475703327, - "grad_norm": 0.04330015555024147, - "learning_rate": 1.0869601752233612e-05, - "loss": 0.000689673563465476, - "step": 17745 - }, - { - "epoch": 3.0264279624893438, - "grad_norm": 0.010324080474674702, - "learning_rate": 1.08515686692285e-05, - "loss": 0.00033456801902502774, - "step": 17750 - }, - { - "epoch": 3.027280477408355, - "grad_norm": 0.021205585449934006, - "learning_rate": 1.0833548026597568e-05, - "loss": 0.0005186548456549645, - "step": 17755 - }, - { - "epoch": 3.028132992327366, - "grad_norm": 0.01829897239804268, - "learning_rate": 1.081553983275349e-05, - "loss": 0.0010600530542433262, - "step": 17760 - }, - { - "epoch": 3.028985507246377, - "grad_norm": 0.014599860645830631, - "learning_rate": 1.0797544096103066e-05, - "loss": 0.0007877435535192489, - "step": 17765 - }, - { - "epoch": 3.029838022165388, - "grad_norm": 0.04433593526482582, - "learning_rate": 1.0779560825047306e-05, - "loss": 0.0006474100053310394, - "step": 17770 - }, - { - "epoch": 3.030690537084399, - "grad_norm": 0.037262968719005585, - "learning_rate": 1.0761590027981393e-05, - "loss": 0.0009919026866555213, - "step": 17775 - }, - { - "epoch": 3.03154305200341, - "grad_norm": 0.009379384107887745, - "learning_rate": 1.0743631713294696e-05, - "loss": 0.0008003567345440388, - "step": 17780 - }, - { - "epoch": 3.032395566922421, - "grad_norm": 0.05301728472113609, - "learning_rate": 1.0725685889370778e-05, - "loss": 0.0005272284150123596, - "step": 17785 - }, - { - "epoch": 3.0332480818414322, - "grad_norm": 0.014656663872301579, - "learning_rate": 1.0707752564587322e-05, - "loss": 0.0008352659642696381, - "step": 17790 - }, - { - "epoch": 3.0341005967604433, - "grad_norm": 0.022964198142290115, - "learning_rate": 1.0689831747316206e-05, - "loss": 0.0004729554522782564, - "step": 17795 - }, - { - "epoch": 3.0349531116794544, - "grad_norm": 0.00882013700902462, - "learning_rate": 1.0671923445923454e-05, - "loss": 0.0004315647296607494, - "step": 17800 - }, - { - "epoch": 3.0358056265984654, - "grad_norm": 0.03546347841620445, - "learning_rate": 1.0654027668769282e-05, - "loss": 0.0006825461052358151, - "step": 17805 - }, - { - "epoch": 3.0366581415174765, - "grad_norm": 0.009973641484975815, - "learning_rate": 1.063614442420801e-05, - "loss": 0.00043485076166689397, - "step": 17810 - }, - { - "epoch": 3.0375106564364875, - "grad_norm": 0.04359155148267746, - "learning_rate": 1.0618273720588144e-05, - "loss": 0.0005858796648681164, - "step": 17815 - }, - { - "epoch": 3.0383631713554986, - "grad_norm": 0.03281440958380699, - "learning_rate": 1.0600415566252307e-05, - "loss": 0.0004939477425068617, - "step": 17820 - }, - { - "epoch": 3.0392156862745097, - "grad_norm": 0.008886588737368584, - "learning_rate": 1.0582569969537304e-05, - "loss": 0.00031390462536364796, - "step": 17825 - }, - { - "epoch": 3.0400682011935207, - "grad_norm": 0.03827208653092384, - "learning_rate": 1.0564736938774028e-05, - "loss": 0.0004257161170244217, - "step": 17830 - }, - { - "epoch": 3.040920716112532, - "grad_norm": 0.01186210848391056, - "learning_rate": 1.0546916482287554e-05, - "loss": 0.0004740235395729542, - "step": 17835 - }, - { - "epoch": 3.041773231031543, - "grad_norm": 0.03290088102221489, - "learning_rate": 1.0529108608397058e-05, - "loss": 0.0005131486803293228, - "step": 17840 - }, - { - "epoch": 3.0426257459505544, - "grad_norm": 0.0059613995254039764, - "learning_rate": 1.0511313325415826e-05, - "loss": 0.0003371193306520581, - "step": 17845 - }, - { - "epoch": 3.0434782608695654, - "grad_norm": 0.008181710727512836, - "learning_rate": 1.049353064165132e-05, - "loss": 0.0003253704868257046, - "step": 17850 - }, - { - "epoch": 3.0443307757885765, - "grad_norm": 0.0913059189915657, - "learning_rate": 1.0475760565405071e-05, - "loss": 0.0008136253803968429, - "step": 17855 - }, - { - "epoch": 3.0451832907075875, - "grad_norm": 0.022350724786520004, - "learning_rate": 1.0458003104972746e-05, - "loss": 0.0003261453006416559, - "step": 17860 - }, - { - "epoch": 3.0460358056265986, - "grad_norm": 0.006000332068651915, - "learning_rate": 1.0440258268644106e-05, - "loss": 0.0004110721405595541, - "step": 17865 - }, - { - "epoch": 3.0468883205456097, - "grad_norm": 0.029682369902729988, - "learning_rate": 1.0422526064703051e-05, - "loss": 0.0003804177977144718, - "step": 17870 - }, - { - "epoch": 3.0477408354646207, - "grad_norm": 0.027596490457654, - "learning_rate": 1.0404806501427545e-05, - "loss": 0.00084029920399189, - "step": 17875 - }, - { - "epoch": 3.0485933503836318, - "grad_norm": 0.024016134440898895, - "learning_rate": 1.0387099587089688e-05, - "loss": 0.0004024073481559753, - "step": 17880 - }, - { - "epoch": 3.049445865302643, - "grad_norm": 0.03249691426753998, - "learning_rate": 1.0369405329955648e-05, - "loss": 0.000899493508040905, - "step": 17885 - }, - { - "epoch": 3.050298380221654, - "grad_norm": 0.06273671239614487, - "learning_rate": 1.035172373828568e-05, - "loss": 0.0005969330668449402, - "step": 17890 - }, - { - "epoch": 3.051150895140665, - "grad_norm": 0.02702365443110466, - "learning_rate": 1.0334054820334163e-05, - "loss": 0.0006494319997727871, - "step": 17895 - }, - { - "epoch": 3.052003410059676, - "grad_norm": 0.026773499324917793, - "learning_rate": 1.0316398584349527e-05, - "loss": 0.000355540681630373, - "step": 17900 - }, - { - "epoch": 3.052855924978687, - "grad_norm": 0.008991194888949394, - "learning_rate": 1.0298755038574284e-05, - "loss": 0.00036041475832462313, - "step": 17905 - }, - { - "epoch": 3.053708439897698, - "grad_norm": 0.0076339710503816605, - "learning_rate": 1.0281124191245031e-05, - "loss": 0.0003042724449187517, - "step": 17910 - }, - { - "epoch": 3.054560954816709, - "grad_norm": 0.026531491428613663, - "learning_rate": 1.0263506050592423e-05, - "loss": 0.0005534607917070389, - "step": 17915 - }, - { - "epoch": 3.0554134697357203, - "grad_norm": 0.009419003501534462, - "learning_rate": 1.0245900624841207e-05, - "loss": 0.0007199038751423359, - "step": 17920 - }, - { - "epoch": 3.0562659846547313, - "grad_norm": 0.02511359192430973, - "learning_rate": 1.0228307922210192e-05, - "loss": 0.0004511539824306965, - "step": 17925 - }, - { - "epoch": 3.0571184995737424, - "grad_norm": 0.007004071492701769, - "learning_rate": 1.0210727950912223e-05, - "loss": 0.0005472676362842322, - "step": 17930 - }, - { - "epoch": 3.0579710144927534, - "grad_norm": 0.005720047280192375, - "learning_rate": 1.0193160719154206e-05, - "loss": 0.0003349650418385863, - "step": 17935 - }, - { - "epoch": 3.0588235294117645, - "grad_norm": 0.00688981031998992, - "learning_rate": 1.017560623513713e-05, - "loss": 0.00028961682692170144, - "step": 17940 - }, - { - "epoch": 3.059676044330776, - "grad_norm": 0.034819502383470535, - "learning_rate": 1.0158064507056004e-05, - "loss": 0.0006485281512141228, - "step": 17945 - }, - { - "epoch": 3.060528559249787, - "grad_norm": 0.016207491979002953, - "learning_rate": 1.0140535543099885e-05, - "loss": 0.0006803269498050213, - "step": 17950 - }, - { - "epoch": 3.061381074168798, - "grad_norm": 0.013904010877013206, - "learning_rate": 1.0123019351451886e-05, - "loss": 0.001280614733695984, - "step": 17955 - }, - { - "epoch": 3.062233589087809, - "grad_norm": 0.006732371635735035, - "learning_rate": 1.0105515940289128e-05, - "loss": 0.0004976587370038033, - "step": 17960 - }, - { - "epoch": 3.0630861040068202, - "grad_norm": 0.004747320897877216, - "learning_rate": 1.0088025317782798e-05, - "loss": 0.00041887001134455204, - "step": 17965 - }, - { - "epoch": 3.0639386189258313, - "grad_norm": 0.024853700771927834, - "learning_rate": 1.0070547492098114e-05, - "loss": 0.0002716945484280586, - "step": 17970 - }, - { - "epoch": 3.0647911338448424, - "grad_norm": 0.014338959008455276, - "learning_rate": 1.0053082471394292e-05, - "loss": 0.00021331470925360917, - "step": 17975 - }, - { - "epoch": 3.0656436487638534, - "grad_norm": 0.09530264884233475, - "learning_rate": 1.003563026382459e-05, - "loss": 0.0005737710744142532, - "step": 17980 - }, - { - "epoch": 3.0664961636828645, - "grad_norm": 0.055152688175439835, - "learning_rate": 1.0018190877536263e-05, - "loss": 0.0005966671742498875, - "step": 17985 - }, - { - "epoch": 3.0673486786018755, - "grad_norm": 0.014675126411020756, - "learning_rate": 1.0000764320670622e-05, - "loss": 0.00021265523973852397, - "step": 17990 - }, - { - "epoch": 3.0682011935208866, - "grad_norm": 0.03896075114607811, - "learning_rate": 9.983350601362952e-06, - "loss": 0.00042183417826890945, - "step": 17995 - }, - { - "epoch": 3.0690537084398977, - "grad_norm": 0.026887232437729836, - "learning_rate": 9.965949727742554e-06, - "loss": 0.0004932911600917578, - "step": 18000 - }, - { - "epoch": 3.0699062233589087, - "grad_norm": 0.031420499086380005, - "learning_rate": 9.948561707932722e-06, - "loss": 0.0003844423685222864, - "step": 18005 - }, - { - "epoch": 3.07075873827792, - "grad_norm": 0.036880481988191605, - "learning_rate": 9.931186550050781e-06, - "loss": 0.0004236038308590651, - "step": 18010 - }, - { - "epoch": 3.071611253196931, - "grad_norm": 0.09423381090164185, - "learning_rate": 9.913824262208035e-06, - "loss": 0.0002767757046967745, - "step": 18015 - }, - { - "epoch": 3.072463768115942, - "grad_norm": 0.02230706810951233, - "learning_rate": 9.896474852509774e-06, - "loss": 0.00040940651670098307, - "step": 18020 - }, - { - "epoch": 3.073316283034953, - "grad_norm": 0.016502562910318375, - "learning_rate": 9.879138329055277e-06, - "loss": 0.0003771143034100533, - "step": 18025 - }, - { - "epoch": 3.074168797953964, - "grad_norm": 0.11179275810718536, - "learning_rate": 9.861814699937794e-06, - "loss": 0.0011194558814167977, - "step": 18030 - }, - { - "epoch": 3.075021312872975, - "grad_norm": 0.0195760540664196, - "learning_rate": 9.844503973244599e-06, - "loss": 0.00031050120014697313, - "step": 18035 - }, - { - "epoch": 3.075873827791986, - "grad_norm": 0.022706160321831703, - "learning_rate": 9.827206157056901e-06, - "loss": 0.0006133354268968106, - "step": 18040 - }, - { - "epoch": 3.0767263427109977, - "grad_norm": 0.04711553826928139, - "learning_rate": 9.809921259449896e-06, - "loss": 0.0005805216729640961, - "step": 18045 - }, - { - "epoch": 3.0775788576300087, - "grad_norm": 0.013767831958830357, - "learning_rate": 9.792649288492741e-06, - "loss": 0.0018730144947767258, - "step": 18050 - }, - { - "epoch": 3.0784313725490198, - "grad_norm": 0.03198297694325447, - "learning_rate": 9.775390252248584e-06, - "loss": 0.0008407266810536385, - "step": 18055 - }, - { - "epoch": 3.079283887468031, - "grad_norm": 0.004666489083319902, - "learning_rate": 9.758144158774502e-06, - "loss": 0.0006300830282270908, - "step": 18060 - }, - { - "epoch": 3.080136402387042, - "grad_norm": 0.046730559319257736, - "learning_rate": 9.740911016121561e-06, - "loss": 0.0010341707617044448, - "step": 18065 - }, - { - "epoch": 3.080988917306053, - "grad_norm": 0.028454085811972618, - "learning_rate": 9.72369083233476e-06, - "loss": 0.0006070803385227919, - "step": 18070 - }, - { - "epoch": 3.081841432225064, - "grad_norm": 0.03427174314856529, - "learning_rate": 9.706483615453036e-06, - "loss": 0.0005169651005417109, - "step": 18075 - }, - { - "epoch": 3.082693947144075, - "grad_norm": 0.06954972445964813, - "learning_rate": 9.689289373509316e-06, - "loss": 0.0006448618602007628, - "step": 18080 - }, - { - "epoch": 3.083546462063086, - "grad_norm": 0.06108829006552696, - "learning_rate": 9.672108114530434e-06, - "loss": 0.000641945656388998, - "step": 18085 - }, - { - "epoch": 3.084398976982097, - "grad_norm": 0.06737220287322998, - "learning_rate": 9.65493984653717e-06, - "loss": 0.0002690809080377221, - "step": 18090 - }, - { - "epoch": 3.0852514919011083, - "grad_norm": 0.016048768535256386, - "learning_rate": 9.637784577544234e-06, - "loss": 0.00035306806676089765, - "step": 18095 - }, - { - "epoch": 3.0861040068201193, - "grad_norm": 0.0573379211127758, - "learning_rate": 9.620642315560295e-06, - "loss": 0.0006647071335464716, - "step": 18100 - }, - { - "epoch": 3.0869565217391304, - "grad_norm": 0.006947176530957222, - "learning_rate": 9.603513068587913e-06, - "loss": 0.00039295474998652936, - "step": 18105 - }, - { - "epoch": 3.0878090366581414, - "grad_norm": 0.012169529683887959, - "learning_rate": 9.586396844623612e-06, - "loss": 0.0002381766214966774, - "step": 18110 - }, - { - "epoch": 3.0886615515771525, - "grad_norm": 0.007689214311540127, - "learning_rate": 9.569293651657802e-06, - "loss": 0.00043741161935031416, - "step": 18115 - }, - { - "epoch": 3.0895140664961636, - "grad_norm": 0.007992210797965527, - "learning_rate": 9.552203497674813e-06, - "loss": 0.00020460875239223242, - "step": 18120 - }, - { - "epoch": 3.0903665814151746, - "grad_norm": 0.0637383833527565, - "learning_rate": 9.535126390652917e-06, - "loss": 0.0009160020388662815, - "step": 18125 - }, - { - "epoch": 3.0912190963341857, - "grad_norm": 0.027393560856580734, - "learning_rate": 9.518062338564269e-06, - "loss": 0.00038999966345727445, - "step": 18130 - }, - { - "epoch": 3.0920716112531967, - "grad_norm": 0.07132820785045624, - "learning_rate": 9.501011349374927e-06, - "loss": 0.0006502022966742516, - "step": 18135 - }, - { - "epoch": 3.092924126172208, - "grad_norm": 0.03045077994465828, - "learning_rate": 9.48397343104486e-06, - "loss": 0.00084984190762043, - "step": 18140 - }, - { - "epoch": 3.0937766410912193, - "grad_norm": 0.009866426698863506, - "learning_rate": 9.466948591527953e-06, - "loss": 0.0005647209007292985, - "step": 18145 - }, - { - "epoch": 3.0946291560102304, - "grad_norm": 0.11743370443582535, - "learning_rate": 9.449936838771943e-06, - "loss": 0.0014441744424402714, - "step": 18150 - }, - { - "epoch": 3.0954816709292414, - "grad_norm": 0.03290848433971405, - "learning_rate": 9.432938180718506e-06, - "loss": 0.00036750044673681257, - "step": 18155 - }, - { - "epoch": 3.0963341858482525, - "grad_norm": 0.011831770651042461, - "learning_rate": 9.415952625303169e-06, - "loss": 0.0004473600536584854, - "step": 18160 - }, - { - "epoch": 3.0971867007672635, - "grad_norm": 0.08015407621860504, - "learning_rate": 9.398980180455355e-06, - "loss": 0.0006069076247513294, - "step": 18165 - }, - { - "epoch": 3.0980392156862746, - "grad_norm": 0.029129406437277794, - "learning_rate": 9.382020854098356e-06, - "loss": 0.0007575173862278461, - "step": 18170 - }, - { - "epoch": 3.0988917306052857, - "grad_norm": 0.0051441071555018425, - "learning_rate": 9.365074654149368e-06, - "loss": 0.00029567121528089045, - "step": 18175 - }, - { - "epoch": 3.0997442455242967, - "grad_norm": 0.055952105671167374, - "learning_rate": 9.348141588519435e-06, - "loss": 0.0005467975046485662, - "step": 18180 - }, - { - "epoch": 3.100596760443308, - "grad_norm": 0.009275187738239765, - "learning_rate": 9.331221665113471e-06, - "loss": 0.0003922369331121445, - "step": 18185 - }, - { - "epoch": 3.101449275362319, - "grad_norm": 0.052929461002349854, - "learning_rate": 9.314314891830251e-06, - "loss": 0.0005707596894353629, - "step": 18190 - }, - { - "epoch": 3.10230179028133, - "grad_norm": 0.011049921624362469, - "learning_rate": 9.297421276562426e-06, - "loss": 0.0002663507591933012, - "step": 18195 - }, - { - "epoch": 3.103154305200341, - "grad_norm": 0.04371742531657219, - "learning_rate": 9.280540827196516e-06, - "loss": 0.00039334925822913646, - "step": 18200 - }, - { - "epoch": 3.104006820119352, - "grad_norm": 0.04068119451403618, - "learning_rate": 9.263673551612858e-06, - "loss": 0.00039259335026144984, - "step": 18205 - }, - { - "epoch": 3.104859335038363, - "grad_norm": 0.020368283614516258, - "learning_rate": 9.246819457685662e-06, - "loss": 0.00041896156035363673, - "step": 18210 - }, - { - "epoch": 3.105711849957374, - "grad_norm": 0.03870120272040367, - "learning_rate": 9.229978553282968e-06, - "loss": 0.00047820848412811757, - "step": 18215 - }, - { - "epoch": 3.106564364876385, - "grad_norm": 0.013779827393591404, - "learning_rate": 9.213150846266686e-06, - "loss": 0.0003055138513445854, - "step": 18220 - }, - { - "epoch": 3.1074168797953963, - "grad_norm": 0.013860267587006092, - "learning_rate": 9.19633634449255e-06, - "loss": 0.00031585688702762127, - "step": 18225 - }, - { - "epoch": 3.1082693947144073, - "grad_norm": 0.05094626918435097, - "learning_rate": 9.179535055810118e-06, - "loss": 0.002102493681013584, - "step": 18230 - }, - { - "epoch": 3.1091219096334184, - "grad_norm": 0.007574997376650572, - "learning_rate": 9.162746988062783e-06, - "loss": 0.00019260718254372479, - "step": 18235 - }, - { - "epoch": 3.10997442455243, - "grad_norm": 0.04581240937113762, - "learning_rate": 9.145972149087787e-06, - "loss": 0.0008758898824453354, - "step": 18240 - }, - { - "epoch": 3.110826939471441, - "grad_norm": 0.02130374312400818, - "learning_rate": 9.129210546716194e-06, - "loss": 0.0003915982786566019, - "step": 18245 - }, - { - "epoch": 3.111679454390452, - "grad_norm": 0.022723006084561348, - "learning_rate": 9.112462188772862e-06, - "loss": 0.0007300690747797489, - "step": 18250 - }, - { - "epoch": 3.112531969309463, - "grad_norm": 0.08266850560903549, - "learning_rate": 9.095727083076484e-06, - "loss": 0.0005315537564456463, - "step": 18255 - }, - { - "epoch": 3.113384484228474, - "grad_norm": 0.06052851676940918, - "learning_rate": 9.079005237439557e-06, - "loss": 0.000990215130150318, - "step": 18260 - }, - { - "epoch": 3.114236999147485, - "grad_norm": 0.005194041877985001, - "learning_rate": 9.062296659668411e-06, - "loss": 0.00022260420955717565, - "step": 18265 - }, - { - "epoch": 3.1150895140664963, - "grad_norm": 0.06306616961956024, - "learning_rate": 9.045601357563156e-06, - "loss": 0.00041153267957270143, - "step": 18270 - }, - { - "epoch": 3.1159420289855073, - "grad_norm": 0.010921395383775234, - "learning_rate": 9.028919338917712e-06, - "loss": 0.0006207648664712905, - "step": 18275 - }, - { - "epoch": 3.1167945439045184, - "grad_norm": 0.04868682101368904, - "learning_rate": 9.012250611519802e-06, - "loss": 0.0006932941731065511, - "step": 18280 - }, - { - "epoch": 3.1176470588235294, - "grad_norm": 0.08955781161785126, - "learning_rate": 8.99559518315094e-06, - "loss": 0.0011549662798643113, - "step": 18285 - }, - { - "epoch": 3.1184995737425405, - "grad_norm": 0.013239394873380661, - "learning_rate": 8.978953061586447e-06, - "loss": 0.0007932853884994983, - "step": 18290 - }, - { - "epoch": 3.1193520886615516, - "grad_norm": 0.018361147493124008, - "learning_rate": 8.962324254595406e-06, - "loss": 0.0008410025388002395, - "step": 18295 - }, - { - "epoch": 3.1202046035805626, - "grad_norm": 0.01051001250743866, - "learning_rate": 8.94570876994071e-06, - "loss": 0.000252532446756959, - "step": 18300 - }, - { - "epoch": 3.1210571184995737, - "grad_norm": 0.03304922580718994, - "learning_rate": 8.929106615378996e-06, - "loss": 0.00035131536424160004, - "step": 18305 - }, - { - "epoch": 3.1219096334185847, - "grad_norm": 0.04879309609532356, - "learning_rate": 8.912517798660728e-06, - "loss": 0.000421084463596344, - "step": 18310 - }, - { - "epoch": 3.122762148337596, - "grad_norm": 0.010428894311189651, - "learning_rate": 8.89594232753011e-06, - "loss": 0.0004888340365141631, - "step": 18315 - }, - { - "epoch": 3.123614663256607, - "grad_norm": 0.03332305699586868, - "learning_rate": 8.879380209725114e-06, - "loss": 0.0005710380151867867, - "step": 18320 - }, - { - "epoch": 3.124467178175618, - "grad_norm": 0.11709732562303543, - "learning_rate": 8.862831452977486e-06, - "loss": 0.0007624867372214794, - "step": 18325 - }, - { - "epoch": 3.125319693094629, - "grad_norm": 0.010226447135210037, - "learning_rate": 8.846296065012758e-06, - "loss": 0.0002884409856051207, - "step": 18330 - }, - { - "epoch": 3.12617220801364, - "grad_norm": 0.06316721439361572, - "learning_rate": 8.829774053550167e-06, - "loss": 0.0005763838067650795, - "step": 18335 - }, - { - "epoch": 3.127024722932651, - "grad_norm": 0.009583157487213612, - "learning_rate": 8.813265426302772e-06, - "loss": 0.000740795349702239, - "step": 18340 - }, - { - "epoch": 3.1278772378516626, - "grad_norm": 0.09729946404695511, - "learning_rate": 8.796770190977332e-06, - "loss": 0.0009914403781294823, - "step": 18345 - }, - { - "epoch": 3.1287297527706737, - "grad_norm": 0.06878595054149628, - "learning_rate": 8.78028835527436e-06, - "loss": 0.000869260635226965, - "step": 18350 - }, - { - "epoch": 3.1295822676896847, - "grad_norm": 0.07358408719301224, - "learning_rate": 8.763819926888147e-06, - "loss": 0.0003912035841494799, - "step": 18355 - }, - { - "epoch": 3.130434782608696, - "grad_norm": 0.023181110620498657, - "learning_rate": 8.747364913506694e-06, - "loss": 0.0006454653572291136, - "step": 18360 - }, - { - "epoch": 3.131287297527707, - "grad_norm": 0.014834016561508179, - "learning_rate": 8.730923322811748e-06, - "loss": 0.0004834470339119434, - "step": 18365 - }, - { - "epoch": 3.132139812446718, - "grad_norm": 0.013688327744603157, - "learning_rate": 8.714495162478786e-06, - "loss": 0.00042675542645156386, - "step": 18370 - }, - { - "epoch": 3.132992327365729, - "grad_norm": 0.09365646541118622, - "learning_rate": 8.69808044017703e-06, - "loss": 0.000931151770055294, - "step": 18375 - }, - { - "epoch": 3.13384484228474, - "grad_norm": 0.026040131226181984, - "learning_rate": 8.681679163569399e-06, - "loss": 0.0008362406864762306, - "step": 18380 - }, - { - "epoch": 3.134697357203751, - "grad_norm": 0.026200976222753525, - "learning_rate": 8.665291340312585e-06, - "loss": 0.00024140358436852694, - "step": 18385 - }, - { - "epoch": 3.135549872122762, - "grad_norm": 0.06846249848604202, - "learning_rate": 8.648916978056948e-06, - "loss": 0.00040455334819853304, - "step": 18390 - }, - { - "epoch": 3.136402387041773, - "grad_norm": 0.005816053133457899, - "learning_rate": 8.632556084446594e-06, - "loss": 0.00026596912648528813, - "step": 18395 - }, - { - "epoch": 3.1372549019607843, - "grad_norm": 0.04414185881614685, - "learning_rate": 8.616208667119315e-06, - "loss": 0.0012877457775175571, - "step": 18400 - }, - { - "epoch": 3.1381074168797953, - "grad_norm": 0.03767494857311249, - "learning_rate": 8.59987473370665e-06, - "loss": 0.0010142676532268525, - "step": 18405 - }, - { - "epoch": 3.1389599317988064, - "grad_norm": 0.04332097992300987, - "learning_rate": 8.583554291833817e-06, - "loss": 0.0006124789826571942, - "step": 18410 - }, - { - "epoch": 3.1398124467178175, - "grad_norm": 0.007427348289638758, - "learning_rate": 8.567247349119739e-06, - "loss": 0.0007707455195486545, - "step": 18415 - }, - { - "epoch": 3.1406649616368285, - "grad_norm": 0.04704085737466812, - "learning_rate": 8.550953913177026e-06, - "loss": 0.0003297704039141536, - "step": 18420 - }, - { - "epoch": 3.1415174765558396, - "grad_norm": 0.015286453999578953, - "learning_rate": 8.534673991612011e-06, - "loss": 0.0007448584772646427, - "step": 18425 - }, - { - "epoch": 3.1423699914748506, - "grad_norm": 0.06501411646604538, - "learning_rate": 8.518407592024712e-06, - "loss": 0.0009272911585867405, - "step": 18430 - }, - { - "epoch": 3.1432225063938617, - "grad_norm": 0.07314588874578476, - "learning_rate": 8.50215472200881e-06, - "loss": 0.000575255136936903, - "step": 18435 - }, - { - "epoch": 3.144075021312873, - "grad_norm": 0.01809004321694374, - "learning_rate": 8.485915389151694e-06, - "loss": 0.000459101889282465, - "step": 18440 - }, - { - "epoch": 3.1449275362318843, - "grad_norm": 0.0908508151769638, - "learning_rate": 8.469689601034406e-06, - "loss": 0.00044624172151088717, - "step": 18445 - }, - { - "epoch": 3.1457800511508953, - "grad_norm": 0.029488559812307358, - "learning_rate": 8.45347736523171e-06, - "loss": 0.0003597501665353775, - "step": 18450 - }, - { - "epoch": 3.1466325660699064, - "grad_norm": 0.015282983891665936, - "learning_rate": 8.437278689312007e-06, - "loss": 0.0005331444554030895, - "step": 18455 - }, - { - "epoch": 3.1474850809889174, - "grad_norm": 0.0475476048886776, - "learning_rate": 8.421093580837374e-06, - "loss": 0.0010153815150260926, - "step": 18460 - }, - { - "epoch": 3.1483375959079285, - "grad_norm": 0.09270385652780533, - "learning_rate": 8.404922047363548e-06, - "loss": 0.0007084616459906101, - "step": 18465 - }, - { - "epoch": 3.1491901108269396, - "grad_norm": 0.025847190991044044, - "learning_rate": 8.388764096439953e-06, - "loss": 0.00023725461214780809, - "step": 18470 - }, - { - "epoch": 3.1500426257459506, - "grad_norm": 0.023858604952692986, - "learning_rate": 8.372619735609662e-06, - "loss": 0.0003485321067273617, - "step": 18475 - }, - { - "epoch": 3.1508951406649617, - "grad_norm": 0.005237930454313755, - "learning_rate": 8.356488972409398e-06, - "loss": 0.0005028464831411839, - "step": 18480 - }, - { - "epoch": 3.1517476555839727, - "grad_norm": 0.04220377653837204, - "learning_rate": 8.340371814369532e-06, - "loss": 0.0009449001401662827, - "step": 18485 - }, - { - "epoch": 3.152600170502984, - "grad_norm": 0.0714297816157341, - "learning_rate": 8.324268269014078e-06, - "loss": 0.0004925032146275044, - "step": 18490 - }, - { - "epoch": 3.153452685421995, - "grad_norm": 0.023220403119921684, - "learning_rate": 8.308178343860729e-06, - "loss": 0.0006748316343873739, - "step": 18495 - }, - { - "epoch": 3.154305200341006, - "grad_norm": 0.05557497963309288, - "learning_rate": 8.292102046420787e-06, - "loss": 0.0007373414933681488, - "step": 18500 - }, - { - "epoch": 3.155157715260017, - "grad_norm": 0.017496848478913307, - "learning_rate": 8.276039384199203e-06, - "loss": 0.001099762413650751, - "step": 18505 - }, - { - "epoch": 3.156010230179028, - "grad_norm": 0.006222693715244532, - "learning_rate": 8.259990364694557e-06, - "loss": 0.00030525855254381895, - "step": 18510 - }, - { - "epoch": 3.156862745098039, - "grad_norm": 0.016474226489663124, - "learning_rate": 8.243954995399062e-06, - "loss": 0.0003330275183543563, - "step": 18515 - }, - { - "epoch": 3.15771526001705, - "grad_norm": 0.011799500323832035, - "learning_rate": 8.227933283798587e-06, - "loss": 0.0008484587073326111, - "step": 18520 - }, - { - "epoch": 3.1585677749360612, - "grad_norm": 0.10201061517000198, - "learning_rate": 8.211925237372581e-06, - "loss": 0.0006466713268309832, - "step": 18525 - }, - { - "epoch": 3.1594202898550723, - "grad_norm": 0.04381557181477547, - "learning_rate": 8.195930863594131e-06, - "loss": 0.0004293074831366539, - "step": 18530 - }, - { - "epoch": 3.1602728047740833, - "grad_norm": 0.019579825922846794, - "learning_rate": 8.17995016992994e-06, - "loss": 0.000977578666061163, - "step": 18535 - }, - { - "epoch": 3.1611253196930944, - "grad_norm": 0.043777912855148315, - "learning_rate": 8.163983163840338e-06, - "loss": 0.0004046197980642319, - "step": 18540 - }, - { - "epoch": 3.161977834612106, - "grad_norm": 0.006554318591952324, - "learning_rate": 8.148029852779258e-06, - "loss": 0.0013218319974839688, - "step": 18545 - }, - { - "epoch": 3.162830349531117, - "grad_norm": 0.005029724910855293, - "learning_rate": 8.13209024419422e-06, - "loss": 0.0006320577114820481, - "step": 18550 - }, - { - "epoch": 3.163682864450128, - "grad_norm": 0.04088412597775459, - "learning_rate": 8.11616434552637e-06, - "loss": 0.001373323891311884, - "step": 18555 - }, - { - "epoch": 3.164535379369139, - "grad_norm": 0.04089086875319481, - "learning_rate": 8.100252164210444e-06, - "loss": 0.0009590038098394871, - "step": 18560 - }, - { - "epoch": 3.16538789428815, - "grad_norm": 0.059163108468055725, - "learning_rate": 8.084353707674792e-06, - "loss": 0.0007160831708461046, - "step": 18565 - }, - { - "epoch": 3.166240409207161, - "grad_norm": 0.040127795189619064, - "learning_rate": 8.068468983341338e-06, - "loss": 0.000580929359421134, - "step": 18570 - }, - { - "epoch": 3.1670929241261723, - "grad_norm": 0.11662314087152481, - "learning_rate": 8.052597998625588e-06, - "loss": 0.0005010033026337623, - "step": 18575 - }, - { - "epoch": 3.1679454390451833, - "grad_norm": 0.0162198469042778, - "learning_rate": 8.036740760936647e-06, - "loss": 0.0003753812052309513, - "step": 18580 - }, - { - "epoch": 3.1687979539641944, - "grad_norm": 0.12012816220521927, - "learning_rate": 8.020897277677215e-06, - "loss": 0.0009693917818367481, - "step": 18585 - }, - { - "epoch": 3.1696504688832055, - "grad_norm": 0.03412945196032524, - "learning_rate": 8.00506755624355e-06, - "loss": 0.0001745267305523157, - "step": 18590 - }, - { - "epoch": 3.1705029838022165, - "grad_norm": 0.005339973606169224, - "learning_rate": 7.989251604025489e-06, - "loss": 0.0009339713491499424, - "step": 18595 - }, - { - "epoch": 3.1713554987212276, - "grad_norm": 0.021463308483362198, - "learning_rate": 7.973449428406439e-06, - "loss": 0.0006150984205305577, - "step": 18600 - }, - { - "epoch": 3.1722080136402386, - "grad_norm": 0.011554487980902195, - "learning_rate": 7.957661036763397e-06, - "loss": 0.00038701703306287527, - "step": 18605 - }, - { - "epoch": 3.1730605285592497, - "grad_norm": 0.021070247516036034, - "learning_rate": 7.941886436466888e-06, - "loss": 0.0007086104713380336, - "step": 18610 - }, - { - "epoch": 3.1739130434782608, - "grad_norm": 0.07222088426351547, - "learning_rate": 7.926125634881047e-06, - "loss": 0.0005243740510195493, - "step": 18615 - }, - { - "epoch": 3.174765558397272, - "grad_norm": 0.09254760295152664, - "learning_rate": 7.910378639363528e-06, - "loss": 0.0007765952497720719, - "step": 18620 - }, - { - "epoch": 3.175618073316283, - "grad_norm": 0.037701316177845, - "learning_rate": 7.89464545726555e-06, - "loss": 0.00039138970896601677, - "step": 18625 - }, - { - "epoch": 3.176470588235294, - "grad_norm": 0.015249347314238548, - "learning_rate": 7.878926095931876e-06, - "loss": 0.0003943302668631077, - "step": 18630 - }, - { - "epoch": 3.177323103154305, - "grad_norm": 0.016091618686914444, - "learning_rate": 7.863220562700847e-06, - "loss": 0.000575948553159833, - "step": 18635 - }, - { - "epoch": 3.1781756180733165, - "grad_norm": 0.05504714697599411, - "learning_rate": 7.847528864904322e-06, - "loss": 0.0012753555551171304, - "step": 18640 - }, - { - "epoch": 3.1790281329923276, - "grad_norm": 0.01844659261405468, - "learning_rate": 7.831851009867693e-06, - "loss": 0.00032608325127512214, - "step": 18645 - }, - { - "epoch": 3.1798806479113386, - "grad_norm": 0.02932833693921566, - "learning_rate": 7.816187004909927e-06, - "loss": 0.0002993215108290315, - "step": 18650 - }, - { - "epoch": 3.1807331628303497, - "grad_norm": 0.03746391460299492, - "learning_rate": 7.800536857343479e-06, - "loss": 0.000614574272185564, - "step": 18655 - }, - { - "epoch": 3.1815856777493607, - "grad_norm": 0.04635264351963997, - "learning_rate": 7.784900574474383e-06, - "loss": 0.0005155592691153288, - "step": 18660 - }, - { - "epoch": 3.182438192668372, - "grad_norm": 0.024929136037826538, - "learning_rate": 7.769278163602164e-06, - "loss": 0.0003661647439002991, - "step": 18665 - }, - { - "epoch": 3.183290707587383, - "grad_norm": 0.005773736163973808, - "learning_rate": 7.753669632019881e-06, - "loss": 0.0003662605304270983, - "step": 18670 - }, - { - "epoch": 3.184143222506394, - "grad_norm": 0.14380963146686554, - "learning_rate": 7.738074987014107e-06, - "loss": 0.000703729223459959, - "step": 18675 - }, - { - "epoch": 3.184995737425405, - "grad_norm": 0.03039398603141308, - "learning_rate": 7.722494235864967e-06, - "loss": 0.00028703445568680765, - "step": 18680 - }, - { - "epoch": 3.185848252344416, - "grad_norm": 0.022264502942562103, - "learning_rate": 7.706927385846053e-06, - "loss": 0.0004953373223543167, - "step": 18685 - }, - { - "epoch": 3.186700767263427, - "grad_norm": 0.022176261991262436, - "learning_rate": 7.691374444224497e-06, - "loss": 0.0006184632889926434, - "step": 18690 - }, - { - "epoch": 3.187553282182438, - "grad_norm": 0.030033515766263008, - "learning_rate": 7.675835418260915e-06, - "loss": 0.0006910198833793401, - "step": 18695 - }, - { - "epoch": 3.1884057971014492, - "grad_norm": 0.12117313593626022, - "learning_rate": 7.660310315209455e-06, - "loss": 0.0012623773887753486, - "step": 18700 - }, - { - "epoch": 3.1892583120204603, - "grad_norm": 0.013550493866205215, - "learning_rate": 7.644799142317753e-06, - "loss": 0.0007082201074808836, - "step": 18705 - }, - { - "epoch": 3.1901108269394713, - "grad_norm": 0.0489371083676815, - "learning_rate": 7.629301906826945e-06, - "loss": 0.0007669483777135611, - "step": 18710 - }, - { - "epoch": 3.1909633418584824, - "grad_norm": 0.028650205582380295, - "learning_rate": 7.6138186159716435e-06, - "loss": 0.0009685775265097618, - "step": 18715 - }, - { - "epoch": 3.1918158567774935, - "grad_norm": 0.06777958571910858, - "learning_rate": 7.598349276979958e-06, - "loss": 0.0003870198968797922, - "step": 18720 - }, - { - "epoch": 3.1926683716965045, - "grad_norm": 0.027635935693979263, - "learning_rate": 7.582893897073514e-06, - "loss": 0.00038398322649300096, - "step": 18725 - }, - { - "epoch": 3.1935208866155156, - "grad_norm": 0.0027559841983020306, - "learning_rate": 7.567452483467381e-06, - "loss": 0.0003620174713432789, - "step": 18730 - }, - { - "epoch": 3.1943734015345266, - "grad_norm": 0.010767337866127491, - "learning_rate": 7.552025043370125e-06, - "loss": 0.00037704890128225087, - "step": 18735 - }, - { - "epoch": 3.1952259164535377, - "grad_norm": 0.0657992735505104, - "learning_rate": 7.5366115839837815e-06, - "loss": 0.0004264485090970993, - "step": 18740 - }, - { - "epoch": 3.196078431372549, - "grad_norm": 0.029620325192809105, - "learning_rate": 7.5212121125038796e-06, - "loss": 0.0002805137075483799, - "step": 18745 - }, - { - "epoch": 3.1969309462915603, - "grad_norm": 0.04062730073928833, - "learning_rate": 7.505826636119407e-06, - "loss": 0.00017744075739756227, - "step": 18750 - }, - { - "epoch": 3.1977834612105713, - "grad_norm": 0.0391278937458992, - "learning_rate": 7.490455162012808e-06, - "loss": 0.001126928348094225, - "step": 18755 - }, - { - "epoch": 3.1986359761295824, - "grad_norm": 0.0766754299402237, - "learning_rate": 7.4750976973599986e-06, - "loss": 0.0006041087210178376, - "step": 18760 - }, - { - "epoch": 3.1994884910485935, - "grad_norm": 0.01741630584001541, - "learning_rate": 7.459754249330347e-06, - "loss": 0.0007178018335253, - "step": 18765 - }, - { - "epoch": 3.2003410059676045, - "grad_norm": 0.052834443747997284, - "learning_rate": 7.444424825086698e-06, - "loss": 0.0006523634772747755, - "step": 18770 - }, - { - "epoch": 3.2011935208866156, - "grad_norm": 0.051064226776361465, - "learning_rate": 7.4291094317853365e-06, - "loss": 0.0004721490200608969, - "step": 18775 - }, - { - "epoch": 3.2020460358056266, - "grad_norm": 0.01717698574066162, - "learning_rate": 7.4138080765759885e-06, - "loss": 0.0010264517739415168, - "step": 18780 - }, - { - "epoch": 3.2028985507246377, - "grad_norm": 0.0630933865904808, - "learning_rate": 7.398520766601833e-06, - "loss": 0.000731096789240837, - "step": 18785 - }, - { - "epoch": 3.2037510656436488, - "grad_norm": 0.021110277622938156, - "learning_rate": 7.383247508999501e-06, - "loss": 0.00034617548808455465, - "step": 18790 - }, - { - "epoch": 3.20460358056266, - "grad_norm": 0.03756425157189369, - "learning_rate": 7.367988310899066e-06, - "loss": 0.0005103135481476784, - "step": 18795 - }, - { - "epoch": 3.205456095481671, - "grad_norm": 0.009691229090094566, - "learning_rate": 7.352743179424024e-06, - "loss": 0.0007526874542236328, - "step": 18800 - }, - { - "epoch": 3.206308610400682, - "grad_norm": 0.007688464596867561, - "learning_rate": 7.337512121691304e-06, - "loss": 0.0008044790476560593, - "step": 18805 - }, - { - "epoch": 3.207161125319693, - "grad_norm": 0.010440339334309101, - "learning_rate": 7.322295144811276e-06, - "loss": 0.00020275618880987166, - "step": 18810 - }, - { - "epoch": 3.208013640238704, - "grad_norm": 0.01417286042124033, - "learning_rate": 7.307092255887711e-06, - "loss": 0.0007631714455783367, - "step": 18815 - }, - { - "epoch": 3.208866155157715, - "grad_norm": 0.0317782461643219, - "learning_rate": 7.291903462017859e-06, - "loss": 0.0003871546592563391, - "step": 18820 - }, - { - "epoch": 3.209718670076726, - "grad_norm": 0.017280934378504753, - "learning_rate": 7.27672877029233e-06, - "loss": 0.002473811246454716, - "step": 18825 - }, - { - "epoch": 3.2105711849957372, - "grad_norm": 0.015073291026055813, - "learning_rate": 7.261568187795169e-06, - "loss": 0.00022768331691622735, - "step": 18830 - }, - { - "epoch": 3.2114236999147483, - "grad_norm": 0.10031914710998535, - "learning_rate": 7.246421721603867e-06, - "loss": 0.0005759174935519695, - "step": 18835 - }, - { - "epoch": 3.21227621483376, - "grad_norm": 0.012291891500353813, - "learning_rate": 7.2312893787892695e-06, - "loss": 0.000749863451346755, - "step": 18840 - }, - { - "epoch": 3.213128729752771, - "grad_norm": 0.048827286809682846, - "learning_rate": 7.216171166415677e-06, - "loss": 0.00025824215263128283, - "step": 18845 - }, - { - "epoch": 3.213981244671782, - "grad_norm": 0.02683771587908268, - "learning_rate": 7.201067091540773e-06, - "loss": 0.0006501530762761832, - "step": 18850 - }, - { - "epoch": 3.214833759590793, - "grad_norm": 0.019678298383951187, - "learning_rate": 7.185977161215633e-06, - "loss": 0.0006075311917811632, - "step": 18855 - }, - { - "epoch": 3.215686274509804, - "grad_norm": 0.03252946212887764, - "learning_rate": 7.170901382484737e-06, - "loss": 0.0005334909074008465, - "step": 18860 - }, - { - "epoch": 3.216538789428815, - "grad_norm": 0.02117627114057541, - "learning_rate": 7.155839762385973e-06, - "loss": 0.0012689195573329926, - "step": 18865 - }, - { - "epoch": 3.217391304347826, - "grad_norm": 0.057649675756692886, - "learning_rate": 7.140792307950598e-06, - "loss": 0.0006012415513396263, - "step": 18870 - }, - { - "epoch": 3.2182438192668372, - "grad_norm": 0.01991843432188034, - "learning_rate": 7.125759026203254e-06, - "loss": 0.0006623437628149986, - "step": 18875 - }, - { - "epoch": 3.2190963341858483, - "grad_norm": 0.020812440663576126, - "learning_rate": 7.110739924161998e-06, - "loss": 0.00042641563341021537, - "step": 18880 - }, - { - "epoch": 3.2199488491048593, - "grad_norm": 0.001917969319038093, - "learning_rate": 7.095735008838227e-06, - "loss": 0.00040855356492102145, - "step": 18885 - }, - { - "epoch": 3.2208013640238704, - "grad_norm": 0.02301851660013199, - "learning_rate": 7.080744287236753e-06, - "loss": 0.000662582740187645, - "step": 18890 - }, - { - "epoch": 3.2216538789428815, - "grad_norm": 0.045610178261995316, - "learning_rate": 7.065767766355733e-06, - "loss": 0.0003238538280129433, - "step": 18895 - }, - { - "epoch": 3.2225063938618925, - "grad_norm": 0.02546820044517517, - "learning_rate": 7.050805453186707e-06, - "loss": 0.0005394276697188616, - "step": 18900 - }, - { - "epoch": 3.2233589087809036, - "grad_norm": 0.01680169068276882, - "learning_rate": 7.035857354714572e-06, - "loss": 0.0001849454827606678, - "step": 18905 - }, - { - "epoch": 3.2242114236999146, - "grad_norm": 0.07080511003732681, - "learning_rate": 7.020923477917616e-06, - "loss": 0.0009049614891409874, - "step": 18910 - }, - { - "epoch": 3.2250639386189257, - "grad_norm": 0.015143739990890026, - "learning_rate": 7.006003829767458e-06, - "loss": 0.0003394487779587507, - "step": 18915 - }, - { - "epoch": 3.2259164535379368, - "grad_norm": 0.02981925569474697, - "learning_rate": 6.991098417229077e-06, - "loss": 0.0008880021050572395, - "step": 18920 - }, - { - "epoch": 3.226768968456948, - "grad_norm": 0.011556530371308327, - "learning_rate": 6.976207247260836e-06, - "loss": 0.0005170104093849659, - "step": 18925 - }, - { - "epoch": 3.227621483375959, - "grad_norm": 0.06276580691337585, - "learning_rate": 6.961330326814407e-06, - "loss": 0.0013013094663619994, - "step": 18930 - }, - { - "epoch": 3.2284739982949704, - "grad_norm": 0.015998052433133125, - "learning_rate": 6.946467662834852e-06, - "loss": 0.0005359035450965167, - "step": 18935 - }, - { - "epoch": 3.229326513213981, - "grad_norm": 0.04979191720485687, - "learning_rate": 6.931619262260546e-06, - "loss": 0.0007673433981835842, - "step": 18940 - }, - { - "epoch": 3.2301790281329925, - "grad_norm": 0.12047834694385529, - "learning_rate": 6.9167851320232225e-06, - "loss": 0.0017763294279575347, - "step": 18945 - }, - { - "epoch": 3.2310315430520036, - "grad_norm": 0.010395308956503868, - "learning_rate": 6.901965279047926e-06, - "loss": 0.000369875249452889, - "step": 18950 - }, - { - "epoch": 3.2318840579710146, - "grad_norm": 0.07600873708724976, - "learning_rate": 6.887159710253089e-06, - "loss": 0.0005032925866544247, - "step": 18955 - }, - { - "epoch": 3.2327365728900257, - "grad_norm": 0.011709989979863167, - "learning_rate": 6.8723684325504235e-06, - "loss": 0.0009861321188509465, - "step": 18960 - }, - { - "epoch": 3.2335890878090368, - "grad_norm": 0.024761514738202095, - "learning_rate": 6.857591452844994e-06, - "loss": 0.00034510630648583175, - "step": 18965 - }, - { - "epoch": 3.234441602728048, - "grad_norm": 0.04486176744103432, - "learning_rate": 6.8428287780351755e-06, - "loss": 0.00017987118335440754, - "step": 18970 - }, - { - "epoch": 3.235294117647059, - "grad_norm": 0.020876318216323853, - "learning_rate": 6.828080415012691e-06, - "loss": 0.00046160193160176276, - "step": 18975 - }, - { - "epoch": 3.23614663256607, - "grad_norm": 0.005108790006488562, - "learning_rate": 6.813346370662566e-06, - "loss": 0.0002624133136123419, - "step": 18980 - }, - { - "epoch": 3.236999147485081, - "grad_norm": 0.023575518280267715, - "learning_rate": 6.798626651863142e-06, - "loss": 0.0004043182358145714, - "step": 18985 - }, - { - "epoch": 3.237851662404092, - "grad_norm": 0.03796171769499779, - "learning_rate": 6.78392126548607e-06, - "loss": 0.000300011713989079, - "step": 18990 - }, - { - "epoch": 3.238704177323103, - "grad_norm": 0.010747452266514301, - "learning_rate": 6.769230218396302e-06, - "loss": 0.0008313095197081566, - "step": 18995 - }, - { - "epoch": 3.239556692242114, - "grad_norm": 0.037278912961483, - "learning_rate": 6.75455351745213e-06, - "loss": 0.0004106287844479084, - "step": 19000 - }, - { - "epoch": 3.2404092071611252, - "grad_norm": 0.06086887791752815, - "learning_rate": 6.7398911695051155e-06, - "loss": 0.0005375253967940807, - "step": 19005 - }, - { - "epoch": 3.2412617220801363, - "grad_norm": 0.008513805456459522, - "learning_rate": 6.725243181400129e-06, - "loss": 0.0002401076490059495, - "step": 19010 - }, - { - "epoch": 3.2421142369991474, - "grad_norm": 0.11595302820205688, - "learning_rate": 6.71060955997533e-06, - "loss": 0.000469267088919878, - "step": 19015 - }, - { - "epoch": 3.2429667519181584, - "grad_norm": 0.032653845846652985, - "learning_rate": 6.695990312062191e-06, - "loss": 0.0005158457439392805, - "step": 19020 - }, - { - "epoch": 3.2438192668371695, - "grad_norm": 0.0623047836124897, - "learning_rate": 6.6813854444854695e-06, - "loss": 0.0005772956646978855, - "step": 19025 - }, - { - "epoch": 3.2446717817561805, - "grad_norm": 0.003954550251364708, - "learning_rate": 6.666794964063195e-06, - "loss": 0.0011268424801528453, - "step": 19030 - }, - { - "epoch": 3.2455242966751916, - "grad_norm": 0.02917463518679142, - "learning_rate": 6.6522188776066935e-06, - "loss": 0.0007552789058536292, - "step": 19035 - }, - { - "epoch": 3.246376811594203, - "grad_norm": 0.0267369132488966, - "learning_rate": 6.637657191920561e-06, - "loss": 0.00021620083134621382, - "step": 19040 - }, - { - "epoch": 3.247229326513214, - "grad_norm": 0.05929157882928848, - "learning_rate": 6.62310991380267e-06, - "loss": 0.0005157306790351867, - "step": 19045 - }, - { - "epoch": 3.2480818414322252, - "grad_norm": 0.01827944628894329, - "learning_rate": 6.608577050044193e-06, - "loss": 0.0003319120965898037, - "step": 19050 - }, - { - "epoch": 3.2489343563512363, - "grad_norm": 0.013090296648442745, - "learning_rate": 6.594058607429542e-06, - "loss": 0.0005971027072519064, - "step": 19055 - }, - { - "epoch": 3.2494458653026426, - "eval_loss": 0.06181741878390312, - "eval_runtime": 3.7049, - "eval_samples_per_second": 68.017, - "eval_steps_per_second": 1.08, - "step": 19058 - }, - { - "eval_cer_subset": 0.013418665624440211, - "eval_cer_subset_edit_distance": 824, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 19058 - }, - { - "epoch": 3.2497868712702473, - "grad_norm": 0.11219825595617294, - "learning_rate": 6.579554592736402e-06, - "loss": 0.0005919500254094601, - "step": 19060 - }, - { - "epoch": 3.2506393861892584, - "grad_norm": 0.010144476778805256, - "learning_rate": 6.565065012735742e-06, - "loss": 0.00045172283425927163, - "step": 19065 - }, - { - "epoch": 3.2514919011082695, - "grad_norm": 0.013089130632579327, - "learning_rate": 6.550589874191782e-06, - "loss": 0.00025117534678429363, - "step": 19070 - }, - { - "epoch": 3.2523444160272805, - "grad_norm": 0.047521159052848816, - "learning_rate": 6.536129183861994e-06, - "loss": 0.0008897949941456318, - "step": 19075 - }, - { - "epoch": 3.2531969309462916, - "grad_norm": 0.04106447473168373, - "learning_rate": 6.5216829484971085e-06, - "loss": 0.000283010583370924, - "step": 19080 - }, - { - "epoch": 3.2540494458653026, - "grad_norm": 0.04407098516821861, - "learning_rate": 6.507251174841109e-06, - "loss": 0.0006978865712881088, - "step": 19085 - }, - { - "epoch": 3.2549019607843137, - "grad_norm": 0.01719486154615879, - "learning_rate": 6.492833869631217e-06, - "loss": 0.00031038771849125626, - "step": 19090 - }, - { - "epoch": 3.2557544757033248, - "grad_norm": 0.007759191561490297, - "learning_rate": 6.478431039597928e-06, - "loss": 0.0005179021041840315, - "step": 19095 - }, - { - "epoch": 3.256606990622336, - "grad_norm": 0.13484114408493042, - "learning_rate": 6.464042691464956e-06, - "loss": 0.0006286890245974063, - "step": 19100 - }, - { - "epoch": 3.257459505541347, - "grad_norm": 0.015848470851778984, - "learning_rate": 6.449668831949248e-06, - "loss": 0.0005874604452401399, - "step": 19105 - }, - { - "epoch": 3.258312020460358, - "grad_norm": 0.010055625811219215, - "learning_rate": 6.43530946776102e-06, - "loss": 0.00036783108953386545, - "step": 19110 - }, - { - "epoch": 3.259164535379369, - "grad_norm": 0.010497787036001682, - "learning_rate": 6.420964605603681e-06, - "loss": 0.00043828897178173066, - "step": 19115 - }, - { - "epoch": 3.26001705029838, - "grad_norm": 0.024217624217271805, - "learning_rate": 6.40663425217391e-06, - "loss": 0.00042369402945041656, - "step": 19120 - }, - { - "epoch": 3.260869565217391, - "grad_norm": 0.022596243768930435, - "learning_rate": 6.392318414161583e-06, - "loss": 0.00037041325122118, - "step": 19125 - }, - { - "epoch": 3.261722080136402, - "grad_norm": 0.02588561922311783, - "learning_rate": 6.378017098249812e-06, - "loss": 0.00029896264895796777, - "step": 19130 - }, - { - "epoch": 3.2625745950554137, - "grad_norm": 0.01108810119330883, - "learning_rate": 6.363730311114913e-06, - "loss": 0.0006667471025139093, - "step": 19135 - }, - { - "epoch": 3.2634271099744243, - "grad_norm": 0.009552333503961563, - "learning_rate": 6.349458059426453e-06, - "loss": 0.0004497227258980274, - "step": 19140 - }, - { - "epoch": 3.264279624893436, - "grad_norm": 0.01460598036646843, - "learning_rate": 6.335200349847185e-06, - "loss": 0.0002194883767515421, - "step": 19145 - }, - { - "epoch": 3.265132139812447, - "grad_norm": 0.03732374310493469, - "learning_rate": 6.320957189033071e-06, - "loss": 0.0002586513292044401, - "step": 19150 - }, - { - "epoch": 3.265984654731458, - "grad_norm": 0.016899019479751587, - "learning_rate": 6.306728583633319e-06, - "loss": 0.0009543164633214474, - "step": 19155 - }, - { - "epoch": 3.266837169650469, - "grad_norm": 0.01586720161139965, - "learning_rate": 6.292514540290286e-06, - "loss": 0.0008085070177912713, - "step": 19160 - }, - { - "epoch": 3.26768968456948, - "grad_norm": 0.009034652262926102, - "learning_rate": 6.278315065639588e-06, - "loss": 0.000653286511078477, - "step": 19165 - }, - { - "epoch": 3.268542199488491, - "grad_norm": 0.003070715581998229, - "learning_rate": 6.264130166309996e-06, - "loss": 0.00026131083723157644, - "step": 19170 - }, - { - "epoch": 3.269394714407502, - "grad_norm": 0.032700877636671066, - "learning_rate": 6.249959848923497e-06, - "loss": 0.0004788469523191452, - "step": 19175 - }, - { - "epoch": 3.2702472293265132, - "grad_norm": 0.03986335173249245, - "learning_rate": 6.235804120095252e-06, - "loss": 0.0005488947499543428, - "step": 19180 - }, - { - "epoch": 3.2710997442455243, - "grad_norm": 0.02649238146841526, - "learning_rate": 6.221662986433652e-06, - "loss": 0.0008479308336973191, - "step": 19185 - }, - { - "epoch": 3.2719522591645354, - "grad_norm": 0.04019145667552948, - "learning_rate": 6.207536454540235e-06, - "loss": 0.0008174203336238861, - "step": 19190 - }, - { - "epoch": 3.2728047740835464, - "grad_norm": 0.013805892318487167, - "learning_rate": 6.193424531009733e-06, - "loss": 0.0010017482563853264, - "step": 19195 - }, - { - "epoch": 3.2736572890025575, - "grad_norm": 0.022588396444916725, - "learning_rate": 6.17932722243006e-06, - "loss": 0.0004558952059596777, - "step": 19200 - }, - { - "epoch": 3.2745098039215685, - "grad_norm": 0.009211315773427486, - "learning_rate": 6.1652445353823136e-06, - "loss": 0.0002530797151848674, - "step": 19205 - }, - { - "epoch": 3.2753623188405796, - "grad_norm": 0.03019166924059391, - "learning_rate": 6.151176476440768e-06, - "loss": 0.0010970567353069782, - "step": 19210 - }, - { - "epoch": 3.2762148337595907, - "grad_norm": 0.10982025414705276, - "learning_rate": 6.137123052172854e-06, - "loss": 0.00046633705496788023, - "step": 19215 - }, - { - "epoch": 3.2770673486786017, - "grad_norm": 0.09192784875631332, - "learning_rate": 6.123084269139178e-06, - "loss": 0.0013196432963013649, - "step": 19220 - }, - { - "epoch": 3.277919863597613, - "grad_norm": 0.02934069000184536, - "learning_rate": 6.109060133893501e-06, - "loss": 0.0005467353854328394, - "step": 19225 - }, - { - "epoch": 3.2787723785166243, - "grad_norm": 0.03443235158920288, - "learning_rate": 6.095050652982773e-06, - "loss": 0.0004425105173140764, - "step": 19230 - }, - { - "epoch": 3.279624893435635, - "grad_norm": 0.04472684487700462, - "learning_rate": 6.081055832947077e-06, - "loss": 0.0006654649972915649, - "step": 19235 - }, - { - "epoch": 3.2804774083546464, - "grad_norm": 0.04871739074587822, - "learning_rate": 6.067075680319663e-06, - "loss": 0.0011335751041769981, - "step": 19240 - }, - { - "epoch": 3.2813299232736575, - "grad_norm": 0.17364081740379333, - "learning_rate": 6.053110201626918e-06, - "loss": 0.0007838122546672821, - "step": 19245 - }, - { - "epoch": 3.2821824381926685, - "grad_norm": 0.10362228006124496, - "learning_rate": 6.0391594033884035e-06, - "loss": 0.0004850291647017002, - "step": 19250 - }, - { - "epoch": 3.2830349531116796, - "grad_norm": 0.033360131084918976, - "learning_rate": 6.025223292116828e-06, - "loss": 0.0003192754928022623, - "step": 19255 - }, - { - "epoch": 3.2838874680306906, - "grad_norm": 0.019219927489757538, - "learning_rate": 6.0113018743180195e-06, - "loss": 0.0004580964334309101, - "step": 19260 - }, - { - "epoch": 3.2847399829497017, - "grad_norm": 0.019038213416934013, - "learning_rate": 5.997395156490956e-06, - "loss": 0.000247283186763525, - "step": 19265 - }, - { - "epoch": 3.2855924978687128, - "grad_norm": 0.07672185450792313, - "learning_rate": 5.983503145127763e-06, - "loss": 0.00036474117077887056, - "step": 19270 - }, - { - "epoch": 3.286445012787724, - "grad_norm": 0.006674405187368393, - "learning_rate": 5.96962584671368e-06, - "loss": 0.000742178363725543, - "step": 19275 - }, - { - "epoch": 3.287297527706735, - "grad_norm": 0.0242395531386137, - "learning_rate": 5.9557632677271105e-06, - "loss": 0.0003610172076150775, - "step": 19280 - }, - { - "epoch": 3.288150042625746, - "grad_norm": 0.006741875316947699, - "learning_rate": 5.941915414639559e-06, - "loss": 0.0006255440413951874, - "step": 19285 - }, - { - "epoch": 3.289002557544757, - "grad_norm": 0.015137423761188984, - "learning_rate": 5.928082293915652e-06, - "loss": 0.0003517616540193558, - "step": 19290 - }, - { - "epoch": 3.289855072463768, - "grad_norm": 0.006232273764908314, - "learning_rate": 5.9142639120131636e-06, - "loss": 0.0002735992660745978, - "step": 19295 - }, - { - "epoch": 3.290707587382779, - "grad_norm": 0.025010747835040092, - "learning_rate": 5.900460275382981e-06, - "loss": 0.0004658872727304697, - "step": 19300 - }, - { - "epoch": 3.29156010230179, - "grad_norm": 0.00881986878812313, - "learning_rate": 5.88667139046909e-06, - "loss": 0.0007451147306710481, - "step": 19305 - }, - { - "epoch": 3.2924126172208013, - "grad_norm": 0.02796418033540249, - "learning_rate": 5.872897263708607e-06, - "loss": 0.0008796761743724346, - "step": 19310 - }, - { - "epoch": 3.2932651321398123, - "grad_norm": 0.027454577386379242, - "learning_rate": 5.859137901531745e-06, - "loss": 0.00026941425167024133, - "step": 19315 - }, - { - "epoch": 3.2941176470588234, - "grad_norm": 0.0395982526242733, - "learning_rate": 5.84539331036183e-06, - "loss": 0.00041040563955903054, - "step": 19320 - }, - { - "epoch": 3.2949701619778344, - "grad_norm": 0.042481981217861176, - "learning_rate": 5.831663496615304e-06, - "loss": 0.0003256106050685048, - "step": 19325 - }, - { - "epoch": 3.2958226768968455, - "grad_norm": 0.0194789320230484, - "learning_rate": 5.817948466701703e-06, - "loss": 0.00048703285865485667, - "step": 19330 - }, - { - "epoch": 3.296675191815857, - "grad_norm": 0.03693777322769165, - "learning_rate": 5.804248227023639e-06, - "loss": 0.0004573033656924963, - "step": 19335 - }, - { - "epoch": 3.2975277067348676, - "grad_norm": 0.010155921801924706, - "learning_rate": 5.790562783976857e-06, - "loss": 0.00022799526341259478, - "step": 19340 - }, - { - "epoch": 3.298380221653879, - "grad_norm": 0.014926153235137463, - "learning_rate": 5.776892143950181e-06, - "loss": 0.0002296717371791601, - "step": 19345 - }, - { - "epoch": 3.29923273657289, - "grad_norm": 0.025415342301130295, - "learning_rate": 5.763236313325513e-06, - "loss": 0.0001236582640558481, - "step": 19350 - }, - { - "epoch": 3.3000852514919012, - "grad_norm": 0.0994359701871872, - "learning_rate": 5.749595298477851e-06, - "loss": 0.0009945498779416085, - "step": 19355 - }, - { - "epoch": 3.3009377664109123, - "grad_norm": 0.015362569130957127, - "learning_rate": 5.7359691057752705e-06, - "loss": 0.0005355034954845905, - "step": 19360 - }, - { - "epoch": 3.3017902813299234, - "grad_norm": 0.07377626746892929, - "learning_rate": 5.722357741578925e-06, - "loss": 0.0009824702516198157, - "step": 19365 - }, - { - "epoch": 3.3026427962489344, - "grad_norm": 0.01109279878437519, - "learning_rate": 5.708761212243067e-06, - "loss": 0.00042829746380448344, - "step": 19370 - }, - { - "epoch": 3.3034953111679455, - "grad_norm": 0.012984010390937328, - "learning_rate": 5.695179524115008e-06, - "loss": 0.0005177812185138464, - "step": 19375 - }, - { - "epoch": 3.3043478260869565, - "grad_norm": 0.01330599281936884, - "learning_rate": 5.681612683535111e-06, - "loss": 0.00047001498751342297, - "step": 19380 - }, - { - "epoch": 3.3052003410059676, - "grad_norm": 0.027421219274401665, - "learning_rate": 5.66806069683686e-06, - "loss": 0.0005254631396383047, - "step": 19385 - }, - { - "epoch": 3.3060528559249787, - "grad_norm": 0.014183313585817814, - "learning_rate": 5.6545235703467435e-06, - "loss": 0.000249856011942029, - "step": 19390 - }, - { - "epoch": 3.3069053708439897, - "grad_norm": 0.09523740410804749, - "learning_rate": 5.641001310384365e-06, - "loss": 0.000620997790247202, - "step": 19395 - }, - { - "epoch": 3.307757885763001, - "grad_norm": 0.08386892080307007, - "learning_rate": 5.627493923262354e-06, - "loss": 0.0012673554010689259, - "step": 19400 - }, - { - "epoch": 3.308610400682012, - "grad_norm": 0.03303903713822365, - "learning_rate": 5.614001415286412e-06, - "loss": 0.0011139905080199241, - "step": 19405 - }, - { - "epoch": 3.309462915601023, - "grad_norm": 0.03811914473772049, - "learning_rate": 5.6005237927552805e-06, - "loss": 0.0006227992475032806, - "step": 19410 - }, - { - "epoch": 3.310315430520034, - "grad_norm": 0.016570856794714928, - "learning_rate": 5.5870610619607805e-06, - "loss": 0.0005445381160825491, - "step": 19415 - }, - { - "epoch": 3.311167945439045, - "grad_norm": 0.013608088716864586, - "learning_rate": 5.573613229187751e-06, - "loss": 0.0004142835270613432, - "step": 19420 - }, - { - "epoch": 3.312020460358056, - "grad_norm": 0.053280171006917953, - "learning_rate": 5.560180300714079e-06, - "loss": 0.0003944558557122946, - "step": 19425 - }, - { - "epoch": 3.3128729752770676, - "grad_norm": 0.04067116975784302, - "learning_rate": 5.5467622828107225e-06, - "loss": 0.0008278630673885345, - "step": 19430 - }, - { - "epoch": 3.313725490196078, - "grad_norm": 0.0459442101418972, - "learning_rate": 5.533359181741638e-06, - "loss": 0.00037522357888519764, - "step": 19435 - }, - { - "epoch": 3.3145780051150897, - "grad_norm": 0.07973090559244156, - "learning_rate": 5.519971003763862e-06, - "loss": 0.0006369464099407196, - "step": 19440 - }, - { - "epoch": 3.3154305200341008, - "grad_norm": 0.0318770669400692, - "learning_rate": 5.506597755127425e-06, - "loss": 0.0006823249161243439, - "step": 19445 - }, - { - "epoch": 3.316283034953112, - "grad_norm": 0.04201148822903633, - "learning_rate": 5.49323944207541e-06, - "loss": 0.000304691749624908, - "step": 19450 - }, - { - "epoch": 3.317135549872123, - "grad_norm": 0.011656812392175198, - "learning_rate": 5.479896070843919e-06, - "loss": 0.00038321034517139194, - "step": 19455 - }, - { - "epoch": 3.317988064791134, - "grad_norm": 0.03550105541944504, - "learning_rate": 5.466567647662075e-06, - "loss": 0.0007771219592541456, - "step": 19460 - }, - { - "epoch": 3.318840579710145, - "grad_norm": 0.010552220977842808, - "learning_rate": 5.453254178752044e-06, - "loss": 0.0008830759674310684, - "step": 19465 - }, - { - "epoch": 3.319693094629156, - "grad_norm": 0.0077703725546598434, - "learning_rate": 5.439955670328987e-06, - "loss": 0.0003484194632619619, - "step": 19470 - }, - { - "epoch": 3.320545609548167, - "grad_norm": 0.013720662333071232, - "learning_rate": 5.426672128601088e-06, - "loss": 0.0005347099620848894, - "step": 19475 - }, - { - "epoch": 3.321398124467178, - "grad_norm": 0.005950555205345154, - "learning_rate": 5.413403559769549e-06, - "loss": 0.00040374435484409333, - "step": 19480 - }, - { - "epoch": 3.3222506393861893, - "grad_norm": 0.005855921655893326, - "learning_rate": 5.400149970028587e-06, - "loss": 0.00011817219201475382, - "step": 19485 - }, - { - "epoch": 3.3231031543052003, - "grad_norm": 0.05193415656685829, - "learning_rate": 5.3869113655654145e-06, - "loss": 0.000558258919045329, - "step": 19490 - }, - { - "epoch": 3.3239556692242114, - "grad_norm": 0.002798686036840081, - "learning_rate": 5.37368775256025e-06, - "loss": 0.0002853567479178309, - "step": 19495 - }, - { - "epoch": 3.3248081841432224, - "grad_norm": 0.0037216702476143837, - "learning_rate": 5.360479137186315e-06, - "loss": 0.0003375500673428178, - "step": 19500 - }, - { - "epoch": 3.3256606990622335, - "grad_norm": 0.06180913746356964, - "learning_rate": 5.347285525609821e-06, - "loss": 0.00018238723278045653, - "step": 19505 - }, - { - "epoch": 3.3265132139812446, - "grad_norm": 0.034047432243824005, - "learning_rate": 5.334106923990009e-06, - "loss": 0.0006082602776587009, - "step": 19510 - }, - { - "epoch": 3.3273657289002556, - "grad_norm": 0.00867203064262867, - "learning_rate": 5.32094333847907e-06, - "loss": 0.0003369096200913191, - "step": 19515 - }, - { - "epoch": 3.3282182438192667, - "grad_norm": 0.04999540373682976, - "learning_rate": 5.3077947752222e-06, - "loss": 0.00042240540497004984, - "step": 19520 - }, - { - "epoch": 3.3290707587382777, - "grad_norm": 0.10174256563186646, - "learning_rate": 5.294661240357599e-06, - "loss": 0.0012334841303527355, - "step": 19525 - }, - { - "epoch": 3.329923273657289, - "grad_norm": 0.019731154665350914, - "learning_rate": 5.2815427400164365e-06, - "loss": 0.0002502906369045377, - "step": 19530 - }, - { - "epoch": 3.3307757885763003, - "grad_norm": 0.040488291531801224, - "learning_rate": 5.268439280322864e-06, - "loss": 0.0006264269817620516, - "step": 19535 - }, - { - "epoch": 3.3316283034953114, - "grad_norm": 0.027734950184822083, - "learning_rate": 5.2553508673940095e-06, - "loss": 0.0006609380245208741, - "step": 19540 - }, - { - "epoch": 3.3324808184143224, - "grad_norm": 0.033032696694135666, - "learning_rate": 5.24227750733998e-06, - "loss": 0.00046310769394040106, - "step": 19545 - }, - { - "epoch": 3.3333333333333335, - "grad_norm": 0.009758932515978813, - "learning_rate": 5.2292192062638485e-06, - "loss": 0.0003860333003103733, - "step": 19550 - }, - { - "epoch": 3.3341858482523445, - "grad_norm": 0.010039775632321835, - "learning_rate": 5.2161759702616764e-06, - "loss": 0.0005642361007630825, - "step": 19555 - }, - { - "epoch": 3.3350383631713556, - "grad_norm": 0.018729569390416145, - "learning_rate": 5.203147805422476e-06, - "loss": 0.0002538987435400486, - "step": 19560 - }, - { - "epoch": 3.3358908780903667, - "grad_norm": 0.07940587401390076, - "learning_rate": 5.190134717828216e-06, - "loss": 0.0003814149182289839, - "step": 19565 - }, - { - "epoch": 3.3367433930093777, - "grad_norm": 0.002807241166010499, - "learning_rate": 5.1771367135538575e-06, - "loss": 0.0005854971241205931, - "step": 19570 - }, - { - "epoch": 3.337595907928389, - "grad_norm": 0.029841719195246696, - "learning_rate": 5.164153798667284e-06, - "loss": 0.00021142382174730301, - "step": 19575 - }, - { - "epoch": 3.3384484228474, - "grad_norm": 0.017503969371318817, - "learning_rate": 5.151185979229372e-06, - "loss": 0.0005035904701799154, - "step": 19580 - }, - { - "epoch": 3.339300937766411, - "grad_norm": 0.033913351595401764, - "learning_rate": 5.138233261293917e-06, - "loss": 0.00033289811108261347, - "step": 19585 - }, - { - "epoch": 3.340153452685422, - "grad_norm": 0.027594633400440216, - "learning_rate": 5.125295650907682e-06, - "loss": 0.0006479782052338124, - "step": 19590 - }, - { - "epoch": 3.341005967604433, - "grad_norm": 0.017926139757037163, - "learning_rate": 5.112373154110365e-06, - "loss": 0.0009788990020751954, - "step": 19595 - }, - { - "epoch": 3.341858482523444, - "grad_norm": 0.012236343696713448, - "learning_rate": 5.099465776934636e-06, - "loss": 0.0009869396686553954, - "step": 19600 - }, - { - "epoch": 3.342710997442455, - "grad_norm": 0.1368396282196045, - "learning_rate": 5.086573525406075e-06, - "loss": 0.0009202501736581325, - "step": 19605 - }, - { - "epoch": 3.343563512361466, - "grad_norm": 0.04556318372488022, - "learning_rate": 5.07369640554321e-06, - "loss": 0.0002738154027611017, - "step": 19610 - }, - { - "epoch": 3.3444160272804773, - "grad_norm": 0.10456430912017822, - "learning_rate": 5.060834423357522e-06, - "loss": 0.0005405619740486145, - "step": 19615 - }, - { - "epoch": 3.3452685421994883, - "grad_norm": 0.009618780575692654, - "learning_rate": 5.047987584853398e-06, - "loss": 0.0003645260352641344, - "step": 19620 - }, - { - "epoch": 3.3461210571184994, - "grad_norm": 0.0008346811519004405, - "learning_rate": 5.035155896028186e-06, - "loss": 0.0009771523997187614, - "step": 19625 - }, - { - "epoch": 3.346973572037511, - "grad_norm": 0.07436166703701019, - "learning_rate": 5.022339362872134e-06, - "loss": 0.0005307651124894619, - "step": 19630 - }, - { - "epoch": 3.3478260869565215, - "grad_norm": 0.047390375286340714, - "learning_rate": 5.0095379913684326e-06, - "loss": 0.00032626844476908446, - "step": 19635 - }, - { - "epoch": 3.348678601875533, - "grad_norm": 0.11302479356527328, - "learning_rate": 4.996751787493172e-06, - "loss": 0.0006419796496629715, - "step": 19640 - }, - { - "epoch": 3.349531116794544, - "grad_norm": 0.04735693335533142, - "learning_rate": 4.983980757215398e-06, - "loss": 0.0004858987871557474, - "step": 19645 - }, - { - "epoch": 3.350383631713555, - "grad_norm": 0.05677567049860954, - "learning_rate": 4.971224906497043e-06, - "loss": 0.0009346410632133483, - "step": 19650 - }, - { - "epoch": 3.351236146632566, - "grad_norm": 0.01608835905790329, - "learning_rate": 4.958484241292954e-06, - "loss": 0.000258720014244318, - "step": 19655 - }, - { - "epoch": 3.3520886615515773, - "grad_norm": 0.023287836462259293, - "learning_rate": 4.9457587675509155e-06, - "loss": 0.0007150916382670403, - "step": 19660 - }, - { - "epoch": 3.3529411764705883, - "grad_norm": 0.02774999849498272, - "learning_rate": 4.9330484912115845e-06, - "loss": 0.000649323221296072, - "step": 19665 - }, - { - "epoch": 3.3537936913895994, - "grad_norm": 0.012109563685953617, - "learning_rate": 4.920353418208556e-06, - "loss": 0.00036820617970079184, - "step": 19670 - }, - { - "epoch": 3.3546462063086104, - "grad_norm": 0.03116477094590664, - "learning_rate": 4.907673554468305e-06, - "loss": 0.0009199230931699276, - "step": 19675 - }, - { - "epoch": 3.3554987212276215, - "grad_norm": 0.037316855043172836, - "learning_rate": 4.895008905910219e-06, - "loss": 0.0005375304259359837, - "step": 19680 - }, - { - "epoch": 3.3563512361466326, - "grad_norm": 0.02448320761322975, - "learning_rate": 4.882359478446568e-06, - "loss": 0.0007062189746648073, - "step": 19685 - }, - { - "epoch": 3.3572037510656436, - "grad_norm": 0.013858492486178875, - "learning_rate": 4.8697252779825195e-06, - "loss": 0.0003158868057653308, - "step": 19690 - }, - { - "epoch": 3.3580562659846547, - "grad_norm": 0.007077233865857124, - "learning_rate": 4.857106310416161e-06, - "loss": 0.00016839986201375723, - "step": 19695 - }, - { - "epoch": 3.3589087809036657, - "grad_norm": 0.00671799760311842, - "learning_rate": 4.844502581638424e-06, - "loss": 0.0013290375471115112, - "step": 19700 - }, - { - "epoch": 3.359761295822677, - "grad_norm": 0.11925818771123886, - "learning_rate": 4.83191409753317e-06, - "loss": 0.0008001517504453659, - "step": 19705 - }, - { - "epoch": 3.360613810741688, - "grad_norm": 0.012915749102830887, - "learning_rate": 4.819340863977098e-06, - "loss": 0.0003090864047408104, - "step": 19710 - }, - { - "epoch": 3.361466325660699, - "grad_norm": 0.0198194682598114, - "learning_rate": 4.806782886839833e-06, - "loss": 0.00015502141322940589, - "step": 19715 - }, - { - "epoch": 3.36231884057971, - "grad_norm": 0.02647668495774269, - "learning_rate": 4.794240171983848e-06, - "loss": 0.00032354283612221477, - "step": 19720 - }, - { - "epoch": 3.363171355498721, - "grad_norm": 0.03167302906513214, - "learning_rate": 4.781712725264503e-06, - "loss": 0.0008794944733381271, - "step": 19725 - }, - { - "epoch": 3.364023870417732, - "grad_norm": 0.03751087561249733, - "learning_rate": 4.769200552530017e-06, - "loss": 0.0017323100939393044, - "step": 19730 - }, - { - "epoch": 3.3648763853367436, - "grad_norm": 0.08725135773420334, - "learning_rate": 4.75670365962149e-06, - "loss": 0.0009663975797593594, - "step": 19735 - }, - { - "epoch": 3.3657289002557547, - "grad_norm": 0.014557529240846634, - "learning_rate": 4.7442220523729005e-06, - "loss": 0.0004029064439237118, - "step": 19740 - }, - { - "epoch": 3.3665814151747657, - "grad_norm": 0.05396854132413864, - "learning_rate": 4.731755736611068e-06, - "loss": 0.0011473988182842732, - "step": 19745 - }, - { - "epoch": 3.367433930093777, - "grad_norm": 0.06434670090675354, - "learning_rate": 4.7193047181556764e-06, - "loss": 0.00039711645804345607, - "step": 19750 - }, - { - "epoch": 3.368286445012788, - "grad_norm": 0.01898345723748207, - "learning_rate": 4.706869002819287e-06, - "loss": 0.0002789617981761694, - "step": 19755 - }, - { - "epoch": 3.369138959931799, - "grad_norm": 0.021839376538991928, - "learning_rate": 4.6944485964073085e-06, - "loss": 0.001008017361164093, - "step": 19760 - }, - { - "epoch": 3.36999147485081, - "grad_norm": 0.013436227105557919, - "learning_rate": 4.682043504717991e-06, - "loss": 0.0002914538374170661, - "step": 19765 - }, - { - "epoch": 3.370843989769821, - "grad_norm": 0.04120805487036705, - "learning_rate": 4.6696537335424485e-06, - "loss": 0.00099704097956419, - "step": 19770 - }, - { - "epoch": 3.371696504688832, - "grad_norm": 0.015087714418768883, - "learning_rate": 4.6572792886646326e-06, - "loss": 0.00031175173353403807, - "step": 19775 - }, - { - "epoch": 3.372549019607843, - "grad_norm": 0.09424779564142227, - "learning_rate": 4.644920175861347e-06, - "loss": 0.0008490364067256451, - "step": 19780 - }, - { - "epoch": 3.373401534526854, - "grad_norm": 0.15744835138320923, - "learning_rate": 4.632576400902244e-06, - "loss": 0.0011794422753155231, - "step": 19785 - }, - { - "epoch": 3.3742540494458653, - "grad_norm": 0.07353512197732925, - "learning_rate": 4.620247969549801e-06, - "loss": 0.0005946496035903692, - "step": 19790 - }, - { - "epoch": 3.3751065643648763, - "grad_norm": 0.05715373530983925, - "learning_rate": 4.607934887559335e-06, - "loss": 0.0005888998974114656, - "step": 19795 - }, - { - "epoch": 3.3759590792838874, - "grad_norm": 0.09267253428697586, - "learning_rate": 4.5956371606790195e-06, - "loss": 0.0007545445580035449, - "step": 19800 - }, - { - "epoch": 3.3768115942028984, - "grad_norm": 0.041159722954034805, - "learning_rate": 4.5833547946498235e-06, - "loss": 0.0006760005839169025, - "step": 19805 - }, - { - "epoch": 3.3776641091219095, - "grad_norm": 0.1095680296421051, - "learning_rate": 4.571087795205583e-06, - "loss": 0.0010204846039414406, - "step": 19810 - }, - { - "epoch": 3.3785166240409206, - "grad_norm": 0.006533615291118622, - "learning_rate": 4.558836168072928e-06, - "loss": 0.00032924620900303124, - "step": 19815 - }, - { - "epoch": 3.3793691389599316, - "grad_norm": 0.09690971672534943, - "learning_rate": 4.5465999189713305e-06, - "loss": 0.0006089920178055763, - "step": 19820 - }, - { - "epoch": 3.3802216538789427, - "grad_norm": 0.03703468665480614, - "learning_rate": 4.53437905361307e-06, - "loss": 0.00024356732610613107, - "step": 19825 - }, - { - "epoch": 3.381074168797954, - "grad_norm": 0.03449544310569763, - "learning_rate": 4.522173577703267e-06, - "loss": 0.0004322177264839411, - "step": 19830 - }, - { - "epoch": 3.381926683716965, - "grad_norm": 0.022056737914681435, - "learning_rate": 4.509983496939834e-06, - "loss": 0.00032165104057639836, - "step": 19835 - }, - { - "epoch": 3.3827791986359763, - "grad_norm": 0.06808804720640182, - "learning_rate": 4.4978088170135064e-06, - "loss": 0.0004901651758700609, - "step": 19840 - }, - { - "epoch": 3.3836317135549874, - "grad_norm": 0.035225335508584976, - "learning_rate": 4.485649543607835e-06, - "loss": 0.000494948634877801, - "step": 19845 - }, - { - "epoch": 3.3844842284739984, - "grad_norm": 0.005756362807005644, - "learning_rate": 4.473505682399165e-06, - "loss": 0.00037348996847867965, - "step": 19850 - }, - { - "epoch": 3.3853367433930095, - "grad_norm": 0.015896733850240707, - "learning_rate": 4.461377239056669e-06, - "loss": 0.001073040347546339, - "step": 19855 - }, - { - "epoch": 3.3861892583120206, - "grad_norm": 0.005726287607103586, - "learning_rate": 4.449264219242296e-06, - "loss": 0.00016913213767111301, - "step": 19860 - }, - { - "epoch": 3.3870417732310316, - "grad_norm": 0.01894184947013855, - "learning_rate": 4.4371666286108125e-06, - "loss": 0.0001936727436259389, - "step": 19865 - }, - { - "epoch": 3.3878942881500427, - "grad_norm": 0.0019047146197408438, - "learning_rate": 4.425084472809763e-06, - "loss": 0.00023375547025352716, - "step": 19870 - }, - { - "epoch": 3.3887468030690537, - "grad_norm": 0.004392183385789394, - "learning_rate": 4.41301775747952e-06, - "loss": 0.0006707040593028069, - "step": 19875 - }, - { - "epoch": 3.389599317988065, - "grad_norm": 0.024085786193609238, - "learning_rate": 4.400966488253218e-06, - "loss": 0.0002247063210234046, - "step": 19880 - }, - { - "epoch": 3.390451832907076, - "grad_norm": 0.07025684416294098, - "learning_rate": 4.388930670756779e-06, - "loss": 0.0007792794145643711, - "step": 19885 - }, - { - "epoch": 3.391304347826087, - "grad_norm": 0.06971945613622665, - "learning_rate": 4.3769103106089454e-06, - "loss": 0.0019492624327540399, - "step": 19890 - }, - { - "epoch": 3.392156862745098, - "grad_norm": 0.065009705722332, - "learning_rate": 4.364905413421204e-06, - "loss": 0.0009217139333486557, - "step": 19895 - }, - { - "epoch": 3.393009377664109, - "grad_norm": 0.050812624394893646, - "learning_rate": 4.352915984797849e-06, - "loss": 0.0007668033242225647, - "step": 19900 - }, - { - "epoch": 3.39386189258312, - "grad_norm": 0.0837833359837532, - "learning_rate": 4.340942030335942e-06, - "loss": 0.0005806859582662583, - "step": 19905 - }, - { - "epoch": 3.394714407502131, - "grad_norm": 0.03263656422495842, - "learning_rate": 4.3289835556253205e-06, - "loss": 0.0011843616142868997, - "step": 19910 - }, - { - "epoch": 3.395566922421142, - "grad_norm": 0.01964580826461315, - "learning_rate": 4.317040566248605e-06, - "loss": 0.0004248973447829485, - "step": 19915 - }, - { - "epoch": 3.3964194373401533, - "grad_norm": 0.05140439420938492, - "learning_rate": 4.305113067781167e-06, - "loss": 0.0004183043260127306, - "step": 19920 - }, - { - "epoch": 3.397271952259165, - "grad_norm": 0.015379955060780048, - "learning_rate": 4.293201065791172e-06, - "loss": 0.000815888587385416, - "step": 19925 - }, - { - "epoch": 3.3981244671781754, - "grad_norm": 0.0026071579195559025, - "learning_rate": 4.281304565839533e-06, - "loss": 0.0003499687649309635, - "step": 19930 - }, - { - "epoch": 3.398976982097187, - "grad_norm": 0.01917382702231407, - "learning_rate": 4.269423573479938e-06, - "loss": 0.0005561482626944781, - "step": 19935 - }, - { - "epoch": 3.399829497016198, - "grad_norm": 0.02250206656754017, - "learning_rate": 4.257558094258817e-06, - "loss": 0.0003818372031673789, - "step": 19940 - }, - { - "epoch": 3.400682011935209, - "grad_norm": 0.10248809307813644, - "learning_rate": 4.245708133715389e-06, - "loss": 0.0005628989078104496, - "step": 19945 - }, - { - "epoch": 3.40153452685422, - "grad_norm": 0.017903871834278107, - "learning_rate": 4.233873697381596e-06, - "loss": 0.000215845531783998, - "step": 19950 - }, - { - "epoch": 3.402387041773231, - "grad_norm": 0.061437349766492844, - "learning_rate": 4.222054790782155e-06, - "loss": 0.0007492574863135814, - "step": 19955 - }, - { - "epoch": 3.403239556692242, - "grad_norm": 0.04522673040628433, - "learning_rate": 4.210251419434515e-06, - "loss": 0.001055066753178835, - "step": 19960 - }, - { - "epoch": 3.4040920716112533, - "grad_norm": 0.005560046993196011, - "learning_rate": 4.198463588848883e-06, - "loss": 0.00024275691248476505, - "step": 19965 - }, - { - "epoch": 3.4049445865302643, - "grad_norm": 0.025880778208374977, - "learning_rate": 4.186691304528221e-06, - "loss": 0.00034111484419554474, - "step": 19970 - }, - { - "epoch": 3.4057971014492754, - "grad_norm": 0.02516460418701172, - "learning_rate": 4.174934571968218e-06, - "loss": 0.000534482765942812, - "step": 19975 - }, - { - "epoch": 3.4066496163682864, - "grad_norm": 0.027490204200148582, - "learning_rate": 4.1631933966572954e-06, - "loss": 0.001637015864253044, - "step": 19980 - }, - { - "epoch": 3.4075021312872975, - "grad_norm": 0.030315211042761803, - "learning_rate": 4.1514677840766395e-06, - "loss": 0.00029935024213045835, - "step": 19985 - }, - { - "epoch": 3.4083546462063086, - "grad_norm": 0.06448766589164734, - "learning_rate": 4.139757739700156e-06, - "loss": 0.0004935414995998144, - "step": 19990 - }, - { - "epoch": 3.4092071611253196, - "grad_norm": 0.007854131981730461, - "learning_rate": 4.128063268994479e-06, - "loss": 0.00030187955126166345, - "step": 19995 - }, - { - "epoch": 3.4100596760443307, - "grad_norm": 0.029494259506464005, - "learning_rate": 4.116384377418979e-06, - "loss": 0.0003482209984213114, - "step": 20000 - }, - { - "epoch": 3.4109121909633418, - "grad_norm": 0.030381083488464355, - "learning_rate": 4.104721070425751e-06, - "loss": 0.0002413678914308548, - "step": 20005 - }, - { - "epoch": 3.411764705882353, - "grad_norm": 0.006045108195394278, - "learning_rate": 4.093073353459604e-06, - "loss": 0.0004130109678953886, - "step": 20010 - }, - { - "epoch": 3.412617220801364, - "grad_norm": 0.0822497308254242, - "learning_rate": 4.081441231958094e-06, - "loss": 0.0007556038908660412, - "step": 20015 - }, - { - "epoch": 3.413469735720375, - "grad_norm": 0.02197144739329815, - "learning_rate": 4.069824711351475e-06, - "loss": 0.00042886766605079176, - "step": 20020 - }, - { - "epoch": 3.414322250639386, - "grad_norm": 0.01506667211651802, - "learning_rate": 4.0582237970627204e-06, - "loss": 0.0004569370299577713, - "step": 20025 - }, - { - "epoch": 3.4151747655583975, - "grad_norm": 0.0023130911868065596, - "learning_rate": 4.046638494507538e-06, - "loss": 0.0007974251173436641, - "step": 20030 - }, - { - "epoch": 3.416027280477408, - "grad_norm": 0.08822524547576904, - "learning_rate": 4.035068809094319e-06, - "loss": 0.0006814738735556602, - "step": 20035 - }, - { - "epoch": 3.4168797953964196, - "grad_norm": 0.026947883889079094, - "learning_rate": 4.023514746224184e-06, - "loss": 0.0002705232938751578, - "step": 20040 - }, - { - "epoch": 3.4177323103154307, - "grad_norm": 0.02061464823782444, - "learning_rate": 4.011976311290956e-06, - "loss": 0.0008053860627114772, - "step": 20045 - }, - { - "epoch": 3.4185848252344417, - "grad_norm": 0.01110768411308527, - "learning_rate": 4.000453509681155e-06, - "loss": 0.0005702998489141465, - "step": 20050 - }, - { - "epoch": 3.419437340153453, - "grad_norm": 0.06727463006973267, - "learning_rate": 3.9889463467739995e-06, - "loss": 0.00048296600580215453, - "step": 20055 - }, - { - "epoch": 3.420289855072464, - "grad_norm": 0.01981664076447487, - "learning_rate": 3.977454827941438e-06, - "loss": 0.0007956895977258682, - "step": 20060 - }, - { - "epoch": 3.421142369991475, - "grad_norm": 0.010179187171161175, - "learning_rate": 3.965978958548076e-06, - "loss": 0.001441807672381401, - "step": 20065 - }, - { - "epoch": 3.421994884910486, - "grad_norm": 0.028387323021888733, - "learning_rate": 3.954518743951235e-06, - "loss": 0.000527799129486084, - "step": 20070 - }, - { - "epoch": 3.422847399829497, - "grad_norm": 0.011368883773684502, - "learning_rate": 3.9430741895009275e-06, - "loss": 0.00046253204345703125, - "step": 20075 - }, - { - "epoch": 3.423699914748508, - "grad_norm": 0.012177668511867523, - "learning_rate": 3.931645300539847e-06, - "loss": 0.00043948981910943983, - "step": 20080 - }, - { - "epoch": 3.424552429667519, - "grad_norm": 0.07201547920703888, - "learning_rate": 3.920232082403392e-06, - "loss": 0.0005337335169315338, - "step": 20085 - }, - { - "epoch": 3.42540494458653, - "grad_norm": 0.12001162767410278, - "learning_rate": 3.908834540419621e-06, - "loss": 0.0008155249059200286, - "step": 20090 - }, - { - "epoch": 3.4262574595055413, - "grad_norm": 0.0132389971986413, - "learning_rate": 3.897452679909287e-06, - "loss": 0.000174278998747468, - "step": 20095 - }, - { - "epoch": 3.4271099744245523, - "grad_norm": 0.0051491111516952515, - "learning_rate": 3.886086506185822e-06, - "loss": 0.0006445198785513639, - "step": 20100 - }, - { - "epoch": 3.4279624893435634, - "grad_norm": 0.039136942476034164, - "learning_rate": 3.874736024555328e-06, - "loss": 0.0005972872488200665, - "step": 20105 - }, - { - "epoch": 3.4288150042625745, - "grad_norm": 0.00988066103309393, - "learning_rate": 3.863401240316599e-06, - "loss": 0.00036287889815866947, - "step": 20110 - }, - { - "epoch": 3.4296675191815855, - "grad_norm": 0.012278315611183643, - "learning_rate": 3.852082158761074e-06, - "loss": 0.0004206370562314987, - "step": 20115 - }, - { - "epoch": 3.4305200341005966, - "grad_norm": 0.08950433880090714, - "learning_rate": 3.840778785172897e-06, - "loss": 0.0007027041632682085, - "step": 20120 - }, - { - "epoch": 3.431372549019608, - "grad_norm": 0.015487313270568848, - "learning_rate": 3.829491124828843e-06, - "loss": 0.00030801878310739996, - "step": 20125 - }, - { - "epoch": 3.4322250639386187, - "grad_norm": 0.012695780955255032, - "learning_rate": 3.818219182998379e-06, - "loss": 0.00033567410428076984, - "step": 20130 - }, - { - "epoch": 3.43307757885763, - "grad_norm": 0.013385625556111336, - "learning_rate": 3.8069629649436134e-06, - "loss": 0.00033784976694732903, - "step": 20135 - }, - { - "epoch": 3.4339300937766413, - "grad_norm": 0.02653471939265728, - "learning_rate": 3.7957224759193258e-06, - "loss": 0.00037979823537170887, - "step": 20140 - }, - { - "epoch": 3.4347826086956523, - "grad_norm": 0.045122601091861725, - "learning_rate": 3.7844977211729523e-06, - "loss": 0.0003743718843907118, - "step": 20145 - }, - { - "epoch": 3.4356351236146634, - "grad_norm": 0.004004189744591713, - "learning_rate": 3.7732887059445717e-06, - "loss": 0.00024804847780615093, - "step": 20150 - }, - { - "epoch": 3.4364876385336744, - "grad_norm": 0.09962640702724457, - "learning_rate": 3.7620954354669443e-06, - "loss": 0.0007725684903562069, - "step": 20155 - }, - { - "epoch": 3.4373401534526855, - "grad_norm": 0.026793939992785454, - "learning_rate": 3.75091791496544e-06, - "loss": 0.0003023044904693961, - "step": 20160 - }, - { - "epoch": 3.4381926683716966, - "grad_norm": 0.06049729883670807, - "learning_rate": 3.7397561496581143e-06, - "loss": 0.00038756374269723894, - "step": 20165 - }, - { - "epoch": 3.4390451832907076, - "grad_norm": 0.10067807883024216, - "learning_rate": 3.7286101447556365e-06, - "loss": 0.00040011387318372726, - "step": 20170 - }, - { - "epoch": 3.4398976982097187, - "grad_norm": 0.025836393237113953, - "learning_rate": 3.7174799054613444e-06, - "loss": 0.0009764598682522774, - "step": 20175 - }, - { - "epoch": 3.4407502131287298, - "grad_norm": 0.03506815806031227, - "learning_rate": 3.7063654369712022e-06, - "loss": 0.0005544353742152452, - "step": 20180 - }, - { - "epoch": 3.441602728047741, - "grad_norm": 0.013711848296225071, - "learning_rate": 3.69526674447381e-06, - "loss": 0.0002796804532408714, - "step": 20185 - }, - { - "epoch": 3.442455242966752, - "grad_norm": 0.014671099372208118, - "learning_rate": 3.684183833150406e-06, - "loss": 0.0007412171456962824, - "step": 20190 - }, - { - "epoch": 3.443307757885763, - "grad_norm": 0.09581290930509567, - "learning_rate": 3.67311670817486e-06, - "loss": 0.0009363777935504913, - "step": 20195 - }, - { - "epoch": 3.444160272804774, - "grad_norm": 0.012721995823085308, - "learning_rate": 3.662065374713681e-06, - "loss": 0.0003047358011826873, - "step": 20200 - }, - { - "epoch": 3.445012787723785, - "grad_norm": 0.08440826833248138, - "learning_rate": 3.6510298379259883e-06, - "loss": 0.0015849992632865906, - "step": 20205 - }, - { - "epoch": 3.445865302642796, - "grad_norm": 0.016568806022405624, - "learning_rate": 3.6400101029635515e-06, - "loss": 0.0003544453531503677, - "step": 20210 - }, - { - "epoch": 3.446717817561807, - "grad_norm": 0.012676285579800606, - "learning_rate": 3.62900617497074e-06, - "loss": 0.00013037940952926875, - "step": 20215 - }, - { - "epoch": 3.4475703324808182, - "grad_norm": 0.028908727690577507, - "learning_rate": 3.618018059084553e-06, - "loss": 0.0004815624561160803, - "step": 20220 - }, - { - "epoch": 3.4484228473998293, - "grad_norm": 0.010771363973617554, - "learning_rate": 3.6070457604346155e-06, - "loss": 0.0008128033950924873, - "step": 20225 - }, - { - "epoch": 3.449275362318841, - "grad_norm": 0.05307495594024658, - "learning_rate": 3.5960892841431556e-06, - "loss": 0.0004443288315087557, - "step": 20230 - }, - { - "epoch": 3.4501278772378514, - "grad_norm": 0.04856376722455025, - "learning_rate": 3.5851486353250274e-06, - "loss": 0.0008865063078701496, - "step": 20235 - }, - { - "epoch": 3.450980392156863, - "grad_norm": 0.04320789873600006, - "learning_rate": 3.5742238190876752e-06, - "loss": 0.00030287024565041066, - "step": 20240 - }, - { - "epoch": 3.451832907075874, - "grad_norm": 0.026490481570363045, - "learning_rate": 3.563314840531181e-06, - "loss": 0.0003270474262535572, - "step": 20245 - }, - { - "epoch": 3.452685421994885, - "grad_norm": 0.01927161030471325, - "learning_rate": 3.5524217047482177e-06, - "loss": 0.0007327934727072716, - "step": 20250 - }, - { - "epoch": 3.453537936913896, - "grad_norm": 0.01646221987903118, - "learning_rate": 3.5415444168240547e-06, - "loss": 0.00015120231546461582, - "step": 20255 - }, - { - "epoch": 3.454390451832907, - "grad_norm": 0.07438748329877853, - "learning_rate": 3.53068298183658e-06, - "loss": 0.0008919765241444111, - "step": 20260 - }, - { - "epoch": 3.455242966751918, - "grad_norm": 0.060105398297309875, - "learning_rate": 3.519837404856263e-06, - "loss": 0.0005192287266254425, - "step": 20265 - }, - { - "epoch": 3.4560954816709293, - "grad_norm": 0.014563803561031818, - "learning_rate": 3.5090076909461946e-06, - "loss": 0.0004962874110788107, - "step": 20270 - }, - { - "epoch": 3.4569479965899403, - "grad_norm": 0.06408075243234634, - "learning_rate": 3.4981938451620393e-06, - "loss": 0.0005642884410917759, - "step": 20275 - }, - { - "epoch": 3.4578005115089514, - "grad_norm": 0.018654122948646545, - "learning_rate": 3.4873958725520555e-06, - "loss": 0.00020208589266985656, - "step": 20280 - }, - { - "epoch": 3.4586530264279625, - "grad_norm": 0.029568253085017204, - "learning_rate": 3.4766137781570934e-06, - "loss": 0.000255450839176774, - "step": 20285 - }, - { - "epoch": 3.4595055413469735, - "grad_norm": 0.007675605826079845, - "learning_rate": 3.465847567010606e-06, - "loss": 0.0007365974131971597, - "step": 20290 - }, - { - "epoch": 3.4603580562659846, - "grad_norm": 0.025231147184967995, - "learning_rate": 3.4550972441386105e-06, - "loss": 0.00035758940503001214, - "step": 20295 - }, - { - "epoch": 3.4612105711849956, - "grad_norm": 0.04905321076512337, - "learning_rate": 3.444362814559709e-06, - "loss": 0.0014069808647036552, - "step": 20300 - }, - { - "epoch": 3.4620630861040067, - "grad_norm": 0.03318062052130699, - "learning_rate": 3.4336442832851056e-06, - "loss": 0.0009246711619198322, - "step": 20305 - }, - { - "epoch": 3.4629156010230178, - "grad_norm": 0.006136562675237656, - "learning_rate": 3.422941655318552e-06, - "loss": 0.0006952826399356127, - "step": 20310 - }, - { - "epoch": 3.463768115942029, - "grad_norm": 0.025494717061519623, - "learning_rate": 3.4122549356564057e-06, - "loss": 0.0005774838849902153, - "step": 20315 - }, - { - "epoch": 3.46462063086104, - "grad_norm": 0.07548290491104126, - "learning_rate": 3.4015841292875754e-06, - "loss": 0.0007774532772600651, - "step": 20320 - }, - { - "epoch": 3.4654731457800514, - "grad_norm": 0.05289645493030548, - "learning_rate": 3.3909292411935475e-06, - "loss": 0.0011253023520112037, - "step": 20325 - }, - { - "epoch": 3.466325660699062, - "grad_norm": 0.018192177638411522, - "learning_rate": 3.380290276348377e-06, - "loss": 0.00043428516946733, - "step": 20330 - }, - { - "epoch": 3.4671781756180735, - "grad_norm": 0.04587262123823166, - "learning_rate": 3.3696672397186862e-06, - "loss": 0.00036711143329739573, - "step": 20335 - }, - { - "epoch": 3.4680306905370846, - "grad_norm": 0.04300279915332794, - "learning_rate": 3.3590601362636707e-06, - "loss": 0.0004922755528241396, - "step": 20340 - }, - { - "epoch": 3.4688832054560956, - "grad_norm": 0.051384493708610535, - "learning_rate": 3.3484689709350614e-06, - "loss": 0.000578513415530324, - "step": 20345 - }, - { - "epoch": 3.4697357203751067, - "grad_norm": 0.041927583515644073, - "learning_rate": 3.337893748677191e-06, - "loss": 0.00029339513275772333, - "step": 20350 - }, - { - "epoch": 3.4705882352941178, - "grad_norm": 0.059719622135162354, - "learning_rate": 3.3273344744269014e-06, - "loss": 0.000449614180251956, - "step": 20355 - }, - { - "epoch": 3.471440750213129, - "grad_norm": 0.04662923142313957, - "learning_rate": 3.3167911531136334e-06, - "loss": 0.0005207772832363844, - "step": 20360 - }, - { - "epoch": 3.47229326513214, - "grad_norm": 0.0316859669983387, - "learning_rate": 3.3062637896593498e-06, - "loss": 0.00032441234216094015, - "step": 20365 - }, - { - "epoch": 3.473145780051151, - "grad_norm": 0.05493699386715889, - "learning_rate": 3.2957523889785733e-06, - "loss": 0.0006051870062947273, - "step": 20370 - }, - { - "epoch": 3.473998294970162, - "grad_norm": 0.09825102239847183, - "learning_rate": 3.2852569559783785e-06, - "loss": 0.0013698142021894455, - "step": 20375 - }, - { - "epoch": 3.474850809889173, - "grad_norm": 0.03209096938371658, - "learning_rate": 3.2747774955583757e-06, - "loss": 0.0005756544414907694, - "step": 20380 - }, - { - "epoch": 3.475703324808184, - "grad_norm": 0.07800310105085373, - "learning_rate": 3.2643140126107343e-06, - "loss": 0.001114057283848524, - "step": 20385 - }, - { - "epoch": 3.476555839727195, - "grad_norm": 0.00817018747329712, - "learning_rate": 3.253866512020148e-06, - "loss": 0.00051291324198246, - "step": 20390 - }, - { - "epoch": 3.4774083546462062, - "grad_norm": 0.024451689794659615, - "learning_rate": 3.2434349986638687e-06, - "loss": 0.00032486242707818747, - "step": 20395 - }, - { - "epoch": 3.4782608695652173, - "grad_norm": 0.0076407743617892265, - "learning_rate": 3.2330194774116636e-06, - "loss": 0.00043834159150719644, - "step": 20400 - }, - { - "epoch": 3.4791133844842284, - "grad_norm": 0.012483174912631512, - "learning_rate": 3.222619953125852e-06, - "loss": 0.00016895943554118276, - "step": 20405 - }, - { - "epoch": 3.4799658994032394, - "grad_norm": 0.013760508969426155, - "learning_rate": 3.2122364306612745e-06, - "loss": 0.0003591555170714855, - "step": 20410 - }, - { - "epoch": 3.4808184143222505, - "grad_norm": 0.06936871260404587, - "learning_rate": 3.201868914865309e-06, - "loss": 0.0007365354336798191, - "step": 20415 - }, - { - "epoch": 3.4816709292412615, - "grad_norm": 0.026890093460679054, - "learning_rate": 3.19151741057785e-06, - "loss": 0.0003096622182056308, - "step": 20420 - }, - { - "epoch": 3.4825234441602726, - "grad_norm": 0.1273396909236908, - "learning_rate": 3.181181922631319e-06, - "loss": 0.0018214803189039231, - "step": 20425 - }, - { - "epoch": 3.483375959079284, - "grad_norm": 0.07851844280958176, - "learning_rate": 3.1708624558506784e-06, - "loss": 0.00047972016036510465, - "step": 20430 - }, - { - "epoch": 3.484228473998295, - "grad_norm": 0.10473177582025528, - "learning_rate": 3.1605590150533863e-06, - "loss": 0.0003519801888614893, - "step": 20435 - }, - { - "epoch": 3.485080988917306, - "grad_norm": 0.011826570145785809, - "learning_rate": 3.1502716050494493e-06, - "loss": 0.00012582357740029693, - "step": 20440 - }, - { - "epoch": 3.4859335038363173, - "grad_norm": 0.09120000153779984, - "learning_rate": 3.1400002306413596e-06, - "loss": 0.0011743055656552315, - "step": 20445 - }, - { - "epoch": 3.4867860187553283, - "grad_norm": 0.03551065921783447, - "learning_rate": 3.1297448966241312e-06, - "loss": 0.0003235040698200464, - "step": 20450 - }, - { - "epoch": 3.4876385336743394, - "grad_norm": 0.022862901911139488, - "learning_rate": 3.1195056077853093e-06, - "loss": 0.00019952079746872188, - "step": 20455 - }, - { - "epoch": 3.4884910485933505, - "grad_norm": 0.012301230803132057, - "learning_rate": 3.1092823689049294e-06, - "loss": 0.0005085674580186606, - "step": 20460 - }, - { - "epoch": 3.4893435635123615, - "grad_norm": 0.012983572669327259, - "learning_rate": 3.0990751847555355e-06, - "loss": 0.00026952670887112615, - "step": 20465 - }, - { - "epoch": 3.4901960784313726, - "grad_norm": 0.03648987412452698, - "learning_rate": 3.0888840601021784e-06, - "loss": 0.0006700227968394756, - "step": 20470 - }, - { - "epoch": 3.4910485933503836, - "grad_norm": 0.03276946395635605, - "learning_rate": 3.078708999702424e-06, - "loss": 0.0003196124453097582, - "step": 20475 - }, - { - "epoch": 3.4919011082693947, - "grad_norm": 0.05831300839781761, - "learning_rate": 3.068550008306318e-06, - "loss": 0.0005575232207775116, - "step": 20480 - }, - { - "epoch": 3.4927536231884058, - "grad_norm": 0.007617017719894648, - "learning_rate": 3.0584070906564297e-06, - "loss": 0.0005659013520926237, - "step": 20485 - }, - { - "epoch": 3.493606138107417, - "grad_norm": 0.01129902619868517, - "learning_rate": 3.0482802514878e-06, - "loss": 0.0005468820687383414, - "step": 20490 - }, - { - "epoch": 3.494458653026428, - "grad_norm": 0.021842598915100098, - "learning_rate": 3.0381694955279687e-06, - "loss": 0.00030360198579728606, - "step": 20495 - }, - { - "epoch": 3.495311167945439, - "grad_norm": 0.008945580571889877, - "learning_rate": 3.0280748274969887e-06, - "loss": 0.00019925013184547425, - "step": 20500 - }, - { - "epoch": 3.49616368286445, - "grad_norm": 0.024309689179062843, - "learning_rate": 3.0179962521073823e-06, - "loss": 0.0004822061397135258, - "step": 20505 - }, - { - "epoch": 3.497016197783461, - "grad_norm": 0.1111924946308136, - "learning_rate": 3.007933774064157e-06, - "loss": 0.0009571518748998642, - "step": 20510 - }, - { - "epoch": 3.497868712702472, - "grad_norm": 0.07870755344629288, - "learning_rate": 2.997887398064809e-06, - "loss": 0.00046168952248990534, - "step": 20515 - }, - { - "epoch": 3.498721227621483, - "grad_norm": 0.05273010954260826, - "learning_rate": 2.987857128799333e-06, - "loss": 0.0002907732035964727, - "step": 20520 - }, - { - "epoch": 3.4994032395566923, - "eval_loss": 0.0633777305483818, - "eval_runtime": 3.7174, - "eval_samples_per_second": 67.79, - "eval_steps_per_second": 1.076, - "step": 20524 - }, - { - "eval_cer_subset": 0.01387463969905711, - "eval_cer_subset_edit_distance": 852, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 20524 - }, - { - "epoch": 3.4995737425404947, - "grad_norm": 0.10134585946798325, - "learning_rate": 2.9778429709501866e-06, - "loss": 0.0007728733588010072, - "step": 20525 - }, - { - "epoch": 3.5004262574595053, - "grad_norm": 0.010177328251302242, - "learning_rate": 2.967844929192306e-06, - "loss": 0.0003987153992056847, - "step": 20530 - }, - { - "epoch": 3.501278772378517, - "grad_norm": 0.07338051497936249, - "learning_rate": 2.9578630081931245e-06, - "loss": 0.0006870163604617118, - "step": 20535 - }, - { - "epoch": 3.502131287297528, - "grad_norm": 0.010025802068412304, - "learning_rate": 2.9478972126125143e-06, - "loss": 0.0004745893180370331, - "step": 20540 - }, - { - "epoch": 3.502983802216539, - "grad_norm": 0.10838331282138824, - "learning_rate": 2.9379475471028616e-06, - "loss": 0.00041006896644830705, - "step": 20545 - }, - { - "epoch": 3.50383631713555, - "grad_norm": 0.019695911556482315, - "learning_rate": 2.928014016308994e-06, - "loss": 0.0004196802154183388, - "step": 20550 - }, - { - "epoch": 3.504688832054561, - "grad_norm": 0.01631144993007183, - "learning_rate": 2.918096624868209e-06, - "loss": 0.0008785014972090722, - "step": 20555 - }, - { - "epoch": 3.505541346973572, - "grad_norm": 0.05647982656955719, - "learning_rate": 2.9081953774102744e-06, - "loss": 0.0005624303594231606, - "step": 20560 - }, - { - "epoch": 3.506393861892583, - "grad_norm": 0.05051364004611969, - "learning_rate": 2.8983102785574223e-06, - "loss": 0.0006344023160636425, - "step": 20565 - }, - { - "epoch": 3.5072463768115942, - "grad_norm": 0.04484107717871666, - "learning_rate": 2.888441332924353e-06, - "loss": 0.00034873902332037686, - "step": 20570 - }, - { - "epoch": 3.5080988917306053, - "grad_norm": 0.026035010814666748, - "learning_rate": 2.87858854511821e-06, - "loss": 0.0005680721253156662, - "step": 20575 - }, - { - "epoch": 3.5089514066496164, - "grad_norm": 0.030738165602087975, - "learning_rate": 2.86875191973861e-06, - "loss": 0.0006887212861329317, - "step": 20580 - }, - { - "epoch": 3.5098039215686274, - "grad_norm": 0.021299093961715698, - "learning_rate": 2.858931461377609e-06, - "loss": 0.00039576496928930285, - "step": 20585 - }, - { - "epoch": 3.5106564364876385, - "grad_norm": 0.019585279747843742, - "learning_rate": 2.849127174619735e-06, - "loss": 0.0004361768718808889, - "step": 20590 - }, - { - "epoch": 3.5115089514066495, - "grad_norm": 0.021617313846945763, - "learning_rate": 2.839339064041944e-06, - "loss": 0.00023572267964482306, - "step": 20595 - }, - { - "epoch": 3.5123614663256606, - "grad_norm": 0.032228775322437286, - "learning_rate": 2.8295671342136602e-06, - "loss": 0.0006701612379401922, - "step": 20600 - }, - { - "epoch": 3.5132139812446717, - "grad_norm": 0.06554242223501205, - "learning_rate": 2.819811389696738e-06, - "loss": 0.000471029058098793, - "step": 20605 - }, - { - "epoch": 3.5140664961636827, - "grad_norm": 0.004567406140267849, - "learning_rate": 2.810071835045481e-06, - "loss": 0.0001446882146410644, - "step": 20610 - }, - { - "epoch": 3.514919011082694, - "grad_norm": 0.004180132411420345, - "learning_rate": 2.800348474806652e-06, - "loss": 0.00013457806780934334, - "step": 20615 - }, - { - "epoch": 3.5157715260017053, - "grad_norm": 0.09419524669647217, - "learning_rate": 2.790641313519419e-06, - "loss": 0.0003684045746922493, - "step": 20620 - }, - { - "epoch": 3.516624040920716, - "grad_norm": 0.015027535147964954, - "learning_rate": 2.780950355715429e-06, - "loss": 0.0011356399394571782, - "step": 20625 - }, - { - "epoch": 3.5174765558397274, - "grad_norm": 0.02399168163537979, - "learning_rate": 2.7712756059187254e-06, - "loss": 0.0003479213686659932, - "step": 20630 - }, - { - "epoch": 3.518329070758738, - "grad_norm": 0.001755329198203981, - "learning_rate": 2.761617068645816e-06, - "loss": 0.0009521358646452427, - "step": 20635 - }, - { - "epoch": 3.5191815856777495, - "grad_norm": 0.07159367203712463, - "learning_rate": 2.7519747484056233e-06, - "loss": 0.0004668514244258404, - "step": 20640 - }, - { - "epoch": 3.5200341005967606, - "grad_norm": 0.013049350120127201, - "learning_rate": 2.7423486496995007e-06, - "loss": 0.00038400108460336926, - "step": 20645 - }, - { - "epoch": 3.5208866155157716, - "grad_norm": 0.129945769906044, - "learning_rate": 2.732738777021233e-06, - "loss": 0.0007587837055325509, - "step": 20650 - }, - { - "epoch": 3.5217391304347827, - "grad_norm": 0.011801044456660748, - "learning_rate": 2.723145134857023e-06, - "loss": 0.00033344419207423923, - "step": 20655 - }, - { - "epoch": 3.5225916453537938, - "grad_norm": 0.012840129435062408, - "learning_rate": 2.713567727685513e-06, - "loss": 0.000570116238668561, - "step": 20660 - }, - { - "epoch": 3.523444160272805, - "grad_norm": 0.028646450489759445, - "learning_rate": 2.7040065599777395e-06, - "loss": 0.000337608833797276, - "step": 20665 - }, - { - "epoch": 3.524296675191816, - "grad_norm": 0.012803840450942516, - "learning_rate": 2.694461636197194e-06, - "loss": 0.00026674284599721433, - "step": 20670 - }, - { - "epoch": 3.525149190110827, - "grad_norm": 0.004883004352450371, - "learning_rate": 2.6849329607997503e-06, - "loss": 0.0005247415509074926, - "step": 20675 - }, - { - "epoch": 3.526001705029838, - "grad_norm": 0.013268784619867802, - "learning_rate": 2.675420538233712e-06, - "loss": 0.00042262640781700613, - "step": 20680 - }, - { - "epoch": 3.526854219948849, - "grad_norm": 0.02646799571812153, - "learning_rate": 2.6659243729398026e-06, - "loss": 0.00015779529931023717, - "step": 20685 - }, - { - "epoch": 3.52770673486786, - "grad_norm": 0.07095526158809662, - "learning_rate": 2.656444469351142e-06, - "loss": 0.0005843072663992644, - "step": 20690 - }, - { - "epoch": 3.528559249786871, - "grad_norm": 0.07467476278543472, - "learning_rate": 2.646980831893265e-06, - "loss": 0.0004990112502127886, - "step": 20695 - }, - { - "epoch": 3.5294117647058822, - "grad_norm": 0.016248852014541626, - "learning_rate": 2.6375334649841053e-06, - "loss": 0.0002280582906678319, - "step": 20700 - }, - { - "epoch": 3.5302642796248933, - "grad_norm": 0.015685981139540672, - "learning_rate": 2.628102373034022e-06, - "loss": 0.00040198476053774355, - "step": 20705 - }, - { - "epoch": 3.5311167945439044, - "grad_norm": 0.03586861863732338, - "learning_rate": 2.6186875604457466e-06, - "loss": 0.0006230812985450029, - "step": 20710 - }, - { - "epoch": 3.531969309462916, - "grad_norm": 0.01099316030740738, - "learning_rate": 2.6092890316144435e-06, - "loss": 0.00038466856349259613, - "step": 20715 - }, - { - "epoch": 3.5328218243819265, - "grad_norm": 0.052841104567050934, - "learning_rate": 2.599906790927653e-06, - "loss": 0.0004864667542278767, - "step": 20720 - }, - { - "epoch": 3.533674339300938, - "grad_norm": 0.016549568623304367, - "learning_rate": 2.5905408427653084e-06, - "loss": 0.0005029110237956047, - "step": 20725 - }, - { - "epoch": 3.5345268542199486, - "grad_norm": 0.046012695878744125, - "learning_rate": 2.581191191499754e-06, - "loss": 0.0005438106134533882, - "step": 20730 - }, - { - "epoch": 3.53537936913896, - "grad_norm": 0.010663103312253952, - "learning_rate": 2.57185784149572e-06, - "loss": 0.0002268374664708972, - "step": 20735 - }, - { - "epoch": 3.536231884057971, - "grad_norm": 0.05038389936089516, - "learning_rate": 2.56254079711032e-06, - "loss": 0.0003756745718419552, - "step": 20740 - }, - { - "epoch": 3.5370843989769822, - "grad_norm": 0.03177966922521591, - "learning_rate": 2.5532400626930625e-06, - "loss": 0.00024364627897739411, - "step": 20745 - }, - { - "epoch": 3.5379369138959933, - "grad_norm": 0.056538257747888565, - "learning_rate": 2.5439556425858333e-06, - "loss": 0.0004069384653121233, - "step": 20750 - }, - { - "epoch": 3.5387894288150044, - "grad_norm": 0.17656944692134857, - "learning_rate": 2.5346875411229194e-06, - "loss": 0.00038029137067496777, - "step": 20755 - }, - { - "epoch": 3.5396419437340154, - "grad_norm": 0.007788034155964851, - "learning_rate": 2.5254357626309675e-06, - "loss": 0.00021343901753425598, - "step": 20760 - }, - { - "epoch": 3.5404944586530265, - "grad_norm": 0.04890740290284157, - "learning_rate": 2.516200311429027e-06, - "loss": 0.00035889642313122747, - "step": 20765 - }, - { - "epoch": 3.5413469735720375, - "grad_norm": 0.01161973923444748, - "learning_rate": 2.5069811918285e-06, - "loss": 0.0005623042117804289, - "step": 20770 - }, - { - "epoch": 3.5421994884910486, - "grad_norm": 0.015352857299149036, - "learning_rate": 2.4977784081331926e-06, - "loss": 0.00024357731454074382, - "step": 20775 - }, - { - "epoch": 3.5430520034100597, - "grad_norm": 0.08323964476585388, - "learning_rate": 2.4885919646392653e-06, - "loss": 0.0010311774909496307, - "step": 20780 - }, - { - "epoch": 3.5439045183290707, - "grad_norm": 0.01847103051841259, - "learning_rate": 2.4794218656352573e-06, - "loss": 0.000347610330209136, - "step": 20785 - }, - { - "epoch": 3.544757033248082, - "grad_norm": 0.019678082317113876, - "learning_rate": 2.47026811540207e-06, - "loss": 0.0012042072601616382, - "step": 20790 - }, - { - "epoch": 3.545609548167093, - "grad_norm": 0.0032173304352909327, - "learning_rate": 2.4611307182129723e-06, - "loss": 0.0004959845449775457, - "step": 20795 - }, - { - "epoch": 3.546462063086104, - "grad_norm": 0.07671674340963364, - "learning_rate": 2.452009678333623e-06, - "loss": 0.00034225885756313803, - "step": 20800 - }, - { - "epoch": 3.547314578005115, - "grad_norm": 0.009932668879628181, - "learning_rate": 2.442905000022012e-06, - "loss": 0.00013967978302389383, - "step": 20805 - }, - { - "epoch": 3.548167092924126, - "grad_norm": 0.0073715317994356155, - "learning_rate": 2.4338166875285185e-06, - "loss": 0.0008998749777674675, - "step": 20810 - }, - { - "epoch": 3.549019607843137, - "grad_norm": 0.050911612808704376, - "learning_rate": 2.4247447450958564e-06, - "loss": 0.0005864705890417099, - "step": 20815 - }, - { - "epoch": 3.5498721227621486, - "grad_norm": 0.013407070189714432, - "learning_rate": 2.4156891769591222e-06, - "loss": 0.00031670662574470045, - "step": 20820 - }, - { - "epoch": 3.550724637681159, - "grad_norm": 0.016584917902946472, - "learning_rate": 2.4066499873457547e-06, - "loss": 0.00020419515203684568, - "step": 20825 - }, - { - "epoch": 3.5515771526001707, - "grad_norm": 0.059559017419815063, - "learning_rate": 2.3976271804755366e-06, - "loss": 0.0004414593800902367, - "step": 20830 - }, - { - "epoch": 3.5524296675191813, - "grad_norm": 0.06465112417936325, - "learning_rate": 2.3886207605606276e-06, - "loss": 0.000545783480629325, - "step": 20835 - }, - { - "epoch": 3.553282182438193, - "grad_norm": 0.06885542720556259, - "learning_rate": 2.3796307318055112e-06, - "loss": 0.0005661803297698498, - "step": 20840 - }, - { - "epoch": 3.554134697357204, - "grad_norm": 0.011327388696372509, - "learning_rate": 2.3706570984070417e-06, - "loss": 0.00034151540603488684, - "step": 20845 - }, - { - "epoch": 3.554987212276215, - "grad_norm": 0.012066229246556759, - "learning_rate": 2.361699864554406e-06, - "loss": 0.00031219117809087036, - "step": 20850 - }, - { - "epoch": 3.555839727195226, - "grad_norm": 0.010691968724131584, - "learning_rate": 2.352759034429143e-06, - "loss": 0.0007128301076591015, - "step": 20855 - }, - { - "epoch": 3.556692242114237, - "grad_norm": 0.016185365617275238, - "learning_rate": 2.3438346122051295e-06, - "loss": 0.0010599909350275994, - "step": 20860 - }, - { - "epoch": 3.557544757033248, - "grad_norm": 0.07072475552558899, - "learning_rate": 2.3349266020485714e-06, - "loss": 0.0009240474551916123, - "step": 20865 - }, - { - "epoch": 3.558397271952259, - "grad_norm": 0.02984112873673439, - "learning_rate": 2.326035008118038e-06, - "loss": 0.0006467741448432207, - "step": 20870 - }, - { - "epoch": 3.5592497868712702, - "grad_norm": 0.04521370306611061, - "learning_rate": 2.3171598345644164e-06, - "loss": 0.0007281980477273464, - "step": 20875 - }, - { - "epoch": 3.5601023017902813, - "grad_norm": 0.051938124001026154, - "learning_rate": 2.308301085530931e-06, - "loss": 0.001002713944762945, - "step": 20880 - }, - { - "epoch": 3.5609548167092924, - "grad_norm": 0.00381719833239913, - "learning_rate": 2.299458765153135e-06, - "loss": 0.0003859725082293153, - "step": 20885 - }, - { - "epoch": 3.5618073316283034, - "grad_norm": 0.008161951787769794, - "learning_rate": 2.2906328775589315e-06, - "loss": 0.00029291068203747274, - "step": 20890 - }, - { - "epoch": 3.5626598465473145, - "grad_norm": 0.012069445103406906, - "learning_rate": 2.2818234268685247e-06, - "loss": 0.00023344492074102164, - "step": 20895 - }, - { - "epoch": 3.5635123614663256, - "grad_norm": 0.019283002242445946, - "learning_rate": 2.273030417194474e-06, - "loss": 0.0002701515797525644, - "step": 20900 - }, - { - "epoch": 3.5643648763853366, - "grad_norm": 0.04979168623685837, - "learning_rate": 2.2642538526416384e-06, - "loss": 0.0007004131563007832, - "step": 20905 - }, - { - "epoch": 3.5652173913043477, - "grad_norm": 0.016069067642092705, - "learning_rate": 2.255493737307207e-06, - "loss": 0.00026671388186514375, - "step": 20910 - }, - { - "epoch": 3.566069906223359, - "grad_norm": 0.08306407183408737, - "learning_rate": 2.246750075280704e-06, - "loss": 0.0007622113451361656, - "step": 20915 - }, - { - "epoch": 3.56692242114237, - "grad_norm": 0.01117862667888403, - "learning_rate": 2.238022870643956e-06, - "loss": 0.0003897631075233221, - "step": 20920 - }, - { - "epoch": 3.5677749360613813, - "grad_norm": 0.02147560566663742, - "learning_rate": 2.2293121274711126e-06, - "loss": 0.00027591800317168237, - "step": 20925 - }, - { - "epoch": 3.568627450980392, - "grad_norm": 0.08633051812648773, - "learning_rate": 2.2206178498286293e-06, - "loss": 0.00048953453078866, - "step": 20930 - }, - { - "epoch": 3.5694799658994034, - "grad_norm": 0.024494649842381477, - "learning_rate": 2.211940041775301e-06, - "loss": 0.00030716974288225174, - "step": 20935 - }, - { - "epoch": 3.5703324808184145, - "grad_norm": 0.011852308176457882, - "learning_rate": 2.2032787073622075e-06, - "loss": 0.000294071389362216, - "step": 20940 - }, - { - "epoch": 3.5711849957374255, - "grad_norm": 0.013524633832275867, - "learning_rate": 2.1946338506327487e-06, - "loss": 0.0009473714977502823, - "step": 20945 - }, - { - "epoch": 3.5720375106564366, - "grad_norm": 0.10399185121059418, - "learning_rate": 2.1860054756226374e-06, - "loss": 0.0007893742062151432, - "step": 20950 - }, - { - "epoch": 3.5728900255754477, - "grad_norm": 0.026907512918114662, - "learning_rate": 2.1773935863598725e-06, - "loss": 0.00027270009741187096, - "step": 20955 - }, - { - "epoch": 3.5737425404944587, - "grad_norm": 0.02188337780535221, - "learning_rate": 2.1687981868647883e-06, - "loss": 0.0011183698661625385, - "step": 20960 - }, - { - "epoch": 3.57459505541347, - "grad_norm": 0.019559821113944054, - "learning_rate": 2.160219281149987e-06, - "loss": 0.0007883405312895774, - "step": 20965 - }, - { - "epoch": 3.575447570332481, - "grad_norm": 0.020538685843348503, - "learning_rate": 2.151656873220399e-06, - "loss": 0.0003405439667403698, - "step": 20970 - }, - { - "epoch": 3.576300085251492, - "grad_norm": 0.05687247961759567, - "learning_rate": 2.143110967073235e-06, - "loss": 0.00031936001032590867, - "step": 20975 - }, - { - "epoch": 3.577152600170503, - "grad_norm": 0.06545376777648926, - "learning_rate": 2.1345815666980027e-06, - "loss": 0.0014524533413350581, - "step": 20980 - }, - { - "epoch": 3.578005115089514, - "grad_norm": 0.013182499445974827, - "learning_rate": 2.1260686760765186e-06, - "loss": 0.0007357773371040821, - "step": 20985 - }, - { - "epoch": 3.578857630008525, - "grad_norm": 0.014390240423381329, - "learning_rate": 2.117572299182882e-06, - "loss": 0.0002635567681863904, - "step": 20990 - }, - { - "epoch": 3.579710144927536, - "grad_norm": 0.007471280638128519, - "learning_rate": 2.109092439983487e-06, - "loss": 0.00034300005063414574, - "step": 20995 - }, - { - "epoch": 3.580562659846547, - "grad_norm": 0.0056254020892083645, - "learning_rate": 2.1006291024370044e-06, - "loss": 0.00067988820374012, - "step": 21000 - }, - { - "epoch": 3.5814151747655583, - "grad_norm": 0.04476655274629593, - "learning_rate": 2.0921822904944152e-06, - "loss": 0.0012643163092434407, - "step": 21005 - }, - { - "epoch": 3.5822676896845693, - "grad_norm": 0.07641401141881943, - "learning_rate": 2.0837520080989612e-06, - "loss": 0.0005423400085419417, - "step": 21010 - }, - { - "epoch": 3.5831202046035804, - "grad_norm": 0.007483305409550667, - "learning_rate": 2.0753382591861823e-06, - "loss": 0.000717478571459651, - "step": 21015 - }, - { - "epoch": 3.583972719522592, - "grad_norm": 0.023776588961482048, - "learning_rate": 2.066941047683898e-06, - "loss": 0.0007531133480370045, - "step": 21020 - }, - { - "epoch": 3.5848252344416025, - "grad_norm": 0.011082421988248825, - "learning_rate": 2.0585603775121985e-06, - "loss": 0.000845146831125021, - "step": 21025 - }, - { - "epoch": 3.585677749360614, - "grad_norm": 0.016635581851005554, - "learning_rate": 2.0501962525834666e-06, - "loss": 0.0004957383964210749, - "step": 21030 - }, - { - "epoch": 3.5865302642796246, - "grad_norm": 0.009021840058267117, - "learning_rate": 2.0418486768023533e-06, - "loss": 0.00018236858304589986, - "step": 21035 - }, - { - "epoch": 3.587382779198636, - "grad_norm": 0.08496322482824326, - "learning_rate": 2.033517654065783e-06, - "loss": 0.000499946903437376, - "step": 21040 - }, - { - "epoch": 3.588235294117647, - "grad_norm": 0.03010745905339718, - "learning_rate": 2.025203188262954e-06, - "loss": 0.0007221087813377381, - "step": 21045 - }, - { - "epoch": 3.5890878090366582, - "grad_norm": 0.014769963920116425, - "learning_rate": 2.01690528327534e-06, - "loss": 0.0005381438415497542, - "step": 21050 - }, - { - "epoch": 3.5899403239556693, - "grad_norm": 0.004737900570034981, - "learning_rate": 2.0086239429766755e-06, - "loss": 0.00042590871453285217, - "step": 21055 - }, - { - "epoch": 3.5907928388746804, - "grad_norm": 0.011567474342882633, - "learning_rate": 2.000359171232968e-06, - "loss": 0.0002875934354960918, - "step": 21060 - }, - { - "epoch": 3.5916453537936914, - "grad_norm": 0.014703325927257538, - "learning_rate": 1.9921109719024815e-06, - "loss": 0.0004461627919226885, - "step": 21065 - }, - { - "epoch": 3.5924978687127025, - "grad_norm": 0.048457443714141846, - "learning_rate": 1.983879348835753e-06, - "loss": 0.00031095552258193494, - "step": 21070 - }, - { - "epoch": 3.5933503836317136, - "grad_norm": 0.035036347806453705, - "learning_rate": 1.975664305875582e-06, - "loss": 0.0004704943858087063, - "step": 21075 - }, - { - "epoch": 3.5942028985507246, - "grad_norm": 0.010464261285960674, - "learning_rate": 1.967465846857015e-06, - "loss": 0.000991502869874239, - "step": 21080 - }, - { - "epoch": 3.5950554134697357, - "grad_norm": 0.06083119288086891, - "learning_rate": 1.9592839756073773e-06, - "loss": 0.0003390623489394784, - "step": 21085 - }, - { - "epoch": 3.5959079283887467, - "grad_norm": 0.06610855460166931, - "learning_rate": 1.951118695946234e-06, - "loss": 0.0006665963679552078, - "step": 21090 - }, - { - "epoch": 3.596760443307758, - "grad_norm": 0.16398076713085175, - "learning_rate": 1.942970011685399e-06, - "loss": 0.000514071062207222, - "step": 21095 - }, - { - "epoch": 3.597612958226769, - "grad_norm": 0.007298734970390797, - "learning_rate": 1.9348379266289667e-06, - "loss": 0.00034214446786791087, - "step": 21100 - }, - { - "epoch": 3.59846547314578, - "grad_norm": 0.01020489726215601, - "learning_rate": 1.9267224445732548e-06, - "loss": 0.00022972908336669207, - "step": 21105 - }, - { - "epoch": 3.599317988064791, - "grad_norm": 0.004301535431295633, - "learning_rate": 1.9186235693068402e-06, - "loss": 0.000436145206913352, - "step": 21110 - }, - { - "epoch": 3.6001705029838025, - "grad_norm": 0.003998286556452513, - "learning_rate": 1.9105413046105452e-06, - "loss": 0.0006782910786569118, - "step": 21115 - }, - { - "epoch": 3.601023017902813, - "grad_norm": 0.024540653452277184, - "learning_rate": 1.9024756542574474e-06, - "loss": 0.000988519936800003, - "step": 21120 - }, - { - "epoch": 3.6018755328218246, - "grad_norm": 0.008918581530451775, - "learning_rate": 1.8944266220128512e-06, - "loss": 0.0002553706057369709, - "step": 21125 - }, - { - "epoch": 3.602728047740835, - "grad_norm": 0.04160633683204651, - "learning_rate": 1.886394211634322e-06, - "loss": 0.0010507453233003616, - "step": 21130 - }, - { - "epoch": 3.6035805626598467, - "grad_norm": 0.011109927669167519, - "learning_rate": 1.878378426871656e-06, - "loss": 0.0006055444478988648, - "step": 21135 - }, - { - "epoch": 3.604433077578858, - "grad_norm": 0.04830114543437958, - "learning_rate": 1.8703792714668763e-06, - "loss": 0.0004960444755852222, - "step": 21140 - }, - { - "epoch": 3.605285592497869, - "grad_norm": 0.00889639277011156, - "learning_rate": 1.862396749154267e-06, - "loss": 0.00040551358833909037, - "step": 21145 - }, - { - "epoch": 3.60613810741688, - "grad_norm": 0.004867313429713249, - "learning_rate": 1.8544308636603346e-06, - "loss": 0.00046198870986700056, - "step": 21150 - }, - { - "epoch": 3.606990622335891, - "grad_norm": 0.011908004991710186, - "learning_rate": 1.8464816187038129e-06, - "loss": 0.0007289954926818609, - "step": 21155 - }, - { - "epoch": 3.607843137254902, - "grad_norm": 0.028547517955303192, - "learning_rate": 1.8385490179956706e-06, - "loss": 0.00074036936275661, - "step": 21160 - }, - { - "epoch": 3.608695652173913, - "grad_norm": 0.016837269067764282, - "learning_rate": 1.8306330652391204e-06, - "loss": 0.00043045044876635077, - "step": 21165 - }, - { - "epoch": 3.609548167092924, - "grad_norm": 0.0513509176671505, - "learning_rate": 1.8227337641295859e-06, - "loss": 0.0011829334311187268, - "step": 21170 - }, - { - "epoch": 3.610400682011935, - "grad_norm": 0.0072334990836679935, - "learning_rate": 1.8148511183547252e-06, - "loss": 0.000338142248801887, - "step": 21175 - }, - { - "epoch": 3.6112531969309463, - "grad_norm": 0.012744804844260216, - "learning_rate": 1.806985131594424e-06, - "loss": 0.0004504667595028877, - "step": 21180 - }, - { - "epoch": 3.6121057118499573, - "grad_norm": 0.011759931221604347, - "learning_rate": 1.799135807520774e-06, - "loss": 0.00029756310395896436, - "step": 21185 - }, - { - "epoch": 3.6129582267689684, - "grad_norm": 0.018434008583426476, - "learning_rate": 1.7913031497981193e-06, - "loss": 0.00032974979840219023, - "step": 21190 - }, - { - "epoch": 3.6138107416879794, - "grad_norm": 0.014259081333875656, - "learning_rate": 1.7834871620829889e-06, - "loss": 0.000511990487575531, - "step": 21195 - }, - { - "epoch": 3.6146632566069905, - "grad_norm": 0.007993457838892937, - "learning_rate": 1.7756878480241556e-06, - "loss": 0.001352803036570549, - "step": 21200 - }, - { - "epoch": 3.6155157715260016, - "grad_norm": 0.004293349105864763, - "learning_rate": 1.7679052112625908e-06, - "loss": 0.0003066781908273697, - "step": 21205 - }, - { - "epoch": 3.6163682864450126, - "grad_norm": 0.07703537493944168, - "learning_rate": 1.760139255431492e-06, - "loss": 0.00040162606164813043, - "step": 21210 - }, - { - "epoch": 3.6172208013640237, - "grad_norm": 0.16972683370113373, - "learning_rate": 1.7523899841562632e-06, - "loss": 0.0007527290377765894, - "step": 21215 - }, - { - "epoch": 3.618073316283035, - "grad_norm": 0.027821099385619164, - "learning_rate": 1.7446574010545277e-06, - "loss": 0.0003161250613629818, - "step": 21220 - }, - { - "epoch": 3.618925831202046, - "grad_norm": 0.03964545577764511, - "learning_rate": 1.7369415097361103e-06, - "loss": 0.0006695718038827181, - "step": 21225 - }, - { - "epoch": 3.6197783461210573, - "grad_norm": 0.04607070982456207, - "learning_rate": 1.729242313803042e-06, - "loss": 0.0003807933768257499, - "step": 21230 - }, - { - "epoch": 3.620630861040068, - "grad_norm": 0.01855855993926525, - "learning_rate": 1.7215598168495729e-06, - "loss": 0.0009717889130115509, - "step": 21235 - }, - { - "epoch": 3.6214833759590794, - "grad_norm": 0.031046895310282707, - "learning_rate": 1.7138940224621381e-06, - "loss": 0.000874253548681736, - "step": 21240 - }, - { - "epoch": 3.6223358908780905, - "grad_norm": 0.07868655025959015, - "learning_rate": 1.7062449342193917e-06, - "loss": 0.0005975952371954918, - "step": 21245 - }, - { - "epoch": 3.6231884057971016, - "grad_norm": 0.12167970091104507, - "learning_rate": 1.6986125556921776e-06, - "loss": 0.0012172631919384002, - "step": 21250 - }, - { - "epoch": 3.6240409207161126, - "grad_norm": 0.026766330003738403, - "learning_rate": 1.6909968904435453e-06, - "loss": 0.00015401726122945547, - "step": 21255 - }, - { - "epoch": 3.6248934356351237, - "grad_norm": 0.0523478165268898, - "learning_rate": 1.6833979420287386e-06, - "loss": 0.00043351505883038044, - "step": 21260 - }, - { - "epoch": 3.6257459505541347, - "grad_norm": 0.03436252847313881, - "learning_rate": 1.6758157139952072e-06, - "loss": 0.0005547068547457456, - "step": 21265 - }, - { - "epoch": 3.626598465473146, - "grad_norm": 0.009839732199907303, - "learning_rate": 1.6682502098825824e-06, - "loss": 0.0005176344886422158, - "step": 21270 - }, - { - "epoch": 3.627450980392157, - "grad_norm": 0.08149638772010803, - "learning_rate": 1.6607014332226886e-06, - "loss": 0.000564785860478878, - "step": 21275 - }, - { - "epoch": 3.628303495311168, - "grad_norm": 0.004576738923788071, - "learning_rate": 1.6531693875395574e-06, - "loss": 0.0010984219610691071, - "step": 21280 - }, - { - "epoch": 3.629156010230179, - "grad_norm": 0.011690586805343628, - "learning_rate": 1.6456540763493884e-06, - "loss": 0.00019397520227357746, - "step": 21285 - }, - { - "epoch": 3.63000852514919, - "grad_norm": 0.07157396525144577, - "learning_rate": 1.6381555031605876e-06, - "loss": 0.0006828072480857372, - "step": 21290 - }, - { - "epoch": 3.630861040068201, - "grad_norm": 0.06383173167705536, - "learning_rate": 1.6306736714737256e-06, - "loss": 0.0006252658553421497, - "step": 21295 - }, - { - "epoch": 3.631713554987212, - "grad_norm": 0.05774596706032753, - "learning_rate": 1.6232085847815795e-06, - "loss": 0.0013784953393042088, - "step": 21300 - }, - { - "epoch": 3.632566069906223, - "grad_norm": 0.038066111505031586, - "learning_rate": 1.615760246569099e-06, - "loss": 0.000414402037858963, - "step": 21305 - }, - { - "epoch": 3.6334185848252343, - "grad_norm": 0.04431888833642006, - "learning_rate": 1.6083286603134112e-06, - "loss": 0.0002233121544122696, - "step": 21310 - }, - { - "epoch": 3.634271099744246, - "grad_norm": 0.005531220696866512, - "learning_rate": 1.6009138294838367e-06, - "loss": 0.0005217500030994416, - "step": 21315 - }, - { - "epoch": 3.6351236146632564, - "grad_norm": 0.03662824630737305, - "learning_rate": 1.5935157575418605e-06, - "loss": 0.0006840425077825785, - "step": 21320 - }, - { - "epoch": 3.635976129582268, - "grad_norm": 0.04950394108891487, - "learning_rate": 1.5861344479411454e-06, - "loss": 0.00041497671045362947, - "step": 21325 - }, - { - "epoch": 3.6368286445012785, - "grad_norm": 0.020219210535287857, - "learning_rate": 1.5787699041275345e-06, - "loss": 0.00039848005399107934, - "step": 21330 - }, - { - "epoch": 3.63768115942029, - "grad_norm": 0.009260480292141438, - "learning_rate": 1.5714221295390488e-06, - "loss": 0.0008187741041183472, - "step": 21335 - }, - { - "epoch": 3.638533674339301, - "grad_norm": 0.03334662690758705, - "learning_rate": 1.5640911276058654e-06, - "loss": 0.0002430976601317525, - "step": 21340 - }, - { - "epoch": 3.639386189258312, - "grad_norm": 0.02324068546295166, - "learning_rate": 1.5567769017503382e-06, - "loss": 0.00017816005274653434, - "step": 21345 - }, - { - "epoch": 3.640238704177323, - "grad_norm": 0.08741103112697601, - "learning_rate": 1.5494794553869982e-06, - "loss": 0.0008083363994956016, - "step": 21350 - }, - { - "epoch": 3.6410912190963343, - "grad_norm": 0.052611518651247025, - "learning_rate": 1.542198791922529e-06, - "loss": 0.00037114876322448255, - "step": 21355 - }, - { - "epoch": 3.6419437340153453, - "grad_norm": 0.044670768082141876, - "learning_rate": 1.534934914755795e-06, - "loss": 0.0010107600130140782, - "step": 21360 - }, - { - "epoch": 3.6427962489343564, - "grad_norm": 0.029497483745217323, - "learning_rate": 1.5276878272778126e-06, - "loss": 0.00046050939708948136, - "step": 21365 - }, - { - "epoch": 3.6436487638533674, - "grad_norm": 0.033001627773046494, - "learning_rate": 1.520457532871759e-06, - "loss": 0.0008739419281482697, - "step": 21370 - }, - { - "epoch": 3.6445012787723785, - "grad_norm": 0.0858602374792099, - "learning_rate": 1.5132440349129804e-06, - "loss": 0.001319802924990654, - "step": 21375 - }, - { - "epoch": 3.6453537936913896, - "grad_norm": 0.002159344032406807, - "learning_rate": 1.5060473367689785e-06, - "loss": 0.000239237817004323, - "step": 21380 - }, - { - "epoch": 3.6462063086104006, - "grad_norm": 0.10409238934516907, - "learning_rate": 1.4988674417994076e-06, - "loss": 0.001322145201265812, - "step": 21385 - }, - { - "epoch": 3.6470588235294117, - "grad_norm": 0.08724559098482132, - "learning_rate": 1.4917043533560823e-06, - "loss": 0.0007014136761426925, - "step": 21390 - }, - { - "epoch": 3.6479113384484227, - "grad_norm": 0.05907455086708069, - "learning_rate": 1.4845580747829696e-06, - "loss": 0.00017757418099790813, - "step": 21395 - }, - { - "epoch": 3.648763853367434, - "grad_norm": 0.00219578854739666, - "learning_rate": 1.4774286094161883e-06, - "loss": 0.00017139697447419167, - "step": 21400 - }, - { - "epoch": 3.649616368286445, - "grad_norm": 0.038314200937747955, - "learning_rate": 1.4703159605840218e-06, - "loss": 0.0002736913273110986, - "step": 21405 - }, - { - "epoch": 3.6504688832054564, - "grad_norm": 0.01319828350096941, - "learning_rate": 1.4632201316068806e-06, - "loss": 0.00023620841093361378, - "step": 21410 - }, - { - "epoch": 3.651321398124467, - "grad_norm": 0.039625514298677444, - "learning_rate": 1.456141125797332e-06, - "loss": 0.00044136070646345616, - "step": 21415 - }, - { - "epoch": 3.6521739130434785, - "grad_norm": 0.0016333634266629815, - "learning_rate": 1.4490789464601027e-06, - "loss": 0.000284887757152319, - "step": 21420 - }, - { - "epoch": 3.653026427962489, - "grad_norm": 0.007170053664594889, - "learning_rate": 1.4420335968920435e-06, - "loss": 0.00017265495844185353, - "step": 21425 - }, - { - "epoch": 3.6538789428815006, - "grad_norm": 0.008500473573803902, - "learning_rate": 1.4350050803821608e-06, - "loss": 0.00042344643734395506, - "step": 21430 - }, - { - "epoch": 3.6547314578005117, - "grad_norm": 0.02188137173652649, - "learning_rate": 1.4279934002115968e-06, - "loss": 0.0008881168439984321, - "step": 21435 - }, - { - "epoch": 3.6555839727195227, - "grad_norm": 0.011452939361333847, - "learning_rate": 1.420998559653637e-06, - "loss": 0.00039616567082703116, - "step": 21440 - }, - { - "epoch": 3.656436487638534, - "grad_norm": 0.07035384327173233, - "learning_rate": 1.4140205619737068e-06, - "loss": 0.0011622272431850433, - "step": 21445 - }, - { - "epoch": 3.657289002557545, - "grad_norm": 0.007866617292165756, - "learning_rate": 1.4070594104293758e-06, - "loss": 0.0007314439862966537, - "step": 21450 - }, - { - "epoch": 3.658141517476556, - "grad_norm": 0.020861556753516197, - "learning_rate": 1.400115108270332e-06, - "loss": 0.0006033163517713547, - "step": 21455 - }, - { - "epoch": 3.658994032395567, - "grad_norm": 0.10272221267223358, - "learning_rate": 1.3931876587384024e-06, - "loss": 0.00074980896897614, - "step": 21460 - }, - { - "epoch": 3.659846547314578, - "grad_norm": 0.03701607510447502, - "learning_rate": 1.3862770650675675e-06, - "loss": 0.00043788314796984197, - "step": 21465 - }, - { - "epoch": 3.660699062233589, - "grad_norm": 0.01912214234471321, - "learning_rate": 1.3793833304839088e-06, - "loss": 0.00025298474356532096, - "step": 21470 - }, - { - "epoch": 3.6615515771526, - "grad_norm": 0.09408050775527954, - "learning_rate": 1.3725064582056563e-06, - "loss": 0.0005287495441734791, - "step": 21475 - }, - { - "epoch": 3.662404092071611, - "grad_norm": 0.04015975072979927, - "learning_rate": 1.3656464514431587e-06, - "loss": 0.0005934454500675201, - "step": 21480 - }, - { - "epoch": 3.6632566069906223, - "grad_norm": 0.06787826120853424, - "learning_rate": 1.3588033133988916e-06, - "loss": 0.0005741545930504799, - "step": 21485 - }, - { - "epoch": 3.6641091219096333, - "grad_norm": 0.011169936507940292, - "learning_rate": 1.3519770472674663e-06, - "loss": 0.000489132758229971, - "step": 21490 - }, - { - "epoch": 3.6649616368286444, - "grad_norm": 0.08029177039861679, - "learning_rate": 1.345167656235617e-06, - "loss": 0.00028758635744452477, - "step": 21495 - }, - { - "epoch": 3.6658141517476555, - "grad_norm": 0.014910605736076832, - "learning_rate": 1.3383751434821844e-06, - "loss": 0.0004812784492969513, - "step": 21500 - }, - { - "epoch": 3.6666666666666665, - "grad_norm": 0.010188158601522446, - "learning_rate": 1.3315995121781442e-06, - "loss": 0.00021801250986754894, - "step": 21505 - }, - { - "epoch": 3.6675191815856776, - "grad_norm": 0.009267419576644897, - "learning_rate": 1.3248407654865747e-06, - "loss": 0.00018663164228200914, - "step": 21510 - }, - { - "epoch": 3.668371696504689, - "grad_norm": 0.008017940446734428, - "learning_rate": 1.318098906562702e-06, - "loss": 0.00021431564819067716, - "step": 21515 - }, - { - "epoch": 3.6692242114236997, - "grad_norm": 0.059672970324754715, - "learning_rate": 1.3113739385538418e-06, - "loss": 0.002246948331594467, - "step": 21520 - }, - { - "epoch": 3.670076726342711, - "grad_norm": 0.040056753903627396, - "learning_rate": 1.3046658645994287e-06, - "loss": 0.000770262023434043, - "step": 21525 - }, - { - "epoch": 3.670929241261722, - "grad_norm": 0.01101162564009428, - "learning_rate": 1.297974687831016e-06, - "loss": 0.0002761463634669781, - "step": 21530 - }, - { - "epoch": 3.6717817561807333, - "grad_norm": 0.02171594463288784, - "learning_rate": 1.2913004113722675e-06, - "loss": 0.002316083945333958, - "step": 21535 - }, - { - "epoch": 3.6726342710997444, - "grad_norm": 0.10676748305559158, - "learning_rate": 1.2846430383389699e-06, - "loss": 0.0004832268226891756, - "step": 21540 - }, - { - "epoch": 3.6734867860187554, - "grad_norm": 0.009993486106395721, - "learning_rate": 1.2780025718389916e-06, - "loss": 0.0002966204658150673, - "step": 21545 - }, - { - "epoch": 3.6743393009377665, - "grad_norm": 0.015029081143438816, - "learning_rate": 1.2713790149723277e-06, - "loss": 0.00016313637606799603, - "step": 21550 - }, - { - "epoch": 3.6751918158567776, - "grad_norm": 0.004780618939548731, - "learning_rate": 1.2647723708310713e-06, - "loss": 0.0002024576999247074, - "step": 21555 - }, - { - "epoch": 3.6760443307757886, - "grad_norm": 0.03506084531545639, - "learning_rate": 1.2581826424994348e-06, - "loss": 0.00036899447441101073, - "step": 21560 - }, - { - "epoch": 3.6768968456947997, - "grad_norm": 0.03333313763141632, - "learning_rate": 1.2516098330537154e-06, - "loss": 0.0009617398492991925, - "step": 21565 - }, - { - "epoch": 3.6777493606138107, - "grad_norm": 0.032154183834791183, - "learning_rate": 1.2450539455623173e-06, - "loss": 0.0003954335581511259, - "step": 21570 - }, - { - "epoch": 3.678601875532822, - "grad_norm": 0.029868626967072487, - "learning_rate": 1.238514983085742e-06, - "loss": 0.0002000307198613882, - "step": 21575 - }, - { - "epoch": 3.679454390451833, - "grad_norm": 0.007650670595467091, - "learning_rate": 1.2319929486766106e-06, - "loss": 0.00048509505577385423, - "step": 21580 - }, - { - "epoch": 3.680306905370844, - "grad_norm": 0.09073834866285324, - "learning_rate": 1.225487845379608e-06, - "loss": 0.0009519712999463082, - "step": 21585 - }, - { - "epoch": 3.681159420289855, - "grad_norm": 0.02642376720905304, - "learning_rate": 1.2189996762315468e-06, - "loss": 0.0002688464941456914, - "step": 21590 - }, - { - "epoch": 3.682011935208866, - "grad_norm": 0.013354654423892498, - "learning_rate": 1.2125284442613167e-06, - "loss": 0.0005553624592721462, - "step": 21595 - }, - { - "epoch": 3.682864450127877, - "grad_norm": 0.010057013481855392, - "learning_rate": 1.206074152489897e-06, - "loss": 0.00021782214753329754, - "step": 21600 - }, - { - "epoch": 3.683716965046888, - "grad_norm": 0.01647094078361988, - "learning_rate": 1.1996368039303733e-06, - "loss": 0.0002732400316745043, - "step": 21605 - }, - { - "epoch": 3.6845694799658997, - "grad_norm": 0.04311537370085716, - "learning_rate": 1.1932164015879126e-06, - "loss": 0.0002665382344275713, - "step": 21610 - }, - { - "epoch": 3.6854219948849103, - "grad_norm": 0.007978399284183979, - "learning_rate": 1.1868129484597714e-06, - "loss": 0.0005598774179816246, - "step": 21615 - }, - { - "epoch": 3.686274509803922, - "grad_norm": 0.06534867733716965, - "learning_rate": 1.1804264475352916e-06, - "loss": 0.0006941860541701317, - "step": 21620 - }, - { - "epoch": 3.6871270247229324, - "grad_norm": 0.021297244355082512, - "learning_rate": 1.1740569017959098e-06, - "loss": 0.0014498828910291194, - "step": 21625 - }, - { - "epoch": 3.687979539641944, - "grad_norm": 0.015370003879070282, - "learning_rate": 1.1677043142151386e-06, - "loss": 0.0002328573726117611, - "step": 21630 - }, - { - "epoch": 3.688832054560955, - "grad_norm": 0.044275715947151184, - "learning_rate": 1.1613686877585765e-06, - "loss": 0.0004160989075899124, - "step": 21635 - }, - { - "epoch": 3.689684569479966, - "grad_norm": 0.012127033434808254, - "learning_rate": 1.155050025383912e-06, - "loss": 0.0004161412827670574, - "step": 21640 - }, - { - "epoch": 3.690537084398977, - "grad_norm": 0.03625606745481491, - "learning_rate": 1.1487483300408898e-06, - "loss": 0.0002486191689968109, - "step": 21645 - }, - { - "epoch": 3.691389599317988, - "grad_norm": 0.0037806867621839046, - "learning_rate": 1.1424636046713731e-06, - "loss": 0.0009839927777647972, - "step": 21650 - }, - { - "epoch": 3.692242114236999, - "grad_norm": 0.024629997089505196, - "learning_rate": 1.1361958522092652e-06, - "loss": 0.0005969745106995106, - "step": 21655 - }, - { - "epoch": 3.6930946291560103, - "grad_norm": 0.020410476252436638, - "learning_rate": 1.1299450755805669e-06, - "loss": 0.000520474137738347, - "step": 21660 - }, - { - "epoch": 3.6939471440750213, - "grad_norm": 0.061364348977804184, - "learning_rate": 1.1237112777033485e-06, - "loss": 0.0004287329036742449, - "step": 21665 - }, - { - "epoch": 3.6947996589940324, - "grad_norm": 0.050738945603370667, - "learning_rate": 1.1174944614877487e-06, - "loss": 0.0003132443642243743, - "step": 21670 - }, - { - "epoch": 3.6956521739130435, - "grad_norm": 0.01692713052034378, - "learning_rate": 1.1112946298359876e-06, - "loss": 0.000536510581150651, - "step": 21675 - }, - { - "epoch": 3.6965046888320545, - "grad_norm": 0.017278827726840973, - "learning_rate": 1.1051117856423583e-06, - "loss": 0.0004452986642718315, - "step": 21680 - }, - { - "epoch": 3.6973572037510656, - "grad_norm": 0.054739370942115784, - "learning_rate": 1.0989459317932187e-06, - "loss": 0.0005310873501002788, - "step": 21685 - }, - { - "epoch": 3.6982097186700766, - "grad_norm": 0.11146213859319687, - "learning_rate": 1.0927970711669786e-06, - "loss": 0.0014278174377977848, - "step": 21690 - }, - { - "epoch": 3.6990622335890877, - "grad_norm": 0.03265102952718735, - "learning_rate": 1.0866652066341506e-06, - "loss": 0.00036611778195947407, - "step": 21695 - }, - { - "epoch": 3.6999147485080988, - "grad_norm": 0.044467322528362274, - "learning_rate": 1.080550341057282e-06, - "loss": 0.000625171372666955, - "step": 21700 - }, - { - "epoch": 3.70076726342711, - "grad_norm": 0.03242240473628044, - "learning_rate": 1.0744524772909977e-06, - "loss": 0.0004086061846464872, - "step": 21705 - }, - { - "epoch": 3.701619778346121, - "grad_norm": 0.017928048968315125, - "learning_rate": 1.0683716181819873e-06, - "loss": 0.0003092557191848755, - "step": 21710 - }, - { - "epoch": 3.7024722932651324, - "grad_norm": 0.11531335115432739, - "learning_rate": 1.0623077665689844e-06, - "loss": 0.0016329120844602585, - "step": 21715 - }, - { - "epoch": 3.703324808184143, - "grad_norm": 0.008593913167715073, - "learning_rate": 1.056260925282812e-06, - "loss": 0.0001580311683937907, - "step": 21720 - }, - { - "epoch": 3.7041773231031545, - "grad_norm": 0.061547067016363144, - "learning_rate": 1.0502310971463285e-06, - "loss": 0.001316339522600174, - "step": 21725 - }, - { - "epoch": 3.705029838022165, - "grad_norm": 0.057052768766880035, - "learning_rate": 1.0442182849744656e-06, - "loss": 0.00038654208183288576, - "step": 21730 - }, - { - "epoch": 3.7058823529411766, - "grad_norm": 0.03813531622290611, - "learning_rate": 1.038222491574199e-06, - "loss": 0.0002339026890695095, - "step": 21735 - }, - { - "epoch": 3.7067348678601877, - "grad_norm": 0.12125222384929657, - "learning_rate": 1.0322437197445605e-06, - "loss": 0.0009862667880952357, - "step": 21740 - }, - { - "epoch": 3.7075873827791987, - "grad_norm": 0.05146624147891998, - "learning_rate": 1.0262819722766463e-06, - "loss": 0.00039231935515999795, - "step": 21745 - }, - { - "epoch": 3.70843989769821, - "grad_norm": 0.0025069713592529297, - "learning_rate": 1.020337251953597e-06, - "loss": 0.0004076188895851374, - "step": 21750 - }, - { - "epoch": 3.709292412617221, - "grad_norm": 0.041663553565740585, - "learning_rate": 1.0144095615506053e-06, - "loss": 0.0006343224085867405, - "step": 21755 - }, - { - "epoch": 3.710144927536232, - "grad_norm": 0.005100937094539404, - "learning_rate": 1.0084989038349118e-06, - "loss": 0.00014722023624926805, - "step": 21760 - }, - { - "epoch": 3.710997442455243, - "grad_norm": 0.003966887481510639, - "learning_rate": 1.0026052815658088e-06, - "loss": 0.00029324383940547704, - "step": 21765 - }, - { - "epoch": 3.711849957374254, - "grad_norm": 0.010124515742063522, - "learning_rate": 9.96728697494638e-07, - "loss": 0.00028116661123931406, - "step": 21770 - }, - { - "epoch": 3.712702472293265, - "grad_norm": 0.024313004687428474, - "learning_rate": 9.908691543647873e-07, - "loss": 0.0006351057440042496, - "step": 21775 - }, - { - "epoch": 3.713554987212276, - "grad_norm": 0.008684827014803886, - "learning_rate": 9.850266549116775e-07, - "loss": 0.0005675890017300845, - "step": 21780 - }, - { - "epoch": 3.7144075021312872, - "grad_norm": 0.02833733707666397, - "learning_rate": 9.792012018627851e-07, - "loss": 0.0004331086296588182, - "step": 21785 - }, - { - "epoch": 3.7152600170502983, - "grad_norm": 0.07219739258289337, - "learning_rate": 9.733927979376304e-07, - "loss": 0.000644698552787304, - "step": 21790 - }, - { - "epoch": 3.7161125319693094, - "grad_norm": 0.006920814514160156, - "learning_rate": 9.676014458477655e-07, - "loss": 0.0005529311019927263, - "step": 21795 - }, - { - "epoch": 3.7169650468883204, - "grad_norm": 0.033863846212625504, - "learning_rate": 9.618271482967858e-07, - "loss": 0.0004058407619595528, - "step": 21800 - }, - { - "epoch": 3.7178175618073315, - "grad_norm": 0.07452560216188431, - "learning_rate": 9.560699079803225e-07, - "loss": 0.00045907692983746526, - "step": 21805 - }, - { - "epoch": 3.718670076726343, - "grad_norm": 0.006375455763190985, - "learning_rate": 9.50329727586055e-07, - "loss": 0.0002314785495400429, - "step": 21810 - }, - { - "epoch": 3.7195225916453536, - "grad_norm": 0.009601259604096413, - "learning_rate": 9.446066097936814e-07, - "loss": 0.00022105807438492774, - "step": 21815 - }, - { - "epoch": 3.720375106564365, - "grad_norm": 0.005180593114346266, - "learning_rate": 9.389005572749519e-07, - "loss": 0.0005345941055566073, - "step": 21820 - }, - { - "epoch": 3.7212276214833757, - "grad_norm": 0.01795378513634205, - "learning_rate": 9.33211572693636e-07, - "loss": 0.0010281124152243138, - "step": 21825 - }, - { - "epoch": 3.722080136402387, - "grad_norm": 0.005491979885846376, - "learning_rate": 9.275396587055422e-07, - "loss": 0.0005115194246172905, - "step": 21830 - }, - { - "epoch": 3.7229326513213983, - "grad_norm": 0.015443346463143826, - "learning_rate": 9.218848179585112e-07, - "loss": 0.0002423166995868087, - "step": 21835 - }, - { - "epoch": 3.7237851662404093, - "grad_norm": 0.005140860099345446, - "learning_rate": 9.162470530924101e-07, - "loss": 0.0002986373612657189, - "step": 21840 - }, - { - "epoch": 3.7246376811594204, - "grad_norm": 0.039769724011421204, - "learning_rate": 9.106263667391298e-07, - "loss": 0.0006013393867760897, - "step": 21845 - }, - { - "epoch": 3.7254901960784315, - "grad_norm": 0.05602128803730011, - "learning_rate": 9.05022761522596e-07, - "loss": 0.0014486395753920077, - "step": 21850 - }, - { - "epoch": 3.7263427109974425, - "grad_norm": 0.0194423608481884, - "learning_rate": 8.994362400587624e-07, - "loss": 0.00029678286518901585, - "step": 21855 - }, - { - "epoch": 3.7271952259164536, - "grad_norm": 0.015371611341834068, - "learning_rate": 8.938668049555966e-07, - "loss": 0.0003891410538926721, - "step": 21860 - }, - { - "epoch": 3.7280477408354646, - "grad_norm": 0.03265884891152382, - "learning_rate": 8.883144588131066e-07, - "loss": 0.0002852254081517458, - "step": 21865 - }, - { - "epoch": 3.7289002557544757, - "grad_norm": 0.0016827320214360952, - "learning_rate": 8.827792042233023e-07, - "loss": 0.00022765444591641427, - "step": 21870 - }, - { - "epoch": 3.7297527706734868, - "grad_norm": 0.053975410759449005, - "learning_rate": 8.772610437702249e-07, - "loss": 0.0005050489213317633, - "step": 21875 - }, - { - "epoch": 3.730605285592498, - "grad_norm": 0.041164103895425797, - "learning_rate": 8.717599800299387e-07, - "loss": 0.0007206748705357313, - "step": 21880 - }, - { - "epoch": 3.731457800511509, - "grad_norm": 0.01276206225156784, - "learning_rate": 8.662760155705273e-07, - "loss": 0.0003082378301769495, - "step": 21885 - }, - { - "epoch": 3.73231031543052, - "grad_norm": 0.014569109305739403, - "learning_rate": 8.6080915295208e-07, - "loss": 0.002179015427827835, - "step": 21890 - }, - { - "epoch": 3.733162830349531, - "grad_norm": 0.03148796409368515, - "learning_rate": 8.553593947267137e-07, - "loss": 0.0005310860928148031, - "step": 21895 - }, - { - "epoch": 3.734015345268542, - "grad_norm": 0.002400216180831194, - "learning_rate": 8.499267434385559e-07, - "loss": 0.00037463915068656204, - "step": 21900 - }, - { - "epoch": 3.734867860187553, - "grad_norm": 0.07524898648262024, - "learning_rate": 8.445112016237446e-07, - "loss": 0.0002757473383098841, - "step": 21905 - }, - { - "epoch": 3.735720375106564, - "grad_norm": 0.03802500292658806, - "learning_rate": 8.391127718104448e-07, - "loss": 0.0003543847240507603, - "step": 21910 - }, - { - "epoch": 3.7365728900255757, - "grad_norm": 0.04768325388431549, - "learning_rate": 8.3373145651882e-07, - "loss": 0.0006353846751153469, - "step": 21915 - }, - { - "epoch": 3.7374254049445863, - "grad_norm": 0.030708076432347298, - "learning_rate": 8.283672582610396e-07, - "loss": 0.00034714918583631513, - "step": 21920 - }, - { - "epoch": 3.738277919863598, - "grad_norm": 0.08609063178300858, - "learning_rate": 8.230201795413005e-07, - "loss": 0.0007828005589544773, - "step": 21925 - }, - { - "epoch": 3.7391304347826084, - "grad_norm": 0.022040044888854027, - "learning_rate": 8.176902228557894e-07, - "loss": 0.0003338766284286976, - "step": 21930 - }, - { - "epoch": 3.73998294970162, - "grad_norm": 0.033182695508003235, - "learning_rate": 8.123773906927117e-07, - "loss": 0.00040285829454660415, - "step": 21935 - }, - { - "epoch": 3.740835464620631, - "grad_norm": 0.034886643290519714, - "learning_rate": 8.07081685532271e-07, - "loss": 0.0003746964270249009, - "step": 21940 - }, - { - "epoch": 3.741687979539642, - "grad_norm": 0.010563422925770283, - "learning_rate": 8.01803109846677e-07, - "loss": 0.00028156861662864685, - "step": 21945 - }, - { - "epoch": 3.742540494458653, - "grad_norm": 0.00919833779335022, - "learning_rate": 7.965416661001506e-07, - "loss": 0.00018601591000333428, - "step": 21950 - }, - { - "epoch": 3.743393009377664, - "grad_norm": 0.07408086955547333, - "learning_rate": 7.912973567489017e-07, - "loss": 0.0004837862215936184, - "step": 21955 - }, - { - "epoch": 3.7442455242966752, - "grad_norm": 0.012142200022935867, - "learning_rate": 7.860701842411596e-07, - "loss": 0.0005443771369755268, - "step": 21960 - }, - { - "epoch": 3.7450980392156863, - "grad_norm": 0.005506236106157303, - "learning_rate": 7.808601510171306e-07, - "loss": 0.0010629001073539257, - "step": 21965 - }, - { - "epoch": 3.7459505541346974, - "grad_norm": 0.01200301107019186, - "learning_rate": 7.756672595090316e-07, - "loss": 0.0003482490312308073, - "step": 21970 - }, - { - "epoch": 3.7468030690537084, - "grad_norm": 0.020502127707004547, - "learning_rate": 7.704915121410859e-07, - "loss": 0.0010052938014268875, - "step": 21975 - }, - { - "epoch": 3.7476555839727195, - "grad_norm": 0.010255628265440464, - "learning_rate": 7.653329113294984e-07, - "loss": 0.00015571712283417581, - "step": 21980 - }, - { - "epoch": 3.7485080988917305, - "grad_norm": 0.0257349144667387, - "learning_rate": 7.601914594824801e-07, - "loss": 0.0002349275629967451, - "step": 21985 - }, - { - "epoch": 3.7493606138107416, - "grad_norm": 0.007586009334772825, - "learning_rate": 7.550671590002236e-07, - "loss": 0.0009384787641465664, - "step": 21990 - }, - { - "epoch": 3.7493606138107416, - "eval_loss": 0.06301642954349518, - "eval_runtime": 3.7241, - "eval_samples_per_second": 67.667, - "eval_steps_per_second": 1.074, - "step": 21990 - }, - { - "eval_cer_subset": 0.013206963375510935, - "eval_cer_subset_edit_distance": 811, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 21990 - }, - { - "epoch": 3.7502131287297527, - "grad_norm": 0.01631186157464981, - "learning_rate": 7.499600122749277e-07, - "loss": 0.00024612166453152895, - "step": 21995 - }, - { - "epoch": 3.7510656436487637, - "grad_norm": 0.0041867573745548725, - "learning_rate": 7.448700216907814e-07, - "loss": 0.00025008111260831355, - "step": 22000 - }, - { - "epoch": 3.7519181585677748, - "grad_norm": 0.008313664235174656, - "learning_rate": 7.397971896239585e-07, - "loss": 0.0005951725412160158, - "step": 22005 - }, - { - "epoch": 3.7527706734867863, - "grad_norm": 0.04596323147416115, - "learning_rate": 7.347415184426275e-07, - "loss": 0.0003834041766822338, - "step": 22010 - }, - { - "epoch": 3.753623188405797, - "grad_norm": 0.0066893636249005795, - "learning_rate": 7.297030105069379e-07, - "loss": 0.00040263482369482515, - "step": 22015 - }, - { - "epoch": 3.7544757033248084, - "grad_norm": 0.05047721043229103, - "learning_rate": 7.246816681690415e-07, - "loss": 0.0008630319498479366, - "step": 22020 - }, - { - "epoch": 3.755328218243819, - "grad_norm": 0.045102309435606, - "learning_rate": 7.196774937730632e-07, - "loss": 0.0004944218788295984, - "step": 22025 - }, - { - "epoch": 3.7561807331628305, - "grad_norm": 0.035739608108997345, - "learning_rate": 7.146904896551175e-07, - "loss": 0.000390232028439641, - "step": 22030 - }, - { - "epoch": 3.7570332480818416, - "grad_norm": 0.07845264673233032, - "learning_rate": 7.097206581433049e-07, - "loss": 0.0006002193316817283, - "step": 22035 - }, - { - "epoch": 3.7578857630008526, - "grad_norm": 0.06677660346031189, - "learning_rate": 7.047680015577067e-07, - "loss": 0.0006055246107280254, - "step": 22040 - }, - { - "epoch": 3.7587382779198637, - "grad_norm": 0.005762454587966204, - "learning_rate": 6.998325222103904e-07, - "loss": 0.0002784677781164646, - "step": 22045 - }, - { - "epoch": 3.7595907928388748, - "grad_norm": 0.052431393414735794, - "learning_rate": 6.949142224054003e-07, - "loss": 0.0006639796774834394, - "step": 22050 - }, - { - "epoch": 3.760443307757886, - "grad_norm": 0.060557615011930466, - "learning_rate": 6.900131044387663e-07, - "loss": 0.0003828573739156127, - "step": 22055 - }, - { - "epoch": 3.761295822676897, - "grad_norm": 0.020646315068006516, - "learning_rate": 6.851291705984835e-07, - "loss": 0.0003116678912192583, - "step": 22060 - }, - { - "epoch": 3.762148337595908, - "grad_norm": 0.02366071753203869, - "learning_rate": 6.802624231645445e-07, - "loss": 0.00022359511349350213, - "step": 22065 - }, - { - "epoch": 3.763000852514919, - "grad_norm": 0.04398680850863457, - "learning_rate": 6.754128644089072e-07, - "loss": 0.0004183667246252298, - "step": 22070 - }, - { - "epoch": 3.76385336743393, - "grad_norm": 0.005853195209056139, - "learning_rate": 6.705804965955062e-07, - "loss": 0.0005386173259466886, - "step": 22075 - }, - { - "epoch": 3.764705882352941, - "grad_norm": 0.009862879291176796, - "learning_rate": 6.657653219802453e-07, - "loss": 0.00016693586949259042, - "step": 22080 - }, - { - "epoch": 3.765558397271952, - "grad_norm": 0.07206539809703827, - "learning_rate": 6.60967342811014e-07, - "loss": 0.00036818250082433225, - "step": 22085 - }, - { - "epoch": 3.7664109121909632, - "grad_norm": 0.014724505133926868, - "learning_rate": 6.561865613276665e-07, - "loss": 0.00038080462254583833, - "step": 22090 - }, - { - "epoch": 3.7672634271099743, - "grad_norm": 0.0687309056520462, - "learning_rate": 6.514229797620382e-07, - "loss": 0.0005503068678081036, - "step": 22095 - }, - { - "epoch": 3.7681159420289854, - "grad_norm": 0.07538152486085892, - "learning_rate": 6.46676600337917e-07, - "loss": 0.0005414205603301525, - "step": 22100 - }, - { - "epoch": 3.7689684569479964, - "grad_norm": 0.013810453936457634, - "learning_rate": 6.419474252710763e-07, - "loss": 0.000361010548658669, - "step": 22105 - }, - { - "epoch": 3.7698209718670075, - "grad_norm": 0.01780586875975132, - "learning_rate": 6.3723545676925e-07, - "loss": 0.0002679239492863417, - "step": 22110 - }, - { - "epoch": 3.770673486786019, - "grad_norm": 0.012996112927794456, - "learning_rate": 6.325406970321453e-07, - "loss": 0.00031234726775437595, - "step": 22115 - }, - { - "epoch": 3.7715260017050296, - "grad_norm": 0.0011625232873484492, - "learning_rate": 6.278631482514257e-07, - "loss": 0.00037115085870027543, - "step": 22120 - }, - { - "epoch": 3.772378516624041, - "grad_norm": 0.008648713119328022, - "learning_rate": 6.232028126107319e-07, - "loss": 0.0002120264805853367, - "step": 22125 - }, - { - "epoch": 3.7732310315430517, - "grad_norm": 0.008668708615005016, - "learning_rate": 6.185596922856611e-07, - "loss": 0.00017857172060757874, - "step": 22130 - }, - { - "epoch": 3.7740835464620632, - "grad_norm": 0.020673241466283798, - "learning_rate": 6.139337894437795e-07, - "loss": 0.00015971544198691844, - "step": 22135 - }, - { - "epoch": 3.7749360613810743, - "grad_norm": 0.06992864608764648, - "learning_rate": 6.093251062446097e-07, - "loss": 0.0013386713340878486, - "step": 22140 - }, - { - "epoch": 3.7757885763000854, - "grad_norm": 0.06895331293344498, - "learning_rate": 6.04733644839639e-07, - "loss": 0.00039470652118325236, - "step": 22145 - }, - { - "epoch": 3.7766410912190964, - "grad_norm": 0.03614363074302673, - "learning_rate": 6.001594073723151e-07, - "loss": 0.0002598386025056243, - "step": 22150 - }, - { - "epoch": 3.7774936061381075, - "grad_norm": 0.025721333920955658, - "learning_rate": 5.956023959780427e-07, - "loss": 0.0004882506560534239, - "step": 22155 - }, - { - "epoch": 3.7783461210571185, - "grad_norm": 0.16202254593372345, - "learning_rate": 5.910626127841863e-07, - "loss": 0.0007598648779094219, - "step": 22160 - }, - { - "epoch": 3.7791986359761296, - "grad_norm": 0.01326384861022234, - "learning_rate": 5.865400599100678e-07, - "loss": 0.00036332786548882724, - "step": 22165 - }, - { - "epoch": 3.7800511508951407, - "grad_norm": 0.023784616962075233, - "learning_rate": 5.820347394669689e-07, - "loss": 0.0005443296395242214, - "step": 22170 - }, - { - "epoch": 3.7809036658141517, - "grad_norm": 0.034594547003507614, - "learning_rate": 5.775466535581155e-07, - "loss": 0.0002668139757588506, - "step": 22175 - }, - { - "epoch": 3.7817561807331628, - "grad_norm": 0.010697634890675545, - "learning_rate": 5.730758042787026e-07, - "loss": 0.00031887323129922154, - "step": 22180 - }, - { - "epoch": 3.782608695652174, - "grad_norm": 0.01985642872750759, - "learning_rate": 5.686221937158688e-07, - "loss": 0.00031337011605501173, - "step": 22185 - }, - { - "epoch": 3.783461210571185, - "grad_norm": 0.021349545568227768, - "learning_rate": 5.641858239487096e-07, - "loss": 0.0012821664102375507, - "step": 22190 - }, - { - "epoch": 3.784313725490196, - "grad_norm": 0.023454997688531876, - "learning_rate": 5.597666970482681e-07, - "loss": 0.0005118092987686396, - "step": 22195 - }, - { - "epoch": 3.785166240409207, - "grad_norm": 0.020847661420702934, - "learning_rate": 5.553648150775359e-07, - "loss": 0.0006337463855743408, - "step": 22200 - }, - { - "epoch": 3.786018755328218, - "grad_norm": 0.02310790866613388, - "learning_rate": 5.509801800914648e-07, - "loss": 0.00020002322271466254, - "step": 22205 - }, - { - "epoch": 3.7868712702472296, - "grad_norm": 0.01310602854937315, - "learning_rate": 5.466127941369428e-07, - "loss": 0.0004617081955075264, - "step": 22210 - }, - { - "epoch": 3.78772378516624, - "grad_norm": 0.015786537900567055, - "learning_rate": 5.422626592528137e-07, - "loss": 0.00013349256478250026, - "step": 22215 - }, - { - "epoch": 3.7885763000852517, - "grad_norm": 0.06509364396333694, - "learning_rate": 5.379297774698657e-07, - "loss": 0.0006992133799940348, - "step": 22220 - }, - { - "epoch": 3.7894288150042623, - "grad_norm": 0.019068438559770584, - "learning_rate": 5.336141508108266e-07, - "loss": 0.0005609648767858743, - "step": 22225 - }, - { - "epoch": 3.790281329923274, - "grad_norm": 0.021051403135061264, - "learning_rate": 5.293157812903847e-07, - "loss": 0.0008666029199957848, - "step": 22230 - }, - { - "epoch": 3.791133844842285, - "grad_norm": 0.03582745045423508, - "learning_rate": 5.250346709151559e-07, - "loss": 0.0006231529172509908, - "step": 22235 - }, - { - "epoch": 3.791986359761296, - "grad_norm": 0.0056411512196063995, - "learning_rate": 5.207708216837039e-07, - "loss": 0.00033764943946152927, - "step": 22240 - }, - { - "epoch": 3.792838874680307, - "grad_norm": 0.016097834333777428, - "learning_rate": 5.165242355865365e-07, - "loss": 0.0003145002759993076, - "step": 22245 - }, - { - "epoch": 3.793691389599318, - "grad_norm": 0.001957574160769582, - "learning_rate": 5.122949146061011e-07, - "loss": 0.0005082461517304182, - "step": 22250 - }, - { - "epoch": 3.794543904518329, - "grad_norm": 0.021488793194293976, - "learning_rate": 5.080828607167895e-07, - "loss": 0.0002109076827764511, - "step": 22255 - }, - { - "epoch": 3.79539641943734, - "grad_norm": 0.00327285542152822, - "learning_rate": 5.038880758849242e-07, - "loss": 0.00029755146242678167, - "step": 22260 - }, - { - "epoch": 3.7962489343563512, - "grad_norm": 0.04071727767586708, - "learning_rate": 4.997105620687642e-07, - "loss": 0.00035189902409911157, - "step": 22265 - }, - { - "epoch": 3.7971014492753623, - "grad_norm": 0.043891459703445435, - "learning_rate": 4.955503212185241e-07, - "loss": 0.00030435039661824703, - "step": 22270 - }, - { - "epoch": 3.7979539641943734, - "grad_norm": 0.04061891511082649, - "learning_rate": 4.914073552763381e-07, - "loss": 0.0007196913473308086, - "step": 22275 - }, - { - "epoch": 3.7988064791133844, - "grad_norm": 0.006950880866497755, - "learning_rate": 4.872816661762799e-07, - "loss": 0.0005048359278589487, - "step": 22280 - }, - { - "epoch": 3.7996589940323955, - "grad_norm": 0.01075704861432314, - "learning_rate": 4.831732558443588e-07, - "loss": 0.00047706514596939086, - "step": 22285 - }, - { - "epoch": 3.8005115089514065, - "grad_norm": 0.05214826390147209, - "learning_rate": 4.790821261985114e-07, - "loss": 0.0003694279119372368, - "step": 22290 - }, - { - "epoch": 3.8013640238704176, - "grad_norm": 0.10015593469142914, - "learning_rate": 4.750082791486226e-07, - "loss": 0.0009528685361146927, - "step": 22295 - }, - { - "epoch": 3.8022165387894287, - "grad_norm": 0.014796345494687557, - "learning_rate": 4.709517165965002e-07, - "loss": 0.00034629302099347113, - "step": 22300 - }, - { - "epoch": 3.80306905370844, - "grad_norm": 0.08756010234355927, - "learning_rate": 4.669124404358709e-07, - "loss": 0.0015204890631139278, - "step": 22305 - }, - { - "epoch": 3.803921568627451, - "grad_norm": 0.011379587464034557, - "learning_rate": 4.6289045255241407e-07, - "loss": 0.00043293628841638564, - "step": 22310 - }, - { - "epoch": 3.8047740835464623, - "grad_norm": 0.006499307695776224, - "learning_rate": 4.588857548237193e-07, - "loss": 0.0001419330248609185, - "step": 22315 - }, - { - "epoch": 3.805626598465473, - "grad_norm": 0.010868554934859276, - "learning_rate": 4.5489834911932034e-07, - "loss": 0.0003131470642983913, - "step": 22320 - }, - { - "epoch": 3.8064791133844844, - "grad_norm": 0.1949165314435959, - "learning_rate": 4.509282373006698e-07, - "loss": 0.000589557969942689, - "step": 22325 - }, - { - "epoch": 3.8073316283034955, - "grad_norm": 0.062032103538513184, - "learning_rate": 4.4697542122114766e-07, - "loss": 0.0010706196539103986, - "step": 22330 - }, - { - "epoch": 3.8081841432225065, - "grad_norm": 0.03491361066699028, - "learning_rate": 4.430399027260528e-07, - "loss": 0.00013118325732648374, - "step": 22335 - }, - { - "epoch": 3.8090366581415176, - "grad_norm": 0.012687149457633495, - "learning_rate": 4.39121683652624e-07, - "loss": 0.0006070541683584451, - "step": 22340 - }, - { - "epoch": 3.8098891730605287, - "grad_norm": 0.009227769449353218, - "learning_rate": 4.352207658300105e-07, - "loss": 0.00013156197965145112, - "step": 22345 - }, - { - "epoch": 3.8107416879795397, - "grad_norm": 0.025756431743502617, - "learning_rate": 4.3133715107929736e-07, - "loss": 0.0002231092657893896, - "step": 22350 - }, - { - "epoch": 3.8115942028985508, - "grad_norm": 0.02315904013812542, - "learning_rate": 4.2747084121348e-07, - "loss": 0.0008578922599554062, - "step": 22355 - }, - { - "epoch": 3.812446717817562, - "grad_norm": 0.03867887705564499, - "learning_rate": 4.2362183803748145e-07, - "loss": 0.001257845014333725, - "step": 22360 - }, - { - "epoch": 3.813299232736573, - "grad_norm": 0.007496925536543131, - "learning_rate": 4.197901433481435e-07, - "loss": 0.00025641615502536297, - "step": 22365 - }, - { - "epoch": 3.814151747655584, - "grad_norm": 0.006181985139846802, - "learning_rate": 4.159757589342352e-07, - "loss": 0.000565656740218401, - "step": 22370 - }, - { - "epoch": 3.815004262574595, - "grad_norm": 0.0055122580379247665, - "learning_rate": 4.121786865764282e-07, - "loss": 0.0002753081964328885, - "step": 22375 - }, - { - "epoch": 3.815856777493606, - "grad_norm": 0.06405172497034073, - "learning_rate": 4.083989280473293e-07, - "loss": 0.00029685499612241983, - "step": 22380 - }, - { - "epoch": 3.816709292412617, - "grad_norm": 0.015038585290312767, - "learning_rate": 4.0463648511145223e-07, - "loss": 0.00019005691865459085, - "step": 22385 - }, - { - "epoch": 3.817561807331628, - "grad_norm": 0.006501917727291584, - "learning_rate": 4.008913595252336e-07, - "loss": 0.00047526941634714606, - "step": 22390 - }, - { - "epoch": 3.8184143222506393, - "grad_norm": 0.044223811477422714, - "learning_rate": 3.971635530370207e-07, - "loss": 0.00044773337431252, - "step": 22395 - }, - { - "epoch": 3.8192668371696503, - "grad_norm": 0.11778905987739563, - "learning_rate": 3.934530673870714e-07, - "loss": 0.0004794740118086338, - "step": 22400 - }, - { - "epoch": 3.8201193520886614, - "grad_norm": 0.04858732223510742, - "learning_rate": 3.897599043075753e-07, - "loss": 0.0005154036451131105, - "step": 22405 - }, - { - "epoch": 3.820971867007673, - "grad_norm": 0.047194644808769226, - "learning_rate": 3.860840655226155e-07, - "loss": 0.0005436111241579056, - "step": 22410 - }, - { - "epoch": 3.8218243819266835, - "grad_norm": 0.02568012662231922, - "learning_rate": 3.8242555274820287e-07, - "loss": 0.0006584774237126112, - "step": 22415 - }, - { - "epoch": 3.822676896845695, - "grad_norm": 0.006387351080775261, - "learning_rate": 3.787843676922461e-07, - "loss": 0.0005248990841209888, - "step": 22420 - }, - { - "epoch": 3.8235294117647056, - "grad_norm": 0.03069436363875866, - "learning_rate": 3.751605120545731e-07, - "loss": 0.00037630724254995586, - "step": 22425 - }, - { - "epoch": 3.824381926683717, - "grad_norm": 0.007291358429938555, - "learning_rate": 3.715539875269222e-07, - "loss": 0.0008363793604075909, - "step": 22430 - }, - { - "epoch": 3.825234441602728, - "grad_norm": 0.009530414827167988, - "learning_rate": 3.6796479579293824e-07, - "loss": 0.00019785722251981498, - "step": 22435 - }, - { - "epoch": 3.8260869565217392, - "grad_norm": 0.008785492740571499, - "learning_rate": 3.643929385281727e-07, - "loss": 0.000590974697843194, - "step": 22440 - }, - { - "epoch": 3.8269394714407503, - "grad_norm": 0.007667489815503359, - "learning_rate": 3.608384174000958e-07, - "loss": 0.00028850554954260586, - "step": 22445 - }, - { - "epoch": 3.8277919863597614, - "grad_norm": 0.0715310126543045, - "learning_rate": 3.573012340680637e-07, - "loss": 0.0009551153518259526, - "step": 22450 - }, - { - "epoch": 3.8286445012787724, - "grad_norm": 0.009573518298566341, - "learning_rate": 3.5378139018335526e-07, - "loss": 0.0001834099180996418, - "step": 22455 - }, - { - "epoch": 3.8294970161977835, - "grad_norm": 0.024146724492311478, - "learning_rate": 3.502788873891604e-07, - "loss": 0.0002805993659421802, - "step": 22460 - }, - { - "epoch": 3.8303495311167945, - "grad_norm": 0.034584276378154755, - "learning_rate": 3.4679372732055455e-07, - "loss": 0.00022452049888670443, - "step": 22465 - }, - { - "epoch": 3.8312020460358056, - "grad_norm": 0.01712440513074398, - "learning_rate": 3.433259116045278e-07, - "loss": 0.00025582562666386365, - "step": 22470 - }, - { - "epoch": 3.8320545609548167, - "grad_norm": 0.03746594488620758, - "learning_rate": 3.398754418599728e-07, - "loss": 0.0005911256186664105, - "step": 22475 - }, - { - "epoch": 3.8329070758738277, - "grad_norm": 0.010265517979860306, - "learning_rate": 3.3644231969768427e-07, - "loss": 0.0005279291886836291, - "step": 22480 - }, - { - "epoch": 3.833759590792839, - "grad_norm": 0.00778925372287631, - "learning_rate": 3.3302654672035523e-07, - "loss": 0.00015487722121179103, - "step": 22485 - }, - { - "epoch": 3.83461210571185, - "grad_norm": 0.07120466977357864, - "learning_rate": 3.296281245225851e-07, - "loss": 0.00033021410927176477, - "step": 22490 - }, - { - "epoch": 3.835464620630861, - "grad_norm": 0.019761493429541588, - "learning_rate": 3.2624705469086745e-07, - "loss": 0.0005753487348556519, - "step": 22495 - }, - { - "epoch": 3.836317135549872, - "grad_norm": 0.058154042810201645, - "learning_rate": 3.22883338803602e-07, - "loss": 0.0007553929463028908, - "step": 22500 - }, - { - "epoch": 3.8371696504688835, - "grad_norm": 0.04644978418946266, - "learning_rate": 3.1953697843107864e-07, - "loss": 0.0006953209638595581, - "step": 22505 - }, - { - "epoch": 3.838022165387894, - "grad_norm": 0.1766635626554489, - "learning_rate": 3.1620797513549347e-07, - "loss": 0.0003559787990525365, - "step": 22510 - }, - { - "epoch": 3.8388746803069056, - "grad_norm": 0.00365132512524724, - "learning_rate": 3.128963304709367e-07, - "loss": 0.0004078059922903776, - "step": 22515 - }, - { - "epoch": 3.839727195225916, - "grad_norm": 0.030486639589071274, - "learning_rate": 3.096020459833884e-07, - "loss": 0.0005455107428133487, - "step": 22520 - }, - { - "epoch": 3.8405797101449277, - "grad_norm": 0.01159575954079628, - "learning_rate": 3.0632512321073916e-07, - "loss": 0.0001546024577692151, - "step": 22525 - }, - { - "epoch": 3.8414322250639388, - "grad_norm": 0.016208885237574577, - "learning_rate": 3.0306556368275697e-07, - "loss": 0.00098867267370224, - "step": 22530 - }, - { - "epoch": 3.84228473998295, - "grad_norm": 0.03821619600057602, - "learning_rate": 2.998233689211163e-07, - "loss": 0.001981107145547867, - "step": 22535 - }, - { - "epoch": 3.843137254901961, - "grad_norm": 0.08347555249929428, - "learning_rate": 2.9659854043937726e-07, - "loss": 0.0005207284353673458, - "step": 22540 - }, - { - "epoch": 3.843989769820972, - "grad_norm": 0.12094799429178238, - "learning_rate": 2.933910797430064e-07, - "loss": 0.0014771541580557823, - "step": 22545 - }, - { - "epoch": 3.844842284739983, - "grad_norm": 0.009907519444823265, - "learning_rate": 2.9020098832934354e-07, - "loss": 0.00020951812621206045, - "step": 22550 - }, - { - "epoch": 3.845694799658994, - "grad_norm": 0.009634948335587978, - "learning_rate": 2.87028267687639e-07, - "loss": 0.0004478182177990675, - "step": 22555 - }, - { - "epoch": 3.846547314578005, - "grad_norm": 0.012202809564769268, - "learning_rate": 2.8387291929901636e-07, - "loss": 0.0007417299784719944, - "step": 22560 - }, - { - "epoch": 3.847399829497016, - "grad_norm": 0.0760713517665863, - "learning_rate": 2.8073494463649734e-07, - "loss": 0.0011769287288188935, - "step": 22565 - }, - { - "epoch": 3.8482523444160273, - "grad_norm": 0.005865536630153656, - "learning_rate": 2.7761434516499757e-07, - "loss": 0.0004877586383372545, - "step": 22570 - }, - { - "epoch": 3.8491048593350383, - "grad_norm": 0.07838205993175507, - "learning_rate": 2.7451112234131434e-07, - "loss": 0.0007841618731617928, - "step": 22575 - }, - { - "epoch": 3.8499573742540494, - "grad_norm": 0.043550923466682434, - "learning_rate": 2.714252776141346e-07, - "loss": 0.0004276952240616083, - "step": 22580 - }, - { - "epoch": 3.8508098891730604, - "grad_norm": 0.07113270461559296, - "learning_rate": 2.6835681242403097e-07, - "loss": 0.0004472899250686169, - "step": 22585 - }, - { - "epoch": 3.8516624040920715, - "grad_norm": 0.005593425128608942, - "learning_rate": 2.653057282034743e-07, - "loss": 0.0005118703935295344, - "step": 22590 - }, - { - "epoch": 3.8525149190110826, - "grad_norm": 0.0488508939743042, - "learning_rate": 2.6227202637680025e-07, - "loss": 0.0004050908610224724, - "step": 22595 - }, - { - "epoch": 3.8533674339300936, - "grad_norm": 0.006526235956698656, - "learning_rate": 2.592557083602509e-07, - "loss": 0.00033729532733559606, - "step": 22600 - }, - { - "epoch": 3.8542199488491047, - "grad_norm": 0.06794753670692444, - "learning_rate": 2.5625677556194156e-07, - "loss": 0.0005501693580299616, - "step": 22605 - }, - { - "epoch": 3.855072463768116, - "grad_norm": 0.01906239427626133, - "learning_rate": 2.532752293818732e-07, - "loss": 0.00029343762435019016, - "step": 22610 - }, - { - "epoch": 3.855924978687127, - "grad_norm": 0.018383421003818512, - "learning_rate": 2.5031107121192827e-07, - "loss": 0.00016756532713770866, - "step": 22615 - }, - { - "epoch": 3.8567774936061383, - "grad_norm": 0.024472814053297043, - "learning_rate": 2.4736430243587484e-07, - "loss": 0.0008003754541277885, - "step": 22620 - }, - { - "epoch": 3.857630008525149, - "grad_norm": 0.015008813701570034, - "learning_rate": 2.4443492442936666e-07, - "loss": 0.000387492123991251, - "step": 22625 - }, - { - "epoch": 3.8584825234441604, - "grad_norm": 0.00883245375007391, - "learning_rate": 2.4152293855993073e-07, - "loss": 0.0003626285819336772, - "step": 22630 - }, - { - "epoch": 3.8593350383631715, - "grad_norm": 0.09013792127370834, - "learning_rate": 2.386283461869837e-07, - "loss": 0.0009348958730697632, - "step": 22635 - }, - { - "epoch": 3.8601875532821825, - "grad_norm": 0.022092169150710106, - "learning_rate": 2.3575114866181134e-07, - "loss": 0.000852299015969038, - "step": 22640 - }, - { - "epoch": 3.8610400682011936, - "grad_norm": 0.041009481996297836, - "learning_rate": 2.3289134732758923e-07, - "loss": 0.0002561352448537946, - "step": 22645 - }, - { - "epoch": 3.8618925831202047, - "grad_norm": 0.01204719115048647, - "learning_rate": 2.300489435193703e-07, - "loss": 0.00041399816982448103, - "step": 22650 - }, - { - "epoch": 3.8627450980392157, - "grad_norm": 0.006769226398319006, - "learning_rate": 2.2722393856407644e-07, - "loss": 0.0007864254526793957, - "step": 22655 - }, - { - "epoch": 3.863597612958227, - "grad_norm": 0.03550838679075241, - "learning_rate": 2.2441633378051522e-07, - "loss": 0.0007944885641336441, - "step": 22660 - }, - { - "epoch": 3.864450127877238, - "grad_norm": 0.018093502148985863, - "learning_rate": 2.2162613047937567e-07, - "loss": 0.0004680437035858631, - "step": 22665 - }, - { - "epoch": 3.865302642796249, - "grad_norm": 0.052169136703014374, - "learning_rate": 2.188533299632117e-07, - "loss": 0.0005756160244345665, - "step": 22670 - }, - { - "epoch": 3.86615515771526, - "grad_norm": 0.0072258529253304005, - "learning_rate": 2.1609793352646288e-07, - "loss": 0.0004663677420467138, - "step": 22675 - }, - { - "epoch": 3.867007672634271, - "grad_norm": 0.005863961763679981, - "learning_rate": 2.1335994245543358e-07, - "loss": 0.0002789400052279234, - "step": 22680 - }, - { - "epoch": 3.867860187553282, - "grad_norm": 0.018820008262991905, - "learning_rate": 2.1063935802831804e-07, - "loss": 0.00045074778608977795, - "step": 22685 - }, - { - "epoch": 3.868712702472293, - "grad_norm": 0.012297754175961018, - "learning_rate": 2.07936181515167e-07, - "loss": 0.00035502421669661997, - "step": 22690 - }, - { - "epoch": 3.869565217391304, - "grad_norm": 0.016527209430933, - "learning_rate": 2.0525041417792097e-07, - "loss": 0.0003985234070569277, - "step": 22695 - }, - { - "epoch": 3.8704177323103153, - "grad_norm": 0.02526754140853882, - "learning_rate": 2.0258205727038123e-07, - "loss": 0.0005783138796687127, - "step": 22700 - }, - { - "epoch": 3.8712702472293268, - "grad_norm": 0.03327864035964012, - "learning_rate": 1.9993111203822215e-07, - "loss": 0.0004067671485245228, - "step": 22705 - }, - { - "epoch": 3.8721227621483374, - "grad_norm": 0.027886848896741867, - "learning_rate": 1.972975797190038e-07, - "loss": 0.00023027975112199783, - "step": 22710 - }, - { - "epoch": 3.872975277067349, - "grad_norm": 0.05536410212516785, - "learning_rate": 1.9468146154213438e-07, - "loss": 0.0006173822097480297, - "step": 22715 - }, - { - "epoch": 3.8738277919863595, - "grad_norm": 0.03576589748263359, - "learning_rate": 1.92082758728912e-07, - "loss": 0.000484804529696703, - "step": 22720 - }, - { - "epoch": 3.874680306905371, - "grad_norm": 0.00873229093849659, - "learning_rate": 1.8950147249249536e-07, - "loss": 0.0002321997657418251, - "step": 22725 - }, - { - "epoch": 3.875532821824382, - "grad_norm": 0.039964936673641205, - "learning_rate": 1.8693760403791642e-07, - "loss": 0.0005052187014371156, - "step": 22730 - }, - { - "epoch": 3.876385336743393, - "grad_norm": 0.08028698712587357, - "learning_rate": 1.8439115456207188e-07, - "loss": 0.0006155148148536682, - "step": 22735 - }, - { - "epoch": 3.877237851662404, - "grad_norm": 0.027846908196806908, - "learning_rate": 1.8186212525373168e-07, - "loss": 0.0013848446309566499, - "step": 22740 - }, - { - "epoch": 3.8780903665814153, - "grad_norm": 0.020065290853381157, - "learning_rate": 1.7935051729353056e-07, - "loss": 0.00031999857164919374, - "step": 22745 - }, - { - "epoch": 3.8789428815004263, - "grad_norm": 0.03264397382736206, - "learning_rate": 1.7685633185396812e-07, - "loss": 0.00050089699216187, - "step": 22750 - }, - { - "epoch": 3.8797953964194374, - "grad_norm": 0.015950864180922508, - "learning_rate": 1.74379570099413e-07, - "loss": 0.00018732347525656222, - "step": 22755 - }, - { - "epoch": 3.8806479113384484, - "grad_norm": 0.022178884595632553, - "learning_rate": 1.7192023318610277e-07, - "loss": 0.0004226424265652895, - "step": 22760 - }, - { - "epoch": 3.8815004262574595, - "grad_norm": 0.08079300820827484, - "learning_rate": 1.6947832226213987e-07, - "loss": 0.0008535554632544517, - "step": 22765 - }, - { - "epoch": 3.8823529411764706, - "grad_norm": 0.04649774730205536, - "learning_rate": 1.670538384674791e-07, - "loss": 0.001085586380213499, - "step": 22770 - }, - { - "epoch": 3.8832054560954816, - "grad_norm": 0.003438888816162944, - "learning_rate": 1.6464678293396093e-07, - "loss": 0.0002649814588949084, - "step": 22775 - }, - { - "epoch": 3.8840579710144927, - "grad_norm": 0.03992622345685959, - "learning_rate": 1.6225715678527816e-07, - "loss": 0.0005447682924568653, - "step": 22780 - }, - { - "epoch": 3.8849104859335037, - "grad_norm": 0.022190820425748825, - "learning_rate": 1.5988496113698013e-07, - "loss": 0.00076604881323874, - "step": 22785 - }, - { - "epoch": 3.885763000852515, - "grad_norm": 0.05946198105812073, - "learning_rate": 1.575301970964976e-07, - "loss": 0.0003323152428492904, - "step": 22790 - }, - { - "epoch": 3.886615515771526, - "grad_norm": 0.011684319004416466, - "learning_rate": 1.551928657631013e-07, - "loss": 0.0003173027187585831, - "step": 22795 - }, - { - "epoch": 3.887468030690537, - "grad_norm": 0.03443511947989464, - "learning_rate": 1.528729682279392e-07, - "loss": 0.000368134374730289, - "step": 22800 - }, - { - "epoch": 3.888320545609548, - "grad_norm": 0.0683477595448494, - "learning_rate": 1.5057050557402418e-07, - "loss": 0.0003201848594471812, - "step": 22805 - }, - { - "epoch": 3.8891730605285595, - "grad_norm": 0.06834128499031067, - "learning_rate": 1.4828547887621307e-07, - "loss": 0.0007571569178253413, - "step": 22810 - }, - { - "epoch": 3.89002557544757, - "grad_norm": 0.013952577486634254, - "learning_rate": 1.4601788920123176e-07, - "loss": 0.0007348908577114344, - "step": 22815 - }, - { - "epoch": 3.8908780903665816, - "grad_norm": 0.0736064463853836, - "learning_rate": 1.4376773760767511e-07, - "loss": 0.0007845867425203323, - "step": 22820 - }, - { - "epoch": 3.8917306052855922, - "grad_norm": 0.018666284158825874, - "learning_rate": 1.415350251459821e-07, - "loss": 0.00020006420090794562, - "step": 22825 - }, - { - "epoch": 3.8925831202046037, - "grad_norm": 0.11472959816455841, - "learning_rate": 1.3931975285845636e-07, - "loss": 0.0009698046371340752, - "step": 22830 - }, - { - "epoch": 3.893435635123615, - "grad_norm": 0.012865605764091015, - "learning_rate": 1.3712192177926656e-07, - "loss": 0.0010215624235570431, - "step": 22835 - }, - { - "epoch": 3.894288150042626, - "grad_norm": 0.0334518626332283, - "learning_rate": 1.349415329344336e-07, - "loss": 0.00037307373713701963, - "step": 22840 - }, - { - "epoch": 3.895140664961637, - "grad_norm": 0.02127443067729473, - "learning_rate": 1.3277858734182662e-07, - "loss": 0.00018035907996818423, - "step": 22845 - }, - { - "epoch": 3.895993179880648, - "grad_norm": 0.00875978171825409, - "learning_rate": 1.3063308601118372e-07, - "loss": 0.000528197456151247, - "step": 22850 - }, - { - "epoch": 3.896845694799659, - "grad_norm": 0.009711408987641335, - "learning_rate": 1.2850502994410371e-07, - "loss": 0.0004956468474119902, - "step": 22855 - }, - { - "epoch": 3.89769820971867, - "grad_norm": 0.012017211876809597, - "learning_rate": 1.2639442013402528e-07, - "loss": 0.00036720100324600936, - "step": 22860 - }, - { - "epoch": 3.898550724637681, - "grad_norm": 0.011994452215731144, - "learning_rate": 1.2430125756625609e-07, - "loss": 0.00036521924193948506, - "step": 22865 - }, - { - "epoch": 3.899403239556692, - "grad_norm": 0.015366890467703342, - "learning_rate": 1.2222554321795614e-07, - "loss": 0.001251001376658678, - "step": 22870 - }, - { - "epoch": 3.9002557544757033, - "grad_norm": 0.011636365205049515, - "learning_rate": 1.2016727805812948e-07, - "loss": 0.00030510048381984236, - "step": 22875 - }, - { - "epoch": 3.9011082693947143, - "grad_norm": 0.06454995274543762, - "learning_rate": 1.1812646304764917e-07, - "loss": 0.00047010909765958785, - "step": 22880 - }, - { - "epoch": 3.9019607843137254, - "grad_norm": 0.004893654957413673, - "learning_rate": 1.1610309913923643e-07, - "loss": 0.0002160596428439021, - "step": 22885 - }, - { - "epoch": 3.9028132992327365, - "grad_norm": 0.009556293487548828, - "learning_rate": 1.140971872774607e-07, - "loss": 0.0003103669732809067, - "step": 22890 - }, - { - "epoch": 3.9036658141517475, - "grad_norm": 0.013235462829470634, - "learning_rate": 1.1210872839875207e-07, - "loss": 0.000385861424729228, - "step": 22895 - }, - { - "epoch": 3.9045183290707586, - "grad_norm": 0.006167116109281778, - "learning_rate": 1.1013772343138466e-07, - "loss": 0.0004389645531773567, - "step": 22900 - }, - { - "epoch": 3.90537084398977, - "grad_norm": 0.012628387659788132, - "learning_rate": 1.0818417329549328e-07, - "loss": 0.0002759493188932538, - "step": 22905 - }, - { - "epoch": 3.9062233589087807, - "grad_norm": 0.020555347204208374, - "learning_rate": 1.0624807890305676e-07, - "loss": 0.00044609596952795985, - "step": 22910 - }, - { - "epoch": 3.907075873827792, - "grad_norm": 0.019547248259186745, - "learning_rate": 1.0432944115791043e-07, - "loss": 0.00040673622861504556, - "step": 22915 - }, - { - "epoch": 3.907928388746803, - "grad_norm": 0.005401125177741051, - "learning_rate": 1.0242826095574198e-07, - "loss": 0.0004734213929623365, - "step": 22920 - }, - { - "epoch": 3.9087809036658143, - "grad_norm": 0.004537303000688553, - "learning_rate": 1.0054453918407896e-07, - "loss": 0.00024205415975302457, - "step": 22925 - }, - { - "epoch": 3.9096334185848254, - "grad_norm": 0.01689390279352665, - "learning_rate": 9.86782767223096e-08, - "loss": 0.0002517271088436246, - "step": 22930 - }, - { - "epoch": 3.9104859335038364, - "grad_norm": 0.06417275965213776, - "learning_rate": 9.682947444166616e-08, - "loss": 0.0005528208799660206, - "step": 22935 - }, - { - "epoch": 3.9113384484228475, - "grad_norm": 0.022223379462957382, - "learning_rate": 9.499813320522909e-08, - "loss": 0.00022923222277313471, - "step": 22940 - }, - { - "epoch": 3.9121909633418586, - "grad_norm": 0.015348847955465317, - "learning_rate": 9.318425386793116e-08, - "loss": 0.0006318584084510804, - "step": 22945 - }, - { - "epoch": 3.9130434782608696, - "grad_norm": 0.04616772010922432, - "learning_rate": 9.138783727655336e-08, - "loss": 0.0006988701410591602, - "step": 22950 - }, - { - "epoch": 3.9138959931798807, - "grad_norm": 0.012819357216358185, - "learning_rate": 8.960888426972068e-08, - "loss": 0.0001819216297008097, - "step": 22955 - }, - { - "epoch": 3.9147485080988917, - "grad_norm": 0.020759694278240204, - "learning_rate": 8.784739567790632e-08, - "loss": 0.0006579568609595299, - "step": 22960 - }, - { - "epoch": 3.915601023017903, - "grad_norm": 0.17942465841770172, - "learning_rate": 8.610337232343167e-08, - "loss": 0.001371748000383377, - "step": 22965 - }, - { - "epoch": 3.916453537936914, - "grad_norm": 0.041743483394384384, - "learning_rate": 8.437681502047045e-08, - "loss": 0.001165113039314747, - "step": 22970 - }, - { - "epoch": 3.917306052855925, - "grad_norm": 0.04779570922255516, - "learning_rate": 8.26677245750279e-08, - "loss": 0.0006786032579839229, - "step": 22975 - }, - { - "epoch": 3.918158567774936, - "grad_norm": 0.06158490851521492, - "learning_rate": 8.097610178497416e-08, - "loss": 0.0007079535163939, - "step": 22980 - }, - { - "epoch": 3.919011082693947, - "grad_norm": 0.016627155244350433, - "learning_rate": 7.930194744000668e-08, - "loss": 0.0005602708086371421, - "step": 22985 - }, - { - "epoch": 3.919863597612958, - "grad_norm": 0.012588472105562687, - "learning_rate": 7.764526232167945e-08, - "loss": 0.0003641904331743717, - "step": 22990 - }, - { - "epoch": 3.920716112531969, - "grad_norm": 0.008864199742674828, - "learning_rate": 7.600604720339049e-08, - "loss": 0.0014302042312920094, - "step": 22995 - }, - { - "epoch": 3.9215686274509802, - "grad_norm": 0.06016424298286438, - "learning_rate": 7.438430285037767e-08, - "loss": 0.0005181442946195602, - "step": 23000 - }, - { - "epoch": 3.9224211423699913, - "grad_norm": 0.017198998481035233, - "learning_rate": 7.278003001972704e-08, - "loss": 0.0007852497510612011, - "step": 23005 - }, - { - "epoch": 3.923273657289003, - "grad_norm": 0.10172398388385773, - "learning_rate": 7.119322946036454e-08, - "loss": 0.0008896278217434883, - "step": 23010 - }, - { - "epoch": 3.9241261722080134, - "grad_norm": 0.021225174888968468, - "learning_rate": 6.96239019130601e-08, - "loss": 0.0006865154020488263, - "step": 23015 - }, - { - "epoch": 3.924978687127025, - "grad_norm": 0.039081476628780365, - "learning_rate": 6.807204811043187e-08, - "loss": 0.0003759567625820637, - "step": 23020 - }, - { - "epoch": 3.9258312020460355, - "grad_norm": 0.033924706280231476, - "learning_rate": 6.653766877693368e-08, - "loss": 0.0003636789973825216, - "step": 23025 - }, - { - "epoch": 3.926683716965047, - "grad_norm": 0.022344253957271576, - "learning_rate": 6.502076462886752e-08, - "loss": 0.00048628607764840127, - "step": 23030 - }, - { - "epoch": 3.927536231884058, - "grad_norm": 0.01214676909148693, - "learning_rate": 6.352133637437112e-08, - "loss": 0.0007959893904626369, - "step": 23035 - }, - { - "epoch": 3.928388746803069, - "grad_norm": 0.044509515166282654, - "learning_rate": 6.20393847134304e-08, - "loss": 0.000532484333962202, - "step": 23040 - }, - { - "epoch": 3.92924126172208, - "grad_norm": 0.030274951830506325, - "learning_rate": 6.05749103378711e-08, - "loss": 0.0006205241661518812, - "step": 23045 - }, - { - "epoch": 3.9300937766410913, - "grad_norm": 0.07182637602090836, - "learning_rate": 5.912791393135469e-08, - "loss": 0.0010498687624931335, - "step": 23050 - }, - { - "epoch": 3.9309462915601023, - "grad_norm": 0.08199719339609146, - "learning_rate": 5.769839616938665e-08, - "loss": 0.0007067734841257334, - "step": 23055 - }, - { - "epoch": 3.9317988064791134, - "grad_norm": 0.02759665437042713, - "learning_rate": 5.6286357719320664e-08, - "loss": 0.0006836862768977881, - "step": 23060 - }, - { - "epoch": 3.9326513213981245, - "grad_norm": 0.049859654158353806, - "learning_rate": 5.48917992403336e-08, - "loss": 0.0003887650091201067, - "step": 23065 - }, - { - "epoch": 3.9335038363171355, - "grad_norm": 0.020034313201904297, - "learning_rate": 5.3514721383458856e-08, - "loss": 0.0002620942890644073, - "step": 23070 - }, - { - "epoch": 3.9343563512361466, - "grad_norm": 0.02726505696773529, - "learning_rate": 5.21551247915572e-08, - "loss": 0.00019104636739939452, - "step": 23075 - }, - { - "epoch": 3.9352088661551576, - "grad_norm": 0.022711891680955887, - "learning_rate": 5.081301009933758e-08, - "loss": 0.00029650195501744746, - "step": 23080 - }, - { - "epoch": 3.9360613810741687, - "grad_norm": 0.09871876984834671, - "learning_rate": 4.948837793334465e-08, - "loss": 0.00047057950869202616, - "step": 23085 - }, - { - "epoch": 3.9369138959931798, - "grad_norm": 0.006549649406224489, - "learning_rate": 4.818122891195458e-08, - "loss": 0.00047056037001311777, - "step": 23090 - }, - { - "epoch": 3.937766410912191, - "grad_norm": 0.05611155182123184, - "learning_rate": 4.689156364539176e-08, - "loss": 0.0005085552111268044, - "step": 23095 - }, - { - "epoch": 3.938618925831202, - "grad_norm": 0.0015141203766688704, - "learning_rate": 4.561938273572041e-08, - "loss": 0.0005205009132623672, - "step": 23100 - }, - { - "epoch": 3.9394714407502134, - "grad_norm": 0.040545813739299774, - "learning_rate": 4.436468677682797e-08, - "loss": 0.0003895478090271354, - "step": 23105 - }, - { - "epoch": 3.940323955669224, - "grad_norm": 0.007932890206575394, - "learning_rate": 4.3127476354450075e-08, - "loss": 0.0003442693734541535, - "step": 23110 - }, - { - "epoch": 3.9411764705882355, - "grad_norm": 0.04718896746635437, - "learning_rate": 4.1907752046162205e-08, - "loss": 0.0009176202118396759, - "step": 23115 - }, - { - "epoch": 3.942028985507246, - "grad_norm": 0.005520604085177183, - "learning_rate": 4.070551442136305e-08, - "loss": 0.0008139912039041519, - "step": 23120 - }, - { - "epoch": 3.9428815004262576, - "grad_norm": 0.013835078105330467, - "learning_rate": 3.952076404131199e-08, - "loss": 0.0009749887511134147, - "step": 23125 - }, - { - "epoch": 3.9437340153452687, - "grad_norm": 0.14306026697158813, - "learning_rate": 3.8353501459074953e-08, - "loss": 0.0004838756285607815, - "step": 23130 - }, - { - "epoch": 3.9445865302642797, - "grad_norm": 0.0022388347424566746, - "learning_rate": 3.720372721957854e-08, - "loss": 0.00016013121930882334, - "step": 23135 - }, - { - "epoch": 3.945439045183291, - "grad_norm": 0.06415778398513794, - "learning_rate": 3.607144185957256e-08, - "loss": 0.0004399829544126987, - "step": 23140 - }, - { - "epoch": 3.946291560102302, - "grad_norm": 0.006063231732696295, - "learning_rate": 3.4956645907642525e-08, - "loss": 0.0003463166998699307, - "step": 23145 - }, - { - "epoch": 3.947144075021313, - "grad_norm": 0.058278266340494156, - "learning_rate": 3.385933988421796e-08, - "loss": 0.0006705883424729109, - "step": 23150 - }, - { - "epoch": 3.947996589940324, - "grad_norm": 0.007072574459016323, - "learning_rate": 3.277952430155162e-08, - "loss": 0.00027483000885695217, - "step": 23155 - }, - { - "epoch": 3.948849104859335, - "grad_norm": 0.06288672983646393, - "learning_rate": 3.171719966374442e-08, - "loss": 0.00044926153495907786, - "step": 23160 - }, - { - "epoch": 3.949701619778346, - "grad_norm": 0.11492919921875, - "learning_rate": 3.06723664667205e-08, - "loss": 0.0006402281112968921, - "step": 23165 - }, - { - "epoch": 3.950554134697357, - "grad_norm": 0.057387739419937134, - "learning_rate": 2.964502519823969e-08, - "loss": 0.0009132426232099534, - "step": 23170 - }, - { - "epoch": 3.9514066496163682, - "grad_norm": 0.02673800103366375, - "learning_rate": 2.8635176337905852e-08, - "loss": 0.00021783523261547088, - "step": 23175 - }, - { - "epoch": 3.9522591645353793, - "grad_norm": 0.005547195672988892, - "learning_rate": 2.7642820357146046e-08, - "loss": 0.00022417106665670872, - "step": 23180 - }, - { - "epoch": 3.9531116794543903, - "grad_norm": 0.004117009229958057, - "learning_rate": 2.6667957719227197e-08, - "loss": 0.0005245423410087824, - "step": 23185 - }, - { - "epoch": 3.9539641943734014, - "grad_norm": 0.027315596118569374, - "learning_rate": 2.5710588879243597e-08, - "loss": 0.000357617880217731, - "step": 23190 - }, - { - "epoch": 3.9548167092924125, - "grad_norm": 0.013029955327510834, - "learning_rate": 2.4770714284133575e-08, - "loss": 0.0009207891300320626, - "step": 23195 - }, - { - "epoch": 3.955669224211424, - "grad_norm": 0.0028237253427505493, - "learning_rate": 2.3848334372654488e-08, - "loss": 0.00036229838151484727, - "step": 23200 - }, - { - "epoch": 3.9565217391304346, - "grad_norm": 0.04396171122789383, - "learning_rate": 2.2943449575407725e-08, - "loss": 0.0002923567779362202, - "step": 23205 - }, - { - "epoch": 3.957374254049446, - "grad_norm": 0.0613851472735405, - "learning_rate": 2.2056060314822044e-08, - "loss": 0.001064283773303032, - "step": 23210 - }, - { - "epoch": 3.9582267689684567, - "grad_norm": 0.0072186607867479324, - "learning_rate": 2.1186167005166066e-08, - "loss": 0.00033982205204665663, - "step": 23215 - }, - { - "epoch": 3.959079283887468, - "grad_norm": 0.02121078409254551, - "learning_rate": 2.0333770052527453e-08, - "loss": 0.0003547267057001591, - "step": 23220 - }, - { - "epoch": 3.9599317988064793, - "grad_norm": 0.0026946449652314186, - "learning_rate": 1.9498869854833733e-08, - "loss": 0.0009029872715473175, - "step": 23225 - }, - { - "epoch": 3.9607843137254903, - "grad_norm": 0.029824867844581604, - "learning_rate": 1.8681466801852286e-08, - "loss": 0.0006612129043787717, - "step": 23230 - }, - { - "epoch": 3.9616368286445014, - "grad_norm": 0.021079210564494133, - "learning_rate": 1.7881561275161217e-08, - "loss": 0.00028204533737152815, - "step": 23235 - }, - { - "epoch": 3.9624893435635125, - "grad_norm": 0.04094521328806877, - "learning_rate": 1.709915364819514e-08, - "loss": 0.0006630297284573317, - "step": 23240 - }, - { - "epoch": 3.9633418584825235, - "grad_norm": 0.012560448609292507, - "learning_rate": 1.6334244286203556e-08, - "loss": 0.0004881520289927721, - "step": 23245 - }, - { - "epoch": 3.9641943734015346, - "grad_norm": 0.08076170831918716, - "learning_rate": 1.5586833546267498e-08, - "loss": 0.0008171131834387779, - "step": 23250 - }, - { - "epoch": 3.9650468883205456, - "grad_norm": 0.016555890440940857, - "learning_rate": 1.4856921777312014e-08, - "loss": 0.0002690091263502836, - "step": 23255 - }, - { - "epoch": 3.9658994032395567, - "grad_norm": 0.025087429210543633, - "learning_rate": 1.4144509320072884e-08, - "loss": 0.0002989970613270998, - "step": 23260 - }, - { - "epoch": 3.9667519181585678, - "grad_norm": 0.07274094223976135, - "learning_rate": 1.3449596507138228e-08, - "loss": 0.001411922462284565, - "step": 23265 - }, - { - "epoch": 3.967604433077579, - "grad_norm": 0.009768236428499222, - "learning_rate": 1.277218366291105e-08, - "loss": 0.00017849041614681482, - "step": 23270 - }, - { - "epoch": 3.96845694799659, - "grad_norm": 0.011777863837778568, - "learning_rate": 1.2112271103630056e-08, - "loss": 0.0003875581081956625, - "step": 23275 - }, - { - "epoch": 3.969309462915601, - "grad_norm": 0.10975488275289536, - "learning_rate": 1.1469859137369642e-08, - "loss": 0.001325076725333929, - "step": 23280 - }, - { - "epoch": 3.970161977834612, - "grad_norm": 0.04203944653272629, - "learning_rate": 1.0844948064019088e-08, - "loss": 0.000397085165604949, - "step": 23285 - }, - { - "epoch": 3.971014492753623, - "grad_norm": 0.022160891443490982, - "learning_rate": 1.0237538175320026e-08, - "loss": 0.0010200665332376958, - "step": 23290 - }, - { - "epoch": 3.971867007672634, - "grad_norm": 0.02673690766096115, - "learning_rate": 9.64762975482064e-09, - "loss": 0.0005900958087295294, - "step": 23295 - }, - { - "epoch": 3.972719522591645, - "grad_norm": 0.09556782245635986, - "learning_rate": 9.075223077917304e-09, - "loss": 0.000301313167437911, - "step": 23300 - }, - { - "epoch": 3.9735720375106567, - "grad_norm": 0.007306256797164679, - "learning_rate": 8.520318411825434e-09, - "loss": 0.00012424198212102056, - "step": 23305 - }, - { - "epoch": 3.9744245524296673, - "grad_norm": 0.03629566729068756, - "learning_rate": 7.982916015591978e-09, - "loss": 0.0005501963198184967, - "step": 23310 - }, - { - "epoch": 3.975277067348679, - "grad_norm": 0.009374035522341728, - "learning_rate": 7.463016140095423e-09, - "loss": 0.0009492671117186547, - "step": 23315 - }, - { - "epoch": 3.9761295822676894, - "grad_norm": 0.010084625333547592, - "learning_rate": 6.960619028041625e-09, - "loss": 0.00046687186695635317, - "step": 23320 - }, - { - "epoch": 3.976982097186701, - "grad_norm": 0.02581017091870308, - "learning_rate": 6.475724913967972e-09, - "loss": 0.000594175374135375, - "step": 23325 - }, - { - "epoch": 3.977834612105712, - "grad_norm": 0.045390889048576355, - "learning_rate": 6.0083340242392256e-09, - "loss": 0.00024255807511508465, - "step": 23330 - }, - { - "epoch": 3.978687127024723, - "grad_norm": 0.06592033803462982, - "learning_rate": 5.558446577047515e-09, - "loss": 0.0006300599779933691, - "step": 23335 - }, - { - "epoch": 3.979539641943734, - "grad_norm": 0.03145362809300423, - "learning_rate": 5.1260627824123436e-09, - "loss": 0.0004346088971942663, - "step": 23340 - }, - { - "epoch": 3.980392156862745, - "grad_norm": 0.008353454060852528, - "learning_rate": 4.711182842193073e-09, - "loss": 0.0009800062514841556, - "step": 23345 - }, - { - "epoch": 3.9812446717817562, - "grad_norm": 0.024524807929992676, - "learning_rate": 4.313806950063947e-09, - "loss": 0.0003803495317697525, - "step": 23350 - }, - { - "epoch": 3.9820971867007673, - "grad_norm": 0.018736131489276886, - "learning_rate": 3.933935291530743e-09, - "loss": 0.00024626741651445627, - "step": 23355 - }, - { - "epoch": 3.9829497016197783, - "grad_norm": 0.010309611447155476, - "learning_rate": 3.571568043934936e-09, - "loss": 0.0006795517634600401, - "step": 23360 - }, - { - "epoch": 3.9838022165387894, - "grad_norm": 0.04562285169959068, - "learning_rate": 3.2267053764412076e-09, - "loss": 0.0006199819035828114, - "step": 23365 - }, - { - "epoch": 3.9846547314578005, - "grad_norm": 0.043983470648527145, - "learning_rate": 2.8993474500416113e-09, - "loss": 0.00030286619439721105, - "step": 23370 - }, - { - "epoch": 3.9855072463768115, - "grad_norm": 0.012420141138136387, - "learning_rate": 2.589494417555571e-09, - "loss": 0.0005513324867933989, - "step": 23375 - }, - { - "epoch": 3.9863597612958226, - "grad_norm": 0.010290928184986115, - "learning_rate": 2.2971464236382074e-09, - "loss": 0.00047493595629930494, - "step": 23380 - }, - { - "epoch": 3.9872122762148337, - "grad_norm": 0.0191446952521801, - "learning_rate": 2.0223036047636867e-09, - "loss": 0.0007154576946049928, - "step": 23385 - }, - { - "epoch": 3.9880647911338447, - "grad_norm": 0.20886076986789703, - "learning_rate": 1.764966089237707e-09, - "loss": 0.002053941413760185, - "step": 23390 - }, - { - "epoch": 3.9889173060528558, - "grad_norm": 0.10748913139104843, - "learning_rate": 1.5251339971933397e-09, - "loss": 0.001036037877202034, - "step": 23395 - }, - { - "epoch": 3.9897698209718673, - "grad_norm": 0.004688949324190617, - "learning_rate": 1.3028074405951882e-09, - "loss": 0.0003380549373105168, - "step": 23400 - }, - { - "epoch": 3.990622335890878, - "grad_norm": 0.09298693388700485, - "learning_rate": 1.0979865232310647e-09, - "loss": 0.0003235136391595006, - "step": 23405 - }, - { - "epoch": 3.9914748508098894, - "grad_norm": 0.011672712862491608, - "learning_rate": 9.106713407161515e-10, - "loss": 0.0008779228664934635, - "step": 23410 - }, - { - "epoch": 3.9923273657289, - "grad_norm": 0.025006311014294624, - "learning_rate": 7.408619804971649e-10, - "loss": 0.0008599048480391503, - "step": 23415 - }, - { - "epoch": 3.9931798806479115, - "grad_norm": 0.02157057262957096, - "learning_rate": 5.885585218481925e-10, - "loss": 0.00027061502914875745, - "step": 23420 - }, - { - "epoch": 3.9940323955669226, - "grad_norm": 0.004747865721583366, - "learning_rate": 4.5376103587069223e-10, - "loss": 0.0004908301401883364, - "step": 23425 - }, - { - "epoch": 3.9948849104859336, - "grad_norm": 0.03755816072225571, - "learning_rate": 3.3646958548516623e-10, - "loss": 0.00035946685820817945, - "step": 23430 - }, - { - "epoch": 3.9957374254049447, - "grad_norm": 0.05361028388142586, - "learning_rate": 2.3668422545614074e-10, - "loss": 0.00037483936175704003, - "step": 23435 - }, - { - "epoch": 3.9965899403239558, - "grad_norm": 0.18934012949466705, - "learning_rate": 1.5440500235885945e-10, - "loss": 0.0011177632957696914, - "step": 23440 - }, - { - "epoch": 3.997442455242967, - "grad_norm": 0.013160571455955505, - "learning_rate": 8.963195461259009e-11, - "loss": 0.0003229744965210557, - "step": 23445 - }, - { - "epoch": 3.998294970161978, - "grad_norm": 0.03440895304083824, - "learning_rate": 4.2365112447317885e-11, - "loss": 0.0006907129660248757, - "step": 23450 - }, - { - "epoch": 3.999147485080989, - "grad_norm": 0.006319939624518156, - "learning_rate": 1.2604497932888757e-11, - "loss": 0.00044195097871124744, - "step": 23455 - }, - { - "epoch": 3.9993179880647913, - "eval_loss": 0.0636245608329773, - "eval_runtime": 3.6813, - "eval_samples_per_second": 68.454, - "eval_steps_per_second": 1.087, - "step": 23456 - }, - { - "eval_cer_subset": 0.014004918006090512, - "eval_cer_subset_edit_distance": 860, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 23456 - }, - { - "epoch": 4.0, - "grad_norm": 0.1382739543914795, - "learning_rate": 3.501249623560909e-13, - "loss": 0.0008504444733262063, - "step": 23460 - } - ], - "logging_steps": 5, - "max_steps": 23460, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 2932, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 7.908443215120466e+18, - "train_batch_size": 32, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/training_args.bin deleted file mode 100644 index f049a0b30d4abb921310cf007655d399147294f8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-23460/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6adec376d54028ef57ef3dc856a5cba12bab9c0d580369637fa983b6072064f7 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/adapter_model.safetensors deleted file mode 100644 index b92106a95a4aaa44570a43b0166080023fd9b8cf..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b43cd08007cfd823f6d3b84092a5e1dea2319fe24f60c6e085ebea781da6a2d3 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/optimizer.pt deleted file mode 100644 index 56977a789888563e15d22cbdbfd368e41e8057d3..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e36fffeb7edf6b0e924bcf51765cc270e40404eb3d1e1e6e64dc7eeb00472dc3 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/rng_state.pth deleted file mode 100644 index c7658e4f831fd8c1da6aa599afb753c30a133b26..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0e14598ef7e7425096a0b26efc2931c8fb3e2d7c0c97cc03f70d781dec9fb2e7 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/scheduler.pt deleted file mode 100644 index b684e60886118cf3100137a207d7cd55afc40bee..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:270dbfd535dad5eabc8f24bcb37dffe6c85a83008afffd836bf35146a52d9e67 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/trainer_state.json deleted file mode 100644 index 537566ab8d0c5d4232d227735ea837499ab33a9a..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/trainer_state.json +++ /dev/null @@ -1,3730 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.747612551159618, - "eval_steps": 366, - "global_step": 2562, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 2.680621871402189e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2562/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/adapter_model.safetensors deleted file mode 100644 index e2c1243915b81298a1e181fe9ad6fe5d288d45bc..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b698bfece2485b8e77a597e143c0093046dfe0503b34948167bd14a67691ca5 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/optimizer.pt deleted file mode 100644 index 3a46be9a45bd622ac7a363b21e1497027ad1aa5d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7b9cc862e1dbda11f6eba6259a5b6b74d0048500aef5f6d7e6c6b2b7f808ce77 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/rng_state.pth deleted file mode 100644 index 97a0dbefb70722fe69a095f738aee6ae07e2ac12..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:55618d72fdc98ce531c1d13612b817a14a50cd3ee258c44dec6a79dee2ab54ab -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/scheduler.pt deleted file mode 100644 index d11635e301d3d768440238a4e9b4699e806aec3e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4deab52eb0757eb852b6c49d3f6dc7ab38a50fbafc5eec41a4068876edf310f6 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/trainer_state.json deleted file mode 100644 index a61319109370ca9ae89f1390551ebf4c48bb52e6..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/trainer_state.json +++ /dev/null @@ -1,4257 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.9972714870395634, - "eval_steps": 366, - "global_step": 2928, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - }, - { - "epoch": 1.7496589358799455, - "grad_norm": 0.1577913612127304, - "learning_rate": 4.617543195717702e-05, - "loss": 0.008352726697921753, - "step": 2565 - }, - { - "epoch": 1.7530695770804912, - "grad_norm": 0.0097503075376153, - "learning_rate": 4.607566614572297e-05, - "loss": 0.0051550988107919695, - "step": 2570 - }, - { - "epoch": 1.7564802182810368, - "grad_norm": 0.0745258778333664, - "learning_rate": 4.5975836246359376e-05, - "loss": 0.021304388344287873, - "step": 2575 - }, - { - "epoch": 1.7598908594815825, - "grad_norm": 0.06805134564638138, - "learning_rate": 4.5875943005137875e-05, - "loss": 0.002654948644340038, - "step": 2580 - }, - { - "epoch": 1.7633015006821282, - "grad_norm": 0.005402611568570137, - "learning_rate": 4.577598716858342e-05, - "loss": 0.0005614575464278459, - "step": 2585 - }, - { - "epoch": 1.766712141882674, - "grad_norm": 1.909899115562439, - "learning_rate": 4.5675969483688796e-05, - "loss": 0.07116572856903076, - "step": 2590 - }, - { - "epoch": 1.7701227830832198, - "grad_norm": 0.31900784373283386, - "learning_rate": 4.557589069790897e-05, - "loss": 0.00657682865858078, - "step": 2595 - }, - { - "epoch": 1.7735334242837655, - "grad_norm": 0.029744356870651245, - "learning_rate": 4.547575155915556e-05, - "loss": 0.022768501937389374, - "step": 2600 - }, - { - "epoch": 1.7769440654843112, - "grad_norm": 0.3033762276172638, - "learning_rate": 4.537555281579118e-05, - "loss": 0.06703370213508605, - "step": 2605 - }, - { - "epoch": 1.7803547066848568, - "grad_norm": 0.1135796457529068, - "learning_rate": 4.5275295216623895e-05, - "loss": 0.02318609356880188, - "step": 2610 - }, - { - "epoch": 1.7837653478854025, - "grad_norm": 0.03714317828416824, - "learning_rate": 4.517497951090163e-05, - "loss": 0.0006621151696890593, - "step": 2615 - }, - { - "epoch": 1.7871759890859482, - "grad_norm": 0.022167189046740532, - "learning_rate": 4.507460644830652e-05, - "loss": 0.02861899733543396, - "step": 2620 - }, - { - "epoch": 1.790586630286494, - "grad_norm": 0.85112464427948, - "learning_rate": 4.497417677894937e-05, - "loss": 0.006332498788833618, - "step": 2625 - }, - { - "epoch": 1.7939972714870396, - "grad_norm": 0.35604724287986755, - "learning_rate": 4.487369125336402e-05, - "loss": 0.0009636864997446537, - "step": 2630 - }, - { - "epoch": 1.7974079126875853, - "grad_norm": 1.309139370918274, - "learning_rate": 4.477315062250173e-05, - "loss": 0.015147030353546143, - "step": 2635 - }, - { - "epoch": 1.800818553888131, - "grad_norm": 0.00445478456094861, - "learning_rate": 4.467255563772554e-05, - "loss": 0.0033130761235952376, - "step": 2640 - }, - { - "epoch": 1.8042291950886766, - "grad_norm": 0.6081591248512268, - "learning_rate": 4.457190705080476e-05, - "loss": 0.05884039998054504, - "step": 2645 - }, - { - "epoch": 1.8076398362892223, - "grad_norm": 0.021707098931074142, - "learning_rate": 4.4471205613909215e-05, - "loss": 0.011024921387434005, - "step": 2650 - }, - { - "epoch": 1.811050477489768, - "grad_norm": 0.07584571093320847, - "learning_rate": 4.437045207960372e-05, - "loss": 0.0034543585032224657, - "step": 2655 - }, - { - "epoch": 1.8144611186903137, - "grad_norm": 0.09392323344945908, - "learning_rate": 4.4269647200842444e-05, - "loss": 0.020604337751865386, - "step": 2660 - }, - { - "epoch": 1.8178717598908594, - "grad_norm": 0.01986370049417019, - "learning_rate": 4.4168791730963214e-05, - "loss": 0.002114328555762768, - "step": 2665 - }, - { - "epoch": 1.821282401091405, - "grad_norm": 0.09479828178882599, - "learning_rate": 4.406788642368199e-05, - "loss": 0.025032815337181092, - "step": 2670 - }, - { - "epoch": 1.8246930422919507, - "grad_norm": 0.049587834626436234, - "learning_rate": 4.396693203308714e-05, - "loss": 0.010216309875249862, - "step": 2675 - }, - { - "epoch": 1.8281036834924966, - "grad_norm": 0.8442233204841614, - "learning_rate": 4.3865929313633846e-05, - "loss": 0.028453707695007324, - "step": 2680 - }, - { - "epoch": 1.8315143246930423, - "grad_norm": 1.1214258670806885, - "learning_rate": 4.37648790201385e-05, - "loss": 0.030564960837364197, - "step": 2685 - }, - { - "epoch": 1.834924965893588, - "grad_norm": 0.0033480043057352304, - "learning_rate": 4.3663781907772984e-05, - "loss": 0.0031189337372779847, - "step": 2690 - }, - { - "epoch": 1.8383356070941337, - "grad_norm": 1.5848685503005981, - "learning_rate": 4.356263873205908e-05, - "loss": 0.006724410504102707, - "step": 2695 - }, - { - "epoch": 1.8417462482946794, - "grad_norm": 2.099224090576172, - "learning_rate": 4.346145024886282e-05, - "loss": 0.02901224195957184, - "step": 2700 - }, - { - "epoch": 1.845156889495225, - "grad_norm": 0.009933914057910442, - "learning_rate": 4.336021721438881e-05, - "loss": 0.01628301590681076, - "step": 2705 - }, - { - "epoch": 1.848567530695771, - "grad_norm": 0.36502084136009216, - "learning_rate": 4.325894038517463e-05, - "loss": 0.0063889890909194945, - "step": 2710 - }, - { - "epoch": 1.8519781718963166, - "grad_norm": 0.5338266491889954, - "learning_rate": 4.3157620518085123e-05, - "loss": 0.0017654186114668847, - "step": 2715 - }, - { - "epoch": 1.8553888130968623, - "grad_norm": 0.019173067063093185, - "learning_rate": 4.3056258370306773e-05, - "loss": 0.026314160227775572, - "step": 2720 - }, - { - "epoch": 1.858799454297408, - "grad_norm": 0.0022459065075963736, - "learning_rate": 4.295485469934203e-05, - "loss": 0.034506088495254515, - "step": 2725 - }, - { - "epoch": 1.8622100954979537, - "grad_norm": 0.009851609356701374, - "learning_rate": 4.285341026300366e-05, - "loss": 0.037691861391067505, - "step": 2730 - }, - { - "epoch": 1.8656207366984994, - "grad_norm": 0.037228964269161224, - "learning_rate": 4.275192581940908e-05, - "loss": 0.019535519182682037, - "step": 2735 - }, - { - "epoch": 1.869031377899045, - "grad_norm": 0.03244785964488983, - "learning_rate": 4.2650402126974704e-05, - "loss": 0.004778595641255379, - "step": 2740 - }, - { - "epoch": 1.8724420190995907, - "grad_norm": 0.027707895264029503, - "learning_rate": 4.254883994441023e-05, - "loss": 0.014376556873321534, - "step": 2745 - }, - { - "epoch": 1.8758526603001364, - "grad_norm": 0.0010299253044649959, - "learning_rate": 4.244724003071302e-05, - "loss": 0.03207484483718872, - "step": 2750 - }, - { - "epoch": 1.879263301500682, - "grad_norm": 0.09466377645730972, - "learning_rate": 4.234560314516241e-05, - "loss": 0.0034012068063020706, - "step": 2755 - }, - { - "epoch": 1.8826739427012278, - "grad_norm": 0.010737070813775063, - "learning_rate": 4.224393004731403e-05, - "loss": 0.0015277769416570663, - "step": 2760 - }, - { - "epoch": 1.8860845839017735, - "grad_norm": 0.0126175656914711, - "learning_rate": 4.2142221496994144e-05, - "loss": 0.0010683752596378326, - "step": 2765 - }, - { - "epoch": 1.8894952251023192, - "grad_norm": 0.03981072083115578, - "learning_rate": 4.2040478254293946e-05, - "loss": 0.013066369295120239, - "step": 2770 - }, - { - "epoch": 1.8929058663028648, - "grad_norm": 1.678425669670105, - "learning_rate": 4.193870107956391e-05, - "loss": 0.04245063066482544, - "step": 2775 - }, - { - "epoch": 1.8963165075034105, - "grad_norm": 0.21114972233772278, - "learning_rate": 4.1836890733408063e-05, - "loss": 0.006565409898757935, - "step": 2780 - }, - { - "epoch": 1.8997271487039562, - "grad_norm": 0.13557757437229156, - "learning_rate": 4.173504797667836e-05, - "loss": 0.0032049473375082016, - "step": 2785 - }, - { - "epoch": 1.9031377899045019, - "grad_norm": 0.03560245409607887, - "learning_rate": 4.163317357046896e-05, - "loss": 0.036527630686759946, - "step": 2790 - }, - { - "epoch": 1.9065484311050478, - "grad_norm": 0.016457395628094673, - "learning_rate": 4.1531268276110534e-05, - "loss": 0.004363229870796204, - "step": 2795 - }, - { - "epoch": 1.9099590723055935, - "grad_norm": 0.15892116725444794, - "learning_rate": 4.142933285516459e-05, - "loss": 0.0009642043150961399, - "step": 2800 - }, - { - "epoch": 1.9133697135061392, - "grad_norm": 1.4503952264785767, - "learning_rate": 4.1327368069417805e-05, - "loss": 0.016029161214828492, - "step": 2805 - }, - { - "epoch": 1.9167803547066848, - "grad_norm": 0.05091691017150879, - "learning_rate": 4.122537468087626e-05, - "loss": 0.008691015094518662, - "step": 2810 - }, - { - "epoch": 1.9201909959072305, - "grad_norm": 0.33131423592567444, - "learning_rate": 4.1123353451759843e-05, - "loss": 0.026498579978942872, - "step": 2815 - }, - { - "epoch": 1.9236016371077762, - "grad_norm": 0.011024169623851776, - "learning_rate": 4.1021305144496455e-05, - "loss": 0.005746996402740479, - "step": 2820 - }, - { - "epoch": 1.9270122783083221, - "grad_norm": 0.003868364728987217, - "learning_rate": 4.091923052171637e-05, - "loss": 0.010599984973669051, - "step": 2825 - }, - { - "epoch": 1.9304229195088678, - "grad_norm": 0.06572377681732178, - "learning_rate": 4.081713034624656e-05, - "loss": 0.0074796505272388455, - "step": 2830 - }, - { - "epoch": 1.9338335607094135, - "grad_norm": 0.004749608226120472, - "learning_rate": 4.07150053811049e-05, - "loss": 0.001074880175292492, - "step": 2835 - }, - { - "epoch": 1.9372442019099592, - "grad_norm": 1.0662771463394165, - "learning_rate": 4.061285638949456e-05, - "loss": 0.012685902416706085, - "step": 2840 - }, - { - "epoch": 1.9406548431105048, - "grad_norm": 0.05863015726208687, - "learning_rate": 4.0510684134798275e-05, - "loss": 0.02307589054107666, - "step": 2845 - }, - { - "epoch": 1.9440654843110505, - "grad_norm": 0.1672995239496231, - "learning_rate": 4.0408489380572595e-05, - "loss": 0.0009137367829680443, - "step": 2850 - }, - { - "epoch": 1.9474761255115962, - "grad_norm": 0.37154069542884827, - "learning_rate": 4.030627289054224e-05, - "loss": 0.03070145845413208, - "step": 2855 - }, - { - "epoch": 1.950886766712142, - "grad_norm": 1.7240241765975952, - "learning_rate": 4.020403542859436e-05, - "loss": 0.02094976305961609, - "step": 2860 - }, - { - "epoch": 1.9542974079126876, - "grad_norm": 0.019670270383358, - "learning_rate": 4.0101777758772826e-05, - "loss": 0.002072345092892647, - "step": 2865 - }, - { - "epoch": 1.9577080491132333, - "grad_norm": 0.004318730439990759, - "learning_rate": 3.999950064527255e-05, - "loss": 0.004361823201179504, - "step": 2870 - }, - { - "epoch": 1.961118690313779, - "grad_norm": 0.01702667959034443, - "learning_rate": 3.989720485243371e-05, - "loss": 0.0062848575413227085, - "step": 2875 - }, - { - "epoch": 1.9645293315143246, - "grad_norm": 0.011724979616701603, - "learning_rate": 3.9794891144736114e-05, - "loss": 0.010996624082326888, - "step": 2880 - }, - { - "epoch": 1.9679399727148703, - "grad_norm": 0.05676786229014397, - "learning_rate": 3.9692560286793454e-05, - "loss": 0.0015414996072649957, - "step": 2885 - }, - { - "epoch": 1.971350613915416, - "grad_norm": 0.0819210484623909, - "learning_rate": 3.959021304334756e-05, - "loss": 0.0008444737643003464, - "step": 2890 - }, - { - "epoch": 1.9747612551159617, - "grad_norm": 2.654984951019287, - "learning_rate": 3.948785017926274e-05, - "loss": 0.029948851466178893, - "step": 2895 - }, - { - "epoch": 1.9781718963165074, - "grad_norm": 1.9272631406784058, - "learning_rate": 3.938547245952003e-05, - "loss": 0.028752812743186952, - "step": 2900 - }, - { - "epoch": 1.981582537517053, - "grad_norm": 1.8656548261642456, - "learning_rate": 3.9283080649211474e-05, - "loss": 0.013515619933605194, - "step": 2905 - }, - { - "epoch": 1.984993178717599, - "grad_norm": 0.12653613090515137, - "learning_rate": 3.918067551353445e-05, - "loss": 0.0011569897644221783, - "step": 2910 - }, - { - "epoch": 1.9884038199181446, - "grad_norm": 0.07956118136644363, - "learning_rate": 3.9078257817785886e-05, - "loss": 0.024067461490631104, - "step": 2915 - }, - { - "epoch": 1.9918144611186903, - "grad_norm": 0.05314113199710846, - "learning_rate": 3.897582832735658e-05, - "loss": 0.008826743811368942, - "step": 2920 - }, - { - "epoch": 1.995225102319236, - "grad_norm": 0.05015930160880089, - "learning_rate": 3.887338780772551e-05, - "loss": 0.017050875723361968, - "step": 2925 - }, - { - "epoch": 1.9972714870395634, - "eval_loss": 0.051675114780664444, - "eval_runtime": 0.9019, - "eval_samples_per_second": 83.154, - "eval_steps_per_second": 2.217, - "step": 2928 - }, - { - "eval_cer_subset": 0.016214743153018123, - "eval_cer_subset_edit_distance": 119, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2928 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 3.05261638391808e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2928/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/adapter_config.json deleted file mode 100644 index dcdb6b17c195950cd12709b48b32f4a0c173c74e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "down_proj", - "v_proj", - "gate_proj", - "up_proj", - "o_proj", - "q_proj", - "k_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/adapter_model.safetensors deleted file mode 100644 index 0425707d79ca8af4bdc92ad3e6fed907230447be..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f4f52b4fbc185f5b9585f62a529b5be6bb7ac1b810fc51aaae9f9638634554c3 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/optimizer.pt deleted file mode 100644 index ba33d46bdb469987be05ecfcf2784702a7953620..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0aea6101d0c76cd72a7436cddd38ee0f29d6b6166b7f5b8181f19504cde92abe -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/rng_state.pth deleted file mode 100644 index 567b652f4426334d6573c42dfcfb408434537684..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ec2cf52e4c3f15759e904ba93b5a10165bb850df54a226c7977bcfa5b79f76fb -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/scheduler.pt deleted file mode 100644 index d25fd72e8504764c6728ec01b478c7743b865581..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fd896493131a3e2f5633ad97ee9633943b9b689915228498d0411173d7031f43 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/trainer_state.json deleted file mode 100644 index d45c8c6f56faf4543d9600bef25f2b548cdf8232..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/trainer_state.json +++ /dev/null @@ -1,4168 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.4999147485080989, - "eval_steps": 1466, - "global_step": 2932, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0008525149190110827, - "grad_norm": 1.6653118133544922, - "learning_rate": 6.382978723404255e-07, - "loss": 0.6878558158874511, - "step": 5 - }, - { - "epoch": 0.0017050298380221654, - "grad_norm": 1.6072094440460205, - "learning_rate": 1.4361702127659573e-06, - "loss": 0.7222867965698242, - "step": 10 - }, - { - "epoch": 0.0025575447570332483, - "grad_norm": 1.8327608108520508, - "learning_rate": 2.2340425531914894e-06, - "loss": 0.6977188110351562, - "step": 15 - }, - { - "epoch": 0.0034100596760443308, - "grad_norm": 1.7263766527175903, - "learning_rate": 3.0319148936170214e-06, - "loss": 0.6928215026855469, - "step": 20 - }, - { - "epoch": 0.004262574595055414, - "grad_norm": 1.8482989072799683, - "learning_rate": 3.829787234042553e-06, - "loss": 0.6887872695922852, - "step": 25 - }, - { - "epoch": 0.005115089514066497, - "grad_norm": 2.0272440910339355, - "learning_rate": 4.627659574468085e-06, - "loss": 0.6714544296264648, - "step": 30 - }, - { - "epoch": 0.005967604433077579, - "grad_norm": 2.4286959171295166, - "learning_rate": 5.425531914893616e-06, - "loss": 0.6025971412658692, - "step": 35 - }, - { - "epoch": 0.0068201193520886615, - "grad_norm": 2.26863694190979, - "learning_rate": 6.223404255319148e-06, - "loss": 0.5200423240661621, - "step": 40 - }, - { - "epoch": 0.0076726342710997444, - "grad_norm": 2.0572476387023926, - "learning_rate": 7.02127659574468e-06, - "loss": 0.3741515874862671, - "step": 45 - }, - { - "epoch": 0.008525149190110827, - "grad_norm": 0.9048103094100952, - "learning_rate": 7.819148936170211e-06, - "loss": 0.25923154354095457, - "step": 50 - }, - { - "epoch": 0.00937766410912191, - "grad_norm": 0.5942133069038391, - "learning_rate": 8.617021276595744e-06, - "loss": 0.18589640855789186, - "step": 55 - }, - { - "epoch": 0.010230179028132993, - "grad_norm": 0.40469691157341003, - "learning_rate": 9.414893617021275e-06, - "loss": 0.14508966207504273, - "step": 60 - }, - { - "epoch": 0.011082693947144074, - "grad_norm": 0.319180428981781, - "learning_rate": 1.0212765957446808e-05, - "loss": 0.1287898302078247, - "step": 65 - }, - { - "epoch": 0.011935208866155157, - "grad_norm": 0.2620982229709625, - "learning_rate": 1.101063829787234e-05, - "loss": 0.10825083255767823, - "step": 70 - }, - { - "epoch": 0.01278772378516624, - "grad_norm": 0.2702063024044037, - "learning_rate": 1.1808510638297872e-05, - "loss": 0.09140915870666504, - "step": 75 - }, - { - "epoch": 0.013640238704177323, - "grad_norm": 0.21019567549228668, - "learning_rate": 1.2606382978723403e-05, - "loss": 0.07847741842269898, - "step": 80 - }, - { - "epoch": 0.014492753623188406, - "grad_norm": 0.27865487337112427, - "learning_rate": 1.3404255319148936e-05, - "loss": 0.0681579053401947, - "step": 85 - }, - { - "epoch": 0.015345268542199489, - "grad_norm": 0.2561706006526947, - "learning_rate": 1.4202127659574466e-05, - "loss": 0.06208043098449707, - "step": 90 - }, - { - "epoch": 0.01619778346121057, - "grad_norm": 0.33415308594703674, - "learning_rate": 1.4999999999999999e-05, - "loss": 0.05436077117919922, - "step": 95 - }, - { - "epoch": 0.017050298380221655, - "grad_norm": 0.20705723762512207, - "learning_rate": 1.579787234042553e-05, - "loss": 0.061427068710327146, - "step": 100 - }, - { - "epoch": 0.017902813299232736, - "grad_norm": 0.26269420981407166, - "learning_rate": 1.659574468085106e-05, - "loss": 0.04944279193878174, - "step": 105 - }, - { - "epoch": 0.01875532821824382, - "grad_norm": 0.256533682346344, - "learning_rate": 1.7393617021276596e-05, - "loss": 0.0463131994009018, - "step": 110 - }, - { - "epoch": 0.0196078431372549, - "grad_norm": 0.25077348947525024, - "learning_rate": 1.8191489361702127e-05, - "loss": 0.036723154783248904, - "step": 115 - }, - { - "epoch": 0.020460358056265986, - "grad_norm": 0.2801378071308136, - "learning_rate": 1.8989361702127655e-05, - "loss": 0.04308137893676758, - "step": 120 - }, - { - "epoch": 0.021312872975277068, - "grad_norm": 0.23140908777713776, - "learning_rate": 1.978723404255319e-05, - "loss": 0.0391487717628479, - "step": 125 - }, - { - "epoch": 0.02216538789428815, - "grad_norm": 0.4250975251197815, - "learning_rate": 2.0585106382978724e-05, - "loss": 0.03745899498462677, - "step": 130 - }, - { - "epoch": 0.023017902813299233, - "grad_norm": 0.24533668160438538, - "learning_rate": 2.1382978723404255e-05, - "loss": 0.03649275898933411, - "step": 135 - }, - { - "epoch": 0.023870417732310314, - "grad_norm": 0.2309182733297348, - "learning_rate": 2.2180851063829783e-05, - "loss": 0.036090463399887085, - "step": 140 - }, - { - "epoch": 0.0247229326513214, - "grad_norm": 0.28087928891181946, - "learning_rate": 2.2978723404255317e-05, - "loss": 0.03179733753204346, - "step": 145 - }, - { - "epoch": 0.02557544757033248, - "grad_norm": 0.2785134017467499, - "learning_rate": 2.377659574468085e-05, - "loss": 0.02794267237186432, - "step": 150 - }, - { - "epoch": 0.026427962489343565, - "grad_norm": 0.20468176901340485, - "learning_rate": 2.457446808510638e-05, - "loss": 0.025854837894439698, - "step": 155 - }, - { - "epoch": 0.027280477408354646, - "grad_norm": 0.4069538116455078, - "learning_rate": 2.537234042553191e-05, - "loss": 0.03182780146598816, - "step": 160 - }, - { - "epoch": 0.028132992327365727, - "grad_norm": 0.3025060296058655, - "learning_rate": 2.6170212765957446e-05, - "loss": 0.027975103259086607, - "step": 165 - }, - { - "epoch": 0.028985507246376812, - "grad_norm": 0.22387781739234924, - "learning_rate": 2.6968085106382977e-05, - "loss": 0.025766396522521974, - "step": 170 - }, - { - "epoch": 0.029838022165387893, - "grad_norm": 0.15060213208198547, - "learning_rate": 2.7765957446808508e-05, - "loss": 0.025661858916282653, - "step": 175 - }, - { - "epoch": 0.030690537084398978, - "grad_norm": 0.2912445366382599, - "learning_rate": 2.856382978723404e-05, - "loss": 0.02397846579551697, - "step": 180 - }, - { - "epoch": 0.03154305200341006, - "grad_norm": 0.19390830397605896, - "learning_rate": 2.9361702127659574e-05, - "loss": 0.023744897544384004, - "step": 185 - }, - { - "epoch": 0.03239556692242114, - "grad_norm": 0.24036464095115662, - "learning_rate": 3.0159574468085105e-05, - "loss": 0.0238590270280838, - "step": 190 - }, - { - "epoch": 0.03324808184143223, - "grad_norm": 0.2793026566505432, - "learning_rate": 3.0957446808510636e-05, - "loss": 0.025464403629302978, - "step": 195 - }, - { - "epoch": 0.03410059676044331, - "grad_norm": 0.21164429187774658, - "learning_rate": 3.175531914893617e-05, - "loss": 0.020692554116249085, - "step": 200 - }, - { - "epoch": 0.03495311167945439, - "grad_norm": 0.3742266297340393, - "learning_rate": 3.25531914893617e-05, - "loss": 0.02315486818552017, - "step": 205 - }, - { - "epoch": 0.03580562659846547, - "grad_norm": 0.2690248191356659, - "learning_rate": 3.3351063829787226e-05, - "loss": 0.0174571692943573, - "step": 210 - }, - { - "epoch": 0.03665814151747655, - "grad_norm": 0.3727855980396271, - "learning_rate": 3.414893617021276e-05, - "loss": 0.021705827116966246, - "step": 215 - }, - { - "epoch": 0.03751065643648764, - "grad_norm": 0.27461397647857666, - "learning_rate": 3.4946808510638296e-05, - "loss": 0.028134092688560486, - "step": 220 - }, - { - "epoch": 0.03836317135549872, - "grad_norm": 0.1723722219467163, - "learning_rate": 3.574468085106383e-05, - "loss": 0.02093944847583771, - "step": 225 - }, - { - "epoch": 0.0392156862745098, - "grad_norm": 0.17161321640014648, - "learning_rate": 3.654255319148936e-05, - "loss": 0.021984823048114777, - "step": 230 - }, - { - "epoch": 0.040068201193520885, - "grad_norm": 0.203886479139328, - "learning_rate": 3.734042553191489e-05, - "loss": 0.02234097272157669, - "step": 235 - }, - { - "epoch": 0.04092071611253197, - "grad_norm": 0.2021923065185547, - "learning_rate": 3.813829787234042e-05, - "loss": 0.02671273946762085, - "step": 240 - }, - { - "epoch": 0.041773231031543054, - "grad_norm": 0.24510027468204498, - "learning_rate": 3.8936170212765955e-05, - "loss": 0.016925157606601716, - "step": 245 - }, - { - "epoch": 0.042625745950554135, - "grad_norm": 0.191350057721138, - "learning_rate": 3.973404255319149e-05, - "loss": 0.02067229151725769, - "step": 250 - }, - { - "epoch": 0.043478260869565216, - "grad_norm": 0.20248189568519592, - "learning_rate": 4.053191489361702e-05, - "loss": 0.01805101931095123, - "step": 255 - }, - { - "epoch": 0.0443307757885763, - "grad_norm": 0.22623269259929657, - "learning_rate": 4.1329787234042545e-05, - "loss": 0.019811727106571198, - "step": 260 - }, - { - "epoch": 0.045183290707587385, - "grad_norm": 0.2050849199295044, - "learning_rate": 4.2127659574468086e-05, - "loss": 0.017767781019210817, - "step": 265 - }, - { - "epoch": 0.04603580562659847, - "grad_norm": 0.24867363274097443, - "learning_rate": 4.2925531914893614e-05, - "loss": 0.02127009928226471, - "step": 270 - }, - { - "epoch": 0.04688832054560955, - "grad_norm": 0.20721694827079773, - "learning_rate": 4.372340425531914e-05, - "loss": 0.022770658135414124, - "step": 275 - }, - { - "epoch": 0.04774083546462063, - "grad_norm": 0.19511370360851288, - "learning_rate": 4.452127659574468e-05, - "loss": 0.02165296971797943, - "step": 280 - }, - { - "epoch": 0.04859335038363171, - "grad_norm": 0.19066740572452545, - "learning_rate": 4.5319148936170204e-05, - "loss": 0.020857495069503785, - "step": 285 - }, - { - "epoch": 0.0494458653026428, - "grad_norm": 0.14217901229858398, - "learning_rate": 4.6117021276595746e-05, - "loss": 0.016413891315460206, - "step": 290 - }, - { - "epoch": 0.05029838022165388, - "grad_norm": 0.24177870154380798, - "learning_rate": 4.6914893617021274e-05, - "loss": 0.01902117133140564, - "step": 295 - }, - { - "epoch": 0.05115089514066496, - "grad_norm": 0.2742858827114105, - "learning_rate": 4.77127659574468e-05, - "loss": 0.016554126143455507, - "step": 300 - }, - { - "epoch": 0.05200341005967604, - "grad_norm": 0.24519385397434235, - "learning_rate": 4.851063829787234e-05, - "loss": 0.022036357223987578, - "step": 305 - }, - { - "epoch": 0.05285592497868713, - "grad_norm": 0.296572208404541, - "learning_rate": 4.930851063829787e-05, - "loss": 0.0196581095457077, - "step": 310 - }, - { - "epoch": 0.05370843989769821, - "grad_norm": 0.17092008888721466, - "learning_rate": 5.01063829787234e-05, - "loss": 0.015453743934631347, - "step": 315 - }, - { - "epoch": 0.05456095481670929, - "grad_norm": 0.17074067890644073, - "learning_rate": 5.090425531914893e-05, - "loss": 0.013983796536922454, - "step": 320 - }, - { - "epoch": 0.05541346973572037, - "grad_norm": 0.27302470803260803, - "learning_rate": 5.170212765957446e-05, - "loss": 0.015387402474880218, - "step": 325 - }, - { - "epoch": 0.056265984654731455, - "grad_norm": 0.2603592872619629, - "learning_rate": 5.2499999999999995e-05, - "loss": 0.013557825982570649, - "step": 330 - }, - { - "epoch": 0.05711849957374254, - "grad_norm": 0.23079948127269745, - "learning_rate": 5.329787234042553e-05, - "loss": 0.014809322357177735, - "step": 335 - }, - { - "epoch": 0.057971014492753624, - "grad_norm": 0.15029627084732056, - "learning_rate": 5.409574468085106e-05, - "loss": 0.018204084038734435, - "step": 340 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 0.2930934429168701, - "learning_rate": 5.4893617021276586e-05, - "loss": 0.02035558819770813, - "step": 345 - }, - { - "epoch": 0.059676044330775786, - "grad_norm": 0.19710905849933624, - "learning_rate": 5.569148936170213e-05, - "loss": 0.015025021135807037, - "step": 350 - }, - { - "epoch": 0.060528559249786874, - "grad_norm": 0.13316969573497772, - "learning_rate": 5.6489361702127655e-05, - "loss": 0.01258893460035324, - "step": 355 - }, - { - "epoch": 0.061381074168797956, - "grad_norm": 0.30591729283332825, - "learning_rate": 5.728723404255319e-05, - "loss": 0.0187319278717041, - "step": 360 - }, - { - "epoch": 0.06223358908780904, - "grad_norm": 0.22696663439273834, - "learning_rate": 5.808510638297872e-05, - "loss": 0.015805599093437196, - "step": 365 - }, - { - "epoch": 0.06308610400682012, - "grad_norm": 0.2800130546092987, - "learning_rate": 5.8882978723404245e-05, - "loss": 0.01719983071088791, - "step": 370 - }, - { - "epoch": 0.0639386189258312, - "grad_norm": 0.20671235024929047, - "learning_rate": 5.9680851063829786e-05, - "loss": 0.014449423551559449, - "step": 375 - }, - { - "epoch": 0.06479113384484228, - "grad_norm": 0.1630883365869522, - "learning_rate": 6.0478723404255314e-05, - "loss": 0.017153808474540712, - "step": 380 - }, - { - "epoch": 0.06564364876385337, - "grad_norm": 0.18699344992637634, - "learning_rate": 6.127659574468084e-05, - "loss": 0.015558701753616334, - "step": 385 - }, - { - "epoch": 0.06649616368286446, - "grad_norm": 0.15257929265499115, - "learning_rate": 6.207446808510638e-05, - "loss": 0.018240168690681458, - "step": 390 - }, - { - "epoch": 0.06734867860187553, - "grad_norm": 0.19658304750919342, - "learning_rate": 6.287234042553191e-05, - "loss": 0.015907022356987, - "step": 395 - }, - { - "epoch": 0.06820119352088662, - "grad_norm": 0.3743384778499603, - "learning_rate": 6.367021276595743e-05, - "loss": 0.014921861886978149, - "step": 400 - }, - { - "epoch": 0.06905370843989769, - "grad_norm": 0.254724383354187, - "learning_rate": 6.446808510638298e-05, - "loss": 0.018739913403987885, - "step": 405 - }, - { - "epoch": 0.06990622335890878, - "grad_norm": 0.19365151226520538, - "learning_rate": 6.52659574468085e-05, - "loss": 0.018984711170196532, - "step": 410 - }, - { - "epoch": 0.07075873827791987, - "grad_norm": 0.20728597044944763, - "learning_rate": 6.606382978723404e-05, - "loss": 0.01524859368801117, - "step": 415 - }, - { - "epoch": 0.07161125319693094, - "grad_norm": 0.1415344476699829, - "learning_rate": 6.686170212765957e-05, - "loss": 0.014004582166671753, - "step": 420 - }, - { - "epoch": 0.07246376811594203, - "grad_norm": 0.14169375598430634, - "learning_rate": 6.765957446808509e-05, - "loss": 0.013503608107566834, - "step": 425 - }, - { - "epoch": 0.0733162830349531, - "grad_norm": 0.22518369555473328, - "learning_rate": 6.845744680851064e-05, - "loss": 0.017587104439735414, - "step": 430 - }, - { - "epoch": 0.0741687979539642, - "grad_norm": 0.1091267541050911, - "learning_rate": 6.925531914893616e-05, - "loss": 0.013890406489372254, - "step": 435 - }, - { - "epoch": 0.07502131287297528, - "grad_norm": 0.24982722103595734, - "learning_rate": 7.00531914893617e-05, - "loss": 0.01599075198173523, - "step": 440 - }, - { - "epoch": 0.07587382779198636, - "grad_norm": 0.22311007976531982, - "learning_rate": 7.085106382978723e-05, - "loss": 0.014870230853557587, - "step": 445 - }, - { - "epoch": 0.07672634271099744, - "grad_norm": 0.27064985036849976, - "learning_rate": 7.164893617021276e-05, - "loss": 0.019213163852691652, - "step": 450 - }, - { - "epoch": 0.07757885763000852, - "grad_norm": 0.16876882314682007, - "learning_rate": 7.244680851063829e-05, - "loss": 0.014935044944286347, - "step": 455 - }, - { - "epoch": 0.0784313725490196, - "grad_norm": 0.18644963204860687, - "learning_rate": 7.324468085106382e-05, - "loss": 0.013823372125625611, - "step": 460 - }, - { - "epoch": 0.0792838874680307, - "grad_norm": 0.13067972660064697, - "learning_rate": 7.404255319148935e-05, - "loss": 0.013839980959892273, - "step": 465 - }, - { - "epoch": 0.08013640238704177, - "grad_norm": 0.0976850613951683, - "learning_rate": 7.484042553191489e-05, - "loss": 0.012782379984855652, - "step": 470 - }, - { - "epoch": 0.08098891730605286, - "grad_norm": 0.2523496150970459, - "learning_rate": 7.499999439800074e-05, - "loss": 0.013075053691864014, - "step": 475 - }, - { - "epoch": 0.08184143222506395, - "grad_norm": 0.18349343538284302, - "learning_rate": 7.499997163988164e-05, - "loss": 0.01753528565168381, - "step": 480 - }, - { - "epoch": 0.08269394714407502, - "grad_norm": 0.16528834402561188, - "learning_rate": 7.499993137552834e-05, - "loss": 0.012987489998340606, - "step": 485 - }, - { - "epoch": 0.08354646206308611, - "grad_norm": 0.1918938159942627, - "learning_rate": 7.499987360495964e-05, - "loss": 0.018496687710285186, - "step": 490 - }, - { - "epoch": 0.08439897698209718, - "grad_norm": 0.11452502012252808, - "learning_rate": 7.499979832820255e-05, - "loss": 0.015578755736351013, - "step": 495 - }, - { - "epoch": 0.08525149190110827, - "grad_norm": 0.2084985226392746, - "learning_rate": 7.499970554529216e-05, - "loss": 0.014264023303985596, - "step": 500 - }, - { - "epoch": 0.08610400682011936, - "grad_norm": 0.13777600228786469, - "learning_rate": 7.49995952562718e-05, - "loss": 0.014527246356010437, - "step": 505 - }, - { - "epoch": 0.08695652173913043, - "grad_norm": 0.2515336275100708, - "learning_rate": 7.499946746119296e-05, - "loss": 0.019042417407035828, - "step": 510 - }, - { - "epoch": 0.08780903665814152, - "grad_norm": 0.12859591841697693, - "learning_rate": 7.499932216011531e-05, - "loss": 0.01340993344783783, - "step": 515 - }, - { - "epoch": 0.0886615515771526, - "grad_norm": 0.15603038668632507, - "learning_rate": 7.499915935310667e-05, - "loss": 0.01645509898662567, - "step": 520 - }, - { - "epoch": 0.08951406649616368, - "grad_norm": 0.1493784785270691, - "learning_rate": 7.499897904024303e-05, - "loss": 0.016503919661045075, - "step": 525 - }, - { - "epoch": 0.09036658141517477, - "grad_norm": 0.14551150798797607, - "learning_rate": 7.499878122160858e-05, - "loss": 0.013891169428825378, - "step": 530 - }, - { - "epoch": 0.09121909633418585, - "grad_norm": 0.12197990715503693, - "learning_rate": 7.499856589729569e-05, - "loss": 0.01531532108783722, - "step": 535 - }, - { - "epoch": 0.09207161125319693, - "grad_norm": 0.24787496030330658, - "learning_rate": 7.499833306740485e-05, - "loss": 0.016374409198760986, - "step": 540 - }, - { - "epoch": 0.09292412617220801, - "grad_norm": 0.10902808606624603, - "learning_rate": 7.499808273204476e-05, - "loss": 0.013505718111991883, - "step": 545 - }, - { - "epoch": 0.0937766410912191, - "grad_norm": 0.16540755331516266, - "learning_rate": 7.499781489133228e-05, - "loss": 0.016257329285144805, - "step": 550 - }, - { - "epoch": 0.09462915601023018, - "grad_norm": 0.1228717565536499, - "learning_rate": 7.499752954539245e-05, - "loss": 0.011345921456813813, - "step": 555 - }, - { - "epoch": 0.09548167092924126, - "grad_norm": 0.12704797089099884, - "learning_rate": 7.49972266943585e-05, - "loss": 0.014449910819530487, - "step": 560 - }, - { - "epoch": 0.09633418584825235, - "grad_norm": 0.2014201581478119, - "learning_rate": 7.499690633837178e-05, - "loss": 0.01456935852766037, - "step": 565 - }, - { - "epoch": 0.09718670076726342, - "grad_norm": 0.17480657994747162, - "learning_rate": 7.499656847758187e-05, - "loss": 0.014413098990917205, - "step": 570 - }, - { - "epoch": 0.09803921568627451, - "grad_norm": 0.18946893513202667, - "learning_rate": 7.499621311214646e-05, - "loss": 0.0139508917927742, - "step": 575 - }, - { - "epoch": 0.0988917306052856, - "grad_norm": 0.15367820858955383, - "learning_rate": 7.499584024223149e-05, - "loss": 0.014091435074806213, - "step": 580 - }, - { - "epoch": 0.09974424552429667, - "grad_norm": 0.15018688142299652, - "learning_rate": 7.499544986801099e-05, - "loss": 0.016737687587738036, - "step": 585 - }, - { - "epoch": 0.10059676044330776, - "grad_norm": 0.17522579431533813, - "learning_rate": 7.499504198966722e-05, - "loss": 0.015822765231132508, - "step": 590 - }, - { - "epoch": 0.10144927536231885, - "grad_norm": 0.12634818255901337, - "learning_rate": 7.499461660739059e-05, - "loss": 0.015363042056560517, - "step": 595 - }, - { - "epoch": 0.10230179028132992, - "grad_norm": 0.12466495484113693, - "learning_rate": 7.499417372137968e-05, - "loss": 0.013208043575286866, - "step": 600 - }, - { - "epoch": 0.10315430520034101, - "grad_norm": 0.18877951800823212, - "learning_rate": 7.499371333184125e-05, - "loss": 0.016261917352676392, - "step": 605 - }, - { - "epoch": 0.10400682011935208, - "grad_norm": 0.21521399915218353, - "learning_rate": 7.49932354389902e-05, - "loss": 0.013861049711704255, - "step": 610 - }, - { - "epoch": 0.10485933503836317, - "grad_norm": 0.1375613659620285, - "learning_rate": 7.499274004304964e-05, - "loss": 0.015597744286060334, - "step": 615 - }, - { - "epoch": 0.10571184995737426, - "grad_norm": 0.16078200936317444, - "learning_rate": 7.499222714425087e-05, - "loss": 0.018308353424072266, - "step": 620 - }, - { - "epoch": 0.10656436487638533, - "grad_norm": 0.15485632419586182, - "learning_rate": 7.499169674283328e-05, - "loss": 0.015836401283740996, - "step": 625 - }, - { - "epoch": 0.10741687979539642, - "grad_norm": 0.14019452035427094, - "learning_rate": 7.499114883904451e-05, - "loss": 0.014591602981090546, - "step": 630 - }, - { - "epoch": 0.1082693947144075, - "grad_norm": 0.2042212039232254, - "learning_rate": 7.499058343314031e-05, - "loss": 0.01718664914369583, - "step": 635 - }, - { - "epoch": 0.10912190963341858, - "grad_norm": 0.13801656663417816, - "learning_rate": 7.499000052538467e-05, - "loss": 0.015579727292060853, - "step": 640 - }, - { - "epoch": 0.10997442455242967, - "grad_norm": 0.12787523865699768, - "learning_rate": 7.498940011604968e-05, - "loss": 0.012525486946105956, - "step": 645 - }, - { - "epoch": 0.11082693947144075, - "grad_norm": 0.1752539724111557, - "learning_rate": 7.498878220541564e-05, - "loss": 0.014286568760871888, - "step": 650 - }, - { - "epoch": 0.11167945439045183, - "grad_norm": 0.12485212087631226, - "learning_rate": 7.498814679377101e-05, - "loss": 0.01482405811548233, - "step": 655 - }, - { - "epoch": 0.11253196930946291, - "grad_norm": 0.10980220139026642, - "learning_rate": 7.498749388141243e-05, - "loss": 0.01597980558872223, - "step": 660 - }, - { - "epoch": 0.113384484228474, - "grad_norm": 0.09144977480173111, - "learning_rate": 7.498682346864469e-05, - "loss": 0.011583130061626434, - "step": 665 - }, - { - "epoch": 0.11423699914748509, - "grad_norm": 0.11955336481332779, - "learning_rate": 7.498613555578076e-05, - "loss": 0.013009116053581238, - "step": 670 - }, - { - "epoch": 0.11508951406649616, - "grad_norm": 0.14505070447921753, - "learning_rate": 7.49854301431418e-05, - "loss": 0.013878281414508819, - "step": 675 - }, - { - "epoch": 0.11594202898550725, - "grad_norm": 0.15674598515033722, - "learning_rate": 7.49847072310571e-05, - "loss": 0.014717698097229004, - "step": 680 - }, - { - "epoch": 0.11679454390451834, - "grad_norm": 0.16684003174304962, - "learning_rate": 7.498396681986413e-05, - "loss": 0.015567027032375336, - "step": 685 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 0.13314439356327057, - "learning_rate": 7.498320890990857e-05, - "loss": 0.017679129540920258, - "step": 690 - }, - { - "epoch": 0.1184995737425405, - "grad_norm": 0.1099700927734375, - "learning_rate": 7.498243350154423e-05, - "loss": 0.015660777688026428, - "step": 695 - }, - { - "epoch": 0.11935208866155157, - "grad_norm": 0.09305288642644882, - "learning_rate": 7.498164059513307e-05, - "loss": 0.011864218115806579, - "step": 700 - }, - { - "epoch": 0.12020460358056266, - "grad_norm": 0.1284978687763214, - "learning_rate": 7.498083019104527e-05, - "loss": 0.013128276169300079, - "step": 705 - }, - { - "epoch": 0.12105711849957375, - "grad_norm": 0.19205476343631744, - "learning_rate": 7.498000228965913e-05, - "loss": 0.014518238604068756, - "step": 710 - }, - { - "epoch": 0.12190963341858482, - "grad_norm": 0.25064221024513245, - "learning_rate": 7.497915689136119e-05, - "loss": 0.014235696196556092, - "step": 715 - }, - { - "epoch": 0.12276214833759591, - "grad_norm": 0.11866044998168945, - "learning_rate": 7.497829399654607e-05, - "loss": 0.013622967898845673, - "step": 720 - }, - { - "epoch": 0.12361466325660699, - "grad_norm": 0.1366252452135086, - "learning_rate": 7.49774136056166e-05, - "loss": 0.013592693209648132, - "step": 725 - }, - { - "epoch": 0.12446717817561807, - "grad_norm": 0.12722773849964142, - "learning_rate": 7.497651571898379e-05, - "loss": 0.02055167257785797, - "step": 730 - }, - { - "epoch": 0.12531969309462915, - "grad_norm": 0.07723517715930939, - "learning_rate": 7.49756003370668e-05, - "loss": 0.012377069890499115, - "step": 735 - }, - { - "epoch": 0.12617220801364024, - "grad_norm": 0.11081967502832413, - "learning_rate": 7.497466746029293e-05, - "loss": 0.013797640800476074, - "step": 740 - }, - { - "epoch": 0.12702472293265132, - "grad_norm": 0.1117677390575409, - "learning_rate": 7.497371708909771e-05, - "loss": 0.01338990479707718, - "step": 745 - }, - { - "epoch": 0.1278772378516624, - "grad_norm": 0.24659112095832825, - "learning_rate": 7.497274922392483e-05, - "loss": 0.012844511866569519, - "step": 750 - }, - { - "epoch": 0.1287297527706735, - "grad_norm": 0.24900244176387787, - "learning_rate": 7.497176386522606e-05, - "loss": 0.015656352043151855, - "step": 755 - }, - { - "epoch": 0.12958226768968456, - "grad_norm": 0.1331390142440796, - "learning_rate": 7.497076101346144e-05, - "loss": 0.013722085952758789, - "step": 760 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 0.1224697008728981, - "learning_rate": 7.496974066909913e-05, - "loss": 0.011186304688453674, - "step": 765 - }, - { - "epoch": 0.13128729752770674, - "grad_norm": 0.11053820699453354, - "learning_rate": 7.496870283261546e-05, - "loss": 0.012894454598426818, - "step": 770 - }, - { - "epoch": 0.13213981244671782, - "grad_norm": 0.09663277864456177, - "learning_rate": 7.49676475044949e-05, - "loss": 0.015331995487213135, - "step": 775 - }, - { - "epoch": 0.1329923273657289, - "grad_norm": 0.1483219712972641, - "learning_rate": 7.496657468523014e-05, - "loss": 0.015070399641990662, - "step": 780 - }, - { - "epoch": 0.13384484228473997, - "grad_norm": 0.12375539541244507, - "learning_rate": 7.496548437532202e-05, - "loss": 0.013722005486488342, - "step": 785 - }, - { - "epoch": 0.13469735720375106, - "grad_norm": 0.14662130177021027, - "learning_rate": 7.496437657527949e-05, - "loss": 0.012852996587753296, - "step": 790 - }, - { - "epoch": 0.13554987212276215, - "grad_norm": 0.1740300953388214, - "learning_rate": 7.496325128561975e-05, - "loss": 0.014796209335327149, - "step": 795 - }, - { - "epoch": 0.13640238704177324, - "grad_norm": 0.10670243203639984, - "learning_rate": 7.496210850686809e-05, - "loss": 0.013983005285263061, - "step": 800 - }, - { - "epoch": 0.13725490196078433, - "grad_norm": 0.1362515240907669, - "learning_rate": 7.496094823955801e-05, - "loss": 0.014472618699073792, - "step": 805 - }, - { - "epoch": 0.13810741687979539, - "grad_norm": 0.11403689533472061, - "learning_rate": 7.495977048423117e-05, - "loss": 0.01294848918914795, - "step": 810 - }, - { - "epoch": 0.13895993179880647, - "grad_norm": 0.11532565206289291, - "learning_rate": 7.495857524143738e-05, - "loss": 0.011346425861120224, - "step": 815 - }, - { - "epoch": 0.13981244671781756, - "grad_norm": 0.1506761759519577, - "learning_rate": 7.495736251173463e-05, - "loss": 0.016532811522483825, - "step": 820 - }, - { - "epoch": 0.14066496163682865, - "grad_norm": 0.10915899276733398, - "learning_rate": 7.495613229568903e-05, - "loss": 0.01278679072856903, - "step": 825 - }, - { - "epoch": 0.14151747655583974, - "grad_norm": 0.10394130647182465, - "learning_rate": 7.49548845938749e-05, - "loss": 0.011449626088142395, - "step": 830 - }, - { - "epoch": 0.1423699914748508, - "grad_norm": 0.08730677515268326, - "learning_rate": 7.495361940687475e-05, - "loss": 0.011916645616292954, - "step": 835 - }, - { - "epoch": 0.1432225063938619, - "grad_norm": 0.08257348835468292, - "learning_rate": 7.495233673527914e-05, - "loss": 0.013689276576042176, - "step": 840 - }, - { - "epoch": 0.14407502131287298, - "grad_norm": 0.182078555226326, - "learning_rate": 7.495103657968692e-05, - "loss": 0.019141729176044463, - "step": 845 - }, - { - "epoch": 0.14492753623188406, - "grad_norm": 0.105568528175354, - "learning_rate": 7.494971894070501e-05, - "loss": 0.015522226691246033, - "step": 850 - }, - { - "epoch": 0.14578005115089515, - "grad_norm": 0.09030122309923172, - "learning_rate": 7.494838381894856e-05, - "loss": 0.012900182604789734, - "step": 855 - }, - { - "epoch": 0.1466325660699062, - "grad_norm": 0.11199921369552612, - "learning_rate": 7.494703121504082e-05, - "loss": 0.013011230528354645, - "step": 860 - }, - { - "epoch": 0.1474850809889173, - "grad_norm": 0.11342430859804153, - "learning_rate": 7.494566112961325e-05, - "loss": 0.015477502346038818, - "step": 865 - }, - { - "epoch": 0.1483375959079284, - "grad_norm": 0.07956613600254059, - "learning_rate": 7.494427356330544e-05, - "loss": 0.011270551383495331, - "step": 870 - }, - { - "epoch": 0.14919011082693948, - "grad_norm": 0.13356897234916687, - "learning_rate": 7.494286851676515e-05, - "loss": 0.012481572479009629, - "step": 875 - }, - { - "epoch": 0.15004262574595056, - "grad_norm": 0.14493827521800995, - "learning_rate": 7.494144599064833e-05, - "loss": 0.015318951010704041, - "step": 880 - }, - { - "epoch": 0.15089514066496162, - "grad_norm": 0.13254614174365997, - "learning_rate": 7.494000598561902e-05, - "loss": 0.012697519361972808, - "step": 885 - }, - { - "epoch": 0.1517476555839727, - "grad_norm": 0.12164720892906189, - "learning_rate": 7.49385485023495e-05, - "loss": 0.011361779272556305, - "step": 890 - }, - { - "epoch": 0.1526001705029838, - "grad_norm": 0.10743863880634308, - "learning_rate": 7.493707354152015e-05, - "loss": 0.012824115157127381, - "step": 895 - }, - { - "epoch": 0.1534526854219949, - "grad_norm": 0.0933203473687172, - "learning_rate": 7.493558110381954e-05, - "loss": 0.012234293669462205, - "step": 900 - }, - { - "epoch": 0.15430520034100598, - "grad_norm": 0.10252948850393295, - "learning_rate": 7.493407118994437e-05, - "loss": 0.01874733567237854, - "step": 905 - }, - { - "epoch": 0.15515771526001704, - "grad_norm": 0.1184248998761177, - "learning_rate": 7.493254380059954e-05, - "loss": 0.012014241516590118, - "step": 910 - }, - { - "epoch": 0.15601023017902813, - "grad_norm": 0.10140799731016159, - "learning_rate": 7.49309989364981e-05, - "loss": 0.013288506865501403, - "step": 915 - }, - { - "epoch": 0.1568627450980392, - "grad_norm": 0.15704941749572754, - "learning_rate": 7.492943659836121e-05, - "loss": 0.012907981872558594, - "step": 920 - }, - { - "epoch": 0.1577152600170503, - "grad_norm": 0.10368761420249939, - "learning_rate": 7.492785678691822e-05, - "loss": 0.01384827345609665, - "step": 925 - }, - { - "epoch": 0.1585677749360614, - "grad_norm": 0.13517284393310547, - "learning_rate": 7.492625950290668e-05, - "loss": 0.014644764363765717, - "step": 930 - }, - { - "epoch": 0.15942028985507245, - "grad_norm": 0.08189263194799423, - "learning_rate": 7.492464474707222e-05, - "loss": 0.011974713951349258, - "step": 935 - }, - { - "epoch": 0.16027280477408354, - "grad_norm": 0.11737149208784103, - "learning_rate": 7.492301252016867e-05, - "loss": 0.014023615419864655, - "step": 940 - }, - { - "epoch": 0.16112531969309463, - "grad_norm": 0.15778031945228577, - "learning_rate": 7.492136282295801e-05, - "loss": 0.01533726304769516, - "step": 945 - }, - { - "epoch": 0.16197783461210571, - "grad_norm": 0.14194118976593018, - "learning_rate": 7.491969565621036e-05, - "loss": 0.01569782793521881, - "step": 950 - }, - { - "epoch": 0.1628303495311168, - "grad_norm": 0.12579558789730072, - "learning_rate": 7.491801102070403e-05, - "loss": 0.012844063341617584, - "step": 955 - }, - { - "epoch": 0.1636828644501279, - "grad_norm": 0.10578597337007523, - "learning_rate": 7.491630891722547e-05, - "loss": 0.011186198890209198, - "step": 960 - }, - { - "epoch": 0.16453537936913895, - "grad_norm": 0.12398207187652588, - "learning_rate": 7.491458934656925e-05, - "loss": 0.014328254759311676, - "step": 965 - }, - { - "epoch": 0.16538789428815004, - "grad_norm": 0.09390626102685928, - "learning_rate": 7.491285230953814e-05, - "loss": 0.013881708681583404, - "step": 970 - }, - { - "epoch": 0.16624040920716113, - "grad_norm": 0.17196106910705566, - "learning_rate": 7.491109780694303e-05, - "loss": 0.014424234628677368, - "step": 975 - }, - { - "epoch": 0.16709292412617222, - "grad_norm": 0.06994624435901642, - "learning_rate": 7.490932583960302e-05, - "loss": 0.010434426367282867, - "step": 980 - }, - { - "epoch": 0.1679454390451833, - "grad_norm": 0.09414499998092651, - "learning_rate": 7.490753640834527e-05, - "loss": 0.010483803600072861, - "step": 985 - }, - { - "epoch": 0.16879795396419436, - "grad_norm": 0.11002875864505768, - "learning_rate": 7.490572951400518e-05, - "loss": 0.009266073256731034, - "step": 990 - }, - { - "epoch": 0.16965046888320545, - "grad_norm": 0.14956022799015045, - "learning_rate": 7.490390515742626e-05, - "loss": 0.015529079735279084, - "step": 995 - }, - { - "epoch": 0.17050298380221654, - "grad_norm": 0.13828666508197784, - "learning_rate": 7.490206333946019e-05, - "loss": 0.011511271446943283, - "step": 1000 - }, - { - "epoch": 0.17135549872122763, - "grad_norm": 0.1265997439622879, - "learning_rate": 7.490020406096678e-05, - "loss": 0.010465707629919052, - "step": 1005 - }, - { - "epoch": 0.17220801364023872, - "grad_norm": 0.07772056013345718, - "learning_rate": 7.489832732281401e-05, - "loss": 0.013828210532665253, - "step": 1010 - }, - { - "epoch": 0.17306052855924978, - "grad_norm": 0.08055376261472702, - "learning_rate": 7.489643312587799e-05, - "loss": 0.013010218739509583, - "step": 1015 - }, - { - "epoch": 0.17391304347826086, - "grad_norm": 0.09059367328882217, - "learning_rate": 7.489452147104301e-05, - "loss": 0.013864235579967498, - "step": 1020 - }, - { - "epoch": 0.17476555839727195, - "grad_norm": 0.15273553133010864, - "learning_rate": 7.489259235920149e-05, - "loss": 0.013432112336158753, - "step": 1025 - }, - { - "epoch": 0.17561807331628304, - "grad_norm": 0.11343793570995331, - "learning_rate": 7.489064579125399e-05, - "loss": 0.01213686615228653, - "step": 1030 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 0.11880069226026535, - "learning_rate": 7.488868176810926e-05, - "loss": 0.012188264727592468, - "step": 1035 - }, - { - "epoch": 0.1773231031543052, - "grad_norm": 0.16270127892494202, - "learning_rate": 7.488670029068415e-05, - "loss": 0.015294317901134492, - "step": 1040 - }, - { - "epoch": 0.17817561807331628, - "grad_norm": 0.11542797088623047, - "learning_rate": 7.488470135990369e-05, - "loss": 0.013500772416591644, - "step": 1045 - }, - { - "epoch": 0.17902813299232737, - "grad_norm": 0.09579882025718689, - "learning_rate": 7.488268497670103e-05, - "loss": 0.013453315198421478, - "step": 1050 - }, - { - "epoch": 0.17988064791133845, - "grad_norm": 0.08847666531801224, - "learning_rate": 7.488065114201752e-05, - "loss": 0.0153861403465271, - "step": 1055 - }, - { - "epoch": 0.18073316283034954, - "grad_norm": 0.11167823523283005, - "learning_rate": 7.487859985680257e-05, - "loss": 0.011502107977867127, - "step": 1060 - }, - { - "epoch": 0.1815856777493606, - "grad_norm": 0.11255413293838501, - "learning_rate": 7.487653112201385e-05, - "loss": 0.012194579839706421, - "step": 1065 - }, - { - "epoch": 0.1824381926683717, - "grad_norm": 0.10929680615663528, - "learning_rate": 7.487444493861705e-05, - "loss": 0.015874122083187104, - "step": 1070 - }, - { - "epoch": 0.18329070758738278, - "grad_norm": 0.10753592848777771, - "learning_rate": 7.487234130758613e-05, - "loss": 0.009445396810770034, - "step": 1075 - }, - { - "epoch": 0.18414322250639387, - "grad_norm": 0.11368737369775772, - "learning_rate": 7.487022022990309e-05, - "loss": 0.011674505472183228, - "step": 1080 - }, - { - "epoch": 0.18499573742540495, - "grad_norm": 0.12944836914539337, - "learning_rate": 7.486808170655813e-05, - "loss": 0.011856313049793243, - "step": 1085 - }, - { - "epoch": 0.18584825234441602, - "grad_norm": 0.0833214819431305, - "learning_rate": 7.48659257385496e-05, - "loss": 0.014119544625282287, - "step": 1090 - }, - { - "epoch": 0.1867007672634271, - "grad_norm": 0.11955790221691132, - "learning_rate": 7.486375232688397e-05, - "loss": 0.012977911531925202, - "step": 1095 - }, - { - "epoch": 0.1875532821824382, - "grad_norm": 0.1452455222606659, - "learning_rate": 7.486156147257584e-05, - "loss": 0.015410827100276947, - "step": 1100 - }, - { - "epoch": 0.18840579710144928, - "grad_norm": 0.09630092978477478, - "learning_rate": 7.485935317664801e-05, - "loss": 0.013698874413967133, - "step": 1105 - }, - { - "epoch": 0.18925831202046037, - "grad_norm": 0.11687257140874863, - "learning_rate": 7.485712744013137e-05, - "loss": 0.013196484744548797, - "step": 1110 - }, - { - "epoch": 0.19011082693947143, - "grad_norm": 0.08894632011651993, - "learning_rate": 7.485488426406495e-05, - "loss": 0.01540881097316742, - "step": 1115 - }, - { - "epoch": 0.19096334185848252, - "grad_norm": 0.09196458756923676, - "learning_rate": 7.485262364949597e-05, - "loss": 0.012018527090549468, - "step": 1120 - }, - { - "epoch": 0.1918158567774936, - "grad_norm": 0.12328553944826126, - "learning_rate": 7.485034559747974e-05, - "loss": 0.014925773441791534, - "step": 1125 - }, - { - "epoch": 0.1926683716965047, - "grad_norm": 0.12566202878952026, - "learning_rate": 7.484805010907975e-05, - "loss": 0.01104198843240738, - "step": 1130 - }, - { - "epoch": 0.19352088661551578, - "grad_norm": 0.1022765263915062, - "learning_rate": 7.484573718536758e-05, - "loss": 0.0118367500603199, - "step": 1135 - }, - { - "epoch": 0.19437340153452684, - "grad_norm": 0.09682224690914154, - "learning_rate": 7.4843406827423e-05, - "loss": 0.011423001438379288, - "step": 1140 - }, - { - "epoch": 0.19522591645353793, - "grad_norm": 0.15166254341602325, - "learning_rate": 7.484105903633388e-05, - "loss": 0.014048118889331818, - "step": 1145 - }, - { - "epoch": 0.19607843137254902, - "grad_norm": 0.09285032004117966, - "learning_rate": 7.483869381319627e-05, - "loss": 0.012882034480571746, - "step": 1150 - }, - { - "epoch": 0.1969309462915601, - "grad_norm": 0.09011214971542358, - "learning_rate": 7.483631115911434e-05, - "loss": 0.01419239491224289, - "step": 1155 - }, - { - "epoch": 0.1977834612105712, - "grad_norm": 0.14540383219718933, - "learning_rate": 7.483391107520037e-05, - "loss": 0.014623096585273743, - "step": 1160 - }, - { - "epoch": 0.19863597612958228, - "grad_norm": 0.12326448410749435, - "learning_rate": 7.483149356257479e-05, - "loss": 0.013109591603279114, - "step": 1165 - }, - { - "epoch": 0.19948849104859334, - "grad_norm": 0.07067535817623138, - "learning_rate": 7.482905862236622e-05, - "loss": 0.013740380108356477, - "step": 1170 - }, - { - "epoch": 0.20034100596760443, - "grad_norm": 0.10170529782772064, - "learning_rate": 7.482660625571134e-05, - "loss": 0.011151721328496933, - "step": 1175 - }, - { - "epoch": 0.20119352088661552, - "grad_norm": 0.10074683278799057, - "learning_rate": 7.482413646375498e-05, - "loss": 0.01180294007062912, - "step": 1180 - }, - { - "epoch": 0.2020460358056266, - "grad_norm": 0.047992780804634094, - "learning_rate": 7.482164924765016e-05, - "loss": 0.01065710186958313, - "step": 1185 - }, - { - "epoch": 0.2028985507246377, - "grad_norm": 0.1279197484254837, - "learning_rate": 7.481914460855796e-05, - "loss": 0.012219739705324173, - "step": 1190 - }, - { - "epoch": 0.20375106564364875, - "grad_norm": 0.11339649558067322, - "learning_rate": 7.481662254764765e-05, - "loss": 0.012664712965488434, - "step": 1195 - }, - { - "epoch": 0.20460358056265984, - "grad_norm": 0.10576959699392319, - "learning_rate": 7.481408306609662e-05, - "loss": 0.010009048134088516, - "step": 1200 - }, - { - "epoch": 0.20545609548167093, - "grad_norm": 0.08073283731937408, - "learning_rate": 7.481152616509037e-05, - "loss": 0.011126396805047989, - "step": 1205 - }, - { - "epoch": 0.20630861040068202, - "grad_norm": 0.09888558834791183, - "learning_rate": 7.480895184582253e-05, - "loss": 0.013096305727958679, - "step": 1210 - }, - { - "epoch": 0.2071611253196931, - "grad_norm": 0.09703118354082108, - "learning_rate": 7.48063601094949e-05, - "loss": 0.010653502494096755, - "step": 1215 - }, - { - "epoch": 0.20801364023870417, - "grad_norm": 0.10693266987800598, - "learning_rate": 7.48037509573174e-05, - "loss": 0.012283077836036682, - "step": 1220 - }, - { - "epoch": 0.20886615515771526, - "grad_norm": 0.1024889275431633, - "learning_rate": 7.480112439050804e-05, - "loss": 0.012971282005310059, - "step": 1225 - }, - { - "epoch": 0.20971867007672634, - "grad_norm": 0.10043203830718994, - "learning_rate": 7.4798480410293e-05, - "loss": 0.011451539397239686, - "step": 1230 - }, - { - "epoch": 0.21057118499573743, - "grad_norm": 0.11248672753572464, - "learning_rate": 7.47958190179066e-05, - "loss": 0.012805460393428803, - "step": 1235 - }, - { - "epoch": 0.21142369991474852, - "grad_norm": 0.08651525527238846, - "learning_rate": 7.479314021459123e-05, - "loss": 0.013016811013221741, - "step": 1240 - }, - { - "epoch": 0.21227621483375958, - "grad_norm": 0.06062095984816551, - "learning_rate": 7.479044400159746e-05, - "loss": 0.01299230456352234, - "step": 1245 - }, - { - "epoch": 0.21312872975277067, - "grad_norm": 0.1589215248823166, - "learning_rate": 7.478773038018397e-05, - "loss": 0.012607543170452118, - "step": 1250 - }, - { - "epoch": 0.21398124467178176, - "grad_norm": 0.12105076014995575, - "learning_rate": 7.478499935161758e-05, - "loss": 0.012772174179553985, - "step": 1255 - }, - { - "epoch": 0.21483375959079284, - "grad_norm": 0.0846070721745491, - "learning_rate": 7.478225091717323e-05, - "loss": 0.009387130290269852, - "step": 1260 - }, - { - "epoch": 0.21568627450980393, - "grad_norm": 0.054560381919145584, - "learning_rate": 7.477948507813396e-05, - "loss": 0.013299009203910828, - "step": 1265 - }, - { - "epoch": 0.216538789428815, - "grad_norm": 0.10125566273927689, - "learning_rate": 7.477670183579094e-05, - "loss": 0.013010343909263611, - "step": 1270 - }, - { - "epoch": 0.21739130434782608, - "grad_norm": 0.10493458807468414, - "learning_rate": 7.477390119144353e-05, - "loss": 0.013531042635440827, - "step": 1275 - }, - { - "epoch": 0.21824381926683717, - "grad_norm": 0.07453935593366623, - "learning_rate": 7.477108314639913e-05, - "loss": 0.01330820918083191, - "step": 1280 - }, - { - "epoch": 0.21909633418584826, - "grad_norm": 0.14430643618106842, - "learning_rate": 7.47682477019733e-05, - "loss": 0.015061555802822113, - "step": 1285 - }, - { - "epoch": 0.21994884910485935, - "grad_norm": 0.1073407456278801, - "learning_rate": 7.476539485948973e-05, - "loss": 0.011137319356203079, - "step": 1290 - }, - { - "epoch": 0.2208013640238704, - "grad_norm": 0.16794899106025696, - "learning_rate": 7.476252462028021e-05, - "loss": 0.013223762810230254, - "step": 1295 - }, - { - "epoch": 0.2216538789428815, - "grad_norm": 0.11509175598621368, - "learning_rate": 7.475963698568468e-05, - "loss": 0.012790821492671967, - "step": 1300 - }, - { - "epoch": 0.22250639386189258, - "grad_norm": 0.08615118265151978, - "learning_rate": 7.475673195705116e-05, - "loss": 0.011050455272197723, - "step": 1305 - }, - { - "epoch": 0.22335890878090367, - "grad_norm": 0.1186874732375145, - "learning_rate": 7.475380953573583e-05, - "loss": 0.011253353953361512, - "step": 1310 - }, - { - "epoch": 0.22421142369991476, - "grad_norm": 0.08680208027362823, - "learning_rate": 7.475086972310297e-05, - "loss": 0.010736887156963349, - "step": 1315 - }, - { - "epoch": 0.22506393861892582, - "grad_norm": 0.08204677700996399, - "learning_rate": 7.474791252052498e-05, - "loss": 0.011695361882448196, - "step": 1320 - }, - { - "epoch": 0.2259164535379369, - "grad_norm": 0.08945680409669876, - "learning_rate": 7.47449379293824e-05, - "loss": 0.01684057414531708, - "step": 1325 - }, - { - "epoch": 0.226768968456948, - "grad_norm": 0.12350911647081375, - "learning_rate": 7.474194595106384e-05, - "loss": 0.012560060620307923, - "step": 1330 - }, - { - "epoch": 0.22762148337595908, - "grad_norm": 0.09201455116271973, - "learning_rate": 7.473893658696605e-05, - "loss": 0.01128845140337944, - "step": 1335 - }, - { - "epoch": 0.22847399829497017, - "grad_norm": 0.12938448786735535, - "learning_rate": 7.473590983849396e-05, - "loss": 0.011510471999645232, - "step": 1340 - }, - { - "epoch": 0.22932651321398123, - "grad_norm": 0.0837291032075882, - "learning_rate": 7.473286570706047e-05, - "loss": 0.011677465587854385, - "step": 1345 - }, - { - "epoch": 0.23017902813299232, - "grad_norm": 0.12514546513557434, - "learning_rate": 7.472980419408675e-05, - "loss": 0.01308433711528778, - "step": 1350 - }, - { - "epoch": 0.2310315430520034, - "grad_norm": 0.05483829975128174, - "learning_rate": 7.472672530100199e-05, - "loss": 0.007203156501054764, - "step": 1355 - }, - { - "epoch": 0.2318840579710145, - "grad_norm": 0.13903425633907318, - "learning_rate": 7.472362902924352e-05, - "loss": 0.013231572508811951, - "step": 1360 - }, - { - "epoch": 0.23273657289002558, - "grad_norm": 0.09622041881084442, - "learning_rate": 7.472051538025678e-05, - "loss": 0.013325902819633483, - "step": 1365 - }, - { - "epoch": 0.23358908780903667, - "grad_norm": 0.0792275071144104, - "learning_rate": 7.471738435549533e-05, - "loss": 0.011098403483629227, - "step": 1370 - }, - { - "epoch": 0.23444160272804773, - "grad_norm": 0.11833506077528, - "learning_rate": 7.471423595642084e-05, - "loss": 0.014845246076583862, - "step": 1375 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 0.07531571388244629, - "learning_rate": 7.471107018450309e-05, - "loss": 0.011498528718948364, - "step": 1380 - }, - { - "epoch": 0.2361466325660699, - "grad_norm": 0.08739249408245087, - "learning_rate": 7.470788704121995e-05, - "loss": 0.013241058588027954, - "step": 1385 - }, - { - "epoch": 0.236999147485081, - "grad_norm": 0.07113732397556305, - "learning_rate": 7.470468652805743e-05, - "loss": 0.009632241725921632, - "step": 1390 - }, - { - "epoch": 0.23785166240409208, - "grad_norm": 0.07838430255651474, - "learning_rate": 7.470146864650965e-05, - "loss": 0.009344339370727539, - "step": 1395 - }, - { - "epoch": 0.23870417732310314, - "grad_norm": 0.1086372509598732, - "learning_rate": 7.46982333980788e-05, - "loss": 0.014708395302295684, - "step": 1400 - }, - { - "epoch": 0.23955669224211423, - "grad_norm": 0.06628046184778214, - "learning_rate": 7.469498078427519e-05, - "loss": 0.011472882330417633, - "step": 1405 - }, - { - "epoch": 0.24040920716112532, - "grad_norm": 0.13003386557102203, - "learning_rate": 7.46917108066173e-05, - "loss": 0.011933800578117371, - "step": 1410 - }, - { - "epoch": 0.2412617220801364, - "grad_norm": 0.07365831732749939, - "learning_rate": 7.468842346663162e-05, - "loss": 0.0102902352809906, - "step": 1415 - }, - { - "epoch": 0.2421142369991475, - "grad_norm": 0.11540158838033676, - "learning_rate": 7.468511876585279e-05, - "loss": 0.012016136944293977, - "step": 1420 - }, - { - "epoch": 0.24296675191815856, - "grad_norm": 0.11687944084405899, - "learning_rate": 7.468179670582359e-05, - "loss": 0.01773642897605896, - "step": 1425 - }, - { - "epoch": 0.24381926683716965, - "grad_norm": 0.10464506596326828, - "learning_rate": 7.467845728809483e-05, - "loss": 0.009476778656244278, - "step": 1430 - }, - { - "epoch": 0.24467178175618073, - "grad_norm": 0.11479093879461288, - "learning_rate": 7.46751005142255e-05, - "loss": 0.014100676774978638, - "step": 1435 - }, - { - "epoch": 0.24552429667519182, - "grad_norm": 0.1388610154390335, - "learning_rate": 7.46717263857826e-05, - "loss": 0.012735754251480103, - "step": 1440 - }, - { - "epoch": 0.2463768115942029, - "grad_norm": 0.08022485673427582, - "learning_rate": 7.466833490434132e-05, - "loss": 0.014391189813613892, - "step": 1445 - }, - { - "epoch": 0.24722932651321397, - "grad_norm": 0.12174725532531738, - "learning_rate": 7.466492607148492e-05, - "loss": 0.013387931883335114, - "step": 1450 - }, - { - "epoch": 0.24808184143222506, - "grad_norm": 0.1102161779999733, - "learning_rate": 7.466149988880474e-05, - "loss": 0.01143224760890007, - "step": 1455 - }, - { - "epoch": 0.24893435635123615, - "grad_norm": 0.14878468215465546, - "learning_rate": 7.465805635790024e-05, - "loss": 0.010428830236196517, - "step": 1460 - }, - { - "epoch": 0.24978687127024723, - "grad_norm": 0.3046579658985138, - "learning_rate": 7.4654595480379e-05, - "loss": 0.012648795545101166, - "step": 1465 - }, - { - "epoch": 0.24995737425404946, - "eval_loss": 0.036103956401348114, - "eval_runtime": 3.6524, - "eval_samples_per_second": 68.995, - "eval_steps_per_second": 1.095, - "step": 1466 - }, - { - "eval_cer_subset": 0.01393977885257381, - "eval_cer_subset_edit_distance": 856, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 1466 - }, - { - "epoch": 0.2506393861892583, - "grad_norm": 0.10660448670387268, - "learning_rate": 7.465111725785664e-05, - "loss": 0.013486798107624053, - "step": 1470 - }, - { - "epoch": 0.2514919011082694, - "grad_norm": 0.13888458907604218, - "learning_rate": 7.464762169195693e-05, - "loss": 0.015365575253963471, - "step": 1475 - }, - { - "epoch": 0.25234441602728047, - "grad_norm": 0.14945067465305328, - "learning_rate": 7.464410878431169e-05, - "loss": 0.01226709708571434, - "step": 1480 - }, - { - "epoch": 0.2531969309462916, - "grad_norm": 0.09638198465108871, - "learning_rate": 7.464057853656089e-05, - "loss": 0.012688608467578888, - "step": 1485 - }, - { - "epoch": 0.25404944586530265, - "grad_norm": 0.05725576728582382, - "learning_rate": 7.463703095035256e-05, - "loss": 0.011445847153663636, - "step": 1490 - }, - { - "epoch": 0.2549019607843137, - "grad_norm": 0.08474720269441605, - "learning_rate": 7.463346602734283e-05, - "loss": 0.01112249493598938, - "step": 1495 - }, - { - "epoch": 0.2557544757033248, - "grad_norm": 0.08283067494630814, - "learning_rate": 7.462988376919592e-05, - "loss": 0.01144670695066452, - "step": 1500 - }, - { - "epoch": 0.2566069906223359, - "grad_norm": 0.13687758147716522, - "learning_rate": 7.462628417758415e-05, - "loss": 0.012893360853195191, - "step": 1505 - }, - { - "epoch": 0.257459505541347, - "grad_norm": 0.16319195926189423, - "learning_rate": 7.462266725418793e-05, - "loss": 0.014364737272262573, - "step": 1510 - }, - { - "epoch": 0.25831202046035806, - "grad_norm": 0.0693240761756897, - "learning_rate": 7.461903300069576e-05, - "loss": 0.011550360918045044, - "step": 1515 - }, - { - "epoch": 0.2591645353793691, - "grad_norm": 0.0994478389620781, - "learning_rate": 7.461538141880423e-05, - "loss": 0.011711706221103669, - "step": 1520 - }, - { - "epoch": 0.26001705029838024, - "grad_norm": 0.20310325920581818, - "learning_rate": 7.461171251021802e-05, - "loss": 0.013178233802318574, - "step": 1525 - }, - { - "epoch": 0.2608695652173913, - "grad_norm": 0.07798318564891815, - "learning_rate": 7.460802627664991e-05, - "loss": 0.011273499578237534, - "step": 1530 - }, - { - "epoch": 0.2617220801364024, - "grad_norm": 0.1308072805404663, - "learning_rate": 7.460432271982073e-05, - "loss": 0.008084958046674728, - "step": 1535 - }, - { - "epoch": 0.2625745950554135, - "grad_norm": 0.08926808834075928, - "learning_rate": 7.460060184145944e-05, - "loss": 0.011974562704563142, - "step": 1540 - }, - { - "epoch": 0.26342710997442453, - "grad_norm": 0.07462260872125626, - "learning_rate": 7.459686364330307e-05, - "loss": 0.007739155739545822, - "step": 1545 - }, - { - "epoch": 0.26427962489343565, - "grad_norm": 0.10904734581708908, - "learning_rate": 7.459310812709675e-05, - "loss": 0.012024204432964324, - "step": 1550 - }, - { - "epoch": 0.2651321398124467, - "grad_norm": 0.11935116350650787, - "learning_rate": 7.458933529459364e-05, - "loss": 0.012462839484214783, - "step": 1555 - }, - { - "epoch": 0.2659846547314578, - "grad_norm": 0.08920887112617493, - "learning_rate": 7.458554514755506e-05, - "loss": 0.01472131609916687, - "step": 1560 - }, - { - "epoch": 0.2668371696504689, - "grad_norm": 0.12231490015983582, - "learning_rate": 7.458173768775036e-05, - "loss": 0.014967297017574311, - "step": 1565 - }, - { - "epoch": 0.26768968456947995, - "grad_norm": 0.10691904276609421, - "learning_rate": 7.4577912916957e-05, - "loss": 0.013200350105762482, - "step": 1570 - }, - { - "epoch": 0.26854219948849106, - "grad_norm": 0.06267247349023819, - "learning_rate": 7.457407083696049e-05, - "loss": 0.011946959048509597, - "step": 1575 - }, - { - "epoch": 0.2693947144075021, - "grad_norm": 0.10732340067625046, - "learning_rate": 7.457021144955448e-05, - "loss": 0.012722471356391906, - "step": 1580 - }, - { - "epoch": 0.27024722932651324, - "grad_norm": 0.08628841489553452, - "learning_rate": 7.456633475654061e-05, - "loss": 0.010444843024015427, - "step": 1585 - }, - { - "epoch": 0.2710997442455243, - "grad_norm": 0.1017296314239502, - "learning_rate": 7.456244075972866e-05, - "loss": 0.017299896478652953, - "step": 1590 - }, - { - "epoch": 0.27195225916453536, - "grad_norm": 0.07065381854772568, - "learning_rate": 7.455852946093652e-05, - "loss": 0.01379164457321167, - "step": 1595 - }, - { - "epoch": 0.2728047740835465, - "grad_norm": 0.08550920337438583, - "learning_rate": 7.455460086199008e-05, - "loss": 0.011976235359907151, - "step": 1600 - }, - { - "epoch": 0.27365728900255754, - "grad_norm": 0.08075132966041565, - "learning_rate": 7.455065496472335e-05, - "loss": 0.012481977045536042, - "step": 1605 - }, - { - "epoch": 0.27450980392156865, - "grad_norm": 0.08838896453380585, - "learning_rate": 7.454669177097839e-05, - "loss": 0.011825743317604064, - "step": 1610 - }, - { - "epoch": 0.2753623188405797, - "grad_norm": 0.06823412328958511, - "learning_rate": 7.454271128260537e-05, - "loss": 0.014278222620487214, - "step": 1615 - }, - { - "epoch": 0.27621483375959077, - "grad_norm": 0.09612765908241272, - "learning_rate": 7.45387135014625e-05, - "loss": 0.009220580756664275, - "step": 1620 - }, - { - "epoch": 0.2770673486786019, - "grad_norm": 0.08564051240682602, - "learning_rate": 7.45346984294161e-05, - "loss": 0.015146958827972411, - "step": 1625 - }, - { - "epoch": 0.27791986359761295, - "grad_norm": 0.0729006826877594, - "learning_rate": 7.453066606834052e-05, - "loss": 0.012136349081993103, - "step": 1630 - }, - { - "epoch": 0.27877237851662406, - "grad_norm": 0.10457300394773483, - "learning_rate": 7.452661642011818e-05, - "loss": 0.014803081750869751, - "step": 1635 - }, - { - "epoch": 0.2796248934356351, - "grad_norm": 0.09881619364023209, - "learning_rate": 7.452254948663964e-05, - "loss": 0.012653107941150665, - "step": 1640 - }, - { - "epoch": 0.2804774083546462, - "grad_norm": 0.12094103544950485, - "learning_rate": 7.451846526980343e-05, - "loss": 0.011742380261421204, - "step": 1645 - }, - { - "epoch": 0.2813299232736573, - "grad_norm": 0.06668030470609665, - "learning_rate": 7.451436377151624e-05, - "loss": 0.01095641851425171, - "step": 1650 - }, - { - "epoch": 0.28218243819266836, - "grad_norm": 0.06907116621732712, - "learning_rate": 7.451024499369278e-05, - "loss": 0.01093050017952919, - "step": 1655 - }, - { - "epoch": 0.2830349531116795, - "grad_norm": 0.13372033834457397, - "learning_rate": 7.45061089382558e-05, - "loss": 0.012350015342235565, - "step": 1660 - }, - { - "epoch": 0.28388746803069054, - "grad_norm": 0.06432037055492401, - "learning_rate": 7.450195560713617e-05, - "loss": 0.010150979459285735, - "step": 1665 - }, - { - "epoch": 0.2847399829497016, - "grad_norm": 0.10098759829998016, - "learning_rate": 7.449778500227281e-05, - "loss": 0.01070861890912056, - "step": 1670 - }, - { - "epoch": 0.2855924978687127, - "grad_norm": 0.1708894968032837, - "learning_rate": 7.449359712561269e-05, - "loss": 0.01218695342540741, - "step": 1675 - }, - { - "epoch": 0.2864450127877238, - "grad_norm": 0.15045367181301117, - "learning_rate": 7.448939197911084e-05, - "loss": 0.012416082620620727, - "step": 1680 - }, - { - "epoch": 0.2872975277067349, - "grad_norm": 0.08867572993040085, - "learning_rate": 7.44851695647304e-05, - "loss": 0.011927373707294464, - "step": 1685 - }, - { - "epoch": 0.28815004262574595, - "grad_norm": 0.1402040272951126, - "learning_rate": 7.448092988444247e-05, - "loss": 0.011733450740575791, - "step": 1690 - }, - { - "epoch": 0.289002557544757, - "grad_norm": 0.10436082631349564, - "learning_rate": 7.447667294022631e-05, - "loss": 0.013171072304248809, - "step": 1695 - }, - { - "epoch": 0.2898550724637681, - "grad_norm": 0.10628762096166611, - "learning_rate": 7.447239873406923e-05, - "loss": 0.012366896122694015, - "step": 1700 - }, - { - "epoch": 0.2907075873827792, - "grad_norm": 0.09782184660434723, - "learning_rate": 7.446810726796653e-05, - "loss": 0.011275313794612885, - "step": 1705 - }, - { - "epoch": 0.2915601023017903, - "grad_norm": 0.08403825014829636, - "learning_rate": 7.446379854392162e-05, - "loss": 0.010051032900810242, - "step": 1710 - }, - { - "epoch": 0.29241261722080136, - "grad_norm": 0.07938918471336365, - "learning_rate": 7.445947256394596e-05, - "loss": 0.00972949042916298, - "step": 1715 - }, - { - "epoch": 0.2932651321398124, - "grad_norm": 0.09250234067440033, - "learning_rate": 7.445512933005906e-05, - "loss": 0.009316288679838181, - "step": 1720 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 0.08939237147569656, - "learning_rate": 7.445076884428848e-05, - "loss": 0.007942373305559159, - "step": 1725 - }, - { - "epoch": 0.2949701619778346, - "grad_norm": 0.06440749019384384, - "learning_rate": 7.444639110866985e-05, - "loss": 0.008772502094507218, - "step": 1730 - }, - { - "epoch": 0.2958226768968457, - "grad_norm": 0.0980759784579277, - "learning_rate": 7.444199612524684e-05, - "loss": 0.0127939835190773, - "step": 1735 - }, - { - "epoch": 0.2966751918158568, - "grad_norm": 0.133849635720253, - "learning_rate": 7.443758389607117e-05, - "loss": 0.011026865988969802, - "step": 1740 - }, - { - "epoch": 0.29752770673486784, - "grad_norm": 0.08664857596158981, - "learning_rate": 7.443315442320263e-05, - "loss": 0.010273561626672745, - "step": 1745 - }, - { - "epoch": 0.29838022165387895, - "grad_norm": 0.11462656408548355, - "learning_rate": 7.442870770870902e-05, - "loss": 0.012825533747673035, - "step": 1750 - }, - { - "epoch": 0.29923273657289, - "grad_norm": 0.12586012482643127, - "learning_rate": 7.442424375466624e-05, - "loss": 0.01315489411354065, - "step": 1755 - }, - { - "epoch": 0.30008525149190113, - "grad_norm": 0.07139981538057327, - "learning_rate": 7.441976256315819e-05, - "loss": 0.010728174448013305, - "step": 1760 - }, - { - "epoch": 0.3009377664109122, - "grad_norm": 0.06837856024503708, - "learning_rate": 7.441526413627685e-05, - "loss": 0.012408022582530976, - "step": 1765 - }, - { - "epoch": 0.30179028132992325, - "grad_norm": 0.05851417034864426, - "learning_rate": 7.441074847612224e-05, - "loss": 0.009401807188987732, - "step": 1770 - }, - { - "epoch": 0.30264279624893436, - "grad_norm": 0.09595180302858353, - "learning_rate": 7.44062155848024e-05, - "loss": 0.010888323932886124, - "step": 1775 - }, - { - "epoch": 0.3034953111679454, - "grad_norm": 0.0811101421713829, - "learning_rate": 7.440166546443347e-05, - "loss": 0.00998341292142868, - "step": 1780 - }, - { - "epoch": 0.30434782608695654, - "grad_norm": 0.13257169723510742, - "learning_rate": 7.439709811713958e-05, - "loss": 0.014603719115257263, - "step": 1785 - }, - { - "epoch": 0.3052003410059676, - "grad_norm": 0.1428811252117157, - "learning_rate": 7.439251354505289e-05, - "loss": 0.01388871967792511, - "step": 1790 - }, - { - "epoch": 0.30605285592497866, - "grad_norm": 0.08253402262926102, - "learning_rate": 7.438791175031367e-05, - "loss": 0.010171836614608765, - "step": 1795 - }, - { - "epoch": 0.3069053708439898, - "grad_norm": 0.05991052836179733, - "learning_rate": 7.438329273507019e-05, - "loss": 0.01470649391412735, - "step": 1800 - }, - { - "epoch": 0.30775788576300084, - "grad_norm": 0.10785503685474396, - "learning_rate": 7.437865650147873e-05, - "loss": 0.012740308046340942, - "step": 1805 - }, - { - "epoch": 0.30861040068201195, - "grad_norm": 0.093068428337574, - "learning_rate": 7.437400305170367e-05, - "loss": 0.01183861643075943, - "step": 1810 - }, - { - "epoch": 0.309462915601023, - "grad_norm": 0.08662707358598709, - "learning_rate": 7.436933238791737e-05, - "loss": 0.011762722581624984, - "step": 1815 - }, - { - "epoch": 0.3103154305200341, - "grad_norm": 0.07826617360115051, - "learning_rate": 7.436464451230027e-05, - "loss": 0.009368828684091567, - "step": 1820 - }, - { - "epoch": 0.3111679454390452, - "grad_norm": 0.1295643448829651, - "learning_rate": 7.435993942704082e-05, - "loss": 0.010699732601642609, - "step": 1825 - }, - { - "epoch": 0.31202046035805625, - "grad_norm": 0.1412370651960373, - "learning_rate": 7.43552171343355e-05, - "loss": 0.0124404676258564, - "step": 1830 - }, - { - "epoch": 0.31287297527706737, - "grad_norm": 0.07793306559324265, - "learning_rate": 7.435047763638885e-05, - "loss": 0.010793016105890275, - "step": 1835 - }, - { - "epoch": 0.3137254901960784, - "grad_norm": 0.1273961067199707, - "learning_rate": 7.434572093541341e-05, - "loss": 0.012959575653076172, - "step": 1840 - }, - { - "epoch": 0.3145780051150895, - "grad_norm": 0.10340052098035812, - "learning_rate": 7.434094703362978e-05, - "loss": 0.011804693937301635, - "step": 1845 - }, - { - "epoch": 0.3154305200341006, - "grad_norm": 0.07878883183002472, - "learning_rate": 7.433615593326657e-05, - "loss": 0.011087532341480254, - "step": 1850 - }, - { - "epoch": 0.31628303495311166, - "grad_norm": 0.08166638761758804, - "learning_rate": 7.433134763656042e-05, - "loss": 0.010111966729164123, - "step": 1855 - }, - { - "epoch": 0.3171355498721228, - "grad_norm": 0.12048157304525375, - "learning_rate": 7.432652214575603e-05, - "loss": 0.013003784418106078, - "step": 1860 - }, - { - "epoch": 0.31798806479113384, - "grad_norm": 0.08009333908557892, - "learning_rate": 7.432167946310605e-05, - "loss": 0.01212536245584488, - "step": 1865 - }, - { - "epoch": 0.3188405797101449, - "grad_norm": 0.07344945520162582, - "learning_rate": 7.431681959087126e-05, - "loss": 0.011613032221794129, - "step": 1870 - }, - { - "epoch": 0.319693094629156, - "grad_norm": 0.09358638525009155, - "learning_rate": 7.431194253132037e-05, - "loss": 0.011946377158164979, - "step": 1875 - }, - { - "epoch": 0.3205456095481671, - "grad_norm": 0.14091502130031586, - "learning_rate": 7.430704828673016e-05, - "loss": 0.012845572829246522, - "step": 1880 - }, - { - "epoch": 0.3213981244671782, - "grad_norm": 0.0754130631685257, - "learning_rate": 7.430213685938543e-05, - "loss": 0.011171463876962662, - "step": 1885 - }, - { - "epoch": 0.32225063938618925, - "grad_norm": 0.10210556536912918, - "learning_rate": 7.429720825157901e-05, - "loss": 0.010276605188846589, - "step": 1890 - }, - { - "epoch": 0.32310315430520037, - "grad_norm": 0.10094697028398514, - "learning_rate": 7.429226246561173e-05, - "loss": 0.01233583763241768, - "step": 1895 - }, - { - "epoch": 0.32395566922421143, - "grad_norm": 0.0673881471157074, - "learning_rate": 7.428729950379244e-05, - "loss": 0.008631937205791473, - "step": 1900 - }, - { - "epoch": 0.3248081841432225, - "grad_norm": 0.11807650327682495, - "learning_rate": 7.428231936843803e-05, - "loss": 0.012879209220409393, - "step": 1905 - }, - { - "epoch": 0.3256606990622336, - "grad_norm": 0.0627446100115776, - "learning_rate": 7.427732206187338e-05, - "loss": 0.011548225581645966, - "step": 1910 - }, - { - "epoch": 0.32651321398124467, - "grad_norm": 0.09312627464532852, - "learning_rate": 7.427230758643139e-05, - "loss": 0.012763653695583344, - "step": 1915 - }, - { - "epoch": 0.3273657289002558, - "grad_norm": 0.12694048881530762, - "learning_rate": 7.426727594445302e-05, - "loss": 0.014219759404659272, - "step": 1920 - }, - { - "epoch": 0.32821824381926684, - "grad_norm": 0.09415233880281448, - "learning_rate": 7.426222713828717e-05, - "loss": 0.01088135689496994, - "step": 1925 - }, - { - "epoch": 0.3290707587382779, - "grad_norm": 0.1079363226890564, - "learning_rate": 7.425716117029082e-05, - "loss": 0.013090427219867706, - "step": 1930 - }, - { - "epoch": 0.329923273657289, - "grad_norm": 0.10847736895084381, - "learning_rate": 7.42520780428289e-05, - "loss": 0.011184506118297577, - "step": 1935 - }, - { - "epoch": 0.3307757885763001, - "grad_norm": 0.12416253983974457, - "learning_rate": 7.424697775827442e-05, - "loss": 0.012871085107326508, - "step": 1940 - }, - { - "epoch": 0.3316283034953112, - "grad_norm": 0.08419755846261978, - "learning_rate": 7.424186031900833e-05, - "loss": 0.01026538610458374, - "step": 1945 - }, - { - "epoch": 0.33248081841432225, - "grad_norm": 0.06923236697912216, - "learning_rate": 7.423672572741965e-05, - "loss": 0.012079264223575591, - "step": 1950 - }, - { - "epoch": 0.3333333333333333, - "grad_norm": 0.08516070991754532, - "learning_rate": 7.423157398590534e-05, - "loss": 0.011150284111499787, - "step": 1955 - }, - { - "epoch": 0.33418584825234443, - "grad_norm": 0.054969049990177155, - "learning_rate": 7.422640509687045e-05, - "loss": 0.008261225372552871, - "step": 1960 - }, - { - "epoch": 0.3350383631713555, - "grad_norm": 0.09037495404481888, - "learning_rate": 7.422121906272795e-05, - "loss": 0.015576986968517304, - "step": 1965 - }, - { - "epoch": 0.3358908780903666, - "grad_norm": 0.08676491677761078, - "learning_rate": 7.421601588589889e-05, - "loss": 0.01942193806171417, - "step": 1970 - }, - { - "epoch": 0.33674339300937767, - "grad_norm": 0.09090764820575714, - "learning_rate": 7.421079556881224e-05, - "loss": 0.012568703293800354, - "step": 1975 - }, - { - "epoch": 0.3375959079283887, - "grad_norm": 0.07859542965888977, - "learning_rate": 7.420555811390505e-05, - "loss": 0.011662108451128006, - "step": 1980 - }, - { - "epoch": 0.33844842284739984, - "grad_norm": 0.06368016451597214, - "learning_rate": 7.420030352362235e-05, - "loss": 0.010762494802474976, - "step": 1985 - }, - { - "epoch": 0.3393009377664109, - "grad_norm": 0.10950745642185211, - "learning_rate": 7.419503180041712e-05, - "loss": 0.012577894330024719, - "step": 1990 - }, - { - "epoch": 0.340153452685422, - "grad_norm": 0.07888182997703552, - "learning_rate": 7.41897429467504e-05, - "loss": 0.009134671837091445, - "step": 1995 - }, - { - "epoch": 0.3410059676044331, - "grad_norm": 0.08978903293609619, - "learning_rate": 7.41844369650912e-05, - "loss": 0.011774566024541855, - "step": 2000 - }, - { - "epoch": 0.34185848252344414, - "grad_norm": 0.07103633135557175, - "learning_rate": 7.417911385791653e-05, - "loss": 0.011116493493318558, - "step": 2005 - }, - { - "epoch": 0.34271099744245526, - "grad_norm": 0.07445147633552551, - "learning_rate": 7.417377362771138e-05, - "loss": 0.012135914713144302, - "step": 2010 - }, - { - "epoch": 0.3435635123614663, - "grad_norm": 0.09372841566801071, - "learning_rate": 7.416841627696876e-05, - "loss": 0.014101208746433258, - "step": 2015 - }, - { - "epoch": 0.34441602728047743, - "grad_norm": 0.10181085020303726, - "learning_rate": 7.416304180818966e-05, - "loss": 0.010917666554450988, - "step": 2020 - }, - { - "epoch": 0.3452685421994885, - "grad_norm": 0.08702226728200912, - "learning_rate": 7.415765022388305e-05, - "loss": 0.012508213520050049, - "step": 2025 - }, - { - "epoch": 0.34612105711849955, - "grad_norm": 0.11725348234176636, - "learning_rate": 7.415224152656591e-05, - "loss": 0.012123394012451171, - "step": 2030 - }, - { - "epoch": 0.34697357203751067, - "grad_norm": 0.10797812044620514, - "learning_rate": 7.414681571876321e-05, - "loss": 0.011308898031711579, - "step": 2035 - }, - { - "epoch": 0.34782608695652173, - "grad_norm": 0.07944193482398987, - "learning_rate": 7.414137280300787e-05, - "loss": 0.008828282356262207, - "step": 2040 - }, - { - "epoch": 0.34867860187553285, - "grad_norm": 0.09413408488035202, - "learning_rate": 7.413591278184086e-05, - "loss": 0.010974615067243575, - "step": 2045 - }, - { - "epoch": 0.3495311167945439, - "grad_norm": 0.13984905183315277, - "learning_rate": 7.413043565781107e-05, - "loss": 0.013567428290843963, - "step": 2050 - }, - { - "epoch": 0.35038363171355497, - "grad_norm": 0.09445049613714218, - "learning_rate": 7.41249414334754e-05, - "loss": 0.011122822761535645, - "step": 2055 - }, - { - "epoch": 0.3512361466325661, - "grad_norm": 0.07995510846376419, - "learning_rate": 7.411943011139877e-05, - "loss": 0.009908045828342437, - "step": 2060 - }, - { - "epoch": 0.35208866155157714, - "grad_norm": 0.1185273677110672, - "learning_rate": 7.411390169415402e-05, - "loss": 0.012709785997867585, - "step": 2065 - }, - { - "epoch": 0.35294117647058826, - "grad_norm": 0.11713512986898422, - "learning_rate": 7.4108356184322e-05, - "loss": 0.009765231609344482, - "step": 2070 - }, - { - "epoch": 0.3537936913895993, - "grad_norm": 0.06523539125919342, - "learning_rate": 7.410279358449155e-05, - "loss": 0.0113253653049469, - "step": 2075 - }, - { - "epoch": 0.3546462063086104, - "grad_norm": 0.07587762176990509, - "learning_rate": 7.409721389725948e-05, - "loss": 0.009385265409946442, - "step": 2080 - }, - { - "epoch": 0.3554987212276215, - "grad_norm": 0.05211614444851875, - "learning_rate": 7.409161712523056e-05, - "loss": 0.012498895078897477, - "step": 2085 - }, - { - "epoch": 0.35635123614663256, - "grad_norm": 0.12545894086360931, - "learning_rate": 7.408600327101755e-05, - "loss": 0.012212803959846497, - "step": 2090 - }, - { - "epoch": 0.35720375106564367, - "grad_norm": 0.10047369450330734, - "learning_rate": 7.40803723372412e-05, - "loss": 0.012341489642858505, - "step": 2095 - }, - { - "epoch": 0.35805626598465473, - "grad_norm": 0.13728737831115723, - "learning_rate": 7.40747243265302e-05, - "loss": 0.011351624131202697, - "step": 2100 - }, - { - "epoch": 0.3589087809036658, - "grad_norm": 0.1251213699579239, - "learning_rate": 7.406905924152125e-05, - "loss": 0.013545188307762145, - "step": 2105 - }, - { - "epoch": 0.3597612958226769, - "grad_norm": 0.07805601507425308, - "learning_rate": 7.406337708485897e-05, - "loss": 0.010711775720119476, - "step": 2110 - }, - { - "epoch": 0.36061381074168797, - "grad_norm": 0.08311845362186432, - "learning_rate": 7.405767785919598e-05, - "loss": 0.01128876730799675, - "step": 2115 - }, - { - "epoch": 0.3614663256606991, - "grad_norm": 0.09670841693878174, - "learning_rate": 7.405196156719291e-05, - "loss": 0.013085599243640899, - "step": 2120 - }, - { - "epoch": 0.36231884057971014, - "grad_norm": 0.10827390104532242, - "learning_rate": 7.404622821151829e-05, - "loss": 0.011315967142581939, - "step": 2125 - }, - { - "epoch": 0.3631713554987212, - "grad_norm": 0.08578862994909286, - "learning_rate": 7.404047779484862e-05, - "loss": 0.01172153502702713, - "step": 2130 - }, - { - "epoch": 0.3640238704177323, - "grad_norm": 0.08786064386367798, - "learning_rate": 7.403471031986841e-05, - "loss": 0.010834509134292602, - "step": 2135 - }, - { - "epoch": 0.3648763853367434, - "grad_norm": 0.07956185191869736, - "learning_rate": 7.402892578927012e-05, - "loss": 0.01222250759601593, - "step": 2140 - }, - { - "epoch": 0.3657289002557545, - "grad_norm": 0.10179547220468521, - "learning_rate": 7.402312420575414e-05, - "loss": 0.010667824000120164, - "step": 2145 - }, - { - "epoch": 0.36658141517476556, - "grad_norm": 0.10311263799667358, - "learning_rate": 7.401730557202884e-05, - "loss": 0.014463961124420166, - "step": 2150 - }, - { - "epoch": 0.3674339300937766, - "grad_norm": 0.0935206189751625, - "learning_rate": 7.401146989081058e-05, - "loss": 0.010451390594244003, - "step": 2155 - }, - { - "epoch": 0.36828644501278773, - "grad_norm": 0.08164738863706589, - "learning_rate": 7.400561716482362e-05, - "loss": 0.013009518384933472, - "step": 2160 - }, - { - "epoch": 0.3691389599317988, - "grad_norm": 0.0638088807463646, - "learning_rate": 7.399974739680022e-05, - "loss": 0.0109320767223835, - "step": 2165 - }, - { - "epoch": 0.3699914748508099, - "grad_norm": 0.09591665863990784, - "learning_rate": 7.399386058948057e-05, - "loss": 0.01293652206659317, - "step": 2170 - }, - { - "epoch": 0.37084398976982097, - "grad_norm": 0.08929681777954102, - "learning_rate": 7.398795674561285e-05, - "loss": 0.011034403741359711, - "step": 2175 - }, - { - "epoch": 0.37169650468883203, - "grad_norm": 0.07356081902980804, - "learning_rate": 7.398203586795315e-05, - "loss": 0.010477699339389801, - "step": 2180 - }, - { - "epoch": 0.37254901960784315, - "grad_norm": 0.1117938682436943, - "learning_rate": 7.397609795926555e-05, - "loss": 0.008920109272003174, - "step": 2185 - }, - { - "epoch": 0.3734015345268542, - "grad_norm": 0.10849595069885254, - "learning_rate": 7.397014302232204e-05, - "loss": 0.01170756369829178, - "step": 2190 - }, - { - "epoch": 0.3742540494458653, - "grad_norm": 0.08509895205497742, - "learning_rate": 7.396417105990261e-05, - "loss": 0.010042114555835724, - "step": 2195 - }, - { - "epoch": 0.3751065643648764, - "grad_norm": 0.10500915348529816, - "learning_rate": 7.395818207479515e-05, - "loss": 0.011792914569377899, - "step": 2200 - }, - { - "epoch": 0.37595907928388744, - "grad_norm": 0.06618086993694305, - "learning_rate": 7.395217606979553e-05, - "loss": 0.011084456741809846, - "step": 2205 - }, - { - "epoch": 0.37681159420289856, - "grad_norm": 0.08622384816408157, - "learning_rate": 7.394615304770756e-05, - "loss": 0.010955430567264557, - "step": 2210 - }, - { - "epoch": 0.3776641091219096, - "grad_norm": 0.10002898424863815, - "learning_rate": 7.394011301134296e-05, - "loss": 0.011026810109615325, - "step": 2215 - }, - { - "epoch": 0.37851662404092073, - "grad_norm": 0.16406390070915222, - "learning_rate": 7.393405596352144e-05, - "loss": 0.010511884093284607, - "step": 2220 - }, - { - "epoch": 0.3793691389599318, - "grad_norm": 0.077234648168087, - "learning_rate": 7.392798190707062e-05, - "loss": 0.011723069101572036, - "step": 2225 - }, - { - "epoch": 0.38022165387894286, - "grad_norm": 0.09080372750759125, - "learning_rate": 7.392189084482609e-05, - "loss": 0.010011065006256103, - "step": 2230 - }, - { - "epoch": 0.38107416879795397, - "grad_norm": 0.08161097019910812, - "learning_rate": 7.391578277963134e-05, - "loss": 0.012426529079675674, - "step": 2235 - }, - { - "epoch": 0.38192668371696503, - "grad_norm": 0.09220891445875168, - "learning_rate": 7.390965771433783e-05, - "loss": 0.011983324587345124, - "step": 2240 - }, - { - "epoch": 0.38277919863597615, - "grad_norm": 0.10752015560865402, - "learning_rate": 7.390351565180495e-05, - "loss": 0.014156198501586914, - "step": 2245 - }, - { - "epoch": 0.3836317135549872, - "grad_norm": 0.05059373378753662, - "learning_rate": 7.38973565949e-05, - "loss": 0.00998034030199051, - "step": 2250 - }, - { - "epoch": 0.38448422847399827, - "grad_norm": 0.11214456707239151, - "learning_rate": 7.389118054649824e-05, - "loss": 0.01075390875339508, - "step": 2255 - }, - { - "epoch": 0.3853367433930094, - "grad_norm": 0.07631754130125046, - "learning_rate": 7.388498750948286e-05, - "loss": 0.014638753235340118, - "step": 2260 - }, - { - "epoch": 0.38618925831202044, - "grad_norm": 0.07249671965837479, - "learning_rate": 7.387877748674499e-05, - "loss": 0.011368723213672638, - "step": 2265 - }, - { - "epoch": 0.38704177323103156, - "grad_norm": 0.11984748393297195, - "learning_rate": 7.387255048118364e-05, - "loss": 0.011021500825881958, - "step": 2270 - }, - { - "epoch": 0.3878942881500426, - "grad_norm": 0.08478229492902756, - "learning_rate": 7.386630649570581e-05, - "loss": 0.009952519088983536, - "step": 2275 - }, - { - "epoch": 0.3887468030690537, - "grad_norm": 0.11780049651861191, - "learning_rate": 7.386004553322639e-05, - "loss": 0.009453963488340378, - "step": 2280 - }, - { - "epoch": 0.3895993179880648, - "grad_norm": 0.06949981302022934, - "learning_rate": 7.38537675966682e-05, - "loss": 0.009042493999004364, - "step": 2285 - }, - { - "epoch": 0.39045183290707586, - "grad_norm": 0.11411654949188232, - "learning_rate": 7.3847472688962e-05, - "loss": 0.013985235989093781, - "step": 2290 - }, - { - "epoch": 0.391304347826087, - "grad_norm": 0.11030828952789307, - "learning_rate": 7.384116081304647e-05, - "loss": 0.01135389506816864, - "step": 2295 - }, - { - "epoch": 0.39215686274509803, - "grad_norm": 0.0808996856212616, - "learning_rate": 7.38348319718682e-05, - "loss": 0.011089587211608886, - "step": 2300 - }, - { - "epoch": 0.39300937766410915, - "grad_norm": 0.11319196224212646, - "learning_rate": 7.382848616838167e-05, - "loss": 0.01407056450843811, - "step": 2305 - }, - { - "epoch": 0.3938618925831202, - "grad_norm": 0.09301812201738358, - "learning_rate": 7.382212340554937e-05, - "loss": 0.012283433228731155, - "step": 2310 - }, - { - "epoch": 0.39471440750213127, - "grad_norm": 0.08611076325178146, - "learning_rate": 7.381574368634159e-05, - "loss": 0.012206315249204635, - "step": 2315 - }, - { - "epoch": 0.3955669224211424, - "grad_norm": 0.08104816824197769, - "learning_rate": 7.380934701373665e-05, - "loss": 0.01059889942407608, - "step": 2320 - }, - { - "epoch": 0.39641943734015345, - "grad_norm": 0.09607693552970886, - "learning_rate": 7.380293339072067e-05, - "loss": 0.010189100354909896, - "step": 2325 - }, - { - "epoch": 0.39727195225916456, - "grad_norm": 0.08985438197851181, - "learning_rate": 7.37965028202878e-05, - "loss": 0.01145355924963951, - "step": 2330 - }, - { - "epoch": 0.3981244671781756, - "grad_norm": 0.0767461284995079, - "learning_rate": 7.379005530544e-05, - "loss": 0.012533161044120788, - "step": 2335 - }, - { - "epoch": 0.3989769820971867, - "grad_norm": 0.17541736364364624, - "learning_rate": 7.378359084918724e-05, - "loss": 0.011619434505701066, - "step": 2340 - }, - { - "epoch": 0.3998294970161978, - "grad_norm": 0.07870234549045563, - "learning_rate": 7.377710945454728e-05, - "loss": 0.013362208008766174, - "step": 2345 - }, - { - "epoch": 0.40068201193520886, - "grad_norm": 0.08661636710166931, - "learning_rate": 7.377061112454589e-05, - "loss": 0.011086350679397583, - "step": 2350 - }, - { - "epoch": 0.40153452685422, - "grad_norm": 0.08467904478311539, - "learning_rate": 7.376409586221668e-05, - "loss": 0.008972878754138946, - "step": 2355 - }, - { - "epoch": 0.40238704177323104, - "grad_norm": 0.09345834702253342, - "learning_rate": 7.375756367060121e-05, - "loss": 0.01281469464302063, - "step": 2360 - }, - { - "epoch": 0.4032395566922421, - "grad_norm": 0.10789518058300018, - "learning_rate": 7.375101455274893e-05, - "loss": 0.012343473732471466, - "step": 2365 - }, - { - "epoch": 0.4040920716112532, - "grad_norm": 0.06546701490879059, - "learning_rate": 7.374444851171716e-05, - "loss": 0.012971158325672149, - "step": 2370 - }, - { - "epoch": 0.40494458653026427, - "grad_norm": 0.08968871831893921, - "learning_rate": 7.373786555057117e-05, - "loss": 0.012170027941465378, - "step": 2375 - }, - { - "epoch": 0.4057971014492754, - "grad_norm": 0.058557040989398956, - "learning_rate": 7.373126567238412e-05, - "loss": 0.009915658086538316, - "step": 2380 - }, - { - "epoch": 0.40664961636828645, - "grad_norm": 0.08734243363142014, - "learning_rate": 7.3724648880237e-05, - "loss": 0.009043127298355103, - "step": 2385 - }, - { - "epoch": 0.4075021312872975, - "grad_norm": 0.09249505400657654, - "learning_rate": 7.371801517721879e-05, - "loss": 0.008064758032560349, - "step": 2390 - }, - { - "epoch": 0.4083546462063086, - "grad_norm": 0.09015105664730072, - "learning_rate": 7.371136456642631e-05, - "loss": 0.007721304893493652, - "step": 2395 - }, - { - "epoch": 0.4092071611253197, - "grad_norm": 0.08557724207639694, - "learning_rate": 7.37046970509643e-05, - "loss": 0.010766822844743729, - "step": 2400 - }, - { - "epoch": 0.4100596760443308, - "grad_norm": 0.08001160621643066, - "learning_rate": 7.369801263394536e-05, - "loss": 0.00953015759587288, - "step": 2405 - }, - { - "epoch": 0.41091219096334186, - "grad_norm": 0.08470463752746582, - "learning_rate": 7.369131131849e-05, - "loss": 0.010154610127210617, - "step": 2410 - }, - { - "epoch": 0.4117647058823529, - "grad_norm": 0.07110592722892761, - "learning_rate": 7.368459310772664e-05, - "loss": 0.010146965831518173, - "step": 2415 - }, - { - "epoch": 0.41261722080136404, - "grad_norm": 0.06808072328567505, - "learning_rate": 7.367785800479152e-05, - "loss": 0.01043560653924942, - "step": 2420 - }, - { - "epoch": 0.4134697357203751, - "grad_norm": 0.09226541966199875, - "learning_rate": 7.367110601282884e-05, - "loss": 0.011138775944709777, - "step": 2425 - }, - { - "epoch": 0.4143222506393862, - "grad_norm": 0.08650510013103485, - "learning_rate": 7.366433713499067e-05, - "loss": 0.011451859772205353, - "step": 2430 - }, - { - "epoch": 0.4151747655583973, - "grad_norm": 0.11477349698543549, - "learning_rate": 7.365755137443691e-05, - "loss": 0.013105396926403046, - "step": 2435 - }, - { - "epoch": 0.41602728047740833, - "grad_norm": 0.1117088794708252, - "learning_rate": 7.365074873433541e-05, - "loss": 0.01190647780895233, - "step": 2440 - }, - { - "epoch": 0.41687979539641945, - "grad_norm": 0.058514054864645004, - "learning_rate": 7.364392921786185e-05, - "loss": 0.011006749421358108, - "step": 2445 - }, - { - "epoch": 0.4177323103154305, - "grad_norm": 0.0925084576010704, - "learning_rate": 7.363709282819981e-05, - "loss": 0.011449025571346283, - "step": 2450 - }, - { - "epoch": 0.4185848252344416, - "grad_norm": 0.10087555646896362, - "learning_rate": 7.363023956854074e-05, - "loss": 0.011715477705001831, - "step": 2455 - }, - { - "epoch": 0.4194373401534527, - "grad_norm": 0.08760760724544525, - "learning_rate": 7.362336944208399e-05, - "loss": 0.011089532822370528, - "step": 2460 - }, - { - "epoch": 0.42028985507246375, - "grad_norm": 0.09802501648664474, - "learning_rate": 7.361648245203674e-05, - "loss": 0.012181267142295837, - "step": 2465 - }, - { - "epoch": 0.42114236999147486, - "grad_norm": 0.06908553838729858, - "learning_rate": 7.36095786016141e-05, - "loss": 0.010319410264492035, - "step": 2470 - }, - { - "epoch": 0.4219948849104859, - "grad_norm": 0.07190519571304321, - "learning_rate": 7.360265789403896e-05, - "loss": 0.013445201516151428, - "step": 2475 - }, - { - "epoch": 0.42284739982949704, - "grad_norm": 0.06683836877346039, - "learning_rate": 7.359572033254219e-05, - "loss": 0.008111725002527237, - "step": 2480 - }, - { - "epoch": 0.4236999147485081, - "grad_norm": 0.07094739377498627, - "learning_rate": 7.358876592036245e-05, - "loss": 0.012130254507064819, - "step": 2485 - }, - { - "epoch": 0.42455242966751916, - "grad_norm": 0.11974254250526428, - "learning_rate": 7.358179466074629e-05, - "loss": 0.011426160484552384, - "step": 2490 - }, - { - "epoch": 0.4254049445865303, - "grad_norm": 0.07710634917020798, - "learning_rate": 7.357480655694814e-05, - "loss": 0.010044369101524352, - "step": 2495 - }, - { - "epoch": 0.42625745950554134, - "grad_norm": 0.08417962491512299, - "learning_rate": 7.356780161223026e-05, - "loss": 0.010821688175201415, - "step": 2500 - }, - { - "epoch": 0.42710997442455245, - "grad_norm": 0.11058598011732101, - "learning_rate": 7.35607798298628e-05, - "loss": 0.012949730455875396, - "step": 2505 - }, - { - "epoch": 0.4279624893435635, - "grad_norm": 0.08686384558677673, - "learning_rate": 7.355374121312377e-05, - "loss": 0.009096769988536835, - "step": 2510 - }, - { - "epoch": 0.4288150042625746, - "grad_norm": 0.11153281480073929, - "learning_rate": 7.354668576529903e-05, - "loss": 0.010433172434568405, - "step": 2515 - }, - { - "epoch": 0.4296675191815857, - "grad_norm": 0.08490245044231415, - "learning_rate": 7.353961348968229e-05, - "loss": 0.008478586375713349, - "step": 2520 - }, - { - "epoch": 0.43052003410059675, - "grad_norm": 0.06651579588651657, - "learning_rate": 7.353252438957511e-05, - "loss": 0.012342555820941925, - "step": 2525 - }, - { - "epoch": 0.43137254901960786, - "grad_norm": 0.08961665630340576, - "learning_rate": 7.352541846828694e-05, - "loss": 0.010387994349002838, - "step": 2530 - }, - { - "epoch": 0.4322250639386189, - "grad_norm": 0.08726584166288376, - "learning_rate": 7.351829572913505e-05, - "loss": 0.009760166704654693, - "step": 2535 - }, - { - "epoch": 0.43307757885763, - "grad_norm": 0.06280151754617691, - "learning_rate": 7.351115617544459e-05, - "loss": 0.01087048500776291, - "step": 2540 - }, - { - "epoch": 0.4339300937766411, - "grad_norm": 0.09519831836223602, - "learning_rate": 7.350399981054851e-05, - "loss": 0.011516393721103668, - "step": 2545 - }, - { - "epoch": 0.43478260869565216, - "grad_norm": 0.09179427474737167, - "learning_rate": 7.349682663778766e-05, - "loss": 0.013757939636707305, - "step": 2550 - }, - { - "epoch": 0.4356351236146633, - "grad_norm": 0.10378465801477432, - "learning_rate": 7.34896366605107e-05, - "loss": 0.011337973177433014, - "step": 2555 - }, - { - "epoch": 0.43648763853367434, - "grad_norm": 0.14043129980564117, - "learning_rate": 7.348242988207418e-05, - "loss": 0.01203509122133255, - "step": 2560 - }, - { - "epoch": 0.4373401534526854, - "grad_norm": 0.06442756950855255, - "learning_rate": 7.347520630584243e-05, - "loss": 0.007210708409547806, - "step": 2565 - }, - { - "epoch": 0.4381926683716965, - "grad_norm": 0.05981998145580292, - "learning_rate": 7.346796593518768e-05, - "loss": 0.009825873374938964, - "step": 2570 - }, - { - "epoch": 0.4390451832907076, - "grad_norm": 0.10198855400085449, - "learning_rate": 7.346070877348996e-05, - "loss": 0.013066151738166809, - "step": 2575 - }, - { - "epoch": 0.4398976982097187, - "grad_norm": 0.12545716762542725, - "learning_rate": 7.345343482413716e-05, - "loss": 0.008229418843984603, - "step": 2580 - }, - { - "epoch": 0.44075021312872975, - "grad_norm": 0.1352240890264511, - "learning_rate": 7.344614409052501e-05, - "loss": 0.013183671236038207, - "step": 2585 - }, - { - "epoch": 0.4416027280477408, - "grad_norm": 0.07198570668697357, - "learning_rate": 7.343883657605704e-05, - "loss": 0.010311058908700942, - "step": 2590 - }, - { - "epoch": 0.4424552429667519, - "grad_norm": 0.08454001694917679, - "learning_rate": 7.343151228414469e-05, - "loss": 0.009928110986948013, - "step": 2595 - }, - { - "epoch": 0.443307757885763, - "grad_norm": 0.07289708405733109, - "learning_rate": 7.342417121820714e-05, - "loss": 0.011071844398975373, - "step": 2600 - }, - { - "epoch": 0.4441602728047741, - "grad_norm": 0.12291301786899567, - "learning_rate": 7.341681338167145e-05, - "loss": 0.011248499900102616, - "step": 2605 - }, - { - "epoch": 0.44501278772378516, - "grad_norm": 0.14277565479278564, - "learning_rate": 7.340943877797252e-05, - "loss": 0.010025183856487273, - "step": 2610 - }, - { - "epoch": 0.4458653026427962, - "grad_norm": 0.07569251209497452, - "learning_rate": 7.340204741055304e-05, - "loss": 0.009996208548545837, - "step": 2615 - }, - { - "epoch": 0.44671781756180734, - "grad_norm": 0.10494589060544968, - "learning_rate": 7.339463928286357e-05, - "loss": 0.01392391324043274, - "step": 2620 - }, - { - "epoch": 0.4475703324808184, - "grad_norm": 0.14377856254577637, - "learning_rate": 7.338721439836245e-05, - "loss": 0.012823046743869781, - "step": 2625 - }, - { - "epoch": 0.4484228473998295, - "grad_norm": 0.06943785399198532, - "learning_rate": 7.337977276051586e-05, - "loss": 0.009452010691165923, - "step": 2630 - }, - { - "epoch": 0.4492753623188406, - "grad_norm": 0.09933419525623322, - "learning_rate": 7.337231437279783e-05, - "loss": 0.008945996314287186, - "step": 2635 - }, - { - "epoch": 0.45012787723785164, - "grad_norm": 0.09861225634813309, - "learning_rate": 7.336483923869016e-05, - "loss": 0.010671885311603546, - "step": 2640 - }, - { - "epoch": 0.45098039215686275, - "grad_norm": 0.08303772658109665, - "learning_rate": 7.335734736168249e-05, - "loss": 0.009589634835720062, - "step": 2645 - }, - { - "epoch": 0.4518329070758738, - "grad_norm": 0.08657588064670563, - "learning_rate": 7.334983874527231e-05, - "loss": 0.008064036071300507, - "step": 2650 - }, - { - "epoch": 0.45268542199488493, - "grad_norm": 0.10513710975646973, - "learning_rate": 7.334231339296485e-05, - "loss": 0.01647743284702301, - "step": 2655 - }, - { - "epoch": 0.453537936913896, - "grad_norm": 0.10341943055391312, - "learning_rate": 7.333477130827322e-05, - "loss": 0.009101226180791854, - "step": 2660 - }, - { - "epoch": 0.45439045183290705, - "grad_norm": 0.09740681946277618, - "learning_rate": 7.33272124947183e-05, - "loss": 0.011460770666599274, - "step": 2665 - }, - { - "epoch": 0.45524296675191817, - "grad_norm": 0.06477998197078705, - "learning_rate": 7.331963695582881e-05, - "loss": 0.011711791157722473, - "step": 2670 - }, - { - "epoch": 0.4560954816709292, - "grad_norm": 0.0881948322057724, - "learning_rate": 7.331204469514127e-05, - "loss": 0.009621420502662658, - "step": 2675 - }, - { - "epoch": 0.45694799658994034, - "grad_norm": 0.09553391486406326, - "learning_rate": 7.330443571619998e-05, - "loss": 0.011725078523159026, - "step": 2680 - }, - { - "epoch": 0.4578005115089514, - "grad_norm": 0.10480209439992905, - "learning_rate": 7.329681002255706e-05, - "loss": 0.012353558838367463, - "step": 2685 - }, - { - "epoch": 0.45865302642796246, - "grad_norm": 0.08409439772367477, - "learning_rate": 7.328916761777247e-05, - "loss": 0.01114615797996521, - "step": 2690 - }, - { - "epoch": 0.4595055413469736, - "grad_norm": 0.07166923582553864, - "learning_rate": 7.32815085054139e-05, - "loss": 0.008672221004962921, - "step": 2695 - }, - { - "epoch": 0.46035805626598464, - "grad_norm": 0.07308658212423325, - "learning_rate": 7.327383268905691e-05, - "loss": 0.012448658794164657, - "step": 2700 - }, - { - "epoch": 0.46121057118499575, - "grad_norm": 0.14019793272018433, - "learning_rate": 7.32661401722848e-05, - "loss": 0.013477186858654022, - "step": 2705 - }, - { - "epoch": 0.4620630861040068, - "grad_norm": 0.0753963515162468, - "learning_rate": 7.325843095868872e-05, - "loss": 0.011373884975910187, - "step": 2710 - }, - { - "epoch": 0.4629156010230179, - "grad_norm": 0.07312130182981491, - "learning_rate": 7.325070505186756e-05, - "loss": 0.012329152971506118, - "step": 2715 - }, - { - "epoch": 0.463768115942029, - "grad_norm": 0.06200556829571724, - "learning_rate": 7.324296245542806e-05, - "loss": 0.008847354352474213, - "step": 2720 - }, - { - "epoch": 0.46462063086104005, - "grad_norm": 0.11015846580266953, - "learning_rate": 7.32352031729847e-05, - "loss": 0.013304698467254638, - "step": 2725 - }, - { - "epoch": 0.46547314578005117, - "grad_norm": 0.05926821380853653, - "learning_rate": 7.322742720815978e-05, - "loss": 0.011919337511062621, - "step": 2730 - }, - { - "epoch": 0.4663256606990622, - "grad_norm": 0.102846160531044, - "learning_rate": 7.321963456458337e-05, - "loss": 0.010952814668416976, - "step": 2735 - }, - { - "epoch": 0.46717817561807334, - "grad_norm": 0.10767021775245667, - "learning_rate": 7.321182524589334e-05, - "loss": 0.012438956648111343, - "step": 2740 - }, - { - "epoch": 0.4680306905370844, - "grad_norm": 0.08611919730901718, - "learning_rate": 7.320399925573534e-05, - "loss": 0.008686845004558564, - "step": 2745 - }, - { - "epoch": 0.46888320545609546, - "grad_norm": 0.07483147829771042, - "learning_rate": 7.31961565977628e-05, - "loss": 0.011065713316202163, - "step": 2750 - }, - { - "epoch": 0.4697357203751066, - "grad_norm": 0.08029857277870178, - "learning_rate": 7.318829727563696e-05, - "loss": 0.012208929657936097, - "step": 2755 - }, - { - "epoch": 0.47058823529411764, - "grad_norm": 0.09076030552387238, - "learning_rate": 7.318042129302676e-05, - "loss": 0.010283030569553375, - "step": 2760 - }, - { - "epoch": 0.47144075021312876, - "grad_norm": 0.07009804993867874, - "learning_rate": 7.317252865360902e-05, - "loss": 0.010625988245010376, - "step": 2765 - }, - { - "epoch": 0.4722932651321398, - "grad_norm": 0.07213665544986725, - "learning_rate": 7.316461936106826e-05, - "loss": 0.010299822688102723, - "step": 2770 - }, - { - "epoch": 0.4731457800511509, - "grad_norm": 0.08464398980140686, - "learning_rate": 7.315669341909679e-05, - "loss": 0.010440715402364732, - "step": 2775 - }, - { - "epoch": 0.473998294970162, - "grad_norm": 0.08878160268068314, - "learning_rate": 7.314875083139475e-05, - "loss": 0.01015128344297409, - "step": 2780 - }, - { - "epoch": 0.47485080988917305, - "grad_norm": 0.05885029211640358, - "learning_rate": 7.314079160166996e-05, - "loss": 0.00943310335278511, - "step": 2785 - }, - { - "epoch": 0.47570332480818417, - "grad_norm": 0.07288813591003418, - "learning_rate": 7.313281573363809e-05, - "loss": 0.009116576611995697, - "step": 2790 - }, - { - "epoch": 0.47655583972719523, - "grad_norm": 0.09088344126939774, - "learning_rate": 7.31248232310225e-05, - "loss": 0.010344403237104416, - "step": 2795 - }, - { - "epoch": 0.4774083546462063, - "grad_norm": 0.08182916790246964, - "learning_rate": 7.311681409755437e-05, - "loss": 0.010874876379966735, - "step": 2800 - }, - { - "epoch": 0.4782608695652174, - "grad_norm": 0.08280645310878754, - "learning_rate": 7.310878833697264e-05, - "loss": 0.007568147033452988, - "step": 2805 - }, - { - "epoch": 0.47911338448422847, - "grad_norm": 0.10462478548288345, - "learning_rate": 7.3100745953024e-05, - "loss": 0.011740683764219283, - "step": 2810 - }, - { - "epoch": 0.4799658994032396, - "grad_norm": 0.07685881853103638, - "learning_rate": 7.30926869494629e-05, - "loss": 0.009284010529518128, - "step": 2815 - }, - { - "epoch": 0.48081841432225064, - "grad_norm": 0.05211766064167023, - "learning_rate": 7.308461133005156e-05, - "loss": 0.009633362293243408, - "step": 2820 - }, - { - "epoch": 0.4816709292412617, - "grad_norm": 0.07862114161252975, - "learning_rate": 7.307651909855993e-05, - "loss": 0.012355846166610718, - "step": 2825 - }, - { - "epoch": 0.4825234441602728, - "grad_norm": 0.09950421750545502, - "learning_rate": 7.306841025876573e-05, - "loss": 0.010842062532901764, - "step": 2830 - }, - { - "epoch": 0.4833759590792839, - "grad_norm": 0.08446205407381058, - "learning_rate": 7.306028481445446e-05, - "loss": 0.008424797654151916, - "step": 2835 - }, - { - "epoch": 0.484228473998295, - "grad_norm": 0.1424778699874878, - "learning_rate": 7.305214276941934e-05, - "loss": 0.01177324503660202, - "step": 2840 - }, - { - "epoch": 0.48508098891730606, - "grad_norm": 0.07312945276498795, - "learning_rate": 7.304398412746134e-05, - "loss": 0.010038022696971894, - "step": 2845 - }, - { - "epoch": 0.4859335038363171, - "grad_norm": 0.07043888419866562, - "learning_rate": 7.303580889238917e-05, - "loss": 0.008848214149475097, - "step": 2850 - }, - { - "epoch": 0.48678601875532823, - "grad_norm": 0.09851706773042679, - "learning_rate": 7.302761706801934e-05, - "loss": 0.011452250182628632, - "step": 2855 - }, - { - "epoch": 0.4876385336743393, - "grad_norm": 0.07379815727472305, - "learning_rate": 7.301940865817604e-05, - "loss": 0.010087071359157563, - "step": 2860 - }, - { - "epoch": 0.4884910485933504, - "grad_norm": 0.12832187116146088, - "learning_rate": 7.301118366669123e-05, - "loss": 0.013372799754142762, - "step": 2865 - }, - { - "epoch": 0.48934356351236147, - "grad_norm": 0.06776788830757141, - "learning_rate": 7.300294209740462e-05, - "loss": 0.010031795501708985, - "step": 2870 - }, - { - "epoch": 0.49019607843137253, - "grad_norm": 0.06495808809995651, - "learning_rate": 7.299468395416364e-05, - "loss": 0.011152566224336625, - "step": 2875 - }, - { - "epoch": 0.49104859335038364, - "grad_norm": 0.06433792412281036, - "learning_rate": 7.298640924082346e-05, - "loss": 0.012774203717708588, - "step": 2880 - }, - { - "epoch": 0.4919011082693947, - "grad_norm": 0.066926009953022, - "learning_rate": 7.2978117961247e-05, - "loss": 0.011111211776733399, - "step": 2885 - }, - { - "epoch": 0.4927536231884058, - "grad_norm": 0.08211687207221985, - "learning_rate": 7.296981011930493e-05, - "loss": 0.009508269280195237, - "step": 2890 - }, - { - "epoch": 0.4936061381074169, - "grad_norm": 0.09815993160009384, - "learning_rate": 7.296148571887558e-05, - "loss": 0.0117066890001297, - "step": 2895 - }, - { - "epoch": 0.49445865302642794, - "grad_norm": 0.07543535530567169, - "learning_rate": 7.295314476384508e-05, - "loss": 0.008867967873811722, - "step": 2900 - }, - { - "epoch": 0.49531116794543906, - "grad_norm": 0.07558202743530273, - "learning_rate": 7.294478725810728e-05, - "loss": 0.01093400940299034, - "step": 2905 - }, - { - "epoch": 0.4961636828644501, - "grad_norm": 0.06642191112041473, - "learning_rate": 7.293641320556371e-05, - "loss": 0.008366364240646362, - "step": 2910 - }, - { - "epoch": 0.49701619778346123, - "grad_norm": 0.07226760685443878, - "learning_rate": 7.292802261012368e-05, - "loss": 0.012197307497262954, - "step": 2915 - }, - { - "epoch": 0.4978687127024723, - "grad_norm": 0.08546584844589233, - "learning_rate": 7.29196154757042e-05, - "loss": 0.010272269695997238, - "step": 2920 - }, - { - "epoch": 0.49872122762148335, - "grad_norm": 0.0559270940721035, - "learning_rate": 7.291119180622998e-05, - "loss": 0.009690707921981812, - "step": 2925 - }, - { - "epoch": 0.49957374254049447, - "grad_norm": 0.11211635917425156, - "learning_rate": 7.290275160563349e-05, - "loss": 0.01505405604839325, - "step": 2930 - }, - { - "epoch": 0.4999147485080989, - "eval_loss": 0.035044603049755096, - "eval_runtime": 3.5861, - "eval_samples_per_second": 70.272, - "eval_steps_per_second": 1.115, - "step": 2932 - }, - { - "eval_cer_subset": 0.01374436139202371, - "eval_cer_subset_edit_distance": 844, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 2932 - } - ], - "logging_steps": 5, - "max_steps": 23460, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 2932, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 9.882574892892979e+17, - "train_batch_size": 32, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/training_args.bin deleted file mode 100644 index f049a0b30d4abb921310cf007655d399147294f8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-2932/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6adec376d54028ef57ef3dc856a5cba12bab9c0d580369637fa983b6072064f7 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/adapter_model.safetensors deleted file mode 100644 index edd0bdf8e2df75ce8cbb0a1bfd459db28f08c65e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:521c0accb0e23e78dd47ff958601d6c6a916c32dcda416d2085da1ed2e7a4b35 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/optimizer.pt deleted file mode 100644 index 3cf1c2c6bd97529acdfa1d2b82b7cbd8a4ac742d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:115683f3367fb930c4e92e522e35b6c3d4e07675a0da88c042f098c14e6e3136 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/rng_state.pth deleted file mode 100644 index b4c90a1dcb65212bc674e30d5261d12fd7b45bd3..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:72ffa0691a56ac87071d6aa8f96da2bbc7ac5214f3ad9c284719c47b3512a3d3 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/scheduler.pt deleted file mode 100644 index 30449bd991206781865c6ed4a78780006143a8cd..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ace9a6f09bd5ffb0c0f091f5764e817542e4252ddccbe302e7e7c9e1e2c7dd3 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/trainer_state.json deleted file mode 100644 index bfa11483bd6372b9ca31da79d5fc350e82bff4ce..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/trainer_state.json +++ /dev/null @@ -1,4784 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.246930422919509, - "eval_steps": 366, - "global_step": 3294, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - }, - { - "epoch": 1.7496589358799455, - "grad_norm": 0.1577913612127304, - "learning_rate": 4.617543195717702e-05, - "loss": 0.008352726697921753, - "step": 2565 - }, - { - "epoch": 1.7530695770804912, - "grad_norm": 0.0097503075376153, - "learning_rate": 4.607566614572297e-05, - "loss": 0.0051550988107919695, - "step": 2570 - }, - { - "epoch": 1.7564802182810368, - "grad_norm": 0.0745258778333664, - "learning_rate": 4.5975836246359376e-05, - "loss": 0.021304388344287873, - "step": 2575 - }, - { - "epoch": 1.7598908594815825, - "grad_norm": 0.06805134564638138, - "learning_rate": 4.5875943005137875e-05, - "loss": 0.002654948644340038, - "step": 2580 - }, - { - "epoch": 1.7633015006821282, - "grad_norm": 0.005402611568570137, - "learning_rate": 4.577598716858342e-05, - "loss": 0.0005614575464278459, - "step": 2585 - }, - { - "epoch": 1.766712141882674, - "grad_norm": 1.909899115562439, - "learning_rate": 4.5675969483688796e-05, - "loss": 0.07116572856903076, - "step": 2590 - }, - { - "epoch": 1.7701227830832198, - "grad_norm": 0.31900784373283386, - "learning_rate": 4.557589069790897e-05, - "loss": 0.00657682865858078, - "step": 2595 - }, - { - "epoch": 1.7735334242837655, - "grad_norm": 0.029744356870651245, - "learning_rate": 4.547575155915556e-05, - "loss": 0.022768501937389374, - "step": 2600 - }, - { - "epoch": 1.7769440654843112, - "grad_norm": 0.3033762276172638, - "learning_rate": 4.537555281579118e-05, - "loss": 0.06703370213508605, - "step": 2605 - }, - { - "epoch": 1.7803547066848568, - "grad_norm": 0.1135796457529068, - "learning_rate": 4.5275295216623895e-05, - "loss": 0.02318609356880188, - "step": 2610 - }, - { - "epoch": 1.7837653478854025, - "grad_norm": 0.03714317828416824, - "learning_rate": 4.517497951090163e-05, - "loss": 0.0006621151696890593, - "step": 2615 - }, - { - "epoch": 1.7871759890859482, - "grad_norm": 0.022167189046740532, - "learning_rate": 4.507460644830652e-05, - "loss": 0.02861899733543396, - "step": 2620 - }, - { - "epoch": 1.790586630286494, - "grad_norm": 0.85112464427948, - "learning_rate": 4.497417677894937e-05, - "loss": 0.006332498788833618, - "step": 2625 - }, - { - "epoch": 1.7939972714870396, - "grad_norm": 0.35604724287986755, - "learning_rate": 4.487369125336402e-05, - "loss": 0.0009636864997446537, - "step": 2630 - }, - { - "epoch": 1.7974079126875853, - "grad_norm": 1.309139370918274, - "learning_rate": 4.477315062250173e-05, - "loss": 0.015147030353546143, - "step": 2635 - }, - { - "epoch": 1.800818553888131, - "grad_norm": 0.00445478456094861, - "learning_rate": 4.467255563772554e-05, - "loss": 0.0033130761235952376, - "step": 2640 - }, - { - "epoch": 1.8042291950886766, - "grad_norm": 0.6081591248512268, - "learning_rate": 4.457190705080476e-05, - "loss": 0.05884039998054504, - "step": 2645 - }, - { - "epoch": 1.8076398362892223, - "grad_norm": 0.021707098931074142, - "learning_rate": 4.4471205613909215e-05, - "loss": 0.011024921387434005, - "step": 2650 - }, - { - "epoch": 1.811050477489768, - "grad_norm": 0.07584571093320847, - "learning_rate": 4.437045207960372e-05, - "loss": 0.0034543585032224657, - "step": 2655 - }, - { - "epoch": 1.8144611186903137, - "grad_norm": 0.09392323344945908, - "learning_rate": 4.4269647200842444e-05, - "loss": 0.020604337751865386, - "step": 2660 - }, - { - "epoch": 1.8178717598908594, - "grad_norm": 0.01986370049417019, - "learning_rate": 4.4168791730963214e-05, - "loss": 0.002114328555762768, - "step": 2665 - }, - { - "epoch": 1.821282401091405, - "grad_norm": 0.09479828178882599, - "learning_rate": 4.406788642368199e-05, - "loss": 0.025032815337181092, - "step": 2670 - }, - { - "epoch": 1.8246930422919507, - "grad_norm": 0.049587834626436234, - "learning_rate": 4.396693203308714e-05, - "loss": 0.010216309875249862, - "step": 2675 - }, - { - "epoch": 1.8281036834924966, - "grad_norm": 0.8442233204841614, - "learning_rate": 4.3865929313633846e-05, - "loss": 0.028453707695007324, - "step": 2680 - }, - { - "epoch": 1.8315143246930423, - "grad_norm": 1.1214258670806885, - "learning_rate": 4.37648790201385e-05, - "loss": 0.030564960837364197, - "step": 2685 - }, - { - "epoch": 1.834924965893588, - "grad_norm": 0.0033480043057352304, - "learning_rate": 4.3663781907772984e-05, - "loss": 0.0031189337372779847, - "step": 2690 - }, - { - "epoch": 1.8383356070941337, - "grad_norm": 1.5848685503005981, - "learning_rate": 4.356263873205908e-05, - "loss": 0.006724410504102707, - "step": 2695 - }, - { - "epoch": 1.8417462482946794, - "grad_norm": 2.099224090576172, - "learning_rate": 4.346145024886282e-05, - "loss": 0.02901224195957184, - "step": 2700 - }, - { - "epoch": 1.845156889495225, - "grad_norm": 0.009933914057910442, - "learning_rate": 4.336021721438881e-05, - "loss": 0.01628301590681076, - "step": 2705 - }, - { - "epoch": 1.848567530695771, - "grad_norm": 0.36502084136009216, - "learning_rate": 4.325894038517463e-05, - "loss": 0.0063889890909194945, - "step": 2710 - }, - { - "epoch": 1.8519781718963166, - "grad_norm": 0.5338266491889954, - "learning_rate": 4.3157620518085123e-05, - "loss": 0.0017654186114668847, - "step": 2715 - }, - { - "epoch": 1.8553888130968623, - "grad_norm": 0.019173067063093185, - "learning_rate": 4.3056258370306773e-05, - "loss": 0.026314160227775572, - "step": 2720 - }, - { - "epoch": 1.858799454297408, - "grad_norm": 0.0022459065075963736, - "learning_rate": 4.295485469934203e-05, - "loss": 0.034506088495254515, - "step": 2725 - }, - { - "epoch": 1.8622100954979537, - "grad_norm": 0.009851609356701374, - "learning_rate": 4.285341026300366e-05, - "loss": 0.037691861391067505, - "step": 2730 - }, - { - "epoch": 1.8656207366984994, - "grad_norm": 0.037228964269161224, - "learning_rate": 4.275192581940908e-05, - "loss": 0.019535519182682037, - "step": 2735 - }, - { - "epoch": 1.869031377899045, - "grad_norm": 0.03244785964488983, - "learning_rate": 4.2650402126974704e-05, - "loss": 0.004778595641255379, - "step": 2740 - }, - { - "epoch": 1.8724420190995907, - "grad_norm": 0.027707895264029503, - "learning_rate": 4.254883994441023e-05, - "loss": 0.014376556873321534, - "step": 2745 - }, - { - "epoch": 1.8758526603001364, - "grad_norm": 0.0010299253044649959, - "learning_rate": 4.244724003071302e-05, - "loss": 0.03207484483718872, - "step": 2750 - }, - { - "epoch": 1.879263301500682, - "grad_norm": 0.09466377645730972, - "learning_rate": 4.234560314516241e-05, - "loss": 0.0034012068063020706, - "step": 2755 - }, - { - "epoch": 1.8826739427012278, - "grad_norm": 0.010737070813775063, - "learning_rate": 4.224393004731403e-05, - "loss": 0.0015277769416570663, - "step": 2760 - }, - { - "epoch": 1.8860845839017735, - "grad_norm": 0.0126175656914711, - "learning_rate": 4.2142221496994144e-05, - "loss": 0.0010683752596378326, - "step": 2765 - }, - { - "epoch": 1.8894952251023192, - "grad_norm": 0.03981072083115578, - "learning_rate": 4.2040478254293946e-05, - "loss": 0.013066369295120239, - "step": 2770 - }, - { - "epoch": 1.8929058663028648, - "grad_norm": 1.678425669670105, - "learning_rate": 4.193870107956391e-05, - "loss": 0.04245063066482544, - "step": 2775 - }, - { - "epoch": 1.8963165075034105, - "grad_norm": 0.21114972233772278, - "learning_rate": 4.1836890733408063e-05, - "loss": 0.006565409898757935, - "step": 2780 - }, - { - "epoch": 1.8997271487039562, - "grad_norm": 0.13557757437229156, - "learning_rate": 4.173504797667836e-05, - "loss": 0.0032049473375082016, - "step": 2785 - }, - { - "epoch": 1.9031377899045019, - "grad_norm": 0.03560245409607887, - "learning_rate": 4.163317357046896e-05, - "loss": 0.036527630686759946, - "step": 2790 - }, - { - "epoch": 1.9065484311050478, - "grad_norm": 0.016457395628094673, - "learning_rate": 4.1531268276110534e-05, - "loss": 0.004363229870796204, - "step": 2795 - }, - { - "epoch": 1.9099590723055935, - "grad_norm": 0.15892116725444794, - "learning_rate": 4.142933285516459e-05, - "loss": 0.0009642043150961399, - "step": 2800 - }, - { - "epoch": 1.9133697135061392, - "grad_norm": 1.4503952264785767, - "learning_rate": 4.1327368069417805e-05, - "loss": 0.016029161214828492, - "step": 2805 - }, - { - "epoch": 1.9167803547066848, - "grad_norm": 0.05091691017150879, - "learning_rate": 4.122537468087626e-05, - "loss": 0.008691015094518662, - "step": 2810 - }, - { - "epoch": 1.9201909959072305, - "grad_norm": 0.33131423592567444, - "learning_rate": 4.1123353451759843e-05, - "loss": 0.026498579978942872, - "step": 2815 - }, - { - "epoch": 1.9236016371077762, - "grad_norm": 0.011024169623851776, - "learning_rate": 4.1021305144496455e-05, - "loss": 0.005746996402740479, - "step": 2820 - }, - { - "epoch": 1.9270122783083221, - "grad_norm": 0.003868364728987217, - "learning_rate": 4.091923052171637e-05, - "loss": 0.010599984973669051, - "step": 2825 - }, - { - "epoch": 1.9304229195088678, - "grad_norm": 0.06572377681732178, - "learning_rate": 4.081713034624656e-05, - "loss": 0.0074796505272388455, - "step": 2830 - }, - { - "epoch": 1.9338335607094135, - "grad_norm": 0.004749608226120472, - "learning_rate": 4.07150053811049e-05, - "loss": 0.001074880175292492, - "step": 2835 - }, - { - "epoch": 1.9372442019099592, - "grad_norm": 1.0662771463394165, - "learning_rate": 4.061285638949456e-05, - "loss": 0.012685902416706085, - "step": 2840 - }, - { - "epoch": 1.9406548431105048, - "grad_norm": 0.05863015726208687, - "learning_rate": 4.0510684134798275e-05, - "loss": 0.02307589054107666, - "step": 2845 - }, - { - "epoch": 1.9440654843110505, - "grad_norm": 0.1672995239496231, - "learning_rate": 4.0408489380572595e-05, - "loss": 0.0009137367829680443, - "step": 2850 - }, - { - "epoch": 1.9474761255115962, - "grad_norm": 0.37154069542884827, - "learning_rate": 4.030627289054224e-05, - "loss": 0.03070145845413208, - "step": 2855 - }, - { - "epoch": 1.950886766712142, - "grad_norm": 1.7240241765975952, - "learning_rate": 4.020403542859436e-05, - "loss": 0.02094976305961609, - "step": 2860 - }, - { - "epoch": 1.9542974079126876, - "grad_norm": 0.019670270383358, - "learning_rate": 4.0101777758772826e-05, - "loss": 0.002072345092892647, - "step": 2865 - }, - { - "epoch": 1.9577080491132333, - "grad_norm": 0.004318730439990759, - "learning_rate": 3.999950064527255e-05, - "loss": 0.004361823201179504, - "step": 2870 - }, - { - "epoch": 1.961118690313779, - "grad_norm": 0.01702667959034443, - "learning_rate": 3.989720485243371e-05, - "loss": 0.0062848575413227085, - "step": 2875 - }, - { - "epoch": 1.9645293315143246, - "grad_norm": 0.011724979616701603, - "learning_rate": 3.9794891144736114e-05, - "loss": 0.010996624082326888, - "step": 2880 - }, - { - "epoch": 1.9679399727148703, - "grad_norm": 0.05676786229014397, - "learning_rate": 3.9692560286793454e-05, - "loss": 0.0015414996072649957, - "step": 2885 - }, - { - "epoch": 1.971350613915416, - "grad_norm": 0.0819210484623909, - "learning_rate": 3.959021304334756e-05, - "loss": 0.0008444737643003464, - "step": 2890 - }, - { - "epoch": 1.9747612551159617, - "grad_norm": 2.654984951019287, - "learning_rate": 3.948785017926274e-05, - "loss": 0.029948851466178893, - "step": 2895 - }, - { - "epoch": 1.9781718963165074, - "grad_norm": 1.9272631406784058, - "learning_rate": 3.938547245952003e-05, - "loss": 0.028752812743186952, - "step": 2900 - }, - { - "epoch": 1.981582537517053, - "grad_norm": 1.8656548261642456, - "learning_rate": 3.9283080649211474e-05, - "loss": 0.013515619933605194, - "step": 2905 - }, - { - "epoch": 1.984993178717599, - "grad_norm": 0.12653613090515137, - "learning_rate": 3.918067551353445e-05, - "loss": 0.0011569897644221783, - "step": 2910 - }, - { - "epoch": 1.9884038199181446, - "grad_norm": 0.07956118136644363, - "learning_rate": 3.9078257817785886e-05, - "loss": 0.024067461490631104, - "step": 2915 - }, - { - "epoch": 1.9918144611186903, - "grad_norm": 0.05314113199710846, - "learning_rate": 3.897582832735658e-05, - "loss": 0.008826743811368942, - "step": 2920 - }, - { - "epoch": 1.995225102319236, - "grad_norm": 0.05015930160880089, - "learning_rate": 3.887338780772551e-05, - "loss": 0.017050875723361968, - "step": 2925 - }, - { - "epoch": 1.9972714870395634, - "eval_loss": 0.051675114780664444, - "eval_runtime": 0.9019, - "eval_samples_per_second": 83.154, - "eval_steps_per_second": 2.217, - "step": 2928 - }, - { - "eval_cer_subset": 0.016214743153018123, - "eval_cer_subset_edit_distance": 119, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2928 - }, - { - "epoch": 1.9986357435197817, - "grad_norm": 0.0063513885252177715, - "learning_rate": 3.8770937024454024e-05, - "loss": 0.06096935272216797, - "step": 2930 - }, - { - "epoch": 2.0020463847203276, - "grad_norm": 0.3572078347206116, - "learning_rate": 3.86684767431802e-05, - "loss": 0.006809630990028381, - "step": 2935 - }, - { - "epoch": 2.0054570259208733, - "grad_norm": 0.06565147638320923, - "learning_rate": 3.8566007729613116e-05, - "loss": 0.0028434153646230698, - "step": 2940 - }, - { - "epoch": 2.008867667121419, - "grad_norm": 0.842980682849884, - "learning_rate": 3.846353074952705e-05, - "loss": 0.009484797716140747, - "step": 2945 - }, - { - "epoch": 2.0122783083219646, - "grad_norm": 0.2072802633047104, - "learning_rate": 3.83610465687559e-05, - "loss": 0.002631821669638157, - "step": 2950 - }, - { - "epoch": 2.0156889495225103, - "grad_norm": 0.010441784746944904, - "learning_rate": 3.8258555953187294e-05, - "loss": 0.0031868770718574526, - "step": 2955 - }, - { - "epoch": 2.019099590723056, - "grad_norm": 0.01504182443022728, - "learning_rate": 3.8156059668756994e-05, - "loss": 0.0008036898449063301, - "step": 2960 - }, - { - "epoch": 2.0225102319236017, - "grad_norm": 0.1884382665157318, - "learning_rate": 3.805355848144312e-05, - "loss": 0.0035643450915813445, - "step": 2965 - }, - { - "epoch": 2.0259208731241474, - "grad_norm": 0.0791923776268959, - "learning_rate": 3.795105315726042e-05, - "loss": 0.006217213720083237, - "step": 2970 - }, - { - "epoch": 2.029331514324693, - "grad_norm": 0.3660128116607666, - "learning_rate": 3.7848544462254586e-05, - "loss": 0.01521536111831665, - "step": 2975 - }, - { - "epoch": 2.0327421555252387, - "grad_norm": 1.1506773233413696, - "learning_rate": 3.774603316249646e-05, - "loss": 0.016655027866363525, - "step": 2980 - }, - { - "epoch": 2.0361527967257844, - "grad_norm": 0.010742763057351112, - "learning_rate": 3.764352002407638e-05, - "loss": 0.0031856697052717207, - "step": 2985 - }, - { - "epoch": 2.03956343792633, - "grad_norm": 0.008567198179662228, - "learning_rate": 3.754100581309843e-05, - "loss": 0.006546307355165482, - "step": 2990 - }, - { - "epoch": 2.042974079126876, - "grad_norm": 0.242637500166893, - "learning_rate": 3.743849129567467e-05, - "loss": 0.01844196617603302, - "step": 2995 - }, - { - "epoch": 2.0463847203274215, - "grad_norm": 0.0021864245645701885, - "learning_rate": 3.7335977237919486e-05, - "loss": 0.0010665619745850563, - "step": 3000 - }, - { - "epoch": 2.049795361527967, - "grad_norm": 0.0823604166507721, - "learning_rate": 3.723346440594384e-05, - "loss": 0.007866787165403366, - "step": 3005 - }, - { - "epoch": 2.053206002728513, - "grad_norm": 1.950189471244812, - "learning_rate": 3.713095356584948e-05, - "loss": 0.008237652480602264, - "step": 3010 - }, - { - "epoch": 2.0566166439290585, - "grad_norm": 0.0031665184069424868, - "learning_rate": 3.702844548372333e-05, - "loss": 0.0026241108775138856, - "step": 3015 - }, - { - "epoch": 2.060027285129604, - "grad_norm": 0.000985809718258679, - "learning_rate": 3.692594092563164e-05, - "loss": 0.0006582608446478843, - "step": 3020 - }, - { - "epoch": 2.06343792633015, - "grad_norm": 0.005130598321557045, - "learning_rate": 3.682344065761439e-05, - "loss": 0.003146781027317047, - "step": 3025 - }, - { - "epoch": 2.0668485675306956, - "grad_norm": 0.37476831674575806, - "learning_rate": 3.6720945445679456e-05, - "loss": 0.0038135331124067307, - "step": 3030 - }, - { - "epoch": 2.0702592087312413, - "grad_norm": 0.7057031989097595, - "learning_rate": 3.661845605579694e-05, - "loss": 0.01638354957103729, - "step": 3035 - }, - { - "epoch": 2.0736698499317874, - "grad_norm": 0.002361712511628866, - "learning_rate": 3.651597325389343e-05, - "loss": 0.008126144856214523, - "step": 3040 - }, - { - "epoch": 2.077080491132333, - "grad_norm": 0.14359615743160248, - "learning_rate": 3.641349780584628e-05, - "loss": 0.0010236113332211972, - "step": 3045 - }, - { - "epoch": 2.0804911323328787, - "grad_norm": 0.02165267802774906, - "learning_rate": 3.631103047747791e-05, - "loss": 0.001835300400853157, - "step": 3050 - }, - { - "epoch": 2.0839017735334244, - "grad_norm": 0.08726503700017929, - "learning_rate": 3.6208572034550014e-05, - "loss": 0.0011202219873666763, - "step": 3055 - }, - { - "epoch": 2.08731241473397, - "grad_norm": 0.011694432236254215, - "learning_rate": 3.6106123242757934e-05, - "loss": 0.0012682409957051276, - "step": 3060 - }, - { - "epoch": 2.090723055934516, - "grad_norm": 0.011882166378200054, - "learning_rate": 3.6003684867724856e-05, - "loss": 0.0017605546861886978, - "step": 3065 - }, - { - "epoch": 2.0941336971350615, - "grad_norm": 0.04591360688209534, - "learning_rate": 3.5901257674996135e-05, - "loss": 0.005101381987333298, - "step": 3070 - }, - { - "epoch": 2.097544338335607, - "grad_norm": 0.004696133080869913, - "learning_rate": 3.579884243003353e-05, - "loss": 0.0011597291566431522, - "step": 3075 - }, - { - "epoch": 2.100954979536153, - "grad_norm": 0.15893827378749847, - "learning_rate": 3.5696439898209546e-05, - "loss": 0.004860148951411247, - "step": 3080 - }, - { - "epoch": 2.1043656207366985, - "grad_norm": 0.0052610537968575954, - "learning_rate": 3.559405084480166e-05, - "loss": 0.0022133996710181235, - "step": 3085 - }, - { - "epoch": 2.107776261937244, - "grad_norm": 0.04163965955376625, - "learning_rate": 3.5491676034986634e-05, - "loss": 0.0011653171852231026, - "step": 3090 - }, - { - "epoch": 2.11118690313779, - "grad_norm": 0.03646431118249893, - "learning_rate": 3.538931623383477e-05, - "loss": 0.0010974819771945477, - "step": 3095 - }, - { - "epoch": 2.1145975443383356, - "grad_norm": 0.0027346035931259394, - "learning_rate": 3.5286972206304225e-05, - "loss": 0.0002963356673717499, - "step": 3100 - }, - { - "epoch": 2.1180081855388813, - "grad_norm": 0.030314767733216286, - "learning_rate": 3.5184644717235233e-05, - "loss": 0.0013646245934069157, - "step": 3105 - }, - { - "epoch": 2.121418826739427, - "grad_norm": 0.07366184145212173, - "learning_rate": 3.5082334531344514e-05, - "loss": 0.0018215376883745193, - "step": 3110 - }, - { - "epoch": 2.1248294679399726, - "grad_norm": 0.5131163597106934, - "learning_rate": 3.498004241321938e-05, - "loss": 0.011788859963417053, - "step": 3115 - }, - { - "epoch": 2.1282401091405183, - "grad_norm": 0.005606099497526884, - "learning_rate": 3.48777691273122e-05, - "loss": 0.00023315094877034425, - "step": 3120 - }, - { - "epoch": 2.131650750341064, - "grad_norm": 0.218208909034729, - "learning_rate": 3.4775515437934554e-05, - "loss": 0.0018167193979024888, - "step": 3125 - }, - { - "epoch": 2.1350613915416097, - "grad_norm": 0.026605455204844475, - "learning_rate": 3.467328210925161e-05, - "loss": 0.0012846079654991627, - "step": 3130 - }, - { - "epoch": 2.1384720327421554, - "grad_norm": 1.4139975309371948, - "learning_rate": 3.457106990527632e-05, - "loss": 0.00508112832903862, - "step": 3135 - }, - { - "epoch": 2.141882673942701, - "grad_norm": 0.019414646551012993, - "learning_rate": 3.446887958986385e-05, - "loss": 0.00019377884455025197, - "step": 3140 - }, - { - "epoch": 2.1452933151432467, - "grad_norm": 0.0009443740709684789, - "learning_rate": 3.4366711926705694e-05, - "loss": 0.0002384019084274769, - "step": 3145 - }, - { - "epoch": 2.148703956343793, - "grad_norm": 0.004274521954357624, - "learning_rate": 3.426456767932416e-05, - "loss": 0.0001209982088766992, - "step": 3150 - }, - { - "epoch": 2.1521145975443385, - "grad_norm": 1.536423683166504, - "learning_rate": 3.416244761106645e-05, - "loss": 0.008635792136192321, - "step": 3155 - }, - { - "epoch": 2.155525238744884, - "grad_norm": 0.0005219463491812348, - "learning_rate": 3.406035248509918e-05, - "loss": 0.009876561909914016, - "step": 3160 - }, - { - "epoch": 2.15893587994543, - "grad_norm": 0.0003459984145592898, - "learning_rate": 3.395828306440249e-05, - "loss": 0.00039457883685827254, - "step": 3165 - }, - { - "epoch": 2.1623465211459756, - "grad_norm": 0.001981241861358285, - "learning_rate": 3.3856240111764465e-05, - "loss": 0.0015202089212834834, - "step": 3170 - }, - { - "epoch": 2.1657571623465213, - "grad_norm": 2.1673197746276855, - "learning_rate": 3.375422438977536e-05, - "loss": 0.006145225092768669, - "step": 3175 - }, - { - "epoch": 2.169167803547067, - "grad_norm": 1.1808840036392212, - "learning_rate": 3.365223666082195e-05, - "loss": 0.013607437908649444, - "step": 3180 - }, - { - "epoch": 2.1725784447476126, - "grad_norm": 0.028271734714508057, - "learning_rate": 3.3550277687081766e-05, - "loss": 0.0013325599022209645, - "step": 3185 - }, - { - "epoch": 2.1759890859481583, - "grad_norm": 0.24461407959461212, - "learning_rate": 3.344834823051754e-05, - "loss": 0.0011271734721958638, - "step": 3190 - }, - { - "epoch": 2.179399727148704, - "grad_norm": 0.870476245880127, - "learning_rate": 3.334644905287129e-05, - "loss": 0.006120540574193001, - "step": 3195 - }, - { - "epoch": 2.1828103683492497, - "grad_norm": 0.24630939960479736, - "learning_rate": 3.324458091565887e-05, - "loss": 0.006894994527101517, - "step": 3200 - }, - { - "epoch": 2.1862210095497954, - "grad_norm": 0.035023678094148636, - "learning_rate": 3.314274458016407e-05, - "loss": 0.0012451894581317902, - "step": 3205 - }, - { - "epoch": 2.189631650750341, - "grad_norm": 0.0890582948923111, - "learning_rate": 3.3040940807433086e-05, - "loss": 0.0024930678308010103, - "step": 3210 - }, - { - "epoch": 2.1930422919508867, - "grad_norm": 0.30489957332611084, - "learning_rate": 3.2939170358268715e-05, - "loss": 0.023087967932224274, - "step": 3215 - }, - { - "epoch": 2.1964529331514324, - "grad_norm": 0.0017311271512880921, - "learning_rate": 3.283743399322477e-05, - "loss": 0.010184342414140702, - "step": 3220 - }, - { - "epoch": 2.199863574351978, - "grad_norm": 0.08493661880493164, - "learning_rate": 3.273573247260027e-05, - "loss": 0.02805263102054596, - "step": 3225 - }, - { - "epoch": 2.203274215552524, - "grad_norm": 0.09135634452104568, - "learning_rate": 3.2634066556433934e-05, - "loss": 0.0009638279676437378, - "step": 3230 - }, - { - "epoch": 2.2066848567530695, - "grad_norm": 0.05967738851904869, - "learning_rate": 3.2532437004498316e-05, - "loss": 0.007517358660697937, - "step": 3235 - }, - { - "epoch": 2.210095497953615, - "grad_norm": 0.03030387870967388, - "learning_rate": 3.243084457629425e-05, - "loss": 0.0009239451959729194, - "step": 3240 - }, - { - "epoch": 2.213506139154161, - "grad_norm": 0.004568960517644882, - "learning_rate": 3.2329290031045114e-05, - "loss": 0.00045777852647006513, - "step": 3245 - }, - { - "epoch": 2.2169167803547065, - "grad_norm": 0.20760250091552734, - "learning_rate": 3.2227774127691225e-05, - "loss": 0.0007285364903509617, - "step": 3250 - }, - { - "epoch": 2.220327421555252, - "grad_norm": 0.0028919128235429525, - "learning_rate": 3.2126297624884065e-05, - "loss": 0.001136382296681404, - "step": 3255 - }, - { - "epoch": 2.223738062755798, - "grad_norm": 0.0026204732712358236, - "learning_rate": 3.20248612809807e-05, - "loss": 0.0001833780319429934, - "step": 3260 - }, - { - "epoch": 2.2271487039563436, - "grad_norm": 0.054305560886859894, - "learning_rate": 3.192346585403805e-05, - "loss": 0.003986500948667526, - "step": 3265 - }, - { - "epoch": 2.2305593451568897, - "grad_norm": 0.024538133293390274, - "learning_rate": 3.182211210180732e-05, - "loss": 0.0003968174569308758, - "step": 3270 - }, - { - "epoch": 2.2339699863574354, - "grad_norm": 0.003948847763240337, - "learning_rate": 3.172080078172817e-05, - "loss": 0.0004363433923572302, - "step": 3275 - }, - { - "epoch": 2.237380627557981, - "grad_norm": 0.0024372704792767763, - "learning_rate": 3.161953265092322e-05, - "loss": 0.001003190129995346, - "step": 3280 - }, - { - "epoch": 2.2407912687585267, - "grad_norm": 0.05504141375422478, - "learning_rate": 3.1518308466192346e-05, - "loss": 0.00043217958882451056, - "step": 3285 - }, - { - "epoch": 2.2442019099590724, - "grad_norm": 0.003660411573946476, - "learning_rate": 3.141712898400692e-05, - "loss": 0.0005229417700320482, - "step": 3290 - }, - { - "epoch": 2.246930422919509, - "eval_loss": 0.07013023644685745, - "eval_runtime": 0.915, - "eval_samples_per_second": 81.971, - "eval_steps_per_second": 2.186, - "step": 3294 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3294 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 3.434499924030259e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3294/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/adapter_model.safetensors deleted file mode 100644 index 3e1305eb8baafbf45d85d85476aad4566d6ec541..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6c9911f65f699f709137449b7fd67dc4169e85312261cb1399120c30d43c385a -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/optimizer.pt deleted file mode 100644 index ad43df116628485ed8de213fdd0fcec0ea240af9..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b05cd08b59bd0bc6fa85d84799234d7565c94ef200529e156539f4df4ff5812a -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/rng_state.pth deleted file mode 100644 index 590182fe3185b746a8114bd40bdcdd2d117502bd..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ccb8eeb935749fc43744e0a5eeacdf6f0f10253be15266a497cbca0ffaa2573 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/scheduler.pt deleted file mode 100644 index ec9eecf0dbe69a33cb5db52c207be98f3e795ffb..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e6f3cf2c5fd59db6240cf83ae7ce4eb18e62c53f5ee17beab20e8a15a21a4746 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/trainer_state.json deleted file mode 100644 index 464d2e64cd23b4e45048f8e3bcfe102e85f92d1f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/trainer_state.json +++ /dev/null @@ -1,561 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.24965893587994542, - "eval_steps": 366, - "global_step": 366, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 3797973616877568.0, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-366/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/adapter_model.safetensors deleted file mode 100644 index d8202b78555fac85204580f37be87c68c18f305b..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7de5cbc210fed4977980461b32ff18c0db49526d8dfebea0aceba5906cd3503 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/optimizer.pt deleted file mode 100644 index e033ad450e321db232b9313a91c573a97be86704..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69622424ada8e8cf9679a158f03b220a04f37cc5c30acb80d5f16cc2fe866324 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/rng_state.pth deleted file mode 100644 index 0f75955e7ebe698d40f961cef744719ee440b01b..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b526e1a6193bbef5c713d1f3b86c07cf14f47ab0393283cef43a72643daf1bc -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/scheduler.pt deleted file mode 100644 index 2d3f9a427650a0611068d68b48a61b6422c86df8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:10ee5c64670a825c4221fa6ede4d9f727e544eb2406d63ead4aef8788658aab5 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/trainer_state.json deleted file mode 100644 index d9f6b3460e20697c81a8543ac3027e81e623fc91..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/trainer_state.json +++ /dev/null @@ -1,5318 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.4965893587994543, - "eval_steps": 366, - "global_step": 3660, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - }, - { - "epoch": 1.7496589358799455, - "grad_norm": 0.1577913612127304, - "learning_rate": 4.617543195717702e-05, - "loss": 0.008352726697921753, - "step": 2565 - }, - { - "epoch": 1.7530695770804912, - "grad_norm": 0.0097503075376153, - "learning_rate": 4.607566614572297e-05, - "loss": 0.0051550988107919695, - "step": 2570 - }, - { - "epoch": 1.7564802182810368, - "grad_norm": 0.0745258778333664, - "learning_rate": 4.5975836246359376e-05, - "loss": 0.021304388344287873, - "step": 2575 - }, - { - "epoch": 1.7598908594815825, - "grad_norm": 0.06805134564638138, - "learning_rate": 4.5875943005137875e-05, - "loss": 0.002654948644340038, - "step": 2580 - }, - { - "epoch": 1.7633015006821282, - "grad_norm": 0.005402611568570137, - "learning_rate": 4.577598716858342e-05, - "loss": 0.0005614575464278459, - "step": 2585 - }, - { - "epoch": 1.766712141882674, - "grad_norm": 1.909899115562439, - "learning_rate": 4.5675969483688796e-05, - "loss": 0.07116572856903076, - "step": 2590 - }, - { - "epoch": 1.7701227830832198, - "grad_norm": 0.31900784373283386, - "learning_rate": 4.557589069790897e-05, - "loss": 0.00657682865858078, - "step": 2595 - }, - { - "epoch": 1.7735334242837655, - "grad_norm": 0.029744356870651245, - "learning_rate": 4.547575155915556e-05, - "loss": 0.022768501937389374, - "step": 2600 - }, - { - "epoch": 1.7769440654843112, - "grad_norm": 0.3033762276172638, - "learning_rate": 4.537555281579118e-05, - "loss": 0.06703370213508605, - "step": 2605 - }, - { - "epoch": 1.7803547066848568, - "grad_norm": 0.1135796457529068, - "learning_rate": 4.5275295216623895e-05, - "loss": 0.02318609356880188, - "step": 2610 - }, - { - "epoch": 1.7837653478854025, - "grad_norm": 0.03714317828416824, - "learning_rate": 4.517497951090163e-05, - "loss": 0.0006621151696890593, - "step": 2615 - }, - { - "epoch": 1.7871759890859482, - "grad_norm": 0.022167189046740532, - "learning_rate": 4.507460644830652e-05, - "loss": 0.02861899733543396, - "step": 2620 - }, - { - "epoch": 1.790586630286494, - "grad_norm": 0.85112464427948, - "learning_rate": 4.497417677894937e-05, - "loss": 0.006332498788833618, - "step": 2625 - }, - { - "epoch": 1.7939972714870396, - "grad_norm": 0.35604724287986755, - "learning_rate": 4.487369125336402e-05, - "loss": 0.0009636864997446537, - "step": 2630 - }, - { - "epoch": 1.7974079126875853, - "grad_norm": 1.309139370918274, - "learning_rate": 4.477315062250173e-05, - "loss": 0.015147030353546143, - "step": 2635 - }, - { - "epoch": 1.800818553888131, - "grad_norm": 0.00445478456094861, - "learning_rate": 4.467255563772554e-05, - "loss": 0.0033130761235952376, - "step": 2640 - }, - { - "epoch": 1.8042291950886766, - "grad_norm": 0.6081591248512268, - "learning_rate": 4.457190705080476e-05, - "loss": 0.05884039998054504, - "step": 2645 - }, - { - "epoch": 1.8076398362892223, - "grad_norm": 0.021707098931074142, - "learning_rate": 4.4471205613909215e-05, - "loss": 0.011024921387434005, - "step": 2650 - }, - { - "epoch": 1.811050477489768, - "grad_norm": 0.07584571093320847, - "learning_rate": 4.437045207960372e-05, - "loss": 0.0034543585032224657, - "step": 2655 - }, - { - "epoch": 1.8144611186903137, - "grad_norm": 0.09392323344945908, - "learning_rate": 4.4269647200842444e-05, - "loss": 0.020604337751865386, - "step": 2660 - }, - { - "epoch": 1.8178717598908594, - "grad_norm": 0.01986370049417019, - "learning_rate": 4.4168791730963214e-05, - "loss": 0.002114328555762768, - "step": 2665 - }, - { - "epoch": 1.821282401091405, - "grad_norm": 0.09479828178882599, - "learning_rate": 4.406788642368199e-05, - "loss": 0.025032815337181092, - "step": 2670 - }, - { - "epoch": 1.8246930422919507, - "grad_norm": 0.049587834626436234, - "learning_rate": 4.396693203308714e-05, - "loss": 0.010216309875249862, - "step": 2675 - }, - { - "epoch": 1.8281036834924966, - "grad_norm": 0.8442233204841614, - "learning_rate": 4.3865929313633846e-05, - "loss": 0.028453707695007324, - "step": 2680 - }, - { - "epoch": 1.8315143246930423, - "grad_norm": 1.1214258670806885, - "learning_rate": 4.37648790201385e-05, - "loss": 0.030564960837364197, - "step": 2685 - }, - { - "epoch": 1.834924965893588, - "grad_norm": 0.0033480043057352304, - "learning_rate": 4.3663781907772984e-05, - "loss": 0.0031189337372779847, - "step": 2690 - }, - { - "epoch": 1.8383356070941337, - "grad_norm": 1.5848685503005981, - "learning_rate": 4.356263873205908e-05, - "loss": 0.006724410504102707, - "step": 2695 - }, - { - "epoch": 1.8417462482946794, - "grad_norm": 2.099224090576172, - "learning_rate": 4.346145024886282e-05, - "loss": 0.02901224195957184, - "step": 2700 - }, - { - "epoch": 1.845156889495225, - "grad_norm": 0.009933914057910442, - "learning_rate": 4.336021721438881e-05, - "loss": 0.01628301590681076, - "step": 2705 - }, - { - "epoch": 1.848567530695771, - "grad_norm": 0.36502084136009216, - "learning_rate": 4.325894038517463e-05, - "loss": 0.0063889890909194945, - "step": 2710 - }, - { - "epoch": 1.8519781718963166, - "grad_norm": 0.5338266491889954, - "learning_rate": 4.3157620518085123e-05, - "loss": 0.0017654186114668847, - "step": 2715 - }, - { - "epoch": 1.8553888130968623, - "grad_norm": 0.019173067063093185, - "learning_rate": 4.3056258370306773e-05, - "loss": 0.026314160227775572, - "step": 2720 - }, - { - "epoch": 1.858799454297408, - "grad_norm": 0.0022459065075963736, - "learning_rate": 4.295485469934203e-05, - "loss": 0.034506088495254515, - "step": 2725 - }, - { - "epoch": 1.8622100954979537, - "grad_norm": 0.009851609356701374, - "learning_rate": 4.285341026300366e-05, - "loss": 0.037691861391067505, - "step": 2730 - }, - { - "epoch": 1.8656207366984994, - "grad_norm": 0.037228964269161224, - "learning_rate": 4.275192581940908e-05, - "loss": 0.019535519182682037, - "step": 2735 - }, - { - "epoch": 1.869031377899045, - "grad_norm": 0.03244785964488983, - "learning_rate": 4.2650402126974704e-05, - "loss": 0.004778595641255379, - "step": 2740 - }, - { - "epoch": 1.8724420190995907, - "grad_norm": 0.027707895264029503, - "learning_rate": 4.254883994441023e-05, - "loss": 0.014376556873321534, - "step": 2745 - }, - { - "epoch": 1.8758526603001364, - "grad_norm": 0.0010299253044649959, - "learning_rate": 4.244724003071302e-05, - "loss": 0.03207484483718872, - "step": 2750 - }, - { - "epoch": 1.879263301500682, - "grad_norm": 0.09466377645730972, - "learning_rate": 4.234560314516241e-05, - "loss": 0.0034012068063020706, - "step": 2755 - }, - { - "epoch": 1.8826739427012278, - "grad_norm": 0.010737070813775063, - "learning_rate": 4.224393004731403e-05, - "loss": 0.0015277769416570663, - "step": 2760 - }, - { - "epoch": 1.8860845839017735, - "grad_norm": 0.0126175656914711, - "learning_rate": 4.2142221496994144e-05, - "loss": 0.0010683752596378326, - "step": 2765 - }, - { - "epoch": 1.8894952251023192, - "grad_norm": 0.03981072083115578, - "learning_rate": 4.2040478254293946e-05, - "loss": 0.013066369295120239, - "step": 2770 - }, - { - "epoch": 1.8929058663028648, - "grad_norm": 1.678425669670105, - "learning_rate": 4.193870107956391e-05, - "loss": 0.04245063066482544, - "step": 2775 - }, - { - "epoch": 1.8963165075034105, - "grad_norm": 0.21114972233772278, - "learning_rate": 4.1836890733408063e-05, - "loss": 0.006565409898757935, - "step": 2780 - }, - { - "epoch": 1.8997271487039562, - "grad_norm": 0.13557757437229156, - "learning_rate": 4.173504797667836e-05, - "loss": 0.0032049473375082016, - "step": 2785 - }, - { - "epoch": 1.9031377899045019, - "grad_norm": 0.03560245409607887, - "learning_rate": 4.163317357046896e-05, - "loss": 0.036527630686759946, - "step": 2790 - }, - { - "epoch": 1.9065484311050478, - "grad_norm": 0.016457395628094673, - "learning_rate": 4.1531268276110534e-05, - "loss": 0.004363229870796204, - "step": 2795 - }, - { - "epoch": 1.9099590723055935, - "grad_norm": 0.15892116725444794, - "learning_rate": 4.142933285516459e-05, - "loss": 0.0009642043150961399, - "step": 2800 - }, - { - "epoch": 1.9133697135061392, - "grad_norm": 1.4503952264785767, - "learning_rate": 4.1327368069417805e-05, - "loss": 0.016029161214828492, - "step": 2805 - }, - { - "epoch": 1.9167803547066848, - "grad_norm": 0.05091691017150879, - "learning_rate": 4.122537468087626e-05, - "loss": 0.008691015094518662, - "step": 2810 - }, - { - "epoch": 1.9201909959072305, - "grad_norm": 0.33131423592567444, - "learning_rate": 4.1123353451759843e-05, - "loss": 0.026498579978942872, - "step": 2815 - }, - { - "epoch": 1.9236016371077762, - "grad_norm": 0.011024169623851776, - "learning_rate": 4.1021305144496455e-05, - "loss": 0.005746996402740479, - "step": 2820 - }, - { - "epoch": 1.9270122783083221, - "grad_norm": 0.003868364728987217, - "learning_rate": 4.091923052171637e-05, - "loss": 0.010599984973669051, - "step": 2825 - }, - { - "epoch": 1.9304229195088678, - "grad_norm": 0.06572377681732178, - "learning_rate": 4.081713034624656e-05, - "loss": 0.0074796505272388455, - "step": 2830 - }, - { - "epoch": 1.9338335607094135, - "grad_norm": 0.004749608226120472, - "learning_rate": 4.07150053811049e-05, - "loss": 0.001074880175292492, - "step": 2835 - }, - { - "epoch": 1.9372442019099592, - "grad_norm": 1.0662771463394165, - "learning_rate": 4.061285638949456e-05, - "loss": 0.012685902416706085, - "step": 2840 - }, - { - "epoch": 1.9406548431105048, - "grad_norm": 0.05863015726208687, - "learning_rate": 4.0510684134798275e-05, - "loss": 0.02307589054107666, - "step": 2845 - }, - { - "epoch": 1.9440654843110505, - "grad_norm": 0.1672995239496231, - "learning_rate": 4.0408489380572595e-05, - "loss": 0.0009137367829680443, - "step": 2850 - }, - { - "epoch": 1.9474761255115962, - "grad_norm": 0.37154069542884827, - "learning_rate": 4.030627289054224e-05, - "loss": 0.03070145845413208, - "step": 2855 - }, - { - "epoch": 1.950886766712142, - "grad_norm": 1.7240241765975952, - "learning_rate": 4.020403542859436e-05, - "loss": 0.02094976305961609, - "step": 2860 - }, - { - "epoch": 1.9542974079126876, - "grad_norm": 0.019670270383358, - "learning_rate": 4.0101777758772826e-05, - "loss": 0.002072345092892647, - "step": 2865 - }, - { - "epoch": 1.9577080491132333, - "grad_norm": 0.004318730439990759, - "learning_rate": 3.999950064527255e-05, - "loss": 0.004361823201179504, - "step": 2870 - }, - { - "epoch": 1.961118690313779, - "grad_norm": 0.01702667959034443, - "learning_rate": 3.989720485243371e-05, - "loss": 0.0062848575413227085, - "step": 2875 - }, - { - "epoch": 1.9645293315143246, - "grad_norm": 0.011724979616701603, - "learning_rate": 3.9794891144736114e-05, - "loss": 0.010996624082326888, - "step": 2880 - }, - { - "epoch": 1.9679399727148703, - "grad_norm": 0.05676786229014397, - "learning_rate": 3.9692560286793454e-05, - "loss": 0.0015414996072649957, - "step": 2885 - }, - { - "epoch": 1.971350613915416, - "grad_norm": 0.0819210484623909, - "learning_rate": 3.959021304334756e-05, - "loss": 0.0008444737643003464, - "step": 2890 - }, - { - "epoch": 1.9747612551159617, - "grad_norm": 2.654984951019287, - "learning_rate": 3.948785017926274e-05, - "loss": 0.029948851466178893, - "step": 2895 - }, - { - "epoch": 1.9781718963165074, - "grad_norm": 1.9272631406784058, - "learning_rate": 3.938547245952003e-05, - "loss": 0.028752812743186952, - "step": 2900 - }, - { - "epoch": 1.981582537517053, - "grad_norm": 1.8656548261642456, - "learning_rate": 3.9283080649211474e-05, - "loss": 0.013515619933605194, - "step": 2905 - }, - { - "epoch": 1.984993178717599, - "grad_norm": 0.12653613090515137, - "learning_rate": 3.918067551353445e-05, - "loss": 0.0011569897644221783, - "step": 2910 - }, - { - "epoch": 1.9884038199181446, - "grad_norm": 0.07956118136644363, - "learning_rate": 3.9078257817785886e-05, - "loss": 0.024067461490631104, - "step": 2915 - }, - { - "epoch": 1.9918144611186903, - "grad_norm": 0.05314113199710846, - "learning_rate": 3.897582832735658e-05, - "loss": 0.008826743811368942, - "step": 2920 - }, - { - "epoch": 1.995225102319236, - "grad_norm": 0.05015930160880089, - "learning_rate": 3.887338780772551e-05, - "loss": 0.017050875723361968, - "step": 2925 - }, - { - "epoch": 1.9972714870395634, - "eval_loss": 0.051675114780664444, - "eval_runtime": 0.9019, - "eval_samples_per_second": 83.154, - "eval_steps_per_second": 2.217, - "step": 2928 - }, - { - "eval_cer_subset": 0.016214743153018123, - "eval_cer_subset_edit_distance": 119, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2928 - }, - { - "epoch": 1.9986357435197817, - "grad_norm": 0.0063513885252177715, - "learning_rate": 3.8770937024454024e-05, - "loss": 0.06096935272216797, - "step": 2930 - }, - { - "epoch": 2.0020463847203276, - "grad_norm": 0.3572078347206116, - "learning_rate": 3.86684767431802e-05, - "loss": 0.006809630990028381, - "step": 2935 - }, - { - "epoch": 2.0054570259208733, - "grad_norm": 0.06565147638320923, - "learning_rate": 3.8566007729613116e-05, - "loss": 0.0028434153646230698, - "step": 2940 - }, - { - "epoch": 2.008867667121419, - "grad_norm": 0.842980682849884, - "learning_rate": 3.846353074952705e-05, - "loss": 0.009484797716140747, - "step": 2945 - }, - { - "epoch": 2.0122783083219646, - "grad_norm": 0.2072802633047104, - "learning_rate": 3.83610465687559e-05, - "loss": 0.002631821669638157, - "step": 2950 - }, - { - "epoch": 2.0156889495225103, - "grad_norm": 0.010441784746944904, - "learning_rate": 3.8258555953187294e-05, - "loss": 0.0031868770718574526, - "step": 2955 - }, - { - "epoch": 2.019099590723056, - "grad_norm": 0.01504182443022728, - "learning_rate": 3.8156059668756994e-05, - "loss": 0.0008036898449063301, - "step": 2960 - }, - { - "epoch": 2.0225102319236017, - "grad_norm": 0.1884382665157318, - "learning_rate": 3.805355848144312e-05, - "loss": 0.0035643450915813445, - "step": 2965 - }, - { - "epoch": 2.0259208731241474, - "grad_norm": 0.0791923776268959, - "learning_rate": 3.795105315726042e-05, - "loss": 0.006217213720083237, - "step": 2970 - }, - { - "epoch": 2.029331514324693, - "grad_norm": 0.3660128116607666, - "learning_rate": 3.7848544462254586e-05, - "loss": 0.01521536111831665, - "step": 2975 - }, - { - "epoch": 2.0327421555252387, - "grad_norm": 1.1506773233413696, - "learning_rate": 3.774603316249646e-05, - "loss": 0.016655027866363525, - "step": 2980 - }, - { - "epoch": 2.0361527967257844, - "grad_norm": 0.010742763057351112, - "learning_rate": 3.764352002407638e-05, - "loss": 0.0031856697052717207, - "step": 2985 - }, - { - "epoch": 2.03956343792633, - "grad_norm": 0.008567198179662228, - "learning_rate": 3.754100581309843e-05, - "loss": 0.006546307355165482, - "step": 2990 - }, - { - "epoch": 2.042974079126876, - "grad_norm": 0.242637500166893, - "learning_rate": 3.743849129567467e-05, - "loss": 0.01844196617603302, - "step": 2995 - }, - { - "epoch": 2.0463847203274215, - "grad_norm": 0.0021864245645701885, - "learning_rate": 3.7335977237919486e-05, - "loss": 0.0010665619745850563, - "step": 3000 - }, - { - "epoch": 2.049795361527967, - "grad_norm": 0.0823604166507721, - "learning_rate": 3.723346440594384e-05, - "loss": 0.007866787165403366, - "step": 3005 - }, - { - "epoch": 2.053206002728513, - "grad_norm": 1.950189471244812, - "learning_rate": 3.713095356584948e-05, - "loss": 0.008237652480602264, - "step": 3010 - }, - { - "epoch": 2.0566166439290585, - "grad_norm": 0.0031665184069424868, - "learning_rate": 3.702844548372333e-05, - "loss": 0.0026241108775138856, - "step": 3015 - }, - { - "epoch": 2.060027285129604, - "grad_norm": 0.000985809718258679, - "learning_rate": 3.692594092563164e-05, - "loss": 0.0006582608446478843, - "step": 3020 - }, - { - "epoch": 2.06343792633015, - "grad_norm": 0.005130598321557045, - "learning_rate": 3.682344065761439e-05, - "loss": 0.003146781027317047, - "step": 3025 - }, - { - "epoch": 2.0668485675306956, - "grad_norm": 0.37476831674575806, - "learning_rate": 3.6720945445679456e-05, - "loss": 0.0038135331124067307, - "step": 3030 - }, - { - "epoch": 2.0702592087312413, - "grad_norm": 0.7057031989097595, - "learning_rate": 3.661845605579694e-05, - "loss": 0.01638354957103729, - "step": 3035 - }, - { - "epoch": 2.0736698499317874, - "grad_norm": 0.002361712511628866, - "learning_rate": 3.651597325389343e-05, - "loss": 0.008126144856214523, - "step": 3040 - }, - { - "epoch": 2.077080491132333, - "grad_norm": 0.14359615743160248, - "learning_rate": 3.641349780584628e-05, - "loss": 0.0010236113332211972, - "step": 3045 - }, - { - "epoch": 2.0804911323328787, - "grad_norm": 0.02165267802774906, - "learning_rate": 3.631103047747791e-05, - "loss": 0.001835300400853157, - "step": 3050 - }, - { - "epoch": 2.0839017735334244, - "grad_norm": 0.08726503700017929, - "learning_rate": 3.6208572034550014e-05, - "loss": 0.0011202219873666763, - "step": 3055 - }, - { - "epoch": 2.08731241473397, - "grad_norm": 0.011694432236254215, - "learning_rate": 3.6106123242757934e-05, - "loss": 0.0012682409957051276, - "step": 3060 - }, - { - "epoch": 2.090723055934516, - "grad_norm": 0.011882166378200054, - "learning_rate": 3.6003684867724856e-05, - "loss": 0.0017605546861886978, - "step": 3065 - }, - { - "epoch": 2.0941336971350615, - "grad_norm": 0.04591360688209534, - "learning_rate": 3.5901257674996135e-05, - "loss": 0.005101381987333298, - "step": 3070 - }, - { - "epoch": 2.097544338335607, - "grad_norm": 0.004696133080869913, - "learning_rate": 3.579884243003353e-05, - "loss": 0.0011597291566431522, - "step": 3075 - }, - { - "epoch": 2.100954979536153, - "grad_norm": 0.15893827378749847, - "learning_rate": 3.5696439898209546e-05, - "loss": 0.004860148951411247, - "step": 3080 - }, - { - "epoch": 2.1043656207366985, - "grad_norm": 0.0052610537968575954, - "learning_rate": 3.559405084480166e-05, - "loss": 0.0022133996710181235, - "step": 3085 - }, - { - "epoch": 2.107776261937244, - "grad_norm": 0.04163965955376625, - "learning_rate": 3.5491676034986634e-05, - "loss": 0.0011653171852231026, - "step": 3090 - }, - { - "epoch": 2.11118690313779, - "grad_norm": 0.03646431118249893, - "learning_rate": 3.538931623383477e-05, - "loss": 0.0010974819771945477, - "step": 3095 - }, - { - "epoch": 2.1145975443383356, - "grad_norm": 0.0027346035931259394, - "learning_rate": 3.5286972206304225e-05, - "loss": 0.0002963356673717499, - "step": 3100 - }, - { - "epoch": 2.1180081855388813, - "grad_norm": 0.030314767733216286, - "learning_rate": 3.5184644717235233e-05, - "loss": 0.0013646245934069157, - "step": 3105 - }, - { - "epoch": 2.121418826739427, - "grad_norm": 0.07366184145212173, - "learning_rate": 3.5082334531344514e-05, - "loss": 0.0018215376883745193, - "step": 3110 - }, - { - "epoch": 2.1248294679399726, - "grad_norm": 0.5131163597106934, - "learning_rate": 3.498004241321938e-05, - "loss": 0.011788859963417053, - "step": 3115 - }, - { - "epoch": 2.1282401091405183, - "grad_norm": 0.005606099497526884, - "learning_rate": 3.48777691273122e-05, - "loss": 0.00023315094877034425, - "step": 3120 - }, - { - "epoch": 2.131650750341064, - "grad_norm": 0.218208909034729, - "learning_rate": 3.4775515437934554e-05, - "loss": 0.0018167193979024888, - "step": 3125 - }, - { - "epoch": 2.1350613915416097, - "grad_norm": 0.026605455204844475, - "learning_rate": 3.467328210925161e-05, - "loss": 0.0012846079654991627, - "step": 3130 - }, - { - "epoch": 2.1384720327421554, - "grad_norm": 1.4139975309371948, - "learning_rate": 3.457106990527632e-05, - "loss": 0.00508112832903862, - "step": 3135 - }, - { - "epoch": 2.141882673942701, - "grad_norm": 0.019414646551012993, - "learning_rate": 3.446887958986385e-05, - "loss": 0.00019377884455025197, - "step": 3140 - }, - { - "epoch": 2.1452933151432467, - "grad_norm": 0.0009443740709684789, - "learning_rate": 3.4366711926705694e-05, - "loss": 0.0002384019084274769, - "step": 3145 - }, - { - "epoch": 2.148703956343793, - "grad_norm": 0.004274521954357624, - "learning_rate": 3.426456767932416e-05, - "loss": 0.0001209982088766992, - "step": 3150 - }, - { - "epoch": 2.1521145975443385, - "grad_norm": 1.536423683166504, - "learning_rate": 3.416244761106645e-05, - "loss": 0.008635792136192321, - "step": 3155 - }, - { - "epoch": 2.155525238744884, - "grad_norm": 0.0005219463491812348, - "learning_rate": 3.406035248509918e-05, - "loss": 0.009876561909914016, - "step": 3160 - }, - { - "epoch": 2.15893587994543, - "grad_norm": 0.0003459984145592898, - "learning_rate": 3.395828306440249e-05, - "loss": 0.00039457883685827254, - "step": 3165 - }, - { - "epoch": 2.1623465211459756, - "grad_norm": 0.001981241861358285, - "learning_rate": 3.3856240111764465e-05, - "loss": 0.0015202089212834834, - "step": 3170 - }, - { - "epoch": 2.1657571623465213, - "grad_norm": 2.1673197746276855, - "learning_rate": 3.375422438977536e-05, - "loss": 0.006145225092768669, - "step": 3175 - }, - { - "epoch": 2.169167803547067, - "grad_norm": 1.1808840036392212, - "learning_rate": 3.365223666082195e-05, - "loss": 0.013607437908649444, - "step": 3180 - }, - { - "epoch": 2.1725784447476126, - "grad_norm": 0.028271734714508057, - "learning_rate": 3.3550277687081766e-05, - "loss": 0.0013325599022209645, - "step": 3185 - }, - { - "epoch": 2.1759890859481583, - "grad_norm": 0.24461407959461212, - "learning_rate": 3.344834823051754e-05, - "loss": 0.0011271734721958638, - "step": 3190 - }, - { - "epoch": 2.179399727148704, - "grad_norm": 0.870476245880127, - "learning_rate": 3.334644905287129e-05, - "loss": 0.006120540574193001, - "step": 3195 - }, - { - "epoch": 2.1828103683492497, - "grad_norm": 0.24630939960479736, - "learning_rate": 3.324458091565887e-05, - "loss": 0.006894994527101517, - "step": 3200 - }, - { - "epoch": 2.1862210095497954, - "grad_norm": 0.035023678094148636, - "learning_rate": 3.314274458016407e-05, - "loss": 0.0012451894581317902, - "step": 3205 - }, - { - "epoch": 2.189631650750341, - "grad_norm": 0.0890582948923111, - "learning_rate": 3.3040940807433086e-05, - "loss": 0.0024930678308010103, - "step": 3210 - }, - { - "epoch": 2.1930422919508867, - "grad_norm": 0.30489957332611084, - "learning_rate": 3.2939170358268715e-05, - "loss": 0.023087967932224274, - "step": 3215 - }, - { - "epoch": 2.1964529331514324, - "grad_norm": 0.0017311271512880921, - "learning_rate": 3.283743399322477e-05, - "loss": 0.010184342414140702, - "step": 3220 - }, - { - "epoch": 2.199863574351978, - "grad_norm": 0.08493661880493164, - "learning_rate": 3.273573247260027e-05, - "loss": 0.02805263102054596, - "step": 3225 - }, - { - "epoch": 2.203274215552524, - "grad_norm": 0.09135634452104568, - "learning_rate": 3.2634066556433934e-05, - "loss": 0.0009638279676437378, - "step": 3230 - }, - { - "epoch": 2.2066848567530695, - "grad_norm": 0.05967738851904869, - "learning_rate": 3.2532437004498316e-05, - "loss": 0.007517358660697937, - "step": 3235 - }, - { - "epoch": 2.210095497953615, - "grad_norm": 0.03030387870967388, - "learning_rate": 3.243084457629425e-05, - "loss": 0.0009239451959729194, - "step": 3240 - }, - { - "epoch": 2.213506139154161, - "grad_norm": 0.004568960517644882, - "learning_rate": 3.2329290031045114e-05, - "loss": 0.00045777852647006513, - "step": 3245 - }, - { - "epoch": 2.2169167803547065, - "grad_norm": 0.20760250091552734, - "learning_rate": 3.2227774127691225e-05, - "loss": 0.0007285364903509617, - "step": 3250 - }, - { - "epoch": 2.220327421555252, - "grad_norm": 0.0028919128235429525, - "learning_rate": 3.2126297624884065e-05, - "loss": 0.001136382296681404, - "step": 3255 - }, - { - "epoch": 2.223738062755798, - "grad_norm": 0.0026204732712358236, - "learning_rate": 3.20248612809807e-05, - "loss": 0.0001833780319429934, - "step": 3260 - }, - { - "epoch": 2.2271487039563436, - "grad_norm": 0.054305560886859894, - "learning_rate": 3.192346585403805e-05, - "loss": 0.003986500948667526, - "step": 3265 - }, - { - "epoch": 2.2305593451568897, - "grad_norm": 0.024538133293390274, - "learning_rate": 3.182211210180732e-05, - "loss": 0.0003968174569308758, - "step": 3270 - }, - { - "epoch": 2.2339699863574354, - "grad_norm": 0.003948847763240337, - "learning_rate": 3.172080078172817e-05, - "loss": 0.0004363433923572302, - "step": 3275 - }, - { - "epoch": 2.237380627557981, - "grad_norm": 0.0024372704792767763, - "learning_rate": 3.161953265092322e-05, - "loss": 0.001003190129995346, - "step": 3280 - }, - { - "epoch": 2.2407912687585267, - "grad_norm": 0.05504141375422478, - "learning_rate": 3.1518308466192346e-05, - "loss": 0.00043217958882451056, - "step": 3285 - }, - { - "epoch": 2.2442019099590724, - "grad_norm": 0.003660411573946476, - "learning_rate": 3.141712898400692e-05, - "loss": 0.0005229417700320482, - "step": 3290 - }, - { - "epoch": 2.246930422919509, - "eval_loss": 0.07013023644685745, - "eval_runtime": 0.915, - "eval_samples_per_second": 81.971, - "eval_steps_per_second": 2.186, - "step": 3294 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3294 - }, - { - "epoch": 2.247612551159618, - "grad_norm": 0.0009039652650244534, - "learning_rate": 3.1315994960504354e-05, - "loss": 0.0009505398571491242, - "step": 3295 - }, - { - "epoch": 2.251023192360164, - "grad_norm": 0.0008299718610942364, - "learning_rate": 3.121490715148224e-05, - "loss": 0.006436178088188171, - "step": 3300 - }, - { - "epoch": 2.2544338335607095, - "grad_norm": 0.003360757604241371, - "learning_rate": 3.1113866312392846e-05, - "loss": 0.0004931201227009296, - "step": 3305 - }, - { - "epoch": 2.257844474761255, - "grad_norm": 0.0006958392332307994, - "learning_rate": 3.1012873198337415e-05, - "loss": 0.0008634727448225022, - "step": 3310 - }, - { - "epoch": 2.261255115961801, - "grad_norm": 0.0006489035440608859, - "learning_rate": 3.0911928564060525e-05, - "loss": 0.02126412242650986, - "step": 3315 - }, - { - "epoch": 2.2646657571623465, - "grad_norm": 0.05853112041950226, - "learning_rate": 3.081103316394446e-05, - "loss": 0.0011481027118861674, - "step": 3320 - }, - { - "epoch": 2.268076398362892, - "grad_norm": 0.018470890820026398, - "learning_rate": 3.0710187752003576e-05, - "loss": 0.0005085847340524196, - "step": 3325 - }, - { - "epoch": 2.271487039563438, - "grad_norm": 0.002904064953327179, - "learning_rate": 3.06093930818786e-05, - "loss": 0.0011355782859027385, - "step": 3330 - }, - { - "epoch": 2.2748976807639836, - "grad_norm": 0.006562090013176203, - "learning_rate": 3.0508649906831165e-05, - "loss": 0.0017314480617642402, - "step": 3335 - }, - { - "epoch": 2.2783083219645293, - "grad_norm": 0.012832654640078545, - "learning_rate": 3.040795897973794e-05, - "loss": 0.014564378559589386, - "step": 3340 - }, - { - "epoch": 2.281718963165075, - "grad_norm": 0.40248075127601624, - "learning_rate": 3.030732105308523e-05, - "loss": 0.013111820816993714, - "step": 3345 - }, - { - "epoch": 2.2851296043656206, - "grad_norm": 0.009625518694519997, - "learning_rate": 3.0206736878963198e-05, - "loss": 0.0003735888050869107, - "step": 3350 - }, - { - "epoch": 2.2885402455661663, - "grad_norm": 0.05925761163234711, - "learning_rate": 3.010620720906034e-05, - "loss": 0.0005200970452278852, - "step": 3355 - }, - { - "epoch": 2.291950886766712, - "grad_norm": 0.04488271474838257, - "learning_rate": 3.0005732794657804e-05, - "loss": 0.0017546603456139564, - "step": 3360 - }, - { - "epoch": 2.2953615279672577, - "grad_norm": 0.0013143798569217324, - "learning_rate": 2.990531438662383e-05, - "loss": 0.0006482157856225968, - "step": 3365 - }, - { - "epoch": 2.2987721691678034, - "grad_norm": 0.0018280980875715613, - "learning_rate": 2.980495273540805e-05, - "loss": 0.002798055298626423, - "step": 3370 - }, - { - "epoch": 2.3021828103683495, - "grad_norm": 0.0068644145503640175, - "learning_rate": 2.9704648591036028e-05, - "loss": 0.0010916708968579769, - "step": 3375 - }, - { - "epoch": 2.305593451568895, - "grad_norm": 0.006140770856291056, - "learning_rate": 2.9604402703103482e-05, - "loss": 0.0003204951295629144, - "step": 3380 - }, - { - "epoch": 2.309004092769441, - "grad_norm": 0.01666918210685253, - "learning_rate": 2.9504215820770825e-05, - "loss": 0.002915392816066742, - "step": 3385 - }, - { - "epoch": 2.3124147339699865, - "grad_norm": 0.001569412648677826, - "learning_rate": 2.9404088692757462e-05, - "loss": 0.00282623004168272, - "step": 3390 - }, - { - "epoch": 2.315825375170532, - "grad_norm": 2.6985678672790527, - "learning_rate": 2.930402206733629e-05, - "loss": 0.056363034248352054, - "step": 3395 - }, - { - "epoch": 2.319236016371078, - "grad_norm": 0.061534252017736435, - "learning_rate": 2.9204016692328008e-05, - "loss": 0.002193786948919296, - "step": 3400 - }, - { - "epoch": 2.3226466575716236, - "grad_norm": 0.00724546005949378, - "learning_rate": 2.9104073315095624e-05, - "loss": 0.0027640098705887794, - "step": 3405 - }, - { - "epoch": 2.3260572987721693, - "grad_norm": 0.0014935819199308753, - "learning_rate": 2.900419268253876e-05, - "loss": 0.0014965098351240158, - "step": 3410 - }, - { - "epoch": 2.329467939972715, - "grad_norm": 0.2667955458164215, - "learning_rate": 2.89043755410882e-05, - "loss": 0.0009135601110756397, - "step": 3415 - }, - { - "epoch": 2.3328785811732606, - "grad_norm": 0.015711264684796333, - "learning_rate": 2.8804622636700195e-05, - "loss": 0.0004993634298443794, - "step": 3420 - }, - { - "epoch": 2.3362892223738063, - "grad_norm": 0.000695803901180625, - "learning_rate": 2.8704934714850972e-05, - "loss": 0.0010460540652275085, - "step": 3425 - }, - { - "epoch": 2.339699863574352, - "grad_norm": 0.00030175631400197744, - "learning_rate": 2.8605312520531102e-05, - "loss": 0.0011491063050925732, - "step": 3430 - }, - { - "epoch": 2.3431105047748977, - "grad_norm": 0.0008360512438230217, - "learning_rate": 2.850575679823998e-05, - "loss": 0.005195276811718941, - "step": 3435 - }, - { - "epoch": 2.3465211459754434, - "grad_norm": 0.07670744508504868, - "learning_rate": 2.840626829198022e-05, - "loss": 0.001102046575397253, - "step": 3440 - }, - { - "epoch": 2.349931787175989, - "grad_norm": 0.0048200939781963825, - "learning_rate": 2.8306847745252154e-05, - "loss": 0.00011967071332037449, - "step": 3445 - }, - { - "epoch": 2.3533424283765347, - "grad_norm": 0.0036802536342293024, - "learning_rate": 2.8207495901048164e-05, - "loss": 0.003212982416152954, - "step": 3450 - }, - { - "epoch": 2.3567530695770804, - "grad_norm": 0.0017565820598974824, - "learning_rate": 2.8108213501847284e-05, - "loss": 3.878590650856495e-05, - "step": 3455 - }, - { - "epoch": 2.360163710777626, - "grad_norm": 0.07837986201047897, - "learning_rate": 2.8009001289609514e-05, - "loss": 0.00035386246163398026, - "step": 3460 - }, - { - "epoch": 2.363574351978172, - "grad_norm": 0.035858154296875, - "learning_rate": 2.7909860005770364e-05, - "loss": 0.0020171813666820526, - "step": 3465 - }, - { - "epoch": 2.3669849931787175, - "grad_norm": 0.001313618617132306, - "learning_rate": 2.781079039123525e-05, - "loss": 0.0077533811330795285, - "step": 3470 - }, - { - "epoch": 2.370395634379263, - "grad_norm": 0.022166471928358078, - "learning_rate": 2.771179318637402e-05, - "loss": 0.00021515686530619859, - "step": 3475 - }, - { - "epoch": 2.373806275579809, - "grad_norm": 0.0037807885091751814, - "learning_rate": 2.7612869131015353e-05, - "loss": 0.008334387093782425, - "step": 3480 - }, - { - "epoch": 2.3772169167803545, - "grad_norm": 0.02126333676278591, - "learning_rate": 2.7514018964441313e-05, - "loss": 0.0012980472296476365, - "step": 3485 - }, - { - "epoch": 2.3806275579809, - "grad_norm": 0.0007329506915993989, - "learning_rate": 2.7415243425381707e-05, - "loss": 0.000131706683896482, - "step": 3490 - }, - { - "epoch": 2.384038199181446, - "grad_norm": 0.008962417021393776, - "learning_rate": 2.73165432520087e-05, - "loss": 0.0001407766016200185, - "step": 3495 - }, - { - "epoch": 2.3874488403819916, - "grad_norm": 0.06224314495921135, - "learning_rate": 2.721791918193119e-05, - "loss": 0.0005040234886109829, - "step": 3500 - }, - { - "epoch": 2.3908594815825377, - "grad_norm": 0.007790696807205677, - "learning_rate": 2.7119371952189368e-05, - "loss": 0.00020941467955708503, - "step": 3505 - }, - { - "epoch": 2.3942701227830834, - "grad_norm": 0.1999143660068512, - "learning_rate": 2.7020902299249144e-05, - "loss": 0.0005157966166734696, - "step": 3510 - }, - { - "epoch": 2.397680763983629, - "grad_norm": 1.5223946571350098, - "learning_rate": 2.692251095899673e-05, - "loss": 0.004808775335550308, - "step": 3515 - }, - { - "epoch": 2.4010914051841747, - "grad_norm": 0.0005383774405345321, - "learning_rate": 2.6824198666733024e-05, - "loss": 0.0007459132932126522, - "step": 3520 - }, - { - "epoch": 2.4045020463847204, - "grad_norm": 0.02152041345834732, - "learning_rate": 2.672596615716823e-05, - "loss": 0.010163982212543488, - "step": 3525 - }, - { - "epoch": 2.407912687585266, - "grad_norm": 0.1950986683368683, - "learning_rate": 2.6627814164416303e-05, - "loss": 0.002464359626173973, - "step": 3530 - }, - { - "epoch": 2.411323328785812, - "grad_norm": 0.21561792492866516, - "learning_rate": 2.652974342198947e-05, - "loss": 0.0010975897312164307, - "step": 3535 - }, - { - "epoch": 2.4147339699863575, - "grad_norm": 0.0007951174047775567, - "learning_rate": 2.6431754662792775e-05, - "loss": 6.033455138094723e-05, - "step": 3540 - }, - { - "epoch": 2.418144611186903, - "grad_norm": 0.0016590118175372481, - "learning_rate": 2.633384861911856e-05, - "loss": 0.00012161724735051393, - "step": 3545 - }, - { - "epoch": 2.421555252387449, - "grad_norm": 0.004098537378013134, - "learning_rate": 2.6236026022641047e-05, - "loss": 0.0006160829216241837, - "step": 3550 - }, - { - "epoch": 2.4249658935879945, - "grad_norm": 0.0009240853250958025, - "learning_rate": 2.6138287604410772e-05, - "loss": 8.804704993963242e-05, - "step": 3555 - }, - { - "epoch": 2.42837653478854, - "grad_norm": 0.005952226463705301, - "learning_rate": 2.604063409484928e-05, - "loss": 0.0006035147234797478, - "step": 3560 - }, - { - "epoch": 2.431787175989086, - "grad_norm": 0.03809252381324768, - "learning_rate": 2.5943066223743488e-05, - "loss": 0.00727783590555191, - "step": 3565 - }, - { - "epoch": 2.4351978171896316, - "grad_norm": 0.05054875835776329, - "learning_rate": 2.5845584720240384e-05, - "loss": 0.0082052581012249, - "step": 3570 - }, - { - "epoch": 2.4386084583901773, - "grad_norm": 0.0147855868563056, - "learning_rate": 2.5748190312841466e-05, - "loss": 0.011614852398633958, - "step": 3575 - }, - { - "epoch": 2.442019099590723, - "grad_norm": 0.011641742661595345, - "learning_rate": 2.5650883729397373e-05, - "loss": 0.0002830417361110449, - "step": 3580 - }, - { - "epoch": 2.4454297407912686, - "grad_norm": 0.04626445844769478, - "learning_rate": 2.5553665697102386e-05, - "loss": 0.0003774407086893916, - "step": 3585 - }, - { - "epoch": 2.4488403819918143, - "grad_norm": 0.3234706521034241, - "learning_rate": 2.5456536942489065e-05, - "loss": 0.0009496832266449928, - "step": 3590 - }, - { - "epoch": 2.45225102319236, - "grad_norm": 0.029156841337680817, - "learning_rate": 2.535949819142272e-05, - "loss": 0.0016127176582813262, - "step": 3595 - }, - { - "epoch": 2.4556616643929057, - "grad_norm": 0.0015022120205685496, - "learning_rate": 2.52625501690961e-05, - "loss": 0.00010128046851605177, - "step": 3600 - }, - { - "epoch": 2.459072305593452, - "grad_norm": 0.12954266369342804, - "learning_rate": 2.5165693600023872e-05, - "loss": 0.004440005496144294, - "step": 3605 - }, - { - "epoch": 2.4624829467939975, - "grad_norm": 0.022409839555621147, - "learning_rate": 2.5068929208037295e-05, - "loss": 0.0019246777519583702, - "step": 3610 - }, - { - "epoch": 2.465893587994543, - "grad_norm": 0.0018720730440691113, - "learning_rate": 2.497225771627873e-05, - "loss": 0.004561808705329895, - "step": 3615 - }, - { - "epoch": 2.469304229195089, - "grad_norm": 0.0021158247254788876, - "learning_rate": 2.4875679847196312e-05, - "loss": 0.005481125041842461, - "step": 3620 - }, - { - "epoch": 2.4727148703956345, - "grad_norm": 0.0024307845160365105, - "learning_rate": 2.477919632253845e-05, - "loss": 0.0009140795096755028, - "step": 3625 - }, - { - "epoch": 2.47612551159618, - "grad_norm": 0.0020758784376084805, - "learning_rate": 2.4682807863348583e-05, - "loss": 0.001236506924033165, - "step": 3630 - }, - { - "epoch": 2.479536152796726, - "grad_norm": 0.0006182460929267108, - "learning_rate": 2.4586515189959614e-05, - "loss": 0.00015565860085189342, - "step": 3635 - }, - { - "epoch": 2.4829467939972716, - "grad_norm": 0.05087731033563614, - "learning_rate": 2.4490319021988688e-05, - "loss": 0.00022137174382805825, - "step": 3640 - }, - { - "epoch": 2.4863574351978173, - "grad_norm": 0.03250613436102867, - "learning_rate": 2.4394220078331695e-05, - "loss": 0.00028696306981146336, - "step": 3645 - }, - { - "epoch": 2.489768076398363, - "grad_norm": 0.017168540507555008, - "learning_rate": 2.429821907715798e-05, - "loss": 0.0003641644492745399, - "step": 3650 - }, - { - "epoch": 2.4931787175989086, - "grad_norm": 0.0670199990272522, - "learning_rate": 2.420231673590491e-05, - "loss": 0.00015748695004731418, - "step": 3655 - }, - { - "epoch": 2.4965893587994543, - "grad_norm": 0.003998387139290571, - "learning_rate": 2.4106513771272585e-05, - "loss": 0.00026149852201342585, - "step": 3660 - }, - { - "epoch": 2.4965893587994543, - "eval_loss": 0.06822175532579422, - "eval_runtime": 0.9108, - "eval_samples_per_second": 82.345, - "eval_steps_per_second": 2.196, - "step": 3660 - }, - { - "eval_cer_subset": 0.01675977653631285, - "eval_cer_subset_edit_distance": 123, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3660 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 3.81958949592023e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-3660/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/adapter_model.safetensors deleted file mode 100644 index 720ce91749e89f34f114cdda59250769320e0ff9..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:af65d6a0cfacf34c19a5737adbdf4bfe4d7aa4730b6cdff7e014be40833beca2 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/optimizer.pt deleted file mode 100644 index bb8fcf93ca4554b232d8a37e39bf929f2dcc590d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eb4354238dd921acdf599f6d5629ec74eedf725136781958e44895fb8d92d675 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/rng_state.pth deleted file mode 100644 index 0a5172e5e2a7fd7da534588da77e1b4837a714e9..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:68cae064ec1b66311320ebbcf8d27687f81328658f39e29a117900d377377e69 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/scheduler.pt deleted file mode 100644 index 242ae6dfe64d8b0d88da85935a5b5ff645fcae70..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44be148d17c4a97a5030975971d8463ac64843d4a9e9d9a289a0f6c32d8c85fe -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/trainer_state.json deleted file mode 100644 index e48dfc5de8d1727285e4de394a538ece57a4b66d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/trainer_state.json +++ /dev/null @@ -1,5845 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.7462482946793996, - "eval_steps": 366, - "global_step": 4026, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - }, - { - "epoch": 1.7496589358799455, - "grad_norm": 0.1577913612127304, - "learning_rate": 4.617543195717702e-05, - "loss": 0.008352726697921753, - "step": 2565 - }, - { - "epoch": 1.7530695770804912, - "grad_norm": 0.0097503075376153, - "learning_rate": 4.607566614572297e-05, - "loss": 0.0051550988107919695, - "step": 2570 - }, - { - "epoch": 1.7564802182810368, - "grad_norm": 0.0745258778333664, - "learning_rate": 4.5975836246359376e-05, - "loss": 0.021304388344287873, - "step": 2575 - }, - { - "epoch": 1.7598908594815825, - "grad_norm": 0.06805134564638138, - "learning_rate": 4.5875943005137875e-05, - "loss": 0.002654948644340038, - "step": 2580 - }, - { - "epoch": 1.7633015006821282, - "grad_norm": 0.005402611568570137, - "learning_rate": 4.577598716858342e-05, - "loss": 0.0005614575464278459, - "step": 2585 - }, - { - "epoch": 1.766712141882674, - "grad_norm": 1.909899115562439, - "learning_rate": 4.5675969483688796e-05, - "loss": 0.07116572856903076, - "step": 2590 - }, - { - "epoch": 1.7701227830832198, - "grad_norm": 0.31900784373283386, - "learning_rate": 4.557589069790897e-05, - "loss": 0.00657682865858078, - "step": 2595 - }, - { - "epoch": 1.7735334242837655, - "grad_norm": 0.029744356870651245, - "learning_rate": 4.547575155915556e-05, - "loss": 0.022768501937389374, - "step": 2600 - }, - { - "epoch": 1.7769440654843112, - "grad_norm": 0.3033762276172638, - "learning_rate": 4.537555281579118e-05, - "loss": 0.06703370213508605, - "step": 2605 - }, - { - "epoch": 1.7803547066848568, - "grad_norm": 0.1135796457529068, - "learning_rate": 4.5275295216623895e-05, - "loss": 0.02318609356880188, - "step": 2610 - }, - { - "epoch": 1.7837653478854025, - "grad_norm": 0.03714317828416824, - "learning_rate": 4.517497951090163e-05, - "loss": 0.0006621151696890593, - "step": 2615 - }, - { - "epoch": 1.7871759890859482, - "grad_norm": 0.022167189046740532, - "learning_rate": 4.507460644830652e-05, - "loss": 0.02861899733543396, - "step": 2620 - }, - { - "epoch": 1.790586630286494, - "grad_norm": 0.85112464427948, - "learning_rate": 4.497417677894937e-05, - "loss": 0.006332498788833618, - "step": 2625 - }, - { - "epoch": 1.7939972714870396, - "grad_norm": 0.35604724287986755, - "learning_rate": 4.487369125336402e-05, - "loss": 0.0009636864997446537, - "step": 2630 - }, - { - "epoch": 1.7974079126875853, - "grad_norm": 1.309139370918274, - "learning_rate": 4.477315062250173e-05, - "loss": 0.015147030353546143, - "step": 2635 - }, - { - "epoch": 1.800818553888131, - "grad_norm": 0.00445478456094861, - "learning_rate": 4.467255563772554e-05, - "loss": 0.0033130761235952376, - "step": 2640 - }, - { - "epoch": 1.8042291950886766, - "grad_norm": 0.6081591248512268, - "learning_rate": 4.457190705080476e-05, - "loss": 0.05884039998054504, - "step": 2645 - }, - { - "epoch": 1.8076398362892223, - "grad_norm": 0.021707098931074142, - "learning_rate": 4.4471205613909215e-05, - "loss": 0.011024921387434005, - "step": 2650 - }, - { - "epoch": 1.811050477489768, - "grad_norm": 0.07584571093320847, - "learning_rate": 4.437045207960372e-05, - "loss": 0.0034543585032224657, - "step": 2655 - }, - { - "epoch": 1.8144611186903137, - "grad_norm": 0.09392323344945908, - "learning_rate": 4.4269647200842444e-05, - "loss": 0.020604337751865386, - "step": 2660 - }, - { - "epoch": 1.8178717598908594, - "grad_norm": 0.01986370049417019, - "learning_rate": 4.4168791730963214e-05, - "loss": 0.002114328555762768, - "step": 2665 - }, - { - "epoch": 1.821282401091405, - "grad_norm": 0.09479828178882599, - "learning_rate": 4.406788642368199e-05, - "loss": 0.025032815337181092, - "step": 2670 - }, - { - "epoch": 1.8246930422919507, - "grad_norm": 0.049587834626436234, - "learning_rate": 4.396693203308714e-05, - "loss": 0.010216309875249862, - "step": 2675 - }, - { - "epoch": 1.8281036834924966, - "grad_norm": 0.8442233204841614, - "learning_rate": 4.3865929313633846e-05, - "loss": 0.028453707695007324, - "step": 2680 - }, - { - "epoch": 1.8315143246930423, - "grad_norm": 1.1214258670806885, - "learning_rate": 4.37648790201385e-05, - "loss": 0.030564960837364197, - "step": 2685 - }, - { - "epoch": 1.834924965893588, - "grad_norm": 0.0033480043057352304, - "learning_rate": 4.3663781907772984e-05, - "loss": 0.0031189337372779847, - "step": 2690 - }, - { - "epoch": 1.8383356070941337, - "grad_norm": 1.5848685503005981, - "learning_rate": 4.356263873205908e-05, - "loss": 0.006724410504102707, - "step": 2695 - }, - { - "epoch": 1.8417462482946794, - "grad_norm": 2.099224090576172, - "learning_rate": 4.346145024886282e-05, - "loss": 0.02901224195957184, - "step": 2700 - }, - { - "epoch": 1.845156889495225, - "grad_norm": 0.009933914057910442, - "learning_rate": 4.336021721438881e-05, - "loss": 0.01628301590681076, - "step": 2705 - }, - { - "epoch": 1.848567530695771, - "grad_norm": 0.36502084136009216, - "learning_rate": 4.325894038517463e-05, - "loss": 0.0063889890909194945, - "step": 2710 - }, - { - "epoch": 1.8519781718963166, - "grad_norm": 0.5338266491889954, - "learning_rate": 4.3157620518085123e-05, - "loss": 0.0017654186114668847, - "step": 2715 - }, - { - "epoch": 1.8553888130968623, - "grad_norm": 0.019173067063093185, - "learning_rate": 4.3056258370306773e-05, - "loss": 0.026314160227775572, - "step": 2720 - }, - { - "epoch": 1.858799454297408, - "grad_norm": 0.0022459065075963736, - "learning_rate": 4.295485469934203e-05, - "loss": 0.034506088495254515, - "step": 2725 - }, - { - "epoch": 1.8622100954979537, - "grad_norm": 0.009851609356701374, - "learning_rate": 4.285341026300366e-05, - "loss": 0.037691861391067505, - "step": 2730 - }, - { - "epoch": 1.8656207366984994, - "grad_norm": 0.037228964269161224, - "learning_rate": 4.275192581940908e-05, - "loss": 0.019535519182682037, - "step": 2735 - }, - { - "epoch": 1.869031377899045, - "grad_norm": 0.03244785964488983, - "learning_rate": 4.2650402126974704e-05, - "loss": 0.004778595641255379, - "step": 2740 - }, - { - "epoch": 1.8724420190995907, - "grad_norm": 0.027707895264029503, - "learning_rate": 4.254883994441023e-05, - "loss": 0.014376556873321534, - "step": 2745 - }, - { - "epoch": 1.8758526603001364, - "grad_norm": 0.0010299253044649959, - "learning_rate": 4.244724003071302e-05, - "loss": 0.03207484483718872, - "step": 2750 - }, - { - "epoch": 1.879263301500682, - "grad_norm": 0.09466377645730972, - "learning_rate": 4.234560314516241e-05, - "loss": 0.0034012068063020706, - "step": 2755 - }, - { - "epoch": 1.8826739427012278, - "grad_norm": 0.010737070813775063, - "learning_rate": 4.224393004731403e-05, - "loss": 0.0015277769416570663, - "step": 2760 - }, - { - "epoch": 1.8860845839017735, - "grad_norm": 0.0126175656914711, - "learning_rate": 4.2142221496994144e-05, - "loss": 0.0010683752596378326, - "step": 2765 - }, - { - "epoch": 1.8894952251023192, - "grad_norm": 0.03981072083115578, - "learning_rate": 4.2040478254293946e-05, - "loss": 0.013066369295120239, - "step": 2770 - }, - { - "epoch": 1.8929058663028648, - "grad_norm": 1.678425669670105, - "learning_rate": 4.193870107956391e-05, - "loss": 0.04245063066482544, - "step": 2775 - }, - { - "epoch": 1.8963165075034105, - "grad_norm": 0.21114972233772278, - "learning_rate": 4.1836890733408063e-05, - "loss": 0.006565409898757935, - "step": 2780 - }, - { - "epoch": 1.8997271487039562, - "grad_norm": 0.13557757437229156, - "learning_rate": 4.173504797667836e-05, - "loss": 0.0032049473375082016, - "step": 2785 - }, - { - "epoch": 1.9031377899045019, - "grad_norm": 0.03560245409607887, - "learning_rate": 4.163317357046896e-05, - "loss": 0.036527630686759946, - "step": 2790 - }, - { - "epoch": 1.9065484311050478, - "grad_norm": 0.016457395628094673, - "learning_rate": 4.1531268276110534e-05, - "loss": 0.004363229870796204, - "step": 2795 - }, - { - "epoch": 1.9099590723055935, - "grad_norm": 0.15892116725444794, - "learning_rate": 4.142933285516459e-05, - "loss": 0.0009642043150961399, - "step": 2800 - }, - { - "epoch": 1.9133697135061392, - "grad_norm": 1.4503952264785767, - "learning_rate": 4.1327368069417805e-05, - "loss": 0.016029161214828492, - "step": 2805 - }, - { - "epoch": 1.9167803547066848, - "grad_norm": 0.05091691017150879, - "learning_rate": 4.122537468087626e-05, - "loss": 0.008691015094518662, - "step": 2810 - }, - { - "epoch": 1.9201909959072305, - "grad_norm": 0.33131423592567444, - "learning_rate": 4.1123353451759843e-05, - "loss": 0.026498579978942872, - "step": 2815 - }, - { - "epoch": 1.9236016371077762, - "grad_norm": 0.011024169623851776, - "learning_rate": 4.1021305144496455e-05, - "loss": 0.005746996402740479, - "step": 2820 - }, - { - "epoch": 1.9270122783083221, - "grad_norm": 0.003868364728987217, - "learning_rate": 4.091923052171637e-05, - "loss": 0.010599984973669051, - "step": 2825 - }, - { - "epoch": 1.9304229195088678, - "grad_norm": 0.06572377681732178, - "learning_rate": 4.081713034624656e-05, - "loss": 0.0074796505272388455, - "step": 2830 - }, - { - "epoch": 1.9338335607094135, - "grad_norm": 0.004749608226120472, - "learning_rate": 4.07150053811049e-05, - "loss": 0.001074880175292492, - "step": 2835 - }, - { - "epoch": 1.9372442019099592, - "grad_norm": 1.0662771463394165, - "learning_rate": 4.061285638949456e-05, - "loss": 0.012685902416706085, - "step": 2840 - }, - { - "epoch": 1.9406548431105048, - "grad_norm": 0.05863015726208687, - "learning_rate": 4.0510684134798275e-05, - "loss": 0.02307589054107666, - "step": 2845 - }, - { - "epoch": 1.9440654843110505, - "grad_norm": 0.1672995239496231, - "learning_rate": 4.0408489380572595e-05, - "loss": 0.0009137367829680443, - "step": 2850 - }, - { - "epoch": 1.9474761255115962, - "grad_norm": 0.37154069542884827, - "learning_rate": 4.030627289054224e-05, - "loss": 0.03070145845413208, - "step": 2855 - }, - { - "epoch": 1.950886766712142, - "grad_norm": 1.7240241765975952, - "learning_rate": 4.020403542859436e-05, - "loss": 0.02094976305961609, - "step": 2860 - }, - { - "epoch": 1.9542974079126876, - "grad_norm": 0.019670270383358, - "learning_rate": 4.0101777758772826e-05, - "loss": 0.002072345092892647, - "step": 2865 - }, - { - "epoch": 1.9577080491132333, - "grad_norm": 0.004318730439990759, - "learning_rate": 3.999950064527255e-05, - "loss": 0.004361823201179504, - "step": 2870 - }, - { - "epoch": 1.961118690313779, - "grad_norm": 0.01702667959034443, - "learning_rate": 3.989720485243371e-05, - "loss": 0.0062848575413227085, - "step": 2875 - }, - { - "epoch": 1.9645293315143246, - "grad_norm": 0.011724979616701603, - "learning_rate": 3.9794891144736114e-05, - "loss": 0.010996624082326888, - "step": 2880 - }, - { - "epoch": 1.9679399727148703, - "grad_norm": 0.05676786229014397, - "learning_rate": 3.9692560286793454e-05, - "loss": 0.0015414996072649957, - "step": 2885 - }, - { - "epoch": 1.971350613915416, - "grad_norm": 0.0819210484623909, - "learning_rate": 3.959021304334756e-05, - "loss": 0.0008444737643003464, - "step": 2890 - }, - { - "epoch": 1.9747612551159617, - "grad_norm": 2.654984951019287, - "learning_rate": 3.948785017926274e-05, - "loss": 0.029948851466178893, - "step": 2895 - }, - { - "epoch": 1.9781718963165074, - "grad_norm": 1.9272631406784058, - "learning_rate": 3.938547245952003e-05, - "loss": 0.028752812743186952, - "step": 2900 - }, - { - "epoch": 1.981582537517053, - "grad_norm": 1.8656548261642456, - "learning_rate": 3.9283080649211474e-05, - "loss": 0.013515619933605194, - "step": 2905 - }, - { - "epoch": 1.984993178717599, - "grad_norm": 0.12653613090515137, - "learning_rate": 3.918067551353445e-05, - "loss": 0.0011569897644221783, - "step": 2910 - }, - { - "epoch": 1.9884038199181446, - "grad_norm": 0.07956118136644363, - "learning_rate": 3.9078257817785886e-05, - "loss": 0.024067461490631104, - "step": 2915 - }, - { - "epoch": 1.9918144611186903, - "grad_norm": 0.05314113199710846, - "learning_rate": 3.897582832735658e-05, - "loss": 0.008826743811368942, - "step": 2920 - }, - { - "epoch": 1.995225102319236, - "grad_norm": 0.05015930160880089, - "learning_rate": 3.887338780772551e-05, - "loss": 0.017050875723361968, - "step": 2925 - }, - { - "epoch": 1.9972714870395634, - "eval_loss": 0.051675114780664444, - "eval_runtime": 0.9019, - "eval_samples_per_second": 83.154, - "eval_steps_per_second": 2.217, - "step": 2928 - }, - { - "eval_cer_subset": 0.016214743153018123, - "eval_cer_subset_edit_distance": 119, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2928 - }, - { - "epoch": 1.9986357435197817, - "grad_norm": 0.0063513885252177715, - "learning_rate": 3.8770937024454024e-05, - "loss": 0.06096935272216797, - "step": 2930 - }, - { - "epoch": 2.0020463847203276, - "grad_norm": 0.3572078347206116, - "learning_rate": 3.86684767431802e-05, - "loss": 0.006809630990028381, - "step": 2935 - }, - { - "epoch": 2.0054570259208733, - "grad_norm": 0.06565147638320923, - "learning_rate": 3.8566007729613116e-05, - "loss": 0.0028434153646230698, - "step": 2940 - }, - { - "epoch": 2.008867667121419, - "grad_norm": 0.842980682849884, - "learning_rate": 3.846353074952705e-05, - "loss": 0.009484797716140747, - "step": 2945 - }, - { - "epoch": 2.0122783083219646, - "grad_norm": 0.2072802633047104, - "learning_rate": 3.83610465687559e-05, - "loss": 0.002631821669638157, - "step": 2950 - }, - { - "epoch": 2.0156889495225103, - "grad_norm": 0.010441784746944904, - "learning_rate": 3.8258555953187294e-05, - "loss": 0.0031868770718574526, - "step": 2955 - }, - { - "epoch": 2.019099590723056, - "grad_norm": 0.01504182443022728, - "learning_rate": 3.8156059668756994e-05, - "loss": 0.0008036898449063301, - "step": 2960 - }, - { - "epoch": 2.0225102319236017, - "grad_norm": 0.1884382665157318, - "learning_rate": 3.805355848144312e-05, - "loss": 0.0035643450915813445, - "step": 2965 - }, - { - "epoch": 2.0259208731241474, - "grad_norm": 0.0791923776268959, - "learning_rate": 3.795105315726042e-05, - "loss": 0.006217213720083237, - "step": 2970 - }, - { - "epoch": 2.029331514324693, - "grad_norm": 0.3660128116607666, - "learning_rate": 3.7848544462254586e-05, - "loss": 0.01521536111831665, - "step": 2975 - }, - { - "epoch": 2.0327421555252387, - "grad_norm": 1.1506773233413696, - "learning_rate": 3.774603316249646e-05, - "loss": 0.016655027866363525, - "step": 2980 - }, - { - "epoch": 2.0361527967257844, - "grad_norm": 0.010742763057351112, - "learning_rate": 3.764352002407638e-05, - "loss": 0.0031856697052717207, - "step": 2985 - }, - { - "epoch": 2.03956343792633, - "grad_norm": 0.008567198179662228, - "learning_rate": 3.754100581309843e-05, - "loss": 0.006546307355165482, - "step": 2990 - }, - { - "epoch": 2.042974079126876, - "grad_norm": 0.242637500166893, - "learning_rate": 3.743849129567467e-05, - "loss": 0.01844196617603302, - "step": 2995 - }, - { - "epoch": 2.0463847203274215, - "grad_norm": 0.0021864245645701885, - "learning_rate": 3.7335977237919486e-05, - "loss": 0.0010665619745850563, - "step": 3000 - }, - { - "epoch": 2.049795361527967, - "grad_norm": 0.0823604166507721, - "learning_rate": 3.723346440594384e-05, - "loss": 0.007866787165403366, - "step": 3005 - }, - { - "epoch": 2.053206002728513, - "grad_norm": 1.950189471244812, - "learning_rate": 3.713095356584948e-05, - "loss": 0.008237652480602264, - "step": 3010 - }, - { - "epoch": 2.0566166439290585, - "grad_norm": 0.0031665184069424868, - "learning_rate": 3.702844548372333e-05, - "loss": 0.0026241108775138856, - "step": 3015 - }, - { - "epoch": 2.060027285129604, - "grad_norm": 0.000985809718258679, - "learning_rate": 3.692594092563164e-05, - "loss": 0.0006582608446478843, - "step": 3020 - }, - { - "epoch": 2.06343792633015, - "grad_norm": 0.005130598321557045, - "learning_rate": 3.682344065761439e-05, - "loss": 0.003146781027317047, - "step": 3025 - }, - { - "epoch": 2.0668485675306956, - "grad_norm": 0.37476831674575806, - "learning_rate": 3.6720945445679456e-05, - "loss": 0.0038135331124067307, - "step": 3030 - }, - { - "epoch": 2.0702592087312413, - "grad_norm": 0.7057031989097595, - "learning_rate": 3.661845605579694e-05, - "loss": 0.01638354957103729, - "step": 3035 - }, - { - "epoch": 2.0736698499317874, - "grad_norm": 0.002361712511628866, - "learning_rate": 3.651597325389343e-05, - "loss": 0.008126144856214523, - "step": 3040 - }, - { - "epoch": 2.077080491132333, - "grad_norm": 0.14359615743160248, - "learning_rate": 3.641349780584628e-05, - "loss": 0.0010236113332211972, - "step": 3045 - }, - { - "epoch": 2.0804911323328787, - "grad_norm": 0.02165267802774906, - "learning_rate": 3.631103047747791e-05, - "loss": 0.001835300400853157, - "step": 3050 - }, - { - "epoch": 2.0839017735334244, - "grad_norm": 0.08726503700017929, - "learning_rate": 3.6208572034550014e-05, - "loss": 0.0011202219873666763, - "step": 3055 - }, - { - "epoch": 2.08731241473397, - "grad_norm": 0.011694432236254215, - "learning_rate": 3.6106123242757934e-05, - "loss": 0.0012682409957051276, - "step": 3060 - }, - { - "epoch": 2.090723055934516, - "grad_norm": 0.011882166378200054, - "learning_rate": 3.6003684867724856e-05, - "loss": 0.0017605546861886978, - "step": 3065 - }, - { - "epoch": 2.0941336971350615, - "grad_norm": 0.04591360688209534, - "learning_rate": 3.5901257674996135e-05, - "loss": 0.005101381987333298, - "step": 3070 - }, - { - "epoch": 2.097544338335607, - "grad_norm": 0.004696133080869913, - "learning_rate": 3.579884243003353e-05, - "loss": 0.0011597291566431522, - "step": 3075 - }, - { - "epoch": 2.100954979536153, - "grad_norm": 0.15893827378749847, - "learning_rate": 3.5696439898209546e-05, - "loss": 0.004860148951411247, - "step": 3080 - }, - { - "epoch": 2.1043656207366985, - "grad_norm": 0.0052610537968575954, - "learning_rate": 3.559405084480166e-05, - "loss": 0.0022133996710181235, - "step": 3085 - }, - { - "epoch": 2.107776261937244, - "grad_norm": 0.04163965955376625, - "learning_rate": 3.5491676034986634e-05, - "loss": 0.0011653171852231026, - "step": 3090 - }, - { - "epoch": 2.11118690313779, - "grad_norm": 0.03646431118249893, - "learning_rate": 3.538931623383477e-05, - "loss": 0.0010974819771945477, - "step": 3095 - }, - { - "epoch": 2.1145975443383356, - "grad_norm": 0.0027346035931259394, - "learning_rate": 3.5286972206304225e-05, - "loss": 0.0002963356673717499, - "step": 3100 - }, - { - "epoch": 2.1180081855388813, - "grad_norm": 0.030314767733216286, - "learning_rate": 3.5184644717235233e-05, - "loss": 0.0013646245934069157, - "step": 3105 - }, - { - "epoch": 2.121418826739427, - "grad_norm": 0.07366184145212173, - "learning_rate": 3.5082334531344514e-05, - "loss": 0.0018215376883745193, - "step": 3110 - }, - { - "epoch": 2.1248294679399726, - "grad_norm": 0.5131163597106934, - "learning_rate": 3.498004241321938e-05, - "loss": 0.011788859963417053, - "step": 3115 - }, - { - "epoch": 2.1282401091405183, - "grad_norm": 0.005606099497526884, - "learning_rate": 3.48777691273122e-05, - "loss": 0.00023315094877034425, - "step": 3120 - }, - { - "epoch": 2.131650750341064, - "grad_norm": 0.218208909034729, - "learning_rate": 3.4775515437934554e-05, - "loss": 0.0018167193979024888, - "step": 3125 - }, - { - "epoch": 2.1350613915416097, - "grad_norm": 0.026605455204844475, - "learning_rate": 3.467328210925161e-05, - "loss": 0.0012846079654991627, - "step": 3130 - }, - { - "epoch": 2.1384720327421554, - "grad_norm": 1.4139975309371948, - "learning_rate": 3.457106990527632e-05, - "loss": 0.00508112832903862, - "step": 3135 - }, - { - "epoch": 2.141882673942701, - "grad_norm": 0.019414646551012993, - "learning_rate": 3.446887958986385e-05, - "loss": 0.00019377884455025197, - "step": 3140 - }, - { - "epoch": 2.1452933151432467, - "grad_norm": 0.0009443740709684789, - "learning_rate": 3.4366711926705694e-05, - "loss": 0.0002384019084274769, - "step": 3145 - }, - { - "epoch": 2.148703956343793, - "grad_norm": 0.004274521954357624, - "learning_rate": 3.426456767932416e-05, - "loss": 0.0001209982088766992, - "step": 3150 - }, - { - "epoch": 2.1521145975443385, - "grad_norm": 1.536423683166504, - "learning_rate": 3.416244761106645e-05, - "loss": 0.008635792136192321, - "step": 3155 - }, - { - "epoch": 2.155525238744884, - "grad_norm": 0.0005219463491812348, - "learning_rate": 3.406035248509918e-05, - "loss": 0.009876561909914016, - "step": 3160 - }, - { - "epoch": 2.15893587994543, - "grad_norm": 0.0003459984145592898, - "learning_rate": 3.395828306440249e-05, - "loss": 0.00039457883685827254, - "step": 3165 - }, - { - "epoch": 2.1623465211459756, - "grad_norm": 0.001981241861358285, - "learning_rate": 3.3856240111764465e-05, - "loss": 0.0015202089212834834, - "step": 3170 - }, - { - "epoch": 2.1657571623465213, - "grad_norm": 2.1673197746276855, - "learning_rate": 3.375422438977536e-05, - "loss": 0.006145225092768669, - "step": 3175 - }, - { - "epoch": 2.169167803547067, - "grad_norm": 1.1808840036392212, - "learning_rate": 3.365223666082195e-05, - "loss": 0.013607437908649444, - "step": 3180 - }, - { - "epoch": 2.1725784447476126, - "grad_norm": 0.028271734714508057, - "learning_rate": 3.3550277687081766e-05, - "loss": 0.0013325599022209645, - "step": 3185 - }, - { - "epoch": 2.1759890859481583, - "grad_norm": 0.24461407959461212, - "learning_rate": 3.344834823051754e-05, - "loss": 0.0011271734721958638, - "step": 3190 - }, - { - "epoch": 2.179399727148704, - "grad_norm": 0.870476245880127, - "learning_rate": 3.334644905287129e-05, - "loss": 0.006120540574193001, - "step": 3195 - }, - { - "epoch": 2.1828103683492497, - "grad_norm": 0.24630939960479736, - "learning_rate": 3.324458091565887e-05, - "loss": 0.006894994527101517, - "step": 3200 - }, - { - "epoch": 2.1862210095497954, - "grad_norm": 0.035023678094148636, - "learning_rate": 3.314274458016407e-05, - "loss": 0.0012451894581317902, - "step": 3205 - }, - { - "epoch": 2.189631650750341, - "grad_norm": 0.0890582948923111, - "learning_rate": 3.3040940807433086e-05, - "loss": 0.0024930678308010103, - "step": 3210 - }, - { - "epoch": 2.1930422919508867, - "grad_norm": 0.30489957332611084, - "learning_rate": 3.2939170358268715e-05, - "loss": 0.023087967932224274, - "step": 3215 - }, - { - "epoch": 2.1964529331514324, - "grad_norm": 0.0017311271512880921, - "learning_rate": 3.283743399322477e-05, - "loss": 0.010184342414140702, - "step": 3220 - }, - { - "epoch": 2.199863574351978, - "grad_norm": 0.08493661880493164, - "learning_rate": 3.273573247260027e-05, - "loss": 0.02805263102054596, - "step": 3225 - }, - { - "epoch": 2.203274215552524, - "grad_norm": 0.09135634452104568, - "learning_rate": 3.2634066556433934e-05, - "loss": 0.0009638279676437378, - "step": 3230 - }, - { - "epoch": 2.2066848567530695, - "grad_norm": 0.05967738851904869, - "learning_rate": 3.2532437004498316e-05, - "loss": 0.007517358660697937, - "step": 3235 - }, - { - "epoch": 2.210095497953615, - "grad_norm": 0.03030387870967388, - "learning_rate": 3.243084457629425e-05, - "loss": 0.0009239451959729194, - "step": 3240 - }, - { - "epoch": 2.213506139154161, - "grad_norm": 0.004568960517644882, - "learning_rate": 3.2329290031045114e-05, - "loss": 0.00045777852647006513, - "step": 3245 - }, - { - "epoch": 2.2169167803547065, - "grad_norm": 0.20760250091552734, - "learning_rate": 3.2227774127691225e-05, - "loss": 0.0007285364903509617, - "step": 3250 - }, - { - "epoch": 2.220327421555252, - "grad_norm": 0.0028919128235429525, - "learning_rate": 3.2126297624884065e-05, - "loss": 0.001136382296681404, - "step": 3255 - }, - { - "epoch": 2.223738062755798, - "grad_norm": 0.0026204732712358236, - "learning_rate": 3.20248612809807e-05, - "loss": 0.0001833780319429934, - "step": 3260 - }, - { - "epoch": 2.2271487039563436, - "grad_norm": 0.054305560886859894, - "learning_rate": 3.192346585403805e-05, - "loss": 0.003986500948667526, - "step": 3265 - }, - { - "epoch": 2.2305593451568897, - "grad_norm": 0.024538133293390274, - "learning_rate": 3.182211210180732e-05, - "loss": 0.0003968174569308758, - "step": 3270 - }, - { - "epoch": 2.2339699863574354, - "grad_norm": 0.003948847763240337, - "learning_rate": 3.172080078172817e-05, - "loss": 0.0004363433923572302, - "step": 3275 - }, - { - "epoch": 2.237380627557981, - "grad_norm": 0.0024372704792767763, - "learning_rate": 3.161953265092322e-05, - "loss": 0.001003190129995346, - "step": 3280 - }, - { - "epoch": 2.2407912687585267, - "grad_norm": 0.05504141375422478, - "learning_rate": 3.1518308466192346e-05, - "loss": 0.00043217958882451056, - "step": 3285 - }, - { - "epoch": 2.2442019099590724, - "grad_norm": 0.003660411573946476, - "learning_rate": 3.141712898400692e-05, - "loss": 0.0005229417700320482, - "step": 3290 - }, - { - "epoch": 2.246930422919509, - "eval_loss": 0.07013023644685745, - "eval_runtime": 0.915, - "eval_samples_per_second": 81.971, - "eval_steps_per_second": 2.186, - "step": 3294 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3294 - }, - { - "epoch": 2.247612551159618, - "grad_norm": 0.0009039652650244534, - "learning_rate": 3.1315994960504354e-05, - "loss": 0.0009505398571491242, - "step": 3295 - }, - { - "epoch": 2.251023192360164, - "grad_norm": 0.0008299718610942364, - "learning_rate": 3.121490715148224e-05, - "loss": 0.006436178088188171, - "step": 3300 - }, - { - "epoch": 2.2544338335607095, - "grad_norm": 0.003360757604241371, - "learning_rate": 3.1113866312392846e-05, - "loss": 0.0004931201227009296, - "step": 3305 - }, - { - "epoch": 2.257844474761255, - "grad_norm": 0.0006958392332307994, - "learning_rate": 3.1012873198337415e-05, - "loss": 0.0008634727448225022, - "step": 3310 - }, - { - "epoch": 2.261255115961801, - "grad_norm": 0.0006489035440608859, - "learning_rate": 3.0911928564060525e-05, - "loss": 0.02126412242650986, - "step": 3315 - }, - { - "epoch": 2.2646657571623465, - "grad_norm": 0.05853112041950226, - "learning_rate": 3.081103316394446e-05, - "loss": 0.0011481027118861674, - "step": 3320 - }, - { - "epoch": 2.268076398362892, - "grad_norm": 0.018470890820026398, - "learning_rate": 3.0710187752003576e-05, - "loss": 0.0005085847340524196, - "step": 3325 - }, - { - "epoch": 2.271487039563438, - "grad_norm": 0.002904064953327179, - "learning_rate": 3.06093930818786e-05, - "loss": 0.0011355782859027385, - "step": 3330 - }, - { - "epoch": 2.2748976807639836, - "grad_norm": 0.006562090013176203, - "learning_rate": 3.0508649906831165e-05, - "loss": 0.0017314480617642402, - "step": 3335 - }, - { - "epoch": 2.2783083219645293, - "grad_norm": 0.012832654640078545, - "learning_rate": 3.040795897973794e-05, - "loss": 0.014564378559589386, - "step": 3340 - }, - { - "epoch": 2.281718963165075, - "grad_norm": 0.40248075127601624, - "learning_rate": 3.030732105308523e-05, - "loss": 0.013111820816993714, - "step": 3345 - }, - { - "epoch": 2.2851296043656206, - "grad_norm": 0.009625518694519997, - "learning_rate": 3.0206736878963198e-05, - "loss": 0.0003735888050869107, - "step": 3350 - }, - { - "epoch": 2.2885402455661663, - "grad_norm": 0.05925761163234711, - "learning_rate": 3.010620720906034e-05, - "loss": 0.0005200970452278852, - "step": 3355 - }, - { - "epoch": 2.291950886766712, - "grad_norm": 0.04488271474838257, - "learning_rate": 3.0005732794657804e-05, - "loss": 0.0017546603456139564, - "step": 3360 - }, - { - "epoch": 2.2953615279672577, - "grad_norm": 0.0013143798569217324, - "learning_rate": 2.990531438662383e-05, - "loss": 0.0006482157856225968, - "step": 3365 - }, - { - "epoch": 2.2987721691678034, - "grad_norm": 0.0018280980875715613, - "learning_rate": 2.980495273540805e-05, - "loss": 0.002798055298626423, - "step": 3370 - }, - { - "epoch": 2.3021828103683495, - "grad_norm": 0.0068644145503640175, - "learning_rate": 2.9704648591036028e-05, - "loss": 0.0010916708968579769, - "step": 3375 - }, - { - "epoch": 2.305593451568895, - "grad_norm": 0.006140770856291056, - "learning_rate": 2.9604402703103482e-05, - "loss": 0.0003204951295629144, - "step": 3380 - }, - { - "epoch": 2.309004092769441, - "grad_norm": 0.01666918210685253, - "learning_rate": 2.9504215820770825e-05, - "loss": 0.002915392816066742, - "step": 3385 - }, - { - "epoch": 2.3124147339699865, - "grad_norm": 0.001569412648677826, - "learning_rate": 2.9404088692757462e-05, - "loss": 0.00282623004168272, - "step": 3390 - }, - { - "epoch": 2.315825375170532, - "grad_norm": 2.6985678672790527, - "learning_rate": 2.930402206733629e-05, - "loss": 0.056363034248352054, - "step": 3395 - }, - { - "epoch": 2.319236016371078, - "grad_norm": 0.061534252017736435, - "learning_rate": 2.9204016692328008e-05, - "loss": 0.002193786948919296, - "step": 3400 - }, - { - "epoch": 2.3226466575716236, - "grad_norm": 0.00724546005949378, - "learning_rate": 2.9104073315095624e-05, - "loss": 0.0027640098705887794, - "step": 3405 - }, - { - "epoch": 2.3260572987721693, - "grad_norm": 0.0014935819199308753, - "learning_rate": 2.900419268253876e-05, - "loss": 0.0014965098351240158, - "step": 3410 - }, - { - "epoch": 2.329467939972715, - "grad_norm": 0.2667955458164215, - "learning_rate": 2.89043755410882e-05, - "loss": 0.0009135601110756397, - "step": 3415 - }, - { - "epoch": 2.3328785811732606, - "grad_norm": 0.015711264684796333, - "learning_rate": 2.8804622636700195e-05, - "loss": 0.0004993634298443794, - "step": 3420 - }, - { - "epoch": 2.3362892223738063, - "grad_norm": 0.000695803901180625, - "learning_rate": 2.8704934714850972e-05, - "loss": 0.0010460540652275085, - "step": 3425 - }, - { - "epoch": 2.339699863574352, - "grad_norm": 0.00030175631400197744, - "learning_rate": 2.8605312520531102e-05, - "loss": 0.0011491063050925732, - "step": 3430 - }, - { - "epoch": 2.3431105047748977, - "grad_norm": 0.0008360512438230217, - "learning_rate": 2.850575679823998e-05, - "loss": 0.005195276811718941, - "step": 3435 - }, - { - "epoch": 2.3465211459754434, - "grad_norm": 0.07670744508504868, - "learning_rate": 2.840626829198022e-05, - "loss": 0.001102046575397253, - "step": 3440 - }, - { - "epoch": 2.349931787175989, - "grad_norm": 0.0048200939781963825, - "learning_rate": 2.8306847745252154e-05, - "loss": 0.00011967071332037449, - "step": 3445 - }, - { - "epoch": 2.3533424283765347, - "grad_norm": 0.0036802536342293024, - "learning_rate": 2.8207495901048164e-05, - "loss": 0.003212982416152954, - "step": 3450 - }, - { - "epoch": 2.3567530695770804, - "grad_norm": 0.0017565820598974824, - "learning_rate": 2.8108213501847284e-05, - "loss": 3.878590650856495e-05, - "step": 3455 - }, - { - "epoch": 2.360163710777626, - "grad_norm": 0.07837986201047897, - "learning_rate": 2.8009001289609514e-05, - "loss": 0.00035386246163398026, - "step": 3460 - }, - { - "epoch": 2.363574351978172, - "grad_norm": 0.035858154296875, - "learning_rate": 2.7909860005770364e-05, - "loss": 0.0020171813666820526, - "step": 3465 - }, - { - "epoch": 2.3669849931787175, - "grad_norm": 0.001313618617132306, - "learning_rate": 2.781079039123525e-05, - "loss": 0.0077533811330795285, - "step": 3470 - }, - { - "epoch": 2.370395634379263, - "grad_norm": 0.022166471928358078, - "learning_rate": 2.771179318637402e-05, - "loss": 0.00021515686530619859, - "step": 3475 - }, - { - "epoch": 2.373806275579809, - "grad_norm": 0.0037807885091751814, - "learning_rate": 2.7612869131015353e-05, - "loss": 0.008334387093782425, - "step": 3480 - }, - { - "epoch": 2.3772169167803545, - "grad_norm": 0.02126333676278591, - "learning_rate": 2.7514018964441313e-05, - "loss": 0.0012980472296476365, - "step": 3485 - }, - { - "epoch": 2.3806275579809, - "grad_norm": 0.0007329506915993989, - "learning_rate": 2.7415243425381707e-05, - "loss": 0.000131706683896482, - "step": 3490 - }, - { - "epoch": 2.384038199181446, - "grad_norm": 0.008962417021393776, - "learning_rate": 2.73165432520087e-05, - "loss": 0.0001407766016200185, - "step": 3495 - }, - { - "epoch": 2.3874488403819916, - "grad_norm": 0.06224314495921135, - "learning_rate": 2.721791918193119e-05, - "loss": 0.0005040234886109829, - "step": 3500 - }, - { - "epoch": 2.3908594815825377, - "grad_norm": 0.007790696807205677, - "learning_rate": 2.7119371952189368e-05, - "loss": 0.00020941467955708503, - "step": 3505 - }, - { - "epoch": 2.3942701227830834, - "grad_norm": 0.1999143660068512, - "learning_rate": 2.7020902299249144e-05, - "loss": 0.0005157966166734696, - "step": 3510 - }, - { - "epoch": 2.397680763983629, - "grad_norm": 1.5223946571350098, - "learning_rate": 2.692251095899673e-05, - "loss": 0.004808775335550308, - "step": 3515 - }, - { - "epoch": 2.4010914051841747, - "grad_norm": 0.0005383774405345321, - "learning_rate": 2.6824198666733024e-05, - "loss": 0.0007459132932126522, - "step": 3520 - }, - { - "epoch": 2.4045020463847204, - "grad_norm": 0.02152041345834732, - "learning_rate": 2.672596615716823e-05, - "loss": 0.010163982212543488, - "step": 3525 - }, - { - "epoch": 2.407912687585266, - "grad_norm": 0.1950986683368683, - "learning_rate": 2.6627814164416303e-05, - "loss": 0.002464359626173973, - "step": 3530 - }, - { - "epoch": 2.411323328785812, - "grad_norm": 0.21561792492866516, - "learning_rate": 2.652974342198947e-05, - "loss": 0.0010975897312164307, - "step": 3535 - }, - { - "epoch": 2.4147339699863575, - "grad_norm": 0.0007951174047775567, - "learning_rate": 2.6431754662792775e-05, - "loss": 6.033455138094723e-05, - "step": 3540 - }, - { - "epoch": 2.418144611186903, - "grad_norm": 0.0016590118175372481, - "learning_rate": 2.633384861911856e-05, - "loss": 0.00012161724735051393, - "step": 3545 - }, - { - "epoch": 2.421555252387449, - "grad_norm": 0.004098537378013134, - "learning_rate": 2.6236026022641047e-05, - "loss": 0.0006160829216241837, - "step": 3550 - }, - { - "epoch": 2.4249658935879945, - "grad_norm": 0.0009240853250958025, - "learning_rate": 2.6138287604410772e-05, - "loss": 8.804704993963242e-05, - "step": 3555 - }, - { - "epoch": 2.42837653478854, - "grad_norm": 0.005952226463705301, - "learning_rate": 2.604063409484928e-05, - "loss": 0.0006035147234797478, - "step": 3560 - }, - { - "epoch": 2.431787175989086, - "grad_norm": 0.03809252381324768, - "learning_rate": 2.5943066223743488e-05, - "loss": 0.00727783590555191, - "step": 3565 - }, - { - "epoch": 2.4351978171896316, - "grad_norm": 0.05054875835776329, - "learning_rate": 2.5845584720240384e-05, - "loss": 0.0082052581012249, - "step": 3570 - }, - { - "epoch": 2.4386084583901773, - "grad_norm": 0.0147855868563056, - "learning_rate": 2.5748190312841466e-05, - "loss": 0.011614852398633958, - "step": 3575 - }, - { - "epoch": 2.442019099590723, - "grad_norm": 0.011641742661595345, - "learning_rate": 2.5650883729397373e-05, - "loss": 0.0002830417361110449, - "step": 3580 - }, - { - "epoch": 2.4454297407912686, - "grad_norm": 0.04626445844769478, - "learning_rate": 2.5553665697102386e-05, - "loss": 0.0003774407086893916, - "step": 3585 - }, - { - "epoch": 2.4488403819918143, - "grad_norm": 0.3234706521034241, - "learning_rate": 2.5456536942489065e-05, - "loss": 0.0009496832266449928, - "step": 3590 - }, - { - "epoch": 2.45225102319236, - "grad_norm": 0.029156841337680817, - "learning_rate": 2.535949819142272e-05, - "loss": 0.0016127176582813262, - "step": 3595 - }, - { - "epoch": 2.4556616643929057, - "grad_norm": 0.0015022120205685496, - "learning_rate": 2.52625501690961e-05, - "loss": 0.00010128046851605177, - "step": 3600 - }, - { - "epoch": 2.459072305593452, - "grad_norm": 0.12954266369342804, - "learning_rate": 2.5165693600023872e-05, - "loss": 0.004440005496144294, - "step": 3605 - }, - { - "epoch": 2.4624829467939975, - "grad_norm": 0.022409839555621147, - "learning_rate": 2.5068929208037295e-05, - "loss": 0.0019246777519583702, - "step": 3610 - }, - { - "epoch": 2.465893587994543, - "grad_norm": 0.0018720730440691113, - "learning_rate": 2.497225771627873e-05, - "loss": 0.004561808705329895, - "step": 3615 - }, - { - "epoch": 2.469304229195089, - "grad_norm": 0.0021158247254788876, - "learning_rate": 2.4875679847196312e-05, - "loss": 0.005481125041842461, - "step": 3620 - }, - { - "epoch": 2.4727148703956345, - "grad_norm": 0.0024307845160365105, - "learning_rate": 2.477919632253845e-05, - "loss": 0.0009140795096755028, - "step": 3625 - }, - { - "epoch": 2.47612551159618, - "grad_norm": 0.0020758784376084805, - "learning_rate": 2.4682807863348583e-05, - "loss": 0.001236506924033165, - "step": 3630 - }, - { - "epoch": 2.479536152796726, - "grad_norm": 0.0006182460929267108, - "learning_rate": 2.4586515189959614e-05, - "loss": 0.00015565860085189342, - "step": 3635 - }, - { - "epoch": 2.4829467939972716, - "grad_norm": 0.05087731033563614, - "learning_rate": 2.4490319021988688e-05, - "loss": 0.00022137174382805825, - "step": 3640 - }, - { - "epoch": 2.4863574351978173, - "grad_norm": 0.03250613436102867, - "learning_rate": 2.4394220078331695e-05, - "loss": 0.00028696306981146336, - "step": 3645 - }, - { - "epoch": 2.489768076398363, - "grad_norm": 0.017168540507555008, - "learning_rate": 2.429821907715798e-05, - "loss": 0.0003641644492745399, - "step": 3650 - }, - { - "epoch": 2.4931787175989086, - "grad_norm": 0.0670199990272522, - "learning_rate": 2.420231673590491e-05, - "loss": 0.00015748695004731418, - "step": 3655 - }, - { - "epoch": 2.4965893587994543, - "grad_norm": 0.003998387139290571, - "learning_rate": 2.4106513771272585e-05, - "loss": 0.00026149852201342585, - "step": 3660 - }, - { - "epoch": 2.4965893587994543, - "eval_loss": 0.06822175532579422, - "eval_runtime": 0.9108, - "eval_samples_per_second": 82.345, - "eval_steps_per_second": 2.196, - "step": 3660 - }, - { - "eval_cer_subset": 0.01675977653631285, - "eval_cer_subset_edit_distance": 123, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3660 - }, - { - "epoch": 2.5, - "grad_norm": 0.0059893373399972916, - "learning_rate": 2.4010810899218384e-05, - "loss": 0.0037302006036043166, - "step": 3665 - }, - { - "epoch": 2.5034106412005457, - "grad_norm": 0.3968847692012787, - "learning_rate": 2.3915208834951736e-05, - "loss": 0.0008235686458647251, - "step": 3670 - }, - { - "epoch": 2.5068212824010914, - "grad_norm": 0.001170233590528369, - "learning_rate": 2.3819708292928645e-05, - "loss": 0.0021816927939653395, - "step": 3675 - }, - { - "epoch": 2.510231923601637, - "grad_norm": 0.0864306092262268, - "learning_rate": 2.3724309986846476e-05, - "loss": 0.00794672966003418, - "step": 3680 - }, - { - "epoch": 2.5136425648021827, - "grad_norm": 0.0012164375511929393, - "learning_rate": 2.362901462963851e-05, - "loss": 0.00014161464059725404, - "step": 3685 - }, - { - "epoch": 2.5170532060027284, - "grad_norm": 0.0047707995399832726, - "learning_rate": 2.353382293346872e-05, - "loss": 0.00012235456379130482, - "step": 3690 - }, - { - "epoch": 2.520463847203274, - "grad_norm": 0.0010226344456896186, - "learning_rate": 2.3438735609726346e-05, - "loss": 0.0006677288562059403, - "step": 3695 - }, - { - "epoch": 2.52387448840382, - "grad_norm": 0.01809096150100231, - "learning_rate": 2.334375336902067e-05, - "loss": 0.0004967927932739257, - "step": 3700 - }, - { - "epoch": 2.5272851296043655, - "grad_norm": 0.006922638975083828, - "learning_rate": 2.3248876921175613e-05, - "loss": 0.0012997164390981196, - "step": 3705 - }, - { - "epoch": 2.530695770804911, - "grad_norm": 0.0002996268740389496, - "learning_rate": 2.315410697522456e-05, - "loss": 5.4457224905490875e-05, - "step": 3710 - }, - { - "epoch": 2.534106412005457, - "grad_norm": 0.00561846699565649, - "learning_rate": 2.3059444239404896e-05, - "loss": 0.0002347052562981844, - "step": 3715 - }, - { - "epoch": 2.5375170532060025, - "grad_norm": 1.200972318649292, - "learning_rate": 2.296488942115287e-05, - "loss": 0.003510555624961853, - "step": 3720 - }, - { - "epoch": 2.540927694406548, - "grad_norm": 0.008847455494105816, - "learning_rate": 2.287044322709819e-05, - "loss": 0.00010497854091227055, - "step": 3725 - }, - { - "epoch": 2.544338335607094, - "grad_norm": 0.0026281927712261677, - "learning_rate": 2.277610636305883e-05, - "loss": 0.001988488808274269, - "step": 3730 - }, - { - "epoch": 2.5477489768076396, - "grad_norm": 0.008025784976780415, - "learning_rate": 2.268187953403568e-05, - "loss": 0.023679326474666595, - "step": 3735 - }, - { - "epoch": 2.5511596180081857, - "grad_norm": 0.03441132605075836, - "learning_rate": 2.258776344420735e-05, - "loss": 0.0004788160789757967, - "step": 3740 - }, - { - "epoch": 2.5545702592087314, - "grad_norm": 3.1458778381347656, - "learning_rate": 2.2493758796924816e-05, - "loss": 0.008043569326400758, - "step": 3745 - }, - { - "epoch": 2.557980900409277, - "grad_norm": 0.002775805303826928, - "learning_rate": 2.2399866294706302e-05, - "loss": 0.0013419794850051403, - "step": 3750 - }, - { - "epoch": 2.5613915416098227, - "grad_norm": 0.0030509193893522024, - "learning_rate": 2.2306086639231857e-05, - "loss": 0.03926021754741669, - "step": 3755 - }, - { - "epoch": 2.5648021828103684, - "grad_norm": 0.0024770230520516634, - "learning_rate": 2.2212420531338248e-05, - "loss": 0.0011906253173947334, - "step": 3760 - }, - { - "epoch": 2.568212824010914, - "grad_norm": 0.007617161609232426, - "learning_rate": 2.2118868671013692e-05, - "loss": 0.008073102682828903, - "step": 3765 - }, - { - "epoch": 2.57162346521146, - "grad_norm": 0.08999158442020416, - "learning_rate": 2.202543175739254e-05, - "loss": 0.0009835162200033665, - "step": 3770 - }, - { - "epoch": 2.5750341064120055, - "grad_norm": 0.010818173177540302, - "learning_rate": 2.193211048875022e-05, - "loss": 0.0019240962341427804, - "step": 3775 - }, - { - "epoch": 2.578444747612551, - "grad_norm": 0.01809680461883545, - "learning_rate": 2.183890556249781e-05, - "loss": 0.005013756453990936, - "step": 3780 - }, - { - "epoch": 2.581855388813097, - "grad_norm": 0.021501798182725906, - "learning_rate": 2.1745817675177027e-05, - "loss": 0.0005870801862329245, - "step": 3785 - }, - { - "epoch": 2.5852660300136425, - "grad_norm": 0.011130684986710548, - "learning_rate": 2.165284752245485e-05, - "loss": 0.00037821107544004916, - "step": 3790 - }, - { - "epoch": 2.588676671214188, - "grad_norm": 0.00624213507398963, - "learning_rate": 2.1559995799118496e-05, - "loss": 0.015400664508342743, - "step": 3795 - }, - { - "epoch": 2.592087312414734, - "grad_norm": 0.23763298988342285, - "learning_rate": 2.1467263199070018e-05, - "loss": 0.0006831173319369555, - "step": 3800 - }, - { - "epoch": 2.5954979536152796, - "grad_norm": 0.0056639909744262695, - "learning_rate": 2.137465041532133e-05, - "loss": 0.002130754478275776, - "step": 3805 - }, - { - "epoch": 2.5989085948158253, - "grad_norm": 0.42903369665145874, - "learning_rate": 2.1282158139988877e-05, - "loss": 0.0020006079226732253, - "step": 3810 - }, - { - "epoch": 2.602319236016371, - "grad_norm": 0.014364579692482948, - "learning_rate": 2.118978706428854e-05, - "loss": 0.0005437508225440979, - "step": 3815 - }, - { - "epoch": 2.6057298772169166, - "grad_norm": 0.017512807622551918, - "learning_rate": 2.1097537878530427e-05, - "loss": 0.00019666440784931182, - "step": 3820 - }, - { - "epoch": 2.6091405184174628, - "grad_norm": 0.00863230973482132, - "learning_rate": 2.100541127211379e-05, - "loss": 0.0001873808912932873, - "step": 3825 - }, - { - "epoch": 2.6125511596180084, - "grad_norm": 0.006781345698982477, - "learning_rate": 2.0913407933521714e-05, - "loss": 0.00018554476555436849, - "step": 3830 - }, - { - "epoch": 2.615961800818554, - "grad_norm": 0.004758995026350021, - "learning_rate": 2.082152855031618e-05, - "loss": 0.0004659180995076895, - "step": 3835 - }, - { - "epoch": 2.6193724420191, - "grad_norm": 0.0036142354365438223, - "learning_rate": 2.0729773809132782e-05, - "loss": 0.00033613520208746194, - "step": 3840 - }, - { - "epoch": 2.6227830832196455, - "grad_norm": 0.0798744410276413, - "learning_rate": 2.0638144395675614e-05, - "loss": 0.00026304563507437704, - "step": 3845 - }, - { - "epoch": 2.626193724420191, - "grad_norm": 0.07229600101709366, - "learning_rate": 2.0546640994712183e-05, - "loss": 0.022786998748779298, - "step": 3850 - }, - { - "epoch": 2.629604365620737, - "grad_norm": 0.11226585507392883, - "learning_rate": 2.04552642900683e-05, - "loss": 0.0002580304862931371, - "step": 3855 - }, - { - "epoch": 2.6330150068212825, - "grad_norm": 0.0019689116161316633, - "learning_rate": 2.036401496462292e-05, - "loss": 0.005474040284752846, - "step": 3860 - }, - { - "epoch": 2.636425648021828, - "grad_norm": 0.08611829578876495, - "learning_rate": 2.027289370030307e-05, - "loss": 0.0007106051780283451, - "step": 3865 - }, - { - "epoch": 2.639836289222374, - "grad_norm": 0.06968124955892563, - "learning_rate": 2.0181901178078723e-05, - "loss": 0.00030497927218675613, - "step": 3870 - }, - { - "epoch": 2.6432469304229196, - "grad_norm": 0.002102258615195751, - "learning_rate": 2.0091038077957807e-05, - "loss": 0.00039041375275701285, - "step": 3875 - }, - { - "epoch": 2.6466575716234653, - "grad_norm": 0.01055186241865158, - "learning_rate": 2.000030507898094e-05, - "loss": 0.00028035915456712244, - "step": 3880 - }, - { - "epoch": 2.650068212824011, - "grad_norm": 0.013122744858264923, - "learning_rate": 1.990970285921656e-05, - "loss": 0.0002463514683768153, - "step": 3885 - }, - { - "epoch": 2.6534788540245566, - "grad_norm": 0.043785031884908676, - "learning_rate": 1.9819232095755712e-05, - "loss": 0.0006866191513836383, - "step": 3890 - }, - { - "epoch": 2.6568894952251023, - "grad_norm": 0.014347897842526436, - "learning_rate": 1.9728893464707063e-05, - "loss": 0.00304874274879694, - "step": 3895 - }, - { - "epoch": 2.660300136425648, - "grad_norm": 0.01495263073593378, - "learning_rate": 1.9638687641191784e-05, - "loss": 0.0027243653312325478, - "step": 3900 - }, - { - "epoch": 2.6637107776261937, - "grad_norm": 0.0025812601670622826, - "learning_rate": 1.954861529933862e-05, - "loss": 0.00015772593906149268, - "step": 3905 - }, - { - "epoch": 2.6671214188267394, - "grad_norm": 3.2978317737579346, - "learning_rate": 1.9458677112278677e-05, - "loss": 0.011941131204366684, - "step": 3910 - }, - { - "epoch": 2.670532060027285, - "grad_norm": 0.00819153431802988, - "learning_rate": 1.936887375214059e-05, - "loss": 0.0019363060593605042, - "step": 3915 - }, - { - "epoch": 2.6739427012278307, - "grad_norm": 0.3553819954395294, - "learning_rate": 1.9279205890045335e-05, - "loss": 0.001681213453412056, - "step": 3920 - }, - { - "epoch": 2.6773533424283764, - "grad_norm": 0.14068304002285004, - "learning_rate": 1.9189674196101303e-05, - "loss": 0.0004354804754257202, - "step": 3925 - }, - { - "epoch": 2.680763983628922, - "grad_norm": 0.05331770330667496, - "learning_rate": 1.9100279339399258e-05, - "loss": 0.0006728332955390215, - "step": 3930 - }, - { - "epoch": 2.684174624829468, - "grad_norm": 0.010825222358107567, - "learning_rate": 1.9011021988007387e-05, - "loss": 0.011760103702545165, - "step": 3935 - }, - { - "epoch": 2.6875852660300135, - "grad_norm": 0.02598944492638111, - "learning_rate": 1.892190280896622e-05, - "loss": 0.00020915823988616468, - "step": 3940 - }, - { - "epoch": 2.690995907230559, - "grad_norm": 0.00512358546257019, - "learning_rate": 1.8832922468283724e-05, - "loss": 0.000882271584123373, - "step": 3945 - }, - { - "epoch": 2.694406548431105, - "grad_norm": 0.05068441852927208, - "learning_rate": 1.874408163093028e-05, - "loss": 0.000997264590114355, - "step": 3950 - }, - { - "epoch": 2.6978171896316505, - "grad_norm": 0.0038104017730802298, - "learning_rate": 1.8655380960833724e-05, - "loss": 0.001553349569439888, - "step": 3955 - }, - { - "epoch": 2.701227830832196, - "grad_norm": 0.0013087299885228276, - "learning_rate": 1.8566821120874394e-05, - "loss": 0.006560490280389786, - "step": 3960 - }, - { - "epoch": 2.704638472032742, - "grad_norm": 1.1628080606460571, - "learning_rate": 1.8478402772880208e-05, - "loss": 0.0015312742441892623, - "step": 3965 - }, - { - "epoch": 2.708049113233288, - "grad_norm": 0.0020620303694158792, - "learning_rate": 1.8390126577621636e-05, - "loss": 0.013011389970779419, - "step": 3970 - }, - { - "epoch": 2.7114597544338337, - "grad_norm": 0.0014427551068365574, - "learning_rate": 1.830199319480682e-05, - "loss": 0.0008381184190511704, - "step": 3975 - }, - { - "epoch": 2.7148703956343794, - "grad_norm": 0.0005985202733427286, - "learning_rate": 1.821400328307663e-05, - "loss": 0.0005598202813416719, - "step": 3980 - }, - { - "epoch": 2.718281036834925, - "grad_norm": 0.0016122297383844852, - "learning_rate": 1.8126157499999783e-05, - "loss": 0.006013911962509155, - "step": 3985 - }, - { - "epoch": 2.7216916780354707, - "grad_norm": 0.0028895260766148567, - "learning_rate": 1.8038456502067822e-05, - "loss": 0.00017103723948821425, - "step": 3990 - }, - { - "epoch": 2.7251023192360164, - "grad_norm": 0.09400962293148041, - "learning_rate": 1.7950900944690308e-05, - "loss": 0.07410463690757751, - "step": 3995 - }, - { - "epoch": 2.728512960436562, - "grad_norm": 0.015019465237855911, - "learning_rate": 1.786349148218993e-05, - "loss": 0.004524913057684899, - "step": 4000 - }, - { - "epoch": 2.731923601637108, - "grad_norm": 0.000663114245980978, - "learning_rate": 1.7776228767797522e-05, - "loss": 0.0212590754032135, - "step": 4005 - }, - { - "epoch": 2.7353342428376535, - "grad_norm": 0.0029272777028381824, - "learning_rate": 1.768911345364726e-05, - "loss": 0.000913316011428833, - "step": 4010 - }, - { - "epoch": 2.738744884038199, - "grad_norm": 0.03791525587439537, - "learning_rate": 1.7602146190771743e-05, - "loss": 0.0018313366919755936, - "step": 4015 - }, - { - "epoch": 2.742155525238745, - "grad_norm": 0.041133999824523926, - "learning_rate": 1.7515327629097217e-05, - "loss": 0.0006253012455999851, - "step": 4020 - }, - { - "epoch": 2.7455661664392905, - "grad_norm": 0.01035034004598856, - "learning_rate": 1.7428658417438534e-05, - "loss": 0.005944912880659103, - "step": 4025 - }, - { - "epoch": 2.7462482946793996, - "eval_loss": 0.06961391866207123, - "eval_runtime": 0.9223, - "eval_samples_per_second": 81.321, - "eval_steps_per_second": 2.169, - "step": 4026 - }, - { - "eval_cer_subset": 0.015397193078076032, - "eval_cer_subset_edit_distance": 113, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4026 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 4.192040077745357e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4026/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/adapter_model.safetensors deleted file mode 100644 index 4cdb8753dcd663bac8c9aa48b81e207d99e6521a..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:782eeb7e6b89372aee97ec66013888a5ba2a7a6502b257ce05a3c4887daa787b -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/optimizer.pt deleted file mode 100644 index 430363f4393d69d124a1ca58db862f379bec6b64..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18782d9bdcb195d77a1e25fc779224056c1276269a1b2021582e26498d278a0b -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/rng_state.pth deleted file mode 100644 index 6367b74f19467b36e1d46b9724b5c718b09ed5ea..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:419e357cd97f0d0fc2ead1cab9470f1570a1a1a10f851be6facc831c815ac5e0 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/scheduler.pt deleted file mode 100644 index 30b26fe090b4eb41e6ba17b3881998ef05594e45..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de2c4e5d905fe0bab6ee6ae2f522aaeb73bf96d675616dd8527e9f47bb6037ed -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/trainer_state.json deleted file mode 100644 index f7f5f5fe702bfbd68bbac9a61756d0f2ddbca4dd..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/trainer_state.json +++ /dev/null @@ -1,6372 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.9959072305593453, - "eval_steps": 366, - "global_step": 4392, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - }, - { - "epoch": 1.7496589358799455, - "grad_norm": 0.1577913612127304, - "learning_rate": 4.617543195717702e-05, - "loss": 0.008352726697921753, - "step": 2565 - }, - { - "epoch": 1.7530695770804912, - "grad_norm": 0.0097503075376153, - "learning_rate": 4.607566614572297e-05, - "loss": 0.0051550988107919695, - "step": 2570 - }, - { - "epoch": 1.7564802182810368, - "grad_norm": 0.0745258778333664, - "learning_rate": 4.5975836246359376e-05, - "loss": 0.021304388344287873, - "step": 2575 - }, - { - "epoch": 1.7598908594815825, - "grad_norm": 0.06805134564638138, - "learning_rate": 4.5875943005137875e-05, - "loss": 0.002654948644340038, - "step": 2580 - }, - { - "epoch": 1.7633015006821282, - "grad_norm": 0.005402611568570137, - "learning_rate": 4.577598716858342e-05, - "loss": 0.0005614575464278459, - "step": 2585 - }, - { - "epoch": 1.766712141882674, - "grad_norm": 1.909899115562439, - "learning_rate": 4.5675969483688796e-05, - "loss": 0.07116572856903076, - "step": 2590 - }, - { - "epoch": 1.7701227830832198, - "grad_norm": 0.31900784373283386, - "learning_rate": 4.557589069790897e-05, - "loss": 0.00657682865858078, - "step": 2595 - }, - { - "epoch": 1.7735334242837655, - "grad_norm": 0.029744356870651245, - "learning_rate": 4.547575155915556e-05, - "loss": 0.022768501937389374, - "step": 2600 - }, - { - "epoch": 1.7769440654843112, - "grad_norm": 0.3033762276172638, - "learning_rate": 4.537555281579118e-05, - "loss": 0.06703370213508605, - "step": 2605 - }, - { - "epoch": 1.7803547066848568, - "grad_norm": 0.1135796457529068, - "learning_rate": 4.5275295216623895e-05, - "loss": 0.02318609356880188, - "step": 2610 - }, - { - "epoch": 1.7837653478854025, - "grad_norm": 0.03714317828416824, - "learning_rate": 4.517497951090163e-05, - "loss": 0.0006621151696890593, - "step": 2615 - }, - { - "epoch": 1.7871759890859482, - "grad_norm": 0.022167189046740532, - "learning_rate": 4.507460644830652e-05, - "loss": 0.02861899733543396, - "step": 2620 - }, - { - "epoch": 1.790586630286494, - "grad_norm": 0.85112464427948, - "learning_rate": 4.497417677894937e-05, - "loss": 0.006332498788833618, - "step": 2625 - }, - { - "epoch": 1.7939972714870396, - "grad_norm": 0.35604724287986755, - "learning_rate": 4.487369125336402e-05, - "loss": 0.0009636864997446537, - "step": 2630 - }, - { - "epoch": 1.7974079126875853, - "grad_norm": 1.309139370918274, - "learning_rate": 4.477315062250173e-05, - "loss": 0.015147030353546143, - "step": 2635 - }, - { - "epoch": 1.800818553888131, - "grad_norm": 0.00445478456094861, - "learning_rate": 4.467255563772554e-05, - "loss": 0.0033130761235952376, - "step": 2640 - }, - { - "epoch": 1.8042291950886766, - "grad_norm": 0.6081591248512268, - "learning_rate": 4.457190705080476e-05, - "loss": 0.05884039998054504, - "step": 2645 - }, - { - "epoch": 1.8076398362892223, - "grad_norm": 0.021707098931074142, - "learning_rate": 4.4471205613909215e-05, - "loss": 0.011024921387434005, - "step": 2650 - }, - { - "epoch": 1.811050477489768, - "grad_norm": 0.07584571093320847, - "learning_rate": 4.437045207960372e-05, - "loss": 0.0034543585032224657, - "step": 2655 - }, - { - "epoch": 1.8144611186903137, - "grad_norm": 0.09392323344945908, - "learning_rate": 4.4269647200842444e-05, - "loss": 0.020604337751865386, - "step": 2660 - }, - { - "epoch": 1.8178717598908594, - "grad_norm": 0.01986370049417019, - "learning_rate": 4.4168791730963214e-05, - "loss": 0.002114328555762768, - "step": 2665 - }, - { - "epoch": 1.821282401091405, - "grad_norm": 0.09479828178882599, - "learning_rate": 4.406788642368199e-05, - "loss": 0.025032815337181092, - "step": 2670 - }, - { - "epoch": 1.8246930422919507, - "grad_norm": 0.049587834626436234, - "learning_rate": 4.396693203308714e-05, - "loss": 0.010216309875249862, - "step": 2675 - }, - { - "epoch": 1.8281036834924966, - "grad_norm": 0.8442233204841614, - "learning_rate": 4.3865929313633846e-05, - "loss": 0.028453707695007324, - "step": 2680 - }, - { - "epoch": 1.8315143246930423, - "grad_norm": 1.1214258670806885, - "learning_rate": 4.37648790201385e-05, - "loss": 0.030564960837364197, - "step": 2685 - }, - { - "epoch": 1.834924965893588, - "grad_norm": 0.0033480043057352304, - "learning_rate": 4.3663781907772984e-05, - "loss": 0.0031189337372779847, - "step": 2690 - }, - { - "epoch": 1.8383356070941337, - "grad_norm": 1.5848685503005981, - "learning_rate": 4.356263873205908e-05, - "loss": 0.006724410504102707, - "step": 2695 - }, - { - "epoch": 1.8417462482946794, - "grad_norm": 2.099224090576172, - "learning_rate": 4.346145024886282e-05, - "loss": 0.02901224195957184, - "step": 2700 - }, - { - "epoch": 1.845156889495225, - "grad_norm": 0.009933914057910442, - "learning_rate": 4.336021721438881e-05, - "loss": 0.01628301590681076, - "step": 2705 - }, - { - "epoch": 1.848567530695771, - "grad_norm": 0.36502084136009216, - "learning_rate": 4.325894038517463e-05, - "loss": 0.0063889890909194945, - "step": 2710 - }, - { - "epoch": 1.8519781718963166, - "grad_norm": 0.5338266491889954, - "learning_rate": 4.3157620518085123e-05, - "loss": 0.0017654186114668847, - "step": 2715 - }, - { - "epoch": 1.8553888130968623, - "grad_norm": 0.019173067063093185, - "learning_rate": 4.3056258370306773e-05, - "loss": 0.026314160227775572, - "step": 2720 - }, - { - "epoch": 1.858799454297408, - "grad_norm": 0.0022459065075963736, - "learning_rate": 4.295485469934203e-05, - "loss": 0.034506088495254515, - "step": 2725 - }, - { - "epoch": 1.8622100954979537, - "grad_norm": 0.009851609356701374, - "learning_rate": 4.285341026300366e-05, - "loss": 0.037691861391067505, - "step": 2730 - }, - { - "epoch": 1.8656207366984994, - "grad_norm": 0.037228964269161224, - "learning_rate": 4.275192581940908e-05, - "loss": 0.019535519182682037, - "step": 2735 - }, - { - "epoch": 1.869031377899045, - "grad_norm": 0.03244785964488983, - "learning_rate": 4.2650402126974704e-05, - "loss": 0.004778595641255379, - "step": 2740 - }, - { - "epoch": 1.8724420190995907, - "grad_norm": 0.027707895264029503, - "learning_rate": 4.254883994441023e-05, - "loss": 0.014376556873321534, - "step": 2745 - }, - { - "epoch": 1.8758526603001364, - "grad_norm": 0.0010299253044649959, - "learning_rate": 4.244724003071302e-05, - "loss": 0.03207484483718872, - "step": 2750 - }, - { - "epoch": 1.879263301500682, - "grad_norm": 0.09466377645730972, - "learning_rate": 4.234560314516241e-05, - "loss": 0.0034012068063020706, - "step": 2755 - }, - { - "epoch": 1.8826739427012278, - "grad_norm": 0.010737070813775063, - "learning_rate": 4.224393004731403e-05, - "loss": 0.0015277769416570663, - "step": 2760 - }, - { - "epoch": 1.8860845839017735, - "grad_norm": 0.0126175656914711, - "learning_rate": 4.2142221496994144e-05, - "loss": 0.0010683752596378326, - "step": 2765 - }, - { - "epoch": 1.8894952251023192, - "grad_norm": 0.03981072083115578, - "learning_rate": 4.2040478254293946e-05, - "loss": 0.013066369295120239, - "step": 2770 - }, - { - "epoch": 1.8929058663028648, - "grad_norm": 1.678425669670105, - "learning_rate": 4.193870107956391e-05, - "loss": 0.04245063066482544, - "step": 2775 - }, - { - "epoch": 1.8963165075034105, - "grad_norm": 0.21114972233772278, - "learning_rate": 4.1836890733408063e-05, - "loss": 0.006565409898757935, - "step": 2780 - }, - { - "epoch": 1.8997271487039562, - "grad_norm": 0.13557757437229156, - "learning_rate": 4.173504797667836e-05, - "loss": 0.0032049473375082016, - "step": 2785 - }, - { - "epoch": 1.9031377899045019, - "grad_norm": 0.03560245409607887, - "learning_rate": 4.163317357046896e-05, - "loss": 0.036527630686759946, - "step": 2790 - }, - { - "epoch": 1.9065484311050478, - "grad_norm": 0.016457395628094673, - "learning_rate": 4.1531268276110534e-05, - "loss": 0.004363229870796204, - "step": 2795 - }, - { - "epoch": 1.9099590723055935, - "grad_norm": 0.15892116725444794, - "learning_rate": 4.142933285516459e-05, - "loss": 0.0009642043150961399, - "step": 2800 - }, - { - "epoch": 1.9133697135061392, - "grad_norm": 1.4503952264785767, - "learning_rate": 4.1327368069417805e-05, - "loss": 0.016029161214828492, - "step": 2805 - }, - { - "epoch": 1.9167803547066848, - "grad_norm": 0.05091691017150879, - "learning_rate": 4.122537468087626e-05, - "loss": 0.008691015094518662, - "step": 2810 - }, - { - "epoch": 1.9201909959072305, - "grad_norm": 0.33131423592567444, - "learning_rate": 4.1123353451759843e-05, - "loss": 0.026498579978942872, - "step": 2815 - }, - { - "epoch": 1.9236016371077762, - "grad_norm": 0.011024169623851776, - "learning_rate": 4.1021305144496455e-05, - "loss": 0.005746996402740479, - "step": 2820 - }, - { - "epoch": 1.9270122783083221, - "grad_norm": 0.003868364728987217, - "learning_rate": 4.091923052171637e-05, - "loss": 0.010599984973669051, - "step": 2825 - }, - { - "epoch": 1.9304229195088678, - "grad_norm": 0.06572377681732178, - "learning_rate": 4.081713034624656e-05, - "loss": 0.0074796505272388455, - "step": 2830 - }, - { - "epoch": 1.9338335607094135, - "grad_norm": 0.004749608226120472, - "learning_rate": 4.07150053811049e-05, - "loss": 0.001074880175292492, - "step": 2835 - }, - { - "epoch": 1.9372442019099592, - "grad_norm": 1.0662771463394165, - "learning_rate": 4.061285638949456e-05, - "loss": 0.012685902416706085, - "step": 2840 - }, - { - "epoch": 1.9406548431105048, - "grad_norm": 0.05863015726208687, - "learning_rate": 4.0510684134798275e-05, - "loss": 0.02307589054107666, - "step": 2845 - }, - { - "epoch": 1.9440654843110505, - "grad_norm": 0.1672995239496231, - "learning_rate": 4.0408489380572595e-05, - "loss": 0.0009137367829680443, - "step": 2850 - }, - { - "epoch": 1.9474761255115962, - "grad_norm": 0.37154069542884827, - "learning_rate": 4.030627289054224e-05, - "loss": 0.03070145845413208, - "step": 2855 - }, - { - "epoch": 1.950886766712142, - "grad_norm": 1.7240241765975952, - "learning_rate": 4.020403542859436e-05, - "loss": 0.02094976305961609, - "step": 2860 - }, - { - "epoch": 1.9542974079126876, - "grad_norm": 0.019670270383358, - "learning_rate": 4.0101777758772826e-05, - "loss": 0.002072345092892647, - "step": 2865 - }, - { - "epoch": 1.9577080491132333, - "grad_norm": 0.004318730439990759, - "learning_rate": 3.999950064527255e-05, - "loss": 0.004361823201179504, - "step": 2870 - }, - { - "epoch": 1.961118690313779, - "grad_norm": 0.01702667959034443, - "learning_rate": 3.989720485243371e-05, - "loss": 0.0062848575413227085, - "step": 2875 - }, - { - "epoch": 1.9645293315143246, - "grad_norm": 0.011724979616701603, - "learning_rate": 3.9794891144736114e-05, - "loss": 0.010996624082326888, - "step": 2880 - }, - { - "epoch": 1.9679399727148703, - "grad_norm": 0.05676786229014397, - "learning_rate": 3.9692560286793454e-05, - "loss": 0.0015414996072649957, - "step": 2885 - }, - { - "epoch": 1.971350613915416, - "grad_norm": 0.0819210484623909, - "learning_rate": 3.959021304334756e-05, - "loss": 0.0008444737643003464, - "step": 2890 - }, - { - "epoch": 1.9747612551159617, - "grad_norm": 2.654984951019287, - "learning_rate": 3.948785017926274e-05, - "loss": 0.029948851466178893, - "step": 2895 - }, - { - "epoch": 1.9781718963165074, - "grad_norm": 1.9272631406784058, - "learning_rate": 3.938547245952003e-05, - "loss": 0.028752812743186952, - "step": 2900 - }, - { - "epoch": 1.981582537517053, - "grad_norm": 1.8656548261642456, - "learning_rate": 3.9283080649211474e-05, - "loss": 0.013515619933605194, - "step": 2905 - }, - { - "epoch": 1.984993178717599, - "grad_norm": 0.12653613090515137, - "learning_rate": 3.918067551353445e-05, - "loss": 0.0011569897644221783, - "step": 2910 - }, - { - "epoch": 1.9884038199181446, - "grad_norm": 0.07956118136644363, - "learning_rate": 3.9078257817785886e-05, - "loss": 0.024067461490631104, - "step": 2915 - }, - { - "epoch": 1.9918144611186903, - "grad_norm": 0.05314113199710846, - "learning_rate": 3.897582832735658e-05, - "loss": 0.008826743811368942, - "step": 2920 - }, - { - "epoch": 1.995225102319236, - "grad_norm": 0.05015930160880089, - "learning_rate": 3.887338780772551e-05, - "loss": 0.017050875723361968, - "step": 2925 - }, - { - "epoch": 1.9972714870395634, - "eval_loss": 0.051675114780664444, - "eval_runtime": 0.9019, - "eval_samples_per_second": 83.154, - "eval_steps_per_second": 2.217, - "step": 2928 - }, - { - "eval_cer_subset": 0.016214743153018123, - "eval_cer_subset_edit_distance": 119, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2928 - }, - { - "epoch": 1.9986357435197817, - "grad_norm": 0.0063513885252177715, - "learning_rate": 3.8770937024454024e-05, - "loss": 0.06096935272216797, - "step": 2930 - }, - { - "epoch": 2.0020463847203276, - "grad_norm": 0.3572078347206116, - "learning_rate": 3.86684767431802e-05, - "loss": 0.006809630990028381, - "step": 2935 - }, - { - "epoch": 2.0054570259208733, - "grad_norm": 0.06565147638320923, - "learning_rate": 3.8566007729613116e-05, - "loss": 0.0028434153646230698, - "step": 2940 - }, - { - "epoch": 2.008867667121419, - "grad_norm": 0.842980682849884, - "learning_rate": 3.846353074952705e-05, - "loss": 0.009484797716140747, - "step": 2945 - }, - { - "epoch": 2.0122783083219646, - "grad_norm": 0.2072802633047104, - "learning_rate": 3.83610465687559e-05, - "loss": 0.002631821669638157, - "step": 2950 - }, - { - "epoch": 2.0156889495225103, - "grad_norm": 0.010441784746944904, - "learning_rate": 3.8258555953187294e-05, - "loss": 0.0031868770718574526, - "step": 2955 - }, - { - "epoch": 2.019099590723056, - "grad_norm": 0.01504182443022728, - "learning_rate": 3.8156059668756994e-05, - "loss": 0.0008036898449063301, - "step": 2960 - }, - { - "epoch": 2.0225102319236017, - "grad_norm": 0.1884382665157318, - "learning_rate": 3.805355848144312e-05, - "loss": 0.0035643450915813445, - "step": 2965 - }, - { - "epoch": 2.0259208731241474, - "grad_norm": 0.0791923776268959, - "learning_rate": 3.795105315726042e-05, - "loss": 0.006217213720083237, - "step": 2970 - }, - { - "epoch": 2.029331514324693, - "grad_norm": 0.3660128116607666, - "learning_rate": 3.7848544462254586e-05, - "loss": 0.01521536111831665, - "step": 2975 - }, - { - "epoch": 2.0327421555252387, - "grad_norm": 1.1506773233413696, - "learning_rate": 3.774603316249646e-05, - "loss": 0.016655027866363525, - "step": 2980 - }, - { - "epoch": 2.0361527967257844, - "grad_norm": 0.010742763057351112, - "learning_rate": 3.764352002407638e-05, - "loss": 0.0031856697052717207, - "step": 2985 - }, - { - "epoch": 2.03956343792633, - "grad_norm": 0.008567198179662228, - "learning_rate": 3.754100581309843e-05, - "loss": 0.006546307355165482, - "step": 2990 - }, - { - "epoch": 2.042974079126876, - "grad_norm": 0.242637500166893, - "learning_rate": 3.743849129567467e-05, - "loss": 0.01844196617603302, - "step": 2995 - }, - { - "epoch": 2.0463847203274215, - "grad_norm": 0.0021864245645701885, - "learning_rate": 3.7335977237919486e-05, - "loss": 0.0010665619745850563, - "step": 3000 - }, - { - "epoch": 2.049795361527967, - "grad_norm": 0.0823604166507721, - "learning_rate": 3.723346440594384e-05, - "loss": 0.007866787165403366, - "step": 3005 - }, - { - "epoch": 2.053206002728513, - "grad_norm": 1.950189471244812, - "learning_rate": 3.713095356584948e-05, - "loss": 0.008237652480602264, - "step": 3010 - }, - { - "epoch": 2.0566166439290585, - "grad_norm": 0.0031665184069424868, - "learning_rate": 3.702844548372333e-05, - "loss": 0.0026241108775138856, - "step": 3015 - }, - { - "epoch": 2.060027285129604, - "grad_norm": 0.000985809718258679, - "learning_rate": 3.692594092563164e-05, - "loss": 0.0006582608446478843, - "step": 3020 - }, - { - "epoch": 2.06343792633015, - "grad_norm": 0.005130598321557045, - "learning_rate": 3.682344065761439e-05, - "loss": 0.003146781027317047, - "step": 3025 - }, - { - "epoch": 2.0668485675306956, - "grad_norm": 0.37476831674575806, - "learning_rate": 3.6720945445679456e-05, - "loss": 0.0038135331124067307, - "step": 3030 - }, - { - "epoch": 2.0702592087312413, - "grad_norm": 0.7057031989097595, - "learning_rate": 3.661845605579694e-05, - "loss": 0.01638354957103729, - "step": 3035 - }, - { - "epoch": 2.0736698499317874, - "grad_norm": 0.002361712511628866, - "learning_rate": 3.651597325389343e-05, - "loss": 0.008126144856214523, - "step": 3040 - }, - { - "epoch": 2.077080491132333, - "grad_norm": 0.14359615743160248, - "learning_rate": 3.641349780584628e-05, - "loss": 0.0010236113332211972, - "step": 3045 - }, - { - "epoch": 2.0804911323328787, - "grad_norm": 0.02165267802774906, - "learning_rate": 3.631103047747791e-05, - "loss": 0.001835300400853157, - "step": 3050 - }, - { - "epoch": 2.0839017735334244, - "grad_norm": 0.08726503700017929, - "learning_rate": 3.6208572034550014e-05, - "loss": 0.0011202219873666763, - "step": 3055 - }, - { - "epoch": 2.08731241473397, - "grad_norm": 0.011694432236254215, - "learning_rate": 3.6106123242757934e-05, - "loss": 0.0012682409957051276, - "step": 3060 - }, - { - "epoch": 2.090723055934516, - "grad_norm": 0.011882166378200054, - "learning_rate": 3.6003684867724856e-05, - "loss": 0.0017605546861886978, - "step": 3065 - }, - { - "epoch": 2.0941336971350615, - "grad_norm": 0.04591360688209534, - "learning_rate": 3.5901257674996135e-05, - "loss": 0.005101381987333298, - "step": 3070 - }, - { - "epoch": 2.097544338335607, - "grad_norm": 0.004696133080869913, - "learning_rate": 3.579884243003353e-05, - "loss": 0.0011597291566431522, - "step": 3075 - }, - { - "epoch": 2.100954979536153, - "grad_norm": 0.15893827378749847, - "learning_rate": 3.5696439898209546e-05, - "loss": 0.004860148951411247, - "step": 3080 - }, - { - "epoch": 2.1043656207366985, - "grad_norm": 0.0052610537968575954, - "learning_rate": 3.559405084480166e-05, - "loss": 0.0022133996710181235, - "step": 3085 - }, - { - "epoch": 2.107776261937244, - "grad_norm": 0.04163965955376625, - "learning_rate": 3.5491676034986634e-05, - "loss": 0.0011653171852231026, - "step": 3090 - }, - { - "epoch": 2.11118690313779, - "grad_norm": 0.03646431118249893, - "learning_rate": 3.538931623383477e-05, - "loss": 0.0010974819771945477, - "step": 3095 - }, - { - "epoch": 2.1145975443383356, - "grad_norm": 0.0027346035931259394, - "learning_rate": 3.5286972206304225e-05, - "loss": 0.0002963356673717499, - "step": 3100 - }, - { - "epoch": 2.1180081855388813, - "grad_norm": 0.030314767733216286, - "learning_rate": 3.5184644717235233e-05, - "loss": 0.0013646245934069157, - "step": 3105 - }, - { - "epoch": 2.121418826739427, - "grad_norm": 0.07366184145212173, - "learning_rate": 3.5082334531344514e-05, - "loss": 0.0018215376883745193, - "step": 3110 - }, - { - "epoch": 2.1248294679399726, - "grad_norm": 0.5131163597106934, - "learning_rate": 3.498004241321938e-05, - "loss": 0.011788859963417053, - "step": 3115 - }, - { - "epoch": 2.1282401091405183, - "grad_norm": 0.005606099497526884, - "learning_rate": 3.48777691273122e-05, - "loss": 0.00023315094877034425, - "step": 3120 - }, - { - "epoch": 2.131650750341064, - "grad_norm": 0.218208909034729, - "learning_rate": 3.4775515437934554e-05, - "loss": 0.0018167193979024888, - "step": 3125 - }, - { - "epoch": 2.1350613915416097, - "grad_norm": 0.026605455204844475, - "learning_rate": 3.467328210925161e-05, - "loss": 0.0012846079654991627, - "step": 3130 - }, - { - "epoch": 2.1384720327421554, - "grad_norm": 1.4139975309371948, - "learning_rate": 3.457106990527632e-05, - "loss": 0.00508112832903862, - "step": 3135 - }, - { - "epoch": 2.141882673942701, - "grad_norm": 0.019414646551012993, - "learning_rate": 3.446887958986385e-05, - "loss": 0.00019377884455025197, - "step": 3140 - }, - { - "epoch": 2.1452933151432467, - "grad_norm": 0.0009443740709684789, - "learning_rate": 3.4366711926705694e-05, - "loss": 0.0002384019084274769, - "step": 3145 - }, - { - "epoch": 2.148703956343793, - "grad_norm": 0.004274521954357624, - "learning_rate": 3.426456767932416e-05, - "loss": 0.0001209982088766992, - "step": 3150 - }, - { - "epoch": 2.1521145975443385, - "grad_norm": 1.536423683166504, - "learning_rate": 3.416244761106645e-05, - "loss": 0.008635792136192321, - "step": 3155 - }, - { - "epoch": 2.155525238744884, - "grad_norm": 0.0005219463491812348, - "learning_rate": 3.406035248509918e-05, - "loss": 0.009876561909914016, - "step": 3160 - }, - { - "epoch": 2.15893587994543, - "grad_norm": 0.0003459984145592898, - "learning_rate": 3.395828306440249e-05, - "loss": 0.00039457883685827254, - "step": 3165 - }, - { - "epoch": 2.1623465211459756, - "grad_norm": 0.001981241861358285, - "learning_rate": 3.3856240111764465e-05, - "loss": 0.0015202089212834834, - "step": 3170 - }, - { - "epoch": 2.1657571623465213, - "grad_norm": 2.1673197746276855, - "learning_rate": 3.375422438977536e-05, - "loss": 0.006145225092768669, - "step": 3175 - }, - { - "epoch": 2.169167803547067, - "grad_norm": 1.1808840036392212, - "learning_rate": 3.365223666082195e-05, - "loss": 0.013607437908649444, - "step": 3180 - }, - { - "epoch": 2.1725784447476126, - "grad_norm": 0.028271734714508057, - "learning_rate": 3.3550277687081766e-05, - "loss": 0.0013325599022209645, - "step": 3185 - }, - { - "epoch": 2.1759890859481583, - "grad_norm": 0.24461407959461212, - "learning_rate": 3.344834823051754e-05, - "loss": 0.0011271734721958638, - "step": 3190 - }, - { - "epoch": 2.179399727148704, - "grad_norm": 0.870476245880127, - "learning_rate": 3.334644905287129e-05, - "loss": 0.006120540574193001, - "step": 3195 - }, - { - "epoch": 2.1828103683492497, - "grad_norm": 0.24630939960479736, - "learning_rate": 3.324458091565887e-05, - "loss": 0.006894994527101517, - "step": 3200 - }, - { - "epoch": 2.1862210095497954, - "grad_norm": 0.035023678094148636, - "learning_rate": 3.314274458016407e-05, - "loss": 0.0012451894581317902, - "step": 3205 - }, - { - "epoch": 2.189631650750341, - "grad_norm": 0.0890582948923111, - "learning_rate": 3.3040940807433086e-05, - "loss": 0.0024930678308010103, - "step": 3210 - }, - { - "epoch": 2.1930422919508867, - "grad_norm": 0.30489957332611084, - "learning_rate": 3.2939170358268715e-05, - "loss": 0.023087967932224274, - "step": 3215 - }, - { - "epoch": 2.1964529331514324, - "grad_norm": 0.0017311271512880921, - "learning_rate": 3.283743399322477e-05, - "loss": 0.010184342414140702, - "step": 3220 - }, - { - "epoch": 2.199863574351978, - "grad_norm": 0.08493661880493164, - "learning_rate": 3.273573247260027e-05, - "loss": 0.02805263102054596, - "step": 3225 - }, - { - "epoch": 2.203274215552524, - "grad_norm": 0.09135634452104568, - "learning_rate": 3.2634066556433934e-05, - "loss": 0.0009638279676437378, - "step": 3230 - }, - { - "epoch": 2.2066848567530695, - "grad_norm": 0.05967738851904869, - "learning_rate": 3.2532437004498316e-05, - "loss": 0.007517358660697937, - "step": 3235 - }, - { - "epoch": 2.210095497953615, - "grad_norm": 0.03030387870967388, - "learning_rate": 3.243084457629425e-05, - "loss": 0.0009239451959729194, - "step": 3240 - }, - { - "epoch": 2.213506139154161, - "grad_norm": 0.004568960517644882, - "learning_rate": 3.2329290031045114e-05, - "loss": 0.00045777852647006513, - "step": 3245 - }, - { - "epoch": 2.2169167803547065, - "grad_norm": 0.20760250091552734, - "learning_rate": 3.2227774127691225e-05, - "loss": 0.0007285364903509617, - "step": 3250 - }, - { - "epoch": 2.220327421555252, - "grad_norm": 0.0028919128235429525, - "learning_rate": 3.2126297624884065e-05, - "loss": 0.001136382296681404, - "step": 3255 - }, - { - "epoch": 2.223738062755798, - "grad_norm": 0.0026204732712358236, - "learning_rate": 3.20248612809807e-05, - "loss": 0.0001833780319429934, - "step": 3260 - }, - { - "epoch": 2.2271487039563436, - "grad_norm": 0.054305560886859894, - "learning_rate": 3.192346585403805e-05, - "loss": 0.003986500948667526, - "step": 3265 - }, - { - "epoch": 2.2305593451568897, - "grad_norm": 0.024538133293390274, - "learning_rate": 3.182211210180732e-05, - "loss": 0.0003968174569308758, - "step": 3270 - }, - { - "epoch": 2.2339699863574354, - "grad_norm": 0.003948847763240337, - "learning_rate": 3.172080078172817e-05, - "loss": 0.0004363433923572302, - "step": 3275 - }, - { - "epoch": 2.237380627557981, - "grad_norm": 0.0024372704792767763, - "learning_rate": 3.161953265092322e-05, - "loss": 0.001003190129995346, - "step": 3280 - }, - { - "epoch": 2.2407912687585267, - "grad_norm": 0.05504141375422478, - "learning_rate": 3.1518308466192346e-05, - "loss": 0.00043217958882451056, - "step": 3285 - }, - { - "epoch": 2.2442019099590724, - "grad_norm": 0.003660411573946476, - "learning_rate": 3.141712898400692e-05, - "loss": 0.0005229417700320482, - "step": 3290 - }, - { - "epoch": 2.246930422919509, - "eval_loss": 0.07013023644685745, - "eval_runtime": 0.915, - "eval_samples_per_second": 81.971, - "eval_steps_per_second": 2.186, - "step": 3294 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3294 - }, - { - "epoch": 2.247612551159618, - "grad_norm": 0.0009039652650244534, - "learning_rate": 3.1315994960504354e-05, - "loss": 0.0009505398571491242, - "step": 3295 - }, - { - "epoch": 2.251023192360164, - "grad_norm": 0.0008299718610942364, - "learning_rate": 3.121490715148224e-05, - "loss": 0.006436178088188171, - "step": 3300 - }, - { - "epoch": 2.2544338335607095, - "grad_norm": 0.003360757604241371, - "learning_rate": 3.1113866312392846e-05, - "loss": 0.0004931201227009296, - "step": 3305 - }, - { - "epoch": 2.257844474761255, - "grad_norm": 0.0006958392332307994, - "learning_rate": 3.1012873198337415e-05, - "loss": 0.0008634727448225022, - "step": 3310 - }, - { - "epoch": 2.261255115961801, - "grad_norm": 0.0006489035440608859, - "learning_rate": 3.0911928564060525e-05, - "loss": 0.02126412242650986, - "step": 3315 - }, - { - "epoch": 2.2646657571623465, - "grad_norm": 0.05853112041950226, - "learning_rate": 3.081103316394446e-05, - "loss": 0.0011481027118861674, - "step": 3320 - }, - { - "epoch": 2.268076398362892, - "grad_norm": 0.018470890820026398, - "learning_rate": 3.0710187752003576e-05, - "loss": 0.0005085847340524196, - "step": 3325 - }, - { - "epoch": 2.271487039563438, - "grad_norm": 0.002904064953327179, - "learning_rate": 3.06093930818786e-05, - "loss": 0.0011355782859027385, - "step": 3330 - }, - { - "epoch": 2.2748976807639836, - "grad_norm": 0.006562090013176203, - "learning_rate": 3.0508649906831165e-05, - "loss": 0.0017314480617642402, - "step": 3335 - }, - { - "epoch": 2.2783083219645293, - "grad_norm": 0.012832654640078545, - "learning_rate": 3.040795897973794e-05, - "loss": 0.014564378559589386, - "step": 3340 - }, - { - "epoch": 2.281718963165075, - "grad_norm": 0.40248075127601624, - "learning_rate": 3.030732105308523e-05, - "loss": 0.013111820816993714, - "step": 3345 - }, - { - "epoch": 2.2851296043656206, - "grad_norm": 0.009625518694519997, - "learning_rate": 3.0206736878963198e-05, - "loss": 0.0003735888050869107, - "step": 3350 - }, - { - "epoch": 2.2885402455661663, - "grad_norm": 0.05925761163234711, - "learning_rate": 3.010620720906034e-05, - "loss": 0.0005200970452278852, - "step": 3355 - }, - { - "epoch": 2.291950886766712, - "grad_norm": 0.04488271474838257, - "learning_rate": 3.0005732794657804e-05, - "loss": 0.0017546603456139564, - "step": 3360 - }, - { - "epoch": 2.2953615279672577, - "grad_norm": 0.0013143798569217324, - "learning_rate": 2.990531438662383e-05, - "loss": 0.0006482157856225968, - "step": 3365 - }, - { - "epoch": 2.2987721691678034, - "grad_norm": 0.0018280980875715613, - "learning_rate": 2.980495273540805e-05, - "loss": 0.002798055298626423, - "step": 3370 - }, - { - "epoch": 2.3021828103683495, - "grad_norm": 0.0068644145503640175, - "learning_rate": 2.9704648591036028e-05, - "loss": 0.0010916708968579769, - "step": 3375 - }, - { - "epoch": 2.305593451568895, - "grad_norm": 0.006140770856291056, - "learning_rate": 2.9604402703103482e-05, - "loss": 0.0003204951295629144, - "step": 3380 - }, - { - "epoch": 2.309004092769441, - "grad_norm": 0.01666918210685253, - "learning_rate": 2.9504215820770825e-05, - "loss": 0.002915392816066742, - "step": 3385 - }, - { - "epoch": 2.3124147339699865, - "grad_norm": 0.001569412648677826, - "learning_rate": 2.9404088692757462e-05, - "loss": 0.00282623004168272, - "step": 3390 - }, - { - "epoch": 2.315825375170532, - "grad_norm": 2.6985678672790527, - "learning_rate": 2.930402206733629e-05, - "loss": 0.056363034248352054, - "step": 3395 - }, - { - "epoch": 2.319236016371078, - "grad_norm": 0.061534252017736435, - "learning_rate": 2.9204016692328008e-05, - "loss": 0.002193786948919296, - "step": 3400 - }, - { - "epoch": 2.3226466575716236, - "grad_norm": 0.00724546005949378, - "learning_rate": 2.9104073315095624e-05, - "loss": 0.0027640098705887794, - "step": 3405 - }, - { - "epoch": 2.3260572987721693, - "grad_norm": 0.0014935819199308753, - "learning_rate": 2.900419268253876e-05, - "loss": 0.0014965098351240158, - "step": 3410 - }, - { - "epoch": 2.329467939972715, - "grad_norm": 0.2667955458164215, - "learning_rate": 2.89043755410882e-05, - "loss": 0.0009135601110756397, - "step": 3415 - }, - { - "epoch": 2.3328785811732606, - "grad_norm": 0.015711264684796333, - "learning_rate": 2.8804622636700195e-05, - "loss": 0.0004993634298443794, - "step": 3420 - }, - { - "epoch": 2.3362892223738063, - "grad_norm": 0.000695803901180625, - "learning_rate": 2.8704934714850972e-05, - "loss": 0.0010460540652275085, - "step": 3425 - }, - { - "epoch": 2.339699863574352, - "grad_norm": 0.00030175631400197744, - "learning_rate": 2.8605312520531102e-05, - "loss": 0.0011491063050925732, - "step": 3430 - }, - { - "epoch": 2.3431105047748977, - "grad_norm": 0.0008360512438230217, - "learning_rate": 2.850575679823998e-05, - "loss": 0.005195276811718941, - "step": 3435 - }, - { - "epoch": 2.3465211459754434, - "grad_norm": 0.07670744508504868, - "learning_rate": 2.840626829198022e-05, - "loss": 0.001102046575397253, - "step": 3440 - }, - { - "epoch": 2.349931787175989, - "grad_norm": 0.0048200939781963825, - "learning_rate": 2.8306847745252154e-05, - "loss": 0.00011967071332037449, - "step": 3445 - }, - { - "epoch": 2.3533424283765347, - "grad_norm": 0.0036802536342293024, - "learning_rate": 2.8207495901048164e-05, - "loss": 0.003212982416152954, - "step": 3450 - }, - { - "epoch": 2.3567530695770804, - "grad_norm": 0.0017565820598974824, - "learning_rate": 2.8108213501847284e-05, - "loss": 3.878590650856495e-05, - "step": 3455 - }, - { - "epoch": 2.360163710777626, - "grad_norm": 0.07837986201047897, - "learning_rate": 2.8009001289609514e-05, - "loss": 0.00035386246163398026, - "step": 3460 - }, - { - "epoch": 2.363574351978172, - "grad_norm": 0.035858154296875, - "learning_rate": 2.7909860005770364e-05, - "loss": 0.0020171813666820526, - "step": 3465 - }, - { - "epoch": 2.3669849931787175, - "grad_norm": 0.001313618617132306, - "learning_rate": 2.781079039123525e-05, - "loss": 0.0077533811330795285, - "step": 3470 - }, - { - "epoch": 2.370395634379263, - "grad_norm": 0.022166471928358078, - "learning_rate": 2.771179318637402e-05, - "loss": 0.00021515686530619859, - "step": 3475 - }, - { - "epoch": 2.373806275579809, - "grad_norm": 0.0037807885091751814, - "learning_rate": 2.7612869131015353e-05, - "loss": 0.008334387093782425, - "step": 3480 - }, - { - "epoch": 2.3772169167803545, - "grad_norm": 0.02126333676278591, - "learning_rate": 2.7514018964441313e-05, - "loss": 0.0012980472296476365, - "step": 3485 - }, - { - "epoch": 2.3806275579809, - "grad_norm": 0.0007329506915993989, - "learning_rate": 2.7415243425381707e-05, - "loss": 0.000131706683896482, - "step": 3490 - }, - { - "epoch": 2.384038199181446, - "grad_norm": 0.008962417021393776, - "learning_rate": 2.73165432520087e-05, - "loss": 0.0001407766016200185, - "step": 3495 - }, - { - "epoch": 2.3874488403819916, - "grad_norm": 0.06224314495921135, - "learning_rate": 2.721791918193119e-05, - "loss": 0.0005040234886109829, - "step": 3500 - }, - { - "epoch": 2.3908594815825377, - "grad_norm": 0.007790696807205677, - "learning_rate": 2.7119371952189368e-05, - "loss": 0.00020941467955708503, - "step": 3505 - }, - { - "epoch": 2.3942701227830834, - "grad_norm": 0.1999143660068512, - "learning_rate": 2.7020902299249144e-05, - "loss": 0.0005157966166734696, - "step": 3510 - }, - { - "epoch": 2.397680763983629, - "grad_norm": 1.5223946571350098, - "learning_rate": 2.692251095899673e-05, - "loss": 0.004808775335550308, - "step": 3515 - }, - { - "epoch": 2.4010914051841747, - "grad_norm": 0.0005383774405345321, - "learning_rate": 2.6824198666733024e-05, - "loss": 0.0007459132932126522, - "step": 3520 - }, - { - "epoch": 2.4045020463847204, - "grad_norm": 0.02152041345834732, - "learning_rate": 2.672596615716823e-05, - "loss": 0.010163982212543488, - "step": 3525 - }, - { - "epoch": 2.407912687585266, - "grad_norm": 0.1950986683368683, - "learning_rate": 2.6627814164416303e-05, - "loss": 0.002464359626173973, - "step": 3530 - }, - { - "epoch": 2.411323328785812, - "grad_norm": 0.21561792492866516, - "learning_rate": 2.652974342198947e-05, - "loss": 0.0010975897312164307, - "step": 3535 - }, - { - "epoch": 2.4147339699863575, - "grad_norm": 0.0007951174047775567, - "learning_rate": 2.6431754662792775e-05, - "loss": 6.033455138094723e-05, - "step": 3540 - }, - { - "epoch": 2.418144611186903, - "grad_norm": 0.0016590118175372481, - "learning_rate": 2.633384861911856e-05, - "loss": 0.00012161724735051393, - "step": 3545 - }, - { - "epoch": 2.421555252387449, - "grad_norm": 0.004098537378013134, - "learning_rate": 2.6236026022641047e-05, - "loss": 0.0006160829216241837, - "step": 3550 - }, - { - "epoch": 2.4249658935879945, - "grad_norm": 0.0009240853250958025, - "learning_rate": 2.6138287604410772e-05, - "loss": 8.804704993963242e-05, - "step": 3555 - }, - { - "epoch": 2.42837653478854, - "grad_norm": 0.005952226463705301, - "learning_rate": 2.604063409484928e-05, - "loss": 0.0006035147234797478, - "step": 3560 - }, - { - "epoch": 2.431787175989086, - "grad_norm": 0.03809252381324768, - "learning_rate": 2.5943066223743488e-05, - "loss": 0.00727783590555191, - "step": 3565 - }, - { - "epoch": 2.4351978171896316, - "grad_norm": 0.05054875835776329, - "learning_rate": 2.5845584720240384e-05, - "loss": 0.0082052581012249, - "step": 3570 - }, - { - "epoch": 2.4386084583901773, - "grad_norm": 0.0147855868563056, - "learning_rate": 2.5748190312841466e-05, - "loss": 0.011614852398633958, - "step": 3575 - }, - { - "epoch": 2.442019099590723, - "grad_norm": 0.011641742661595345, - "learning_rate": 2.5650883729397373e-05, - "loss": 0.0002830417361110449, - "step": 3580 - }, - { - "epoch": 2.4454297407912686, - "grad_norm": 0.04626445844769478, - "learning_rate": 2.5553665697102386e-05, - "loss": 0.0003774407086893916, - "step": 3585 - }, - { - "epoch": 2.4488403819918143, - "grad_norm": 0.3234706521034241, - "learning_rate": 2.5456536942489065e-05, - "loss": 0.0009496832266449928, - "step": 3590 - }, - { - "epoch": 2.45225102319236, - "grad_norm": 0.029156841337680817, - "learning_rate": 2.535949819142272e-05, - "loss": 0.0016127176582813262, - "step": 3595 - }, - { - "epoch": 2.4556616643929057, - "grad_norm": 0.0015022120205685496, - "learning_rate": 2.52625501690961e-05, - "loss": 0.00010128046851605177, - "step": 3600 - }, - { - "epoch": 2.459072305593452, - "grad_norm": 0.12954266369342804, - "learning_rate": 2.5165693600023872e-05, - "loss": 0.004440005496144294, - "step": 3605 - }, - { - "epoch": 2.4624829467939975, - "grad_norm": 0.022409839555621147, - "learning_rate": 2.5068929208037295e-05, - "loss": 0.0019246777519583702, - "step": 3610 - }, - { - "epoch": 2.465893587994543, - "grad_norm": 0.0018720730440691113, - "learning_rate": 2.497225771627873e-05, - "loss": 0.004561808705329895, - "step": 3615 - }, - { - "epoch": 2.469304229195089, - "grad_norm": 0.0021158247254788876, - "learning_rate": 2.4875679847196312e-05, - "loss": 0.005481125041842461, - "step": 3620 - }, - { - "epoch": 2.4727148703956345, - "grad_norm": 0.0024307845160365105, - "learning_rate": 2.477919632253845e-05, - "loss": 0.0009140795096755028, - "step": 3625 - }, - { - "epoch": 2.47612551159618, - "grad_norm": 0.0020758784376084805, - "learning_rate": 2.4682807863348583e-05, - "loss": 0.001236506924033165, - "step": 3630 - }, - { - "epoch": 2.479536152796726, - "grad_norm": 0.0006182460929267108, - "learning_rate": 2.4586515189959614e-05, - "loss": 0.00015565860085189342, - "step": 3635 - }, - { - "epoch": 2.4829467939972716, - "grad_norm": 0.05087731033563614, - "learning_rate": 2.4490319021988688e-05, - "loss": 0.00022137174382805825, - "step": 3640 - }, - { - "epoch": 2.4863574351978173, - "grad_norm": 0.03250613436102867, - "learning_rate": 2.4394220078331695e-05, - "loss": 0.00028696306981146336, - "step": 3645 - }, - { - "epoch": 2.489768076398363, - "grad_norm": 0.017168540507555008, - "learning_rate": 2.429821907715798e-05, - "loss": 0.0003641644492745399, - "step": 3650 - }, - { - "epoch": 2.4931787175989086, - "grad_norm": 0.0670199990272522, - "learning_rate": 2.420231673590491e-05, - "loss": 0.00015748695004731418, - "step": 3655 - }, - { - "epoch": 2.4965893587994543, - "grad_norm": 0.003998387139290571, - "learning_rate": 2.4106513771272585e-05, - "loss": 0.00026149852201342585, - "step": 3660 - }, - { - "epoch": 2.4965893587994543, - "eval_loss": 0.06822175532579422, - "eval_runtime": 0.9108, - "eval_samples_per_second": 82.345, - "eval_steps_per_second": 2.196, - "step": 3660 - }, - { - "eval_cer_subset": 0.01675977653631285, - "eval_cer_subset_edit_distance": 123, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3660 - }, - { - "epoch": 2.5, - "grad_norm": 0.0059893373399972916, - "learning_rate": 2.4010810899218384e-05, - "loss": 0.0037302006036043166, - "step": 3665 - }, - { - "epoch": 2.5034106412005457, - "grad_norm": 0.3968847692012787, - "learning_rate": 2.3915208834951736e-05, - "loss": 0.0008235686458647251, - "step": 3670 - }, - { - "epoch": 2.5068212824010914, - "grad_norm": 0.001170233590528369, - "learning_rate": 2.3819708292928645e-05, - "loss": 0.0021816927939653395, - "step": 3675 - }, - { - "epoch": 2.510231923601637, - "grad_norm": 0.0864306092262268, - "learning_rate": 2.3724309986846476e-05, - "loss": 0.00794672966003418, - "step": 3680 - }, - { - "epoch": 2.5136425648021827, - "grad_norm": 0.0012164375511929393, - "learning_rate": 2.362901462963851e-05, - "loss": 0.00014161464059725404, - "step": 3685 - }, - { - "epoch": 2.5170532060027284, - "grad_norm": 0.0047707995399832726, - "learning_rate": 2.353382293346872e-05, - "loss": 0.00012235456379130482, - "step": 3690 - }, - { - "epoch": 2.520463847203274, - "grad_norm": 0.0010226344456896186, - "learning_rate": 2.3438735609726346e-05, - "loss": 0.0006677288562059403, - "step": 3695 - }, - { - "epoch": 2.52387448840382, - "grad_norm": 0.01809096150100231, - "learning_rate": 2.334375336902067e-05, - "loss": 0.0004967927932739257, - "step": 3700 - }, - { - "epoch": 2.5272851296043655, - "grad_norm": 0.006922638975083828, - "learning_rate": 2.3248876921175613e-05, - "loss": 0.0012997164390981196, - "step": 3705 - }, - { - "epoch": 2.530695770804911, - "grad_norm": 0.0002996268740389496, - "learning_rate": 2.315410697522456e-05, - "loss": 5.4457224905490875e-05, - "step": 3710 - }, - { - "epoch": 2.534106412005457, - "grad_norm": 0.00561846699565649, - "learning_rate": 2.3059444239404896e-05, - "loss": 0.0002347052562981844, - "step": 3715 - }, - { - "epoch": 2.5375170532060025, - "grad_norm": 1.200972318649292, - "learning_rate": 2.296488942115287e-05, - "loss": 0.003510555624961853, - "step": 3720 - }, - { - "epoch": 2.540927694406548, - "grad_norm": 0.008847455494105816, - "learning_rate": 2.287044322709819e-05, - "loss": 0.00010497854091227055, - "step": 3725 - }, - { - "epoch": 2.544338335607094, - "grad_norm": 0.0026281927712261677, - "learning_rate": 2.277610636305883e-05, - "loss": 0.001988488808274269, - "step": 3730 - }, - { - "epoch": 2.5477489768076396, - "grad_norm": 0.008025784976780415, - "learning_rate": 2.268187953403568e-05, - "loss": 0.023679326474666595, - "step": 3735 - }, - { - "epoch": 2.5511596180081857, - "grad_norm": 0.03441132605075836, - "learning_rate": 2.258776344420735e-05, - "loss": 0.0004788160789757967, - "step": 3740 - }, - { - "epoch": 2.5545702592087314, - "grad_norm": 3.1458778381347656, - "learning_rate": 2.2493758796924816e-05, - "loss": 0.008043569326400758, - "step": 3745 - }, - { - "epoch": 2.557980900409277, - "grad_norm": 0.002775805303826928, - "learning_rate": 2.2399866294706302e-05, - "loss": 0.0013419794850051403, - "step": 3750 - }, - { - "epoch": 2.5613915416098227, - "grad_norm": 0.0030509193893522024, - "learning_rate": 2.2306086639231857e-05, - "loss": 0.03926021754741669, - "step": 3755 - }, - { - "epoch": 2.5648021828103684, - "grad_norm": 0.0024770230520516634, - "learning_rate": 2.2212420531338248e-05, - "loss": 0.0011906253173947334, - "step": 3760 - }, - { - "epoch": 2.568212824010914, - "grad_norm": 0.007617161609232426, - "learning_rate": 2.2118868671013692e-05, - "loss": 0.008073102682828903, - "step": 3765 - }, - { - "epoch": 2.57162346521146, - "grad_norm": 0.08999158442020416, - "learning_rate": 2.202543175739254e-05, - "loss": 0.0009835162200033665, - "step": 3770 - }, - { - "epoch": 2.5750341064120055, - "grad_norm": 0.010818173177540302, - "learning_rate": 2.193211048875022e-05, - "loss": 0.0019240962341427804, - "step": 3775 - }, - { - "epoch": 2.578444747612551, - "grad_norm": 0.01809680461883545, - "learning_rate": 2.183890556249781e-05, - "loss": 0.005013756453990936, - "step": 3780 - }, - { - "epoch": 2.581855388813097, - "grad_norm": 0.021501798182725906, - "learning_rate": 2.1745817675177027e-05, - "loss": 0.0005870801862329245, - "step": 3785 - }, - { - "epoch": 2.5852660300136425, - "grad_norm": 0.011130684986710548, - "learning_rate": 2.165284752245485e-05, - "loss": 0.00037821107544004916, - "step": 3790 - }, - { - "epoch": 2.588676671214188, - "grad_norm": 0.00624213507398963, - "learning_rate": 2.1559995799118496e-05, - "loss": 0.015400664508342743, - "step": 3795 - }, - { - "epoch": 2.592087312414734, - "grad_norm": 0.23763298988342285, - "learning_rate": 2.1467263199070018e-05, - "loss": 0.0006831173319369555, - "step": 3800 - }, - { - "epoch": 2.5954979536152796, - "grad_norm": 0.0056639909744262695, - "learning_rate": 2.137465041532133e-05, - "loss": 0.002130754478275776, - "step": 3805 - }, - { - "epoch": 2.5989085948158253, - "grad_norm": 0.42903369665145874, - "learning_rate": 2.1282158139988877e-05, - "loss": 0.0020006079226732253, - "step": 3810 - }, - { - "epoch": 2.602319236016371, - "grad_norm": 0.014364579692482948, - "learning_rate": 2.118978706428854e-05, - "loss": 0.0005437508225440979, - "step": 3815 - }, - { - "epoch": 2.6057298772169166, - "grad_norm": 0.017512807622551918, - "learning_rate": 2.1097537878530427e-05, - "loss": 0.00019666440784931182, - "step": 3820 - }, - { - "epoch": 2.6091405184174628, - "grad_norm": 0.00863230973482132, - "learning_rate": 2.100541127211379e-05, - "loss": 0.0001873808912932873, - "step": 3825 - }, - { - "epoch": 2.6125511596180084, - "grad_norm": 0.006781345698982477, - "learning_rate": 2.0913407933521714e-05, - "loss": 0.00018554476555436849, - "step": 3830 - }, - { - "epoch": 2.615961800818554, - "grad_norm": 0.004758995026350021, - "learning_rate": 2.082152855031618e-05, - "loss": 0.0004659180995076895, - "step": 3835 - }, - { - "epoch": 2.6193724420191, - "grad_norm": 0.0036142354365438223, - "learning_rate": 2.0729773809132782e-05, - "loss": 0.00033613520208746194, - "step": 3840 - }, - { - "epoch": 2.6227830832196455, - "grad_norm": 0.0798744410276413, - "learning_rate": 2.0638144395675614e-05, - "loss": 0.00026304563507437704, - "step": 3845 - }, - { - "epoch": 2.626193724420191, - "grad_norm": 0.07229600101709366, - "learning_rate": 2.0546640994712183e-05, - "loss": 0.022786998748779298, - "step": 3850 - }, - { - "epoch": 2.629604365620737, - "grad_norm": 0.11226585507392883, - "learning_rate": 2.04552642900683e-05, - "loss": 0.0002580304862931371, - "step": 3855 - }, - { - "epoch": 2.6330150068212825, - "grad_norm": 0.0019689116161316633, - "learning_rate": 2.036401496462292e-05, - "loss": 0.005474040284752846, - "step": 3860 - }, - { - "epoch": 2.636425648021828, - "grad_norm": 0.08611829578876495, - "learning_rate": 2.027289370030307e-05, - "loss": 0.0007106051780283451, - "step": 3865 - }, - { - "epoch": 2.639836289222374, - "grad_norm": 0.06968124955892563, - "learning_rate": 2.0181901178078723e-05, - "loss": 0.00030497927218675613, - "step": 3870 - }, - { - "epoch": 2.6432469304229196, - "grad_norm": 0.002102258615195751, - "learning_rate": 2.0091038077957807e-05, - "loss": 0.00039041375275701285, - "step": 3875 - }, - { - "epoch": 2.6466575716234653, - "grad_norm": 0.01055186241865158, - "learning_rate": 2.000030507898094e-05, - "loss": 0.00028035915456712244, - "step": 3880 - }, - { - "epoch": 2.650068212824011, - "grad_norm": 0.013122744858264923, - "learning_rate": 1.990970285921656e-05, - "loss": 0.0002463514683768153, - "step": 3885 - }, - { - "epoch": 2.6534788540245566, - "grad_norm": 0.043785031884908676, - "learning_rate": 1.9819232095755712e-05, - "loss": 0.0006866191513836383, - "step": 3890 - }, - { - "epoch": 2.6568894952251023, - "grad_norm": 0.014347897842526436, - "learning_rate": 1.9728893464707063e-05, - "loss": 0.00304874274879694, - "step": 3895 - }, - { - "epoch": 2.660300136425648, - "grad_norm": 0.01495263073593378, - "learning_rate": 1.9638687641191784e-05, - "loss": 0.0027243653312325478, - "step": 3900 - }, - { - "epoch": 2.6637107776261937, - "grad_norm": 0.0025812601670622826, - "learning_rate": 1.954861529933862e-05, - "loss": 0.00015772593906149268, - "step": 3905 - }, - { - "epoch": 2.6671214188267394, - "grad_norm": 3.2978317737579346, - "learning_rate": 1.9458677112278677e-05, - "loss": 0.011941131204366684, - "step": 3910 - }, - { - "epoch": 2.670532060027285, - "grad_norm": 0.00819153431802988, - "learning_rate": 1.936887375214059e-05, - "loss": 0.0019363060593605042, - "step": 3915 - }, - { - "epoch": 2.6739427012278307, - "grad_norm": 0.3553819954395294, - "learning_rate": 1.9279205890045335e-05, - "loss": 0.001681213453412056, - "step": 3920 - }, - { - "epoch": 2.6773533424283764, - "grad_norm": 0.14068304002285004, - "learning_rate": 1.9189674196101303e-05, - "loss": 0.0004354804754257202, - "step": 3925 - }, - { - "epoch": 2.680763983628922, - "grad_norm": 0.05331770330667496, - "learning_rate": 1.9100279339399258e-05, - "loss": 0.0006728332955390215, - "step": 3930 - }, - { - "epoch": 2.684174624829468, - "grad_norm": 0.010825222358107567, - "learning_rate": 1.9011021988007387e-05, - "loss": 0.011760103702545165, - "step": 3935 - }, - { - "epoch": 2.6875852660300135, - "grad_norm": 0.02598944492638111, - "learning_rate": 1.892190280896622e-05, - "loss": 0.00020915823988616468, - "step": 3940 - }, - { - "epoch": 2.690995907230559, - "grad_norm": 0.00512358546257019, - "learning_rate": 1.8832922468283724e-05, - "loss": 0.000882271584123373, - "step": 3945 - }, - { - "epoch": 2.694406548431105, - "grad_norm": 0.05068441852927208, - "learning_rate": 1.874408163093028e-05, - "loss": 0.000997264590114355, - "step": 3950 - }, - { - "epoch": 2.6978171896316505, - "grad_norm": 0.0038104017730802298, - "learning_rate": 1.8655380960833724e-05, - "loss": 0.001553349569439888, - "step": 3955 - }, - { - "epoch": 2.701227830832196, - "grad_norm": 0.0013087299885228276, - "learning_rate": 1.8566821120874394e-05, - "loss": 0.006560490280389786, - "step": 3960 - }, - { - "epoch": 2.704638472032742, - "grad_norm": 1.1628080606460571, - "learning_rate": 1.8478402772880208e-05, - "loss": 0.0015312742441892623, - "step": 3965 - }, - { - "epoch": 2.708049113233288, - "grad_norm": 0.0020620303694158792, - "learning_rate": 1.8390126577621636e-05, - "loss": 0.013011389970779419, - "step": 3970 - }, - { - "epoch": 2.7114597544338337, - "grad_norm": 0.0014427551068365574, - "learning_rate": 1.830199319480682e-05, - "loss": 0.0008381184190511704, - "step": 3975 - }, - { - "epoch": 2.7148703956343794, - "grad_norm": 0.0005985202733427286, - "learning_rate": 1.821400328307663e-05, - "loss": 0.0005598202813416719, - "step": 3980 - }, - { - "epoch": 2.718281036834925, - "grad_norm": 0.0016122297383844852, - "learning_rate": 1.8126157499999783e-05, - "loss": 0.006013911962509155, - "step": 3985 - }, - { - "epoch": 2.7216916780354707, - "grad_norm": 0.0028895260766148567, - "learning_rate": 1.8038456502067822e-05, - "loss": 0.00017103723948821425, - "step": 3990 - }, - { - "epoch": 2.7251023192360164, - "grad_norm": 0.09400962293148041, - "learning_rate": 1.7950900944690308e-05, - "loss": 0.07410463690757751, - "step": 3995 - }, - { - "epoch": 2.728512960436562, - "grad_norm": 0.015019465237855911, - "learning_rate": 1.786349148218993e-05, - "loss": 0.004524913057684899, - "step": 4000 - }, - { - "epoch": 2.731923601637108, - "grad_norm": 0.000663114245980978, - "learning_rate": 1.7776228767797522e-05, - "loss": 0.0212590754032135, - "step": 4005 - }, - { - "epoch": 2.7353342428376535, - "grad_norm": 0.0029272777028381824, - "learning_rate": 1.768911345364726e-05, - "loss": 0.000913316011428833, - "step": 4010 - }, - { - "epoch": 2.738744884038199, - "grad_norm": 0.03791525587439537, - "learning_rate": 1.7602146190771743e-05, - "loss": 0.0018313366919755936, - "step": 4015 - }, - { - "epoch": 2.742155525238745, - "grad_norm": 0.041133999824523926, - "learning_rate": 1.7515327629097217e-05, - "loss": 0.0006253012455999851, - "step": 4020 - }, - { - "epoch": 2.7455661664392905, - "grad_norm": 0.01035034004598856, - "learning_rate": 1.7428658417438534e-05, - "loss": 0.005944912880659103, - "step": 4025 - }, - { - "epoch": 2.7462482946793996, - "eval_loss": 0.06961391866207123, - "eval_runtime": 0.9223, - "eval_samples_per_second": 81.321, - "eval_steps_per_second": 2.169, - "step": 4026 - }, - { - "eval_cer_subset": 0.015397193078076032, - "eval_cer_subset_edit_distance": 113, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4026 - }, - { - "epoch": 2.748976807639836, - "grad_norm": 0.9081467986106873, - "learning_rate": 1.7342139203494537e-05, - "loss": 0.0036753010004758834, - "step": 4030 - }, - { - "epoch": 2.752387448840382, - "grad_norm": 0.07232939451932907, - "learning_rate": 1.7255770633843028e-05, - "loss": 0.0029829120263457297, - "step": 4035 - }, - { - "epoch": 2.7557980900409276, - "grad_norm": 0.005813417956233025, - "learning_rate": 1.7169553353936035e-05, - "loss": 0.00047225411981344223, - "step": 4040 - }, - { - "epoch": 2.7592087312414733, - "grad_norm": 0.2354760318994522, - "learning_rate": 1.7083488008094945e-05, - "loss": 0.0015884984284639358, - "step": 4045 - }, - { - "epoch": 2.762619372442019, - "grad_norm": 0.1201627105474472, - "learning_rate": 1.699757523950577e-05, - "loss": 0.0010768620297312737, - "step": 4050 - }, - { - "epoch": 2.766030013642565, - "grad_norm": 0.033547814935445786, - "learning_rate": 1.6911815690214166e-05, - "loss": 0.01052093282341957, - "step": 4055 - }, - { - "epoch": 2.7694406548431107, - "grad_norm": 0.01014826912432909, - "learning_rate": 1.682621000112085e-05, - "loss": 0.0003362501040101051, - "step": 4060 - }, - { - "epoch": 2.7728512960436564, - "grad_norm": 0.004405386745929718, - "learning_rate": 1.6740758811976665e-05, - "loss": 0.00044001247733831405, - "step": 4065 - }, - { - "epoch": 2.776261937244202, - "grad_norm": 3.621004104614258, - "learning_rate": 1.665546276137783e-05, - "loss": 0.18638403415679933, - "step": 4070 - }, - { - "epoch": 2.779672578444748, - "grad_norm": 0.01203183177858591, - "learning_rate": 1.6570322486761184e-05, - "loss": 0.00013435594737529755, - "step": 4075 - }, - { - "epoch": 2.7830832196452935, - "grad_norm": 0.23053398728370667, - "learning_rate": 1.6485338624399445e-05, - "loss": 0.0010434269905090332, - "step": 4080 - }, - { - "epoch": 2.786493860845839, - "grad_norm": 0.0109801534563303, - "learning_rate": 1.6400511809396394e-05, - "loss": 0.00012704560067504643, - "step": 4085 - }, - { - "epoch": 2.789904502046385, - "grad_norm": 0.8463883399963379, - "learning_rate": 1.631584267568217e-05, - "loss": 0.007707947492599487, - "step": 4090 - }, - { - "epoch": 2.7933151432469305, - "grad_norm": 0.05444789677858353, - "learning_rate": 1.623133185600852e-05, - "loss": 0.0008411366492509842, - "step": 4095 - }, - { - "epoch": 2.796725784447476, - "grad_norm": 0.007274657487869263, - "learning_rate": 1.6146979981944095e-05, - "loss": 0.0002061120932921767, - "step": 4100 - }, - { - "epoch": 2.800136425648022, - "grad_norm": 0.013262615539133549, - "learning_rate": 1.6062787683869667e-05, - "loss": 0.0003235015319660306, - "step": 4105 - }, - { - "epoch": 2.8035470668485676, - "grad_norm": 0.0030244409572333097, - "learning_rate": 1.597875559097352e-05, - "loss": 0.0007286245469003916, - "step": 4110 - }, - { - "epoch": 2.8069577080491133, - "grad_norm": 0.0021644949447363615, - "learning_rate": 1.5894884331246632e-05, - "loss": 0.0003056209534406662, - "step": 4115 - }, - { - "epoch": 2.810368349249659, - "grad_norm": 0.014495057985186577, - "learning_rate": 1.5811174531478074e-05, - "loss": 0.0014359142631292343, - "step": 4120 - }, - { - "epoch": 2.8137789904502046, - "grad_norm": 0.05203676223754883, - "learning_rate": 1.5727626817250255e-05, - "loss": 0.0006342739332467317, - "step": 4125 - }, - { - "epoch": 2.8171896316507503, - "grad_norm": 0.004998183809220791, - "learning_rate": 1.564424181293435e-05, - "loss": 0.00013386564096435906, - "step": 4130 - }, - { - "epoch": 2.820600272851296, - "grad_norm": 0.05428452417254448, - "learning_rate": 1.556102014168546e-05, - "loss": 0.00014423681423068047, - "step": 4135 - }, - { - "epoch": 2.8240109140518417, - "grad_norm": 0.0013511159922927618, - "learning_rate": 1.5477962425438164e-05, - "loss": 6.5605464624241e-05, - "step": 4140 - }, - { - "epoch": 2.8274215552523874, - "grad_norm": 0.9757132530212402, - "learning_rate": 1.539506928490171e-05, - "loss": 0.00839839205145836, - "step": 4145 - }, - { - "epoch": 2.830832196452933, - "grad_norm": 0.0018282996024936438, - "learning_rate": 1.5312341339555445e-05, - "loss": 0.005959897115826607, - "step": 4150 - }, - { - "epoch": 2.8342428376534787, - "grad_norm": 0.0022353942040354013, - "learning_rate": 1.5229779207644171e-05, - "loss": 0.000250368332490325, - "step": 4155 - }, - { - "epoch": 2.8376534788540244, - "grad_norm": 0.006538774352520704, - "learning_rate": 1.5147383506173572e-05, - "loss": 0.0004641829524189234, - "step": 4160 - }, - { - "epoch": 2.84106412005457, - "grad_norm": 0.003464010776951909, - "learning_rate": 1.5065154850905465e-05, - "loss": 0.008368657529354095, - "step": 4165 - }, - { - "epoch": 2.844474761255116, - "grad_norm": 0.0020767974201589823, - "learning_rate": 1.4983093856353398e-05, - "loss": 0.00010023106588050724, - "step": 4170 - }, - { - "epoch": 2.8478854024556615, - "grad_norm": 0.0025670777540653944, - "learning_rate": 1.4901201135777887e-05, - "loss": 0.0006715046241879463, - "step": 4175 - }, - { - "epoch": 2.851296043656207, - "grad_norm": 0.0006886612391099334, - "learning_rate": 1.4819477301181915e-05, - "loss": 0.0008357623592019081, - "step": 4180 - }, - { - "epoch": 2.854706684856753, - "grad_norm": 0.0023747060913592577, - "learning_rate": 1.4737922963306332e-05, - "loss": 0.00488339364528656, - "step": 4185 - }, - { - "epoch": 2.8581173260572985, - "grad_norm": 0.0030493123922497034, - "learning_rate": 1.4656538731625333e-05, - "loss": 0.017219077050685882, - "step": 4190 - }, - { - "epoch": 2.8615279672578446, - "grad_norm": 0.0030935786198824644, - "learning_rate": 1.457532521434184e-05, - "loss": 0.00014684826601296663, - "step": 4195 - }, - { - "epoch": 2.8649386084583903, - "grad_norm": 0.0004102849052287638, - "learning_rate": 1.4494283018382991e-05, - "loss": 0.0002242558402940631, - "step": 4200 - }, - { - "epoch": 2.868349249658936, - "grad_norm": 1.6695232391357422, - "learning_rate": 1.4413412749395593e-05, - "loss": 0.01916976124048233, - "step": 4205 - }, - { - "epoch": 2.8717598908594817, - "grad_norm": 0.003357100998982787, - "learning_rate": 1.4332715011741656e-05, - "loss": 0.0036146264523267747, - "step": 4210 - }, - { - "epoch": 2.8751705320600274, - "grad_norm": 0.002434425987303257, - "learning_rate": 1.425219040849373e-05, - "loss": 0.0001186407171189785, - "step": 4215 - }, - { - "epoch": 2.878581173260573, - "grad_norm": 0.0272241048514843, - "learning_rate": 1.4171839541430586e-05, - "loss": 0.003773893415927887, - "step": 4220 - }, - { - "epoch": 2.8819918144611187, - "grad_norm": 0.00031364246387965977, - "learning_rate": 1.409166301103257e-05, - "loss": 3.483370819594711e-05, - "step": 4225 - }, - { - "epoch": 2.8854024556616644, - "grad_norm": 0.008605693466961384, - "learning_rate": 1.4011661416477186e-05, - "loss": 0.005898609757423401, - "step": 4230 - }, - { - "epoch": 2.88881309686221, - "grad_norm": 0.0003439450520090759, - "learning_rate": 1.3931835355634601e-05, - "loss": 0.0017080994322896004, - "step": 4235 - }, - { - "epoch": 2.892223738062756, - "grad_norm": 0.004427058156579733, - "learning_rate": 1.3852185425063181e-05, - "loss": 0.00010978456120938062, - "step": 4240 - }, - { - "epoch": 2.8956343792633015, - "grad_norm": 0.02229383960366249, - "learning_rate": 1.377271222000503e-05, - "loss": 0.0012531550601124763, - "step": 4245 - }, - { - "epoch": 2.899045020463847, - "grad_norm": 0.004171700682491064, - "learning_rate": 1.3693416334381517e-05, - "loss": 0.0012122373096644878, - "step": 4250 - }, - { - "epoch": 2.902455661664393, - "grad_norm": 0.0016778658609837294, - "learning_rate": 1.3614298360788924e-05, - "loss": 0.0006234514527022839, - "step": 4255 - }, - { - "epoch": 2.9058663028649385, - "grad_norm": 0.00403103232383728, - "learning_rate": 1.3535358890493897e-05, - "loss": 0.00030033572111278775, - "step": 4260 - }, - { - "epoch": 2.909276944065484, - "grad_norm": 0.006126928608864546, - "learning_rate": 1.3456598513429111e-05, - "loss": 0.014299000799655914, - "step": 4265 - }, - { - "epoch": 2.91268758526603, - "grad_norm": 0.0007882033823989332, - "learning_rate": 1.3378017818188815e-05, - "loss": 0.001049484871327877, - "step": 4270 - }, - { - "epoch": 2.9160982264665756, - "grad_norm": 0.0004515725013334304, - "learning_rate": 1.329961739202451e-05, - "loss": 6.776668014936149e-05, - "step": 4275 - }, - { - "epoch": 2.9195088676671213, - "grad_norm": 0.3738904297351837, - "learning_rate": 1.3221397820840419e-05, - "loss": 0.0010396759025752544, - "step": 4280 - }, - { - "epoch": 2.9229195088676674, - "grad_norm": 0.035881806164979935, - "learning_rate": 1.3143359689189279e-05, - "loss": 0.0003127899952232838, - "step": 4285 - }, - { - "epoch": 2.926330150068213, - "grad_norm": 0.010501476936042309, - "learning_rate": 1.306550358026784e-05, - "loss": 0.013835662603378296, - "step": 4290 - }, - { - "epoch": 2.9297407912687587, - "grad_norm": 0.002153329784050584, - "learning_rate": 1.2987830075912565e-05, - "loss": 0.007172297686338425, - "step": 4295 - }, - { - "epoch": 2.9331514324693044, - "grad_norm": 0.000594582874327898, - "learning_rate": 1.2910339756595254e-05, - "loss": 6.662132800556719e-05, - "step": 4300 - }, - { - "epoch": 2.93656207366985, - "grad_norm": 1.2802950143814087, - "learning_rate": 1.283303320141879e-05, - "loss": 0.0013225926086306572, - "step": 4305 - }, - { - "epoch": 2.939972714870396, - "grad_norm": 0.0010621119290590286, - "learning_rate": 1.2755910988112639e-05, - "loss": 0.0001950544072315097, - "step": 4310 - }, - { - "epoch": 2.9433833560709415, - "grad_norm": 0.0004629544273484498, - "learning_rate": 1.2678973693028735e-05, - "loss": 0.0002407266292721033, - "step": 4315 - }, - { - "epoch": 2.946793997271487, - "grad_norm": 0.00353289395570755, - "learning_rate": 1.2602221891137021e-05, - "loss": 0.006271860748529434, - "step": 4320 - }, - { - "epoch": 2.950204638472033, - "grad_norm": 0.021108930930495262, - "learning_rate": 1.2525656156021227e-05, - "loss": 0.007909800857305527, - "step": 4325 - }, - { - "epoch": 2.9536152796725785, - "grad_norm": 0.007604979444295168, - "learning_rate": 1.2449277059874547e-05, - "loss": 0.00022456045262515546, - "step": 4330 - }, - { - "epoch": 2.957025920873124, - "grad_norm": 0.0007357494323514402, - "learning_rate": 1.2373085173495411e-05, - "loss": 0.0010289529338479042, - "step": 4335 - }, - { - "epoch": 2.96043656207367, - "grad_norm": 0.0004920829669572413, - "learning_rate": 1.229708106628316e-05, - "loss": 8.303072536364198e-05, - "step": 4340 - }, - { - "epoch": 2.9638472032742156, - "grad_norm": 0.0014207189669832587, - "learning_rate": 1.2221265306233824e-05, - "loss": 0.001923336647450924, - "step": 4345 - }, - { - "epoch": 2.9672578444747613, - "grad_norm": 0.00898696668446064, - "learning_rate": 1.2145638459935863e-05, - "loss": 0.01918397843837738, - "step": 4350 - }, - { - "epoch": 2.970668485675307, - "grad_norm": 0.08236313611268997, - "learning_rate": 1.2070201092565988e-05, - "loss": 0.0005465132184326648, - "step": 4355 - }, - { - "epoch": 2.9740791268758526, - "grad_norm": 0.00796231534332037, - "learning_rate": 1.199495376788481e-05, - "loss": 4.8452542978338896e-05, - "step": 4360 - }, - { - "epoch": 2.9774897680763983, - "grad_norm": 0.026452092453837395, - "learning_rate": 1.1919897048232791e-05, - "loss": 0.0006576513405889273, - "step": 4365 - }, - { - "epoch": 2.980900409276944, - "grad_norm": 0.08616425842046738, - "learning_rate": 1.1845031494525901e-05, - "loss": 0.0013777482323348521, - "step": 4370 - }, - { - "epoch": 2.9843110504774897, - "grad_norm": 0.00048112327931448817, - "learning_rate": 1.1770357666251509e-05, - "loss": 0.0018716825172305108, - "step": 4375 - }, - { - "epoch": 2.9877216916780354, - "grad_norm": 0.0013632692862302065, - "learning_rate": 1.1695876121464154e-05, - "loss": 4.9980584299191834e-05, - "step": 4380 - }, - { - "epoch": 2.991132332878581, - "grad_norm": 1.318718671798706, - "learning_rate": 1.1621587416781445e-05, - "loss": 0.013328136503696441, - "step": 4385 - }, - { - "epoch": 2.9945429740791267, - "grad_norm": 0.00219643022865057, - "learning_rate": 1.1547492107379782e-05, - "loss": 9.902374586090446e-05, - "step": 4390 - }, - { - "epoch": 2.9959072305593453, - "eval_loss": 0.07537060230970383, - "eval_runtime": 0.95, - "eval_samples_per_second": 78.945, - "eval_steps_per_second": 2.105, - "step": 4392 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4392 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 4.581338130785894e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4392/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/adapter_config.json deleted file mode 100644 index dcdb6b17c195950cd12709b48b32f4a0c173c74e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "down_proj", - "v_proj", - "gate_proj", - "up_proj", - "o_proj", - "q_proj", - "k_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/adapter_model.safetensors deleted file mode 100644 index 13c6c3ad051626acbe50148dd9d964c1393b1883..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6046b355f60a223c7d636aa15c7f907efa15fb118bedad55e7c5624c972e2fb2 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/optimizer.pt deleted file mode 100644 index 20e586140ecb5dd113f89a363e845d405dc530b7..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:277087d55677dc7489a53524ac8eaf37cafb9bb9ddccbc186a9f4fa6239fcb16 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/rng_state.pth deleted file mode 100644 index 29eecf71f027a23b5a0d826cd3148e39a8d85840..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d569a9c96ff4e89d32dfe8ca0b97952cc2edcd76f89c14caa7e57238c4b46c8a -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/scheduler.pt deleted file mode 100644 index 2d514a6a69119cd8c31ed41b4b67f7ef7aa385c0..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fa37c8e83aa2808803ec5da6a4c55863baf57b9c8c3de046f9fbd9170ad2de71 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/trainer_state.json deleted file mode 100644 index c38957684fe1fc40e0decc37f3e3791fb40613f1..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/trainer_state.json +++ /dev/null @@ -1,6235 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.7498721227621483, - "eval_steps": 1466, - "global_step": 4398, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0008525149190110827, - "grad_norm": 1.6653118133544922, - "learning_rate": 6.382978723404255e-07, - "loss": 0.6878558158874511, - "step": 5 - }, - { - "epoch": 0.0017050298380221654, - "grad_norm": 1.6072094440460205, - "learning_rate": 1.4361702127659573e-06, - "loss": 0.7222867965698242, - "step": 10 - }, - { - "epoch": 0.0025575447570332483, - "grad_norm": 1.8327608108520508, - "learning_rate": 2.2340425531914894e-06, - "loss": 0.6977188110351562, - "step": 15 - }, - { - "epoch": 0.0034100596760443308, - "grad_norm": 1.7263766527175903, - "learning_rate": 3.0319148936170214e-06, - "loss": 0.6928215026855469, - "step": 20 - }, - { - "epoch": 0.004262574595055414, - "grad_norm": 1.8482989072799683, - "learning_rate": 3.829787234042553e-06, - "loss": 0.6887872695922852, - "step": 25 - }, - { - "epoch": 0.005115089514066497, - "grad_norm": 2.0272440910339355, - "learning_rate": 4.627659574468085e-06, - "loss": 0.6714544296264648, - "step": 30 - }, - { - "epoch": 0.005967604433077579, - "grad_norm": 2.4286959171295166, - "learning_rate": 5.425531914893616e-06, - "loss": 0.6025971412658692, - "step": 35 - }, - { - "epoch": 0.0068201193520886615, - "grad_norm": 2.26863694190979, - "learning_rate": 6.223404255319148e-06, - "loss": 0.5200423240661621, - "step": 40 - }, - { - "epoch": 0.0076726342710997444, - "grad_norm": 2.0572476387023926, - "learning_rate": 7.02127659574468e-06, - "loss": 0.3741515874862671, - "step": 45 - }, - { - "epoch": 0.008525149190110827, - "grad_norm": 0.9048103094100952, - "learning_rate": 7.819148936170211e-06, - "loss": 0.25923154354095457, - "step": 50 - }, - { - "epoch": 0.00937766410912191, - "grad_norm": 0.5942133069038391, - "learning_rate": 8.617021276595744e-06, - "loss": 0.18589640855789186, - "step": 55 - }, - { - "epoch": 0.010230179028132993, - "grad_norm": 0.40469691157341003, - "learning_rate": 9.414893617021275e-06, - "loss": 0.14508966207504273, - "step": 60 - }, - { - "epoch": 0.011082693947144074, - "grad_norm": 0.319180428981781, - "learning_rate": 1.0212765957446808e-05, - "loss": 0.1287898302078247, - "step": 65 - }, - { - "epoch": 0.011935208866155157, - "grad_norm": 0.2620982229709625, - "learning_rate": 1.101063829787234e-05, - "loss": 0.10825083255767823, - "step": 70 - }, - { - "epoch": 0.01278772378516624, - "grad_norm": 0.2702063024044037, - "learning_rate": 1.1808510638297872e-05, - "loss": 0.09140915870666504, - "step": 75 - }, - { - "epoch": 0.013640238704177323, - "grad_norm": 0.21019567549228668, - "learning_rate": 1.2606382978723403e-05, - "loss": 0.07847741842269898, - "step": 80 - }, - { - "epoch": 0.014492753623188406, - "grad_norm": 0.27865487337112427, - "learning_rate": 1.3404255319148936e-05, - "loss": 0.0681579053401947, - "step": 85 - }, - { - "epoch": 0.015345268542199489, - "grad_norm": 0.2561706006526947, - "learning_rate": 1.4202127659574466e-05, - "loss": 0.06208043098449707, - "step": 90 - }, - { - "epoch": 0.01619778346121057, - "grad_norm": 0.33415308594703674, - "learning_rate": 1.4999999999999999e-05, - "loss": 0.05436077117919922, - "step": 95 - }, - { - "epoch": 0.017050298380221655, - "grad_norm": 0.20705723762512207, - "learning_rate": 1.579787234042553e-05, - "loss": 0.061427068710327146, - "step": 100 - }, - { - "epoch": 0.017902813299232736, - "grad_norm": 0.26269420981407166, - "learning_rate": 1.659574468085106e-05, - "loss": 0.04944279193878174, - "step": 105 - }, - { - "epoch": 0.01875532821824382, - "grad_norm": 0.256533682346344, - "learning_rate": 1.7393617021276596e-05, - "loss": 0.0463131994009018, - "step": 110 - }, - { - "epoch": 0.0196078431372549, - "grad_norm": 0.25077348947525024, - "learning_rate": 1.8191489361702127e-05, - "loss": 0.036723154783248904, - "step": 115 - }, - { - "epoch": 0.020460358056265986, - "grad_norm": 0.2801378071308136, - "learning_rate": 1.8989361702127655e-05, - "loss": 0.04308137893676758, - "step": 120 - }, - { - "epoch": 0.021312872975277068, - "grad_norm": 0.23140908777713776, - "learning_rate": 1.978723404255319e-05, - "loss": 0.0391487717628479, - "step": 125 - }, - { - "epoch": 0.02216538789428815, - "grad_norm": 0.4250975251197815, - "learning_rate": 2.0585106382978724e-05, - "loss": 0.03745899498462677, - "step": 130 - }, - { - "epoch": 0.023017902813299233, - "grad_norm": 0.24533668160438538, - "learning_rate": 2.1382978723404255e-05, - "loss": 0.03649275898933411, - "step": 135 - }, - { - "epoch": 0.023870417732310314, - "grad_norm": 0.2309182733297348, - "learning_rate": 2.2180851063829783e-05, - "loss": 0.036090463399887085, - "step": 140 - }, - { - "epoch": 0.0247229326513214, - "grad_norm": 0.28087928891181946, - "learning_rate": 2.2978723404255317e-05, - "loss": 0.03179733753204346, - "step": 145 - }, - { - "epoch": 0.02557544757033248, - "grad_norm": 0.2785134017467499, - "learning_rate": 2.377659574468085e-05, - "loss": 0.02794267237186432, - "step": 150 - }, - { - "epoch": 0.026427962489343565, - "grad_norm": 0.20468176901340485, - "learning_rate": 2.457446808510638e-05, - "loss": 0.025854837894439698, - "step": 155 - }, - { - "epoch": 0.027280477408354646, - "grad_norm": 0.4069538116455078, - "learning_rate": 2.537234042553191e-05, - "loss": 0.03182780146598816, - "step": 160 - }, - { - "epoch": 0.028132992327365727, - "grad_norm": 0.3025060296058655, - "learning_rate": 2.6170212765957446e-05, - "loss": 0.027975103259086607, - "step": 165 - }, - { - "epoch": 0.028985507246376812, - "grad_norm": 0.22387781739234924, - "learning_rate": 2.6968085106382977e-05, - "loss": 0.025766396522521974, - "step": 170 - }, - { - "epoch": 0.029838022165387893, - "grad_norm": 0.15060213208198547, - "learning_rate": 2.7765957446808508e-05, - "loss": 0.025661858916282653, - "step": 175 - }, - { - "epoch": 0.030690537084398978, - "grad_norm": 0.2912445366382599, - "learning_rate": 2.856382978723404e-05, - "loss": 0.02397846579551697, - "step": 180 - }, - { - "epoch": 0.03154305200341006, - "grad_norm": 0.19390830397605896, - "learning_rate": 2.9361702127659574e-05, - "loss": 0.023744897544384004, - "step": 185 - }, - { - "epoch": 0.03239556692242114, - "grad_norm": 0.24036464095115662, - "learning_rate": 3.0159574468085105e-05, - "loss": 0.0238590270280838, - "step": 190 - }, - { - "epoch": 0.03324808184143223, - "grad_norm": 0.2793026566505432, - "learning_rate": 3.0957446808510636e-05, - "loss": 0.025464403629302978, - "step": 195 - }, - { - "epoch": 0.03410059676044331, - "grad_norm": 0.21164429187774658, - "learning_rate": 3.175531914893617e-05, - "loss": 0.020692554116249085, - "step": 200 - }, - { - "epoch": 0.03495311167945439, - "grad_norm": 0.3742266297340393, - "learning_rate": 3.25531914893617e-05, - "loss": 0.02315486818552017, - "step": 205 - }, - { - "epoch": 0.03580562659846547, - "grad_norm": 0.2690248191356659, - "learning_rate": 3.3351063829787226e-05, - "loss": 0.0174571692943573, - "step": 210 - }, - { - "epoch": 0.03665814151747655, - "grad_norm": 0.3727855980396271, - "learning_rate": 3.414893617021276e-05, - "loss": 0.021705827116966246, - "step": 215 - }, - { - "epoch": 0.03751065643648764, - "grad_norm": 0.27461397647857666, - "learning_rate": 3.4946808510638296e-05, - "loss": 0.028134092688560486, - "step": 220 - }, - { - "epoch": 0.03836317135549872, - "grad_norm": 0.1723722219467163, - "learning_rate": 3.574468085106383e-05, - "loss": 0.02093944847583771, - "step": 225 - }, - { - "epoch": 0.0392156862745098, - "grad_norm": 0.17161321640014648, - "learning_rate": 3.654255319148936e-05, - "loss": 0.021984823048114777, - "step": 230 - }, - { - "epoch": 0.040068201193520885, - "grad_norm": 0.203886479139328, - "learning_rate": 3.734042553191489e-05, - "loss": 0.02234097272157669, - "step": 235 - }, - { - "epoch": 0.04092071611253197, - "grad_norm": 0.2021923065185547, - "learning_rate": 3.813829787234042e-05, - "loss": 0.02671273946762085, - "step": 240 - }, - { - "epoch": 0.041773231031543054, - "grad_norm": 0.24510027468204498, - "learning_rate": 3.8936170212765955e-05, - "loss": 0.016925157606601716, - "step": 245 - }, - { - "epoch": 0.042625745950554135, - "grad_norm": 0.191350057721138, - "learning_rate": 3.973404255319149e-05, - "loss": 0.02067229151725769, - "step": 250 - }, - { - "epoch": 0.043478260869565216, - "grad_norm": 0.20248189568519592, - "learning_rate": 4.053191489361702e-05, - "loss": 0.01805101931095123, - "step": 255 - }, - { - "epoch": 0.0443307757885763, - "grad_norm": 0.22623269259929657, - "learning_rate": 4.1329787234042545e-05, - "loss": 0.019811727106571198, - "step": 260 - }, - { - "epoch": 0.045183290707587385, - "grad_norm": 0.2050849199295044, - "learning_rate": 4.2127659574468086e-05, - "loss": 0.017767781019210817, - "step": 265 - }, - { - "epoch": 0.04603580562659847, - "grad_norm": 0.24867363274097443, - "learning_rate": 4.2925531914893614e-05, - "loss": 0.02127009928226471, - "step": 270 - }, - { - "epoch": 0.04688832054560955, - "grad_norm": 0.20721694827079773, - "learning_rate": 4.372340425531914e-05, - "loss": 0.022770658135414124, - "step": 275 - }, - { - "epoch": 0.04774083546462063, - "grad_norm": 0.19511370360851288, - "learning_rate": 4.452127659574468e-05, - "loss": 0.02165296971797943, - "step": 280 - }, - { - "epoch": 0.04859335038363171, - "grad_norm": 0.19066740572452545, - "learning_rate": 4.5319148936170204e-05, - "loss": 0.020857495069503785, - "step": 285 - }, - { - "epoch": 0.0494458653026428, - "grad_norm": 0.14217901229858398, - "learning_rate": 4.6117021276595746e-05, - "loss": 0.016413891315460206, - "step": 290 - }, - { - "epoch": 0.05029838022165388, - "grad_norm": 0.24177870154380798, - "learning_rate": 4.6914893617021274e-05, - "loss": 0.01902117133140564, - "step": 295 - }, - { - "epoch": 0.05115089514066496, - "grad_norm": 0.2742858827114105, - "learning_rate": 4.77127659574468e-05, - "loss": 0.016554126143455507, - "step": 300 - }, - { - "epoch": 0.05200341005967604, - "grad_norm": 0.24519385397434235, - "learning_rate": 4.851063829787234e-05, - "loss": 0.022036357223987578, - "step": 305 - }, - { - "epoch": 0.05285592497868713, - "grad_norm": 0.296572208404541, - "learning_rate": 4.930851063829787e-05, - "loss": 0.0196581095457077, - "step": 310 - }, - { - "epoch": 0.05370843989769821, - "grad_norm": 0.17092008888721466, - "learning_rate": 5.01063829787234e-05, - "loss": 0.015453743934631347, - "step": 315 - }, - { - "epoch": 0.05456095481670929, - "grad_norm": 0.17074067890644073, - "learning_rate": 5.090425531914893e-05, - "loss": 0.013983796536922454, - "step": 320 - }, - { - "epoch": 0.05541346973572037, - "grad_norm": 0.27302470803260803, - "learning_rate": 5.170212765957446e-05, - "loss": 0.015387402474880218, - "step": 325 - }, - { - "epoch": 0.056265984654731455, - "grad_norm": 0.2603592872619629, - "learning_rate": 5.2499999999999995e-05, - "loss": 0.013557825982570649, - "step": 330 - }, - { - "epoch": 0.05711849957374254, - "grad_norm": 0.23079948127269745, - "learning_rate": 5.329787234042553e-05, - "loss": 0.014809322357177735, - "step": 335 - }, - { - "epoch": 0.057971014492753624, - "grad_norm": 0.15029627084732056, - "learning_rate": 5.409574468085106e-05, - "loss": 0.018204084038734435, - "step": 340 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 0.2930934429168701, - "learning_rate": 5.4893617021276586e-05, - "loss": 0.02035558819770813, - "step": 345 - }, - { - "epoch": 0.059676044330775786, - "grad_norm": 0.19710905849933624, - "learning_rate": 5.569148936170213e-05, - "loss": 0.015025021135807037, - "step": 350 - }, - { - "epoch": 0.060528559249786874, - "grad_norm": 0.13316969573497772, - "learning_rate": 5.6489361702127655e-05, - "loss": 0.01258893460035324, - "step": 355 - }, - { - "epoch": 0.061381074168797956, - "grad_norm": 0.30591729283332825, - "learning_rate": 5.728723404255319e-05, - "loss": 0.0187319278717041, - "step": 360 - }, - { - "epoch": 0.06223358908780904, - "grad_norm": 0.22696663439273834, - "learning_rate": 5.808510638297872e-05, - "loss": 0.015805599093437196, - "step": 365 - }, - { - "epoch": 0.06308610400682012, - "grad_norm": 0.2800130546092987, - "learning_rate": 5.8882978723404245e-05, - "loss": 0.01719983071088791, - "step": 370 - }, - { - "epoch": 0.0639386189258312, - "grad_norm": 0.20671235024929047, - "learning_rate": 5.9680851063829786e-05, - "loss": 0.014449423551559449, - "step": 375 - }, - { - "epoch": 0.06479113384484228, - "grad_norm": 0.1630883365869522, - "learning_rate": 6.0478723404255314e-05, - "loss": 0.017153808474540712, - "step": 380 - }, - { - "epoch": 0.06564364876385337, - "grad_norm": 0.18699344992637634, - "learning_rate": 6.127659574468084e-05, - "loss": 0.015558701753616334, - "step": 385 - }, - { - "epoch": 0.06649616368286446, - "grad_norm": 0.15257929265499115, - "learning_rate": 6.207446808510638e-05, - "loss": 0.018240168690681458, - "step": 390 - }, - { - "epoch": 0.06734867860187553, - "grad_norm": 0.19658304750919342, - "learning_rate": 6.287234042553191e-05, - "loss": 0.015907022356987, - "step": 395 - }, - { - "epoch": 0.06820119352088662, - "grad_norm": 0.3743384778499603, - "learning_rate": 6.367021276595743e-05, - "loss": 0.014921861886978149, - "step": 400 - }, - { - "epoch": 0.06905370843989769, - "grad_norm": 0.254724383354187, - "learning_rate": 6.446808510638298e-05, - "loss": 0.018739913403987885, - "step": 405 - }, - { - "epoch": 0.06990622335890878, - "grad_norm": 0.19365151226520538, - "learning_rate": 6.52659574468085e-05, - "loss": 0.018984711170196532, - "step": 410 - }, - { - "epoch": 0.07075873827791987, - "grad_norm": 0.20728597044944763, - "learning_rate": 6.606382978723404e-05, - "loss": 0.01524859368801117, - "step": 415 - }, - { - "epoch": 0.07161125319693094, - "grad_norm": 0.1415344476699829, - "learning_rate": 6.686170212765957e-05, - "loss": 0.014004582166671753, - "step": 420 - }, - { - "epoch": 0.07246376811594203, - "grad_norm": 0.14169375598430634, - "learning_rate": 6.765957446808509e-05, - "loss": 0.013503608107566834, - "step": 425 - }, - { - "epoch": 0.0733162830349531, - "grad_norm": 0.22518369555473328, - "learning_rate": 6.845744680851064e-05, - "loss": 0.017587104439735414, - "step": 430 - }, - { - "epoch": 0.0741687979539642, - "grad_norm": 0.1091267541050911, - "learning_rate": 6.925531914893616e-05, - "loss": 0.013890406489372254, - "step": 435 - }, - { - "epoch": 0.07502131287297528, - "grad_norm": 0.24982722103595734, - "learning_rate": 7.00531914893617e-05, - "loss": 0.01599075198173523, - "step": 440 - }, - { - "epoch": 0.07587382779198636, - "grad_norm": 0.22311007976531982, - "learning_rate": 7.085106382978723e-05, - "loss": 0.014870230853557587, - "step": 445 - }, - { - "epoch": 0.07672634271099744, - "grad_norm": 0.27064985036849976, - "learning_rate": 7.164893617021276e-05, - "loss": 0.019213163852691652, - "step": 450 - }, - { - "epoch": 0.07757885763000852, - "grad_norm": 0.16876882314682007, - "learning_rate": 7.244680851063829e-05, - "loss": 0.014935044944286347, - "step": 455 - }, - { - "epoch": 0.0784313725490196, - "grad_norm": 0.18644963204860687, - "learning_rate": 7.324468085106382e-05, - "loss": 0.013823372125625611, - "step": 460 - }, - { - "epoch": 0.0792838874680307, - "grad_norm": 0.13067972660064697, - "learning_rate": 7.404255319148935e-05, - "loss": 0.013839980959892273, - "step": 465 - }, - { - "epoch": 0.08013640238704177, - "grad_norm": 0.0976850613951683, - "learning_rate": 7.484042553191489e-05, - "loss": 0.012782379984855652, - "step": 470 - }, - { - "epoch": 0.08098891730605286, - "grad_norm": 0.2523496150970459, - "learning_rate": 7.499999439800074e-05, - "loss": 0.013075053691864014, - "step": 475 - }, - { - "epoch": 0.08184143222506395, - "grad_norm": 0.18349343538284302, - "learning_rate": 7.499997163988164e-05, - "loss": 0.01753528565168381, - "step": 480 - }, - { - "epoch": 0.08269394714407502, - "grad_norm": 0.16528834402561188, - "learning_rate": 7.499993137552834e-05, - "loss": 0.012987489998340606, - "step": 485 - }, - { - "epoch": 0.08354646206308611, - "grad_norm": 0.1918938159942627, - "learning_rate": 7.499987360495964e-05, - "loss": 0.018496687710285186, - "step": 490 - }, - { - "epoch": 0.08439897698209718, - "grad_norm": 0.11452502012252808, - "learning_rate": 7.499979832820255e-05, - "loss": 0.015578755736351013, - "step": 495 - }, - { - "epoch": 0.08525149190110827, - "grad_norm": 0.2084985226392746, - "learning_rate": 7.499970554529216e-05, - "loss": 0.014264023303985596, - "step": 500 - }, - { - "epoch": 0.08610400682011936, - "grad_norm": 0.13777600228786469, - "learning_rate": 7.49995952562718e-05, - "loss": 0.014527246356010437, - "step": 505 - }, - { - "epoch": 0.08695652173913043, - "grad_norm": 0.2515336275100708, - "learning_rate": 7.499946746119296e-05, - "loss": 0.019042417407035828, - "step": 510 - }, - { - "epoch": 0.08780903665814152, - "grad_norm": 0.12859591841697693, - "learning_rate": 7.499932216011531e-05, - "loss": 0.01340993344783783, - "step": 515 - }, - { - "epoch": 0.0886615515771526, - "grad_norm": 0.15603038668632507, - "learning_rate": 7.499915935310667e-05, - "loss": 0.01645509898662567, - "step": 520 - }, - { - "epoch": 0.08951406649616368, - "grad_norm": 0.1493784785270691, - "learning_rate": 7.499897904024303e-05, - "loss": 0.016503919661045075, - "step": 525 - }, - { - "epoch": 0.09036658141517477, - "grad_norm": 0.14551150798797607, - "learning_rate": 7.499878122160858e-05, - "loss": 0.013891169428825378, - "step": 530 - }, - { - "epoch": 0.09121909633418585, - "grad_norm": 0.12197990715503693, - "learning_rate": 7.499856589729569e-05, - "loss": 0.01531532108783722, - "step": 535 - }, - { - "epoch": 0.09207161125319693, - "grad_norm": 0.24787496030330658, - "learning_rate": 7.499833306740485e-05, - "loss": 0.016374409198760986, - "step": 540 - }, - { - "epoch": 0.09292412617220801, - "grad_norm": 0.10902808606624603, - "learning_rate": 7.499808273204476e-05, - "loss": 0.013505718111991883, - "step": 545 - }, - { - "epoch": 0.0937766410912191, - "grad_norm": 0.16540755331516266, - "learning_rate": 7.499781489133228e-05, - "loss": 0.016257329285144805, - "step": 550 - }, - { - "epoch": 0.09462915601023018, - "grad_norm": 0.1228717565536499, - "learning_rate": 7.499752954539245e-05, - "loss": 0.011345921456813813, - "step": 555 - }, - { - "epoch": 0.09548167092924126, - "grad_norm": 0.12704797089099884, - "learning_rate": 7.49972266943585e-05, - "loss": 0.014449910819530487, - "step": 560 - }, - { - "epoch": 0.09633418584825235, - "grad_norm": 0.2014201581478119, - "learning_rate": 7.499690633837178e-05, - "loss": 0.01456935852766037, - "step": 565 - }, - { - "epoch": 0.09718670076726342, - "grad_norm": 0.17480657994747162, - "learning_rate": 7.499656847758187e-05, - "loss": 0.014413098990917205, - "step": 570 - }, - { - "epoch": 0.09803921568627451, - "grad_norm": 0.18946893513202667, - "learning_rate": 7.499621311214646e-05, - "loss": 0.0139508917927742, - "step": 575 - }, - { - "epoch": 0.0988917306052856, - "grad_norm": 0.15367820858955383, - "learning_rate": 7.499584024223149e-05, - "loss": 0.014091435074806213, - "step": 580 - }, - { - "epoch": 0.09974424552429667, - "grad_norm": 0.15018688142299652, - "learning_rate": 7.499544986801099e-05, - "loss": 0.016737687587738036, - "step": 585 - }, - { - "epoch": 0.10059676044330776, - "grad_norm": 0.17522579431533813, - "learning_rate": 7.499504198966722e-05, - "loss": 0.015822765231132508, - "step": 590 - }, - { - "epoch": 0.10144927536231885, - "grad_norm": 0.12634818255901337, - "learning_rate": 7.499461660739059e-05, - "loss": 0.015363042056560517, - "step": 595 - }, - { - "epoch": 0.10230179028132992, - "grad_norm": 0.12466495484113693, - "learning_rate": 7.499417372137968e-05, - "loss": 0.013208043575286866, - "step": 600 - }, - { - "epoch": 0.10315430520034101, - "grad_norm": 0.18877951800823212, - "learning_rate": 7.499371333184125e-05, - "loss": 0.016261917352676392, - "step": 605 - }, - { - "epoch": 0.10400682011935208, - "grad_norm": 0.21521399915218353, - "learning_rate": 7.49932354389902e-05, - "loss": 0.013861049711704255, - "step": 610 - }, - { - "epoch": 0.10485933503836317, - "grad_norm": 0.1375613659620285, - "learning_rate": 7.499274004304964e-05, - "loss": 0.015597744286060334, - "step": 615 - }, - { - "epoch": 0.10571184995737426, - "grad_norm": 0.16078200936317444, - "learning_rate": 7.499222714425087e-05, - "loss": 0.018308353424072266, - "step": 620 - }, - { - "epoch": 0.10656436487638533, - "grad_norm": 0.15485632419586182, - "learning_rate": 7.499169674283328e-05, - "loss": 0.015836401283740996, - "step": 625 - }, - { - "epoch": 0.10741687979539642, - "grad_norm": 0.14019452035427094, - "learning_rate": 7.499114883904451e-05, - "loss": 0.014591602981090546, - "step": 630 - }, - { - "epoch": 0.1082693947144075, - "grad_norm": 0.2042212039232254, - "learning_rate": 7.499058343314031e-05, - "loss": 0.01718664914369583, - "step": 635 - }, - { - "epoch": 0.10912190963341858, - "grad_norm": 0.13801656663417816, - "learning_rate": 7.499000052538467e-05, - "loss": 0.015579727292060853, - "step": 640 - }, - { - "epoch": 0.10997442455242967, - "grad_norm": 0.12787523865699768, - "learning_rate": 7.498940011604968e-05, - "loss": 0.012525486946105956, - "step": 645 - }, - { - "epoch": 0.11082693947144075, - "grad_norm": 0.1752539724111557, - "learning_rate": 7.498878220541564e-05, - "loss": 0.014286568760871888, - "step": 650 - }, - { - "epoch": 0.11167945439045183, - "grad_norm": 0.12485212087631226, - "learning_rate": 7.498814679377101e-05, - "loss": 0.01482405811548233, - "step": 655 - }, - { - "epoch": 0.11253196930946291, - "grad_norm": 0.10980220139026642, - "learning_rate": 7.498749388141243e-05, - "loss": 0.01597980558872223, - "step": 660 - }, - { - "epoch": 0.113384484228474, - "grad_norm": 0.09144977480173111, - "learning_rate": 7.498682346864469e-05, - "loss": 0.011583130061626434, - "step": 665 - }, - { - "epoch": 0.11423699914748509, - "grad_norm": 0.11955336481332779, - "learning_rate": 7.498613555578076e-05, - "loss": 0.013009116053581238, - "step": 670 - }, - { - "epoch": 0.11508951406649616, - "grad_norm": 0.14505070447921753, - "learning_rate": 7.49854301431418e-05, - "loss": 0.013878281414508819, - "step": 675 - }, - { - "epoch": 0.11594202898550725, - "grad_norm": 0.15674598515033722, - "learning_rate": 7.49847072310571e-05, - "loss": 0.014717698097229004, - "step": 680 - }, - { - "epoch": 0.11679454390451834, - "grad_norm": 0.16684003174304962, - "learning_rate": 7.498396681986413e-05, - "loss": 0.015567027032375336, - "step": 685 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 0.13314439356327057, - "learning_rate": 7.498320890990857e-05, - "loss": 0.017679129540920258, - "step": 690 - }, - { - "epoch": 0.1184995737425405, - "grad_norm": 0.1099700927734375, - "learning_rate": 7.498243350154423e-05, - "loss": 0.015660777688026428, - "step": 695 - }, - { - "epoch": 0.11935208866155157, - "grad_norm": 0.09305288642644882, - "learning_rate": 7.498164059513307e-05, - "loss": 0.011864218115806579, - "step": 700 - }, - { - "epoch": 0.12020460358056266, - "grad_norm": 0.1284978687763214, - "learning_rate": 7.498083019104527e-05, - "loss": 0.013128276169300079, - "step": 705 - }, - { - "epoch": 0.12105711849957375, - "grad_norm": 0.19205476343631744, - "learning_rate": 7.498000228965913e-05, - "loss": 0.014518238604068756, - "step": 710 - }, - { - "epoch": 0.12190963341858482, - "grad_norm": 0.25064221024513245, - "learning_rate": 7.497915689136119e-05, - "loss": 0.014235696196556092, - "step": 715 - }, - { - "epoch": 0.12276214833759591, - "grad_norm": 0.11866044998168945, - "learning_rate": 7.497829399654607e-05, - "loss": 0.013622967898845673, - "step": 720 - }, - { - "epoch": 0.12361466325660699, - "grad_norm": 0.1366252452135086, - "learning_rate": 7.49774136056166e-05, - "loss": 0.013592693209648132, - "step": 725 - }, - { - "epoch": 0.12446717817561807, - "grad_norm": 0.12722773849964142, - "learning_rate": 7.497651571898379e-05, - "loss": 0.02055167257785797, - "step": 730 - }, - { - "epoch": 0.12531969309462915, - "grad_norm": 0.07723517715930939, - "learning_rate": 7.49756003370668e-05, - "loss": 0.012377069890499115, - "step": 735 - }, - { - "epoch": 0.12617220801364024, - "grad_norm": 0.11081967502832413, - "learning_rate": 7.497466746029293e-05, - "loss": 0.013797640800476074, - "step": 740 - }, - { - "epoch": 0.12702472293265132, - "grad_norm": 0.1117677390575409, - "learning_rate": 7.497371708909771e-05, - "loss": 0.01338990479707718, - "step": 745 - }, - { - "epoch": 0.1278772378516624, - "grad_norm": 0.24659112095832825, - "learning_rate": 7.497274922392483e-05, - "loss": 0.012844511866569519, - "step": 750 - }, - { - "epoch": 0.1287297527706735, - "grad_norm": 0.24900244176387787, - "learning_rate": 7.497176386522606e-05, - "loss": 0.015656352043151855, - "step": 755 - }, - { - "epoch": 0.12958226768968456, - "grad_norm": 0.1331390142440796, - "learning_rate": 7.497076101346144e-05, - "loss": 0.013722085952758789, - "step": 760 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 0.1224697008728981, - "learning_rate": 7.496974066909913e-05, - "loss": 0.011186304688453674, - "step": 765 - }, - { - "epoch": 0.13128729752770674, - "grad_norm": 0.11053820699453354, - "learning_rate": 7.496870283261546e-05, - "loss": 0.012894454598426818, - "step": 770 - }, - { - "epoch": 0.13213981244671782, - "grad_norm": 0.09663277864456177, - "learning_rate": 7.49676475044949e-05, - "loss": 0.015331995487213135, - "step": 775 - }, - { - "epoch": 0.1329923273657289, - "grad_norm": 0.1483219712972641, - "learning_rate": 7.496657468523014e-05, - "loss": 0.015070399641990662, - "step": 780 - }, - { - "epoch": 0.13384484228473997, - "grad_norm": 0.12375539541244507, - "learning_rate": 7.496548437532202e-05, - "loss": 0.013722005486488342, - "step": 785 - }, - { - "epoch": 0.13469735720375106, - "grad_norm": 0.14662130177021027, - "learning_rate": 7.496437657527949e-05, - "loss": 0.012852996587753296, - "step": 790 - }, - { - "epoch": 0.13554987212276215, - "grad_norm": 0.1740300953388214, - "learning_rate": 7.496325128561975e-05, - "loss": 0.014796209335327149, - "step": 795 - }, - { - "epoch": 0.13640238704177324, - "grad_norm": 0.10670243203639984, - "learning_rate": 7.496210850686809e-05, - "loss": 0.013983005285263061, - "step": 800 - }, - { - "epoch": 0.13725490196078433, - "grad_norm": 0.1362515240907669, - "learning_rate": 7.496094823955801e-05, - "loss": 0.014472618699073792, - "step": 805 - }, - { - "epoch": 0.13810741687979539, - "grad_norm": 0.11403689533472061, - "learning_rate": 7.495977048423117e-05, - "loss": 0.01294848918914795, - "step": 810 - }, - { - "epoch": 0.13895993179880647, - "grad_norm": 0.11532565206289291, - "learning_rate": 7.495857524143738e-05, - "loss": 0.011346425861120224, - "step": 815 - }, - { - "epoch": 0.13981244671781756, - "grad_norm": 0.1506761759519577, - "learning_rate": 7.495736251173463e-05, - "loss": 0.016532811522483825, - "step": 820 - }, - { - "epoch": 0.14066496163682865, - "grad_norm": 0.10915899276733398, - "learning_rate": 7.495613229568903e-05, - "loss": 0.01278679072856903, - "step": 825 - }, - { - "epoch": 0.14151747655583974, - "grad_norm": 0.10394130647182465, - "learning_rate": 7.49548845938749e-05, - "loss": 0.011449626088142395, - "step": 830 - }, - { - "epoch": 0.1423699914748508, - "grad_norm": 0.08730677515268326, - "learning_rate": 7.495361940687475e-05, - "loss": 0.011916645616292954, - "step": 835 - }, - { - "epoch": 0.1432225063938619, - "grad_norm": 0.08257348835468292, - "learning_rate": 7.495233673527914e-05, - "loss": 0.013689276576042176, - "step": 840 - }, - { - "epoch": 0.14407502131287298, - "grad_norm": 0.182078555226326, - "learning_rate": 7.495103657968692e-05, - "loss": 0.019141729176044463, - "step": 845 - }, - { - "epoch": 0.14492753623188406, - "grad_norm": 0.105568528175354, - "learning_rate": 7.494971894070501e-05, - "loss": 0.015522226691246033, - "step": 850 - }, - { - "epoch": 0.14578005115089515, - "grad_norm": 0.09030122309923172, - "learning_rate": 7.494838381894856e-05, - "loss": 0.012900182604789734, - "step": 855 - }, - { - "epoch": 0.1466325660699062, - "grad_norm": 0.11199921369552612, - "learning_rate": 7.494703121504082e-05, - "loss": 0.013011230528354645, - "step": 860 - }, - { - "epoch": 0.1474850809889173, - "grad_norm": 0.11342430859804153, - "learning_rate": 7.494566112961325e-05, - "loss": 0.015477502346038818, - "step": 865 - }, - { - "epoch": 0.1483375959079284, - "grad_norm": 0.07956613600254059, - "learning_rate": 7.494427356330544e-05, - "loss": 0.011270551383495331, - "step": 870 - }, - { - "epoch": 0.14919011082693948, - "grad_norm": 0.13356897234916687, - "learning_rate": 7.494286851676515e-05, - "loss": 0.012481572479009629, - "step": 875 - }, - { - "epoch": 0.15004262574595056, - "grad_norm": 0.14493827521800995, - "learning_rate": 7.494144599064833e-05, - "loss": 0.015318951010704041, - "step": 880 - }, - { - "epoch": 0.15089514066496162, - "grad_norm": 0.13254614174365997, - "learning_rate": 7.494000598561902e-05, - "loss": 0.012697519361972808, - "step": 885 - }, - { - "epoch": 0.1517476555839727, - "grad_norm": 0.12164720892906189, - "learning_rate": 7.49385485023495e-05, - "loss": 0.011361779272556305, - "step": 890 - }, - { - "epoch": 0.1526001705029838, - "grad_norm": 0.10743863880634308, - "learning_rate": 7.493707354152015e-05, - "loss": 0.012824115157127381, - "step": 895 - }, - { - "epoch": 0.1534526854219949, - "grad_norm": 0.0933203473687172, - "learning_rate": 7.493558110381954e-05, - "loss": 0.012234293669462205, - "step": 900 - }, - { - "epoch": 0.15430520034100598, - "grad_norm": 0.10252948850393295, - "learning_rate": 7.493407118994437e-05, - "loss": 0.01874733567237854, - "step": 905 - }, - { - "epoch": 0.15515771526001704, - "grad_norm": 0.1184248998761177, - "learning_rate": 7.493254380059954e-05, - "loss": 0.012014241516590118, - "step": 910 - }, - { - "epoch": 0.15601023017902813, - "grad_norm": 0.10140799731016159, - "learning_rate": 7.49309989364981e-05, - "loss": 0.013288506865501403, - "step": 915 - }, - { - "epoch": 0.1568627450980392, - "grad_norm": 0.15704941749572754, - "learning_rate": 7.492943659836121e-05, - "loss": 0.012907981872558594, - "step": 920 - }, - { - "epoch": 0.1577152600170503, - "grad_norm": 0.10368761420249939, - "learning_rate": 7.492785678691822e-05, - "loss": 0.01384827345609665, - "step": 925 - }, - { - "epoch": 0.1585677749360614, - "grad_norm": 0.13517284393310547, - "learning_rate": 7.492625950290668e-05, - "loss": 0.014644764363765717, - "step": 930 - }, - { - "epoch": 0.15942028985507245, - "grad_norm": 0.08189263194799423, - "learning_rate": 7.492464474707222e-05, - "loss": 0.011974713951349258, - "step": 935 - }, - { - "epoch": 0.16027280477408354, - "grad_norm": 0.11737149208784103, - "learning_rate": 7.492301252016867e-05, - "loss": 0.014023615419864655, - "step": 940 - }, - { - "epoch": 0.16112531969309463, - "grad_norm": 0.15778031945228577, - "learning_rate": 7.492136282295801e-05, - "loss": 0.01533726304769516, - "step": 945 - }, - { - "epoch": 0.16197783461210571, - "grad_norm": 0.14194118976593018, - "learning_rate": 7.491969565621036e-05, - "loss": 0.01569782793521881, - "step": 950 - }, - { - "epoch": 0.1628303495311168, - "grad_norm": 0.12579558789730072, - "learning_rate": 7.491801102070403e-05, - "loss": 0.012844063341617584, - "step": 955 - }, - { - "epoch": 0.1636828644501279, - "grad_norm": 0.10578597337007523, - "learning_rate": 7.491630891722547e-05, - "loss": 0.011186198890209198, - "step": 960 - }, - { - "epoch": 0.16453537936913895, - "grad_norm": 0.12398207187652588, - "learning_rate": 7.491458934656925e-05, - "loss": 0.014328254759311676, - "step": 965 - }, - { - "epoch": 0.16538789428815004, - "grad_norm": 0.09390626102685928, - "learning_rate": 7.491285230953814e-05, - "loss": 0.013881708681583404, - "step": 970 - }, - { - "epoch": 0.16624040920716113, - "grad_norm": 0.17196106910705566, - "learning_rate": 7.491109780694303e-05, - "loss": 0.014424234628677368, - "step": 975 - }, - { - "epoch": 0.16709292412617222, - "grad_norm": 0.06994624435901642, - "learning_rate": 7.490932583960302e-05, - "loss": 0.010434426367282867, - "step": 980 - }, - { - "epoch": 0.1679454390451833, - "grad_norm": 0.09414499998092651, - "learning_rate": 7.490753640834527e-05, - "loss": 0.010483803600072861, - "step": 985 - }, - { - "epoch": 0.16879795396419436, - "grad_norm": 0.11002875864505768, - "learning_rate": 7.490572951400518e-05, - "loss": 0.009266073256731034, - "step": 990 - }, - { - "epoch": 0.16965046888320545, - "grad_norm": 0.14956022799015045, - "learning_rate": 7.490390515742626e-05, - "loss": 0.015529079735279084, - "step": 995 - }, - { - "epoch": 0.17050298380221654, - "grad_norm": 0.13828666508197784, - "learning_rate": 7.490206333946019e-05, - "loss": 0.011511271446943283, - "step": 1000 - }, - { - "epoch": 0.17135549872122763, - "grad_norm": 0.1265997439622879, - "learning_rate": 7.490020406096678e-05, - "loss": 0.010465707629919052, - "step": 1005 - }, - { - "epoch": 0.17220801364023872, - "grad_norm": 0.07772056013345718, - "learning_rate": 7.489832732281401e-05, - "loss": 0.013828210532665253, - "step": 1010 - }, - { - "epoch": 0.17306052855924978, - "grad_norm": 0.08055376261472702, - "learning_rate": 7.489643312587799e-05, - "loss": 0.013010218739509583, - "step": 1015 - }, - { - "epoch": 0.17391304347826086, - "grad_norm": 0.09059367328882217, - "learning_rate": 7.489452147104301e-05, - "loss": 0.013864235579967498, - "step": 1020 - }, - { - "epoch": 0.17476555839727195, - "grad_norm": 0.15273553133010864, - "learning_rate": 7.489259235920149e-05, - "loss": 0.013432112336158753, - "step": 1025 - }, - { - "epoch": 0.17561807331628304, - "grad_norm": 0.11343793570995331, - "learning_rate": 7.489064579125399e-05, - "loss": 0.01213686615228653, - "step": 1030 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 0.11880069226026535, - "learning_rate": 7.488868176810926e-05, - "loss": 0.012188264727592468, - "step": 1035 - }, - { - "epoch": 0.1773231031543052, - "grad_norm": 0.16270127892494202, - "learning_rate": 7.488670029068415e-05, - "loss": 0.015294317901134492, - "step": 1040 - }, - { - "epoch": 0.17817561807331628, - "grad_norm": 0.11542797088623047, - "learning_rate": 7.488470135990369e-05, - "loss": 0.013500772416591644, - "step": 1045 - }, - { - "epoch": 0.17902813299232737, - "grad_norm": 0.09579882025718689, - "learning_rate": 7.488268497670103e-05, - "loss": 0.013453315198421478, - "step": 1050 - }, - { - "epoch": 0.17988064791133845, - "grad_norm": 0.08847666531801224, - "learning_rate": 7.488065114201752e-05, - "loss": 0.0153861403465271, - "step": 1055 - }, - { - "epoch": 0.18073316283034954, - "grad_norm": 0.11167823523283005, - "learning_rate": 7.487859985680257e-05, - "loss": 0.011502107977867127, - "step": 1060 - }, - { - "epoch": 0.1815856777493606, - "grad_norm": 0.11255413293838501, - "learning_rate": 7.487653112201385e-05, - "loss": 0.012194579839706421, - "step": 1065 - }, - { - "epoch": 0.1824381926683717, - "grad_norm": 0.10929680615663528, - "learning_rate": 7.487444493861705e-05, - "loss": 0.015874122083187104, - "step": 1070 - }, - { - "epoch": 0.18329070758738278, - "grad_norm": 0.10753592848777771, - "learning_rate": 7.487234130758613e-05, - "loss": 0.009445396810770034, - "step": 1075 - }, - { - "epoch": 0.18414322250639387, - "grad_norm": 0.11368737369775772, - "learning_rate": 7.487022022990309e-05, - "loss": 0.011674505472183228, - "step": 1080 - }, - { - "epoch": 0.18499573742540495, - "grad_norm": 0.12944836914539337, - "learning_rate": 7.486808170655813e-05, - "loss": 0.011856313049793243, - "step": 1085 - }, - { - "epoch": 0.18584825234441602, - "grad_norm": 0.0833214819431305, - "learning_rate": 7.48659257385496e-05, - "loss": 0.014119544625282287, - "step": 1090 - }, - { - "epoch": 0.1867007672634271, - "grad_norm": 0.11955790221691132, - "learning_rate": 7.486375232688397e-05, - "loss": 0.012977911531925202, - "step": 1095 - }, - { - "epoch": 0.1875532821824382, - "grad_norm": 0.1452455222606659, - "learning_rate": 7.486156147257584e-05, - "loss": 0.015410827100276947, - "step": 1100 - }, - { - "epoch": 0.18840579710144928, - "grad_norm": 0.09630092978477478, - "learning_rate": 7.485935317664801e-05, - "loss": 0.013698874413967133, - "step": 1105 - }, - { - "epoch": 0.18925831202046037, - "grad_norm": 0.11687257140874863, - "learning_rate": 7.485712744013137e-05, - "loss": 0.013196484744548797, - "step": 1110 - }, - { - "epoch": 0.19011082693947143, - "grad_norm": 0.08894632011651993, - "learning_rate": 7.485488426406495e-05, - "loss": 0.01540881097316742, - "step": 1115 - }, - { - "epoch": 0.19096334185848252, - "grad_norm": 0.09196458756923676, - "learning_rate": 7.485262364949597e-05, - "loss": 0.012018527090549468, - "step": 1120 - }, - { - "epoch": 0.1918158567774936, - "grad_norm": 0.12328553944826126, - "learning_rate": 7.485034559747974e-05, - "loss": 0.014925773441791534, - "step": 1125 - }, - { - "epoch": 0.1926683716965047, - "grad_norm": 0.12566202878952026, - "learning_rate": 7.484805010907975e-05, - "loss": 0.01104198843240738, - "step": 1130 - }, - { - "epoch": 0.19352088661551578, - "grad_norm": 0.1022765263915062, - "learning_rate": 7.484573718536758e-05, - "loss": 0.0118367500603199, - "step": 1135 - }, - { - "epoch": 0.19437340153452684, - "grad_norm": 0.09682224690914154, - "learning_rate": 7.4843406827423e-05, - "loss": 0.011423001438379288, - "step": 1140 - }, - { - "epoch": 0.19522591645353793, - "grad_norm": 0.15166254341602325, - "learning_rate": 7.484105903633388e-05, - "loss": 0.014048118889331818, - "step": 1145 - }, - { - "epoch": 0.19607843137254902, - "grad_norm": 0.09285032004117966, - "learning_rate": 7.483869381319627e-05, - "loss": 0.012882034480571746, - "step": 1150 - }, - { - "epoch": 0.1969309462915601, - "grad_norm": 0.09011214971542358, - "learning_rate": 7.483631115911434e-05, - "loss": 0.01419239491224289, - "step": 1155 - }, - { - "epoch": 0.1977834612105712, - "grad_norm": 0.14540383219718933, - "learning_rate": 7.483391107520037e-05, - "loss": 0.014623096585273743, - "step": 1160 - }, - { - "epoch": 0.19863597612958228, - "grad_norm": 0.12326448410749435, - "learning_rate": 7.483149356257479e-05, - "loss": 0.013109591603279114, - "step": 1165 - }, - { - "epoch": 0.19948849104859334, - "grad_norm": 0.07067535817623138, - "learning_rate": 7.482905862236622e-05, - "loss": 0.013740380108356477, - "step": 1170 - }, - { - "epoch": 0.20034100596760443, - "grad_norm": 0.10170529782772064, - "learning_rate": 7.482660625571134e-05, - "loss": 0.011151721328496933, - "step": 1175 - }, - { - "epoch": 0.20119352088661552, - "grad_norm": 0.10074683278799057, - "learning_rate": 7.482413646375498e-05, - "loss": 0.01180294007062912, - "step": 1180 - }, - { - "epoch": 0.2020460358056266, - "grad_norm": 0.047992780804634094, - "learning_rate": 7.482164924765016e-05, - "loss": 0.01065710186958313, - "step": 1185 - }, - { - "epoch": 0.2028985507246377, - "grad_norm": 0.1279197484254837, - "learning_rate": 7.481914460855796e-05, - "loss": 0.012219739705324173, - "step": 1190 - }, - { - "epoch": 0.20375106564364875, - "grad_norm": 0.11339649558067322, - "learning_rate": 7.481662254764765e-05, - "loss": 0.012664712965488434, - "step": 1195 - }, - { - "epoch": 0.20460358056265984, - "grad_norm": 0.10576959699392319, - "learning_rate": 7.481408306609662e-05, - "loss": 0.010009048134088516, - "step": 1200 - }, - { - "epoch": 0.20545609548167093, - "grad_norm": 0.08073283731937408, - "learning_rate": 7.481152616509037e-05, - "loss": 0.011126396805047989, - "step": 1205 - }, - { - "epoch": 0.20630861040068202, - "grad_norm": 0.09888558834791183, - "learning_rate": 7.480895184582253e-05, - "loss": 0.013096305727958679, - "step": 1210 - }, - { - "epoch": 0.2071611253196931, - "grad_norm": 0.09703118354082108, - "learning_rate": 7.48063601094949e-05, - "loss": 0.010653502494096755, - "step": 1215 - }, - { - "epoch": 0.20801364023870417, - "grad_norm": 0.10693266987800598, - "learning_rate": 7.48037509573174e-05, - "loss": 0.012283077836036682, - "step": 1220 - }, - { - "epoch": 0.20886615515771526, - "grad_norm": 0.1024889275431633, - "learning_rate": 7.480112439050804e-05, - "loss": 0.012971282005310059, - "step": 1225 - }, - { - "epoch": 0.20971867007672634, - "grad_norm": 0.10043203830718994, - "learning_rate": 7.4798480410293e-05, - "loss": 0.011451539397239686, - "step": 1230 - }, - { - "epoch": 0.21057118499573743, - "grad_norm": 0.11248672753572464, - "learning_rate": 7.47958190179066e-05, - "loss": 0.012805460393428803, - "step": 1235 - }, - { - "epoch": 0.21142369991474852, - "grad_norm": 0.08651525527238846, - "learning_rate": 7.479314021459123e-05, - "loss": 0.013016811013221741, - "step": 1240 - }, - { - "epoch": 0.21227621483375958, - "grad_norm": 0.06062095984816551, - "learning_rate": 7.479044400159746e-05, - "loss": 0.01299230456352234, - "step": 1245 - }, - { - "epoch": 0.21312872975277067, - "grad_norm": 0.1589215248823166, - "learning_rate": 7.478773038018397e-05, - "loss": 0.012607543170452118, - "step": 1250 - }, - { - "epoch": 0.21398124467178176, - "grad_norm": 0.12105076014995575, - "learning_rate": 7.478499935161758e-05, - "loss": 0.012772174179553985, - "step": 1255 - }, - { - "epoch": 0.21483375959079284, - "grad_norm": 0.0846070721745491, - "learning_rate": 7.478225091717323e-05, - "loss": 0.009387130290269852, - "step": 1260 - }, - { - "epoch": 0.21568627450980393, - "grad_norm": 0.054560381919145584, - "learning_rate": 7.477948507813396e-05, - "loss": 0.013299009203910828, - "step": 1265 - }, - { - "epoch": 0.216538789428815, - "grad_norm": 0.10125566273927689, - "learning_rate": 7.477670183579094e-05, - "loss": 0.013010343909263611, - "step": 1270 - }, - { - "epoch": 0.21739130434782608, - "grad_norm": 0.10493458807468414, - "learning_rate": 7.477390119144353e-05, - "loss": 0.013531042635440827, - "step": 1275 - }, - { - "epoch": 0.21824381926683717, - "grad_norm": 0.07453935593366623, - "learning_rate": 7.477108314639913e-05, - "loss": 0.01330820918083191, - "step": 1280 - }, - { - "epoch": 0.21909633418584826, - "grad_norm": 0.14430643618106842, - "learning_rate": 7.47682477019733e-05, - "loss": 0.015061555802822113, - "step": 1285 - }, - { - "epoch": 0.21994884910485935, - "grad_norm": 0.1073407456278801, - "learning_rate": 7.476539485948973e-05, - "loss": 0.011137319356203079, - "step": 1290 - }, - { - "epoch": 0.2208013640238704, - "grad_norm": 0.16794899106025696, - "learning_rate": 7.476252462028021e-05, - "loss": 0.013223762810230254, - "step": 1295 - }, - { - "epoch": 0.2216538789428815, - "grad_norm": 0.11509175598621368, - "learning_rate": 7.475963698568468e-05, - "loss": 0.012790821492671967, - "step": 1300 - }, - { - "epoch": 0.22250639386189258, - "grad_norm": 0.08615118265151978, - "learning_rate": 7.475673195705116e-05, - "loss": 0.011050455272197723, - "step": 1305 - }, - { - "epoch": 0.22335890878090367, - "grad_norm": 0.1186874732375145, - "learning_rate": 7.475380953573583e-05, - "loss": 0.011253353953361512, - "step": 1310 - }, - { - "epoch": 0.22421142369991476, - "grad_norm": 0.08680208027362823, - "learning_rate": 7.475086972310297e-05, - "loss": 0.010736887156963349, - "step": 1315 - }, - { - "epoch": 0.22506393861892582, - "grad_norm": 0.08204677700996399, - "learning_rate": 7.474791252052498e-05, - "loss": 0.011695361882448196, - "step": 1320 - }, - { - "epoch": 0.2259164535379369, - "grad_norm": 0.08945680409669876, - "learning_rate": 7.47449379293824e-05, - "loss": 0.01684057414531708, - "step": 1325 - }, - { - "epoch": 0.226768968456948, - "grad_norm": 0.12350911647081375, - "learning_rate": 7.474194595106384e-05, - "loss": 0.012560060620307923, - "step": 1330 - }, - { - "epoch": 0.22762148337595908, - "grad_norm": 0.09201455116271973, - "learning_rate": 7.473893658696605e-05, - "loss": 0.01128845140337944, - "step": 1335 - }, - { - "epoch": 0.22847399829497017, - "grad_norm": 0.12938448786735535, - "learning_rate": 7.473590983849396e-05, - "loss": 0.011510471999645232, - "step": 1340 - }, - { - "epoch": 0.22932651321398123, - "grad_norm": 0.0837291032075882, - "learning_rate": 7.473286570706047e-05, - "loss": 0.011677465587854385, - "step": 1345 - }, - { - "epoch": 0.23017902813299232, - "grad_norm": 0.12514546513557434, - "learning_rate": 7.472980419408675e-05, - "loss": 0.01308433711528778, - "step": 1350 - }, - { - "epoch": 0.2310315430520034, - "grad_norm": 0.05483829975128174, - "learning_rate": 7.472672530100199e-05, - "loss": 0.007203156501054764, - "step": 1355 - }, - { - "epoch": 0.2318840579710145, - "grad_norm": 0.13903425633907318, - "learning_rate": 7.472362902924352e-05, - "loss": 0.013231572508811951, - "step": 1360 - }, - { - "epoch": 0.23273657289002558, - "grad_norm": 0.09622041881084442, - "learning_rate": 7.472051538025678e-05, - "loss": 0.013325902819633483, - "step": 1365 - }, - { - "epoch": 0.23358908780903667, - "grad_norm": 0.0792275071144104, - "learning_rate": 7.471738435549533e-05, - "loss": 0.011098403483629227, - "step": 1370 - }, - { - "epoch": 0.23444160272804773, - "grad_norm": 0.11833506077528, - "learning_rate": 7.471423595642084e-05, - "loss": 0.014845246076583862, - "step": 1375 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 0.07531571388244629, - "learning_rate": 7.471107018450309e-05, - "loss": 0.011498528718948364, - "step": 1380 - }, - { - "epoch": 0.2361466325660699, - "grad_norm": 0.08739249408245087, - "learning_rate": 7.470788704121995e-05, - "loss": 0.013241058588027954, - "step": 1385 - }, - { - "epoch": 0.236999147485081, - "grad_norm": 0.07113732397556305, - "learning_rate": 7.470468652805743e-05, - "loss": 0.009632241725921632, - "step": 1390 - }, - { - "epoch": 0.23785166240409208, - "grad_norm": 0.07838430255651474, - "learning_rate": 7.470146864650965e-05, - "loss": 0.009344339370727539, - "step": 1395 - }, - { - "epoch": 0.23870417732310314, - "grad_norm": 0.1086372509598732, - "learning_rate": 7.46982333980788e-05, - "loss": 0.014708395302295684, - "step": 1400 - }, - { - "epoch": 0.23955669224211423, - "grad_norm": 0.06628046184778214, - "learning_rate": 7.469498078427519e-05, - "loss": 0.011472882330417633, - "step": 1405 - }, - { - "epoch": 0.24040920716112532, - "grad_norm": 0.13003386557102203, - "learning_rate": 7.46917108066173e-05, - "loss": 0.011933800578117371, - "step": 1410 - }, - { - "epoch": 0.2412617220801364, - "grad_norm": 0.07365831732749939, - "learning_rate": 7.468842346663162e-05, - "loss": 0.0102902352809906, - "step": 1415 - }, - { - "epoch": 0.2421142369991475, - "grad_norm": 0.11540158838033676, - "learning_rate": 7.468511876585279e-05, - "loss": 0.012016136944293977, - "step": 1420 - }, - { - "epoch": 0.24296675191815856, - "grad_norm": 0.11687944084405899, - "learning_rate": 7.468179670582359e-05, - "loss": 0.01773642897605896, - "step": 1425 - }, - { - "epoch": 0.24381926683716965, - "grad_norm": 0.10464506596326828, - "learning_rate": 7.467845728809483e-05, - "loss": 0.009476778656244278, - "step": 1430 - }, - { - "epoch": 0.24467178175618073, - "grad_norm": 0.11479093879461288, - "learning_rate": 7.46751005142255e-05, - "loss": 0.014100676774978638, - "step": 1435 - }, - { - "epoch": 0.24552429667519182, - "grad_norm": 0.1388610154390335, - "learning_rate": 7.46717263857826e-05, - "loss": 0.012735754251480103, - "step": 1440 - }, - { - "epoch": 0.2463768115942029, - "grad_norm": 0.08022485673427582, - "learning_rate": 7.466833490434132e-05, - "loss": 0.014391189813613892, - "step": 1445 - }, - { - "epoch": 0.24722932651321397, - "grad_norm": 0.12174725532531738, - "learning_rate": 7.466492607148492e-05, - "loss": 0.013387931883335114, - "step": 1450 - }, - { - "epoch": 0.24808184143222506, - "grad_norm": 0.1102161779999733, - "learning_rate": 7.466149988880474e-05, - "loss": 0.01143224760890007, - "step": 1455 - }, - { - "epoch": 0.24893435635123615, - "grad_norm": 0.14878468215465546, - "learning_rate": 7.465805635790024e-05, - "loss": 0.010428830236196517, - "step": 1460 - }, - { - "epoch": 0.24978687127024723, - "grad_norm": 0.3046579658985138, - "learning_rate": 7.4654595480379e-05, - "loss": 0.012648795545101166, - "step": 1465 - }, - { - "epoch": 0.24995737425404946, - "eval_loss": 0.036103956401348114, - "eval_runtime": 3.6524, - "eval_samples_per_second": 68.995, - "eval_steps_per_second": 1.095, - "step": 1466 - }, - { - "eval_cer_subset": 0.01393977885257381, - "eval_cer_subset_edit_distance": 856, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 1466 - }, - { - "epoch": 0.2506393861892583, - "grad_norm": 0.10660448670387268, - "learning_rate": 7.465111725785664e-05, - "loss": 0.013486798107624053, - "step": 1470 - }, - { - "epoch": 0.2514919011082694, - "grad_norm": 0.13888458907604218, - "learning_rate": 7.464762169195693e-05, - "loss": 0.015365575253963471, - "step": 1475 - }, - { - "epoch": 0.25234441602728047, - "grad_norm": 0.14945067465305328, - "learning_rate": 7.464410878431169e-05, - "loss": 0.01226709708571434, - "step": 1480 - }, - { - "epoch": 0.2531969309462916, - "grad_norm": 0.09638198465108871, - "learning_rate": 7.464057853656089e-05, - "loss": 0.012688608467578888, - "step": 1485 - }, - { - "epoch": 0.25404944586530265, - "grad_norm": 0.05725576728582382, - "learning_rate": 7.463703095035256e-05, - "loss": 0.011445847153663636, - "step": 1490 - }, - { - "epoch": 0.2549019607843137, - "grad_norm": 0.08474720269441605, - "learning_rate": 7.463346602734283e-05, - "loss": 0.01112249493598938, - "step": 1495 - }, - { - "epoch": 0.2557544757033248, - "grad_norm": 0.08283067494630814, - "learning_rate": 7.462988376919592e-05, - "loss": 0.01144670695066452, - "step": 1500 - }, - { - "epoch": 0.2566069906223359, - "grad_norm": 0.13687758147716522, - "learning_rate": 7.462628417758415e-05, - "loss": 0.012893360853195191, - "step": 1505 - }, - { - "epoch": 0.257459505541347, - "grad_norm": 0.16319195926189423, - "learning_rate": 7.462266725418793e-05, - "loss": 0.014364737272262573, - "step": 1510 - }, - { - "epoch": 0.25831202046035806, - "grad_norm": 0.0693240761756897, - "learning_rate": 7.461903300069576e-05, - "loss": 0.011550360918045044, - "step": 1515 - }, - { - "epoch": 0.2591645353793691, - "grad_norm": 0.0994478389620781, - "learning_rate": 7.461538141880423e-05, - "loss": 0.011711706221103669, - "step": 1520 - }, - { - "epoch": 0.26001705029838024, - "grad_norm": 0.20310325920581818, - "learning_rate": 7.461171251021802e-05, - "loss": 0.013178233802318574, - "step": 1525 - }, - { - "epoch": 0.2608695652173913, - "grad_norm": 0.07798318564891815, - "learning_rate": 7.460802627664991e-05, - "loss": 0.011273499578237534, - "step": 1530 - }, - { - "epoch": 0.2617220801364024, - "grad_norm": 0.1308072805404663, - "learning_rate": 7.460432271982073e-05, - "loss": 0.008084958046674728, - "step": 1535 - }, - { - "epoch": 0.2625745950554135, - "grad_norm": 0.08926808834075928, - "learning_rate": 7.460060184145944e-05, - "loss": 0.011974562704563142, - "step": 1540 - }, - { - "epoch": 0.26342710997442453, - "grad_norm": 0.07462260872125626, - "learning_rate": 7.459686364330307e-05, - "loss": 0.007739155739545822, - "step": 1545 - }, - { - "epoch": 0.26427962489343565, - "grad_norm": 0.10904734581708908, - "learning_rate": 7.459310812709675e-05, - "loss": 0.012024204432964324, - "step": 1550 - }, - { - "epoch": 0.2651321398124467, - "grad_norm": 0.11935116350650787, - "learning_rate": 7.458933529459364e-05, - "loss": 0.012462839484214783, - "step": 1555 - }, - { - "epoch": 0.2659846547314578, - "grad_norm": 0.08920887112617493, - "learning_rate": 7.458554514755506e-05, - "loss": 0.01472131609916687, - "step": 1560 - }, - { - "epoch": 0.2668371696504689, - "grad_norm": 0.12231490015983582, - "learning_rate": 7.458173768775036e-05, - "loss": 0.014967297017574311, - "step": 1565 - }, - { - "epoch": 0.26768968456947995, - "grad_norm": 0.10691904276609421, - "learning_rate": 7.4577912916957e-05, - "loss": 0.013200350105762482, - "step": 1570 - }, - { - "epoch": 0.26854219948849106, - "grad_norm": 0.06267247349023819, - "learning_rate": 7.457407083696049e-05, - "loss": 0.011946959048509597, - "step": 1575 - }, - { - "epoch": 0.2693947144075021, - "grad_norm": 0.10732340067625046, - "learning_rate": 7.457021144955448e-05, - "loss": 0.012722471356391906, - "step": 1580 - }, - { - "epoch": 0.27024722932651324, - "grad_norm": 0.08628841489553452, - "learning_rate": 7.456633475654061e-05, - "loss": 0.010444843024015427, - "step": 1585 - }, - { - "epoch": 0.2710997442455243, - "grad_norm": 0.1017296314239502, - "learning_rate": 7.456244075972866e-05, - "loss": 0.017299896478652953, - "step": 1590 - }, - { - "epoch": 0.27195225916453536, - "grad_norm": 0.07065381854772568, - "learning_rate": 7.455852946093652e-05, - "loss": 0.01379164457321167, - "step": 1595 - }, - { - "epoch": 0.2728047740835465, - "grad_norm": 0.08550920337438583, - "learning_rate": 7.455460086199008e-05, - "loss": 0.011976235359907151, - "step": 1600 - }, - { - "epoch": 0.27365728900255754, - "grad_norm": 0.08075132966041565, - "learning_rate": 7.455065496472335e-05, - "loss": 0.012481977045536042, - "step": 1605 - }, - { - "epoch": 0.27450980392156865, - "grad_norm": 0.08838896453380585, - "learning_rate": 7.454669177097839e-05, - "loss": 0.011825743317604064, - "step": 1610 - }, - { - "epoch": 0.2753623188405797, - "grad_norm": 0.06823412328958511, - "learning_rate": 7.454271128260537e-05, - "loss": 0.014278222620487214, - "step": 1615 - }, - { - "epoch": 0.27621483375959077, - "grad_norm": 0.09612765908241272, - "learning_rate": 7.45387135014625e-05, - "loss": 0.009220580756664275, - "step": 1620 - }, - { - "epoch": 0.2770673486786019, - "grad_norm": 0.08564051240682602, - "learning_rate": 7.45346984294161e-05, - "loss": 0.015146958827972411, - "step": 1625 - }, - { - "epoch": 0.27791986359761295, - "grad_norm": 0.0729006826877594, - "learning_rate": 7.453066606834052e-05, - "loss": 0.012136349081993103, - "step": 1630 - }, - { - "epoch": 0.27877237851662406, - "grad_norm": 0.10457300394773483, - "learning_rate": 7.452661642011818e-05, - "loss": 0.014803081750869751, - "step": 1635 - }, - { - "epoch": 0.2796248934356351, - "grad_norm": 0.09881619364023209, - "learning_rate": 7.452254948663964e-05, - "loss": 0.012653107941150665, - "step": 1640 - }, - { - "epoch": 0.2804774083546462, - "grad_norm": 0.12094103544950485, - "learning_rate": 7.451846526980343e-05, - "loss": 0.011742380261421204, - "step": 1645 - }, - { - "epoch": 0.2813299232736573, - "grad_norm": 0.06668030470609665, - "learning_rate": 7.451436377151624e-05, - "loss": 0.01095641851425171, - "step": 1650 - }, - { - "epoch": 0.28218243819266836, - "grad_norm": 0.06907116621732712, - "learning_rate": 7.451024499369278e-05, - "loss": 0.01093050017952919, - "step": 1655 - }, - { - "epoch": 0.2830349531116795, - "grad_norm": 0.13372033834457397, - "learning_rate": 7.45061089382558e-05, - "loss": 0.012350015342235565, - "step": 1660 - }, - { - "epoch": 0.28388746803069054, - "grad_norm": 0.06432037055492401, - "learning_rate": 7.450195560713617e-05, - "loss": 0.010150979459285735, - "step": 1665 - }, - { - "epoch": 0.2847399829497016, - "grad_norm": 0.10098759829998016, - "learning_rate": 7.449778500227281e-05, - "loss": 0.01070861890912056, - "step": 1670 - }, - { - "epoch": 0.2855924978687127, - "grad_norm": 0.1708894968032837, - "learning_rate": 7.449359712561269e-05, - "loss": 0.01218695342540741, - "step": 1675 - }, - { - "epoch": 0.2864450127877238, - "grad_norm": 0.15045367181301117, - "learning_rate": 7.448939197911084e-05, - "loss": 0.012416082620620727, - "step": 1680 - }, - { - "epoch": 0.2872975277067349, - "grad_norm": 0.08867572993040085, - "learning_rate": 7.44851695647304e-05, - "loss": 0.011927373707294464, - "step": 1685 - }, - { - "epoch": 0.28815004262574595, - "grad_norm": 0.1402040272951126, - "learning_rate": 7.448092988444247e-05, - "loss": 0.011733450740575791, - "step": 1690 - }, - { - "epoch": 0.289002557544757, - "grad_norm": 0.10436082631349564, - "learning_rate": 7.447667294022631e-05, - "loss": 0.013171072304248809, - "step": 1695 - }, - { - "epoch": 0.2898550724637681, - "grad_norm": 0.10628762096166611, - "learning_rate": 7.447239873406923e-05, - "loss": 0.012366896122694015, - "step": 1700 - }, - { - "epoch": 0.2907075873827792, - "grad_norm": 0.09782184660434723, - "learning_rate": 7.446810726796653e-05, - "loss": 0.011275313794612885, - "step": 1705 - }, - { - "epoch": 0.2915601023017903, - "grad_norm": 0.08403825014829636, - "learning_rate": 7.446379854392162e-05, - "loss": 0.010051032900810242, - "step": 1710 - }, - { - "epoch": 0.29241261722080136, - "grad_norm": 0.07938918471336365, - "learning_rate": 7.445947256394596e-05, - "loss": 0.00972949042916298, - "step": 1715 - }, - { - "epoch": 0.2932651321398124, - "grad_norm": 0.09250234067440033, - "learning_rate": 7.445512933005906e-05, - "loss": 0.009316288679838181, - "step": 1720 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 0.08939237147569656, - "learning_rate": 7.445076884428848e-05, - "loss": 0.007942373305559159, - "step": 1725 - }, - { - "epoch": 0.2949701619778346, - "grad_norm": 0.06440749019384384, - "learning_rate": 7.444639110866985e-05, - "loss": 0.008772502094507218, - "step": 1730 - }, - { - "epoch": 0.2958226768968457, - "grad_norm": 0.0980759784579277, - "learning_rate": 7.444199612524684e-05, - "loss": 0.0127939835190773, - "step": 1735 - }, - { - "epoch": 0.2966751918158568, - "grad_norm": 0.133849635720253, - "learning_rate": 7.443758389607117e-05, - "loss": 0.011026865988969802, - "step": 1740 - }, - { - "epoch": 0.29752770673486784, - "grad_norm": 0.08664857596158981, - "learning_rate": 7.443315442320263e-05, - "loss": 0.010273561626672745, - "step": 1745 - }, - { - "epoch": 0.29838022165387895, - "grad_norm": 0.11462656408548355, - "learning_rate": 7.442870770870902e-05, - "loss": 0.012825533747673035, - "step": 1750 - }, - { - "epoch": 0.29923273657289, - "grad_norm": 0.12586012482643127, - "learning_rate": 7.442424375466624e-05, - "loss": 0.01315489411354065, - "step": 1755 - }, - { - "epoch": 0.30008525149190113, - "grad_norm": 0.07139981538057327, - "learning_rate": 7.441976256315819e-05, - "loss": 0.010728174448013305, - "step": 1760 - }, - { - "epoch": 0.3009377664109122, - "grad_norm": 0.06837856024503708, - "learning_rate": 7.441526413627685e-05, - "loss": 0.012408022582530976, - "step": 1765 - }, - { - "epoch": 0.30179028132992325, - "grad_norm": 0.05851417034864426, - "learning_rate": 7.441074847612224e-05, - "loss": 0.009401807188987732, - "step": 1770 - }, - { - "epoch": 0.30264279624893436, - "grad_norm": 0.09595180302858353, - "learning_rate": 7.44062155848024e-05, - "loss": 0.010888323932886124, - "step": 1775 - }, - { - "epoch": 0.3034953111679454, - "grad_norm": 0.0811101421713829, - "learning_rate": 7.440166546443347e-05, - "loss": 0.00998341292142868, - "step": 1780 - }, - { - "epoch": 0.30434782608695654, - "grad_norm": 0.13257169723510742, - "learning_rate": 7.439709811713958e-05, - "loss": 0.014603719115257263, - "step": 1785 - }, - { - "epoch": 0.3052003410059676, - "grad_norm": 0.1428811252117157, - "learning_rate": 7.439251354505289e-05, - "loss": 0.01388871967792511, - "step": 1790 - }, - { - "epoch": 0.30605285592497866, - "grad_norm": 0.08253402262926102, - "learning_rate": 7.438791175031367e-05, - "loss": 0.010171836614608765, - "step": 1795 - }, - { - "epoch": 0.3069053708439898, - "grad_norm": 0.05991052836179733, - "learning_rate": 7.438329273507019e-05, - "loss": 0.01470649391412735, - "step": 1800 - }, - { - "epoch": 0.30775788576300084, - "grad_norm": 0.10785503685474396, - "learning_rate": 7.437865650147873e-05, - "loss": 0.012740308046340942, - "step": 1805 - }, - { - "epoch": 0.30861040068201195, - "grad_norm": 0.093068428337574, - "learning_rate": 7.437400305170367e-05, - "loss": 0.01183861643075943, - "step": 1810 - }, - { - "epoch": 0.309462915601023, - "grad_norm": 0.08662707358598709, - "learning_rate": 7.436933238791737e-05, - "loss": 0.011762722581624984, - "step": 1815 - }, - { - "epoch": 0.3103154305200341, - "grad_norm": 0.07826617360115051, - "learning_rate": 7.436464451230027e-05, - "loss": 0.009368828684091567, - "step": 1820 - }, - { - "epoch": 0.3111679454390452, - "grad_norm": 0.1295643448829651, - "learning_rate": 7.435993942704082e-05, - "loss": 0.010699732601642609, - "step": 1825 - }, - { - "epoch": 0.31202046035805625, - "grad_norm": 0.1412370651960373, - "learning_rate": 7.43552171343355e-05, - "loss": 0.0124404676258564, - "step": 1830 - }, - { - "epoch": 0.31287297527706737, - "grad_norm": 0.07793306559324265, - "learning_rate": 7.435047763638885e-05, - "loss": 0.010793016105890275, - "step": 1835 - }, - { - "epoch": 0.3137254901960784, - "grad_norm": 0.1273961067199707, - "learning_rate": 7.434572093541341e-05, - "loss": 0.012959575653076172, - "step": 1840 - }, - { - "epoch": 0.3145780051150895, - "grad_norm": 0.10340052098035812, - "learning_rate": 7.434094703362978e-05, - "loss": 0.011804693937301635, - "step": 1845 - }, - { - "epoch": 0.3154305200341006, - "grad_norm": 0.07878883183002472, - "learning_rate": 7.433615593326657e-05, - "loss": 0.011087532341480254, - "step": 1850 - }, - { - "epoch": 0.31628303495311166, - "grad_norm": 0.08166638761758804, - "learning_rate": 7.433134763656042e-05, - "loss": 0.010111966729164123, - "step": 1855 - }, - { - "epoch": 0.3171355498721228, - "grad_norm": 0.12048157304525375, - "learning_rate": 7.432652214575603e-05, - "loss": 0.013003784418106078, - "step": 1860 - }, - { - "epoch": 0.31798806479113384, - "grad_norm": 0.08009333908557892, - "learning_rate": 7.432167946310605e-05, - "loss": 0.01212536245584488, - "step": 1865 - }, - { - "epoch": 0.3188405797101449, - "grad_norm": 0.07344945520162582, - "learning_rate": 7.431681959087126e-05, - "loss": 0.011613032221794129, - "step": 1870 - }, - { - "epoch": 0.319693094629156, - "grad_norm": 0.09358638525009155, - "learning_rate": 7.431194253132037e-05, - "loss": 0.011946377158164979, - "step": 1875 - }, - { - "epoch": 0.3205456095481671, - "grad_norm": 0.14091502130031586, - "learning_rate": 7.430704828673016e-05, - "loss": 0.012845572829246522, - "step": 1880 - }, - { - "epoch": 0.3213981244671782, - "grad_norm": 0.0754130631685257, - "learning_rate": 7.430213685938543e-05, - "loss": 0.011171463876962662, - "step": 1885 - }, - { - "epoch": 0.32225063938618925, - "grad_norm": 0.10210556536912918, - "learning_rate": 7.429720825157901e-05, - "loss": 0.010276605188846589, - "step": 1890 - }, - { - "epoch": 0.32310315430520037, - "grad_norm": 0.10094697028398514, - "learning_rate": 7.429226246561173e-05, - "loss": 0.01233583763241768, - "step": 1895 - }, - { - "epoch": 0.32395566922421143, - "grad_norm": 0.0673881471157074, - "learning_rate": 7.428729950379244e-05, - "loss": 0.008631937205791473, - "step": 1900 - }, - { - "epoch": 0.3248081841432225, - "grad_norm": 0.11807650327682495, - "learning_rate": 7.428231936843803e-05, - "loss": 0.012879209220409393, - "step": 1905 - }, - { - "epoch": 0.3256606990622336, - "grad_norm": 0.0627446100115776, - "learning_rate": 7.427732206187338e-05, - "loss": 0.011548225581645966, - "step": 1910 - }, - { - "epoch": 0.32651321398124467, - "grad_norm": 0.09312627464532852, - "learning_rate": 7.427230758643139e-05, - "loss": 0.012763653695583344, - "step": 1915 - }, - { - "epoch": 0.3273657289002558, - "grad_norm": 0.12694048881530762, - "learning_rate": 7.426727594445302e-05, - "loss": 0.014219759404659272, - "step": 1920 - }, - { - "epoch": 0.32821824381926684, - "grad_norm": 0.09415233880281448, - "learning_rate": 7.426222713828717e-05, - "loss": 0.01088135689496994, - "step": 1925 - }, - { - "epoch": 0.3290707587382779, - "grad_norm": 0.1079363226890564, - "learning_rate": 7.425716117029082e-05, - "loss": 0.013090427219867706, - "step": 1930 - }, - { - "epoch": 0.329923273657289, - "grad_norm": 0.10847736895084381, - "learning_rate": 7.42520780428289e-05, - "loss": 0.011184506118297577, - "step": 1935 - }, - { - "epoch": 0.3307757885763001, - "grad_norm": 0.12416253983974457, - "learning_rate": 7.424697775827442e-05, - "loss": 0.012871085107326508, - "step": 1940 - }, - { - "epoch": 0.3316283034953112, - "grad_norm": 0.08419755846261978, - "learning_rate": 7.424186031900833e-05, - "loss": 0.01026538610458374, - "step": 1945 - }, - { - "epoch": 0.33248081841432225, - "grad_norm": 0.06923236697912216, - "learning_rate": 7.423672572741965e-05, - "loss": 0.012079264223575591, - "step": 1950 - }, - { - "epoch": 0.3333333333333333, - "grad_norm": 0.08516070991754532, - "learning_rate": 7.423157398590534e-05, - "loss": 0.011150284111499787, - "step": 1955 - }, - { - "epoch": 0.33418584825234443, - "grad_norm": 0.054969049990177155, - "learning_rate": 7.422640509687045e-05, - "loss": 0.008261225372552871, - "step": 1960 - }, - { - "epoch": 0.3350383631713555, - "grad_norm": 0.09037495404481888, - "learning_rate": 7.422121906272795e-05, - "loss": 0.015576986968517304, - "step": 1965 - }, - { - "epoch": 0.3358908780903666, - "grad_norm": 0.08676491677761078, - "learning_rate": 7.421601588589889e-05, - "loss": 0.01942193806171417, - "step": 1970 - }, - { - "epoch": 0.33674339300937767, - "grad_norm": 0.09090764820575714, - "learning_rate": 7.421079556881224e-05, - "loss": 0.012568703293800354, - "step": 1975 - }, - { - "epoch": 0.3375959079283887, - "grad_norm": 0.07859542965888977, - "learning_rate": 7.420555811390505e-05, - "loss": 0.011662108451128006, - "step": 1980 - }, - { - "epoch": 0.33844842284739984, - "grad_norm": 0.06368016451597214, - "learning_rate": 7.420030352362235e-05, - "loss": 0.010762494802474976, - "step": 1985 - }, - { - "epoch": 0.3393009377664109, - "grad_norm": 0.10950745642185211, - "learning_rate": 7.419503180041712e-05, - "loss": 0.012577894330024719, - "step": 1990 - }, - { - "epoch": 0.340153452685422, - "grad_norm": 0.07888182997703552, - "learning_rate": 7.41897429467504e-05, - "loss": 0.009134671837091445, - "step": 1995 - }, - { - "epoch": 0.3410059676044331, - "grad_norm": 0.08978903293609619, - "learning_rate": 7.41844369650912e-05, - "loss": 0.011774566024541855, - "step": 2000 - }, - { - "epoch": 0.34185848252344414, - "grad_norm": 0.07103633135557175, - "learning_rate": 7.417911385791653e-05, - "loss": 0.011116493493318558, - "step": 2005 - }, - { - "epoch": 0.34271099744245526, - "grad_norm": 0.07445147633552551, - "learning_rate": 7.417377362771138e-05, - "loss": 0.012135914713144302, - "step": 2010 - }, - { - "epoch": 0.3435635123614663, - "grad_norm": 0.09372841566801071, - "learning_rate": 7.416841627696876e-05, - "loss": 0.014101208746433258, - "step": 2015 - }, - { - "epoch": 0.34441602728047743, - "grad_norm": 0.10181085020303726, - "learning_rate": 7.416304180818966e-05, - "loss": 0.010917666554450988, - "step": 2020 - }, - { - "epoch": 0.3452685421994885, - "grad_norm": 0.08702226728200912, - "learning_rate": 7.415765022388305e-05, - "loss": 0.012508213520050049, - "step": 2025 - }, - { - "epoch": 0.34612105711849955, - "grad_norm": 0.11725348234176636, - "learning_rate": 7.415224152656591e-05, - "loss": 0.012123394012451171, - "step": 2030 - }, - { - "epoch": 0.34697357203751067, - "grad_norm": 0.10797812044620514, - "learning_rate": 7.414681571876321e-05, - "loss": 0.011308898031711579, - "step": 2035 - }, - { - "epoch": 0.34782608695652173, - "grad_norm": 0.07944193482398987, - "learning_rate": 7.414137280300787e-05, - "loss": 0.008828282356262207, - "step": 2040 - }, - { - "epoch": 0.34867860187553285, - "grad_norm": 0.09413408488035202, - "learning_rate": 7.413591278184086e-05, - "loss": 0.010974615067243575, - "step": 2045 - }, - { - "epoch": 0.3495311167945439, - "grad_norm": 0.13984905183315277, - "learning_rate": 7.413043565781107e-05, - "loss": 0.013567428290843963, - "step": 2050 - }, - { - "epoch": 0.35038363171355497, - "grad_norm": 0.09445049613714218, - "learning_rate": 7.41249414334754e-05, - "loss": 0.011122822761535645, - "step": 2055 - }, - { - "epoch": 0.3512361466325661, - "grad_norm": 0.07995510846376419, - "learning_rate": 7.411943011139877e-05, - "loss": 0.009908045828342437, - "step": 2060 - }, - { - "epoch": 0.35208866155157714, - "grad_norm": 0.1185273677110672, - "learning_rate": 7.411390169415402e-05, - "loss": 0.012709785997867585, - "step": 2065 - }, - { - "epoch": 0.35294117647058826, - "grad_norm": 0.11713512986898422, - "learning_rate": 7.4108356184322e-05, - "loss": 0.009765231609344482, - "step": 2070 - }, - { - "epoch": 0.3537936913895993, - "grad_norm": 0.06523539125919342, - "learning_rate": 7.410279358449155e-05, - "loss": 0.0113253653049469, - "step": 2075 - }, - { - "epoch": 0.3546462063086104, - "grad_norm": 0.07587762176990509, - "learning_rate": 7.409721389725948e-05, - "loss": 0.009385265409946442, - "step": 2080 - }, - { - "epoch": 0.3554987212276215, - "grad_norm": 0.05211614444851875, - "learning_rate": 7.409161712523056e-05, - "loss": 0.012498895078897477, - "step": 2085 - }, - { - "epoch": 0.35635123614663256, - "grad_norm": 0.12545894086360931, - "learning_rate": 7.408600327101755e-05, - "loss": 0.012212803959846497, - "step": 2090 - }, - { - "epoch": 0.35720375106564367, - "grad_norm": 0.10047369450330734, - "learning_rate": 7.40803723372412e-05, - "loss": 0.012341489642858505, - "step": 2095 - }, - { - "epoch": 0.35805626598465473, - "grad_norm": 0.13728737831115723, - "learning_rate": 7.40747243265302e-05, - "loss": 0.011351624131202697, - "step": 2100 - }, - { - "epoch": 0.3589087809036658, - "grad_norm": 0.1251213699579239, - "learning_rate": 7.406905924152125e-05, - "loss": 0.013545188307762145, - "step": 2105 - }, - { - "epoch": 0.3597612958226769, - "grad_norm": 0.07805601507425308, - "learning_rate": 7.406337708485897e-05, - "loss": 0.010711775720119476, - "step": 2110 - }, - { - "epoch": 0.36061381074168797, - "grad_norm": 0.08311845362186432, - "learning_rate": 7.405767785919598e-05, - "loss": 0.01128876730799675, - "step": 2115 - }, - { - "epoch": 0.3614663256606991, - "grad_norm": 0.09670841693878174, - "learning_rate": 7.405196156719291e-05, - "loss": 0.013085599243640899, - "step": 2120 - }, - { - "epoch": 0.36231884057971014, - "grad_norm": 0.10827390104532242, - "learning_rate": 7.404622821151829e-05, - "loss": 0.011315967142581939, - "step": 2125 - }, - { - "epoch": 0.3631713554987212, - "grad_norm": 0.08578862994909286, - "learning_rate": 7.404047779484862e-05, - "loss": 0.01172153502702713, - "step": 2130 - }, - { - "epoch": 0.3640238704177323, - "grad_norm": 0.08786064386367798, - "learning_rate": 7.403471031986841e-05, - "loss": 0.010834509134292602, - "step": 2135 - }, - { - "epoch": 0.3648763853367434, - "grad_norm": 0.07956185191869736, - "learning_rate": 7.402892578927012e-05, - "loss": 0.01222250759601593, - "step": 2140 - }, - { - "epoch": 0.3657289002557545, - "grad_norm": 0.10179547220468521, - "learning_rate": 7.402312420575414e-05, - "loss": 0.010667824000120164, - "step": 2145 - }, - { - "epoch": 0.36658141517476556, - "grad_norm": 0.10311263799667358, - "learning_rate": 7.401730557202884e-05, - "loss": 0.014463961124420166, - "step": 2150 - }, - { - "epoch": 0.3674339300937766, - "grad_norm": 0.0935206189751625, - "learning_rate": 7.401146989081058e-05, - "loss": 0.010451390594244003, - "step": 2155 - }, - { - "epoch": 0.36828644501278773, - "grad_norm": 0.08164738863706589, - "learning_rate": 7.400561716482362e-05, - "loss": 0.013009518384933472, - "step": 2160 - }, - { - "epoch": 0.3691389599317988, - "grad_norm": 0.0638088807463646, - "learning_rate": 7.399974739680022e-05, - "loss": 0.0109320767223835, - "step": 2165 - }, - { - "epoch": 0.3699914748508099, - "grad_norm": 0.09591665863990784, - "learning_rate": 7.399386058948057e-05, - "loss": 0.01293652206659317, - "step": 2170 - }, - { - "epoch": 0.37084398976982097, - "grad_norm": 0.08929681777954102, - "learning_rate": 7.398795674561285e-05, - "loss": 0.011034403741359711, - "step": 2175 - }, - { - "epoch": 0.37169650468883203, - "grad_norm": 0.07356081902980804, - "learning_rate": 7.398203586795315e-05, - "loss": 0.010477699339389801, - "step": 2180 - }, - { - "epoch": 0.37254901960784315, - "grad_norm": 0.1117938682436943, - "learning_rate": 7.397609795926555e-05, - "loss": 0.008920109272003174, - "step": 2185 - }, - { - "epoch": 0.3734015345268542, - "grad_norm": 0.10849595069885254, - "learning_rate": 7.397014302232204e-05, - "loss": 0.01170756369829178, - "step": 2190 - }, - { - "epoch": 0.3742540494458653, - "grad_norm": 0.08509895205497742, - "learning_rate": 7.396417105990261e-05, - "loss": 0.010042114555835724, - "step": 2195 - }, - { - "epoch": 0.3751065643648764, - "grad_norm": 0.10500915348529816, - "learning_rate": 7.395818207479515e-05, - "loss": 0.011792914569377899, - "step": 2200 - }, - { - "epoch": 0.37595907928388744, - "grad_norm": 0.06618086993694305, - "learning_rate": 7.395217606979553e-05, - "loss": 0.011084456741809846, - "step": 2205 - }, - { - "epoch": 0.37681159420289856, - "grad_norm": 0.08622384816408157, - "learning_rate": 7.394615304770756e-05, - "loss": 0.010955430567264557, - "step": 2210 - }, - { - "epoch": 0.3776641091219096, - "grad_norm": 0.10002898424863815, - "learning_rate": 7.394011301134296e-05, - "loss": 0.011026810109615325, - "step": 2215 - }, - { - "epoch": 0.37851662404092073, - "grad_norm": 0.16406390070915222, - "learning_rate": 7.393405596352144e-05, - "loss": 0.010511884093284607, - "step": 2220 - }, - { - "epoch": 0.3793691389599318, - "grad_norm": 0.077234648168087, - "learning_rate": 7.392798190707062e-05, - "loss": 0.011723069101572036, - "step": 2225 - }, - { - "epoch": 0.38022165387894286, - "grad_norm": 0.09080372750759125, - "learning_rate": 7.392189084482609e-05, - "loss": 0.010011065006256103, - "step": 2230 - }, - { - "epoch": 0.38107416879795397, - "grad_norm": 0.08161097019910812, - "learning_rate": 7.391578277963134e-05, - "loss": 0.012426529079675674, - "step": 2235 - }, - { - "epoch": 0.38192668371696503, - "grad_norm": 0.09220891445875168, - "learning_rate": 7.390965771433783e-05, - "loss": 0.011983324587345124, - "step": 2240 - }, - { - "epoch": 0.38277919863597615, - "grad_norm": 0.10752015560865402, - "learning_rate": 7.390351565180495e-05, - "loss": 0.014156198501586914, - "step": 2245 - }, - { - "epoch": 0.3836317135549872, - "grad_norm": 0.05059373378753662, - "learning_rate": 7.38973565949e-05, - "loss": 0.00998034030199051, - "step": 2250 - }, - { - "epoch": 0.38448422847399827, - "grad_norm": 0.11214456707239151, - "learning_rate": 7.389118054649824e-05, - "loss": 0.01075390875339508, - "step": 2255 - }, - { - "epoch": 0.3853367433930094, - "grad_norm": 0.07631754130125046, - "learning_rate": 7.388498750948286e-05, - "loss": 0.014638753235340118, - "step": 2260 - }, - { - "epoch": 0.38618925831202044, - "grad_norm": 0.07249671965837479, - "learning_rate": 7.387877748674499e-05, - "loss": 0.011368723213672638, - "step": 2265 - }, - { - "epoch": 0.38704177323103156, - "grad_norm": 0.11984748393297195, - "learning_rate": 7.387255048118364e-05, - "loss": 0.011021500825881958, - "step": 2270 - }, - { - "epoch": 0.3878942881500426, - "grad_norm": 0.08478229492902756, - "learning_rate": 7.386630649570581e-05, - "loss": 0.009952519088983536, - "step": 2275 - }, - { - "epoch": 0.3887468030690537, - "grad_norm": 0.11780049651861191, - "learning_rate": 7.386004553322639e-05, - "loss": 0.009453963488340378, - "step": 2280 - }, - { - "epoch": 0.3895993179880648, - "grad_norm": 0.06949981302022934, - "learning_rate": 7.38537675966682e-05, - "loss": 0.009042493999004364, - "step": 2285 - }, - { - "epoch": 0.39045183290707586, - "grad_norm": 0.11411654949188232, - "learning_rate": 7.3847472688962e-05, - "loss": 0.013985235989093781, - "step": 2290 - }, - { - "epoch": 0.391304347826087, - "grad_norm": 0.11030828952789307, - "learning_rate": 7.384116081304647e-05, - "loss": 0.01135389506816864, - "step": 2295 - }, - { - "epoch": 0.39215686274509803, - "grad_norm": 0.0808996856212616, - "learning_rate": 7.38348319718682e-05, - "loss": 0.011089587211608886, - "step": 2300 - }, - { - "epoch": 0.39300937766410915, - "grad_norm": 0.11319196224212646, - "learning_rate": 7.382848616838167e-05, - "loss": 0.01407056450843811, - "step": 2305 - }, - { - "epoch": 0.3938618925831202, - "grad_norm": 0.09301812201738358, - "learning_rate": 7.382212340554937e-05, - "loss": 0.012283433228731155, - "step": 2310 - }, - { - "epoch": 0.39471440750213127, - "grad_norm": 0.08611076325178146, - "learning_rate": 7.381574368634159e-05, - "loss": 0.012206315249204635, - "step": 2315 - }, - { - "epoch": 0.3955669224211424, - "grad_norm": 0.08104816824197769, - "learning_rate": 7.380934701373665e-05, - "loss": 0.01059889942407608, - "step": 2320 - }, - { - "epoch": 0.39641943734015345, - "grad_norm": 0.09607693552970886, - "learning_rate": 7.380293339072067e-05, - "loss": 0.010189100354909896, - "step": 2325 - }, - { - "epoch": 0.39727195225916456, - "grad_norm": 0.08985438197851181, - "learning_rate": 7.37965028202878e-05, - "loss": 0.01145355924963951, - "step": 2330 - }, - { - "epoch": 0.3981244671781756, - "grad_norm": 0.0767461284995079, - "learning_rate": 7.379005530544e-05, - "loss": 0.012533161044120788, - "step": 2335 - }, - { - "epoch": 0.3989769820971867, - "grad_norm": 0.17541736364364624, - "learning_rate": 7.378359084918724e-05, - "loss": 0.011619434505701066, - "step": 2340 - }, - { - "epoch": 0.3998294970161978, - "grad_norm": 0.07870234549045563, - "learning_rate": 7.377710945454728e-05, - "loss": 0.013362208008766174, - "step": 2345 - }, - { - "epoch": 0.40068201193520886, - "grad_norm": 0.08661636710166931, - "learning_rate": 7.377061112454589e-05, - "loss": 0.011086350679397583, - "step": 2350 - }, - { - "epoch": 0.40153452685422, - "grad_norm": 0.08467904478311539, - "learning_rate": 7.376409586221668e-05, - "loss": 0.008972878754138946, - "step": 2355 - }, - { - "epoch": 0.40238704177323104, - "grad_norm": 0.09345834702253342, - "learning_rate": 7.375756367060121e-05, - "loss": 0.01281469464302063, - "step": 2360 - }, - { - "epoch": 0.4032395566922421, - "grad_norm": 0.10789518058300018, - "learning_rate": 7.375101455274893e-05, - "loss": 0.012343473732471466, - "step": 2365 - }, - { - "epoch": 0.4040920716112532, - "grad_norm": 0.06546701490879059, - "learning_rate": 7.374444851171716e-05, - "loss": 0.012971158325672149, - "step": 2370 - }, - { - "epoch": 0.40494458653026427, - "grad_norm": 0.08968871831893921, - "learning_rate": 7.373786555057117e-05, - "loss": 0.012170027941465378, - "step": 2375 - }, - { - "epoch": 0.4057971014492754, - "grad_norm": 0.058557040989398956, - "learning_rate": 7.373126567238412e-05, - "loss": 0.009915658086538316, - "step": 2380 - }, - { - "epoch": 0.40664961636828645, - "grad_norm": 0.08734243363142014, - "learning_rate": 7.3724648880237e-05, - "loss": 0.009043127298355103, - "step": 2385 - }, - { - "epoch": 0.4075021312872975, - "grad_norm": 0.09249505400657654, - "learning_rate": 7.371801517721879e-05, - "loss": 0.008064758032560349, - "step": 2390 - }, - { - "epoch": 0.4083546462063086, - "grad_norm": 0.09015105664730072, - "learning_rate": 7.371136456642631e-05, - "loss": 0.007721304893493652, - "step": 2395 - }, - { - "epoch": 0.4092071611253197, - "grad_norm": 0.08557724207639694, - "learning_rate": 7.37046970509643e-05, - "loss": 0.010766822844743729, - "step": 2400 - }, - { - "epoch": 0.4100596760443308, - "grad_norm": 0.08001160621643066, - "learning_rate": 7.369801263394536e-05, - "loss": 0.00953015759587288, - "step": 2405 - }, - { - "epoch": 0.41091219096334186, - "grad_norm": 0.08470463752746582, - "learning_rate": 7.369131131849e-05, - "loss": 0.010154610127210617, - "step": 2410 - }, - { - "epoch": 0.4117647058823529, - "grad_norm": 0.07110592722892761, - "learning_rate": 7.368459310772664e-05, - "loss": 0.010146965831518173, - "step": 2415 - }, - { - "epoch": 0.41261722080136404, - "grad_norm": 0.06808072328567505, - "learning_rate": 7.367785800479152e-05, - "loss": 0.01043560653924942, - "step": 2420 - }, - { - "epoch": 0.4134697357203751, - "grad_norm": 0.09226541966199875, - "learning_rate": 7.367110601282884e-05, - "loss": 0.011138775944709777, - "step": 2425 - }, - { - "epoch": 0.4143222506393862, - "grad_norm": 0.08650510013103485, - "learning_rate": 7.366433713499067e-05, - "loss": 0.011451859772205353, - "step": 2430 - }, - { - "epoch": 0.4151747655583973, - "grad_norm": 0.11477349698543549, - "learning_rate": 7.365755137443691e-05, - "loss": 0.013105396926403046, - "step": 2435 - }, - { - "epoch": 0.41602728047740833, - "grad_norm": 0.1117088794708252, - "learning_rate": 7.365074873433541e-05, - "loss": 0.01190647780895233, - "step": 2440 - }, - { - "epoch": 0.41687979539641945, - "grad_norm": 0.058514054864645004, - "learning_rate": 7.364392921786185e-05, - "loss": 0.011006749421358108, - "step": 2445 - }, - { - "epoch": 0.4177323103154305, - "grad_norm": 0.0925084576010704, - "learning_rate": 7.363709282819981e-05, - "loss": 0.011449025571346283, - "step": 2450 - }, - { - "epoch": 0.4185848252344416, - "grad_norm": 0.10087555646896362, - "learning_rate": 7.363023956854074e-05, - "loss": 0.011715477705001831, - "step": 2455 - }, - { - "epoch": 0.4194373401534527, - "grad_norm": 0.08760760724544525, - "learning_rate": 7.362336944208399e-05, - "loss": 0.011089532822370528, - "step": 2460 - }, - { - "epoch": 0.42028985507246375, - "grad_norm": 0.09802501648664474, - "learning_rate": 7.361648245203674e-05, - "loss": 0.012181267142295837, - "step": 2465 - }, - { - "epoch": 0.42114236999147486, - "grad_norm": 0.06908553838729858, - "learning_rate": 7.36095786016141e-05, - "loss": 0.010319410264492035, - "step": 2470 - }, - { - "epoch": 0.4219948849104859, - "grad_norm": 0.07190519571304321, - "learning_rate": 7.360265789403896e-05, - "loss": 0.013445201516151428, - "step": 2475 - }, - { - "epoch": 0.42284739982949704, - "grad_norm": 0.06683836877346039, - "learning_rate": 7.359572033254219e-05, - "loss": 0.008111725002527237, - "step": 2480 - }, - { - "epoch": 0.4236999147485081, - "grad_norm": 0.07094739377498627, - "learning_rate": 7.358876592036245e-05, - "loss": 0.012130254507064819, - "step": 2485 - }, - { - "epoch": 0.42455242966751916, - "grad_norm": 0.11974254250526428, - "learning_rate": 7.358179466074629e-05, - "loss": 0.011426160484552384, - "step": 2490 - }, - { - "epoch": 0.4254049445865303, - "grad_norm": 0.07710634917020798, - "learning_rate": 7.357480655694814e-05, - "loss": 0.010044369101524352, - "step": 2495 - }, - { - "epoch": 0.42625745950554134, - "grad_norm": 0.08417962491512299, - "learning_rate": 7.356780161223026e-05, - "loss": 0.010821688175201415, - "step": 2500 - }, - { - "epoch": 0.42710997442455245, - "grad_norm": 0.11058598011732101, - "learning_rate": 7.35607798298628e-05, - "loss": 0.012949730455875396, - "step": 2505 - }, - { - "epoch": 0.4279624893435635, - "grad_norm": 0.08686384558677673, - "learning_rate": 7.355374121312377e-05, - "loss": 0.009096769988536835, - "step": 2510 - }, - { - "epoch": 0.4288150042625746, - "grad_norm": 0.11153281480073929, - "learning_rate": 7.354668576529903e-05, - "loss": 0.010433172434568405, - "step": 2515 - }, - { - "epoch": 0.4296675191815857, - "grad_norm": 0.08490245044231415, - "learning_rate": 7.353961348968229e-05, - "loss": 0.008478586375713349, - "step": 2520 - }, - { - "epoch": 0.43052003410059675, - "grad_norm": 0.06651579588651657, - "learning_rate": 7.353252438957511e-05, - "loss": 0.012342555820941925, - "step": 2525 - }, - { - "epoch": 0.43137254901960786, - "grad_norm": 0.08961665630340576, - "learning_rate": 7.352541846828694e-05, - "loss": 0.010387994349002838, - "step": 2530 - }, - { - "epoch": 0.4322250639386189, - "grad_norm": 0.08726584166288376, - "learning_rate": 7.351829572913505e-05, - "loss": 0.009760166704654693, - "step": 2535 - }, - { - "epoch": 0.43307757885763, - "grad_norm": 0.06280151754617691, - "learning_rate": 7.351115617544459e-05, - "loss": 0.01087048500776291, - "step": 2540 - }, - { - "epoch": 0.4339300937766411, - "grad_norm": 0.09519831836223602, - "learning_rate": 7.350399981054851e-05, - "loss": 0.011516393721103668, - "step": 2545 - }, - { - "epoch": 0.43478260869565216, - "grad_norm": 0.09179427474737167, - "learning_rate": 7.349682663778766e-05, - "loss": 0.013757939636707305, - "step": 2550 - }, - { - "epoch": 0.4356351236146633, - "grad_norm": 0.10378465801477432, - "learning_rate": 7.34896366605107e-05, - "loss": 0.011337973177433014, - "step": 2555 - }, - { - "epoch": 0.43648763853367434, - "grad_norm": 0.14043129980564117, - "learning_rate": 7.348242988207418e-05, - "loss": 0.01203509122133255, - "step": 2560 - }, - { - "epoch": 0.4373401534526854, - "grad_norm": 0.06442756950855255, - "learning_rate": 7.347520630584243e-05, - "loss": 0.007210708409547806, - "step": 2565 - }, - { - "epoch": 0.4381926683716965, - "grad_norm": 0.05981998145580292, - "learning_rate": 7.346796593518768e-05, - "loss": 0.009825873374938964, - "step": 2570 - }, - { - "epoch": 0.4390451832907076, - "grad_norm": 0.10198855400085449, - "learning_rate": 7.346070877348996e-05, - "loss": 0.013066151738166809, - "step": 2575 - }, - { - "epoch": 0.4398976982097187, - "grad_norm": 0.12545716762542725, - "learning_rate": 7.345343482413716e-05, - "loss": 0.008229418843984603, - "step": 2580 - }, - { - "epoch": 0.44075021312872975, - "grad_norm": 0.1352240890264511, - "learning_rate": 7.344614409052501e-05, - "loss": 0.013183671236038207, - "step": 2585 - }, - { - "epoch": 0.4416027280477408, - "grad_norm": 0.07198570668697357, - "learning_rate": 7.343883657605704e-05, - "loss": 0.010311058908700942, - "step": 2590 - }, - { - "epoch": 0.4424552429667519, - "grad_norm": 0.08454001694917679, - "learning_rate": 7.343151228414469e-05, - "loss": 0.009928110986948013, - "step": 2595 - }, - { - "epoch": 0.443307757885763, - "grad_norm": 0.07289708405733109, - "learning_rate": 7.342417121820714e-05, - "loss": 0.011071844398975373, - "step": 2600 - }, - { - "epoch": 0.4441602728047741, - "grad_norm": 0.12291301786899567, - "learning_rate": 7.341681338167145e-05, - "loss": 0.011248499900102616, - "step": 2605 - }, - { - "epoch": 0.44501278772378516, - "grad_norm": 0.14277565479278564, - "learning_rate": 7.340943877797252e-05, - "loss": 0.010025183856487273, - "step": 2610 - }, - { - "epoch": 0.4458653026427962, - "grad_norm": 0.07569251209497452, - "learning_rate": 7.340204741055304e-05, - "loss": 0.009996208548545837, - "step": 2615 - }, - { - "epoch": 0.44671781756180734, - "grad_norm": 0.10494589060544968, - "learning_rate": 7.339463928286357e-05, - "loss": 0.01392391324043274, - "step": 2620 - }, - { - "epoch": 0.4475703324808184, - "grad_norm": 0.14377856254577637, - "learning_rate": 7.338721439836245e-05, - "loss": 0.012823046743869781, - "step": 2625 - }, - { - "epoch": 0.4484228473998295, - "grad_norm": 0.06943785399198532, - "learning_rate": 7.337977276051586e-05, - "loss": 0.009452010691165923, - "step": 2630 - }, - { - "epoch": 0.4492753623188406, - "grad_norm": 0.09933419525623322, - "learning_rate": 7.337231437279783e-05, - "loss": 0.008945996314287186, - "step": 2635 - }, - { - "epoch": 0.45012787723785164, - "grad_norm": 0.09861225634813309, - "learning_rate": 7.336483923869016e-05, - "loss": 0.010671885311603546, - "step": 2640 - }, - { - "epoch": 0.45098039215686275, - "grad_norm": 0.08303772658109665, - "learning_rate": 7.335734736168249e-05, - "loss": 0.009589634835720062, - "step": 2645 - }, - { - "epoch": 0.4518329070758738, - "grad_norm": 0.08657588064670563, - "learning_rate": 7.334983874527231e-05, - "loss": 0.008064036071300507, - "step": 2650 - }, - { - "epoch": 0.45268542199488493, - "grad_norm": 0.10513710975646973, - "learning_rate": 7.334231339296485e-05, - "loss": 0.01647743284702301, - "step": 2655 - }, - { - "epoch": 0.453537936913896, - "grad_norm": 0.10341943055391312, - "learning_rate": 7.333477130827322e-05, - "loss": 0.009101226180791854, - "step": 2660 - }, - { - "epoch": 0.45439045183290705, - "grad_norm": 0.09740681946277618, - "learning_rate": 7.33272124947183e-05, - "loss": 0.011460770666599274, - "step": 2665 - }, - { - "epoch": 0.45524296675191817, - "grad_norm": 0.06477998197078705, - "learning_rate": 7.331963695582881e-05, - "loss": 0.011711791157722473, - "step": 2670 - }, - { - "epoch": 0.4560954816709292, - "grad_norm": 0.0881948322057724, - "learning_rate": 7.331204469514127e-05, - "loss": 0.009621420502662658, - "step": 2675 - }, - { - "epoch": 0.45694799658994034, - "grad_norm": 0.09553391486406326, - "learning_rate": 7.330443571619998e-05, - "loss": 0.011725078523159026, - "step": 2680 - }, - { - "epoch": 0.4578005115089514, - "grad_norm": 0.10480209439992905, - "learning_rate": 7.329681002255706e-05, - "loss": 0.012353558838367463, - "step": 2685 - }, - { - "epoch": 0.45865302642796246, - "grad_norm": 0.08409439772367477, - "learning_rate": 7.328916761777247e-05, - "loss": 0.01114615797996521, - "step": 2690 - }, - { - "epoch": 0.4595055413469736, - "grad_norm": 0.07166923582553864, - "learning_rate": 7.32815085054139e-05, - "loss": 0.008672221004962921, - "step": 2695 - }, - { - "epoch": 0.46035805626598464, - "grad_norm": 0.07308658212423325, - "learning_rate": 7.327383268905691e-05, - "loss": 0.012448658794164657, - "step": 2700 - }, - { - "epoch": 0.46121057118499575, - "grad_norm": 0.14019793272018433, - "learning_rate": 7.32661401722848e-05, - "loss": 0.013477186858654022, - "step": 2705 - }, - { - "epoch": 0.4620630861040068, - "grad_norm": 0.0753963515162468, - "learning_rate": 7.325843095868872e-05, - "loss": 0.011373884975910187, - "step": 2710 - }, - { - "epoch": 0.4629156010230179, - "grad_norm": 0.07312130182981491, - "learning_rate": 7.325070505186756e-05, - "loss": 0.012329152971506118, - "step": 2715 - }, - { - "epoch": 0.463768115942029, - "grad_norm": 0.06200556829571724, - "learning_rate": 7.324296245542806e-05, - "loss": 0.008847354352474213, - "step": 2720 - }, - { - "epoch": 0.46462063086104005, - "grad_norm": 0.11015846580266953, - "learning_rate": 7.32352031729847e-05, - "loss": 0.013304698467254638, - "step": 2725 - }, - { - "epoch": 0.46547314578005117, - "grad_norm": 0.05926821380853653, - "learning_rate": 7.322742720815978e-05, - "loss": 0.011919337511062621, - "step": 2730 - }, - { - "epoch": 0.4663256606990622, - "grad_norm": 0.102846160531044, - "learning_rate": 7.321963456458337e-05, - "loss": 0.010952814668416976, - "step": 2735 - }, - { - "epoch": 0.46717817561807334, - "grad_norm": 0.10767021775245667, - "learning_rate": 7.321182524589334e-05, - "loss": 0.012438956648111343, - "step": 2740 - }, - { - "epoch": 0.4680306905370844, - "grad_norm": 0.08611919730901718, - "learning_rate": 7.320399925573534e-05, - "loss": 0.008686845004558564, - "step": 2745 - }, - { - "epoch": 0.46888320545609546, - "grad_norm": 0.07483147829771042, - "learning_rate": 7.31961565977628e-05, - "loss": 0.011065713316202163, - "step": 2750 - }, - { - "epoch": 0.4697357203751066, - "grad_norm": 0.08029857277870178, - "learning_rate": 7.318829727563696e-05, - "loss": 0.012208929657936097, - "step": 2755 - }, - { - "epoch": 0.47058823529411764, - "grad_norm": 0.09076030552387238, - "learning_rate": 7.318042129302676e-05, - "loss": 0.010283030569553375, - "step": 2760 - }, - { - "epoch": 0.47144075021312876, - "grad_norm": 0.07009804993867874, - "learning_rate": 7.317252865360902e-05, - "loss": 0.010625988245010376, - "step": 2765 - }, - { - "epoch": 0.4722932651321398, - "grad_norm": 0.07213665544986725, - "learning_rate": 7.316461936106826e-05, - "loss": 0.010299822688102723, - "step": 2770 - }, - { - "epoch": 0.4731457800511509, - "grad_norm": 0.08464398980140686, - "learning_rate": 7.315669341909679e-05, - "loss": 0.010440715402364732, - "step": 2775 - }, - { - "epoch": 0.473998294970162, - "grad_norm": 0.08878160268068314, - "learning_rate": 7.314875083139475e-05, - "loss": 0.01015128344297409, - "step": 2780 - }, - { - "epoch": 0.47485080988917305, - "grad_norm": 0.05885029211640358, - "learning_rate": 7.314079160166996e-05, - "loss": 0.00943310335278511, - "step": 2785 - }, - { - "epoch": 0.47570332480818417, - "grad_norm": 0.07288813591003418, - "learning_rate": 7.313281573363809e-05, - "loss": 0.009116576611995697, - "step": 2790 - }, - { - "epoch": 0.47655583972719523, - "grad_norm": 0.09088344126939774, - "learning_rate": 7.31248232310225e-05, - "loss": 0.010344403237104416, - "step": 2795 - }, - { - "epoch": 0.4774083546462063, - "grad_norm": 0.08182916790246964, - "learning_rate": 7.311681409755437e-05, - "loss": 0.010874876379966735, - "step": 2800 - }, - { - "epoch": 0.4782608695652174, - "grad_norm": 0.08280645310878754, - "learning_rate": 7.310878833697264e-05, - "loss": 0.007568147033452988, - "step": 2805 - }, - { - "epoch": 0.47911338448422847, - "grad_norm": 0.10462478548288345, - "learning_rate": 7.3100745953024e-05, - "loss": 0.011740683764219283, - "step": 2810 - }, - { - "epoch": 0.4799658994032396, - "grad_norm": 0.07685881853103638, - "learning_rate": 7.30926869494629e-05, - "loss": 0.009284010529518128, - "step": 2815 - }, - { - "epoch": 0.48081841432225064, - "grad_norm": 0.05211766064167023, - "learning_rate": 7.308461133005156e-05, - "loss": 0.009633362293243408, - "step": 2820 - }, - { - "epoch": 0.4816709292412617, - "grad_norm": 0.07862114161252975, - "learning_rate": 7.307651909855993e-05, - "loss": 0.012355846166610718, - "step": 2825 - }, - { - "epoch": 0.4825234441602728, - "grad_norm": 0.09950421750545502, - "learning_rate": 7.306841025876573e-05, - "loss": 0.010842062532901764, - "step": 2830 - }, - { - "epoch": 0.4833759590792839, - "grad_norm": 0.08446205407381058, - "learning_rate": 7.306028481445446e-05, - "loss": 0.008424797654151916, - "step": 2835 - }, - { - "epoch": 0.484228473998295, - "grad_norm": 0.1424778699874878, - "learning_rate": 7.305214276941934e-05, - "loss": 0.01177324503660202, - "step": 2840 - }, - { - "epoch": 0.48508098891730606, - "grad_norm": 0.07312945276498795, - "learning_rate": 7.304398412746134e-05, - "loss": 0.010038022696971894, - "step": 2845 - }, - { - "epoch": 0.4859335038363171, - "grad_norm": 0.07043888419866562, - "learning_rate": 7.303580889238917e-05, - "loss": 0.008848214149475097, - "step": 2850 - }, - { - "epoch": 0.48678601875532823, - "grad_norm": 0.09851706773042679, - "learning_rate": 7.302761706801934e-05, - "loss": 0.011452250182628632, - "step": 2855 - }, - { - "epoch": 0.4876385336743393, - "grad_norm": 0.07379815727472305, - "learning_rate": 7.301940865817604e-05, - "loss": 0.010087071359157563, - "step": 2860 - }, - { - "epoch": 0.4884910485933504, - "grad_norm": 0.12832187116146088, - "learning_rate": 7.301118366669123e-05, - "loss": 0.013372799754142762, - "step": 2865 - }, - { - "epoch": 0.48934356351236147, - "grad_norm": 0.06776788830757141, - "learning_rate": 7.300294209740462e-05, - "loss": 0.010031795501708985, - "step": 2870 - }, - { - "epoch": 0.49019607843137253, - "grad_norm": 0.06495808809995651, - "learning_rate": 7.299468395416364e-05, - "loss": 0.011152566224336625, - "step": 2875 - }, - { - "epoch": 0.49104859335038364, - "grad_norm": 0.06433792412281036, - "learning_rate": 7.298640924082346e-05, - "loss": 0.012774203717708588, - "step": 2880 - }, - { - "epoch": 0.4919011082693947, - "grad_norm": 0.066926009953022, - "learning_rate": 7.2978117961247e-05, - "loss": 0.011111211776733399, - "step": 2885 - }, - { - "epoch": 0.4927536231884058, - "grad_norm": 0.08211687207221985, - "learning_rate": 7.296981011930493e-05, - "loss": 0.009508269280195237, - "step": 2890 - }, - { - "epoch": 0.4936061381074169, - "grad_norm": 0.09815993160009384, - "learning_rate": 7.296148571887558e-05, - "loss": 0.0117066890001297, - "step": 2895 - }, - { - "epoch": 0.49445865302642794, - "grad_norm": 0.07543535530567169, - "learning_rate": 7.295314476384508e-05, - "loss": 0.008867967873811722, - "step": 2900 - }, - { - "epoch": 0.49531116794543906, - "grad_norm": 0.07558202743530273, - "learning_rate": 7.294478725810728e-05, - "loss": 0.01093400940299034, - "step": 2905 - }, - { - "epoch": 0.4961636828644501, - "grad_norm": 0.06642191112041473, - "learning_rate": 7.293641320556371e-05, - "loss": 0.008366364240646362, - "step": 2910 - }, - { - "epoch": 0.49701619778346123, - "grad_norm": 0.07226760685443878, - "learning_rate": 7.292802261012368e-05, - "loss": 0.012197307497262954, - "step": 2915 - }, - { - "epoch": 0.4978687127024723, - "grad_norm": 0.08546584844589233, - "learning_rate": 7.29196154757042e-05, - "loss": 0.010272269695997238, - "step": 2920 - }, - { - "epoch": 0.49872122762148335, - "grad_norm": 0.0559270940721035, - "learning_rate": 7.291119180622998e-05, - "loss": 0.009690707921981812, - "step": 2925 - }, - { - "epoch": 0.49957374254049447, - "grad_norm": 0.11211635917425156, - "learning_rate": 7.290275160563349e-05, - "loss": 0.01505405604839325, - "step": 2930 - }, - { - "epoch": 0.4999147485080989, - "eval_loss": 0.035044603049755096, - "eval_runtime": 3.5861, - "eval_samples_per_second": 70.272, - "eval_steps_per_second": 1.115, - "step": 2932 - }, - { - "eval_cer_subset": 0.01374436139202371, - "eval_cer_subset_edit_distance": 844, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 2932 - }, - { - "epoch": 0.5004262574595055, - "grad_norm": 0.08485773205757141, - "learning_rate": 7.289429487785488e-05, - "loss": 0.01260426789522171, - "step": 2935 - }, - { - "epoch": 0.5012787723785166, - "grad_norm": 0.08039058744907379, - "learning_rate": 7.288582162684203e-05, - "loss": 0.012322144955396653, - "step": 2940 - }, - { - "epoch": 0.5021312872975278, - "grad_norm": 0.16017615795135498, - "learning_rate": 7.287733185655057e-05, - "loss": 0.009620364010334014, - "step": 2945 - }, - { - "epoch": 0.5029838022165388, - "grad_norm": 0.06721053272485733, - "learning_rate": 7.286882557094376e-05, - "loss": 0.009893904626369476, - "step": 2950 - }, - { - "epoch": 0.5038363171355499, - "grad_norm": 0.08132930099964142, - "learning_rate": 7.286030277399264e-05, - "loss": 0.012833705544471741, - "step": 2955 - }, - { - "epoch": 0.5046888320545609, - "grad_norm": 0.09076893329620361, - "learning_rate": 7.285176346967595e-05, - "loss": 0.011492121219635009, - "step": 2960 - }, - { - "epoch": 0.505541346973572, - "grad_norm": 0.1023377999663353, - "learning_rate": 7.284320766198008e-05, - "loss": 0.01216188371181488, - "step": 2965 - }, - { - "epoch": 0.5063938618925832, - "grad_norm": 0.07568195462226868, - "learning_rate": 7.283463535489921e-05, - "loss": 0.014794780313968659, - "step": 2970 - }, - { - "epoch": 0.5072463768115942, - "grad_norm": 0.11283870786428452, - "learning_rate": 7.282604655243515e-05, - "loss": 0.012774300575256348, - "step": 2975 - }, - { - "epoch": 0.5080988917306053, - "grad_norm": 0.07101167738437653, - "learning_rate": 7.281744125859746e-05, - "loss": 0.010759322345256806, - "step": 2980 - }, - { - "epoch": 0.5089514066496164, - "grad_norm": 0.07677409052848816, - "learning_rate": 7.280881947740336e-05, - "loss": 0.010482230037450791, - "step": 2985 - }, - { - "epoch": 0.5098039215686274, - "grad_norm": 0.08568017184734344, - "learning_rate": 7.280018121287777e-05, - "loss": 0.012674462795257569, - "step": 2990 - }, - { - "epoch": 0.5106564364876386, - "grad_norm": 0.07830876111984253, - "learning_rate": 7.279152646905336e-05, - "loss": 0.009349775314331055, - "step": 2995 - }, - { - "epoch": 0.5115089514066496, - "grad_norm": 0.07408280670642853, - "learning_rate": 7.278285524997044e-05, - "loss": 0.010303238779306412, - "step": 3000 - }, - { - "epoch": 0.5123614663256607, - "grad_norm": 0.09053376317024231, - "learning_rate": 7.277416755967698e-05, - "loss": 0.012187518179416656, - "step": 3005 - }, - { - "epoch": 0.5132139812446718, - "grad_norm": 0.07432437688112259, - "learning_rate": 7.276546340222875e-05, - "loss": 0.009504207968711853, - "step": 3010 - }, - { - "epoch": 0.5140664961636828, - "grad_norm": 0.09075863659381866, - "learning_rate": 7.275674278168908e-05, - "loss": 0.010764679312705994, - "step": 3015 - }, - { - "epoch": 0.514919011082694, - "grad_norm": 0.08363319933414459, - "learning_rate": 7.274800570212909e-05, - "loss": 0.011034657061100007, - "step": 3020 - }, - { - "epoch": 0.5157715260017051, - "grad_norm": 0.08179081231355667, - "learning_rate": 7.273925216762753e-05, - "loss": 0.012276624888181686, - "step": 3025 - }, - { - "epoch": 0.5166240409207161, - "grad_norm": 0.10797501355409622, - "learning_rate": 7.273048218227083e-05, - "loss": 0.008887678384780884, - "step": 3030 - }, - { - "epoch": 0.5174765558397272, - "grad_norm": 0.08237873017787933, - "learning_rate": 7.27216957501531e-05, - "loss": 0.010879174619913102, - "step": 3035 - }, - { - "epoch": 0.5183290707587382, - "grad_norm": 0.10010047256946564, - "learning_rate": 7.271289287537616e-05, - "loss": 0.0103249654173851, - "step": 3040 - }, - { - "epoch": 0.5191815856777494, - "grad_norm": 0.06411991268396378, - "learning_rate": 7.270407356204948e-05, - "loss": 0.006414853036403656, - "step": 3045 - }, - { - "epoch": 0.5200341005967605, - "grad_norm": 0.09925824403762817, - "learning_rate": 7.26952378142902e-05, - "loss": 0.010811964422464371, - "step": 3050 - }, - { - "epoch": 0.5208866155157715, - "grad_norm": 0.07986702769994736, - "learning_rate": 7.268638563622317e-05, - "loss": 0.011965467780828475, - "step": 3055 - }, - { - "epoch": 0.5217391304347826, - "grad_norm": 0.07426656037569046, - "learning_rate": 7.267751703198082e-05, - "loss": 0.0093523807823658, - "step": 3060 - }, - { - "epoch": 0.5225916453537937, - "grad_norm": 0.11460934579372406, - "learning_rate": 7.266863200570338e-05, - "loss": 0.01224176660180092, - "step": 3065 - }, - { - "epoch": 0.5234441602728048, - "grad_norm": 0.10174648463726044, - "learning_rate": 7.265973056153864e-05, - "loss": 0.011203842610120774, - "step": 3070 - }, - { - "epoch": 0.5242966751918159, - "grad_norm": 0.06445316970348358, - "learning_rate": 7.265081270364209e-05, - "loss": 0.010346656292676925, - "step": 3075 - }, - { - "epoch": 0.525149190110827, - "grad_norm": 0.08397547155618668, - "learning_rate": 7.264187843617688e-05, - "loss": 0.011372068524360656, - "step": 3080 - }, - { - "epoch": 0.526001705029838, - "grad_norm": 0.07325135916471481, - "learning_rate": 7.263292776331384e-05, - "loss": 0.01116851419210434, - "step": 3085 - }, - { - "epoch": 0.5268542199488491, - "grad_norm": 0.1034390926361084, - "learning_rate": 7.262396068923144e-05, - "loss": 0.011953853815793992, - "step": 3090 - }, - { - "epoch": 0.5277067348678602, - "grad_norm": 0.08395690470933914, - "learning_rate": 7.26149772181158e-05, - "loss": 0.011437299847602844, - "step": 3095 - }, - { - "epoch": 0.5285592497868713, - "grad_norm": 0.09495387226343155, - "learning_rate": 7.260597735416068e-05, - "loss": 0.009634804725646973, - "step": 3100 - }, - { - "epoch": 0.5294117647058824, - "grad_norm": 0.07444775849580765, - "learning_rate": 7.259696110156756e-05, - "loss": 0.009771790355443954, - "step": 3105 - }, - { - "epoch": 0.5302642796248934, - "grad_norm": 0.061964571475982666, - "learning_rate": 7.258792846454551e-05, - "loss": 0.007979755848646164, - "step": 3110 - }, - { - "epoch": 0.5311167945439045, - "grad_norm": 0.11025935411453247, - "learning_rate": 7.257887944731125e-05, - "loss": 0.012162110209465027, - "step": 3115 - }, - { - "epoch": 0.5319693094629157, - "grad_norm": 0.07793140411376953, - "learning_rate": 7.256981405408918e-05, - "loss": 0.00897146388888359, - "step": 3120 - }, - { - "epoch": 0.5328218243819267, - "grad_norm": 0.0773436427116394, - "learning_rate": 7.256073228911132e-05, - "loss": 0.009621264040470123, - "step": 3125 - }, - { - "epoch": 0.5336743393009378, - "grad_norm": 0.07340693473815918, - "learning_rate": 7.255163415661735e-05, - "loss": 0.01072111278772354, - "step": 3130 - }, - { - "epoch": 0.5345268542199488, - "grad_norm": 0.0971943810582161, - "learning_rate": 7.254251966085455e-05, - "loss": 0.009457825869321822, - "step": 3135 - }, - { - "epoch": 0.5353793691389599, - "grad_norm": 0.08840794116258621, - "learning_rate": 7.25333888060779e-05, - "loss": 0.015866565704345702, - "step": 3140 - }, - { - "epoch": 0.5362318840579711, - "grad_norm": 0.07126007229089737, - "learning_rate": 7.252424159654999e-05, - "loss": 0.012925322353839874, - "step": 3145 - }, - { - "epoch": 0.5370843989769821, - "grad_norm": 0.05989958345890045, - "learning_rate": 7.251507803654103e-05, - "loss": 0.007374878972768784, - "step": 3150 - }, - { - "epoch": 0.5379369138959932, - "grad_norm": 0.0661931037902832, - "learning_rate": 7.250589813032885e-05, - "loss": 0.009713394194841385, - "step": 3155 - }, - { - "epoch": 0.5387894288150042, - "grad_norm": 0.0813523456454277, - "learning_rate": 7.2496701882199e-05, - "loss": 0.007980254292488099, - "step": 3160 - }, - { - "epoch": 0.5396419437340153, - "grad_norm": 0.0565156452357769, - "learning_rate": 7.248748929644453e-05, - "loss": 0.010806798934936523, - "step": 3165 - }, - { - "epoch": 0.5404944586530265, - "grad_norm": 0.045107364654541016, - "learning_rate": 7.247826037736621e-05, - "loss": 0.013011330366134643, - "step": 3170 - }, - { - "epoch": 0.5413469735720375, - "grad_norm": 0.0623495988547802, - "learning_rate": 7.246901512927241e-05, - "loss": 0.012109772115945817, - "step": 3175 - }, - { - "epoch": 0.5421994884910486, - "grad_norm": 0.09943851083517075, - "learning_rate": 7.24597535564791e-05, - "loss": 0.011384092271327972, - "step": 3180 - }, - { - "epoch": 0.5430520034100597, - "grad_norm": 0.12090208381414413, - "learning_rate": 7.245047566330991e-05, - "loss": 0.011156149953603745, - "step": 3185 - }, - { - "epoch": 0.5439045183290707, - "grad_norm": 0.10226333141326904, - "learning_rate": 7.244118145409607e-05, - "loss": 0.01164291426539421, - "step": 3190 - }, - { - "epoch": 0.5447570332480819, - "grad_norm": 0.09011051058769226, - "learning_rate": 7.24318709331764e-05, - "loss": 0.009608177840709687, - "step": 3195 - }, - { - "epoch": 0.545609548167093, - "grad_norm": 0.08180241286754608, - "learning_rate": 7.24225441048974e-05, - "loss": 0.010098953545093537, - "step": 3200 - }, - { - "epoch": 0.546462063086104, - "grad_norm": 0.08325407654047012, - "learning_rate": 7.241320097361312e-05, - "loss": 0.012687146663665771, - "step": 3205 - }, - { - "epoch": 0.5473145780051151, - "grad_norm": 0.11662351340055466, - "learning_rate": 7.240384154368523e-05, - "loss": 0.012003959715366363, - "step": 3210 - }, - { - "epoch": 0.5481670929241261, - "grad_norm": 0.05904731899499893, - "learning_rate": 7.239446581948306e-05, - "loss": 0.012311330437660218, - "step": 3215 - }, - { - "epoch": 0.5490196078431373, - "grad_norm": 0.12498651444911957, - "learning_rate": 7.238507380538347e-05, - "loss": 0.011272794008255005, - "step": 3220 - }, - { - "epoch": 0.5498721227621484, - "grad_norm": 0.06047634035348892, - "learning_rate": 7.2375665505771e-05, - "loss": 0.010353527963161469, - "step": 3225 - }, - { - "epoch": 0.5507246376811594, - "grad_norm": 0.07596508413553238, - "learning_rate": 7.236624092503774e-05, - "loss": 0.011058451980352402, - "step": 3230 - }, - { - "epoch": 0.5515771526001705, - "grad_norm": 0.10000273585319519, - "learning_rate": 7.235680006758339e-05, - "loss": 0.012288159132003785, - "step": 3235 - }, - { - "epoch": 0.5524296675191815, - "grad_norm": 0.08154033869504929, - "learning_rate": 7.234734293781527e-05, - "loss": 0.015510989725589753, - "step": 3240 - }, - { - "epoch": 0.5532821824381927, - "grad_norm": 0.10024677217006683, - "learning_rate": 7.233786954014828e-05, - "loss": 0.010542219877243042, - "step": 3245 - }, - { - "epoch": 0.5541346973572038, - "grad_norm": 0.08001844584941864, - "learning_rate": 7.232837987900492e-05, - "loss": 0.009433221817016602, - "step": 3250 - }, - { - "epoch": 0.5549872122762148, - "grad_norm": 0.05274324119091034, - "learning_rate": 7.231887395881528e-05, - "loss": 0.010475738346576691, - "step": 3255 - }, - { - "epoch": 0.5558397271952259, - "grad_norm": 0.08753672242164612, - "learning_rate": 7.230935178401703e-05, - "loss": 0.007628431916236878, - "step": 3260 - }, - { - "epoch": 0.556692242114237, - "grad_norm": 0.10221699625253677, - "learning_rate": 7.229981335905545e-05, - "loss": 0.011822684109210968, - "step": 3265 - }, - { - "epoch": 0.5575447570332481, - "grad_norm": 0.07665866613388062, - "learning_rate": 7.229025868838336e-05, - "loss": 0.010916930437088013, - "step": 3270 - }, - { - "epoch": 0.5583972719522592, - "grad_norm": 0.08861260861158371, - "learning_rate": 7.228068777646125e-05, - "loss": 0.008925830572843551, - "step": 3275 - }, - { - "epoch": 0.5592497868712702, - "grad_norm": 0.08963657170534134, - "learning_rate": 7.227110062775712e-05, - "loss": 0.014812195301055908, - "step": 3280 - }, - { - "epoch": 0.5601023017902813, - "grad_norm": 0.28550851345062256, - "learning_rate": 7.226149724674655e-05, - "loss": 0.009522277861833572, - "step": 3285 - }, - { - "epoch": 0.5609548167092924, - "grad_norm": 0.057680875062942505, - "learning_rate": 7.225187763791273e-05, - "loss": 0.012893497943878174, - "step": 3290 - }, - { - "epoch": 0.5618073316283035, - "grad_norm": 0.08956284821033478, - "learning_rate": 7.224224180574642e-05, - "loss": 0.012499228864908219, - "step": 3295 - }, - { - "epoch": 0.5626598465473146, - "grad_norm": 0.11929965764284134, - "learning_rate": 7.223258975474596e-05, - "loss": 0.010640453547239304, - "step": 3300 - }, - { - "epoch": 0.5635123614663257, - "grad_norm": 0.09788426756858826, - "learning_rate": 7.222292148941722e-05, - "loss": 0.014677588641643525, - "step": 3305 - }, - { - "epoch": 0.5643648763853367, - "grad_norm": 0.08845673501491547, - "learning_rate": 7.221323701427368e-05, - "loss": 0.009266233444213868, - "step": 3310 - }, - { - "epoch": 0.5652173913043478, - "grad_norm": 0.07864493131637573, - "learning_rate": 7.220353633383636e-05, - "loss": 0.01019999384880066, - "step": 3315 - }, - { - "epoch": 0.566069906223359, - "grad_norm": 0.07658441364765167, - "learning_rate": 7.21938194526339e-05, - "loss": 0.010098284482955933, - "step": 3320 - }, - { - "epoch": 0.56692242114237, - "grad_norm": 0.058863960206508636, - "learning_rate": 7.218408637520243e-05, - "loss": 0.01043831706047058, - "step": 3325 - }, - { - "epoch": 0.5677749360613811, - "grad_norm": 0.05992535129189491, - "learning_rate": 7.217433710608567e-05, - "loss": 0.010804108530282974, - "step": 3330 - }, - { - "epoch": 0.5686274509803921, - "grad_norm": 0.10607994347810745, - "learning_rate": 7.216457164983494e-05, - "loss": 0.01115414798259735, - "step": 3335 - }, - { - "epoch": 0.5694799658994032, - "grad_norm": 0.07557345181703568, - "learning_rate": 7.215479001100904e-05, - "loss": 0.01279982328414917, - "step": 3340 - }, - { - "epoch": 0.5703324808184144, - "grad_norm": 0.064768947660923, - "learning_rate": 7.214499219417439e-05, - "loss": 0.01112583726644516, - "step": 3345 - }, - { - "epoch": 0.5711849957374254, - "grad_norm": 0.08013112843036652, - "learning_rate": 7.213517820390492e-05, - "loss": 0.01265912652015686, - "step": 3350 - }, - { - "epoch": 0.5720375106564365, - "grad_norm": 0.06619428843259811, - "learning_rate": 7.212534804478214e-05, - "loss": 0.01231289878487587, - "step": 3355 - }, - { - "epoch": 0.5728900255754475, - "grad_norm": 0.06123036891222, - "learning_rate": 7.211550172139507e-05, - "loss": 0.012096628546714783, - "step": 3360 - }, - { - "epoch": 0.5737425404944586, - "grad_norm": 0.10050475597381592, - "learning_rate": 7.210563923834034e-05, - "loss": 0.014050082862377166, - "step": 3365 - }, - { - "epoch": 0.5745950554134698, - "grad_norm": 0.05243556201457977, - "learning_rate": 7.209576060022207e-05, - "loss": 0.009351913630962373, - "step": 3370 - }, - { - "epoch": 0.5754475703324808, - "grad_norm": 0.12591946125030518, - "learning_rate": 7.208586581165192e-05, - "loss": 0.012423963844776153, - "step": 3375 - }, - { - "epoch": 0.5763000852514919, - "grad_norm": 0.11871001869440079, - "learning_rate": 7.207595487724912e-05, - "loss": 0.014398403465747833, - "step": 3380 - }, - { - "epoch": 0.577152600170503, - "grad_norm": 0.09194283187389374, - "learning_rate": 7.206602780164044e-05, - "loss": 0.009020231664180756, - "step": 3385 - }, - { - "epoch": 0.578005115089514, - "grad_norm": 0.1465149074792862, - "learning_rate": 7.205608458946013e-05, - "loss": 0.009870749711990357, - "step": 3390 - }, - { - "epoch": 0.5788576300085252, - "grad_norm": 0.07948209345340729, - "learning_rate": 7.204612524535006e-05, - "loss": 0.013135011494159698, - "step": 3395 - }, - { - "epoch": 0.5797101449275363, - "grad_norm": 0.07187635451555252, - "learning_rate": 7.203614977395952e-05, - "loss": 0.010598786920309067, - "step": 3400 - }, - { - "epoch": 0.5805626598465473, - "grad_norm": 0.05511854961514473, - "learning_rate": 7.202615817994545e-05, - "loss": 0.009227041155099869, - "step": 3405 - }, - { - "epoch": 0.5814151747655584, - "grad_norm": 0.05830230563879013, - "learning_rate": 7.201615046797224e-05, - "loss": 0.008167321979999542, - "step": 3410 - }, - { - "epoch": 0.5822676896845694, - "grad_norm": 0.08624587953090668, - "learning_rate": 7.200612664271184e-05, - "loss": 0.012134125083684921, - "step": 3415 - }, - { - "epoch": 0.5831202046035806, - "grad_norm": 0.0744808092713356, - "learning_rate": 7.199608670884366e-05, - "loss": 0.012493259459733962, - "step": 3420 - }, - { - "epoch": 0.5839727195225917, - "grad_norm": 0.07272766530513763, - "learning_rate": 7.19860306710547e-05, - "loss": 0.00806736946105957, - "step": 3425 - }, - { - "epoch": 0.5848252344416027, - "grad_norm": 0.0804983377456665, - "learning_rate": 7.197595853403946e-05, - "loss": 0.01102890819311142, - "step": 3430 - }, - { - "epoch": 0.5856777493606138, - "grad_norm": 0.05326579511165619, - "learning_rate": 7.196587030249994e-05, - "loss": 0.009381016343832016, - "step": 3435 - }, - { - "epoch": 0.5865302642796248, - "grad_norm": 0.07588013261556625, - "learning_rate": 7.195576598114567e-05, - "loss": 0.010961712896823883, - "step": 3440 - }, - { - "epoch": 0.587382779198636, - "grad_norm": 0.09725244343280792, - "learning_rate": 7.194564557469368e-05, - "loss": 0.012034715712070465, - "step": 3445 - }, - { - "epoch": 0.5882352941176471, - "grad_norm": 0.0938539057970047, - "learning_rate": 7.193550908786851e-05, - "loss": 0.012069541215896606, - "step": 3450 - }, - { - "epoch": 0.5890878090366581, - "grad_norm": 0.052410729229450226, - "learning_rate": 7.19253565254022e-05, - "loss": 0.011174223572015762, - "step": 3455 - }, - { - "epoch": 0.5899403239556692, - "grad_norm": 0.08317258954048157, - "learning_rate": 7.191518789203432e-05, - "loss": 0.014452503621578216, - "step": 3460 - }, - { - "epoch": 0.5907928388746803, - "grad_norm": 0.062619149684906, - "learning_rate": 7.190500319251193e-05, - "loss": 0.012830793857574463, - "step": 3465 - }, - { - "epoch": 0.5916453537936914, - "grad_norm": 0.06287284195423126, - "learning_rate": 7.189480243158956e-05, - "loss": 0.013282649219036102, - "step": 3470 - }, - { - "epoch": 0.5924978687127025, - "grad_norm": 0.07136182487010956, - "learning_rate": 7.188458561402928e-05, - "loss": 0.009024892747402192, - "step": 3475 - }, - { - "epoch": 0.5933503836317136, - "grad_norm": 0.09081269055604935, - "learning_rate": 7.187435274460064e-05, - "loss": 0.012044035643339158, - "step": 3480 - }, - { - "epoch": 0.5942028985507246, - "grad_norm": 0.08475978672504425, - "learning_rate": 7.18641038280807e-05, - "loss": 0.010785829275846481, - "step": 3485 - }, - { - "epoch": 0.5950554134697357, - "grad_norm": 0.06322979927062988, - "learning_rate": 7.185383886925397e-05, - "loss": 0.011609486490488052, - "step": 3490 - }, - { - "epoch": 0.5959079283887468, - "grad_norm": 0.07065978646278381, - "learning_rate": 7.18435578729125e-05, - "loss": 0.01128239706158638, - "step": 3495 - }, - { - "epoch": 0.5967604433077579, - "grad_norm": 0.057962607592344284, - "learning_rate": 7.183326084385577e-05, - "loss": 0.009382489323616027, - "step": 3500 - }, - { - "epoch": 0.597612958226769, - "grad_norm": 0.05717672407627106, - "learning_rate": 7.182294778689079e-05, - "loss": 0.010072773694992066, - "step": 3505 - }, - { - "epoch": 0.59846547314578, - "grad_norm": 0.07161569595336914, - "learning_rate": 7.181261870683205e-05, - "loss": 0.011324245482683182, - "step": 3510 - }, - { - "epoch": 0.5993179880647911, - "grad_norm": 0.07468906790018082, - "learning_rate": 7.180227360850148e-05, - "loss": 0.00984283909201622, - "step": 3515 - }, - { - "epoch": 0.6001705029838023, - "grad_norm": 0.071560800075531, - "learning_rate": 7.179191249672855e-05, - "loss": 0.011276674270629884, - "step": 3520 - }, - { - "epoch": 0.6010230179028133, - "grad_norm": 0.05588390305638313, - "learning_rate": 7.178153537635014e-05, - "loss": 0.008921106159687043, - "step": 3525 - }, - { - "epoch": 0.6018755328218244, - "grad_norm": 0.11065732687711716, - "learning_rate": 7.177114225221066e-05, - "loss": 0.0122377447783947, - "step": 3530 - }, - { - "epoch": 0.6027280477408354, - "grad_norm": 0.10121116787195206, - "learning_rate": 7.176073312916194e-05, - "loss": 0.007999545335769654, - "step": 3535 - }, - { - "epoch": 0.6035805626598465, - "grad_norm": 0.06102030724287033, - "learning_rate": 7.175030801206335e-05, - "loss": 0.008767658472061157, - "step": 3540 - }, - { - "epoch": 0.6044330775788577, - "grad_norm": 0.08233699947595596, - "learning_rate": 7.173986690578164e-05, - "loss": 0.010089017450809479, - "step": 3545 - }, - { - "epoch": 0.6052855924978687, - "grad_norm": 0.1655152440071106, - "learning_rate": 7.172940981519108e-05, - "loss": 0.012077460438013077, - "step": 3550 - }, - { - "epoch": 0.6061381074168798, - "grad_norm": 0.11178915202617645, - "learning_rate": 7.171893674517337e-05, - "loss": 0.009319285303354264, - "step": 3555 - }, - { - "epoch": 0.6069906223358909, - "grad_norm": 0.0778600424528122, - "learning_rate": 7.170844770061772e-05, - "loss": 0.012114962190389633, - "step": 3560 - }, - { - "epoch": 0.6078431372549019, - "grad_norm": 0.08708171546459198, - "learning_rate": 7.169794268642075e-05, - "loss": 0.011569589376449585, - "step": 3565 - }, - { - "epoch": 0.6086956521739131, - "grad_norm": 0.06438080966472626, - "learning_rate": 7.168742170748654e-05, - "loss": 0.010296766459941865, - "step": 3570 - }, - { - "epoch": 0.6095481670929241, - "grad_norm": 0.10569975525140762, - "learning_rate": 7.167688476872664e-05, - "loss": 0.008922196924686432, - "step": 3575 - }, - { - "epoch": 0.6104006820119352, - "grad_norm": 0.07466918975114822, - "learning_rate": 7.166633187506004e-05, - "loss": 0.009365256130695342, - "step": 3580 - }, - { - "epoch": 0.6112531969309463, - "grad_norm": 0.1070641577243805, - "learning_rate": 7.16557630314132e-05, - "loss": 0.011525402963161468, - "step": 3585 - }, - { - "epoch": 0.6121057118499573, - "grad_norm": 0.09534542262554169, - "learning_rate": 7.164517824271999e-05, - "loss": 0.010068083554506302, - "step": 3590 - }, - { - "epoch": 0.6129582267689685, - "grad_norm": 0.0643506869673729, - "learning_rate": 7.163457751392175e-05, - "loss": 0.010679592937231063, - "step": 3595 - }, - { - "epoch": 0.6138107416879796, - "grad_norm": 0.11610018461942673, - "learning_rate": 7.162396084996723e-05, - "loss": 0.010074391961097717, - "step": 3600 - }, - { - "epoch": 0.6146632566069906, - "grad_norm": 0.07560709863901138, - "learning_rate": 7.161332825581269e-05, - "loss": 0.013245916366577149, - "step": 3605 - }, - { - "epoch": 0.6155157715260017, - "grad_norm": 0.06540799885988235, - "learning_rate": 7.160267973642173e-05, - "loss": 0.01055695340037346, - "step": 3610 - }, - { - "epoch": 0.6163682864450127, - "grad_norm": 0.05610837787389755, - "learning_rate": 7.159201529676546e-05, - "loss": 0.010231484472751618, - "step": 3615 - }, - { - "epoch": 0.6172208013640239, - "grad_norm": 0.11630856245756149, - "learning_rate": 7.158133494182237e-05, - "loss": 0.01117742881178856, - "step": 3620 - }, - { - "epoch": 0.618073316283035, - "grad_norm": 0.08508500456809998, - "learning_rate": 7.157063867657844e-05, - "loss": 0.010253986716270447, - "step": 3625 - }, - { - "epoch": 0.618925831202046, - "grad_norm": 0.067935511469841, - "learning_rate": 7.155992650602702e-05, - "loss": 0.009731527417898178, - "step": 3630 - }, - { - "epoch": 0.6197783461210571, - "grad_norm": 0.0784364566206932, - "learning_rate": 7.154919843516892e-05, - "loss": 0.009552852809429168, - "step": 3635 - }, - { - "epoch": 0.6206308610400681, - "grad_norm": 0.10788855701684952, - "learning_rate": 7.153845446901234e-05, - "loss": 0.011269643902778625, - "step": 3640 - }, - { - "epoch": 0.6214833759590793, - "grad_norm": 0.08664087951183319, - "learning_rate": 7.152769461257294e-05, - "loss": 0.010251335799694061, - "step": 3645 - }, - { - "epoch": 0.6223358908780904, - "grad_norm": 0.06885403394699097, - "learning_rate": 7.151691887087377e-05, - "loss": 0.008078257739543914, - "step": 3650 - }, - { - "epoch": 0.6231884057971014, - "grad_norm": 0.09345501661300659, - "learning_rate": 7.150612724894531e-05, - "loss": 0.012022207677364349, - "step": 3655 - }, - { - "epoch": 0.6240409207161125, - "grad_norm": 0.08502865582704544, - "learning_rate": 7.149531975182543e-05, - "loss": 0.00932946428656578, - "step": 3660 - }, - { - "epoch": 0.6248934356351236, - "grad_norm": 0.06249995157122612, - "learning_rate": 7.148449638455947e-05, - "loss": 0.011525212973356246, - "step": 3665 - }, - { - "epoch": 0.6257459505541347, - "grad_norm": 0.04836896434426308, - "learning_rate": 7.14736571522001e-05, - "loss": 0.010347714275121688, - "step": 3670 - }, - { - "epoch": 0.6265984654731458, - "grad_norm": 0.06358285248279572, - "learning_rate": 7.146280205980745e-05, - "loss": 0.009779715538024902, - "step": 3675 - }, - { - "epoch": 0.6274509803921569, - "grad_norm": 0.07596850395202637, - "learning_rate": 7.145193111244903e-05, - "loss": 0.010838811099529267, - "step": 3680 - }, - { - "epoch": 0.6283034953111679, - "grad_norm": 0.05986448749899864, - "learning_rate": 7.144104431519977e-05, - "loss": 0.009671849757432937, - "step": 3685 - }, - { - "epoch": 0.629156010230179, - "grad_norm": 0.047881439328193665, - "learning_rate": 7.143014167314197e-05, - "loss": 0.007660867273807525, - "step": 3690 - }, - { - "epoch": 0.6300085251491901, - "grad_norm": 0.06409293413162231, - "learning_rate": 7.141922319136537e-05, - "loss": 0.013374905288219451, - "step": 3695 - }, - { - "epoch": 0.6308610400682012, - "grad_norm": 0.0767306461930275, - "learning_rate": 7.140828887496707e-05, - "loss": 0.006885652989149093, - "step": 3700 - }, - { - "epoch": 0.6317135549872123, - "grad_norm": 0.08192065358161926, - "learning_rate": 7.139733872905158e-05, - "loss": 0.013760556280612946, - "step": 3705 - }, - { - "epoch": 0.6325660699062233, - "grad_norm": 0.09693574160337448, - "learning_rate": 7.138637275873078e-05, - "loss": 0.009739194065332413, - "step": 3710 - }, - { - "epoch": 0.6334185848252344, - "grad_norm": 0.08232755959033966, - "learning_rate": 7.137539096912395e-05, - "loss": 0.010294197499752045, - "step": 3715 - }, - { - "epoch": 0.6342710997442456, - "grad_norm": 0.06582340598106384, - "learning_rate": 7.136439336535776e-05, - "loss": 0.010686574131250381, - "step": 3720 - }, - { - "epoch": 0.6351236146632566, - "grad_norm": 0.07385887205600739, - "learning_rate": 7.135337995256626e-05, - "loss": 0.011403677612543106, - "step": 3725 - }, - { - "epoch": 0.6359761295822677, - "grad_norm": 0.11854248493909836, - "learning_rate": 7.134235073589087e-05, - "loss": 0.01180308759212494, - "step": 3730 - }, - { - "epoch": 0.6368286445012787, - "grad_norm": 0.076481893658638, - "learning_rate": 7.133130572048041e-05, - "loss": 0.011076596379280091, - "step": 3735 - }, - { - "epoch": 0.6376811594202898, - "grad_norm": 0.09552651643753052, - "learning_rate": 7.132024491149103e-05, - "loss": 0.014420199394226074, - "step": 3740 - }, - { - "epoch": 0.638533674339301, - "grad_norm": 0.04855124279856682, - "learning_rate": 7.130916831408633e-05, - "loss": 0.008350960910320282, - "step": 3745 - }, - { - "epoch": 0.639386189258312, - "grad_norm": 0.0796368345618248, - "learning_rate": 7.12980759334372e-05, - "loss": 0.010764746367931366, - "step": 3750 - }, - { - "epoch": 0.6402387041773231, - "grad_norm": 0.07030697911977768, - "learning_rate": 7.128696777472193e-05, - "loss": 0.010386807471513748, - "step": 3755 - }, - { - "epoch": 0.6410912190963342, - "grad_norm": 0.05930609628558159, - "learning_rate": 7.127584384312619e-05, - "loss": 0.008884093910455703, - "step": 3760 - }, - { - "epoch": 0.6419437340153452, - "grad_norm": 0.07495228201150894, - "learning_rate": 7.126470414384299e-05, - "loss": 0.010249865800142288, - "step": 3765 - }, - { - "epoch": 0.6427962489343564, - "grad_norm": 0.12954963743686676, - "learning_rate": 7.125354868207275e-05, - "loss": 0.013017497956752777, - "step": 3770 - }, - { - "epoch": 0.6436487638533674, - "grad_norm": 0.08893310278654099, - "learning_rate": 7.124237746302317e-05, - "loss": 0.010649867355823517, - "step": 3775 - }, - { - "epoch": 0.6445012787723785, - "grad_norm": 0.08650866150856018, - "learning_rate": 7.123119049190935e-05, - "loss": 0.012544044852256775, - "step": 3780 - }, - { - "epoch": 0.6453537936913896, - "grad_norm": 0.06374052166938782, - "learning_rate": 7.121998777395375e-05, - "loss": 0.007669864594936371, - "step": 3785 - }, - { - "epoch": 0.6462063086104007, - "grad_norm": 0.08226713538169861, - "learning_rate": 7.120876931438618e-05, - "loss": 0.007969621568918228, - "step": 3790 - }, - { - "epoch": 0.6470588235294118, - "grad_norm": 0.10450884699821472, - "learning_rate": 7.119753511844377e-05, - "loss": 0.013088032603263855, - "step": 3795 - }, - { - "epoch": 0.6479113384484229, - "grad_norm": 0.08459076285362244, - "learning_rate": 7.118628519137104e-05, - "loss": 0.01125529408454895, - "step": 3800 - }, - { - "epoch": 0.6487638533674339, - "grad_norm": 0.09018636494874954, - "learning_rate": 7.11750195384198e-05, - "loss": 0.008683501929044723, - "step": 3805 - }, - { - "epoch": 0.649616368286445, - "grad_norm": 0.07949680835008621, - "learning_rate": 7.116373816484927e-05, - "loss": 0.008904790878295899, - "step": 3810 - }, - { - "epoch": 0.6504688832054561, - "grad_norm": 0.14297716319561005, - "learning_rate": 7.115244107592593e-05, - "loss": 0.01503775417804718, - "step": 3815 - }, - { - "epoch": 0.6513213981244672, - "grad_norm": 0.051478032022714615, - "learning_rate": 7.114112827692367e-05, - "loss": 0.011145923286676407, - "step": 3820 - }, - { - "epoch": 0.6521739130434783, - "grad_norm": 0.0686139240860939, - "learning_rate": 7.112979977312365e-05, - "loss": 0.009445450454950332, - "step": 3825 - }, - { - "epoch": 0.6530264279624893, - "grad_norm": 0.08674909919500351, - "learning_rate": 7.111845556981444e-05, - "loss": 0.009345399588346482, - "step": 3830 - }, - { - "epoch": 0.6538789428815004, - "grad_norm": 0.07799270749092102, - "learning_rate": 7.110709567229182e-05, - "loss": 0.009722919762134552, - "step": 3835 - }, - { - "epoch": 0.6547314578005116, - "grad_norm": 0.07891912013292313, - "learning_rate": 7.109572008585905e-05, - "loss": 0.009985177218914032, - "step": 3840 - }, - { - "epoch": 0.6555839727195226, - "grad_norm": 0.07315738499164581, - "learning_rate": 7.108432881582656e-05, - "loss": 0.011729113757610321, - "step": 3845 - }, - { - "epoch": 0.6564364876385337, - "grad_norm": 0.04961124807596207, - "learning_rate": 7.107292186751222e-05, - "loss": 0.008087723702192306, - "step": 3850 - }, - { - "epoch": 0.6572890025575447, - "grad_norm": 0.0745200589299202, - "learning_rate": 7.106149924624115e-05, - "loss": 0.010474404692649842, - "step": 3855 - }, - { - "epoch": 0.6581415174765558, - "grad_norm": 0.06290512531995773, - "learning_rate": 7.105006095734581e-05, - "loss": 0.009356130659580231, - "step": 3860 - }, - { - "epoch": 0.658994032395567, - "grad_norm": 0.058479905128479004, - "learning_rate": 7.1038607006166e-05, - "loss": 0.008637580275535583, - "step": 3865 - }, - { - "epoch": 0.659846547314578, - "grad_norm": 0.07301484048366547, - "learning_rate": 7.102713739804879e-05, - "loss": 0.015610474348068237, - "step": 3870 - }, - { - "epoch": 0.6606990622335891, - "grad_norm": 0.07421465218067169, - "learning_rate": 7.101565213834855e-05, - "loss": 0.011201824992895126, - "step": 3875 - }, - { - "epoch": 0.6615515771526002, - "grad_norm": 0.06928746402263641, - "learning_rate": 7.100415123242701e-05, - "loss": 0.007224821299314499, - "step": 3880 - }, - { - "epoch": 0.6624040920716112, - "grad_norm": 0.0669165551662445, - "learning_rate": 7.099263468565317e-05, - "loss": 0.007274401932954788, - "step": 3885 - }, - { - "epoch": 0.6632566069906224, - "grad_norm": 0.09326919168233871, - "learning_rate": 7.098110250340334e-05, - "loss": 0.008258016407489776, - "step": 3890 - }, - { - "epoch": 0.6641091219096334, - "grad_norm": 0.07563190162181854, - "learning_rate": 7.096955469106111e-05, - "loss": 0.01005811095237732, - "step": 3895 - }, - { - "epoch": 0.6649616368286445, - "grad_norm": 0.10135438293218613, - "learning_rate": 7.09579912540174e-05, - "loss": 0.009129725396633148, - "step": 3900 - }, - { - "epoch": 0.6658141517476556, - "grad_norm": 0.07946127653121948, - "learning_rate": 7.094641219767041e-05, - "loss": 0.013300496339797973, - "step": 3905 - }, - { - "epoch": 0.6666666666666666, - "grad_norm": 0.0727713331580162, - "learning_rate": 7.093481752742561e-05, - "loss": 0.01028701215982437, - "step": 3910 - }, - { - "epoch": 0.6675191815856778, - "grad_norm": 0.0718616396188736, - "learning_rate": 7.092320724869578e-05, - "loss": 0.009694813191890717, - "step": 3915 - }, - { - "epoch": 0.6683716965046889, - "grad_norm": 0.07789818942546844, - "learning_rate": 7.091158136690102e-05, - "loss": 0.009028838574886322, - "step": 3920 - }, - { - "epoch": 0.6692242114236999, - "grad_norm": 0.07319378852844238, - "learning_rate": 7.089993988746862e-05, - "loss": 0.008582034707069397, - "step": 3925 - }, - { - "epoch": 0.670076726342711, - "grad_norm": 0.086976557970047, - "learning_rate": 7.088828281583326e-05, - "loss": 0.013991822302341462, - "step": 3930 - }, - { - "epoch": 0.670929241261722, - "grad_norm": 0.07413294911384583, - "learning_rate": 7.087661015743681e-05, - "loss": 0.010896880924701691, - "step": 3935 - }, - { - "epoch": 0.6717817561807332, - "grad_norm": 0.12066303938627243, - "learning_rate": 7.08649219177285e-05, - "loss": 0.011574408411979676, - "step": 3940 - }, - { - "epoch": 0.6726342710997443, - "grad_norm": 0.11789914965629578, - "learning_rate": 7.085321810216474e-05, - "loss": 0.011523760855197906, - "step": 3945 - }, - { - "epoch": 0.6734867860187553, - "grad_norm": 0.07654725015163422, - "learning_rate": 7.084149871620929e-05, - "loss": 0.010388451814651489, - "step": 3950 - }, - { - "epoch": 0.6743393009377664, - "grad_norm": 0.05072671175003052, - "learning_rate": 7.082976376533315e-05, - "loss": 0.009674163907766343, - "step": 3955 - }, - { - "epoch": 0.6751918158567775, - "grad_norm": 0.08331634104251862, - "learning_rate": 7.081801325501458e-05, - "loss": 0.01096268892288208, - "step": 3960 - }, - { - "epoch": 0.6760443307757886, - "grad_norm": 0.03134739026427269, - "learning_rate": 7.08062471907391e-05, - "loss": 0.009522407501935958, - "step": 3965 - }, - { - "epoch": 0.6768968456947997, - "grad_norm": 0.09123755246400833, - "learning_rate": 7.079446557799951e-05, - "loss": 0.011530914902687072, - "step": 3970 - }, - { - "epoch": 0.6777493606138107, - "grad_norm": 0.04438139498233795, - "learning_rate": 7.078266842229585e-05, - "loss": 0.007757561653852463, - "step": 3975 - }, - { - "epoch": 0.6786018755328218, - "grad_norm": 0.06562457978725433, - "learning_rate": 7.077085572913543e-05, - "loss": 0.010243573784828186, - "step": 3980 - }, - { - "epoch": 0.6794543904518329, - "grad_norm": 0.08872365206480026, - "learning_rate": 7.075902750403283e-05, - "loss": 0.009365381300449371, - "step": 3985 - }, - { - "epoch": 0.680306905370844, - "grad_norm": 0.06977558881044388, - "learning_rate": 7.074718375250982e-05, - "loss": 0.010138784348964692, - "step": 3990 - }, - { - "epoch": 0.6811594202898551, - "grad_norm": 0.08196771889925003, - "learning_rate": 7.073532448009547e-05, - "loss": 0.011172623187303544, - "step": 3995 - }, - { - "epoch": 0.6820119352088662, - "grad_norm": 0.09635947644710541, - "learning_rate": 7.072344969232611e-05, - "loss": 0.011570926010608672, - "step": 4000 - }, - { - "epoch": 0.6828644501278772, - "grad_norm": 0.10703961551189423, - "learning_rate": 7.071155939474525e-05, - "loss": 0.010987398028373719, - "step": 4005 - }, - { - "epoch": 0.6837169650468883, - "grad_norm": 0.08124027401208878, - "learning_rate": 7.06996535929037e-05, - "loss": 0.009500280767679215, - "step": 4010 - }, - { - "epoch": 0.6845694799658995, - "grad_norm": 0.1272915005683899, - "learning_rate": 7.068773229235946e-05, - "loss": 0.011316341906785965, - "step": 4015 - }, - { - "epoch": 0.6854219948849105, - "grad_norm": 0.05040539428591728, - "learning_rate": 7.067579549867782e-05, - "loss": 0.009714095294475556, - "step": 4020 - }, - { - "epoch": 0.6862745098039216, - "grad_norm": 0.0954902172088623, - "learning_rate": 7.066384321743125e-05, - "loss": 0.01280902624130249, - "step": 4025 - }, - { - "epoch": 0.6871270247229326, - "grad_norm": 0.06131720915436745, - "learning_rate": 7.065187545419947e-05, - "loss": 0.00962383598089218, - "step": 4030 - }, - { - "epoch": 0.6879795396419437, - "grad_norm": 0.09720136225223541, - "learning_rate": 7.063989221456946e-05, - "loss": 0.00951121300458908, - "step": 4035 - }, - { - "epoch": 0.6888320545609549, - "grad_norm": 0.09116765856742859, - "learning_rate": 7.062789350413536e-05, - "loss": 0.012013505399227142, - "step": 4040 - }, - { - "epoch": 0.6896845694799659, - "grad_norm": 0.15665945410728455, - "learning_rate": 7.061587932849858e-05, - "loss": 0.012792985141277313, - "step": 4045 - }, - { - "epoch": 0.690537084398977, - "grad_norm": 0.05531725287437439, - "learning_rate": 7.060384969326775e-05, - "loss": 0.009135130047798156, - "step": 4050 - }, - { - "epoch": 0.691389599317988, - "grad_norm": 0.05050938203930855, - "learning_rate": 7.059180460405869e-05, - "loss": 0.01005997508764267, - "step": 4055 - }, - { - "epoch": 0.6922421142369991, - "grad_norm": 0.08903607726097107, - "learning_rate": 7.057974406649444e-05, - "loss": 0.008456438779830933, - "step": 4060 - }, - { - "epoch": 0.6930946291560103, - "grad_norm": 0.1395196169614792, - "learning_rate": 7.056766808620529e-05, - "loss": 0.012946255505084991, - "step": 4065 - }, - { - "epoch": 0.6939471440750213, - "grad_norm": 0.13844923675060272, - "learning_rate": 7.055557666882866e-05, - "loss": 0.008691005408763885, - "step": 4070 - }, - { - "epoch": 0.6947996589940324, - "grad_norm": 0.0776091143488884, - "learning_rate": 7.054346982000928e-05, - "loss": 0.011200450360774994, - "step": 4075 - }, - { - "epoch": 0.6956521739130435, - "grad_norm": 0.06444083154201508, - "learning_rate": 7.0531347545399e-05, - "loss": 0.010937537997961044, - "step": 4080 - }, - { - "epoch": 0.6965046888320545, - "grad_norm": 0.07561453431844711, - "learning_rate": 7.05192098506569e-05, - "loss": 0.00827426165342331, - "step": 4085 - }, - { - "epoch": 0.6973572037510657, - "grad_norm": 0.06595294177532196, - "learning_rate": 7.050705674144927e-05, - "loss": 0.007974696159362794, - "step": 4090 - }, - { - "epoch": 0.6982097186700768, - "grad_norm": 0.08887284994125366, - "learning_rate": 7.049488822344959e-05, - "loss": 0.009547770768404008, - "step": 4095 - }, - { - "epoch": 0.6990622335890878, - "grad_norm": 0.06858290731906891, - "learning_rate": 7.04827043023385e-05, - "loss": 0.012419018894433975, - "step": 4100 - }, - { - "epoch": 0.6999147485080989, - "grad_norm": 0.09107037633657455, - "learning_rate": 7.047050498380391e-05, - "loss": 0.008159243315458298, - "step": 4105 - }, - { - "epoch": 0.7007672634271099, - "grad_norm": 0.062287479639053345, - "learning_rate": 7.045829027354082e-05, - "loss": 0.00995248556137085, - "step": 4110 - }, - { - "epoch": 0.7016197783461211, - "grad_norm": 0.11668206751346588, - "learning_rate": 7.044606017725148e-05, - "loss": 0.012902414798736573, - "step": 4115 - }, - { - "epoch": 0.7024722932651322, - "grad_norm": 0.08674585819244385, - "learning_rate": 7.043381470064532e-05, - "loss": 0.010076310485601425, - "step": 4120 - }, - { - "epoch": 0.7033248081841432, - "grad_norm": 0.09690031409263611, - "learning_rate": 7.042155384943892e-05, - "loss": 0.011086124181747436, - "step": 4125 - }, - { - "epoch": 0.7041773231031543, - "grad_norm": 0.09527027606964111, - "learning_rate": 7.040927762935605e-05, - "loss": 0.010631310194730759, - "step": 4130 - }, - { - "epoch": 0.7050298380221653, - "grad_norm": 0.07089316844940186, - "learning_rate": 7.039698604612765e-05, - "loss": 0.010472215712070465, - "step": 4135 - }, - { - "epoch": 0.7058823529411765, - "grad_norm": 0.07314343005418777, - "learning_rate": 7.038467910549188e-05, - "loss": 0.011205179244279861, - "step": 4140 - }, - { - "epoch": 0.7067348678601876, - "grad_norm": 0.10004976391792297, - "learning_rate": 7.037235681319399e-05, - "loss": 0.011671188473701476, - "step": 4145 - }, - { - "epoch": 0.7075873827791986, - "grad_norm": 0.06655722856521606, - "learning_rate": 7.036001917498645e-05, - "loss": 0.008725546300411224, - "step": 4150 - }, - { - "epoch": 0.7084398976982097, - "grad_norm": 0.0563860684633255, - "learning_rate": 7.034766619662888e-05, - "loss": 0.009952855855226516, - "step": 4155 - }, - { - "epoch": 0.7092924126172208, - "grad_norm": 0.09032288193702698, - "learning_rate": 7.033529788388806e-05, - "loss": 0.010940121859312058, - "step": 4160 - }, - { - "epoch": 0.7101449275362319, - "grad_norm": 0.10090665519237518, - "learning_rate": 7.032291424253793e-05, - "loss": 0.0093452550470829, - "step": 4165 - }, - { - "epoch": 0.710997442455243, - "grad_norm": 0.08737532049417496, - "learning_rate": 7.03105152783596e-05, - "loss": 0.011567962169647217, - "step": 4170 - }, - { - "epoch": 0.711849957374254, - "grad_norm": 0.08184633404016495, - "learning_rate": 7.029810099714128e-05, - "loss": 0.011243235319852829, - "step": 4175 - }, - { - "epoch": 0.7127024722932651, - "grad_norm": 0.10227608680725098, - "learning_rate": 7.028567140467842e-05, - "loss": 0.01062348037958145, - "step": 4180 - }, - { - "epoch": 0.7135549872122762, - "grad_norm": 0.08998764306306839, - "learning_rate": 7.027322650677353e-05, - "loss": 0.01058843582868576, - "step": 4185 - }, - { - "epoch": 0.7144075021312873, - "grad_norm": 0.06969588994979858, - "learning_rate": 7.02607663092363e-05, - "loss": 0.009745591133832932, - "step": 4190 - }, - { - "epoch": 0.7152600170502984, - "grad_norm": 0.08256277441978455, - "learning_rate": 7.024829081788359e-05, - "loss": 0.009450466185808182, - "step": 4195 - }, - { - "epoch": 0.7161125319693095, - "grad_norm": 0.06720574200153351, - "learning_rate": 7.023580003853937e-05, - "loss": 0.006700781732797622, - "step": 4200 - }, - { - "epoch": 0.7169650468883205, - "grad_norm": 0.1428842693567276, - "learning_rate": 7.022329397703474e-05, - "loss": 0.009295140206813813, - "step": 4205 - }, - { - "epoch": 0.7178175618073316, - "grad_norm": 0.11677515506744385, - "learning_rate": 7.021077263920794e-05, - "loss": 0.011417123675346374, - "step": 4210 - }, - { - "epoch": 0.7186700767263428, - "grad_norm": 0.06874742358922958, - "learning_rate": 7.019823603090437e-05, - "loss": 0.013518881797790528, - "step": 4215 - }, - { - "epoch": 0.7195225916453538, - "grad_norm": 0.06695922464132309, - "learning_rate": 7.018568415797651e-05, - "loss": 0.008886832743883133, - "step": 4220 - }, - { - "epoch": 0.7203751065643649, - "grad_norm": 0.09428033232688904, - "learning_rate": 7.017311702628402e-05, - "loss": 0.009926854819059371, - "step": 4225 - }, - { - "epoch": 0.7212276214833759, - "grad_norm": 0.08420582115650177, - "learning_rate": 7.016053464169362e-05, - "loss": 0.011952979117631912, - "step": 4230 - }, - { - "epoch": 0.722080136402387, - "grad_norm": 0.07804932445287704, - "learning_rate": 7.014793701007922e-05, - "loss": 0.009345601499080657, - "step": 4235 - }, - { - "epoch": 0.7229326513213982, - "grad_norm": 0.10204415768384933, - "learning_rate": 7.013532413732179e-05, - "loss": 0.009596188366413117, - "step": 4240 - }, - { - "epoch": 0.7237851662404092, - "grad_norm": 0.06207561865448952, - "learning_rate": 7.012269602930946e-05, - "loss": 0.010864783823490144, - "step": 4245 - }, - { - "epoch": 0.7246376811594203, - "grad_norm": 0.07258995622396469, - "learning_rate": 7.011005269193743e-05, - "loss": 0.010417935252189637, - "step": 4250 - }, - { - "epoch": 0.7254901960784313, - "grad_norm": 0.09797866642475128, - "learning_rate": 7.009739413110803e-05, - "loss": 0.009975450485944748, - "step": 4255 - }, - { - "epoch": 0.7263427109974424, - "grad_norm": 0.14229723811149597, - "learning_rate": 7.008472035273071e-05, - "loss": 0.013093425333499909, - "step": 4260 - }, - { - "epoch": 0.7271952259164536, - "grad_norm": 0.10052912682294846, - "learning_rate": 7.007203136272202e-05, - "loss": 0.008044174313545227, - "step": 4265 - }, - { - "epoch": 0.7280477408354646, - "grad_norm": 0.06391840428113937, - "learning_rate": 7.005932716700558e-05, - "loss": 0.009895801544189453, - "step": 4270 - }, - { - "epoch": 0.7289002557544757, - "grad_norm": 0.08301703631877899, - "learning_rate": 7.004660777151213e-05, - "loss": 0.008399789780378341, - "step": 4275 - }, - { - "epoch": 0.7297527706734868, - "grad_norm": 0.09191301465034485, - "learning_rate": 7.003387318217954e-05, - "loss": 0.010163726657629013, - "step": 4280 - }, - { - "epoch": 0.7306052855924978, - "grad_norm": 0.06292443722486496, - "learning_rate": 7.00211234049527e-05, - "loss": 0.010847686976194381, - "step": 4285 - }, - { - "epoch": 0.731457800511509, - "grad_norm": 0.08020442724227905, - "learning_rate": 7.000835844578365e-05, - "loss": 0.010198664665222169, - "step": 4290 - }, - { - "epoch": 0.73231031543052, - "grad_norm": 0.08008337765932083, - "learning_rate": 6.999557831063152e-05, - "loss": 0.010532062500715256, - "step": 4295 - }, - { - "epoch": 0.7331628303495311, - "grad_norm": 0.09209048002958298, - "learning_rate": 6.998278300546245e-05, - "loss": 0.012655872106552123, - "step": 4300 - }, - { - "epoch": 0.7340153452685422, - "grad_norm": 0.1040966734290123, - "learning_rate": 6.996997253624974e-05, - "loss": 0.009482499212026596, - "step": 4305 - }, - { - "epoch": 0.7348678601875532, - "grad_norm": 0.06724270433187485, - "learning_rate": 6.995714690897376e-05, - "loss": 0.008470554649829865, - "step": 4310 - }, - { - "epoch": 0.7357203751065644, - "grad_norm": 0.050487734377384186, - "learning_rate": 6.994430612962192e-05, - "loss": 0.009740649163722992, - "step": 4315 - }, - { - "epoch": 0.7365728900255755, - "grad_norm": 0.07633031159639359, - "learning_rate": 6.993145020418873e-05, - "loss": 0.009455478191375733, - "step": 4320 - }, - { - "epoch": 0.7374254049445865, - "grad_norm": 0.11053632944822311, - "learning_rate": 6.991857913867575e-05, - "loss": 0.0111383855342865, - "step": 4325 - }, - { - "epoch": 0.7382779198635976, - "grad_norm": 0.07932359725236893, - "learning_rate": 6.990569293909165e-05, - "loss": 0.010170862078666687, - "step": 4330 - }, - { - "epoch": 0.7391304347826086, - "grad_norm": 0.06205928325653076, - "learning_rate": 6.98927916114521e-05, - "loss": 0.009742221236228943, - "step": 4335 - }, - { - "epoch": 0.7399829497016198, - "grad_norm": 0.07431669533252716, - "learning_rate": 6.987987516177989e-05, - "loss": 0.009026934206485749, - "step": 4340 - }, - { - "epoch": 0.7408354646206309, - "grad_norm": 0.06495700776576996, - "learning_rate": 6.986694359610486e-05, - "loss": 0.010289526730775832, - "step": 4345 - }, - { - "epoch": 0.7416879795396419, - "grad_norm": 0.07561559230089188, - "learning_rate": 6.985399692046387e-05, - "loss": 0.012499828636646271, - "step": 4350 - }, - { - "epoch": 0.742540494458653, - "grad_norm": 0.07708913832902908, - "learning_rate": 6.984103514090087e-05, - "loss": 0.01143759787082672, - "step": 4355 - }, - { - "epoch": 0.7433930093776641, - "grad_norm": 0.1315995305776596, - "learning_rate": 6.982805826346687e-05, - "loss": 0.010377982258796692, - "step": 4360 - }, - { - "epoch": 0.7442455242966752, - "grad_norm": 0.08346904814243317, - "learning_rate": 6.981506629421986e-05, - "loss": 0.008995984494686127, - "step": 4365 - }, - { - "epoch": 0.7450980392156863, - "grad_norm": 0.0814853310585022, - "learning_rate": 6.980205923922497e-05, - "loss": 0.009719532728195191, - "step": 4370 - }, - { - "epoch": 0.7459505541346974, - "grad_norm": 0.06594623625278473, - "learning_rate": 6.978903710455431e-05, - "loss": 0.008998245745897294, - "step": 4375 - }, - { - "epoch": 0.7468030690537084, - "grad_norm": 0.09526190906763077, - "learning_rate": 6.977599989628704e-05, - "loss": 0.010040522366762162, - "step": 4380 - }, - { - "epoch": 0.7476555839727195, - "grad_norm": 0.0938214361667633, - "learning_rate": 6.976294762050935e-05, - "loss": 0.010504753142595292, - "step": 4385 - }, - { - "epoch": 0.7485080988917306, - "grad_norm": 0.09816118329763412, - "learning_rate": 6.97498802833145e-05, - "loss": 0.011645899713039398, - "step": 4390 - }, - { - "epoch": 0.7493606138107417, - "grad_norm": 0.0780767872929573, - "learning_rate": 6.973679789080276e-05, - "loss": 0.011689887195825577, - "step": 4395 - }, - { - "epoch": 0.7498721227621483, - "eval_loss": 0.03396161273121834, - "eval_runtime": 3.6324, - "eval_samples_per_second": 69.376, - "eval_steps_per_second": 1.101, - "step": 4398 - }, - { - "eval_cer_subset": 0.01302783070334001, - "eval_cer_subset_edit_distance": 800, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 4398 - } - ], - "logging_steps": 5, - "max_steps": 23460, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 2932, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 1.482582343736918e+18, - "train_batch_size": 32, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/training_args.bin deleted file mode 100644 index f049a0b30d4abb921310cf007655d399147294f8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4398/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6adec376d54028ef57ef3dc856a5cba12bab9c0d580369637fa983b6072064f7 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/adapter_model.safetensors deleted file mode 100644 index 58cee7d014b251fe3178e2bc34178d67566bfe03..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:783a84b997ae5b82feb1d7257cb8e9946735a1e8bb45be0c42b555d99fd143bc -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/optimizer.pt deleted file mode 100644 index f27a40a78a188aad0140a7c76362f80125dcb5a9..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:92cef9ccdb44cadf4b0d08e6937d7bc78712b3c1fbd849e9bd5a54ae3616a457 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/rng_state.pth deleted file mode 100644 index 529c5ec3031c8a76d24e0fd7ede51c3992f5b04d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:93796aa03ebeb2fd5d960d3081cb2a9b011809fb5d5212c2c06f20b5081d9a63 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/scheduler.pt deleted file mode 100644 index ad3b3d9d762a3fd0af713f40e7189e7624b34aa8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:af05fe4c57a1c478016dc4c297fc6fb5d7b4af4e307e5353ded357c3d868b096 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/trainer_state.json deleted file mode 100644 index ba2e574f245d459b4620cdbb91d561f792483749..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/trainer_state.json +++ /dev/null @@ -1,6899 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 3.2455661664392905, - "eval_steps": 366, - "global_step": 4758, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - }, - { - "epoch": 1.7496589358799455, - "grad_norm": 0.1577913612127304, - "learning_rate": 4.617543195717702e-05, - "loss": 0.008352726697921753, - "step": 2565 - }, - { - "epoch": 1.7530695770804912, - "grad_norm": 0.0097503075376153, - "learning_rate": 4.607566614572297e-05, - "loss": 0.0051550988107919695, - "step": 2570 - }, - { - "epoch": 1.7564802182810368, - "grad_norm": 0.0745258778333664, - "learning_rate": 4.5975836246359376e-05, - "loss": 0.021304388344287873, - "step": 2575 - }, - { - "epoch": 1.7598908594815825, - "grad_norm": 0.06805134564638138, - "learning_rate": 4.5875943005137875e-05, - "loss": 0.002654948644340038, - "step": 2580 - }, - { - "epoch": 1.7633015006821282, - "grad_norm": 0.005402611568570137, - "learning_rate": 4.577598716858342e-05, - "loss": 0.0005614575464278459, - "step": 2585 - }, - { - "epoch": 1.766712141882674, - "grad_norm": 1.909899115562439, - "learning_rate": 4.5675969483688796e-05, - "loss": 0.07116572856903076, - "step": 2590 - }, - { - "epoch": 1.7701227830832198, - "grad_norm": 0.31900784373283386, - "learning_rate": 4.557589069790897e-05, - "loss": 0.00657682865858078, - "step": 2595 - }, - { - "epoch": 1.7735334242837655, - "grad_norm": 0.029744356870651245, - "learning_rate": 4.547575155915556e-05, - "loss": 0.022768501937389374, - "step": 2600 - }, - { - "epoch": 1.7769440654843112, - "grad_norm": 0.3033762276172638, - "learning_rate": 4.537555281579118e-05, - "loss": 0.06703370213508605, - "step": 2605 - }, - { - "epoch": 1.7803547066848568, - "grad_norm": 0.1135796457529068, - "learning_rate": 4.5275295216623895e-05, - "loss": 0.02318609356880188, - "step": 2610 - }, - { - "epoch": 1.7837653478854025, - "grad_norm": 0.03714317828416824, - "learning_rate": 4.517497951090163e-05, - "loss": 0.0006621151696890593, - "step": 2615 - }, - { - "epoch": 1.7871759890859482, - "grad_norm": 0.022167189046740532, - "learning_rate": 4.507460644830652e-05, - "loss": 0.02861899733543396, - "step": 2620 - }, - { - "epoch": 1.790586630286494, - "grad_norm": 0.85112464427948, - "learning_rate": 4.497417677894937e-05, - "loss": 0.006332498788833618, - "step": 2625 - }, - { - "epoch": 1.7939972714870396, - "grad_norm": 0.35604724287986755, - "learning_rate": 4.487369125336402e-05, - "loss": 0.0009636864997446537, - "step": 2630 - }, - { - "epoch": 1.7974079126875853, - "grad_norm": 1.309139370918274, - "learning_rate": 4.477315062250173e-05, - "loss": 0.015147030353546143, - "step": 2635 - }, - { - "epoch": 1.800818553888131, - "grad_norm": 0.00445478456094861, - "learning_rate": 4.467255563772554e-05, - "loss": 0.0033130761235952376, - "step": 2640 - }, - { - "epoch": 1.8042291950886766, - "grad_norm": 0.6081591248512268, - "learning_rate": 4.457190705080476e-05, - "loss": 0.05884039998054504, - "step": 2645 - }, - { - "epoch": 1.8076398362892223, - "grad_norm": 0.021707098931074142, - "learning_rate": 4.4471205613909215e-05, - "loss": 0.011024921387434005, - "step": 2650 - }, - { - "epoch": 1.811050477489768, - "grad_norm": 0.07584571093320847, - "learning_rate": 4.437045207960372e-05, - "loss": 0.0034543585032224657, - "step": 2655 - }, - { - "epoch": 1.8144611186903137, - "grad_norm": 0.09392323344945908, - "learning_rate": 4.4269647200842444e-05, - "loss": 0.020604337751865386, - "step": 2660 - }, - { - "epoch": 1.8178717598908594, - "grad_norm": 0.01986370049417019, - "learning_rate": 4.4168791730963214e-05, - "loss": 0.002114328555762768, - "step": 2665 - }, - { - "epoch": 1.821282401091405, - "grad_norm": 0.09479828178882599, - "learning_rate": 4.406788642368199e-05, - "loss": 0.025032815337181092, - "step": 2670 - }, - { - "epoch": 1.8246930422919507, - "grad_norm": 0.049587834626436234, - "learning_rate": 4.396693203308714e-05, - "loss": 0.010216309875249862, - "step": 2675 - }, - { - "epoch": 1.8281036834924966, - "grad_norm": 0.8442233204841614, - "learning_rate": 4.3865929313633846e-05, - "loss": 0.028453707695007324, - "step": 2680 - }, - { - "epoch": 1.8315143246930423, - "grad_norm": 1.1214258670806885, - "learning_rate": 4.37648790201385e-05, - "loss": 0.030564960837364197, - "step": 2685 - }, - { - "epoch": 1.834924965893588, - "grad_norm": 0.0033480043057352304, - "learning_rate": 4.3663781907772984e-05, - "loss": 0.0031189337372779847, - "step": 2690 - }, - { - "epoch": 1.8383356070941337, - "grad_norm": 1.5848685503005981, - "learning_rate": 4.356263873205908e-05, - "loss": 0.006724410504102707, - "step": 2695 - }, - { - "epoch": 1.8417462482946794, - "grad_norm": 2.099224090576172, - "learning_rate": 4.346145024886282e-05, - "loss": 0.02901224195957184, - "step": 2700 - }, - { - "epoch": 1.845156889495225, - "grad_norm": 0.009933914057910442, - "learning_rate": 4.336021721438881e-05, - "loss": 0.01628301590681076, - "step": 2705 - }, - { - "epoch": 1.848567530695771, - "grad_norm": 0.36502084136009216, - "learning_rate": 4.325894038517463e-05, - "loss": 0.0063889890909194945, - "step": 2710 - }, - { - "epoch": 1.8519781718963166, - "grad_norm": 0.5338266491889954, - "learning_rate": 4.3157620518085123e-05, - "loss": 0.0017654186114668847, - "step": 2715 - }, - { - "epoch": 1.8553888130968623, - "grad_norm": 0.019173067063093185, - "learning_rate": 4.3056258370306773e-05, - "loss": 0.026314160227775572, - "step": 2720 - }, - { - "epoch": 1.858799454297408, - "grad_norm": 0.0022459065075963736, - "learning_rate": 4.295485469934203e-05, - "loss": 0.034506088495254515, - "step": 2725 - }, - { - "epoch": 1.8622100954979537, - "grad_norm": 0.009851609356701374, - "learning_rate": 4.285341026300366e-05, - "loss": 0.037691861391067505, - "step": 2730 - }, - { - "epoch": 1.8656207366984994, - "grad_norm": 0.037228964269161224, - "learning_rate": 4.275192581940908e-05, - "loss": 0.019535519182682037, - "step": 2735 - }, - { - "epoch": 1.869031377899045, - "grad_norm": 0.03244785964488983, - "learning_rate": 4.2650402126974704e-05, - "loss": 0.004778595641255379, - "step": 2740 - }, - { - "epoch": 1.8724420190995907, - "grad_norm": 0.027707895264029503, - "learning_rate": 4.254883994441023e-05, - "loss": 0.014376556873321534, - "step": 2745 - }, - { - "epoch": 1.8758526603001364, - "grad_norm": 0.0010299253044649959, - "learning_rate": 4.244724003071302e-05, - "loss": 0.03207484483718872, - "step": 2750 - }, - { - "epoch": 1.879263301500682, - "grad_norm": 0.09466377645730972, - "learning_rate": 4.234560314516241e-05, - "loss": 0.0034012068063020706, - "step": 2755 - }, - { - "epoch": 1.8826739427012278, - "grad_norm": 0.010737070813775063, - "learning_rate": 4.224393004731403e-05, - "loss": 0.0015277769416570663, - "step": 2760 - }, - { - "epoch": 1.8860845839017735, - "grad_norm": 0.0126175656914711, - "learning_rate": 4.2142221496994144e-05, - "loss": 0.0010683752596378326, - "step": 2765 - }, - { - "epoch": 1.8894952251023192, - "grad_norm": 0.03981072083115578, - "learning_rate": 4.2040478254293946e-05, - "loss": 0.013066369295120239, - "step": 2770 - }, - { - "epoch": 1.8929058663028648, - "grad_norm": 1.678425669670105, - "learning_rate": 4.193870107956391e-05, - "loss": 0.04245063066482544, - "step": 2775 - }, - { - "epoch": 1.8963165075034105, - "grad_norm": 0.21114972233772278, - "learning_rate": 4.1836890733408063e-05, - "loss": 0.006565409898757935, - "step": 2780 - }, - { - "epoch": 1.8997271487039562, - "grad_norm": 0.13557757437229156, - "learning_rate": 4.173504797667836e-05, - "loss": 0.0032049473375082016, - "step": 2785 - }, - { - "epoch": 1.9031377899045019, - "grad_norm": 0.03560245409607887, - "learning_rate": 4.163317357046896e-05, - "loss": 0.036527630686759946, - "step": 2790 - }, - { - "epoch": 1.9065484311050478, - "grad_norm": 0.016457395628094673, - "learning_rate": 4.1531268276110534e-05, - "loss": 0.004363229870796204, - "step": 2795 - }, - { - "epoch": 1.9099590723055935, - "grad_norm": 0.15892116725444794, - "learning_rate": 4.142933285516459e-05, - "loss": 0.0009642043150961399, - "step": 2800 - }, - { - "epoch": 1.9133697135061392, - "grad_norm": 1.4503952264785767, - "learning_rate": 4.1327368069417805e-05, - "loss": 0.016029161214828492, - "step": 2805 - }, - { - "epoch": 1.9167803547066848, - "grad_norm": 0.05091691017150879, - "learning_rate": 4.122537468087626e-05, - "loss": 0.008691015094518662, - "step": 2810 - }, - { - "epoch": 1.9201909959072305, - "grad_norm": 0.33131423592567444, - "learning_rate": 4.1123353451759843e-05, - "loss": 0.026498579978942872, - "step": 2815 - }, - { - "epoch": 1.9236016371077762, - "grad_norm": 0.011024169623851776, - "learning_rate": 4.1021305144496455e-05, - "loss": 0.005746996402740479, - "step": 2820 - }, - { - "epoch": 1.9270122783083221, - "grad_norm": 0.003868364728987217, - "learning_rate": 4.091923052171637e-05, - "loss": 0.010599984973669051, - "step": 2825 - }, - { - "epoch": 1.9304229195088678, - "grad_norm": 0.06572377681732178, - "learning_rate": 4.081713034624656e-05, - "loss": 0.0074796505272388455, - "step": 2830 - }, - { - "epoch": 1.9338335607094135, - "grad_norm": 0.004749608226120472, - "learning_rate": 4.07150053811049e-05, - "loss": 0.001074880175292492, - "step": 2835 - }, - { - "epoch": 1.9372442019099592, - "grad_norm": 1.0662771463394165, - "learning_rate": 4.061285638949456e-05, - "loss": 0.012685902416706085, - "step": 2840 - }, - { - "epoch": 1.9406548431105048, - "grad_norm": 0.05863015726208687, - "learning_rate": 4.0510684134798275e-05, - "loss": 0.02307589054107666, - "step": 2845 - }, - { - "epoch": 1.9440654843110505, - "grad_norm": 0.1672995239496231, - "learning_rate": 4.0408489380572595e-05, - "loss": 0.0009137367829680443, - "step": 2850 - }, - { - "epoch": 1.9474761255115962, - "grad_norm": 0.37154069542884827, - "learning_rate": 4.030627289054224e-05, - "loss": 0.03070145845413208, - "step": 2855 - }, - { - "epoch": 1.950886766712142, - "grad_norm": 1.7240241765975952, - "learning_rate": 4.020403542859436e-05, - "loss": 0.02094976305961609, - "step": 2860 - }, - { - "epoch": 1.9542974079126876, - "grad_norm": 0.019670270383358, - "learning_rate": 4.0101777758772826e-05, - "loss": 0.002072345092892647, - "step": 2865 - }, - { - "epoch": 1.9577080491132333, - "grad_norm": 0.004318730439990759, - "learning_rate": 3.999950064527255e-05, - "loss": 0.004361823201179504, - "step": 2870 - }, - { - "epoch": 1.961118690313779, - "grad_norm": 0.01702667959034443, - "learning_rate": 3.989720485243371e-05, - "loss": 0.0062848575413227085, - "step": 2875 - }, - { - "epoch": 1.9645293315143246, - "grad_norm": 0.011724979616701603, - "learning_rate": 3.9794891144736114e-05, - "loss": 0.010996624082326888, - "step": 2880 - }, - { - "epoch": 1.9679399727148703, - "grad_norm": 0.05676786229014397, - "learning_rate": 3.9692560286793454e-05, - "loss": 0.0015414996072649957, - "step": 2885 - }, - { - "epoch": 1.971350613915416, - "grad_norm": 0.0819210484623909, - "learning_rate": 3.959021304334756e-05, - "loss": 0.0008444737643003464, - "step": 2890 - }, - { - "epoch": 1.9747612551159617, - "grad_norm": 2.654984951019287, - "learning_rate": 3.948785017926274e-05, - "loss": 0.029948851466178893, - "step": 2895 - }, - { - "epoch": 1.9781718963165074, - "grad_norm": 1.9272631406784058, - "learning_rate": 3.938547245952003e-05, - "loss": 0.028752812743186952, - "step": 2900 - }, - { - "epoch": 1.981582537517053, - "grad_norm": 1.8656548261642456, - "learning_rate": 3.9283080649211474e-05, - "loss": 0.013515619933605194, - "step": 2905 - }, - { - "epoch": 1.984993178717599, - "grad_norm": 0.12653613090515137, - "learning_rate": 3.918067551353445e-05, - "loss": 0.0011569897644221783, - "step": 2910 - }, - { - "epoch": 1.9884038199181446, - "grad_norm": 0.07956118136644363, - "learning_rate": 3.9078257817785886e-05, - "loss": 0.024067461490631104, - "step": 2915 - }, - { - "epoch": 1.9918144611186903, - "grad_norm": 0.05314113199710846, - "learning_rate": 3.897582832735658e-05, - "loss": 0.008826743811368942, - "step": 2920 - }, - { - "epoch": 1.995225102319236, - "grad_norm": 0.05015930160880089, - "learning_rate": 3.887338780772551e-05, - "loss": 0.017050875723361968, - "step": 2925 - }, - { - "epoch": 1.9972714870395634, - "eval_loss": 0.051675114780664444, - "eval_runtime": 0.9019, - "eval_samples_per_second": 83.154, - "eval_steps_per_second": 2.217, - "step": 2928 - }, - { - "eval_cer_subset": 0.016214743153018123, - "eval_cer_subset_edit_distance": 119, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2928 - }, - { - "epoch": 1.9986357435197817, - "grad_norm": 0.0063513885252177715, - "learning_rate": 3.8770937024454024e-05, - "loss": 0.06096935272216797, - "step": 2930 - }, - { - "epoch": 2.0020463847203276, - "grad_norm": 0.3572078347206116, - "learning_rate": 3.86684767431802e-05, - "loss": 0.006809630990028381, - "step": 2935 - }, - { - "epoch": 2.0054570259208733, - "grad_norm": 0.06565147638320923, - "learning_rate": 3.8566007729613116e-05, - "loss": 0.0028434153646230698, - "step": 2940 - }, - { - "epoch": 2.008867667121419, - "grad_norm": 0.842980682849884, - "learning_rate": 3.846353074952705e-05, - "loss": 0.009484797716140747, - "step": 2945 - }, - { - "epoch": 2.0122783083219646, - "grad_norm": 0.2072802633047104, - "learning_rate": 3.83610465687559e-05, - "loss": 0.002631821669638157, - "step": 2950 - }, - { - "epoch": 2.0156889495225103, - "grad_norm": 0.010441784746944904, - "learning_rate": 3.8258555953187294e-05, - "loss": 0.0031868770718574526, - "step": 2955 - }, - { - "epoch": 2.019099590723056, - "grad_norm": 0.01504182443022728, - "learning_rate": 3.8156059668756994e-05, - "loss": 0.0008036898449063301, - "step": 2960 - }, - { - "epoch": 2.0225102319236017, - "grad_norm": 0.1884382665157318, - "learning_rate": 3.805355848144312e-05, - "loss": 0.0035643450915813445, - "step": 2965 - }, - { - "epoch": 2.0259208731241474, - "grad_norm": 0.0791923776268959, - "learning_rate": 3.795105315726042e-05, - "loss": 0.006217213720083237, - "step": 2970 - }, - { - "epoch": 2.029331514324693, - "grad_norm": 0.3660128116607666, - "learning_rate": 3.7848544462254586e-05, - "loss": 0.01521536111831665, - "step": 2975 - }, - { - "epoch": 2.0327421555252387, - "grad_norm": 1.1506773233413696, - "learning_rate": 3.774603316249646e-05, - "loss": 0.016655027866363525, - "step": 2980 - }, - { - "epoch": 2.0361527967257844, - "grad_norm": 0.010742763057351112, - "learning_rate": 3.764352002407638e-05, - "loss": 0.0031856697052717207, - "step": 2985 - }, - { - "epoch": 2.03956343792633, - "grad_norm": 0.008567198179662228, - "learning_rate": 3.754100581309843e-05, - "loss": 0.006546307355165482, - "step": 2990 - }, - { - "epoch": 2.042974079126876, - "grad_norm": 0.242637500166893, - "learning_rate": 3.743849129567467e-05, - "loss": 0.01844196617603302, - "step": 2995 - }, - { - "epoch": 2.0463847203274215, - "grad_norm": 0.0021864245645701885, - "learning_rate": 3.7335977237919486e-05, - "loss": 0.0010665619745850563, - "step": 3000 - }, - { - "epoch": 2.049795361527967, - "grad_norm": 0.0823604166507721, - "learning_rate": 3.723346440594384e-05, - "loss": 0.007866787165403366, - "step": 3005 - }, - { - "epoch": 2.053206002728513, - "grad_norm": 1.950189471244812, - "learning_rate": 3.713095356584948e-05, - "loss": 0.008237652480602264, - "step": 3010 - }, - { - "epoch": 2.0566166439290585, - "grad_norm": 0.0031665184069424868, - "learning_rate": 3.702844548372333e-05, - "loss": 0.0026241108775138856, - "step": 3015 - }, - { - "epoch": 2.060027285129604, - "grad_norm": 0.000985809718258679, - "learning_rate": 3.692594092563164e-05, - "loss": 0.0006582608446478843, - "step": 3020 - }, - { - "epoch": 2.06343792633015, - "grad_norm": 0.005130598321557045, - "learning_rate": 3.682344065761439e-05, - "loss": 0.003146781027317047, - "step": 3025 - }, - { - "epoch": 2.0668485675306956, - "grad_norm": 0.37476831674575806, - "learning_rate": 3.6720945445679456e-05, - "loss": 0.0038135331124067307, - "step": 3030 - }, - { - "epoch": 2.0702592087312413, - "grad_norm": 0.7057031989097595, - "learning_rate": 3.661845605579694e-05, - "loss": 0.01638354957103729, - "step": 3035 - }, - { - "epoch": 2.0736698499317874, - "grad_norm": 0.002361712511628866, - "learning_rate": 3.651597325389343e-05, - "loss": 0.008126144856214523, - "step": 3040 - }, - { - "epoch": 2.077080491132333, - "grad_norm": 0.14359615743160248, - "learning_rate": 3.641349780584628e-05, - "loss": 0.0010236113332211972, - "step": 3045 - }, - { - "epoch": 2.0804911323328787, - "grad_norm": 0.02165267802774906, - "learning_rate": 3.631103047747791e-05, - "loss": 0.001835300400853157, - "step": 3050 - }, - { - "epoch": 2.0839017735334244, - "grad_norm": 0.08726503700017929, - "learning_rate": 3.6208572034550014e-05, - "loss": 0.0011202219873666763, - "step": 3055 - }, - { - "epoch": 2.08731241473397, - "grad_norm": 0.011694432236254215, - "learning_rate": 3.6106123242757934e-05, - "loss": 0.0012682409957051276, - "step": 3060 - }, - { - "epoch": 2.090723055934516, - "grad_norm": 0.011882166378200054, - "learning_rate": 3.6003684867724856e-05, - "loss": 0.0017605546861886978, - "step": 3065 - }, - { - "epoch": 2.0941336971350615, - "grad_norm": 0.04591360688209534, - "learning_rate": 3.5901257674996135e-05, - "loss": 0.005101381987333298, - "step": 3070 - }, - { - "epoch": 2.097544338335607, - "grad_norm": 0.004696133080869913, - "learning_rate": 3.579884243003353e-05, - "loss": 0.0011597291566431522, - "step": 3075 - }, - { - "epoch": 2.100954979536153, - "grad_norm": 0.15893827378749847, - "learning_rate": 3.5696439898209546e-05, - "loss": 0.004860148951411247, - "step": 3080 - }, - { - "epoch": 2.1043656207366985, - "grad_norm": 0.0052610537968575954, - "learning_rate": 3.559405084480166e-05, - "loss": 0.0022133996710181235, - "step": 3085 - }, - { - "epoch": 2.107776261937244, - "grad_norm": 0.04163965955376625, - "learning_rate": 3.5491676034986634e-05, - "loss": 0.0011653171852231026, - "step": 3090 - }, - { - "epoch": 2.11118690313779, - "grad_norm": 0.03646431118249893, - "learning_rate": 3.538931623383477e-05, - "loss": 0.0010974819771945477, - "step": 3095 - }, - { - "epoch": 2.1145975443383356, - "grad_norm": 0.0027346035931259394, - "learning_rate": 3.5286972206304225e-05, - "loss": 0.0002963356673717499, - "step": 3100 - }, - { - "epoch": 2.1180081855388813, - "grad_norm": 0.030314767733216286, - "learning_rate": 3.5184644717235233e-05, - "loss": 0.0013646245934069157, - "step": 3105 - }, - { - "epoch": 2.121418826739427, - "grad_norm": 0.07366184145212173, - "learning_rate": 3.5082334531344514e-05, - "loss": 0.0018215376883745193, - "step": 3110 - }, - { - "epoch": 2.1248294679399726, - "grad_norm": 0.5131163597106934, - "learning_rate": 3.498004241321938e-05, - "loss": 0.011788859963417053, - "step": 3115 - }, - { - "epoch": 2.1282401091405183, - "grad_norm": 0.005606099497526884, - "learning_rate": 3.48777691273122e-05, - "loss": 0.00023315094877034425, - "step": 3120 - }, - { - "epoch": 2.131650750341064, - "grad_norm": 0.218208909034729, - "learning_rate": 3.4775515437934554e-05, - "loss": 0.0018167193979024888, - "step": 3125 - }, - { - "epoch": 2.1350613915416097, - "grad_norm": 0.026605455204844475, - "learning_rate": 3.467328210925161e-05, - "loss": 0.0012846079654991627, - "step": 3130 - }, - { - "epoch": 2.1384720327421554, - "grad_norm": 1.4139975309371948, - "learning_rate": 3.457106990527632e-05, - "loss": 0.00508112832903862, - "step": 3135 - }, - { - "epoch": 2.141882673942701, - "grad_norm": 0.019414646551012993, - "learning_rate": 3.446887958986385e-05, - "loss": 0.00019377884455025197, - "step": 3140 - }, - { - "epoch": 2.1452933151432467, - "grad_norm": 0.0009443740709684789, - "learning_rate": 3.4366711926705694e-05, - "loss": 0.0002384019084274769, - "step": 3145 - }, - { - "epoch": 2.148703956343793, - "grad_norm": 0.004274521954357624, - "learning_rate": 3.426456767932416e-05, - "loss": 0.0001209982088766992, - "step": 3150 - }, - { - "epoch": 2.1521145975443385, - "grad_norm": 1.536423683166504, - "learning_rate": 3.416244761106645e-05, - "loss": 0.008635792136192321, - "step": 3155 - }, - { - "epoch": 2.155525238744884, - "grad_norm": 0.0005219463491812348, - "learning_rate": 3.406035248509918e-05, - "loss": 0.009876561909914016, - "step": 3160 - }, - { - "epoch": 2.15893587994543, - "grad_norm": 0.0003459984145592898, - "learning_rate": 3.395828306440249e-05, - "loss": 0.00039457883685827254, - "step": 3165 - }, - { - "epoch": 2.1623465211459756, - "grad_norm": 0.001981241861358285, - "learning_rate": 3.3856240111764465e-05, - "loss": 0.0015202089212834834, - "step": 3170 - }, - { - "epoch": 2.1657571623465213, - "grad_norm": 2.1673197746276855, - "learning_rate": 3.375422438977536e-05, - "loss": 0.006145225092768669, - "step": 3175 - }, - { - "epoch": 2.169167803547067, - "grad_norm": 1.1808840036392212, - "learning_rate": 3.365223666082195e-05, - "loss": 0.013607437908649444, - "step": 3180 - }, - { - "epoch": 2.1725784447476126, - "grad_norm": 0.028271734714508057, - "learning_rate": 3.3550277687081766e-05, - "loss": 0.0013325599022209645, - "step": 3185 - }, - { - "epoch": 2.1759890859481583, - "grad_norm": 0.24461407959461212, - "learning_rate": 3.344834823051754e-05, - "loss": 0.0011271734721958638, - "step": 3190 - }, - { - "epoch": 2.179399727148704, - "grad_norm": 0.870476245880127, - "learning_rate": 3.334644905287129e-05, - "loss": 0.006120540574193001, - "step": 3195 - }, - { - "epoch": 2.1828103683492497, - "grad_norm": 0.24630939960479736, - "learning_rate": 3.324458091565887e-05, - "loss": 0.006894994527101517, - "step": 3200 - }, - { - "epoch": 2.1862210095497954, - "grad_norm": 0.035023678094148636, - "learning_rate": 3.314274458016407e-05, - "loss": 0.0012451894581317902, - "step": 3205 - }, - { - "epoch": 2.189631650750341, - "grad_norm": 0.0890582948923111, - "learning_rate": 3.3040940807433086e-05, - "loss": 0.0024930678308010103, - "step": 3210 - }, - { - "epoch": 2.1930422919508867, - "grad_norm": 0.30489957332611084, - "learning_rate": 3.2939170358268715e-05, - "loss": 0.023087967932224274, - "step": 3215 - }, - { - "epoch": 2.1964529331514324, - "grad_norm": 0.0017311271512880921, - "learning_rate": 3.283743399322477e-05, - "loss": 0.010184342414140702, - "step": 3220 - }, - { - "epoch": 2.199863574351978, - "grad_norm": 0.08493661880493164, - "learning_rate": 3.273573247260027e-05, - "loss": 0.02805263102054596, - "step": 3225 - }, - { - "epoch": 2.203274215552524, - "grad_norm": 0.09135634452104568, - "learning_rate": 3.2634066556433934e-05, - "loss": 0.0009638279676437378, - "step": 3230 - }, - { - "epoch": 2.2066848567530695, - "grad_norm": 0.05967738851904869, - "learning_rate": 3.2532437004498316e-05, - "loss": 0.007517358660697937, - "step": 3235 - }, - { - "epoch": 2.210095497953615, - "grad_norm": 0.03030387870967388, - "learning_rate": 3.243084457629425e-05, - "loss": 0.0009239451959729194, - "step": 3240 - }, - { - "epoch": 2.213506139154161, - "grad_norm": 0.004568960517644882, - "learning_rate": 3.2329290031045114e-05, - "loss": 0.00045777852647006513, - "step": 3245 - }, - { - "epoch": 2.2169167803547065, - "grad_norm": 0.20760250091552734, - "learning_rate": 3.2227774127691225e-05, - "loss": 0.0007285364903509617, - "step": 3250 - }, - { - "epoch": 2.220327421555252, - "grad_norm": 0.0028919128235429525, - "learning_rate": 3.2126297624884065e-05, - "loss": 0.001136382296681404, - "step": 3255 - }, - { - "epoch": 2.223738062755798, - "grad_norm": 0.0026204732712358236, - "learning_rate": 3.20248612809807e-05, - "loss": 0.0001833780319429934, - "step": 3260 - }, - { - "epoch": 2.2271487039563436, - "grad_norm": 0.054305560886859894, - "learning_rate": 3.192346585403805e-05, - "loss": 0.003986500948667526, - "step": 3265 - }, - { - "epoch": 2.2305593451568897, - "grad_norm": 0.024538133293390274, - "learning_rate": 3.182211210180732e-05, - "loss": 0.0003968174569308758, - "step": 3270 - }, - { - "epoch": 2.2339699863574354, - "grad_norm": 0.003948847763240337, - "learning_rate": 3.172080078172817e-05, - "loss": 0.0004363433923572302, - "step": 3275 - }, - { - "epoch": 2.237380627557981, - "grad_norm": 0.0024372704792767763, - "learning_rate": 3.161953265092322e-05, - "loss": 0.001003190129995346, - "step": 3280 - }, - { - "epoch": 2.2407912687585267, - "grad_norm": 0.05504141375422478, - "learning_rate": 3.1518308466192346e-05, - "loss": 0.00043217958882451056, - "step": 3285 - }, - { - "epoch": 2.2442019099590724, - "grad_norm": 0.003660411573946476, - "learning_rate": 3.141712898400692e-05, - "loss": 0.0005229417700320482, - "step": 3290 - }, - { - "epoch": 2.246930422919509, - "eval_loss": 0.07013023644685745, - "eval_runtime": 0.915, - "eval_samples_per_second": 81.971, - "eval_steps_per_second": 2.186, - "step": 3294 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3294 - }, - { - "epoch": 2.247612551159618, - "grad_norm": 0.0009039652650244534, - "learning_rate": 3.1315994960504354e-05, - "loss": 0.0009505398571491242, - "step": 3295 - }, - { - "epoch": 2.251023192360164, - "grad_norm": 0.0008299718610942364, - "learning_rate": 3.121490715148224e-05, - "loss": 0.006436178088188171, - "step": 3300 - }, - { - "epoch": 2.2544338335607095, - "grad_norm": 0.003360757604241371, - "learning_rate": 3.1113866312392846e-05, - "loss": 0.0004931201227009296, - "step": 3305 - }, - { - "epoch": 2.257844474761255, - "grad_norm": 0.0006958392332307994, - "learning_rate": 3.1012873198337415e-05, - "loss": 0.0008634727448225022, - "step": 3310 - }, - { - "epoch": 2.261255115961801, - "grad_norm": 0.0006489035440608859, - "learning_rate": 3.0911928564060525e-05, - "loss": 0.02126412242650986, - "step": 3315 - }, - { - "epoch": 2.2646657571623465, - "grad_norm": 0.05853112041950226, - "learning_rate": 3.081103316394446e-05, - "loss": 0.0011481027118861674, - "step": 3320 - }, - { - "epoch": 2.268076398362892, - "grad_norm": 0.018470890820026398, - "learning_rate": 3.0710187752003576e-05, - "loss": 0.0005085847340524196, - "step": 3325 - }, - { - "epoch": 2.271487039563438, - "grad_norm": 0.002904064953327179, - "learning_rate": 3.06093930818786e-05, - "loss": 0.0011355782859027385, - "step": 3330 - }, - { - "epoch": 2.2748976807639836, - "grad_norm": 0.006562090013176203, - "learning_rate": 3.0508649906831165e-05, - "loss": 0.0017314480617642402, - "step": 3335 - }, - { - "epoch": 2.2783083219645293, - "grad_norm": 0.012832654640078545, - "learning_rate": 3.040795897973794e-05, - "loss": 0.014564378559589386, - "step": 3340 - }, - { - "epoch": 2.281718963165075, - "grad_norm": 0.40248075127601624, - "learning_rate": 3.030732105308523e-05, - "loss": 0.013111820816993714, - "step": 3345 - }, - { - "epoch": 2.2851296043656206, - "grad_norm": 0.009625518694519997, - "learning_rate": 3.0206736878963198e-05, - "loss": 0.0003735888050869107, - "step": 3350 - }, - { - "epoch": 2.2885402455661663, - "grad_norm": 0.05925761163234711, - "learning_rate": 3.010620720906034e-05, - "loss": 0.0005200970452278852, - "step": 3355 - }, - { - "epoch": 2.291950886766712, - "grad_norm": 0.04488271474838257, - "learning_rate": 3.0005732794657804e-05, - "loss": 0.0017546603456139564, - "step": 3360 - }, - { - "epoch": 2.2953615279672577, - "grad_norm": 0.0013143798569217324, - "learning_rate": 2.990531438662383e-05, - "loss": 0.0006482157856225968, - "step": 3365 - }, - { - "epoch": 2.2987721691678034, - "grad_norm": 0.0018280980875715613, - "learning_rate": 2.980495273540805e-05, - "loss": 0.002798055298626423, - "step": 3370 - }, - { - "epoch": 2.3021828103683495, - "grad_norm": 0.0068644145503640175, - "learning_rate": 2.9704648591036028e-05, - "loss": 0.0010916708968579769, - "step": 3375 - }, - { - "epoch": 2.305593451568895, - "grad_norm": 0.006140770856291056, - "learning_rate": 2.9604402703103482e-05, - "loss": 0.0003204951295629144, - "step": 3380 - }, - { - "epoch": 2.309004092769441, - "grad_norm": 0.01666918210685253, - "learning_rate": 2.9504215820770825e-05, - "loss": 0.002915392816066742, - "step": 3385 - }, - { - "epoch": 2.3124147339699865, - "grad_norm": 0.001569412648677826, - "learning_rate": 2.9404088692757462e-05, - "loss": 0.00282623004168272, - "step": 3390 - }, - { - "epoch": 2.315825375170532, - "grad_norm": 2.6985678672790527, - "learning_rate": 2.930402206733629e-05, - "loss": 0.056363034248352054, - "step": 3395 - }, - { - "epoch": 2.319236016371078, - "grad_norm": 0.061534252017736435, - "learning_rate": 2.9204016692328008e-05, - "loss": 0.002193786948919296, - "step": 3400 - }, - { - "epoch": 2.3226466575716236, - "grad_norm": 0.00724546005949378, - "learning_rate": 2.9104073315095624e-05, - "loss": 0.0027640098705887794, - "step": 3405 - }, - { - "epoch": 2.3260572987721693, - "grad_norm": 0.0014935819199308753, - "learning_rate": 2.900419268253876e-05, - "loss": 0.0014965098351240158, - "step": 3410 - }, - { - "epoch": 2.329467939972715, - "grad_norm": 0.2667955458164215, - "learning_rate": 2.89043755410882e-05, - "loss": 0.0009135601110756397, - "step": 3415 - }, - { - "epoch": 2.3328785811732606, - "grad_norm": 0.015711264684796333, - "learning_rate": 2.8804622636700195e-05, - "loss": 0.0004993634298443794, - "step": 3420 - }, - { - "epoch": 2.3362892223738063, - "grad_norm": 0.000695803901180625, - "learning_rate": 2.8704934714850972e-05, - "loss": 0.0010460540652275085, - "step": 3425 - }, - { - "epoch": 2.339699863574352, - "grad_norm": 0.00030175631400197744, - "learning_rate": 2.8605312520531102e-05, - "loss": 0.0011491063050925732, - "step": 3430 - }, - { - "epoch": 2.3431105047748977, - "grad_norm": 0.0008360512438230217, - "learning_rate": 2.850575679823998e-05, - "loss": 0.005195276811718941, - "step": 3435 - }, - { - "epoch": 2.3465211459754434, - "grad_norm": 0.07670744508504868, - "learning_rate": 2.840626829198022e-05, - "loss": 0.001102046575397253, - "step": 3440 - }, - { - "epoch": 2.349931787175989, - "grad_norm": 0.0048200939781963825, - "learning_rate": 2.8306847745252154e-05, - "loss": 0.00011967071332037449, - "step": 3445 - }, - { - "epoch": 2.3533424283765347, - "grad_norm": 0.0036802536342293024, - "learning_rate": 2.8207495901048164e-05, - "loss": 0.003212982416152954, - "step": 3450 - }, - { - "epoch": 2.3567530695770804, - "grad_norm": 0.0017565820598974824, - "learning_rate": 2.8108213501847284e-05, - "loss": 3.878590650856495e-05, - "step": 3455 - }, - { - "epoch": 2.360163710777626, - "grad_norm": 0.07837986201047897, - "learning_rate": 2.8009001289609514e-05, - "loss": 0.00035386246163398026, - "step": 3460 - }, - { - "epoch": 2.363574351978172, - "grad_norm": 0.035858154296875, - "learning_rate": 2.7909860005770364e-05, - "loss": 0.0020171813666820526, - "step": 3465 - }, - { - "epoch": 2.3669849931787175, - "grad_norm": 0.001313618617132306, - "learning_rate": 2.781079039123525e-05, - "loss": 0.0077533811330795285, - "step": 3470 - }, - { - "epoch": 2.370395634379263, - "grad_norm": 0.022166471928358078, - "learning_rate": 2.771179318637402e-05, - "loss": 0.00021515686530619859, - "step": 3475 - }, - { - "epoch": 2.373806275579809, - "grad_norm": 0.0037807885091751814, - "learning_rate": 2.7612869131015353e-05, - "loss": 0.008334387093782425, - "step": 3480 - }, - { - "epoch": 2.3772169167803545, - "grad_norm": 0.02126333676278591, - "learning_rate": 2.7514018964441313e-05, - "loss": 0.0012980472296476365, - "step": 3485 - }, - { - "epoch": 2.3806275579809, - "grad_norm": 0.0007329506915993989, - "learning_rate": 2.7415243425381707e-05, - "loss": 0.000131706683896482, - "step": 3490 - }, - { - "epoch": 2.384038199181446, - "grad_norm": 0.008962417021393776, - "learning_rate": 2.73165432520087e-05, - "loss": 0.0001407766016200185, - "step": 3495 - }, - { - "epoch": 2.3874488403819916, - "grad_norm": 0.06224314495921135, - "learning_rate": 2.721791918193119e-05, - "loss": 0.0005040234886109829, - "step": 3500 - }, - { - "epoch": 2.3908594815825377, - "grad_norm": 0.007790696807205677, - "learning_rate": 2.7119371952189368e-05, - "loss": 0.00020941467955708503, - "step": 3505 - }, - { - "epoch": 2.3942701227830834, - "grad_norm": 0.1999143660068512, - "learning_rate": 2.7020902299249144e-05, - "loss": 0.0005157966166734696, - "step": 3510 - }, - { - "epoch": 2.397680763983629, - "grad_norm": 1.5223946571350098, - "learning_rate": 2.692251095899673e-05, - "loss": 0.004808775335550308, - "step": 3515 - }, - { - "epoch": 2.4010914051841747, - "grad_norm": 0.0005383774405345321, - "learning_rate": 2.6824198666733024e-05, - "loss": 0.0007459132932126522, - "step": 3520 - }, - { - "epoch": 2.4045020463847204, - "grad_norm": 0.02152041345834732, - "learning_rate": 2.672596615716823e-05, - "loss": 0.010163982212543488, - "step": 3525 - }, - { - "epoch": 2.407912687585266, - "grad_norm": 0.1950986683368683, - "learning_rate": 2.6627814164416303e-05, - "loss": 0.002464359626173973, - "step": 3530 - }, - { - "epoch": 2.411323328785812, - "grad_norm": 0.21561792492866516, - "learning_rate": 2.652974342198947e-05, - "loss": 0.0010975897312164307, - "step": 3535 - }, - { - "epoch": 2.4147339699863575, - "grad_norm": 0.0007951174047775567, - "learning_rate": 2.6431754662792775e-05, - "loss": 6.033455138094723e-05, - "step": 3540 - }, - { - "epoch": 2.418144611186903, - "grad_norm": 0.0016590118175372481, - "learning_rate": 2.633384861911856e-05, - "loss": 0.00012161724735051393, - "step": 3545 - }, - { - "epoch": 2.421555252387449, - "grad_norm": 0.004098537378013134, - "learning_rate": 2.6236026022641047e-05, - "loss": 0.0006160829216241837, - "step": 3550 - }, - { - "epoch": 2.4249658935879945, - "grad_norm": 0.0009240853250958025, - "learning_rate": 2.6138287604410772e-05, - "loss": 8.804704993963242e-05, - "step": 3555 - }, - { - "epoch": 2.42837653478854, - "grad_norm": 0.005952226463705301, - "learning_rate": 2.604063409484928e-05, - "loss": 0.0006035147234797478, - "step": 3560 - }, - { - "epoch": 2.431787175989086, - "grad_norm": 0.03809252381324768, - "learning_rate": 2.5943066223743488e-05, - "loss": 0.00727783590555191, - "step": 3565 - }, - { - "epoch": 2.4351978171896316, - "grad_norm": 0.05054875835776329, - "learning_rate": 2.5845584720240384e-05, - "loss": 0.0082052581012249, - "step": 3570 - }, - { - "epoch": 2.4386084583901773, - "grad_norm": 0.0147855868563056, - "learning_rate": 2.5748190312841466e-05, - "loss": 0.011614852398633958, - "step": 3575 - }, - { - "epoch": 2.442019099590723, - "grad_norm": 0.011641742661595345, - "learning_rate": 2.5650883729397373e-05, - "loss": 0.0002830417361110449, - "step": 3580 - }, - { - "epoch": 2.4454297407912686, - "grad_norm": 0.04626445844769478, - "learning_rate": 2.5553665697102386e-05, - "loss": 0.0003774407086893916, - "step": 3585 - }, - { - "epoch": 2.4488403819918143, - "grad_norm": 0.3234706521034241, - "learning_rate": 2.5456536942489065e-05, - "loss": 0.0009496832266449928, - "step": 3590 - }, - { - "epoch": 2.45225102319236, - "grad_norm": 0.029156841337680817, - "learning_rate": 2.535949819142272e-05, - "loss": 0.0016127176582813262, - "step": 3595 - }, - { - "epoch": 2.4556616643929057, - "grad_norm": 0.0015022120205685496, - "learning_rate": 2.52625501690961e-05, - "loss": 0.00010128046851605177, - "step": 3600 - }, - { - "epoch": 2.459072305593452, - "grad_norm": 0.12954266369342804, - "learning_rate": 2.5165693600023872e-05, - "loss": 0.004440005496144294, - "step": 3605 - }, - { - "epoch": 2.4624829467939975, - "grad_norm": 0.022409839555621147, - "learning_rate": 2.5068929208037295e-05, - "loss": 0.0019246777519583702, - "step": 3610 - }, - { - "epoch": 2.465893587994543, - "grad_norm": 0.0018720730440691113, - "learning_rate": 2.497225771627873e-05, - "loss": 0.004561808705329895, - "step": 3615 - }, - { - "epoch": 2.469304229195089, - "grad_norm": 0.0021158247254788876, - "learning_rate": 2.4875679847196312e-05, - "loss": 0.005481125041842461, - "step": 3620 - }, - { - "epoch": 2.4727148703956345, - "grad_norm": 0.0024307845160365105, - "learning_rate": 2.477919632253845e-05, - "loss": 0.0009140795096755028, - "step": 3625 - }, - { - "epoch": 2.47612551159618, - "grad_norm": 0.0020758784376084805, - "learning_rate": 2.4682807863348583e-05, - "loss": 0.001236506924033165, - "step": 3630 - }, - { - "epoch": 2.479536152796726, - "grad_norm": 0.0006182460929267108, - "learning_rate": 2.4586515189959614e-05, - "loss": 0.00015565860085189342, - "step": 3635 - }, - { - "epoch": 2.4829467939972716, - "grad_norm": 0.05087731033563614, - "learning_rate": 2.4490319021988688e-05, - "loss": 0.00022137174382805825, - "step": 3640 - }, - { - "epoch": 2.4863574351978173, - "grad_norm": 0.03250613436102867, - "learning_rate": 2.4394220078331695e-05, - "loss": 0.00028696306981146336, - "step": 3645 - }, - { - "epoch": 2.489768076398363, - "grad_norm": 0.017168540507555008, - "learning_rate": 2.429821907715798e-05, - "loss": 0.0003641644492745399, - "step": 3650 - }, - { - "epoch": 2.4931787175989086, - "grad_norm": 0.0670199990272522, - "learning_rate": 2.420231673590491e-05, - "loss": 0.00015748695004731418, - "step": 3655 - }, - { - "epoch": 2.4965893587994543, - "grad_norm": 0.003998387139290571, - "learning_rate": 2.4106513771272585e-05, - "loss": 0.00026149852201342585, - "step": 3660 - }, - { - "epoch": 2.4965893587994543, - "eval_loss": 0.06822175532579422, - "eval_runtime": 0.9108, - "eval_samples_per_second": 82.345, - "eval_steps_per_second": 2.196, - "step": 3660 - }, - { - "eval_cer_subset": 0.01675977653631285, - "eval_cer_subset_edit_distance": 123, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3660 - }, - { - "epoch": 2.5, - "grad_norm": 0.0059893373399972916, - "learning_rate": 2.4010810899218384e-05, - "loss": 0.0037302006036043166, - "step": 3665 - }, - { - "epoch": 2.5034106412005457, - "grad_norm": 0.3968847692012787, - "learning_rate": 2.3915208834951736e-05, - "loss": 0.0008235686458647251, - "step": 3670 - }, - { - "epoch": 2.5068212824010914, - "grad_norm": 0.001170233590528369, - "learning_rate": 2.3819708292928645e-05, - "loss": 0.0021816927939653395, - "step": 3675 - }, - { - "epoch": 2.510231923601637, - "grad_norm": 0.0864306092262268, - "learning_rate": 2.3724309986846476e-05, - "loss": 0.00794672966003418, - "step": 3680 - }, - { - "epoch": 2.5136425648021827, - "grad_norm": 0.0012164375511929393, - "learning_rate": 2.362901462963851e-05, - "loss": 0.00014161464059725404, - "step": 3685 - }, - { - "epoch": 2.5170532060027284, - "grad_norm": 0.0047707995399832726, - "learning_rate": 2.353382293346872e-05, - "loss": 0.00012235456379130482, - "step": 3690 - }, - { - "epoch": 2.520463847203274, - "grad_norm": 0.0010226344456896186, - "learning_rate": 2.3438735609726346e-05, - "loss": 0.0006677288562059403, - "step": 3695 - }, - { - "epoch": 2.52387448840382, - "grad_norm": 0.01809096150100231, - "learning_rate": 2.334375336902067e-05, - "loss": 0.0004967927932739257, - "step": 3700 - }, - { - "epoch": 2.5272851296043655, - "grad_norm": 0.006922638975083828, - "learning_rate": 2.3248876921175613e-05, - "loss": 0.0012997164390981196, - "step": 3705 - }, - { - "epoch": 2.530695770804911, - "grad_norm": 0.0002996268740389496, - "learning_rate": 2.315410697522456e-05, - "loss": 5.4457224905490875e-05, - "step": 3710 - }, - { - "epoch": 2.534106412005457, - "grad_norm": 0.00561846699565649, - "learning_rate": 2.3059444239404896e-05, - "loss": 0.0002347052562981844, - "step": 3715 - }, - { - "epoch": 2.5375170532060025, - "grad_norm": 1.200972318649292, - "learning_rate": 2.296488942115287e-05, - "loss": 0.003510555624961853, - "step": 3720 - }, - { - "epoch": 2.540927694406548, - "grad_norm": 0.008847455494105816, - "learning_rate": 2.287044322709819e-05, - "loss": 0.00010497854091227055, - "step": 3725 - }, - { - "epoch": 2.544338335607094, - "grad_norm": 0.0026281927712261677, - "learning_rate": 2.277610636305883e-05, - "loss": 0.001988488808274269, - "step": 3730 - }, - { - "epoch": 2.5477489768076396, - "grad_norm": 0.008025784976780415, - "learning_rate": 2.268187953403568e-05, - "loss": 0.023679326474666595, - "step": 3735 - }, - { - "epoch": 2.5511596180081857, - "grad_norm": 0.03441132605075836, - "learning_rate": 2.258776344420735e-05, - "loss": 0.0004788160789757967, - "step": 3740 - }, - { - "epoch": 2.5545702592087314, - "grad_norm": 3.1458778381347656, - "learning_rate": 2.2493758796924816e-05, - "loss": 0.008043569326400758, - "step": 3745 - }, - { - "epoch": 2.557980900409277, - "grad_norm": 0.002775805303826928, - "learning_rate": 2.2399866294706302e-05, - "loss": 0.0013419794850051403, - "step": 3750 - }, - { - "epoch": 2.5613915416098227, - "grad_norm": 0.0030509193893522024, - "learning_rate": 2.2306086639231857e-05, - "loss": 0.03926021754741669, - "step": 3755 - }, - { - "epoch": 2.5648021828103684, - "grad_norm": 0.0024770230520516634, - "learning_rate": 2.2212420531338248e-05, - "loss": 0.0011906253173947334, - "step": 3760 - }, - { - "epoch": 2.568212824010914, - "grad_norm": 0.007617161609232426, - "learning_rate": 2.2118868671013692e-05, - "loss": 0.008073102682828903, - "step": 3765 - }, - { - "epoch": 2.57162346521146, - "grad_norm": 0.08999158442020416, - "learning_rate": 2.202543175739254e-05, - "loss": 0.0009835162200033665, - "step": 3770 - }, - { - "epoch": 2.5750341064120055, - "grad_norm": 0.010818173177540302, - "learning_rate": 2.193211048875022e-05, - "loss": 0.0019240962341427804, - "step": 3775 - }, - { - "epoch": 2.578444747612551, - "grad_norm": 0.01809680461883545, - "learning_rate": 2.183890556249781e-05, - "loss": 0.005013756453990936, - "step": 3780 - }, - { - "epoch": 2.581855388813097, - "grad_norm": 0.021501798182725906, - "learning_rate": 2.1745817675177027e-05, - "loss": 0.0005870801862329245, - "step": 3785 - }, - { - "epoch": 2.5852660300136425, - "grad_norm": 0.011130684986710548, - "learning_rate": 2.165284752245485e-05, - "loss": 0.00037821107544004916, - "step": 3790 - }, - { - "epoch": 2.588676671214188, - "grad_norm": 0.00624213507398963, - "learning_rate": 2.1559995799118496e-05, - "loss": 0.015400664508342743, - "step": 3795 - }, - { - "epoch": 2.592087312414734, - "grad_norm": 0.23763298988342285, - "learning_rate": 2.1467263199070018e-05, - "loss": 0.0006831173319369555, - "step": 3800 - }, - { - "epoch": 2.5954979536152796, - "grad_norm": 0.0056639909744262695, - "learning_rate": 2.137465041532133e-05, - "loss": 0.002130754478275776, - "step": 3805 - }, - { - "epoch": 2.5989085948158253, - "grad_norm": 0.42903369665145874, - "learning_rate": 2.1282158139988877e-05, - "loss": 0.0020006079226732253, - "step": 3810 - }, - { - "epoch": 2.602319236016371, - "grad_norm": 0.014364579692482948, - "learning_rate": 2.118978706428854e-05, - "loss": 0.0005437508225440979, - "step": 3815 - }, - { - "epoch": 2.6057298772169166, - "grad_norm": 0.017512807622551918, - "learning_rate": 2.1097537878530427e-05, - "loss": 0.00019666440784931182, - "step": 3820 - }, - { - "epoch": 2.6091405184174628, - "grad_norm": 0.00863230973482132, - "learning_rate": 2.100541127211379e-05, - "loss": 0.0001873808912932873, - "step": 3825 - }, - { - "epoch": 2.6125511596180084, - "grad_norm": 0.006781345698982477, - "learning_rate": 2.0913407933521714e-05, - "loss": 0.00018554476555436849, - "step": 3830 - }, - { - "epoch": 2.615961800818554, - "grad_norm": 0.004758995026350021, - "learning_rate": 2.082152855031618e-05, - "loss": 0.0004659180995076895, - "step": 3835 - }, - { - "epoch": 2.6193724420191, - "grad_norm": 0.0036142354365438223, - "learning_rate": 2.0729773809132782e-05, - "loss": 0.00033613520208746194, - "step": 3840 - }, - { - "epoch": 2.6227830832196455, - "grad_norm": 0.0798744410276413, - "learning_rate": 2.0638144395675614e-05, - "loss": 0.00026304563507437704, - "step": 3845 - }, - { - "epoch": 2.626193724420191, - "grad_norm": 0.07229600101709366, - "learning_rate": 2.0546640994712183e-05, - "loss": 0.022786998748779298, - "step": 3850 - }, - { - "epoch": 2.629604365620737, - "grad_norm": 0.11226585507392883, - "learning_rate": 2.04552642900683e-05, - "loss": 0.0002580304862931371, - "step": 3855 - }, - { - "epoch": 2.6330150068212825, - "grad_norm": 0.0019689116161316633, - "learning_rate": 2.036401496462292e-05, - "loss": 0.005474040284752846, - "step": 3860 - }, - { - "epoch": 2.636425648021828, - "grad_norm": 0.08611829578876495, - "learning_rate": 2.027289370030307e-05, - "loss": 0.0007106051780283451, - "step": 3865 - }, - { - "epoch": 2.639836289222374, - "grad_norm": 0.06968124955892563, - "learning_rate": 2.0181901178078723e-05, - "loss": 0.00030497927218675613, - "step": 3870 - }, - { - "epoch": 2.6432469304229196, - "grad_norm": 0.002102258615195751, - "learning_rate": 2.0091038077957807e-05, - "loss": 0.00039041375275701285, - "step": 3875 - }, - { - "epoch": 2.6466575716234653, - "grad_norm": 0.01055186241865158, - "learning_rate": 2.000030507898094e-05, - "loss": 0.00028035915456712244, - "step": 3880 - }, - { - "epoch": 2.650068212824011, - "grad_norm": 0.013122744858264923, - "learning_rate": 1.990970285921656e-05, - "loss": 0.0002463514683768153, - "step": 3885 - }, - { - "epoch": 2.6534788540245566, - "grad_norm": 0.043785031884908676, - "learning_rate": 1.9819232095755712e-05, - "loss": 0.0006866191513836383, - "step": 3890 - }, - { - "epoch": 2.6568894952251023, - "grad_norm": 0.014347897842526436, - "learning_rate": 1.9728893464707063e-05, - "loss": 0.00304874274879694, - "step": 3895 - }, - { - "epoch": 2.660300136425648, - "grad_norm": 0.01495263073593378, - "learning_rate": 1.9638687641191784e-05, - "loss": 0.0027243653312325478, - "step": 3900 - }, - { - "epoch": 2.6637107776261937, - "grad_norm": 0.0025812601670622826, - "learning_rate": 1.954861529933862e-05, - "loss": 0.00015772593906149268, - "step": 3905 - }, - { - "epoch": 2.6671214188267394, - "grad_norm": 3.2978317737579346, - "learning_rate": 1.9458677112278677e-05, - "loss": 0.011941131204366684, - "step": 3910 - }, - { - "epoch": 2.670532060027285, - "grad_norm": 0.00819153431802988, - "learning_rate": 1.936887375214059e-05, - "loss": 0.0019363060593605042, - "step": 3915 - }, - { - "epoch": 2.6739427012278307, - "grad_norm": 0.3553819954395294, - "learning_rate": 1.9279205890045335e-05, - "loss": 0.001681213453412056, - "step": 3920 - }, - { - "epoch": 2.6773533424283764, - "grad_norm": 0.14068304002285004, - "learning_rate": 1.9189674196101303e-05, - "loss": 0.0004354804754257202, - "step": 3925 - }, - { - "epoch": 2.680763983628922, - "grad_norm": 0.05331770330667496, - "learning_rate": 1.9100279339399258e-05, - "loss": 0.0006728332955390215, - "step": 3930 - }, - { - "epoch": 2.684174624829468, - "grad_norm": 0.010825222358107567, - "learning_rate": 1.9011021988007387e-05, - "loss": 0.011760103702545165, - "step": 3935 - }, - { - "epoch": 2.6875852660300135, - "grad_norm": 0.02598944492638111, - "learning_rate": 1.892190280896622e-05, - "loss": 0.00020915823988616468, - "step": 3940 - }, - { - "epoch": 2.690995907230559, - "grad_norm": 0.00512358546257019, - "learning_rate": 1.8832922468283724e-05, - "loss": 0.000882271584123373, - "step": 3945 - }, - { - "epoch": 2.694406548431105, - "grad_norm": 0.05068441852927208, - "learning_rate": 1.874408163093028e-05, - "loss": 0.000997264590114355, - "step": 3950 - }, - { - "epoch": 2.6978171896316505, - "grad_norm": 0.0038104017730802298, - "learning_rate": 1.8655380960833724e-05, - "loss": 0.001553349569439888, - "step": 3955 - }, - { - "epoch": 2.701227830832196, - "grad_norm": 0.0013087299885228276, - "learning_rate": 1.8566821120874394e-05, - "loss": 0.006560490280389786, - "step": 3960 - }, - { - "epoch": 2.704638472032742, - "grad_norm": 1.1628080606460571, - "learning_rate": 1.8478402772880208e-05, - "loss": 0.0015312742441892623, - "step": 3965 - }, - { - "epoch": 2.708049113233288, - "grad_norm": 0.0020620303694158792, - "learning_rate": 1.8390126577621636e-05, - "loss": 0.013011389970779419, - "step": 3970 - }, - { - "epoch": 2.7114597544338337, - "grad_norm": 0.0014427551068365574, - "learning_rate": 1.830199319480682e-05, - "loss": 0.0008381184190511704, - "step": 3975 - }, - { - "epoch": 2.7148703956343794, - "grad_norm": 0.0005985202733427286, - "learning_rate": 1.821400328307663e-05, - "loss": 0.0005598202813416719, - "step": 3980 - }, - { - "epoch": 2.718281036834925, - "grad_norm": 0.0016122297383844852, - "learning_rate": 1.8126157499999783e-05, - "loss": 0.006013911962509155, - "step": 3985 - }, - { - "epoch": 2.7216916780354707, - "grad_norm": 0.0028895260766148567, - "learning_rate": 1.8038456502067822e-05, - "loss": 0.00017103723948821425, - "step": 3990 - }, - { - "epoch": 2.7251023192360164, - "grad_norm": 0.09400962293148041, - "learning_rate": 1.7950900944690308e-05, - "loss": 0.07410463690757751, - "step": 3995 - }, - { - "epoch": 2.728512960436562, - "grad_norm": 0.015019465237855911, - "learning_rate": 1.786349148218993e-05, - "loss": 0.004524913057684899, - "step": 4000 - }, - { - "epoch": 2.731923601637108, - "grad_norm": 0.000663114245980978, - "learning_rate": 1.7776228767797522e-05, - "loss": 0.0212590754032135, - "step": 4005 - }, - { - "epoch": 2.7353342428376535, - "grad_norm": 0.0029272777028381824, - "learning_rate": 1.768911345364726e-05, - "loss": 0.000913316011428833, - "step": 4010 - }, - { - "epoch": 2.738744884038199, - "grad_norm": 0.03791525587439537, - "learning_rate": 1.7602146190771743e-05, - "loss": 0.0018313366919755936, - "step": 4015 - }, - { - "epoch": 2.742155525238745, - "grad_norm": 0.041133999824523926, - "learning_rate": 1.7515327629097217e-05, - "loss": 0.0006253012455999851, - "step": 4020 - }, - { - "epoch": 2.7455661664392905, - "grad_norm": 0.01035034004598856, - "learning_rate": 1.7428658417438534e-05, - "loss": 0.005944912880659103, - "step": 4025 - }, - { - "epoch": 2.7462482946793996, - "eval_loss": 0.06961391866207123, - "eval_runtime": 0.9223, - "eval_samples_per_second": 81.321, - "eval_steps_per_second": 2.169, - "step": 4026 - }, - { - "eval_cer_subset": 0.015397193078076032, - "eval_cer_subset_edit_distance": 113, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4026 - }, - { - "epoch": 2.748976807639836, - "grad_norm": 0.9081467986106873, - "learning_rate": 1.7342139203494537e-05, - "loss": 0.0036753010004758834, - "step": 4030 - }, - { - "epoch": 2.752387448840382, - "grad_norm": 0.07232939451932907, - "learning_rate": 1.7255770633843028e-05, - "loss": 0.0029829120263457297, - "step": 4035 - }, - { - "epoch": 2.7557980900409276, - "grad_norm": 0.005813417956233025, - "learning_rate": 1.7169553353936035e-05, - "loss": 0.00047225411981344223, - "step": 4040 - }, - { - "epoch": 2.7592087312414733, - "grad_norm": 0.2354760318994522, - "learning_rate": 1.7083488008094945e-05, - "loss": 0.0015884984284639358, - "step": 4045 - }, - { - "epoch": 2.762619372442019, - "grad_norm": 0.1201627105474472, - "learning_rate": 1.699757523950577e-05, - "loss": 0.0010768620297312737, - "step": 4050 - }, - { - "epoch": 2.766030013642565, - "grad_norm": 0.033547814935445786, - "learning_rate": 1.6911815690214166e-05, - "loss": 0.01052093282341957, - "step": 4055 - }, - { - "epoch": 2.7694406548431107, - "grad_norm": 0.01014826912432909, - "learning_rate": 1.682621000112085e-05, - "loss": 0.0003362501040101051, - "step": 4060 - }, - { - "epoch": 2.7728512960436564, - "grad_norm": 0.004405386745929718, - "learning_rate": 1.6740758811976665e-05, - "loss": 0.00044001247733831405, - "step": 4065 - }, - { - "epoch": 2.776261937244202, - "grad_norm": 3.621004104614258, - "learning_rate": 1.665546276137783e-05, - "loss": 0.18638403415679933, - "step": 4070 - }, - { - "epoch": 2.779672578444748, - "grad_norm": 0.01203183177858591, - "learning_rate": 1.6570322486761184e-05, - "loss": 0.00013435594737529755, - "step": 4075 - }, - { - "epoch": 2.7830832196452935, - "grad_norm": 0.23053398728370667, - "learning_rate": 1.6485338624399445e-05, - "loss": 0.0010434269905090332, - "step": 4080 - }, - { - "epoch": 2.786493860845839, - "grad_norm": 0.0109801534563303, - "learning_rate": 1.6400511809396394e-05, - "loss": 0.00012704560067504643, - "step": 4085 - }, - { - "epoch": 2.789904502046385, - "grad_norm": 0.8463883399963379, - "learning_rate": 1.631584267568217e-05, - "loss": 0.007707947492599487, - "step": 4090 - }, - { - "epoch": 2.7933151432469305, - "grad_norm": 0.05444789677858353, - "learning_rate": 1.623133185600852e-05, - "loss": 0.0008411366492509842, - "step": 4095 - }, - { - "epoch": 2.796725784447476, - "grad_norm": 0.007274657487869263, - "learning_rate": 1.6146979981944095e-05, - "loss": 0.0002061120932921767, - "step": 4100 - }, - { - "epoch": 2.800136425648022, - "grad_norm": 0.013262615539133549, - "learning_rate": 1.6062787683869667e-05, - "loss": 0.0003235015319660306, - "step": 4105 - }, - { - "epoch": 2.8035470668485676, - "grad_norm": 0.0030244409572333097, - "learning_rate": 1.597875559097352e-05, - "loss": 0.0007286245469003916, - "step": 4110 - }, - { - "epoch": 2.8069577080491133, - "grad_norm": 0.0021644949447363615, - "learning_rate": 1.5894884331246632e-05, - "loss": 0.0003056209534406662, - "step": 4115 - }, - { - "epoch": 2.810368349249659, - "grad_norm": 0.014495057985186577, - "learning_rate": 1.5811174531478074e-05, - "loss": 0.0014359142631292343, - "step": 4120 - }, - { - "epoch": 2.8137789904502046, - "grad_norm": 0.05203676223754883, - "learning_rate": 1.5727626817250255e-05, - "loss": 0.0006342739332467317, - "step": 4125 - }, - { - "epoch": 2.8171896316507503, - "grad_norm": 0.004998183809220791, - "learning_rate": 1.564424181293435e-05, - "loss": 0.00013386564096435906, - "step": 4130 - }, - { - "epoch": 2.820600272851296, - "grad_norm": 0.05428452417254448, - "learning_rate": 1.556102014168546e-05, - "loss": 0.00014423681423068047, - "step": 4135 - }, - { - "epoch": 2.8240109140518417, - "grad_norm": 0.0013511159922927618, - "learning_rate": 1.5477962425438164e-05, - "loss": 6.5605464624241e-05, - "step": 4140 - }, - { - "epoch": 2.8274215552523874, - "grad_norm": 0.9757132530212402, - "learning_rate": 1.539506928490171e-05, - "loss": 0.00839839205145836, - "step": 4145 - }, - { - "epoch": 2.830832196452933, - "grad_norm": 0.0018282996024936438, - "learning_rate": 1.5312341339555445e-05, - "loss": 0.005959897115826607, - "step": 4150 - }, - { - "epoch": 2.8342428376534787, - "grad_norm": 0.0022353942040354013, - "learning_rate": 1.5229779207644171e-05, - "loss": 0.000250368332490325, - "step": 4155 - }, - { - "epoch": 2.8376534788540244, - "grad_norm": 0.006538774352520704, - "learning_rate": 1.5147383506173572e-05, - "loss": 0.0004641829524189234, - "step": 4160 - }, - { - "epoch": 2.84106412005457, - "grad_norm": 0.003464010776951909, - "learning_rate": 1.5065154850905465e-05, - "loss": 0.008368657529354095, - "step": 4165 - }, - { - "epoch": 2.844474761255116, - "grad_norm": 0.0020767974201589823, - "learning_rate": 1.4983093856353398e-05, - "loss": 0.00010023106588050724, - "step": 4170 - }, - { - "epoch": 2.8478854024556615, - "grad_norm": 0.0025670777540653944, - "learning_rate": 1.4901201135777887e-05, - "loss": 0.0006715046241879463, - "step": 4175 - }, - { - "epoch": 2.851296043656207, - "grad_norm": 0.0006886612391099334, - "learning_rate": 1.4819477301181915e-05, - "loss": 0.0008357623592019081, - "step": 4180 - }, - { - "epoch": 2.854706684856753, - "grad_norm": 0.0023747060913592577, - "learning_rate": 1.4737922963306332e-05, - "loss": 0.00488339364528656, - "step": 4185 - }, - { - "epoch": 2.8581173260572985, - "grad_norm": 0.0030493123922497034, - "learning_rate": 1.4656538731625333e-05, - "loss": 0.017219077050685882, - "step": 4190 - }, - { - "epoch": 2.8615279672578446, - "grad_norm": 0.0030935786198824644, - "learning_rate": 1.457532521434184e-05, - "loss": 0.00014684826601296663, - "step": 4195 - }, - { - "epoch": 2.8649386084583903, - "grad_norm": 0.0004102849052287638, - "learning_rate": 1.4494283018382991e-05, - "loss": 0.0002242558402940631, - "step": 4200 - }, - { - "epoch": 2.868349249658936, - "grad_norm": 1.6695232391357422, - "learning_rate": 1.4413412749395593e-05, - "loss": 0.01916976124048233, - "step": 4205 - }, - { - "epoch": 2.8717598908594817, - "grad_norm": 0.003357100998982787, - "learning_rate": 1.4332715011741656e-05, - "loss": 0.0036146264523267747, - "step": 4210 - }, - { - "epoch": 2.8751705320600274, - "grad_norm": 0.002434425987303257, - "learning_rate": 1.425219040849373e-05, - "loss": 0.0001186407171189785, - "step": 4215 - }, - { - "epoch": 2.878581173260573, - "grad_norm": 0.0272241048514843, - "learning_rate": 1.4171839541430586e-05, - "loss": 0.003773893415927887, - "step": 4220 - }, - { - "epoch": 2.8819918144611187, - "grad_norm": 0.00031364246387965977, - "learning_rate": 1.409166301103257e-05, - "loss": 3.483370819594711e-05, - "step": 4225 - }, - { - "epoch": 2.8854024556616644, - "grad_norm": 0.008605693466961384, - "learning_rate": 1.4011661416477186e-05, - "loss": 0.005898609757423401, - "step": 4230 - }, - { - "epoch": 2.88881309686221, - "grad_norm": 0.0003439450520090759, - "learning_rate": 1.3931835355634601e-05, - "loss": 0.0017080994322896004, - "step": 4235 - }, - { - "epoch": 2.892223738062756, - "grad_norm": 0.004427058156579733, - "learning_rate": 1.3852185425063181e-05, - "loss": 0.00010978456120938062, - "step": 4240 - }, - { - "epoch": 2.8956343792633015, - "grad_norm": 0.02229383960366249, - "learning_rate": 1.377271222000503e-05, - "loss": 0.0012531550601124763, - "step": 4245 - }, - { - "epoch": 2.899045020463847, - "grad_norm": 0.004171700682491064, - "learning_rate": 1.3693416334381517e-05, - "loss": 0.0012122373096644878, - "step": 4250 - }, - { - "epoch": 2.902455661664393, - "grad_norm": 0.0016778658609837294, - "learning_rate": 1.3614298360788924e-05, - "loss": 0.0006234514527022839, - "step": 4255 - }, - { - "epoch": 2.9058663028649385, - "grad_norm": 0.00403103232383728, - "learning_rate": 1.3535358890493897e-05, - "loss": 0.00030033572111278775, - "step": 4260 - }, - { - "epoch": 2.909276944065484, - "grad_norm": 0.006126928608864546, - "learning_rate": 1.3456598513429111e-05, - "loss": 0.014299000799655914, - "step": 4265 - }, - { - "epoch": 2.91268758526603, - "grad_norm": 0.0007882033823989332, - "learning_rate": 1.3378017818188815e-05, - "loss": 0.001049484871327877, - "step": 4270 - }, - { - "epoch": 2.9160982264665756, - "grad_norm": 0.0004515725013334304, - "learning_rate": 1.329961739202451e-05, - "loss": 6.776668014936149e-05, - "step": 4275 - }, - { - "epoch": 2.9195088676671213, - "grad_norm": 0.3738904297351837, - "learning_rate": 1.3221397820840419e-05, - "loss": 0.0010396759025752544, - "step": 4280 - }, - { - "epoch": 2.9229195088676674, - "grad_norm": 0.035881806164979935, - "learning_rate": 1.3143359689189279e-05, - "loss": 0.0003127899952232838, - "step": 4285 - }, - { - "epoch": 2.926330150068213, - "grad_norm": 0.010501476936042309, - "learning_rate": 1.306550358026784e-05, - "loss": 0.013835662603378296, - "step": 4290 - }, - { - "epoch": 2.9297407912687587, - "grad_norm": 0.002153329784050584, - "learning_rate": 1.2987830075912565e-05, - "loss": 0.007172297686338425, - "step": 4295 - }, - { - "epoch": 2.9331514324693044, - "grad_norm": 0.000594582874327898, - "learning_rate": 1.2910339756595254e-05, - "loss": 6.662132800556719e-05, - "step": 4300 - }, - { - "epoch": 2.93656207366985, - "grad_norm": 1.2802950143814087, - "learning_rate": 1.283303320141879e-05, - "loss": 0.0013225926086306572, - "step": 4305 - }, - { - "epoch": 2.939972714870396, - "grad_norm": 0.0010621119290590286, - "learning_rate": 1.2755910988112639e-05, - "loss": 0.0001950544072315097, - "step": 4310 - }, - { - "epoch": 2.9433833560709415, - "grad_norm": 0.0004629544273484498, - "learning_rate": 1.2678973693028735e-05, - "loss": 0.0002407266292721033, - "step": 4315 - }, - { - "epoch": 2.946793997271487, - "grad_norm": 0.00353289395570755, - "learning_rate": 1.2602221891137021e-05, - "loss": 0.006271860748529434, - "step": 4320 - }, - { - "epoch": 2.950204638472033, - "grad_norm": 0.021108930930495262, - "learning_rate": 1.2525656156021227e-05, - "loss": 0.007909800857305527, - "step": 4325 - }, - { - "epoch": 2.9536152796725785, - "grad_norm": 0.007604979444295168, - "learning_rate": 1.2449277059874547e-05, - "loss": 0.00022456045262515546, - "step": 4330 - }, - { - "epoch": 2.957025920873124, - "grad_norm": 0.0007357494323514402, - "learning_rate": 1.2373085173495411e-05, - "loss": 0.0010289529338479042, - "step": 4335 - }, - { - "epoch": 2.96043656207367, - "grad_norm": 0.0004920829669572413, - "learning_rate": 1.229708106628316e-05, - "loss": 8.303072536364198e-05, - "step": 4340 - }, - { - "epoch": 2.9638472032742156, - "grad_norm": 0.0014207189669832587, - "learning_rate": 1.2221265306233824e-05, - "loss": 0.001923336647450924, - "step": 4345 - }, - { - "epoch": 2.9672578444747613, - "grad_norm": 0.00898696668446064, - "learning_rate": 1.2145638459935863e-05, - "loss": 0.01918397843837738, - "step": 4350 - }, - { - "epoch": 2.970668485675307, - "grad_norm": 0.08236313611268997, - "learning_rate": 1.2070201092565988e-05, - "loss": 0.0005465132184326648, - "step": 4355 - }, - { - "epoch": 2.9740791268758526, - "grad_norm": 0.00796231534332037, - "learning_rate": 1.199495376788481e-05, - "loss": 4.8452542978338896e-05, - "step": 4360 - }, - { - "epoch": 2.9774897680763983, - "grad_norm": 0.026452092453837395, - "learning_rate": 1.1919897048232791e-05, - "loss": 0.0006576513405889273, - "step": 4365 - }, - { - "epoch": 2.980900409276944, - "grad_norm": 0.08616425842046738, - "learning_rate": 1.1845031494525901e-05, - "loss": 0.0013777482323348521, - "step": 4370 - }, - { - "epoch": 2.9843110504774897, - "grad_norm": 0.00048112327931448817, - "learning_rate": 1.1770357666251509e-05, - "loss": 0.0018716825172305108, - "step": 4375 - }, - { - "epoch": 2.9877216916780354, - "grad_norm": 0.0013632692862302065, - "learning_rate": 1.1695876121464154e-05, - "loss": 4.9980584299191834e-05, - "step": 4380 - }, - { - "epoch": 2.991132332878581, - "grad_norm": 1.318718671798706, - "learning_rate": 1.1621587416781445e-05, - "loss": 0.013328136503696441, - "step": 4385 - }, - { - "epoch": 2.9945429740791267, - "grad_norm": 0.00219643022865057, - "learning_rate": 1.1547492107379782e-05, - "loss": 9.902374586090446e-05, - "step": 4390 - }, - { - "epoch": 2.9959072305593453, - "eval_loss": 0.07537060230970383, - "eval_runtime": 0.95, - "eval_samples_per_second": 78.945, - "eval_steps_per_second": 2.105, - "step": 4392 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4392 - }, - { - "epoch": 2.9979536152796724, - "grad_norm": 0.003875225316733122, - "learning_rate": 1.1473590746990342e-05, - "loss": 0.0025784535333514213, - "step": 4395 - }, - { - "epoch": 3.001364256480218, - "grad_norm": 0.011882650665938854, - "learning_rate": 1.1399883887894846e-05, - "loss": 6.913430406711996e-05, - "step": 4400 - }, - { - "epoch": 3.004774897680764, - "grad_norm": 0.002775507280603051, - "learning_rate": 1.1326372080921464e-05, - "loss": 0.00019087132532149553, - "step": 4405 - }, - { - "epoch": 3.00818553888131, - "grad_norm": 0.06267738342285156, - "learning_rate": 1.125305587544069e-05, - "loss": 0.0007432831451296806, - "step": 4410 - }, - { - "epoch": 3.0115961800818556, - "grad_norm": 0.026564784348011017, - "learning_rate": 1.1179935819361272e-05, - "loss": 0.00015866300091147423, - "step": 4415 - }, - { - "epoch": 3.0150068212824013, - "grad_norm": 0.0011134434025734663, - "learning_rate": 1.1107012459126064e-05, - "loss": 0.0005193403456360101, - "step": 4420 - }, - { - "epoch": 3.018417462482947, - "grad_norm": 0.0006882239249534905, - "learning_rate": 1.1034286339707975e-05, - "loss": 0.0002253461629152298, - "step": 4425 - }, - { - "epoch": 3.0218281036834926, - "grad_norm": 0.008092721924185753, - "learning_rate": 1.0961758004605873e-05, - "loss": 0.0003596893046051264, - "step": 4430 - }, - { - "epoch": 3.0252387448840383, - "grad_norm": 0.01083564292639494, - "learning_rate": 1.0889427995840585e-05, - "loss": 0.010776457190513612, - "step": 4435 - }, - { - "epoch": 3.028649386084584, - "grad_norm": 0.040309611707925797, - "learning_rate": 1.0817296853950724e-05, - "loss": 0.0002762762364000082, - "step": 4440 - }, - { - "epoch": 3.0320600272851297, - "grad_norm": 0.0026077954098582268, - "learning_rate": 1.0745365117988804e-05, - "loss": 0.00011963967699557543, - "step": 4445 - }, - { - "epoch": 3.0354706684856754, - "grad_norm": 0.0013187731383368373, - "learning_rate": 1.0673633325517088e-05, - "loss": 0.0022230114787817, - "step": 4450 - }, - { - "epoch": 3.038881309686221, - "grad_norm": 0.08760128915309906, - "learning_rate": 1.060210201260362e-05, - "loss": 0.00029163951985538005, - "step": 4455 - }, - { - "epoch": 3.0422919508867667, - "grad_norm": 0.2557981610298157, - "learning_rate": 1.0530771713818229e-05, - "loss": 0.0014749299734830856, - "step": 4460 - }, - { - "epoch": 3.0457025920873124, - "grad_norm": 0.009532714262604713, - "learning_rate": 1.0459642962228502e-05, - "loss": 0.00023887362331151963, - "step": 4465 - }, - { - "epoch": 3.049113233287858, - "grad_norm": 0.002977812895551324, - "learning_rate": 1.0388716289395833e-05, - "loss": 3.836472751572728e-05, - "step": 4470 - }, - { - "epoch": 3.052523874488404, - "grad_norm": 0.0007074066670611501, - "learning_rate": 1.0317992225371411e-05, - "loss": 7.527543348260224e-05, - "step": 4475 - }, - { - "epoch": 3.0559345156889495, - "grad_norm": 0.020919082686305046, - "learning_rate": 1.0247471298692336e-05, - "loss": 0.00021068421192467214, - "step": 4480 - }, - { - "epoch": 3.059345156889495, - "grad_norm": 0.0012850193306803703, - "learning_rate": 1.0177154036377557e-05, - "loss": 0.00027780483942478894, - "step": 4485 - }, - { - "epoch": 3.062755798090041, - "grad_norm": 0.030514074489474297, - "learning_rate": 1.0107040963924027e-05, - "loss": 0.00020711682736873627, - "step": 4490 - }, - { - "epoch": 3.0661664392905865, - "grad_norm": 0.001317308866418898, - "learning_rate": 1.0037132605302716e-05, - "loss": 0.00024969261139631274, - "step": 4495 - }, - { - "epoch": 3.069577080491132, - "grad_norm": 0.0012978437589481473, - "learning_rate": 9.967429482954768e-06, - "loss": 0.0001504249172285199, - "step": 4500 - }, - { - "epoch": 3.072987721691678, - "grad_norm": 0.0003166501992382109, - "learning_rate": 9.897932117787476e-06, - "loss": 0.0002773872809484601, - "step": 4505 - }, - { - "epoch": 3.0763983628922236, - "grad_norm": 0.0029924868140369654, - "learning_rate": 9.828641029170544e-06, - "loss": 0.0005509680602699518, - "step": 4510 - }, - { - "epoch": 3.0798090040927693, - "grad_norm": 0.003611995605751872, - "learning_rate": 9.759556734932064e-06, - "loss": 0.0004832141101360321, - "step": 4515 - }, - { - "epoch": 3.083219645293315, - "grad_norm": 0.003923687152564526, - "learning_rate": 9.690679751354736e-06, - "loss": 0.00014967764727771282, - "step": 4520 - }, - { - "epoch": 3.086630286493861, - "grad_norm": 0.012491169385612011, - "learning_rate": 9.62201059317195e-06, - "loss": 8.488112362101675e-05, - "step": 4525 - }, - { - "epoch": 3.0900409276944067, - "grad_norm": 0.00011767258547479287, - "learning_rate": 9.553549773564035e-06, - "loss": 5.874955677427351e-05, - "step": 4530 - }, - { - "epoch": 3.0934515688949524, - "grad_norm": 0.021170401945710182, - "learning_rate": 9.48529780415427e-06, - "loss": 0.00013614417985081674, - "step": 4535 - }, - { - "epoch": 3.096862210095498, - "grad_norm": 0.005378399509936571, - "learning_rate": 9.417255195005218e-06, - "loss": 0.00010139571968466043, - "step": 4540 - }, - { - "epoch": 3.100272851296044, - "grad_norm": 0.0013061281060799956, - "learning_rate": 9.349422454614815e-06, - "loss": 0.0004999907687306404, - "step": 4545 - }, - { - "epoch": 3.1036834924965895, - "grad_norm": 0.000693993701133877, - "learning_rate": 9.281800089912605e-06, - "loss": 0.0001406701048836112, - "step": 4550 - }, - { - "epoch": 3.107094133697135, - "grad_norm": 0.003168008290231228, - "learning_rate": 9.214388606255934e-06, - "loss": 0.0002458775183185935, - "step": 4555 - }, - { - "epoch": 3.110504774897681, - "grad_norm": 0.001224424922838807, - "learning_rate": 9.147188507426224e-06, - "loss": 5.902486154809594e-05, - "step": 4560 - }, - { - "epoch": 3.1139154160982265, - "grad_norm": 0.0006458029965870082, - "learning_rate": 9.080200295625125e-06, - "loss": 6.971908733248711e-05, - "step": 4565 - }, - { - "epoch": 3.117326057298772, - "grad_norm": 0.0004500233626458794, - "learning_rate": 9.013424471470832e-06, - "loss": 4.827580996789038e-05, - "step": 4570 - }, - { - "epoch": 3.120736698499318, - "grad_norm": 0.008253362029790878, - "learning_rate": 8.946861533994316e-06, - "loss": 0.00010066803079098464, - "step": 4575 - }, - { - "epoch": 3.1241473396998636, - "grad_norm": 0.0027607178781181574, - "learning_rate": 8.88051198063559e-06, - "loss": 0.00010761913144961, - "step": 4580 - }, - { - "epoch": 3.1275579809004093, - "grad_norm": 0.0012132832780480385, - "learning_rate": 8.81437630723999e-06, - "loss": 0.00010583751136437059, - "step": 4585 - }, - { - "epoch": 3.130968622100955, - "grad_norm": 0.013205752708017826, - "learning_rate": 8.748455008054519e-06, - "loss": 7.872265996411443e-05, - "step": 4590 - }, - { - "epoch": 3.1343792633015006, - "grad_norm": 0.010380366817116737, - "learning_rate": 8.682748575724071e-06, - "loss": 0.00027635702863335607, - "step": 4595 - }, - { - "epoch": 3.1377899045020463, - "grad_norm": 0.012770955450832844, - "learning_rate": 8.617257501287805e-06, - "loss": 0.00028360043652355673, - "step": 4600 - }, - { - "epoch": 3.141200545702592, - "grad_norm": 0.012632913887500763, - "learning_rate": 8.551982274175449e-06, - "loss": 4.925676621496678e-05, - "step": 4605 - }, - { - "epoch": 3.1446111869031377, - "grad_norm": 0.0028189525473862886, - "learning_rate": 8.486923382203703e-06, - "loss": 0.0039628144353628155, - "step": 4610 - }, - { - "epoch": 3.1480218281036834, - "grad_norm": 0.11367341130971909, - "learning_rate": 8.422081311572464e-06, - "loss": 0.000568081671372056, - "step": 4615 - }, - { - "epoch": 3.151432469304229, - "grad_norm": 0.00095061567844823, - "learning_rate": 8.35745654686135e-06, - "loss": 0.00036408030427992344, - "step": 4620 - }, - { - "epoch": 3.1548431105047747, - "grad_norm": 0.05772553011775017, - "learning_rate": 8.29304957102596e-06, - "loss": 0.0006427288055419922, - "step": 4625 - }, - { - "epoch": 3.1582537517053204, - "grad_norm": 0.017082368955016136, - "learning_rate": 8.22886086539432e-06, - "loss": 0.00015330149326473475, - "step": 4630 - }, - { - "epoch": 3.161664392905866, - "grad_norm": 0.0033851212356239557, - "learning_rate": 8.164890909663256e-06, - "loss": 0.00012121353065595031, - "step": 4635 - }, - { - "epoch": 3.1650750341064118, - "grad_norm": 0.000577523373067379, - "learning_rate": 8.101140181894868e-06, - "loss": 7.793278782628477e-05, - "step": 4640 - }, - { - "epoch": 3.168485675306958, - "grad_norm": 0.00039884017314761877, - "learning_rate": 8.037609158512875e-06, - "loss": 0.0014324543066322804, - "step": 4645 - }, - { - "epoch": 3.1718963165075036, - "grad_norm": 0.004100058693438768, - "learning_rate": 7.97429831429911e-06, - "loss": 0.00015990985557436943, - "step": 4650 - }, - { - "epoch": 3.1753069577080493, - "grad_norm": 0.005648438818752766, - "learning_rate": 7.911208122389956e-06, - "loss": 8.149745990522206e-05, - "step": 4655 - }, - { - "epoch": 3.178717598908595, - "grad_norm": 0.005042952951043844, - "learning_rate": 7.848339054272808e-06, - "loss": 9.191314456984401e-05, - "step": 4660 - }, - { - "epoch": 3.1821282401091406, - "grad_norm": 0.0007950706058181822, - "learning_rate": 7.785691579782546e-06, - "loss": 7.44381221011281e-05, - "step": 4665 - }, - { - "epoch": 3.1855388813096863, - "grad_norm": 0.0717335194349289, - "learning_rate": 7.723266167098058e-06, - "loss": 0.00012347951997071505, - "step": 4670 - }, - { - "epoch": 3.188949522510232, - "grad_norm": 0.011817894876003265, - "learning_rate": 7.661063282738685e-06, - "loss": 0.000311569613404572, - "step": 4675 - }, - { - "epoch": 3.1923601637107777, - "grad_norm": 0.011100267991423607, - "learning_rate": 7.599083391560774e-06, - "loss": 0.00023061195388436319, - "step": 4680 - }, - { - "epoch": 3.1957708049113234, - "grad_norm": 0.025753796100616455, - "learning_rate": 7.5373269567541776e-06, - "loss": 0.00013706330209970475, - "step": 4685 - }, - { - "epoch": 3.199181446111869, - "grad_norm": 0.0529993437230587, - "learning_rate": 7.47579443983886e-06, - "loss": 0.00025801956653594973, - "step": 4690 - }, - { - "epoch": 3.2025920873124147, - "grad_norm": 0.01164156198501587, - "learning_rate": 7.41448630066132e-06, - "loss": 0.0001246333820745349, - "step": 4695 - }, - { - "epoch": 3.2060027285129604, - "grad_norm": 0.0022943434305489063, - "learning_rate": 7.353402997391271e-06, - "loss": 4.788096994161606e-05, - "step": 4700 - }, - { - "epoch": 3.209413369713506, - "grad_norm": 0.0016377613646909595, - "learning_rate": 7.292544986518198e-06, - "loss": 0.0014273281209170817, - "step": 4705 - }, - { - "epoch": 3.212824010914052, - "grad_norm": 0.046027738600969315, - "learning_rate": 7.231912722847881e-06, - "loss": 0.00015070366207510234, - "step": 4710 - }, - { - "epoch": 3.2162346521145975, - "grad_norm": 0.12626095116138458, - "learning_rate": 7.171506659499067e-06, - "loss": 0.0002223264891654253, - "step": 4715 - }, - { - "epoch": 3.219645293315143, - "grad_norm": 0.004387991968542337, - "learning_rate": 7.1113272479000465e-06, - "loss": 5.1431613974273206e-05, - "step": 4720 - }, - { - "epoch": 3.223055934515689, - "grad_norm": 0.0005260159377939999, - "learning_rate": 7.051374937785289e-06, - "loss": 9.967307560145854e-05, - "step": 4725 - }, - { - "epoch": 3.2264665757162345, - "grad_norm": 0.001557494280859828, - "learning_rate": 6.9916501771920795e-06, - "loss": 3.6639469908550384e-05, - "step": 4730 - }, - { - "epoch": 3.22987721691678, - "grad_norm": 0.0013014579890295863, - "learning_rate": 6.932153412457195e-06, - "loss": 0.00015290889423340558, - "step": 4735 - }, - { - "epoch": 3.233287858117326, - "grad_norm": 0.0005693956045433879, - "learning_rate": 6.872885088213522e-06, - "loss": 9.23092185985297e-05, - "step": 4740 - }, - { - "epoch": 3.2366984993178716, - "grad_norm": 0.0008315684972330928, - "learning_rate": 6.813845647386771e-06, - "loss": 0.00010763210011646151, - "step": 4745 - }, - { - "epoch": 3.2401091405184177, - "grad_norm": 0.0021727036219090223, - "learning_rate": 6.755035531192148e-06, - "loss": 3.423129383008927e-05, - "step": 4750 - }, - { - "epoch": 3.2435197817189634, - "grad_norm": 0.0001480428036302328, - "learning_rate": 6.696455179131084e-06, - "loss": 0.023981352150440217, - "step": 4755 - }, - { - "epoch": 3.2455661664392905, - "eval_loss": 0.074391670525074, - "eval_runtime": 0.9318, - "eval_samples_per_second": 80.487, - "eval_steps_per_second": 2.146, - "step": 4758 - }, - { - "eval_cer_subset": 0.015260934732252351, - "eval_cer_subset_edit_distance": 112, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4758 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 4.967235983926886e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-4758/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/adapter_model.safetensors deleted file mode 100644 index 73c3ab80e9ec42da9114ad99516a9fa2c7a7b1a8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aed6e2020c05b7859e44d1b000a815e4ac63a0d4859d04b7ffc896f5b2418e7b -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/optimizer.pt deleted file mode 100644 index 25aa5fedebdede96523d5106778c00448e00363c..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:83f92d68d2f3830d5708b99fdc622f9a2b5f80c1722a86af08f537753e71d477 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/rng_state.pth deleted file mode 100644 index a1212d03b765eaaf9ef7ed52fadb49a6fa1c7008..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:21c07160197b34717d595cac163969653154415c96eda4bdeb7cf2d945992899 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/scheduler.pt deleted file mode 100644 index 3c4dd4770a39cb3409a751f1651b896bb69c4846..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ffe5006d7cda8c8203666c12876fdb02d8419a17c3fecfcdccdbd3bd2a11c7c2 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/trainer_state.json deleted file mode 100644 index 4f0279e2380513aabe4afdbbe6ed71b88eec7546..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/trainer_state.json +++ /dev/null @@ -1,7426 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 3.495225102319236, - "eval_steps": 366, - "global_step": 5124, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - }, - { - "epoch": 1.7496589358799455, - "grad_norm": 0.1577913612127304, - "learning_rate": 4.617543195717702e-05, - "loss": 0.008352726697921753, - "step": 2565 - }, - { - "epoch": 1.7530695770804912, - "grad_norm": 0.0097503075376153, - "learning_rate": 4.607566614572297e-05, - "loss": 0.0051550988107919695, - "step": 2570 - }, - { - "epoch": 1.7564802182810368, - "grad_norm": 0.0745258778333664, - "learning_rate": 4.5975836246359376e-05, - "loss": 0.021304388344287873, - "step": 2575 - }, - { - "epoch": 1.7598908594815825, - "grad_norm": 0.06805134564638138, - "learning_rate": 4.5875943005137875e-05, - "loss": 0.002654948644340038, - "step": 2580 - }, - { - "epoch": 1.7633015006821282, - "grad_norm": 0.005402611568570137, - "learning_rate": 4.577598716858342e-05, - "loss": 0.0005614575464278459, - "step": 2585 - }, - { - "epoch": 1.766712141882674, - "grad_norm": 1.909899115562439, - "learning_rate": 4.5675969483688796e-05, - "loss": 0.07116572856903076, - "step": 2590 - }, - { - "epoch": 1.7701227830832198, - "grad_norm": 0.31900784373283386, - "learning_rate": 4.557589069790897e-05, - "loss": 0.00657682865858078, - "step": 2595 - }, - { - "epoch": 1.7735334242837655, - "grad_norm": 0.029744356870651245, - "learning_rate": 4.547575155915556e-05, - "loss": 0.022768501937389374, - "step": 2600 - }, - { - "epoch": 1.7769440654843112, - "grad_norm": 0.3033762276172638, - "learning_rate": 4.537555281579118e-05, - "loss": 0.06703370213508605, - "step": 2605 - }, - { - "epoch": 1.7803547066848568, - "grad_norm": 0.1135796457529068, - "learning_rate": 4.5275295216623895e-05, - "loss": 0.02318609356880188, - "step": 2610 - }, - { - "epoch": 1.7837653478854025, - "grad_norm": 0.03714317828416824, - "learning_rate": 4.517497951090163e-05, - "loss": 0.0006621151696890593, - "step": 2615 - }, - { - "epoch": 1.7871759890859482, - "grad_norm": 0.022167189046740532, - "learning_rate": 4.507460644830652e-05, - "loss": 0.02861899733543396, - "step": 2620 - }, - { - "epoch": 1.790586630286494, - "grad_norm": 0.85112464427948, - "learning_rate": 4.497417677894937e-05, - "loss": 0.006332498788833618, - "step": 2625 - }, - { - "epoch": 1.7939972714870396, - "grad_norm": 0.35604724287986755, - "learning_rate": 4.487369125336402e-05, - "loss": 0.0009636864997446537, - "step": 2630 - }, - { - "epoch": 1.7974079126875853, - "grad_norm": 1.309139370918274, - "learning_rate": 4.477315062250173e-05, - "loss": 0.015147030353546143, - "step": 2635 - }, - { - "epoch": 1.800818553888131, - "grad_norm": 0.00445478456094861, - "learning_rate": 4.467255563772554e-05, - "loss": 0.0033130761235952376, - "step": 2640 - }, - { - "epoch": 1.8042291950886766, - "grad_norm": 0.6081591248512268, - "learning_rate": 4.457190705080476e-05, - "loss": 0.05884039998054504, - "step": 2645 - }, - { - "epoch": 1.8076398362892223, - "grad_norm": 0.021707098931074142, - "learning_rate": 4.4471205613909215e-05, - "loss": 0.011024921387434005, - "step": 2650 - }, - { - "epoch": 1.811050477489768, - "grad_norm": 0.07584571093320847, - "learning_rate": 4.437045207960372e-05, - "loss": 0.0034543585032224657, - "step": 2655 - }, - { - "epoch": 1.8144611186903137, - "grad_norm": 0.09392323344945908, - "learning_rate": 4.4269647200842444e-05, - "loss": 0.020604337751865386, - "step": 2660 - }, - { - "epoch": 1.8178717598908594, - "grad_norm": 0.01986370049417019, - "learning_rate": 4.4168791730963214e-05, - "loss": 0.002114328555762768, - "step": 2665 - }, - { - "epoch": 1.821282401091405, - "grad_norm": 0.09479828178882599, - "learning_rate": 4.406788642368199e-05, - "loss": 0.025032815337181092, - "step": 2670 - }, - { - "epoch": 1.8246930422919507, - "grad_norm": 0.049587834626436234, - "learning_rate": 4.396693203308714e-05, - "loss": 0.010216309875249862, - "step": 2675 - }, - { - "epoch": 1.8281036834924966, - "grad_norm": 0.8442233204841614, - "learning_rate": 4.3865929313633846e-05, - "loss": 0.028453707695007324, - "step": 2680 - }, - { - "epoch": 1.8315143246930423, - "grad_norm": 1.1214258670806885, - "learning_rate": 4.37648790201385e-05, - "loss": 0.030564960837364197, - "step": 2685 - }, - { - "epoch": 1.834924965893588, - "grad_norm": 0.0033480043057352304, - "learning_rate": 4.3663781907772984e-05, - "loss": 0.0031189337372779847, - "step": 2690 - }, - { - "epoch": 1.8383356070941337, - "grad_norm": 1.5848685503005981, - "learning_rate": 4.356263873205908e-05, - "loss": 0.006724410504102707, - "step": 2695 - }, - { - "epoch": 1.8417462482946794, - "grad_norm": 2.099224090576172, - "learning_rate": 4.346145024886282e-05, - "loss": 0.02901224195957184, - "step": 2700 - }, - { - "epoch": 1.845156889495225, - "grad_norm": 0.009933914057910442, - "learning_rate": 4.336021721438881e-05, - "loss": 0.01628301590681076, - "step": 2705 - }, - { - "epoch": 1.848567530695771, - "grad_norm": 0.36502084136009216, - "learning_rate": 4.325894038517463e-05, - "loss": 0.0063889890909194945, - "step": 2710 - }, - { - "epoch": 1.8519781718963166, - "grad_norm": 0.5338266491889954, - "learning_rate": 4.3157620518085123e-05, - "loss": 0.0017654186114668847, - "step": 2715 - }, - { - "epoch": 1.8553888130968623, - "grad_norm": 0.019173067063093185, - "learning_rate": 4.3056258370306773e-05, - "loss": 0.026314160227775572, - "step": 2720 - }, - { - "epoch": 1.858799454297408, - "grad_norm": 0.0022459065075963736, - "learning_rate": 4.295485469934203e-05, - "loss": 0.034506088495254515, - "step": 2725 - }, - { - "epoch": 1.8622100954979537, - "grad_norm": 0.009851609356701374, - "learning_rate": 4.285341026300366e-05, - "loss": 0.037691861391067505, - "step": 2730 - }, - { - "epoch": 1.8656207366984994, - "grad_norm": 0.037228964269161224, - "learning_rate": 4.275192581940908e-05, - "loss": 0.019535519182682037, - "step": 2735 - }, - { - "epoch": 1.869031377899045, - "grad_norm": 0.03244785964488983, - "learning_rate": 4.2650402126974704e-05, - "loss": 0.004778595641255379, - "step": 2740 - }, - { - "epoch": 1.8724420190995907, - "grad_norm": 0.027707895264029503, - "learning_rate": 4.254883994441023e-05, - "loss": 0.014376556873321534, - "step": 2745 - }, - { - "epoch": 1.8758526603001364, - "grad_norm": 0.0010299253044649959, - "learning_rate": 4.244724003071302e-05, - "loss": 0.03207484483718872, - "step": 2750 - }, - { - "epoch": 1.879263301500682, - "grad_norm": 0.09466377645730972, - "learning_rate": 4.234560314516241e-05, - "loss": 0.0034012068063020706, - "step": 2755 - }, - { - "epoch": 1.8826739427012278, - "grad_norm": 0.010737070813775063, - "learning_rate": 4.224393004731403e-05, - "loss": 0.0015277769416570663, - "step": 2760 - }, - { - "epoch": 1.8860845839017735, - "grad_norm": 0.0126175656914711, - "learning_rate": 4.2142221496994144e-05, - "loss": 0.0010683752596378326, - "step": 2765 - }, - { - "epoch": 1.8894952251023192, - "grad_norm": 0.03981072083115578, - "learning_rate": 4.2040478254293946e-05, - "loss": 0.013066369295120239, - "step": 2770 - }, - { - "epoch": 1.8929058663028648, - "grad_norm": 1.678425669670105, - "learning_rate": 4.193870107956391e-05, - "loss": 0.04245063066482544, - "step": 2775 - }, - { - "epoch": 1.8963165075034105, - "grad_norm": 0.21114972233772278, - "learning_rate": 4.1836890733408063e-05, - "loss": 0.006565409898757935, - "step": 2780 - }, - { - "epoch": 1.8997271487039562, - "grad_norm": 0.13557757437229156, - "learning_rate": 4.173504797667836e-05, - "loss": 0.0032049473375082016, - "step": 2785 - }, - { - "epoch": 1.9031377899045019, - "grad_norm": 0.03560245409607887, - "learning_rate": 4.163317357046896e-05, - "loss": 0.036527630686759946, - "step": 2790 - }, - { - "epoch": 1.9065484311050478, - "grad_norm": 0.016457395628094673, - "learning_rate": 4.1531268276110534e-05, - "loss": 0.004363229870796204, - "step": 2795 - }, - { - "epoch": 1.9099590723055935, - "grad_norm": 0.15892116725444794, - "learning_rate": 4.142933285516459e-05, - "loss": 0.0009642043150961399, - "step": 2800 - }, - { - "epoch": 1.9133697135061392, - "grad_norm": 1.4503952264785767, - "learning_rate": 4.1327368069417805e-05, - "loss": 0.016029161214828492, - "step": 2805 - }, - { - "epoch": 1.9167803547066848, - "grad_norm": 0.05091691017150879, - "learning_rate": 4.122537468087626e-05, - "loss": 0.008691015094518662, - "step": 2810 - }, - { - "epoch": 1.9201909959072305, - "grad_norm": 0.33131423592567444, - "learning_rate": 4.1123353451759843e-05, - "loss": 0.026498579978942872, - "step": 2815 - }, - { - "epoch": 1.9236016371077762, - "grad_norm": 0.011024169623851776, - "learning_rate": 4.1021305144496455e-05, - "loss": 0.005746996402740479, - "step": 2820 - }, - { - "epoch": 1.9270122783083221, - "grad_norm": 0.003868364728987217, - "learning_rate": 4.091923052171637e-05, - "loss": 0.010599984973669051, - "step": 2825 - }, - { - "epoch": 1.9304229195088678, - "grad_norm": 0.06572377681732178, - "learning_rate": 4.081713034624656e-05, - "loss": 0.0074796505272388455, - "step": 2830 - }, - { - "epoch": 1.9338335607094135, - "grad_norm": 0.004749608226120472, - "learning_rate": 4.07150053811049e-05, - "loss": 0.001074880175292492, - "step": 2835 - }, - { - "epoch": 1.9372442019099592, - "grad_norm": 1.0662771463394165, - "learning_rate": 4.061285638949456e-05, - "loss": 0.012685902416706085, - "step": 2840 - }, - { - "epoch": 1.9406548431105048, - "grad_norm": 0.05863015726208687, - "learning_rate": 4.0510684134798275e-05, - "loss": 0.02307589054107666, - "step": 2845 - }, - { - "epoch": 1.9440654843110505, - "grad_norm": 0.1672995239496231, - "learning_rate": 4.0408489380572595e-05, - "loss": 0.0009137367829680443, - "step": 2850 - }, - { - "epoch": 1.9474761255115962, - "grad_norm": 0.37154069542884827, - "learning_rate": 4.030627289054224e-05, - "loss": 0.03070145845413208, - "step": 2855 - }, - { - "epoch": 1.950886766712142, - "grad_norm": 1.7240241765975952, - "learning_rate": 4.020403542859436e-05, - "loss": 0.02094976305961609, - "step": 2860 - }, - { - "epoch": 1.9542974079126876, - "grad_norm": 0.019670270383358, - "learning_rate": 4.0101777758772826e-05, - "loss": 0.002072345092892647, - "step": 2865 - }, - { - "epoch": 1.9577080491132333, - "grad_norm": 0.004318730439990759, - "learning_rate": 3.999950064527255e-05, - "loss": 0.004361823201179504, - "step": 2870 - }, - { - "epoch": 1.961118690313779, - "grad_norm": 0.01702667959034443, - "learning_rate": 3.989720485243371e-05, - "loss": 0.0062848575413227085, - "step": 2875 - }, - { - "epoch": 1.9645293315143246, - "grad_norm": 0.011724979616701603, - "learning_rate": 3.9794891144736114e-05, - "loss": 0.010996624082326888, - "step": 2880 - }, - { - "epoch": 1.9679399727148703, - "grad_norm": 0.05676786229014397, - "learning_rate": 3.9692560286793454e-05, - "loss": 0.0015414996072649957, - "step": 2885 - }, - { - "epoch": 1.971350613915416, - "grad_norm": 0.0819210484623909, - "learning_rate": 3.959021304334756e-05, - "loss": 0.0008444737643003464, - "step": 2890 - }, - { - "epoch": 1.9747612551159617, - "grad_norm": 2.654984951019287, - "learning_rate": 3.948785017926274e-05, - "loss": 0.029948851466178893, - "step": 2895 - }, - { - "epoch": 1.9781718963165074, - "grad_norm": 1.9272631406784058, - "learning_rate": 3.938547245952003e-05, - "loss": 0.028752812743186952, - "step": 2900 - }, - { - "epoch": 1.981582537517053, - "grad_norm": 1.8656548261642456, - "learning_rate": 3.9283080649211474e-05, - "loss": 0.013515619933605194, - "step": 2905 - }, - { - "epoch": 1.984993178717599, - "grad_norm": 0.12653613090515137, - "learning_rate": 3.918067551353445e-05, - "loss": 0.0011569897644221783, - "step": 2910 - }, - { - "epoch": 1.9884038199181446, - "grad_norm": 0.07956118136644363, - "learning_rate": 3.9078257817785886e-05, - "loss": 0.024067461490631104, - "step": 2915 - }, - { - "epoch": 1.9918144611186903, - "grad_norm": 0.05314113199710846, - "learning_rate": 3.897582832735658e-05, - "loss": 0.008826743811368942, - "step": 2920 - }, - { - "epoch": 1.995225102319236, - "grad_norm": 0.05015930160880089, - "learning_rate": 3.887338780772551e-05, - "loss": 0.017050875723361968, - "step": 2925 - }, - { - "epoch": 1.9972714870395634, - "eval_loss": 0.051675114780664444, - "eval_runtime": 0.9019, - "eval_samples_per_second": 83.154, - "eval_steps_per_second": 2.217, - "step": 2928 - }, - { - "eval_cer_subset": 0.016214743153018123, - "eval_cer_subset_edit_distance": 119, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2928 - }, - { - "epoch": 1.9986357435197817, - "grad_norm": 0.0063513885252177715, - "learning_rate": 3.8770937024454024e-05, - "loss": 0.06096935272216797, - "step": 2930 - }, - { - "epoch": 2.0020463847203276, - "grad_norm": 0.3572078347206116, - "learning_rate": 3.86684767431802e-05, - "loss": 0.006809630990028381, - "step": 2935 - }, - { - "epoch": 2.0054570259208733, - "grad_norm": 0.06565147638320923, - "learning_rate": 3.8566007729613116e-05, - "loss": 0.0028434153646230698, - "step": 2940 - }, - { - "epoch": 2.008867667121419, - "grad_norm": 0.842980682849884, - "learning_rate": 3.846353074952705e-05, - "loss": 0.009484797716140747, - "step": 2945 - }, - { - "epoch": 2.0122783083219646, - "grad_norm": 0.2072802633047104, - "learning_rate": 3.83610465687559e-05, - "loss": 0.002631821669638157, - "step": 2950 - }, - { - "epoch": 2.0156889495225103, - "grad_norm": 0.010441784746944904, - "learning_rate": 3.8258555953187294e-05, - "loss": 0.0031868770718574526, - "step": 2955 - }, - { - "epoch": 2.019099590723056, - "grad_norm": 0.01504182443022728, - "learning_rate": 3.8156059668756994e-05, - "loss": 0.0008036898449063301, - "step": 2960 - }, - { - "epoch": 2.0225102319236017, - "grad_norm": 0.1884382665157318, - "learning_rate": 3.805355848144312e-05, - "loss": 0.0035643450915813445, - "step": 2965 - }, - { - "epoch": 2.0259208731241474, - "grad_norm": 0.0791923776268959, - "learning_rate": 3.795105315726042e-05, - "loss": 0.006217213720083237, - "step": 2970 - }, - { - "epoch": 2.029331514324693, - "grad_norm": 0.3660128116607666, - "learning_rate": 3.7848544462254586e-05, - "loss": 0.01521536111831665, - "step": 2975 - }, - { - "epoch": 2.0327421555252387, - "grad_norm": 1.1506773233413696, - "learning_rate": 3.774603316249646e-05, - "loss": 0.016655027866363525, - "step": 2980 - }, - { - "epoch": 2.0361527967257844, - "grad_norm": 0.010742763057351112, - "learning_rate": 3.764352002407638e-05, - "loss": 0.0031856697052717207, - "step": 2985 - }, - { - "epoch": 2.03956343792633, - "grad_norm": 0.008567198179662228, - "learning_rate": 3.754100581309843e-05, - "loss": 0.006546307355165482, - "step": 2990 - }, - { - "epoch": 2.042974079126876, - "grad_norm": 0.242637500166893, - "learning_rate": 3.743849129567467e-05, - "loss": 0.01844196617603302, - "step": 2995 - }, - { - "epoch": 2.0463847203274215, - "grad_norm": 0.0021864245645701885, - "learning_rate": 3.7335977237919486e-05, - "loss": 0.0010665619745850563, - "step": 3000 - }, - { - "epoch": 2.049795361527967, - "grad_norm": 0.0823604166507721, - "learning_rate": 3.723346440594384e-05, - "loss": 0.007866787165403366, - "step": 3005 - }, - { - "epoch": 2.053206002728513, - "grad_norm": 1.950189471244812, - "learning_rate": 3.713095356584948e-05, - "loss": 0.008237652480602264, - "step": 3010 - }, - { - "epoch": 2.0566166439290585, - "grad_norm": 0.0031665184069424868, - "learning_rate": 3.702844548372333e-05, - "loss": 0.0026241108775138856, - "step": 3015 - }, - { - "epoch": 2.060027285129604, - "grad_norm": 0.000985809718258679, - "learning_rate": 3.692594092563164e-05, - "loss": 0.0006582608446478843, - "step": 3020 - }, - { - "epoch": 2.06343792633015, - "grad_norm": 0.005130598321557045, - "learning_rate": 3.682344065761439e-05, - "loss": 0.003146781027317047, - "step": 3025 - }, - { - "epoch": 2.0668485675306956, - "grad_norm": 0.37476831674575806, - "learning_rate": 3.6720945445679456e-05, - "loss": 0.0038135331124067307, - "step": 3030 - }, - { - "epoch": 2.0702592087312413, - "grad_norm": 0.7057031989097595, - "learning_rate": 3.661845605579694e-05, - "loss": 0.01638354957103729, - "step": 3035 - }, - { - "epoch": 2.0736698499317874, - "grad_norm": 0.002361712511628866, - "learning_rate": 3.651597325389343e-05, - "loss": 0.008126144856214523, - "step": 3040 - }, - { - "epoch": 2.077080491132333, - "grad_norm": 0.14359615743160248, - "learning_rate": 3.641349780584628e-05, - "loss": 0.0010236113332211972, - "step": 3045 - }, - { - "epoch": 2.0804911323328787, - "grad_norm": 0.02165267802774906, - "learning_rate": 3.631103047747791e-05, - "loss": 0.001835300400853157, - "step": 3050 - }, - { - "epoch": 2.0839017735334244, - "grad_norm": 0.08726503700017929, - "learning_rate": 3.6208572034550014e-05, - "loss": 0.0011202219873666763, - "step": 3055 - }, - { - "epoch": 2.08731241473397, - "grad_norm": 0.011694432236254215, - "learning_rate": 3.6106123242757934e-05, - "loss": 0.0012682409957051276, - "step": 3060 - }, - { - "epoch": 2.090723055934516, - "grad_norm": 0.011882166378200054, - "learning_rate": 3.6003684867724856e-05, - "loss": 0.0017605546861886978, - "step": 3065 - }, - { - "epoch": 2.0941336971350615, - "grad_norm": 0.04591360688209534, - "learning_rate": 3.5901257674996135e-05, - "loss": 0.005101381987333298, - "step": 3070 - }, - { - "epoch": 2.097544338335607, - "grad_norm": 0.004696133080869913, - "learning_rate": 3.579884243003353e-05, - "loss": 0.0011597291566431522, - "step": 3075 - }, - { - "epoch": 2.100954979536153, - "grad_norm": 0.15893827378749847, - "learning_rate": 3.5696439898209546e-05, - "loss": 0.004860148951411247, - "step": 3080 - }, - { - "epoch": 2.1043656207366985, - "grad_norm": 0.0052610537968575954, - "learning_rate": 3.559405084480166e-05, - "loss": 0.0022133996710181235, - "step": 3085 - }, - { - "epoch": 2.107776261937244, - "grad_norm": 0.04163965955376625, - "learning_rate": 3.5491676034986634e-05, - "loss": 0.0011653171852231026, - "step": 3090 - }, - { - "epoch": 2.11118690313779, - "grad_norm": 0.03646431118249893, - "learning_rate": 3.538931623383477e-05, - "loss": 0.0010974819771945477, - "step": 3095 - }, - { - "epoch": 2.1145975443383356, - "grad_norm": 0.0027346035931259394, - "learning_rate": 3.5286972206304225e-05, - "loss": 0.0002963356673717499, - "step": 3100 - }, - { - "epoch": 2.1180081855388813, - "grad_norm": 0.030314767733216286, - "learning_rate": 3.5184644717235233e-05, - "loss": 0.0013646245934069157, - "step": 3105 - }, - { - "epoch": 2.121418826739427, - "grad_norm": 0.07366184145212173, - "learning_rate": 3.5082334531344514e-05, - "loss": 0.0018215376883745193, - "step": 3110 - }, - { - "epoch": 2.1248294679399726, - "grad_norm": 0.5131163597106934, - "learning_rate": 3.498004241321938e-05, - "loss": 0.011788859963417053, - "step": 3115 - }, - { - "epoch": 2.1282401091405183, - "grad_norm": 0.005606099497526884, - "learning_rate": 3.48777691273122e-05, - "loss": 0.00023315094877034425, - "step": 3120 - }, - { - "epoch": 2.131650750341064, - "grad_norm": 0.218208909034729, - "learning_rate": 3.4775515437934554e-05, - "loss": 0.0018167193979024888, - "step": 3125 - }, - { - "epoch": 2.1350613915416097, - "grad_norm": 0.026605455204844475, - "learning_rate": 3.467328210925161e-05, - "loss": 0.0012846079654991627, - "step": 3130 - }, - { - "epoch": 2.1384720327421554, - "grad_norm": 1.4139975309371948, - "learning_rate": 3.457106990527632e-05, - "loss": 0.00508112832903862, - "step": 3135 - }, - { - "epoch": 2.141882673942701, - "grad_norm": 0.019414646551012993, - "learning_rate": 3.446887958986385e-05, - "loss": 0.00019377884455025197, - "step": 3140 - }, - { - "epoch": 2.1452933151432467, - "grad_norm": 0.0009443740709684789, - "learning_rate": 3.4366711926705694e-05, - "loss": 0.0002384019084274769, - "step": 3145 - }, - { - "epoch": 2.148703956343793, - "grad_norm": 0.004274521954357624, - "learning_rate": 3.426456767932416e-05, - "loss": 0.0001209982088766992, - "step": 3150 - }, - { - "epoch": 2.1521145975443385, - "grad_norm": 1.536423683166504, - "learning_rate": 3.416244761106645e-05, - "loss": 0.008635792136192321, - "step": 3155 - }, - { - "epoch": 2.155525238744884, - "grad_norm": 0.0005219463491812348, - "learning_rate": 3.406035248509918e-05, - "loss": 0.009876561909914016, - "step": 3160 - }, - { - "epoch": 2.15893587994543, - "grad_norm": 0.0003459984145592898, - "learning_rate": 3.395828306440249e-05, - "loss": 0.00039457883685827254, - "step": 3165 - }, - { - "epoch": 2.1623465211459756, - "grad_norm": 0.001981241861358285, - "learning_rate": 3.3856240111764465e-05, - "loss": 0.0015202089212834834, - "step": 3170 - }, - { - "epoch": 2.1657571623465213, - "grad_norm": 2.1673197746276855, - "learning_rate": 3.375422438977536e-05, - "loss": 0.006145225092768669, - "step": 3175 - }, - { - "epoch": 2.169167803547067, - "grad_norm": 1.1808840036392212, - "learning_rate": 3.365223666082195e-05, - "loss": 0.013607437908649444, - "step": 3180 - }, - { - "epoch": 2.1725784447476126, - "grad_norm": 0.028271734714508057, - "learning_rate": 3.3550277687081766e-05, - "loss": 0.0013325599022209645, - "step": 3185 - }, - { - "epoch": 2.1759890859481583, - "grad_norm": 0.24461407959461212, - "learning_rate": 3.344834823051754e-05, - "loss": 0.0011271734721958638, - "step": 3190 - }, - { - "epoch": 2.179399727148704, - "grad_norm": 0.870476245880127, - "learning_rate": 3.334644905287129e-05, - "loss": 0.006120540574193001, - "step": 3195 - }, - { - "epoch": 2.1828103683492497, - "grad_norm": 0.24630939960479736, - "learning_rate": 3.324458091565887e-05, - "loss": 0.006894994527101517, - "step": 3200 - }, - { - "epoch": 2.1862210095497954, - "grad_norm": 0.035023678094148636, - "learning_rate": 3.314274458016407e-05, - "loss": 0.0012451894581317902, - "step": 3205 - }, - { - "epoch": 2.189631650750341, - "grad_norm": 0.0890582948923111, - "learning_rate": 3.3040940807433086e-05, - "loss": 0.0024930678308010103, - "step": 3210 - }, - { - "epoch": 2.1930422919508867, - "grad_norm": 0.30489957332611084, - "learning_rate": 3.2939170358268715e-05, - "loss": 0.023087967932224274, - "step": 3215 - }, - { - "epoch": 2.1964529331514324, - "grad_norm": 0.0017311271512880921, - "learning_rate": 3.283743399322477e-05, - "loss": 0.010184342414140702, - "step": 3220 - }, - { - "epoch": 2.199863574351978, - "grad_norm": 0.08493661880493164, - "learning_rate": 3.273573247260027e-05, - "loss": 0.02805263102054596, - "step": 3225 - }, - { - "epoch": 2.203274215552524, - "grad_norm": 0.09135634452104568, - "learning_rate": 3.2634066556433934e-05, - "loss": 0.0009638279676437378, - "step": 3230 - }, - { - "epoch": 2.2066848567530695, - "grad_norm": 0.05967738851904869, - "learning_rate": 3.2532437004498316e-05, - "loss": 0.007517358660697937, - "step": 3235 - }, - { - "epoch": 2.210095497953615, - "grad_norm": 0.03030387870967388, - "learning_rate": 3.243084457629425e-05, - "loss": 0.0009239451959729194, - "step": 3240 - }, - { - "epoch": 2.213506139154161, - "grad_norm": 0.004568960517644882, - "learning_rate": 3.2329290031045114e-05, - "loss": 0.00045777852647006513, - "step": 3245 - }, - { - "epoch": 2.2169167803547065, - "grad_norm": 0.20760250091552734, - "learning_rate": 3.2227774127691225e-05, - "loss": 0.0007285364903509617, - "step": 3250 - }, - { - "epoch": 2.220327421555252, - "grad_norm": 0.0028919128235429525, - "learning_rate": 3.2126297624884065e-05, - "loss": 0.001136382296681404, - "step": 3255 - }, - { - "epoch": 2.223738062755798, - "grad_norm": 0.0026204732712358236, - "learning_rate": 3.20248612809807e-05, - "loss": 0.0001833780319429934, - "step": 3260 - }, - { - "epoch": 2.2271487039563436, - "grad_norm": 0.054305560886859894, - "learning_rate": 3.192346585403805e-05, - "loss": 0.003986500948667526, - "step": 3265 - }, - { - "epoch": 2.2305593451568897, - "grad_norm": 0.024538133293390274, - "learning_rate": 3.182211210180732e-05, - "loss": 0.0003968174569308758, - "step": 3270 - }, - { - "epoch": 2.2339699863574354, - "grad_norm": 0.003948847763240337, - "learning_rate": 3.172080078172817e-05, - "loss": 0.0004363433923572302, - "step": 3275 - }, - { - "epoch": 2.237380627557981, - "grad_norm": 0.0024372704792767763, - "learning_rate": 3.161953265092322e-05, - "loss": 0.001003190129995346, - "step": 3280 - }, - { - "epoch": 2.2407912687585267, - "grad_norm": 0.05504141375422478, - "learning_rate": 3.1518308466192346e-05, - "loss": 0.00043217958882451056, - "step": 3285 - }, - { - "epoch": 2.2442019099590724, - "grad_norm": 0.003660411573946476, - "learning_rate": 3.141712898400692e-05, - "loss": 0.0005229417700320482, - "step": 3290 - }, - { - "epoch": 2.246930422919509, - "eval_loss": 0.07013023644685745, - "eval_runtime": 0.915, - "eval_samples_per_second": 81.971, - "eval_steps_per_second": 2.186, - "step": 3294 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3294 - }, - { - "epoch": 2.247612551159618, - "grad_norm": 0.0009039652650244534, - "learning_rate": 3.1315994960504354e-05, - "loss": 0.0009505398571491242, - "step": 3295 - }, - { - "epoch": 2.251023192360164, - "grad_norm": 0.0008299718610942364, - "learning_rate": 3.121490715148224e-05, - "loss": 0.006436178088188171, - "step": 3300 - }, - { - "epoch": 2.2544338335607095, - "grad_norm": 0.003360757604241371, - "learning_rate": 3.1113866312392846e-05, - "loss": 0.0004931201227009296, - "step": 3305 - }, - { - "epoch": 2.257844474761255, - "grad_norm": 0.0006958392332307994, - "learning_rate": 3.1012873198337415e-05, - "loss": 0.0008634727448225022, - "step": 3310 - }, - { - "epoch": 2.261255115961801, - "grad_norm": 0.0006489035440608859, - "learning_rate": 3.0911928564060525e-05, - "loss": 0.02126412242650986, - "step": 3315 - }, - { - "epoch": 2.2646657571623465, - "grad_norm": 0.05853112041950226, - "learning_rate": 3.081103316394446e-05, - "loss": 0.0011481027118861674, - "step": 3320 - }, - { - "epoch": 2.268076398362892, - "grad_norm": 0.018470890820026398, - "learning_rate": 3.0710187752003576e-05, - "loss": 0.0005085847340524196, - "step": 3325 - }, - { - "epoch": 2.271487039563438, - "grad_norm": 0.002904064953327179, - "learning_rate": 3.06093930818786e-05, - "loss": 0.0011355782859027385, - "step": 3330 - }, - { - "epoch": 2.2748976807639836, - "grad_norm": 0.006562090013176203, - "learning_rate": 3.0508649906831165e-05, - "loss": 0.0017314480617642402, - "step": 3335 - }, - { - "epoch": 2.2783083219645293, - "grad_norm": 0.012832654640078545, - "learning_rate": 3.040795897973794e-05, - "loss": 0.014564378559589386, - "step": 3340 - }, - { - "epoch": 2.281718963165075, - "grad_norm": 0.40248075127601624, - "learning_rate": 3.030732105308523e-05, - "loss": 0.013111820816993714, - "step": 3345 - }, - { - "epoch": 2.2851296043656206, - "grad_norm": 0.009625518694519997, - "learning_rate": 3.0206736878963198e-05, - "loss": 0.0003735888050869107, - "step": 3350 - }, - { - "epoch": 2.2885402455661663, - "grad_norm": 0.05925761163234711, - "learning_rate": 3.010620720906034e-05, - "loss": 0.0005200970452278852, - "step": 3355 - }, - { - "epoch": 2.291950886766712, - "grad_norm": 0.04488271474838257, - "learning_rate": 3.0005732794657804e-05, - "loss": 0.0017546603456139564, - "step": 3360 - }, - { - "epoch": 2.2953615279672577, - "grad_norm": 0.0013143798569217324, - "learning_rate": 2.990531438662383e-05, - "loss": 0.0006482157856225968, - "step": 3365 - }, - { - "epoch": 2.2987721691678034, - "grad_norm": 0.0018280980875715613, - "learning_rate": 2.980495273540805e-05, - "loss": 0.002798055298626423, - "step": 3370 - }, - { - "epoch": 2.3021828103683495, - "grad_norm": 0.0068644145503640175, - "learning_rate": 2.9704648591036028e-05, - "loss": 0.0010916708968579769, - "step": 3375 - }, - { - "epoch": 2.305593451568895, - "grad_norm": 0.006140770856291056, - "learning_rate": 2.9604402703103482e-05, - "loss": 0.0003204951295629144, - "step": 3380 - }, - { - "epoch": 2.309004092769441, - "grad_norm": 0.01666918210685253, - "learning_rate": 2.9504215820770825e-05, - "loss": 0.002915392816066742, - "step": 3385 - }, - { - "epoch": 2.3124147339699865, - "grad_norm": 0.001569412648677826, - "learning_rate": 2.9404088692757462e-05, - "loss": 0.00282623004168272, - "step": 3390 - }, - { - "epoch": 2.315825375170532, - "grad_norm": 2.6985678672790527, - "learning_rate": 2.930402206733629e-05, - "loss": 0.056363034248352054, - "step": 3395 - }, - { - "epoch": 2.319236016371078, - "grad_norm": 0.061534252017736435, - "learning_rate": 2.9204016692328008e-05, - "loss": 0.002193786948919296, - "step": 3400 - }, - { - "epoch": 2.3226466575716236, - "grad_norm": 0.00724546005949378, - "learning_rate": 2.9104073315095624e-05, - "loss": 0.0027640098705887794, - "step": 3405 - }, - { - "epoch": 2.3260572987721693, - "grad_norm": 0.0014935819199308753, - "learning_rate": 2.900419268253876e-05, - "loss": 0.0014965098351240158, - "step": 3410 - }, - { - "epoch": 2.329467939972715, - "grad_norm": 0.2667955458164215, - "learning_rate": 2.89043755410882e-05, - "loss": 0.0009135601110756397, - "step": 3415 - }, - { - "epoch": 2.3328785811732606, - "grad_norm": 0.015711264684796333, - "learning_rate": 2.8804622636700195e-05, - "loss": 0.0004993634298443794, - "step": 3420 - }, - { - "epoch": 2.3362892223738063, - "grad_norm": 0.000695803901180625, - "learning_rate": 2.8704934714850972e-05, - "loss": 0.0010460540652275085, - "step": 3425 - }, - { - "epoch": 2.339699863574352, - "grad_norm": 0.00030175631400197744, - "learning_rate": 2.8605312520531102e-05, - "loss": 0.0011491063050925732, - "step": 3430 - }, - { - "epoch": 2.3431105047748977, - "grad_norm": 0.0008360512438230217, - "learning_rate": 2.850575679823998e-05, - "loss": 0.005195276811718941, - "step": 3435 - }, - { - "epoch": 2.3465211459754434, - "grad_norm": 0.07670744508504868, - "learning_rate": 2.840626829198022e-05, - "loss": 0.001102046575397253, - "step": 3440 - }, - { - "epoch": 2.349931787175989, - "grad_norm": 0.0048200939781963825, - "learning_rate": 2.8306847745252154e-05, - "loss": 0.00011967071332037449, - "step": 3445 - }, - { - "epoch": 2.3533424283765347, - "grad_norm": 0.0036802536342293024, - "learning_rate": 2.8207495901048164e-05, - "loss": 0.003212982416152954, - "step": 3450 - }, - { - "epoch": 2.3567530695770804, - "grad_norm": 0.0017565820598974824, - "learning_rate": 2.8108213501847284e-05, - "loss": 3.878590650856495e-05, - "step": 3455 - }, - { - "epoch": 2.360163710777626, - "grad_norm": 0.07837986201047897, - "learning_rate": 2.8009001289609514e-05, - "loss": 0.00035386246163398026, - "step": 3460 - }, - { - "epoch": 2.363574351978172, - "grad_norm": 0.035858154296875, - "learning_rate": 2.7909860005770364e-05, - "loss": 0.0020171813666820526, - "step": 3465 - }, - { - "epoch": 2.3669849931787175, - "grad_norm": 0.001313618617132306, - "learning_rate": 2.781079039123525e-05, - "loss": 0.0077533811330795285, - "step": 3470 - }, - { - "epoch": 2.370395634379263, - "grad_norm": 0.022166471928358078, - "learning_rate": 2.771179318637402e-05, - "loss": 0.00021515686530619859, - "step": 3475 - }, - { - "epoch": 2.373806275579809, - "grad_norm": 0.0037807885091751814, - "learning_rate": 2.7612869131015353e-05, - "loss": 0.008334387093782425, - "step": 3480 - }, - { - "epoch": 2.3772169167803545, - "grad_norm": 0.02126333676278591, - "learning_rate": 2.7514018964441313e-05, - "loss": 0.0012980472296476365, - "step": 3485 - }, - { - "epoch": 2.3806275579809, - "grad_norm": 0.0007329506915993989, - "learning_rate": 2.7415243425381707e-05, - "loss": 0.000131706683896482, - "step": 3490 - }, - { - "epoch": 2.384038199181446, - "grad_norm": 0.008962417021393776, - "learning_rate": 2.73165432520087e-05, - "loss": 0.0001407766016200185, - "step": 3495 - }, - { - "epoch": 2.3874488403819916, - "grad_norm": 0.06224314495921135, - "learning_rate": 2.721791918193119e-05, - "loss": 0.0005040234886109829, - "step": 3500 - }, - { - "epoch": 2.3908594815825377, - "grad_norm": 0.007790696807205677, - "learning_rate": 2.7119371952189368e-05, - "loss": 0.00020941467955708503, - "step": 3505 - }, - { - "epoch": 2.3942701227830834, - "grad_norm": 0.1999143660068512, - "learning_rate": 2.7020902299249144e-05, - "loss": 0.0005157966166734696, - "step": 3510 - }, - { - "epoch": 2.397680763983629, - "grad_norm": 1.5223946571350098, - "learning_rate": 2.692251095899673e-05, - "loss": 0.004808775335550308, - "step": 3515 - }, - { - "epoch": 2.4010914051841747, - "grad_norm": 0.0005383774405345321, - "learning_rate": 2.6824198666733024e-05, - "loss": 0.0007459132932126522, - "step": 3520 - }, - { - "epoch": 2.4045020463847204, - "grad_norm": 0.02152041345834732, - "learning_rate": 2.672596615716823e-05, - "loss": 0.010163982212543488, - "step": 3525 - }, - { - "epoch": 2.407912687585266, - "grad_norm": 0.1950986683368683, - "learning_rate": 2.6627814164416303e-05, - "loss": 0.002464359626173973, - "step": 3530 - }, - { - "epoch": 2.411323328785812, - "grad_norm": 0.21561792492866516, - "learning_rate": 2.652974342198947e-05, - "loss": 0.0010975897312164307, - "step": 3535 - }, - { - "epoch": 2.4147339699863575, - "grad_norm": 0.0007951174047775567, - "learning_rate": 2.6431754662792775e-05, - "loss": 6.033455138094723e-05, - "step": 3540 - }, - { - "epoch": 2.418144611186903, - "grad_norm": 0.0016590118175372481, - "learning_rate": 2.633384861911856e-05, - "loss": 0.00012161724735051393, - "step": 3545 - }, - { - "epoch": 2.421555252387449, - "grad_norm": 0.004098537378013134, - "learning_rate": 2.6236026022641047e-05, - "loss": 0.0006160829216241837, - "step": 3550 - }, - { - "epoch": 2.4249658935879945, - "grad_norm": 0.0009240853250958025, - "learning_rate": 2.6138287604410772e-05, - "loss": 8.804704993963242e-05, - "step": 3555 - }, - { - "epoch": 2.42837653478854, - "grad_norm": 0.005952226463705301, - "learning_rate": 2.604063409484928e-05, - "loss": 0.0006035147234797478, - "step": 3560 - }, - { - "epoch": 2.431787175989086, - "grad_norm": 0.03809252381324768, - "learning_rate": 2.5943066223743488e-05, - "loss": 0.00727783590555191, - "step": 3565 - }, - { - "epoch": 2.4351978171896316, - "grad_norm": 0.05054875835776329, - "learning_rate": 2.5845584720240384e-05, - "loss": 0.0082052581012249, - "step": 3570 - }, - { - "epoch": 2.4386084583901773, - "grad_norm": 0.0147855868563056, - "learning_rate": 2.5748190312841466e-05, - "loss": 0.011614852398633958, - "step": 3575 - }, - { - "epoch": 2.442019099590723, - "grad_norm": 0.011641742661595345, - "learning_rate": 2.5650883729397373e-05, - "loss": 0.0002830417361110449, - "step": 3580 - }, - { - "epoch": 2.4454297407912686, - "grad_norm": 0.04626445844769478, - "learning_rate": 2.5553665697102386e-05, - "loss": 0.0003774407086893916, - "step": 3585 - }, - { - "epoch": 2.4488403819918143, - "grad_norm": 0.3234706521034241, - "learning_rate": 2.5456536942489065e-05, - "loss": 0.0009496832266449928, - "step": 3590 - }, - { - "epoch": 2.45225102319236, - "grad_norm": 0.029156841337680817, - "learning_rate": 2.535949819142272e-05, - "loss": 0.0016127176582813262, - "step": 3595 - }, - { - "epoch": 2.4556616643929057, - "grad_norm": 0.0015022120205685496, - "learning_rate": 2.52625501690961e-05, - "loss": 0.00010128046851605177, - "step": 3600 - }, - { - "epoch": 2.459072305593452, - "grad_norm": 0.12954266369342804, - "learning_rate": 2.5165693600023872e-05, - "loss": 0.004440005496144294, - "step": 3605 - }, - { - "epoch": 2.4624829467939975, - "grad_norm": 0.022409839555621147, - "learning_rate": 2.5068929208037295e-05, - "loss": 0.0019246777519583702, - "step": 3610 - }, - { - "epoch": 2.465893587994543, - "grad_norm": 0.0018720730440691113, - "learning_rate": 2.497225771627873e-05, - "loss": 0.004561808705329895, - "step": 3615 - }, - { - "epoch": 2.469304229195089, - "grad_norm": 0.0021158247254788876, - "learning_rate": 2.4875679847196312e-05, - "loss": 0.005481125041842461, - "step": 3620 - }, - { - "epoch": 2.4727148703956345, - "grad_norm": 0.0024307845160365105, - "learning_rate": 2.477919632253845e-05, - "loss": 0.0009140795096755028, - "step": 3625 - }, - { - "epoch": 2.47612551159618, - "grad_norm": 0.0020758784376084805, - "learning_rate": 2.4682807863348583e-05, - "loss": 0.001236506924033165, - "step": 3630 - }, - { - "epoch": 2.479536152796726, - "grad_norm": 0.0006182460929267108, - "learning_rate": 2.4586515189959614e-05, - "loss": 0.00015565860085189342, - "step": 3635 - }, - { - "epoch": 2.4829467939972716, - "grad_norm": 0.05087731033563614, - "learning_rate": 2.4490319021988688e-05, - "loss": 0.00022137174382805825, - "step": 3640 - }, - { - "epoch": 2.4863574351978173, - "grad_norm": 0.03250613436102867, - "learning_rate": 2.4394220078331695e-05, - "loss": 0.00028696306981146336, - "step": 3645 - }, - { - "epoch": 2.489768076398363, - "grad_norm": 0.017168540507555008, - "learning_rate": 2.429821907715798e-05, - "loss": 0.0003641644492745399, - "step": 3650 - }, - { - "epoch": 2.4931787175989086, - "grad_norm": 0.0670199990272522, - "learning_rate": 2.420231673590491e-05, - "loss": 0.00015748695004731418, - "step": 3655 - }, - { - "epoch": 2.4965893587994543, - "grad_norm": 0.003998387139290571, - "learning_rate": 2.4106513771272585e-05, - "loss": 0.00026149852201342585, - "step": 3660 - }, - { - "epoch": 2.4965893587994543, - "eval_loss": 0.06822175532579422, - "eval_runtime": 0.9108, - "eval_samples_per_second": 82.345, - "eval_steps_per_second": 2.196, - "step": 3660 - }, - { - "eval_cer_subset": 0.01675977653631285, - "eval_cer_subset_edit_distance": 123, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3660 - }, - { - "epoch": 2.5, - "grad_norm": 0.0059893373399972916, - "learning_rate": 2.4010810899218384e-05, - "loss": 0.0037302006036043166, - "step": 3665 - }, - { - "epoch": 2.5034106412005457, - "grad_norm": 0.3968847692012787, - "learning_rate": 2.3915208834951736e-05, - "loss": 0.0008235686458647251, - "step": 3670 - }, - { - "epoch": 2.5068212824010914, - "grad_norm": 0.001170233590528369, - "learning_rate": 2.3819708292928645e-05, - "loss": 0.0021816927939653395, - "step": 3675 - }, - { - "epoch": 2.510231923601637, - "grad_norm": 0.0864306092262268, - "learning_rate": 2.3724309986846476e-05, - "loss": 0.00794672966003418, - "step": 3680 - }, - { - "epoch": 2.5136425648021827, - "grad_norm": 0.0012164375511929393, - "learning_rate": 2.362901462963851e-05, - "loss": 0.00014161464059725404, - "step": 3685 - }, - { - "epoch": 2.5170532060027284, - "grad_norm": 0.0047707995399832726, - "learning_rate": 2.353382293346872e-05, - "loss": 0.00012235456379130482, - "step": 3690 - }, - { - "epoch": 2.520463847203274, - "grad_norm": 0.0010226344456896186, - "learning_rate": 2.3438735609726346e-05, - "loss": 0.0006677288562059403, - "step": 3695 - }, - { - "epoch": 2.52387448840382, - "grad_norm": 0.01809096150100231, - "learning_rate": 2.334375336902067e-05, - "loss": 0.0004967927932739257, - "step": 3700 - }, - { - "epoch": 2.5272851296043655, - "grad_norm": 0.006922638975083828, - "learning_rate": 2.3248876921175613e-05, - "loss": 0.0012997164390981196, - "step": 3705 - }, - { - "epoch": 2.530695770804911, - "grad_norm": 0.0002996268740389496, - "learning_rate": 2.315410697522456e-05, - "loss": 5.4457224905490875e-05, - "step": 3710 - }, - { - "epoch": 2.534106412005457, - "grad_norm": 0.00561846699565649, - "learning_rate": 2.3059444239404896e-05, - "loss": 0.0002347052562981844, - "step": 3715 - }, - { - "epoch": 2.5375170532060025, - "grad_norm": 1.200972318649292, - "learning_rate": 2.296488942115287e-05, - "loss": 0.003510555624961853, - "step": 3720 - }, - { - "epoch": 2.540927694406548, - "grad_norm": 0.008847455494105816, - "learning_rate": 2.287044322709819e-05, - "loss": 0.00010497854091227055, - "step": 3725 - }, - { - "epoch": 2.544338335607094, - "grad_norm": 0.0026281927712261677, - "learning_rate": 2.277610636305883e-05, - "loss": 0.001988488808274269, - "step": 3730 - }, - { - "epoch": 2.5477489768076396, - "grad_norm": 0.008025784976780415, - "learning_rate": 2.268187953403568e-05, - "loss": 0.023679326474666595, - "step": 3735 - }, - { - "epoch": 2.5511596180081857, - "grad_norm": 0.03441132605075836, - "learning_rate": 2.258776344420735e-05, - "loss": 0.0004788160789757967, - "step": 3740 - }, - { - "epoch": 2.5545702592087314, - "grad_norm": 3.1458778381347656, - "learning_rate": 2.2493758796924816e-05, - "loss": 0.008043569326400758, - "step": 3745 - }, - { - "epoch": 2.557980900409277, - "grad_norm": 0.002775805303826928, - "learning_rate": 2.2399866294706302e-05, - "loss": 0.0013419794850051403, - "step": 3750 - }, - { - "epoch": 2.5613915416098227, - "grad_norm": 0.0030509193893522024, - "learning_rate": 2.2306086639231857e-05, - "loss": 0.03926021754741669, - "step": 3755 - }, - { - "epoch": 2.5648021828103684, - "grad_norm": 0.0024770230520516634, - "learning_rate": 2.2212420531338248e-05, - "loss": 0.0011906253173947334, - "step": 3760 - }, - { - "epoch": 2.568212824010914, - "grad_norm": 0.007617161609232426, - "learning_rate": 2.2118868671013692e-05, - "loss": 0.008073102682828903, - "step": 3765 - }, - { - "epoch": 2.57162346521146, - "grad_norm": 0.08999158442020416, - "learning_rate": 2.202543175739254e-05, - "loss": 0.0009835162200033665, - "step": 3770 - }, - { - "epoch": 2.5750341064120055, - "grad_norm": 0.010818173177540302, - "learning_rate": 2.193211048875022e-05, - "loss": 0.0019240962341427804, - "step": 3775 - }, - { - "epoch": 2.578444747612551, - "grad_norm": 0.01809680461883545, - "learning_rate": 2.183890556249781e-05, - "loss": 0.005013756453990936, - "step": 3780 - }, - { - "epoch": 2.581855388813097, - "grad_norm": 0.021501798182725906, - "learning_rate": 2.1745817675177027e-05, - "loss": 0.0005870801862329245, - "step": 3785 - }, - { - "epoch": 2.5852660300136425, - "grad_norm": 0.011130684986710548, - "learning_rate": 2.165284752245485e-05, - "loss": 0.00037821107544004916, - "step": 3790 - }, - { - "epoch": 2.588676671214188, - "grad_norm": 0.00624213507398963, - "learning_rate": 2.1559995799118496e-05, - "loss": 0.015400664508342743, - "step": 3795 - }, - { - "epoch": 2.592087312414734, - "grad_norm": 0.23763298988342285, - "learning_rate": 2.1467263199070018e-05, - "loss": 0.0006831173319369555, - "step": 3800 - }, - { - "epoch": 2.5954979536152796, - "grad_norm": 0.0056639909744262695, - "learning_rate": 2.137465041532133e-05, - "loss": 0.002130754478275776, - "step": 3805 - }, - { - "epoch": 2.5989085948158253, - "grad_norm": 0.42903369665145874, - "learning_rate": 2.1282158139988877e-05, - "loss": 0.0020006079226732253, - "step": 3810 - }, - { - "epoch": 2.602319236016371, - "grad_norm": 0.014364579692482948, - "learning_rate": 2.118978706428854e-05, - "loss": 0.0005437508225440979, - "step": 3815 - }, - { - "epoch": 2.6057298772169166, - "grad_norm": 0.017512807622551918, - "learning_rate": 2.1097537878530427e-05, - "loss": 0.00019666440784931182, - "step": 3820 - }, - { - "epoch": 2.6091405184174628, - "grad_norm": 0.00863230973482132, - "learning_rate": 2.100541127211379e-05, - "loss": 0.0001873808912932873, - "step": 3825 - }, - { - "epoch": 2.6125511596180084, - "grad_norm": 0.006781345698982477, - "learning_rate": 2.0913407933521714e-05, - "loss": 0.00018554476555436849, - "step": 3830 - }, - { - "epoch": 2.615961800818554, - "grad_norm": 0.004758995026350021, - "learning_rate": 2.082152855031618e-05, - "loss": 0.0004659180995076895, - "step": 3835 - }, - { - "epoch": 2.6193724420191, - "grad_norm": 0.0036142354365438223, - "learning_rate": 2.0729773809132782e-05, - "loss": 0.00033613520208746194, - "step": 3840 - }, - { - "epoch": 2.6227830832196455, - "grad_norm": 0.0798744410276413, - "learning_rate": 2.0638144395675614e-05, - "loss": 0.00026304563507437704, - "step": 3845 - }, - { - "epoch": 2.626193724420191, - "grad_norm": 0.07229600101709366, - "learning_rate": 2.0546640994712183e-05, - "loss": 0.022786998748779298, - "step": 3850 - }, - { - "epoch": 2.629604365620737, - "grad_norm": 0.11226585507392883, - "learning_rate": 2.04552642900683e-05, - "loss": 0.0002580304862931371, - "step": 3855 - }, - { - "epoch": 2.6330150068212825, - "grad_norm": 0.0019689116161316633, - "learning_rate": 2.036401496462292e-05, - "loss": 0.005474040284752846, - "step": 3860 - }, - { - "epoch": 2.636425648021828, - "grad_norm": 0.08611829578876495, - "learning_rate": 2.027289370030307e-05, - "loss": 0.0007106051780283451, - "step": 3865 - }, - { - "epoch": 2.639836289222374, - "grad_norm": 0.06968124955892563, - "learning_rate": 2.0181901178078723e-05, - "loss": 0.00030497927218675613, - "step": 3870 - }, - { - "epoch": 2.6432469304229196, - "grad_norm": 0.002102258615195751, - "learning_rate": 2.0091038077957807e-05, - "loss": 0.00039041375275701285, - "step": 3875 - }, - { - "epoch": 2.6466575716234653, - "grad_norm": 0.01055186241865158, - "learning_rate": 2.000030507898094e-05, - "loss": 0.00028035915456712244, - "step": 3880 - }, - { - "epoch": 2.650068212824011, - "grad_norm": 0.013122744858264923, - "learning_rate": 1.990970285921656e-05, - "loss": 0.0002463514683768153, - "step": 3885 - }, - { - "epoch": 2.6534788540245566, - "grad_norm": 0.043785031884908676, - "learning_rate": 1.9819232095755712e-05, - "loss": 0.0006866191513836383, - "step": 3890 - }, - { - "epoch": 2.6568894952251023, - "grad_norm": 0.014347897842526436, - "learning_rate": 1.9728893464707063e-05, - "loss": 0.00304874274879694, - "step": 3895 - }, - { - "epoch": 2.660300136425648, - "grad_norm": 0.01495263073593378, - "learning_rate": 1.9638687641191784e-05, - "loss": 0.0027243653312325478, - "step": 3900 - }, - { - "epoch": 2.6637107776261937, - "grad_norm": 0.0025812601670622826, - "learning_rate": 1.954861529933862e-05, - "loss": 0.00015772593906149268, - "step": 3905 - }, - { - "epoch": 2.6671214188267394, - "grad_norm": 3.2978317737579346, - "learning_rate": 1.9458677112278677e-05, - "loss": 0.011941131204366684, - "step": 3910 - }, - { - "epoch": 2.670532060027285, - "grad_norm": 0.00819153431802988, - "learning_rate": 1.936887375214059e-05, - "loss": 0.0019363060593605042, - "step": 3915 - }, - { - "epoch": 2.6739427012278307, - "grad_norm": 0.3553819954395294, - "learning_rate": 1.9279205890045335e-05, - "loss": 0.001681213453412056, - "step": 3920 - }, - { - "epoch": 2.6773533424283764, - "grad_norm": 0.14068304002285004, - "learning_rate": 1.9189674196101303e-05, - "loss": 0.0004354804754257202, - "step": 3925 - }, - { - "epoch": 2.680763983628922, - "grad_norm": 0.05331770330667496, - "learning_rate": 1.9100279339399258e-05, - "loss": 0.0006728332955390215, - "step": 3930 - }, - { - "epoch": 2.684174624829468, - "grad_norm": 0.010825222358107567, - "learning_rate": 1.9011021988007387e-05, - "loss": 0.011760103702545165, - "step": 3935 - }, - { - "epoch": 2.6875852660300135, - "grad_norm": 0.02598944492638111, - "learning_rate": 1.892190280896622e-05, - "loss": 0.00020915823988616468, - "step": 3940 - }, - { - "epoch": 2.690995907230559, - "grad_norm": 0.00512358546257019, - "learning_rate": 1.8832922468283724e-05, - "loss": 0.000882271584123373, - "step": 3945 - }, - { - "epoch": 2.694406548431105, - "grad_norm": 0.05068441852927208, - "learning_rate": 1.874408163093028e-05, - "loss": 0.000997264590114355, - "step": 3950 - }, - { - "epoch": 2.6978171896316505, - "grad_norm": 0.0038104017730802298, - "learning_rate": 1.8655380960833724e-05, - "loss": 0.001553349569439888, - "step": 3955 - }, - { - "epoch": 2.701227830832196, - "grad_norm": 0.0013087299885228276, - "learning_rate": 1.8566821120874394e-05, - "loss": 0.006560490280389786, - "step": 3960 - }, - { - "epoch": 2.704638472032742, - "grad_norm": 1.1628080606460571, - "learning_rate": 1.8478402772880208e-05, - "loss": 0.0015312742441892623, - "step": 3965 - }, - { - "epoch": 2.708049113233288, - "grad_norm": 0.0020620303694158792, - "learning_rate": 1.8390126577621636e-05, - "loss": 0.013011389970779419, - "step": 3970 - }, - { - "epoch": 2.7114597544338337, - "grad_norm": 0.0014427551068365574, - "learning_rate": 1.830199319480682e-05, - "loss": 0.0008381184190511704, - "step": 3975 - }, - { - "epoch": 2.7148703956343794, - "grad_norm": 0.0005985202733427286, - "learning_rate": 1.821400328307663e-05, - "loss": 0.0005598202813416719, - "step": 3980 - }, - { - "epoch": 2.718281036834925, - "grad_norm": 0.0016122297383844852, - "learning_rate": 1.8126157499999783e-05, - "loss": 0.006013911962509155, - "step": 3985 - }, - { - "epoch": 2.7216916780354707, - "grad_norm": 0.0028895260766148567, - "learning_rate": 1.8038456502067822e-05, - "loss": 0.00017103723948821425, - "step": 3990 - }, - { - "epoch": 2.7251023192360164, - "grad_norm": 0.09400962293148041, - "learning_rate": 1.7950900944690308e-05, - "loss": 0.07410463690757751, - "step": 3995 - }, - { - "epoch": 2.728512960436562, - "grad_norm": 0.015019465237855911, - "learning_rate": 1.786349148218993e-05, - "loss": 0.004524913057684899, - "step": 4000 - }, - { - "epoch": 2.731923601637108, - "grad_norm": 0.000663114245980978, - "learning_rate": 1.7776228767797522e-05, - "loss": 0.0212590754032135, - "step": 4005 - }, - { - "epoch": 2.7353342428376535, - "grad_norm": 0.0029272777028381824, - "learning_rate": 1.768911345364726e-05, - "loss": 0.000913316011428833, - "step": 4010 - }, - { - "epoch": 2.738744884038199, - "grad_norm": 0.03791525587439537, - "learning_rate": 1.7602146190771743e-05, - "loss": 0.0018313366919755936, - "step": 4015 - }, - { - "epoch": 2.742155525238745, - "grad_norm": 0.041133999824523926, - "learning_rate": 1.7515327629097217e-05, - "loss": 0.0006253012455999851, - "step": 4020 - }, - { - "epoch": 2.7455661664392905, - "grad_norm": 0.01035034004598856, - "learning_rate": 1.7428658417438534e-05, - "loss": 0.005944912880659103, - "step": 4025 - }, - { - "epoch": 2.7462482946793996, - "eval_loss": 0.06961391866207123, - "eval_runtime": 0.9223, - "eval_samples_per_second": 81.321, - "eval_steps_per_second": 2.169, - "step": 4026 - }, - { - "eval_cer_subset": 0.015397193078076032, - "eval_cer_subset_edit_distance": 113, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4026 - }, - { - "epoch": 2.748976807639836, - "grad_norm": 0.9081467986106873, - "learning_rate": 1.7342139203494537e-05, - "loss": 0.0036753010004758834, - "step": 4030 - }, - { - "epoch": 2.752387448840382, - "grad_norm": 0.07232939451932907, - "learning_rate": 1.7255770633843028e-05, - "loss": 0.0029829120263457297, - "step": 4035 - }, - { - "epoch": 2.7557980900409276, - "grad_norm": 0.005813417956233025, - "learning_rate": 1.7169553353936035e-05, - "loss": 0.00047225411981344223, - "step": 4040 - }, - { - "epoch": 2.7592087312414733, - "grad_norm": 0.2354760318994522, - "learning_rate": 1.7083488008094945e-05, - "loss": 0.0015884984284639358, - "step": 4045 - }, - { - "epoch": 2.762619372442019, - "grad_norm": 0.1201627105474472, - "learning_rate": 1.699757523950577e-05, - "loss": 0.0010768620297312737, - "step": 4050 - }, - { - "epoch": 2.766030013642565, - "grad_norm": 0.033547814935445786, - "learning_rate": 1.6911815690214166e-05, - "loss": 0.01052093282341957, - "step": 4055 - }, - { - "epoch": 2.7694406548431107, - "grad_norm": 0.01014826912432909, - "learning_rate": 1.682621000112085e-05, - "loss": 0.0003362501040101051, - "step": 4060 - }, - { - "epoch": 2.7728512960436564, - "grad_norm": 0.004405386745929718, - "learning_rate": 1.6740758811976665e-05, - "loss": 0.00044001247733831405, - "step": 4065 - }, - { - "epoch": 2.776261937244202, - "grad_norm": 3.621004104614258, - "learning_rate": 1.665546276137783e-05, - "loss": 0.18638403415679933, - "step": 4070 - }, - { - "epoch": 2.779672578444748, - "grad_norm": 0.01203183177858591, - "learning_rate": 1.6570322486761184e-05, - "loss": 0.00013435594737529755, - "step": 4075 - }, - { - "epoch": 2.7830832196452935, - "grad_norm": 0.23053398728370667, - "learning_rate": 1.6485338624399445e-05, - "loss": 0.0010434269905090332, - "step": 4080 - }, - { - "epoch": 2.786493860845839, - "grad_norm": 0.0109801534563303, - "learning_rate": 1.6400511809396394e-05, - "loss": 0.00012704560067504643, - "step": 4085 - }, - { - "epoch": 2.789904502046385, - "grad_norm": 0.8463883399963379, - "learning_rate": 1.631584267568217e-05, - "loss": 0.007707947492599487, - "step": 4090 - }, - { - "epoch": 2.7933151432469305, - "grad_norm": 0.05444789677858353, - "learning_rate": 1.623133185600852e-05, - "loss": 0.0008411366492509842, - "step": 4095 - }, - { - "epoch": 2.796725784447476, - "grad_norm": 0.007274657487869263, - "learning_rate": 1.6146979981944095e-05, - "loss": 0.0002061120932921767, - "step": 4100 - }, - { - "epoch": 2.800136425648022, - "grad_norm": 0.013262615539133549, - "learning_rate": 1.6062787683869667e-05, - "loss": 0.0003235015319660306, - "step": 4105 - }, - { - "epoch": 2.8035470668485676, - "grad_norm": 0.0030244409572333097, - "learning_rate": 1.597875559097352e-05, - "loss": 0.0007286245469003916, - "step": 4110 - }, - { - "epoch": 2.8069577080491133, - "grad_norm": 0.0021644949447363615, - "learning_rate": 1.5894884331246632e-05, - "loss": 0.0003056209534406662, - "step": 4115 - }, - { - "epoch": 2.810368349249659, - "grad_norm": 0.014495057985186577, - "learning_rate": 1.5811174531478074e-05, - "loss": 0.0014359142631292343, - "step": 4120 - }, - { - "epoch": 2.8137789904502046, - "grad_norm": 0.05203676223754883, - "learning_rate": 1.5727626817250255e-05, - "loss": 0.0006342739332467317, - "step": 4125 - }, - { - "epoch": 2.8171896316507503, - "grad_norm": 0.004998183809220791, - "learning_rate": 1.564424181293435e-05, - "loss": 0.00013386564096435906, - "step": 4130 - }, - { - "epoch": 2.820600272851296, - "grad_norm": 0.05428452417254448, - "learning_rate": 1.556102014168546e-05, - "loss": 0.00014423681423068047, - "step": 4135 - }, - { - "epoch": 2.8240109140518417, - "grad_norm": 0.0013511159922927618, - "learning_rate": 1.5477962425438164e-05, - "loss": 6.5605464624241e-05, - "step": 4140 - }, - { - "epoch": 2.8274215552523874, - "grad_norm": 0.9757132530212402, - "learning_rate": 1.539506928490171e-05, - "loss": 0.00839839205145836, - "step": 4145 - }, - { - "epoch": 2.830832196452933, - "grad_norm": 0.0018282996024936438, - "learning_rate": 1.5312341339555445e-05, - "loss": 0.005959897115826607, - "step": 4150 - }, - { - "epoch": 2.8342428376534787, - "grad_norm": 0.0022353942040354013, - "learning_rate": 1.5229779207644171e-05, - "loss": 0.000250368332490325, - "step": 4155 - }, - { - "epoch": 2.8376534788540244, - "grad_norm": 0.006538774352520704, - "learning_rate": 1.5147383506173572e-05, - "loss": 0.0004641829524189234, - "step": 4160 - }, - { - "epoch": 2.84106412005457, - "grad_norm": 0.003464010776951909, - "learning_rate": 1.5065154850905465e-05, - "loss": 0.008368657529354095, - "step": 4165 - }, - { - "epoch": 2.844474761255116, - "grad_norm": 0.0020767974201589823, - "learning_rate": 1.4983093856353398e-05, - "loss": 0.00010023106588050724, - "step": 4170 - }, - { - "epoch": 2.8478854024556615, - "grad_norm": 0.0025670777540653944, - "learning_rate": 1.4901201135777887e-05, - "loss": 0.0006715046241879463, - "step": 4175 - }, - { - "epoch": 2.851296043656207, - "grad_norm": 0.0006886612391099334, - "learning_rate": 1.4819477301181915e-05, - "loss": 0.0008357623592019081, - "step": 4180 - }, - { - "epoch": 2.854706684856753, - "grad_norm": 0.0023747060913592577, - "learning_rate": 1.4737922963306332e-05, - "loss": 0.00488339364528656, - "step": 4185 - }, - { - "epoch": 2.8581173260572985, - "grad_norm": 0.0030493123922497034, - "learning_rate": 1.4656538731625333e-05, - "loss": 0.017219077050685882, - "step": 4190 - }, - { - "epoch": 2.8615279672578446, - "grad_norm": 0.0030935786198824644, - "learning_rate": 1.457532521434184e-05, - "loss": 0.00014684826601296663, - "step": 4195 - }, - { - "epoch": 2.8649386084583903, - "grad_norm": 0.0004102849052287638, - "learning_rate": 1.4494283018382991e-05, - "loss": 0.0002242558402940631, - "step": 4200 - }, - { - "epoch": 2.868349249658936, - "grad_norm": 1.6695232391357422, - "learning_rate": 1.4413412749395593e-05, - "loss": 0.01916976124048233, - "step": 4205 - }, - { - "epoch": 2.8717598908594817, - "grad_norm": 0.003357100998982787, - "learning_rate": 1.4332715011741656e-05, - "loss": 0.0036146264523267747, - "step": 4210 - }, - { - "epoch": 2.8751705320600274, - "grad_norm": 0.002434425987303257, - "learning_rate": 1.425219040849373e-05, - "loss": 0.0001186407171189785, - "step": 4215 - }, - { - "epoch": 2.878581173260573, - "grad_norm": 0.0272241048514843, - "learning_rate": 1.4171839541430586e-05, - "loss": 0.003773893415927887, - "step": 4220 - }, - { - "epoch": 2.8819918144611187, - "grad_norm": 0.00031364246387965977, - "learning_rate": 1.409166301103257e-05, - "loss": 3.483370819594711e-05, - "step": 4225 - }, - { - "epoch": 2.8854024556616644, - "grad_norm": 0.008605693466961384, - "learning_rate": 1.4011661416477186e-05, - "loss": 0.005898609757423401, - "step": 4230 - }, - { - "epoch": 2.88881309686221, - "grad_norm": 0.0003439450520090759, - "learning_rate": 1.3931835355634601e-05, - "loss": 0.0017080994322896004, - "step": 4235 - }, - { - "epoch": 2.892223738062756, - "grad_norm": 0.004427058156579733, - "learning_rate": 1.3852185425063181e-05, - "loss": 0.00010978456120938062, - "step": 4240 - }, - { - "epoch": 2.8956343792633015, - "grad_norm": 0.02229383960366249, - "learning_rate": 1.377271222000503e-05, - "loss": 0.0012531550601124763, - "step": 4245 - }, - { - "epoch": 2.899045020463847, - "grad_norm": 0.004171700682491064, - "learning_rate": 1.3693416334381517e-05, - "loss": 0.0012122373096644878, - "step": 4250 - }, - { - "epoch": 2.902455661664393, - "grad_norm": 0.0016778658609837294, - "learning_rate": 1.3614298360788924e-05, - "loss": 0.0006234514527022839, - "step": 4255 - }, - { - "epoch": 2.9058663028649385, - "grad_norm": 0.00403103232383728, - "learning_rate": 1.3535358890493897e-05, - "loss": 0.00030033572111278775, - "step": 4260 - }, - { - "epoch": 2.909276944065484, - "grad_norm": 0.006126928608864546, - "learning_rate": 1.3456598513429111e-05, - "loss": 0.014299000799655914, - "step": 4265 - }, - { - "epoch": 2.91268758526603, - "grad_norm": 0.0007882033823989332, - "learning_rate": 1.3378017818188815e-05, - "loss": 0.001049484871327877, - "step": 4270 - }, - { - "epoch": 2.9160982264665756, - "grad_norm": 0.0004515725013334304, - "learning_rate": 1.329961739202451e-05, - "loss": 6.776668014936149e-05, - "step": 4275 - }, - { - "epoch": 2.9195088676671213, - "grad_norm": 0.3738904297351837, - "learning_rate": 1.3221397820840419e-05, - "loss": 0.0010396759025752544, - "step": 4280 - }, - { - "epoch": 2.9229195088676674, - "grad_norm": 0.035881806164979935, - "learning_rate": 1.3143359689189279e-05, - "loss": 0.0003127899952232838, - "step": 4285 - }, - { - "epoch": 2.926330150068213, - "grad_norm": 0.010501476936042309, - "learning_rate": 1.306550358026784e-05, - "loss": 0.013835662603378296, - "step": 4290 - }, - { - "epoch": 2.9297407912687587, - "grad_norm": 0.002153329784050584, - "learning_rate": 1.2987830075912565e-05, - "loss": 0.007172297686338425, - "step": 4295 - }, - { - "epoch": 2.9331514324693044, - "grad_norm": 0.000594582874327898, - "learning_rate": 1.2910339756595254e-05, - "loss": 6.662132800556719e-05, - "step": 4300 - }, - { - "epoch": 2.93656207366985, - "grad_norm": 1.2802950143814087, - "learning_rate": 1.283303320141879e-05, - "loss": 0.0013225926086306572, - "step": 4305 - }, - { - "epoch": 2.939972714870396, - "grad_norm": 0.0010621119290590286, - "learning_rate": 1.2755910988112639e-05, - "loss": 0.0001950544072315097, - "step": 4310 - }, - { - "epoch": 2.9433833560709415, - "grad_norm": 0.0004629544273484498, - "learning_rate": 1.2678973693028735e-05, - "loss": 0.0002407266292721033, - "step": 4315 - }, - { - "epoch": 2.946793997271487, - "grad_norm": 0.00353289395570755, - "learning_rate": 1.2602221891137021e-05, - "loss": 0.006271860748529434, - "step": 4320 - }, - { - "epoch": 2.950204638472033, - "grad_norm": 0.021108930930495262, - "learning_rate": 1.2525656156021227e-05, - "loss": 0.007909800857305527, - "step": 4325 - }, - { - "epoch": 2.9536152796725785, - "grad_norm": 0.007604979444295168, - "learning_rate": 1.2449277059874547e-05, - "loss": 0.00022456045262515546, - "step": 4330 - }, - { - "epoch": 2.957025920873124, - "grad_norm": 0.0007357494323514402, - "learning_rate": 1.2373085173495411e-05, - "loss": 0.0010289529338479042, - "step": 4335 - }, - { - "epoch": 2.96043656207367, - "grad_norm": 0.0004920829669572413, - "learning_rate": 1.229708106628316e-05, - "loss": 8.303072536364198e-05, - "step": 4340 - }, - { - "epoch": 2.9638472032742156, - "grad_norm": 0.0014207189669832587, - "learning_rate": 1.2221265306233824e-05, - "loss": 0.001923336647450924, - "step": 4345 - }, - { - "epoch": 2.9672578444747613, - "grad_norm": 0.00898696668446064, - "learning_rate": 1.2145638459935863e-05, - "loss": 0.01918397843837738, - "step": 4350 - }, - { - "epoch": 2.970668485675307, - "grad_norm": 0.08236313611268997, - "learning_rate": 1.2070201092565988e-05, - "loss": 0.0005465132184326648, - "step": 4355 - }, - { - "epoch": 2.9740791268758526, - "grad_norm": 0.00796231534332037, - "learning_rate": 1.199495376788481e-05, - "loss": 4.8452542978338896e-05, - "step": 4360 - }, - { - "epoch": 2.9774897680763983, - "grad_norm": 0.026452092453837395, - "learning_rate": 1.1919897048232791e-05, - "loss": 0.0006576513405889273, - "step": 4365 - }, - { - "epoch": 2.980900409276944, - "grad_norm": 0.08616425842046738, - "learning_rate": 1.1845031494525901e-05, - "loss": 0.0013777482323348521, - "step": 4370 - }, - { - "epoch": 2.9843110504774897, - "grad_norm": 0.00048112327931448817, - "learning_rate": 1.1770357666251509e-05, - "loss": 0.0018716825172305108, - "step": 4375 - }, - { - "epoch": 2.9877216916780354, - "grad_norm": 0.0013632692862302065, - "learning_rate": 1.1695876121464154e-05, - "loss": 4.9980584299191834e-05, - "step": 4380 - }, - { - "epoch": 2.991132332878581, - "grad_norm": 1.318718671798706, - "learning_rate": 1.1621587416781445e-05, - "loss": 0.013328136503696441, - "step": 4385 - }, - { - "epoch": 2.9945429740791267, - "grad_norm": 0.00219643022865057, - "learning_rate": 1.1547492107379782e-05, - "loss": 9.902374586090446e-05, - "step": 4390 - }, - { - "epoch": 2.9959072305593453, - "eval_loss": 0.07537060230970383, - "eval_runtime": 0.95, - "eval_samples_per_second": 78.945, - "eval_steps_per_second": 2.105, - "step": 4392 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4392 - }, - { - "epoch": 2.9979536152796724, - "grad_norm": 0.003875225316733122, - "learning_rate": 1.1473590746990342e-05, - "loss": 0.0025784535333514213, - "step": 4395 - }, - { - "epoch": 3.001364256480218, - "grad_norm": 0.011882650665938854, - "learning_rate": 1.1399883887894846e-05, - "loss": 6.913430406711996e-05, - "step": 4400 - }, - { - "epoch": 3.004774897680764, - "grad_norm": 0.002775507280603051, - "learning_rate": 1.1326372080921464e-05, - "loss": 0.00019087132532149553, - "step": 4405 - }, - { - "epoch": 3.00818553888131, - "grad_norm": 0.06267738342285156, - "learning_rate": 1.125305587544069e-05, - "loss": 0.0007432831451296806, - "step": 4410 - }, - { - "epoch": 3.0115961800818556, - "grad_norm": 0.026564784348011017, - "learning_rate": 1.1179935819361272e-05, - "loss": 0.00015866300091147423, - "step": 4415 - }, - { - "epoch": 3.0150068212824013, - "grad_norm": 0.0011134434025734663, - "learning_rate": 1.1107012459126064e-05, - "loss": 0.0005193403456360101, - "step": 4420 - }, - { - "epoch": 3.018417462482947, - "grad_norm": 0.0006882239249534905, - "learning_rate": 1.1034286339707975e-05, - "loss": 0.0002253461629152298, - "step": 4425 - }, - { - "epoch": 3.0218281036834926, - "grad_norm": 0.008092721924185753, - "learning_rate": 1.0961758004605873e-05, - "loss": 0.0003596893046051264, - "step": 4430 - }, - { - "epoch": 3.0252387448840383, - "grad_norm": 0.01083564292639494, - "learning_rate": 1.0889427995840585e-05, - "loss": 0.010776457190513612, - "step": 4435 - }, - { - "epoch": 3.028649386084584, - "grad_norm": 0.040309611707925797, - "learning_rate": 1.0817296853950724e-05, - "loss": 0.0002762762364000082, - "step": 4440 - }, - { - "epoch": 3.0320600272851297, - "grad_norm": 0.0026077954098582268, - "learning_rate": 1.0745365117988804e-05, - "loss": 0.00011963967699557543, - "step": 4445 - }, - { - "epoch": 3.0354706684856754, - "grad_norm": 0.0013187731383368373, - "learning_rate": 1.0673633325517088e-05, - "loss": 0.0022230114787817, - "step": 4450 - }, - { - "epoch": 3.038881309686221, - "grad_norm": 0.08760128915309906, - "learning_rate": 1.060210201260362e-05, - "loss": 0.00029163951985538005, - "step": 4455 - }, - { - "epoch": 3.0422919508867667, - "grad_norm": 0.2557981610298157, - "learning_rate": 1.0530771713818229e-05, - "loss": 0.0014749299734830856, - "step": 4460 - }, - { - "epoch": 3.0457025920873124, - "grad_norm": 0.009532714262604713, - "learning_rate": 1.0459642962228502e-05, - "loss": 0.00023887362331151963, - "step": 4465 - }, - { - "epoch": 3.049113233287858, - "grad_norm": 0.002977812895551324, - "learning_rate": 1.0388716289395833e-05, - "loss": 3.836472751572728e-05, - "step": 4470 - }, - { - "epoch": 3.052523874488404, - "grad_norm": 0.0007074066670611501, - "learning_rate": 1.0317992225371411e-05, - "loss": 7.527543348260224e-05, - "step": 4475 - }, - { - "epoch": 3.0559345156889495, - "grad_norm": 0.020919082686305046, - "learning_rate": 1.0247471298692336e-05, - "loss": 0.00021068421192467214, - "step": 4480 - }, - { - "epoch": 3.059345156889495, - "grad_norm": 0.0012850193306803703, - "learning_rate": 1.0177154036377557e-05, - "loss": 0.00027780483942478894, - "step": 4485 - }, - { - "epoch": 3.062755798090041, - "grad_norm": 0.030514074489474297, - "learning_rate": 1.0107040963924027e-05, - "loss": 0.00020711682736873627, - "step": 4490 - }, - { - "epoch": 3.0661664392905865, - "grad_norm": 0.001317308866418898, - "learning_rate": 1.0037132605302716e-05, - "loss": 0.00024969261139631274, - "step": 4495 - }, - { - "epoch": 3.069577080491132, - "grad_norm": 0.0012978437589481473, - "learning_rate": 9.967429482954768e-06, - "loss": 0.0001504249172285199, - "step": 4500 - }, - { - "epoch": 3.072987721691678, - "grad_norm": 0.0003166501992382109, - "learning_rate": 9.897932117787476e-06, - "loss": 0.0002773872809484601, - "step": 4505 - }, - { - "epoch": 3.0763983628922236, - "grad_norm": 0.0029924868140369654, - "learning_rate": 9.828641029170544e-06, - "loss": 0.0005509680602699518, - "step": 4510 - }, - { - "epoch": 3.0798090040927693, - "grad_norm": 0.003611995605751872, - "learning_rate": 9.759556734932064e-06, - "loss": 0.0004832141101360321, - "step": 4515 - }, - { - "epoch": 3.083219645293315, - "grad_norm": 0.003923687152564526, - "learning_rate": 9.690679751354736e-06, - "loss": 0.00014967764727771282, - "step": 4520 - }, - { - "epoch": 3.086630286493861, - "grad_norm": 0.012491169385612011, - "learning_rate": 9.62201059317195e-06, - "loss": 8.488112362101675e-05, - "step": 4525 - }, - { - "epoch": 3.0900409276944067, - "grad_norm": 0.00011767258547479287, - "learning_rate": 9.553549773564035e-06, - "loss": 5.874955677427351e-05, - "step": 4530 - }, - { - "epoch": 3.0934515688949524, - "grad_norm": 0.021170401945710182, - "learning_rate": 9.48529780415427e-06, - "loss": 0.00013614417985081674, - "step": 4535 - }, - { - "epoch": 3.096862210095498, - "grad_norm": 0.005378399509936571, - "learning_rate": 9.417255195005218e-06, - "loss": 0.00010139571968466043, - "step": 4540 - }, - { - "epoch": 3.100272851296044, - "grad_norm": 0.0013061281060799956, - "learning_rate": 9.349422454614815e-06, - "loss": 0.0004999907687306404, - "step": 4545 - }, - { - "epoch": 3.1036834924965895, - "grad_norm": 0.000693993701133877, - "learning_rate": 9.281800089912605e-06, - "loss": 0.0001406701048836112, - "step": 4550 - }, - { - "epoch": 3.107094133697135, - "grad_norm": 0.003168008290231228, - "learning_rate": 9.214388606255934e-06, - "loss": 0.0002458775183185935, - "step": 4555 - }, - { - "epoch": 3.110504774897681, - "grad_norm": 0.001224424922838807, - "learning_rate": 9.147188507426224e-06, - "loss": 5.902486154809594e-05, - "step": 4560 - }, - { - "epoch": 3.1139154160982265, - "grad_norm": 0.0006458029965870082, - "learning_rate": 9.080200295625125e-06, - "loss": 6.971908733248711e-05, - "step": 4565 - }, - { - "epoch": 3.117326057298772, - "grad_norm": 0.0004500233626458794, - "learning_rate": 9.013424471470832e-06, - "loss": 4.827580996789038e-05, - "step": 4570 - }, - { - "epoch": 3.120736698499318, - "grad_norm": 0.008253362029790878, - "learning_rate": 8.946861533994316e-06, - "loss": 0.00010066803079098464, - "step": 4575 - }, - { - "epoch": 3.1241473396998636, - "grad_norm": 0.0027607178781181574, - "learning_rate": 8.88051198063559e-06, - "loss": 0.00010761913144961, - "step": 4580 - }, - { - "epoch": 3.1275579809004093, - "grad_norm": 0.0012132832780480385, - "learning_rate": 8.81437630723999e-06, - "loss": 0.00010583751136437059, - "step": 4585 - }, - { - "epoch": 3.130968622100955, - "grad_norm": 0.013205752708017826, - "learning_rate": 8.748455008054519e-06, - "loss": 7.872265996411443e-05, - "step": 4590 - }, - { - "epoch": 3.1343792633015006, - "grad_norm": 0.010380366817116737, - "learning_rate": 8.682748575724071e-06, - "loss": 0.00027635702863335607, - "step": 4595 - }, - { - "epoch": 3.1377899045020463, - "grad_norm": 0.012770955450832844, - "learning_rate": 8.617257501287805e-06, - "loss": 0.00028360043652355673, - "step": 4600 - }, - { - "epoch": 3.141200545702592, - "grad_norm": 0.012632913887500763, - "learning_rate": 8.551982274175449e-06, - "loss": 4.925676621496678e-05, - "step": 4605 - }, - { - "epoch": 3.1446111869031377, - "grad_norm": 0.0028189525473862886, - "learning_rate": 8.486923382203703e-06, - "loss": 0.0039628144353628155, - "step": 4610 - }, - { - "epoch": 3.1480218281036834, - "grad_norm": 0.11367341130971909, - "learning_rate": 8.422081311572464e-06, - "loss": 0.000568081671372056, - "step": 4615 - }, - { - "epoch": 3.151432469304229, - "grad_norm": 0.00095061567844823, - "learning_rate": 8.35745654686135e-06, - "loss": 0.00036408030427992344, - "step": 4620 - }, - { - "epoch": 3.1548431105047747, - "grad_norm": 0.05772553011775017, - "learning_rate": 8.29304957102596e-06, - "loss": 0.0006427288055419922, - "step": 4625 - }, - { - "epoch": 3.1582537517053204, - "grad_norm": 0.017082368955016136, - "learning_rate": 8.22886086539432e-06, - "loss": 0.00015330149326473475, - "step": 4630 - }, - { - "epoch": 3.161664392905866, - "grad_norm": 0.0033851212356239557, - "learning_rate": 8.164890909663256e-06, - "loss": 0.00012121353065595031, - "step": 4635 - }, - { - "epoch": 3.1650750341064118, - "grad_norm": 0.000577523373067379, - "learning_rate": 8.101140181894868e-06, - "loss": 7.793278782628477e-05, - "step": 4640 - }, - { - "epoch": 3.168485675306958, - "grad_norm": 0.00039884017314761877, - "learning_rate": 8.037609158512875e-06, - "loss": 0.0014324543066322804, - "step": 4645 - }, - { - "epoch": 3.1718963165075036, - "grad_norm": 0.004100058693438768, - "learning_rate": 7.97429831429911e-06, - "loss": 0.00015990985557436943, - "step": 4650 - }, - { - "epoch": 3.1753069577080493, - "grad_norm": 0.005648438818752766, - "learning_rate": 7.911208122389956e-06, - "loss": 8.149745990522206e-05, - "step": 4655 - }, - { - "epoch": 3.178717598908595, - "grad_norm": 0.005042952951043844, - "learning_rate": 7.848339054272808e-06, - "loss": 9.191314456984401e-05, - "step": 4660 - }, - { - "epoch": 3.1821282401091406, - "grad_norm": 0.0007950706058181822, - "learning_rate": 7.785691579782546e-06, - "loss": 7.44381221011281e-05, - "step": 4665 - }, - { - "epoch": 3.1855388813096863, - "grad_norm": 0.0717335194349289, - "learning_rate": 7.723266167098058e-06, - "loss": 0.00012347951997071505, - "step": 4670 - }, - { - "epoch": 3.188949522510232, - "grad_norm": 0.011817894876003265, - "learning_rate": 7.661063282738685e-06, - "loss": 0.000311569613404572, - "step": 4675 - }, - { - "epoch": 3.1923601637107777, - "grad_norm": 0.011100267991423607, - "learning_rate": 7.599083391560774e-06, - "loss": 0.00023061195388436319, - "step": 4680 - }, - { - "epoch": 3.1957708049113234, - "grad_norm": 0.025753796100616455, - "learning_rate": 7.5373269567541776e-06, - "loss": 0.00013706330209970475, - "step": 4685 - }, - { - "epoch": 3.199181446111869, - "grad_norm": 0.0529993437230587, - "learning_rate": 7.47579443983886e-06, - "loss": 0.00025801956653594973, - "step": 4690 - }, - { - "epoch": 3.2025920873124147, - "grad_norm": 0.01164156198501587, - "learning_rate": 7.41448630066132e-06, - "loss": 0.0001246333820745349, - "step": 4695 - }, - { - "epoch": 3.2060027285129604, - "grad_norm": 0.0022943434305489063, - "learning_rate": 7.353402997391271e-06, - "loss": 4.788096994161606e-05, - "step": 4700 - }, - { - "epoch": 3.209413369713506, - "grad_norm": 0.0016377613646909595, - "learning_rate": 7.292544986518198e-06, - "loss": 0.0014273281209170817, - "step": 4705 - }, - { - "epoch": 3.212824010914052, - "grad_norm": 0.046027738600969315, - "learning_rate": 7.231912722847881e-06, - "loss": 0.00015070366207510234, - "step": 4710 - }, - { - "epoch": 3.2162346521145975, - "grad_norm": 0.12626095116138458, - "learning_rate": 7.171506659499067e-06, - "loss": 0.0002223264891654253, - "step": 4715 - }, - { - "epoch": 3.219645293315143, - "grad_norm": 0.004387991968542337, - "learning_rate": 7.1113272479000465e-06, - "loss": 5.1431613974273206e-05, - "step": 4720 - }, - { - "epoch": 3.223055934515689, - "grad_norm": 0.0005260159377939999, - "learning_rate": 7.051374937785289e-06, - "loss": 9.967307560145854e-05, - "step": 4725 - }, - { - "epoch": 3.2264665757162345, - "grad_norm": 0.001557494280859828, - "learning_rate": 6.9916501771920795e-06, - "loss": 3.6639469908550384e-05, - "step": 4730 - }, - { - "epoch": 3.22987721691678, - "grad_norm": 0.0013014579890295863, - "learning_rate": 6.932153412457195e-06, - "loss": 0.00015290889423340558, - "step": 4735 - }, - { - "epoch": 3.233287858117326, - "grad_norm": 0.0005693956045433879, - "learning_rate": 6.872885088213522e-06, - "loss": 9.23092185985297e-05, - "step": 4740 - }, - { - "epoch": 3.2366984993178716, - "grad_norm": 0.0008315684972330928, - "learning_rate": 6.813845647386771e-06, - "loss": 0.00010763210011646151, - "step": 4745 - }, - { - "epoch": 3.2401091405184177, - "grad_norm": 0.0021727036219090223, - "learning_rate": 6.755035531192148e-06, - "loss": 3.423129383008927e-05, - "step": 4750 - }, - { - "epoch": 3.2435197817189634, - "grad_norm": 0.0001480428036302328, - "learning_rate": 6.696455179131084e-06, - "loss": 0.023981352150440217, - "step": 4755 - }, - { - "epoch": 3.2455661664392905, - "eval_loss": 0.074391670525074, - "eval_runtime": 0.9318, - "eval_samples_per_second": 80.487, - "eval_steps_per_second": 2.146, - "step": 4758 - }, - { - "eval_cer_subset": 0.015260934732252351, - "eval_cer_subset_edit_distance": 112, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4758 - }, - { - "epoch": 3.246930422919509, - "grad_norm": 0.007659688591957092, - "learning_rate": 6.638105028987886e-06, - "loss": 0.0004011324606835842, - "step": 4760 - }, - { - "epoch": 3.2503410641200547, - "grad_norm": 0.033579885959625244, - "learning_rate": 6.579985516826564e-06, - "loss": 0.000267721782438457, - "step": 4765 - }, - { - "epoch": 3.2537517053206004, - "grad_norm": 0.01196813303977251, - "learning_rate": 6.52209707698748e-06, - "loss": 0.00014587611658498645, - "step": 4770 - }, - { - "epoch": 3.257162346521146, - "grad_norm": 0.022396638989448547, - "learning_rate": 6.464440142084156e-06, - "loss": 0.00038080187514424326, - "step": 4775 - }, - { - "epoch": 3.260572987721692, - "grad_norm": 0.002048628870397806, - "learning_rate": 6.407015143000002e-06, - "loss": 0.007085768878459931, - "step": 4780 - }, - { - "epoch": 3.2639836289222375, - "grad_norm": 0.0019598486833274364, - "learning_rate": 6.3498225088851686e-06, - "loss": 0.0010610194876790047, - "step": 4785 - }, - { - "epoch": 3.267394270122783, - "grad_norm": 0.02006545290350914, - "learning_rate": 6.29286266715324e-06, - "loss": 0.00014606654876843096, - "step": 4790 - }, - { - "epoch": 3.270804911323329, - "grad_norm": 0.0005910536856390536, - "learning_rate": 6.236136043478106e-06, - "loss": 6.873804377391935e-05, - "step": 4795 - }, - { - "epoch": 3.2742155525238745, - "grad_norm": 0.021028559654951096, - "learning_rate": 6.179643061790775e-06, - "loss": 0.0013180674985051155, - "step": 4800 - }, - { - "epoch": 3.27762619372442, - "grad_norm": 0.0004542934475466609, - "learning_rate": 6.123384144276183e-06, - "loss": 4.965414409525692e-05, - "step": 4805 - }, - { - "epoch": 3.281036834924966, - "grad_norm": 0.041615571826696396, - "learning_rate": 6.067359711370047e-06, - "loss": 0.019744729995727538, - "step": 4810 - }, - { - "epoch": 3.2844474761255116, - "grad_norm": 0.002755318768322468, - "learning_rate": 6.011570181755754e-06, - "loss": 0.00010759549913927913, - "step": 4815 - }, - { - "epoch": 3.2878581173260573, - "grad_norm": 0.0032264923211187124, - "learning_rate": 5.956015972361171e-06, - "loss": 0.0007094295229762793, - "step": 4820 - }, - { - "epoch": 3.291268758526603, - "grad_norm": 1.4725555181503296, - "learning_rate": 5.900697498355589e-06, - "loss": 0.010635539144277572, - "step": 4825 - }, - { - "epoch": 3.2946793997271486, - "grad_norm": 0.017544033005833626, - "learning_rate": 5.84561517314657e-06, - "loss": 0.0003255015704780817, - "step": 4830 - }, - { - "epoch": 3.2980900409276943, - "grad_norm": 0.0035306529607623816, - "learning_rate": 5.790769408376922e-06, - "loss": 0.00013384120538830757, - "step": 4835 - }, - { - "epoch": 3.30150068212824, - "grad_norm": 0.0018269309075549245, - "learning_rate": 5.736160613921528e-06, - "loss": 0.0007960126735270023, - "step": 4840 - }, - { - "epoch": 3.3049113233287857, - "grad_norm": 0.010024248622357845, - "learning_rate": 5.6817891978843855e-06, - "loss": 0.0003233390394598246, - "step": 4845 - }, - { - "epoch": 3.3083219645293314, - "grad_norm": 0.004048422910273075, - "learning_rate": 5.627655566595489e-06, - "loss": 0.00011264056665822863, - "step": 4850 - }, - { - "epoch": 3.311732605729877, - "grad_norm": 0.0034091162960976362, - "learning_rate": 5.573760124607812e-06, - "loss": 0.00048314151354134083, - "step": 4855 - }, - { - "epoch": 3.3151432469304227, - "grad_norm": 0.009309383109211922, - "learning_rate": 5.5201032746942796e-06, - "loss": 0.0002101475838571787, - "step": 4860 - }, - { - "epoch": 3.3185538881309684, - "grad_norm": 0.06538962572813034, - "learning_rate": 5.466685417844797e-06, - "loss": 0.0003499136073514819, - "step": 4865 - }, - { - "epoch": 3.321964529331514, - "grad_norm": 0.002885893452912569, - "learning_rate": 5.413506953263162e-06, - "loss": 0.00048564458265900614, - "step": 4870 - }, - { - "epoch": 3.32537517053206, - "grad_norm": 0.004836421925574541, - "learning_rate": 5.3605682783642e-06, - "loss": 6.691411836072803e-05, - "step": 4875 - }, - { - "epoch": 3.328785811732606, - "grad_norm": 0.00475132092833519, - "learning_rate": 5.307869788770694e-06, - "loss": 0.0006194526329636573, - "step": 4880 - }, - { - "epoch": 3.3321964529331516, - "grad_norm": 0.005688230507075787, - "learning_rate": 5.255411878310482e-06, - "loss": 9.07582463696599e-05, - "step": 4885 - }, - { - "epoch": 3.3356070941336973, - "grad_norm": 0.0037306994199752808, - "learning_rate": 5.2031949390134856e-06, - "loss": 0.00012413164367899298, - "step": 4890 - }, - { - "epoch": 3.339017735334243, - "grad_norm": 0.011166035197675228, - "learning_rate": 5.15121936110882e-06, - "loss": 7.776234415359795e-05, - "step": 4895 - }, - { - "epoch": 3.3424283765347886, - "grad_norm": 0.0012123408960178494, - "learning_rate": 5.099485533021836e-06, - "loss": 2.784754615277052e-05, - "step": 4900 - }, - { - "epoch": 3.3458390177353343, - "grad_norm": 0.002357951132580638, - "learning_rate": 5.047993841371223e-06, - "loss": 0.00029555323999375105, - "step": 4905 - }, - { - "epoch": 3.34924965893588, - "grad_norm": 0.00030636831070296466, - "learning_rate": 4.996744670966138e-06, - "loss": 0.00010590272722765803, - "step": 4910 - }, - { - "epoch": 3.3526603001364257, - "grad_norm": 0.0058077736757695675, - "learning_rate": 4.945738404803348e-06, - "loss": 0.0001937644206918776, - "step": 4915 - }, - { - "epoch": 3.3560709413369714, - "grad_norm": 0.0007610859465785325, - "learning_rate": 4.8949754240642775e-06, - "loss": 0.00011190775549039245, - "step": 4920 - }, - { - "epoch": 3.359481582537517, - "grad_norm": 0.6333717107772827, - "learning_rate": 4.844456108112297e-06, - "loss": 0.0008945153094828129, - "step": 4925 - }, - { - "epoch": 3.3628922237380627, - "grad_norm": 0.006114002782851458, - "learning_rate": 4.794180834489772e-06, - "loss": 9.972437983378768e-05, - "step": 4930 - }, - { - "epoch": 3.3663028649386084, - "grad_norm": 0.0017828121781349182, - "learning_rate": 4.744149978915274e-06, - "loss": 7.763381581753493e-05, - "step": 4935 - }, - { - "epoch": 3.369713506139154, - "grad_norm": 0.0009099289891310036, - "learning_rate": 4.694363915280814e-06, - "loss": 0.05654715895652771, - "step": 4940 - }, - { - "epoch": 3.3731241473397, - "grad_norm": 0.013658248819410801, - "learning_rate": 4.644823015649009e-06, - "loss": 0.0013173201121389865, - "step": 4945 - }, - { - "epoch": 3.3765347885402455, - "grad_norm": 0.6665285229682922, - "learning_rate": 4.5955276502502944e-06, - "loss": 0.0018493477255105972, - "step": 4950 - }, - { - "epoch": 3.379945429740791, - "grad_norm": 0.001095029292628169, - "learning_rate": 4.546478187480176e-06, - "loss": 0.0005424355156719684, - "step": 4955 - }, - { - "epoch": 3.383356070941337, - "grad_norm": 0.015392723493278027, - "learning_rate": 4.497674993896503e-06, - "loss": 9.325146675109863e-05, - "step": 4960 - }, - { - "epoch": 3.3867667121418825, - "grad_norm": 0.08219064027070999, - "learning_rate": 4.449118434216653e-06, - "loss": 0.0004451565444469452, - "step": 4965 - }, - { - "epoch": 3.390177353342428, - "grad_norm": 0.003267089370638132, - "learning_rate": 4.4008088713148845e-06, - "loss": 0.00016891954001039267, - "step": 4970 - }, - { - "epoch": 3.3935879945429743, - "grad_norm": 0.008226803503930569, - "learning_rate": 4.35274666621957e-06, - "loss": 0.0001009777537547052, - "step": 4975 - }, - { - "epoch": 3.39699863574352, - "grad_norm": 0.01762073114514351, - "learning_rate": 4.304932178110558e-06, - "loss": 0.006487253308296204, - "step": 4980 - }, - { - "epoch": 3.4004092769440657, - "grad_norm": 0.0021151783876121044, - "learning_rate": 4.257365764316395e-06, - "loss": 0.00011428899597376585, - "step": 4985 - }, - { - "epoch": 3.4038199181446114, - "grad_norm": 0.0008960114791989326, - "learning_rate": 4.210047780311768e-06, - "loss": 3.825195599347353e-05, - "step": 4990 - }, - { - "epoch": 3.407230559345157, - "grad_norm": 0.058672014623880386, - "learning_rate": 4.162978579714753e-06, - "loss": 0.0005217622965574265, - "step": 4995 - }, - { - "epoch": 3.4106412005457027, - "grad_norm": 6.555901927640662e-05, - "learning_rate": 4.11615851428423e-06, - "loss": 0.00011362402001395822, - "step": 5000 - }, - { - "epoch": 3.4140518417462484, - "grad_norm": 0.21014879643917084, - "learning_rate": 4.069587933917221e-06, - "loss": 0.00179185438901186, - "step": 5005 - }, - { - "epoch": 3.417462482946794, - "grad_norm": 0.1315806359052658, - "learning_rate": 4.023267186646317e-06, - "loss": 0.0014887897297739983, - "step": 5010 - }, - { - "epoch": 3.42087312414734, - "grad_norm": 0.018509764224290848, - "learning_rate": 3.977196618637e-06, - "loss": 0.00016269356710836292, - "step": 5015 - }, - { - "epoch": 3.4242837653478855, - "grad_norm": 0.0007501108921132982, - "learning_rate": 3.931376574185166e-06, - "loss": 3.563327190931886e-05, - "step": 5020 - }, - { - "epoch": 3.427694406548431, - "grad_norm": 0.0036405418068170547, - "learning_rate": 3.885807395714441e-06, - "loss": 0.0009133132174611091, - "step": 5025 - }, - { - "epoch": 3.431105047748977, - "grad_norm": 0.01853407360613346, - "learning_rate": 3.840489423773698e-06, - "loss": 0.00011221827007830143, - "step": 5030 - }, - { - "epoch": 3.4345156889495225, - "grad_norm": 0.004317726474255323, - "learning_rate": 3.7954229970344725e-06, - "loss": 0.00033687916584312916, - "step": 5035 - }, - { - "epoch": 3.437926330150068, - "grad_norm": 0.003909669350832701, - "learning_rate": 3.7506084522884684e-06, - "loss": 7.524496177211404e-05, - "step": 5040 - }, - { - "epoch": 3.441336971350614, - "grad_norm": 0.016738831996917725, - "learning_rate": 3.7060461244449945e-06, - "loss": 9.533832781016827e-05, - "step": 5045 - }, - { - "epoch": 3.4447476125511596, - "grad_norm": 0.0012303644325584173, - "learning_rate": 3.6617363465284875e-06, - "loss": 0.00011376941110938788, - "step": 5050 - }, - { - "epoch": 3.4481582537517053, - "grad_norm": 0.005091819446533918, - "learning_rate": 3.617679449676028e-06, - "loss": 0.000578709552064538, - "step": 5055 - }, - { - "epoch": 3.451568894952251, - "grad_norm": 0.0007091189618222415, - "learning_rate": 3.5738757631348744e-06, - "loss": 7.042675861157477e-05, - "step": 5060 - }, - { - "epoch": 3.4549795361527966, - "grad_norm": 0.004033643286675215, - "learning_rate": 3.5303256142599407e-06, - "loss": 5.9417390730232e-05, - "step": 5065 - }, - { - "epoch": 3.4583901773533423, - "grad_norm": 0.018419573083519936, - "learning_rate": 3.487029328511444e-06, - "loss": 0.0001460162689909339, - "step": 5070 - }, - { - "epoch": 3.461800818553888, - "grad_norm": 0.0035185501910746098, - "learning_rate": 3.4439872294524025e-06, - "loss": 0.0001254791859537363, - "step": 5075 - }, - { - "epoch": 3.4652114597544337, - "grad_norm": 0.0007496779435314238, - "learning_rate": 3.401199638746241e-06, - "loss": 4.7221675049513576e-05, - "step": 5080 - }, - { - "epoch": 3.4686221009549794, - "grad_norm": 2.67924165725708, - "learning_rate": 3.3586668761543813e-06, - "loss": 0.0016780177131295205, - "step": 5085 - }, - { - "epoch": 3.472032742155525, - "grad_norm": 0.011002305895090103, - "learning_rate": 3.316389259533876e-06, - "loss": 9.696125634945929e-05, - "step": 5090 - }, - { - "epoch": 3.4754433833560707, - "grad_norm": 0.0005470504984259605, - "learning_rate": 3.2743671048349755e-06, - "loss": 3.457541752140969e-05, - "step": 5095 - }, - { - "epoch": 3.4788540245566164, - "grad_norm": 0.014639006927609444, - "learning_rate": 3.232600726098851e-06, - "loss": 0.005385900661349296, - "step": 5100 - }, - { - "epoch": 3.4822646657571625, - "grad_norm": 0.02914433367550373, - "learning_rate": 3.191090435455171e-06, - "loss": 0.00018561827018857003, - "step": 5105 - }, - { - "epoch": 3.485675306957708, - "grad_norm": 0.0009303450351580977, - "learning_rate": 3.1498365431198048e-06, - "loss": 8.976480457931758e-05, - "step": 5110 - }, - { - "epoch": 3.489085948158254, - "grad_norm": 0.001520369085483253, - "learning_rate": 3.1088393573924966e-06, - "loss": 0.00012106491485610604, - "step": 5115 - }, - { - "epoch": 3.4924965893587996, - "grad_norm": 0.0004795770801138133, - "learning_rate": 3.0680991846545836e-06, - "loss": 5.365515244193375e-05, - "step": 5120 - }, - { - "epoch": 3.495225102319236, - "eval_loss": 0.07684502005577087, - "eval_runtime": 0.9207, - "eval_samples_per_second": 81.46, - "eval_steps_per_second": 2.172, - "step": 5124 - }, - { - "eval_cer_subset": 0.014852159694781306, - "eval_cer_subset_edit_distance": 109, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 5124 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 5.351616616395571e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5124/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/adapter_model.safetensors deleted file mode 100644 index 246e398133e03937edc42f1a9fe7cc32cca0a64d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d726e1faae8e3d89f3870b8122c66f7a9869106d77224865abb74c0ff730e2b8 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/optimizer.pt deleted file mode 100644 index ecc99505580dd01873eea9a76d1d201848ce0d20..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1abf8077a5b133b039d4867bc5233a1748d50c282b378ff919e35e635c323759 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/rng_state.pth deleted file mode 100644 index d9a76fb8c9e3bb0943f558bb46beabc9a0ff9f5e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9d93fd5df166ed77db1f2d0370e376d570a8c8fec3fc2ddfd6cb951dbd7e6101 -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/scheduler.pt deleted file mode 100644 index 388a4720dd5a79305f1b4561faa4bdfc6e1c5725..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d025c17df1099c6516ee3ed48d37c5ab273e14f94535c2f37e1ca5b07400f926 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/trainer_state.json deleted file mode 100644 index 2ae89e315cf2e3341ac65fe66cf410f1877456df..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/trainer_state.json +++ /dev/null @@ -1,7960 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 3.7448840381991815, - "eval_steps": 366, - "global_step": 5490, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - }, - { - "epoch": 1.7496589358799455, - "grad_norm": 0.1577913612127304, - "learning_rate": 4.617543195717702e-05, - "loss": 0.008352726697921753, - "step": 2565 - }, - { - "epoch": 1.7530695770804912, - "grad_norm": 0.0097503075376153, - "learning_rate": 4.607566614572297e-05, - "loss": 0.0051550988107919695, - "step": 2570 - }, - { - "epoch": 1.7564802182810368, - "grad_norm": 0.0745258778333664, - "learning_rate": 4.5975836246359376e-05, - "loss": 0.021304388344287873, - "step": 2575 - }, - { - "epoch": 1.7598908594815825, - "grad_norm": 0.06805134564638138, - "learning_rate": 4.5875943005137875e-05, - "loss": 0.002654948644340038, - "step": 2580 - }, - { - "epoch": 1.7633015006821282, - "grad_norm": 0.005402611568570137, - "learning_rate": 4.577598716858342e-05, - "loss": 0.0005614575464278459, - "step": 2585 - }, - { - "epoch": 1.766712141882674, - "grad_norm": 1.909899115562439, - "learning_rate": 4.5675969483688796e-05, - "loss": 0.07116572856903076, - "step": 2590 - }, - { - "epoch": 1.7701227830832198, - "grad_norm": 0.31900784373283386, - "learning_rate": 4.557589069790897e-05, - "loss": 0.00657682865858078, - "step": 2595 - }, - { - "epoch": 1.7735334242837655, - "grad_norm": 0.029744356870651245, - "learning_rate": 4.547575155915556e-05, - "loss": 0.022768501937389374, - "step": 2600 - }, - { - "epoch": 1.7769440654843112, - "grad_norm": 0.3033762276172638, - "learning_rate": 4.537555281579118e-05, - "loss": 0.06703370213508605, - "step": 2605 - }, - { - "epoch": 1.7803547066848568, - "grad_norm": 0.1135796457529068, - "learning_rate": 4.5275295216623895e-05, - "loss": 0.02318609356880188, - "step": 2610 - }, - { - "epoch": 1.7837653478854025, - "grad_norm": 0.03714317828416824, - "learning_rate": 4.517497951090163e-05, - "loss": 0.0006621151696890593, - "step": 2615 - }, - { - "epoch": 1.7871759890859482, - "grad_norm": 0.022167189046740532, - "learning_rate": 4.507460644830652e-05, - "loss": 0.02861899733543396, - "step": 2620 - }, - { - "epoch": 1.790586630286494, - "grad_norm": 0.85112464427948, - "learning_rate": 4.497417677894937e-05, - "loss": 0.006332498788833618, - "step": 2625 - }, - { - "epoch": 1.7939972714870396, - "grad_norm": 0.35604724287986755, - "learning_rate": 4.487369125336402e-05, - "loss": 0.0009636864997446537, - "step": 2630 - }, - { - "epoch": 1.7974079126875853, - "grad_norm": 1.309139370918274, - "learning_rate": 4.477315062250173e-05, - "loss": 0.015147030353546143, - "step": 2635 - }, - { - "epoch": 1.800818553888131, - "grad_norm": 0.00445478456094861, - "learning_rate": 4.467255563772554e-05, - "loss": 0.0033130761235952376, - "step": 2640 - }, - { - "epoch": 1.8042291950886766, - "grad_norm": 0.6081591248512268, - "learning_rate": 4.457190705080476e-05, - "loss": 0.05884039998054504, - "step": 2645 - }, - { - "epoch": 1.8076398362892223, - "grad_norm": 0.021707098931074142, - "learning_rate": 4.4471205613909215e-05, - "loss": 0.011024921387434005, - "step": 2650 - }, - { - "epoch": 1.811050477489768, - "grad_norm": 0.07584571093320847, - "learning_rate": 4.437045207960372e-05, - "loss": 0.0034543585032224657, - "step": 2655 - }, - { - "epoch": 1.8144611186903137, - "grad_norm": 0.09392323344945908, - "learning_rate": 4.4269647200842444e-05, - "loss": 0.020604337751865386, - "step": 2660 - }, - { - "epoch": 1.8178717598908594, - "grad_norm": 0.01986370049417019, - "learning_rate": 4.4168791730963214e-05, - "loss": 0.002114328555762768, - "step": 2665 - }, - { - "epoch": 1.821282401091405, - "grad_norm": 0.09479828178882599, - "learning_rate": 4.406788642368199e-05, - "loss": 0.025032815337181092, - "step": 2670 - }, - { - "epoch": 1.8246930422919507, - "grad_norm": 0.049587834626436234, - "learning_rate": 4.396693203308714e-05, - "loss": 0.010216309875249862, - "step": 2675 - }, - { - "epoch": 1.8281036834924966, - "grad_norm": 0.8442233204841614, - "learning_rate": 4.3865929313633846e-05, - "loss": 0.028453707695007324, - "step": 2680 - }, - { - "epoch": 1.8315143246930423, - "grad_norm": 1.1214258670806885, - "learning_rate": 4.37648790201385e-05, - "loss": 0.030564960837364197, - "step": 2685 - }, - { - "epoch": 1.834924965893588, - "grad_norm": 0.0033480043057352304, - "learning_rate": 4.3663781907772984e-05, - "loss": 0.0031189337372779847, - "step": 2690 - }, - { - "epoch": 1.8383356070941337, - "grad_norm": 1.5848685503005981, - "learning_rate": 4.356263873205908e-05, - "loss": 0.006724410504102707, - "step": 2695 - }, - { - "epoch": 1.8417462482946794, - "grad_norm": 2.099224090576172, - "learning_rate": 4.346145024886282e-05, - "loss": 0.02901224195957184, - "step": 2700 - }, - { - "epoch": 1.845156889495225, - "grad_norm": 0.009933914057910442, - "learning_rate": 4.336021721438881e-05, - "loss": 0.01628301590681076, - "step": 2705 - }, - { - "epoch": 1.848567530695771, - "grad_norm": 0.36502084136009216, - "learning_rate": 4.325894038517463e-05, - "loss": 0.0063889890909194945, - "step": 2710 - }, - { - "epoch": 1.8519781718963166, - "grad_norm": 0.5338266491889954, - "learning_rate": 4.3157620518085123e-05, - "loss": 0.0017654186114668847, - "step": 2715 - }, - { - "epoch": 1.8553888130968623, - "grad_norm": 0.019173067063093185, - "learning_rate": 4.3056258370306773e-05, - "loss": 0.026314160227775572, - "step": 2720 - }, - { - "epoch": 1.858799454297408, - "grad_norm": 0.0022459065075963736, - "learning_rate": 4.295485469934203e-05, - "loss": 0.034506088495254515, - "step": 2725 - }, - { - "epoch": 1.8622100954979537, - "grad_norm": 0.009851609356701374, - "learning_rate": 4.285341026300366e-05, - "loss": 0.037691861391067505, - "step": 2730 - }, - { - "epoch": 1.8656207366984994, - "grad_norm": 0.037228964269161224, - "learning_rate": 4.275192581940908e-05, - "loss": 0.019535519182682037, - "step": 2735 - }, - { - "epoch": 1.869031377899045, - "grad_norm": 0.03244785964488983, - "learning_rate": 4.2650402126974704e-05, - "loss": 0.004778595641255379, - "step": 2740 - }, - { - "epoch": 1.8724420190995907, - "grad_norm": 0.027707895264029503, - "learning_rate": 4.254883994441023e-05, - "loss": 0.014376556873321534, - "step": 2745 - }, - { - "epoch": 1.8758526603001364, - "grad_norm": 0.0010299253044649959, - "learning_rate": 4.244724003071302e-05, - "loss": 0.03207484483718872, - "step": 2750 - }, - { - "epoch": 1.879263301500682, - "grad_norm": 0.09466377645730972, - "learning_rate": 4.234560314516241e-05, - "loss": 0.0034012068063020706, - "step": 2755 - }, - { - "epoch": 1.8826739427012278, - "grad_norm": 0.010737070813775063, - "learning_rate": 4.224393004731403e-05, - "loss": 0.0015277769416570663, - "step": 2760 - }, - { - "epoch": 1.8860845839017735, - "grad_norm": 0.0126175656914711, - "learning_rate": 4.2142221496994144e-05, - "loss": 0.0010683752596378326, - "step": 2765 - }, - { - "epoch": 1.8894952251023192, - "grad_norm": 0.03981072083115578, - "learning_rate": 4.2040478254293946e-05, - "loss": 0.013066369295120239, - "step": 2770 - }, - { - "epoch": 1.8929058663028648, - "grad_norm": 1.678425669670105, - "learning_rate": 4.193870107956391e-05, - "loss": 0.04245063066482544, - "step": 2775 - }, - { - "epoch": 1.8963165075034105, - "grad_norm": 0.21114972233772278, - "learning_rate": 4.1836890733408063e-05, - "loss": 0.006565409898757935, - "step": 2780 - }, - { - "epoch": 1.8997271487039562, - "grad_norm": 0.13557757437229156, - "learning_rate": 4.173504797667836e-05, - "loss": 0.0032049473375082016, - "step": 2785 - }, - { - "epoch": 1.9031377899045019, - "grad_norm": 0.03560245409607887, - "learning_rate": 4.163317357046896e-05, - "loss": 0.036527630686759946, - "step": 2790 - }, - { - "epoch": 1.9065484311050478, - "grad_norm": 0.016457395628094673, - "learning_rate": 4.1531268276110534e-05, - "loss": 0.004363229870796204, - "step": 2795 - }, - { - "epoch": 1.9099590723055935, - "grad_norm": 0.15892116725444794, - "learning_rate": 4.142933285516459e-05, - "loss": 0.0009642043150961399, - "step": 2800 - }, - { - "epoch": 1.9133697135061392, - "grad_norm": 1.4503952264785767, - "learning_rate": 4.1327368069417805e-05, - "loss": 0.016029161214828492, - "step": 2805 - }, - { - "epoch": 1.9167803547066848, - "grad_norm": 0.05091691017150879, - "learning_rate": 4.122537468087626e-05, - "loss": 0.008691015094518662, - "step": 2810 - }, - { - "epoch": 1.9201909959072305, - "grad_norm": 0.33131423592567444, - "learning_rate": 4.1123353451759843e-05, - "loss": 0.026498579978942872, - "step": 2815 - }, - { - "epoch": 1.9236016371077762, - "grad_norm": 0.011024169623851776, - "learning_rate": 4.1021305144496455e-05, - "loss": 0.005746996402740479, - "step": 2820 - }, - { - "epoch": 1.9270122783083221, - "grad_norm": 0.003868364728987217, - "learning_rate": 4.091923052171637e-05, - "loss": 0.010599984973669051, - "step": 2825 - }, - { - "epoch": 1.9304229195088678, - "grad_norm": 0.06572377681732178, - "learning_rate": 4.081713034624656e-05, - "loss": 0.0074796505272388455, - "step": 2830 - }, - { - "epoch": 1.9338335607094135, - "grad_norm": 0.004749608226120472, - "learning_rate": 4.07150053811049e-05, - "loss": 0.001074880175292492, - "step": 2835 - }, - { - "epoch": 1.9372442019099592, - "grad_norm": 1.0662771463394165, - "learning_rate": 4.061285638949456e-05, - "loss": 0.012685902416706085, - "step": 2840 - }, - { - "epoch": 1.9406548431105048, - "grad_norm": 0.05863015726208687, - "learning_rate": 4.0510684134798275e-05, - "loss": 0.02307589054107666, - "step": 2845 - }, - { - "epoch": 1.9440654843110505, - "grad_norm": 0.1672995239496231, - "learning_rate": 4.0408489380572595e-05, - "loss": 0.0009137367829680443, - "step": 2850 - }, - { - "epoch": 1.9474761255115962, - "grad_norm": 0.37154069542884827, - "learning_rate": 4.030627289054224e-05, - "loss": 0.03070145845413208, - "step": 2855 - }, - { - "epoch": 1.950886766712142, - "grad_norm": 1.7240241765975952, - "learning_rate": 4.020403542859436e-05, - "loss": 0.02094976305961609, - "step": 2860 - }, - { - "epoch": 1.9542974079126876, - "grad_norm": 0.019670270383358, - "learning_rate": 4.0101777758772826e-05, - "loss": 0.002072345092892647, - "step": 2865 - }, - { - "epoch": 1.9577080491132333, - "grad_norm": 0.004318730439990759, - "learning_rate": 3.999950064527255e-05, - "loss": 0.004361823201179504, - "step": 2870 - }, - { - "epoch": 1.961118690313779, - "grad_norm": 0.01702667959034443, - "learning_rate": 3.989720485243371e-05, - "loss": 0.0062848575413227085, - "step": 2875 - }, - { - "epoch": 1.9645293315143246, - "grad_norm": 0.011724979616701603, - "learning_rate": 3.9794891144736114e-05, - "loss": 0.010996624082326888, - "step": 2880 - }, - { - "epoch": 1.9679399727148703, - "grad_norm": 0.05676786229014397, - "learning_rate": 3.9692560286793454e-05, - "loss": 0.0015414996072649957, - "step": 2885 - }, - { - "epoch": 1.971350613915416, - "grad_norm": 0.0819210484623909, - "learning_rate": 3.959021304334756e-05, - "loss": 0.0008444737643003464, - "step": 2890 - }, - { - "epoch": 1.9747612551159617, - "grad_norm": 2.654984951019287, - "learning_rate": 3.948785017926274e-05, - "loss": 0.029948851466178893, - "step": 2895 - }, - { - "epoch": 1.9781718963165074, - "grad_norm": 1.9272631406784058, - "learning_rate": 3.938547245952003e-05, - "loss": 0.028752812743186952, - "step": 2900 - }, - { - "epoch": 1.981582537517053, - "grad_norm": 1.8656548261642456, - "learning_rate": 3.9283080649211474e-05, - "loss": 0.013515619933605194, - "step": 2905 - }, - { - "epoch": 1.984993178717599, - "grad_norm": 0.12653613090515137, - "learning_rate": 3.918067551353445e-05, - "loss": 0.0011569897644221783, - "step": 2910 - }, - { - "epoch": 1.9884038199181446, - "grad_norm": 0.07956118136644363, - "learning_rate": 3.9078257817785886e-05, - "loss": 0.024067461490631104, - "step": 2915 - }, - { - "epoch": 1.9918144611186903, - "grad_norm": 0.05314113199710846, - "learning_rate": 3.897582832735658e-05, - "loss": 0.008826743811368942, - "step": 2920 - }, - { - "epoch": 1.995225102319236, - "grad_norm": 0.05015930160880089, - "learning_rate": 3.887338780772551e-05, - "loss": 0.017050875723361968, - "step": 2925 - }, - { - "epoch": 1.9972714870395634, - "eval_loss": 0.051675114780664444, - "eval_runtime": 0.9019, - "eval_samples_per_second": 83.154, - "eval_steps_per_second": 2.217, - "step": 2928 - }, - { - "eval_cer_subset": 0.016214743153018123, - "eval_cer_subset_edit_distance": 119, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2928 - }, - { - "epoch": 1.9986357435197817, - "grad_norm": 0.0063513885252177715, - "learning_rate": 3.8770937024454024e-05, - "loss": 0.06096935272216797, - "step": 2930 - }, - { - "epoch": 2.0020463847203276, - "grad_norm": 0.3572078347206116, - "learning_rate": 3.86684767431802e-05, - "loss": 0.006809630990028381, - "step": 2935 - }, - { - "epoch": 2.0054570259208733, - "grad_norm": 0.06565147638320923, - "learning_rate": 3.8566007729613116e-05, - "loss": 0.0028434153646230698, - "step": 2940 - }, - { - "epoch": 2.008867667121419, - "grad_norm": 0.842980682849884, - "learning_rate": 3.846353074952705e-05, - "loss": 0.009484797716140747, - "step": 2945 - }, - { - "epoch": 2.0122783083219646, - "grad_norm": 0.2072802633047104, - "learning_rate": 3.83610465687559e-05, - "loss": 0.002631821669638157, - "step": 2950 - }, - { - "epoch": 2.0156889495225103, - "grad_norm": 0.010441784746944904, - "learning_rate": 3.8258555953187294e-05, - "loss": 0.0031868770718574526, - "step": 2955 - }, - { - "epoch": 2.019099590723056, - "grad_norm": 0.01504182443022728, - "learning_rate": 3.8156059668756994e-05, - "loss": 0.0008036898449063301, - "step": 2960 - }, - { - "epoch": 2.0225102319236017, - "grad_norm": 0.1884382665157318, - "learning_rate": 3.805355848144312e-05, - "loss": 0.0035643450915813445, - "step": 2965 - }, - { - "epoch": 2.0259208731241474, - "grad_norm": 0.0791923776268959, - "learning_rate": 3.795105315726042e-05, - "loss": 0.006217213720083237, - "step": 2970 - }, - { - "epoch": 2.029331514324693, - "grad_norm": 0.3660128116607666, - "learning_rate": 3.7848544462254586e-05, - "loss": 0.01521536111831665, - "step": 2975 - }, - { - "epoch": 2.0327421555252387, - "grad_norm": 1.1506773233413696, - "learning_rate": 3.774603316249646e-05, - "loss": 0.016655027866363525, - "step": 2980 - }, - { - "epoch": 2.0361527967257844, - "grad_norm": 0.010742763057351112, - "learning_rate": 3.764352002407638e-05, - "loss": 0.0031856697052717207, - "step": 2985 - }, - { - "epoch": 2.03956343792633, - "grad_norm": 0.008567198179662228, - "learning_rate": 3.754100581309843e-05, - "loss": 0.006546307355165482, - "step": 2990 - }, - { - "epoch": 2.042974079126876, - "grad_norm": 0.242637500166893, - "learning_rate": 3.743849129567467e-05, - "loss": 0.01844196617603302, - "step": 2995 - }, - { - "epoch": 2.0463847203274215, - "grad_norm": 0.0021864245645701885, - "learning_rate": 3.7335977237919486e-05, - "loss": 0.0010665619745850563, - "step": 3000 - }, - { - "epoch": 2.049795361527967, - "grad_norm": 0.0823604166507721, - "learning_rate": 3.723346440594384e-05, - "loss": 0.007866787165403366, - "step": 3005 - }, - { - "epoch": 2.053206002728513, - "grad_norm": 1.950189471244812, - "learning_rate": 3.713095356584948e-05, - "loss": 0.008237652480602264, - "step": 3010 - }, - { - "epoch": 2.0566166439290585, - "grad_norm": 0.0031665184069424868, - "learning_rate": 3.702844548372333e-05, - "loss": 0.0026241108775138856, - "step": 3015 - }, - { - "epoch": 2.060027285129604, - "grad_norm": 0.000985809718258679, - "learning_rate": 3.692594092563164e-05, - "loss": 0.0006582608446478843, - "step": 3020 - }, - { - "epoch": 2.06343792633015, - "grad_norm": 0.005130598321557045, - "learning_rate": 3.682344065761439e-05, - "loss": 0.003146781027317047, - "step": 3025 - }, - { - "epoch": 2.0668485675306956, - "grad_norm": 0.37476831674575806, - "learning_rate": 3.6720945445679456e-05, - "loss": 0.0038135331124067307, - "step": 3030 - }, - { - "epoch": 2.0702592087312413, - "grad_norm": 0.7057031989097595, - "learning_rate": 3.661845605579694e-05, - "loss": 0.01638354957103729, - "step": 3035 - }, - { - "epoch": 2.0736698499317874, - "grad_norm": 0.002361712511628866, - "learning_rate": 3.651597325389343e-05, - "loss": 0.008126144856214523, - "step": 3040 - }, - { - "epoch": 2.077080491132333, - "grad_norm": 0.14359615743160248, - "learning_rate": 3.641349780584628e-05, - "loss": 0.0010236113332211972, - "step": 3045 - }, - { - "epoch": 2.0804911323328787, - "grad_norm": 0.02165267802774906, - "learning_rate": 3.631103047747791e-05, - "loss": 0.001835300400853157, - "step": 3050 - }, - { - "epoch": 2.0839017735334244, - "grad_norm": 0.08726503700017929, - "learning_rate": 3.6208572034550014e-05, - "loss": 0.0011202219873666763, - "step": 3055 - }, - { - "epoch": 2.08731241473397, - "grad_norm": 0.011694432236254215, - "learning_rate": 3.6106123242757934e-05, - "loss": 0.0012682409957051276, - "step": 3060 - }, - { - "epoch": 2.090723055934516, - "grad_norm": 0.011882166378200054, - "learning_rate": 3.6003684867724856e-05, - "loss": 0.0017605546861886978, - "step": 3065 - }, - { - "epoch": 2.0941336971350615, - "grad_norm": 0.04591360688209534, - "learning_rate": 3.5901257674996135e-05, - "loss": 0.005101381987333298, - "step": 3070 - }, - { - "epoch": 2.097544338335607, - "grad_norm": 0.004696133080869913, - "learning_rate": 3.579884243003353e-05, - "loss": 0.0011597291566431522, - "step": 3075 - }, - { - "epoch": 2.100954979536153, - "grad_norm": 0.15893827378749847, - "learning_rate": 3.5696439898209546e-05, - "loss": 0.004860148951411247, - "step": 3080 - }, - { - "epoch": 2.1043656207366985, - "grad_norm": 0.0052610537968575954, - "learning_rate": 3.559405084480166e-05, - "loss": 0.0022133996710181235, - "step": 3085 - }, - { - "epoch": 2.107776261937244, - "grad_norm": 0.04163965955376625, - "learning_rate": 3.5491676034986634e-05, - "loss": 0.0011653171852231026, - "step": 3090 - }, - { - "epoch": 2.11118690313779, - "grad_norm": 0.03646431118249893, - "learning_rate": 3.538931623383477e-05, - "loss": 0.0010974819771945477, - "step": 3095 - }, - { - "epoch": 2.1145975443383356, - "grad_norm": 0.0027346035931259394, - "learning_rate": 3.5286972206304225e-05, - "loss": 0.0002963356673717499, - "step": 3100 - }, - { - "epoch": 2.1180081855388813, - "grad_norm": 0.030314767733216286, - "learning_rate": 3.5184644717235233e-05, - "loss": 0.0013646245934069157, - "step": 3105 - }, - { - "epoch": 2.121418826739427, - "grad_norm": 0.07366184145212173, - "learning_rate": 3.5082334531344514e-05, - "loss": 0.0018215376883745193, - "step": 3110 - }, - { - "epoch": 2.1248294679399726, - "grad_norm": 0.5131163597106934, - "learning_rate": 3.498004241321938e-05, - "loss": 0.011788859963417053, - "step": 3115 - }, - { - "epoch": 2.1282401091405183, - "grad_norm": 0.005606099497526884, - "learning_rate": 3.48777691273122e-05, - "loss": 0.00023315094877034425, - "step": 3120 - }, - { - "epoch": 2.131650750341064, - "grad_norm": 0.218208909034729, - "learning_rate": 3.4775515437934554e-05, - "loss": 0.0018167193979024888, - "step": 3125 - }, - { - "epoch": 2.1350613915416097, - "grad_norm": 0.026605455204844475, - "learning_rate": 3.467328210925161e-05, - "loss": 0.0012846079654991627, - "step": 3130 - }, - { - "epoch": 2.1384720327421554, - "grad_norm": 1.4139975309371948, - "learning_rate": 3.457106990527632e-05, - "loss": 0.00508112832903862, - "step": 3135 - }, - { - "epoch": 2.141882673942701, - "grad_norm": 0.019414646551012993, - "learning_rate": 3.446887958986385e-05, - "loss": 0.00019377884455025197, - "step": 3140 - }, - { - "epoch": 2.1452933151432467, - "grad_norm": 0.0009443740709684789, - "learning_rate": 3.4366711926705694e-05, - "loss": 0.0002384019084274769, - "step": 3145 - }, - { - "epoch": 2.148703956343793, - "grad_norm": 0.004274521954357624, - "learning_rate": 3.426456767932416e-05, - "loss": 0.0001209982088766992, - "step": 3150 - }, - { - "epoch": 2.1521145975443385, - "grad_norm": 1.536423683166504, - "learning_rate": 3.416244761106645e-05, - "loss": 0.008635792136192321, - "step": 3155 - }, - { - "epoch": 2.155525238744884, - "grad_norm": 0.0005219463491812348, - "learning_rate": 3.406035248509918e-05, - "loss": 0.009876561909914016, - "step": 3160 - }, - { - "epoch": 2.15893587994543, - "grad_norm": 0.0003459984145592898, - "learning_rate": 3.395828306440249e-05, - "loss": 0.00039457883685827254, - "step": 3165 - }, - { - "epoch": 2.1623465211459756, - "grad_norm": 0.001981241861358285, - "learning_rate": 3.3856240111764465e-05, - "loss": 0.0015202089212834834, - "step": 3170 - }, - { - "epoch": 2.1657571623465213, - "grad_norm": 2.1673197746276855, - "learning_rate": 3.375422438977536e-05, - "loss": 0.006145225092768669, - "step": 3175 - }, - { - "epoch": 2.169167803547067, - "grad_norm": 1.1808840036392212, - "learning_rate": 3.365223666082195e-05, - "loss": 0.013607437908649444, - "step": 3180 - }, - { - "epoch": 2.1725784447476126, - "grad_norm": 0.028271734714508057, - "learning_rate": 3.3550277687081766e-05, - "loss": 0.0013325599022209645, - "step": 3185 - }, - { - "epoch": 2.1759890859481583, - "grad_norm": 0.24461407959461212, - "learning_rate": 3.344834823051754e-05, - "loss": 0.0011271734721958638, - "step": 3190 - }, - { - "epoch": 2.179399727148704, - "grad_norm": 0.870476245880127, - "learning_rate": 3.334644905287129e-05, - "loss": 0.006120540574193001, - "step": 3195 - }, - { - "epoch": 2.1828103683492497, - "grad_norm": 0.24630939960479736, - "learning_rate": 3.324458091565887e-05, - "loss": 0.006894994527101517, - "step": 3200 - }, - { - "epoch": 2.1862210095497954, - "grad_norm": 0.035023678094148636, - "learning_rate": 3.314274458016407e-05, - "loss": 0.0012451894581317902, - "step": 3205 - }, - { - "epoch": 2.189631650750341, - "grad_norm": 0.0890582948923111, - "learning_rate": 3.3040940807433086e-05, - "loss": 0.0024930678308010103, - "step": 3210 - }, - { - "epoch": 2.1930422919508867, - "grad_norm": 0.30489957332611084, - "learning_rate": 3.2939170358268715e-05, - "loss": 0.023087967932224274, - "step": 3215 - }, - { - "epoch": 2.1964529331514324, - "grad_norm": 0.0017311271512880921, - "learning_rate": 3.283743399322477e-05, - "loss": 0.010184342414140702, - "step": 3220 - }, - { - "epoch": 2.199863574351978, - "grad_norm": 0.08493661880493164, - "learning_rate": 3.273573247260027e-05, - "loss": 0.02805263102054596, - "step": 3225 - }, - { - "epoch": 2.203274215552524, - "grad_norm": 0.09135634452104568, - "learning_rate": 3.2634066556433934e-05, - "loss": 0.0009638279676437378, - "step": 3230 - }, - { - "epoch": 2.2066848567530695, - "grad_norm": 0.05967738851904869, - "learning_rate": 3.2532437004498316e-05, - "loss": 0.007517358660697937, - "step": 3235 - }, - { - "epoch": 2.210095497953615, - "grad_norm": 0.03030387870967388, - "learning_rate": 3.243084457629425e-05, - "loss": 0.0009239451959729194, - "step": 3240 - }, - { - "epoch": 2.213506139154161, - "grad_norm": 0.004568960517644882, - "learning_rate": 3.2329290031045114e-05, - "loss": 0.00045777852647006513, - "step": 3245 - }, - { - "epoch": 2.2169167803547065, - "grad_norm": 0.20760250091552734, - "learning_rate": 3.2227774127691225e-05, - "loss": 0.0007285364903509617, - "step": 3250 - }, - { - "epoch": 2.220327421555252, - "grad_norm": 0.0028919128235429525, - "learning_rate": 3.2126297624884065e-05, - "loss": 0.001136382296681404, - "step": 3255 - }, - { - "epoch": 2.223738062755798, - "grad_norm": 0.0026204732712358236, - "learning_rate": 3.20248612809807e-05, - "loss": 0.0001833780319429934, - "step": 3260 - }, - { - "epoch": 2.2271487039563436, - "grad_norm": 0.054305560886859894, - "learning_rate": 3.192346585403805e-05, - "loss": 0.003986500948667526, - "step": 3265 - }, - { - "epoch": 2.2305593451568897, - "grad_norm": 0.024538133293390274, - "learning_rate": 3.182211210180732e-05, - "loss": 0.0003968174569308758, - "step": 3270 - }, - { - "epoch": 2.2339699863574354, - "grad_norm": 0.003948847763240337, - "learning_rate": 3.172080078172817e-05, - "loss": 0.0004363433923572302, - "step": 3275 - }, - { - "epoch": 2.237380627557981, - "grad_norm": 0.0024372704792767763, - "learning_rate": 3.161953265092322e-05, - "loss": 0.001003190129995346, - "step": 3280 - }, - { - "epoch": 2.2407912687585267, - "grad_norm": 0.05504141375422478, - "learning_rate": 3.1518308466192346e-05, - "loss": 0.00043217958882451056, - "step": 3285 - }, - { - "epoch": 2.2442019099590724, - "grad_norm": 0.003660411573946476, - "learning_rate": 3.141712898400692e-05, - "loss": 0.0005229417700320482, - "step": 3290 - }, - { - "epoch": 2.246930422919509, - "eval_loss": 0.07013023644685745, - "eval_runtime": 0.915, - "eval_samples_per_second": 81.971, - "eval_steps_per_second": 2.186, - "step": 3294 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3294 - }, - { - "epoch": 2.247612551159618, - "grad_norm": 0.0009039652650244534, - "learning_rate": 3.1315994960504354e-05, - "loss": 0.0009505398571491242, - "step": 3295 - }, - { - "epoch": 2.251023192360164, - "grad_norm": 0.0008299718610942364, - "learning_rate": 3.121490715148224e-05, - "loss": 0.006436178088188171, - "step": 3300 - }, - { - "epoch": 2.2544338335607095, - "grad_norm": 0.003360757604241371, - "learning_rate": 3.1113866312392846e-05, - "loss": 0.0004931201227009296, - "step": 3305 - }, - { - "epoch": 2.257844474761255, - "grad_norm": 0.0006958392332307994, - "learning_rate": 3.1012873198337415e-05, - "loss": 0.0008634727448225022, - "step": 3310 - }, - { - "epoch": 2.261255115961801, - "grad_norm": 0.0006489035440608859, - "learning_rate": 3.0911928564060525e-05, - "loss": 0.02126412242650986, - "step": 3315 - }, - { - "epoch": 2.2646657571623465, - "grad_norm": 0.05853112041950226, - "learning_rate": 3.081103316394446e-05, - "loss": 0.0011481027118861674, - "step": 3320 - }, - { - "epoch": 2.268076398362892, - "grad_norm": 0.018470890820026398, - "learning_rate": 3.0710187752003576e-05, - "loss": 0.0005085847340524196, - "step": 3325 - }, - { - "epoch": 2.271487039563438, - "grad_norm": 0.002904064953327179, - "learning_rate": 3.06093930818786e-05, - "loss": 0.0011355782859027385, - "step": 3330 - }, - { - "epoch": 2.2748976807639836, - "grad_norm": 0.006562090013176203, - "learning_rate": 3.0508649906831165e-05, - "loss": 0.0017314480617642402, - "step": 3335 - }, - { - "epoch": 2.2783083219645293, - "grad_norm": 0.012832654640078545, - "learning_rate": 3.040795897973794e-05, - "loss": 0.014564378559589386, - "step": 3340 - }, - { - "epoch": 2.281718963165075, - "grad_norm": 0.40248075127601624, - "learning_rate": 3.030732105308523e-05, - "loss": 0.013111820816993714, - "step": 3345 - }, - { - "epoch": 2.2851296043656206, - "grad_norm": 0.009625518694519997, - "learning_rate": 3.0206736878963198e-05, - "loss": 0.0003735888050869107, - "step": 3350 - }, - { - "epoch": 2.2885402455661663, - "grad_norm": 0.05925761163234711, - "learning_rate": 3.010620720906034e-05, - "loss": 0.0005200970452278852, - "step": 3355 - }, - { - "epoch": 2.291950886766712, - "grad_norm": 0.04488271474838257, - "learning_rate": 3.0005732794657804e-05, - "loss": 0.0017546603456139564, - "step": 3360 - }, - { - "epoch": 2.2953615279672577, - "grad_norm": 0.0013143798569217324, - "learning_rate": 2.990531438662383e-05, - "loss": 0.0006482157856225968, - "step": 3365 - }, - { - "epoch": 2.2987721691678034, - "grad_norm": 0.0018280980875715613, - "learning_rate": 2.980495273540805e-05, - "loss": 0.002798055298626423, - "step": 3370 - }, - { - "epoch": 2.3021828103683495, - "grad_norm": 0.0068644145503640175, - "learning_rate": 2.9704648591036028e-05, - "loss": 0.0010916708968579769, - "step": 3375 - }, - { - "epoch": 2.305593451568895, - "grad_norm": 0.006140770856291056, - "learning_rate": 2.9604402703103482e-05, - "loss": 0.0003204951295629144, - "step": 3380 - }, - { - "epoch": 2.309004092769441, - "grad_norm": 0.01666918210685253, - "learning_rate": 2.9504215820770825e-05, - "loss": 0.002915392816066742, - "step": 3385 - }, - { - "epoch": 2.3124147339699865, - "grad_norm": 0.001569412648677826, - "learning_rate": 2.9404088692757462e-05, - "loss": 0.00282623004168272, - "step": 3390 - }, - { - "epoch": 2.315825375170532, - "grad_norm": 2.6985678672790527, - "learning_rate": 2.930402206733629e-05, - "loss": 0.056363034248352054, - "step": 3395 - }, - { - "epoch": 2.319236016371078, - "grad_norm": 0.061534252017736435, - "learning_rate": 2.9204016692328008e-05, - "loss": 0.002193786948919296, - "step": 3400 - }, - { - "epoch": 2.3226466575716236, - "grad_norm": 0.00724546005949378, - "learning_rate": 2.9104073315095624e-05, - "loss": 0.0027640098705887794, - "step": 3405 - }, - { - "epoch": 2.3260572987721693, - "grad_norm": 0.0014935819199308753, - "learning_rate": 2.900419268253876e-05, - "loss": 0.0014965098351240158, - "step": 3410 - }, - { - "epoch": 2.329467939972715, - "grad_norm": 0.2667955458164215, - "learning_rate": 2.89043755410882e-05, - "loss": 0.0009135601110756397, - "step": 3415 - }, - { - "epoch": 2.3328785811732606, - "grad_norm": 0.015711264684796333, - "learning_rate": 2.8804622636700195e-05, - "loss": 0.0004993634298443794, - "step": 3420 - }, - { - "epoch": 2.3362892223738063, - "grad_norm": 0.000695803901180625, - "learning_rate": 2.8704934714850972e-05, - "loss": 0.0010460540652275085, - "step": 3425 - }, - { - "epoch": 2.339699863574352, - "grad_norm": 0.00030175631400197744, - "learning_rate": 2.8605312520531102e-05, - "loss": 0.0011491063050925732, - "step": 3430 - }, - { - "epoch": 2.3431105047748977, - "grad_norm": 0.0008360512438230217, - "learning_rate": 2.850575679823998e-05, - "loss": 0.005195276811718941, - "step": 3435 - }, - { - "epoch": 2.3465211459754434, - "grad_norm": 0.07670744508504868, - "learning_rate": 2.840626829198022e-05, - "loss": 0.001102046575397253, - "step": 3440 - }, - { - "epoch": 2.349931787175989, - "grad_norm": 0.0048200939781963825, - "learning_rate": 2.8306847745252154e-05, - "loss": 0.00011967071332037449, - "step": 3445 - }, - { - "epoch": 2.3533424283765347, - "grad_norm": 0.0036802536342293024, - "learning_rate": 2.8207495901048164e-05, - "loss": 0.003212982416152954, - "step": 3450 - }, - { - "epoch": 2.3567530695770804, - "grad_norm": 0.0017565820598974824, - "learning_rate": 2.8108213501847284e-05, - "loss": 3.878590650856495e-05, - "step": 3455 - }, - { - "epoch": 2.360163710777626, - "grad_norm": 0.07837986201047897, - "learning_rate": 2.8009001289609514e-05, - "loss": 0.00035386246163398026, - "step": 3460 - }, - { - "epoch": 2.363574351978172, - "grad_norm": 0.035858154296875, - "learning_rate": 2.7909860005770364e-05, - "loss": 0.0020171813666820526, - "step": 3465 - }, - { - "epoch": 2.3669849931787175, - "grad_norm": 0.001313618617132306, - "learning_rate": 2.781079039123525e-05, - "loss": 0.0077533811330795285, - "step": 3470 - }, - { - "epoch": 2.370395634379263, - "grad_norm": 0.022166471928358078, - "learning_rate": 2.771179318637402e-05, - "loss": 0.00021515686530619859, - "step": 3475 - }, - { - "epoch": 2.373806275579809, - "grad_norm": 0.0037807885091751814, - "learning_rate": 2.7612869131015353e-05, - "loss": 0.008334387093782425, - "step": 3480 - }, - { - "epoch": 2.3772169167803545, - "grad_norm": 0.02126333676278591, - "learning_rate": 2.7514018964441313e-05, - "loss": 0.0012980472296476365, - "step": 3485 - }, - { - "epoch": 2.3806275579809, - "grad_norm": 0.0007329506915993989, - "learning_rate": 2.7415243425381707e-05, - "loss": 0.000131706683896482, - "step": 3490 - }, - { - "epoch": 2.384038199181446, - "grad_norm": 0.008962417021393776, - "learning_rate": 2.73165432520087e-05, - "loss": 0.0001407766016200185, - "step": 3495 - }, - { - "epoch": 2.3874488403819916, - "grad_norm": 0.06224314495921135, - "learning_rate": 2.721791918193119e-05, - "loss": 0.0005040234886109829, - "step": 3500 - }, - { - "epoch": 2.3908594815825377, - "grad_norm": 0.007790696807205677, - "learning_rate": 2.7119371952189368e-05, - "loss": 0.00020941467955708503, - "step": 3505 - }, - { - "epoch": 2.3942701227830834, - "grad_norm": 0.1999143660068512, - "learning_rate": 2.7020902299249144e-05, - "loss": 0.0005157966166734696, - "step": 3510 - }, - { - "epoch": 2.397680763983629, - "grad_norm": 1.5223946571350098, - "learning_rate": 2.692251095899673e-05, - "loss": 0.004808775335550308, - "step": 3515 - }, - { - "epoch": 2.4010914051841747, - "grad_norm": 0.0005383774405345321, - "learning_rate": 2.6824198666733024e-05, - "loss": 0.0007459132932126522, - "step": 3520 - }, - { - "epoch": 2.4045020463847204, - "grad_norm": 0.02152041345834732, - "learning_rate": 2.672596615716823e-05, - "loss": 0.010163982212543488, - "step": 3525 - }, - { - "epoch": 2.407912687585266, - "grad_norm": 0.1950986683368683, - "learning_rate": 2.6627814164416303e-05, - "loss": 0.002464359626173973, - "step": 3530 - }, - { - "epoch": 2.411323328785812, - "grad_norm": 0.21561792492866516, - "learning_rate": 2.652974342198947e-05, - "loss": 0.0010975897312164307, - "step": 3535 - }, - { - "epoch": 2.4147339699863575, - "grad_norm": 0.0007951174047775567, - "learning_rate": 2.6431754662792775e-05, - "loss": 6.033455138094723e-05, - "step": 3540 - }, - { - "epoch": 2.418144611186903, - "grad_norm": 0.0016590118175372481, - "learning_rate": 2.633384861911856e-05, - "loss": 0.00012161724735051393, - "step": 3545 - }, - { - "epoch": 2.421555252387449, - "grad_norm": 0.004098537378013134, - "learning_rate": 2.6236026022641047e-05, - "loss": 0.0006160829216241837, - "step": 3550 - }, - { - "epoch": 2.4249658935879945, - "grad_norm": 0.0009240853250958025, - "learning_rate": 2.6138287604410772e-05, - "loss": 8.804704993963242e-05, - "step": 3555 - }, - { - "epoch": 2.42837653478854, - "grad_norm": 0.005952226463705301, - "learning_rate": 2.604063409484928e-05, - "loss": 0.0006035147234797478, - "step": 3560 - }, - { - "epoch": 2.431787175989086, - "grad_norm": 0.03809252381324768, - "learning_rate": 2.5943066223743488e-05, - "loss": 0.00727783590555191, - "step": 3565 - }, - { - "epoch": 2.4351978171896316, - "grad_norm": 0.05054875835776329, - "learning_rate": 2.5845584720240384e-05, - "loss": 0.0082052581012249, - "step": 3570 - }, - { - "epoch": 2.4386084583901773, - "grad_norm": 0.0147855868563056, - "learning_rate": 2.5748190312841466e-05, - "loss": 0.011614852398633958, - "step": 3575 - }, - { - "epoch": 2.442019099590723, - "grad_norm": 0.011641742661595345, - "learning_rate": 2.5650883729397373e-05, - "loss": 0.0002830417361110449, - "step": 3580 - }, - { - "epoch": 2.4454297407912686, - "grad_norm": 0.04626445844769478, - "learning_rate": 2.5553665697102386e-05, - "loss": 0.0003774407086893916, - "step": 3585 - }, - { - "epoch": 2.4488403819918143, - "grad_norm": 0.3234706521034241, - "learning_rate": 2.5456536942489065e-05, - "loss": 0.0009496832266449928, - "step": 3590 - }, - { - "epoch": 2.45225102319236, - "grad_norm": 0.029156841337680817, - "learning_rate": 2.535949819142272e-05, - "loss": 0.0016127176582813262, - "step": 3595 - }, - { - "epoch": 2.4556616643929057, - "grad_norm": 0.0015022120205685496, - "learning_rate": 2.52625501690961e-05, - "loss": 0.00010128046851605177, - "step": 3600 - }, - { - "epoch": 2.459072305593452, - "grad_norm": 0.12954266369342804, - "learning_rate": 2.5165693600023872e-05, - "loss": 0.004440005496144294, - "step": 3605 - }, - { - "epoch": 2.4624829467939975, - "grad_norm": 0.022409839555621147, - "learning_rate": 2.5068929208037295e-05, - "loss": 0.0019246777519583702, - "step": 3610 - }, - { - "epoch": 2.465893587994543, - "grad_norm": 0.0018720730440691113, - "learning_rate": 2.497225771627873e-05, - "loss": 0.004561808705329895, - "step": 3615 - }, - { - "epoch": 2.469304229195089, - "grad_norm": 0.0021158247254788876, - "learning_rate": 2.4875679847196312e-05, - "loss": 0.005481125041842461, - "step": 3620 - }, - { - "epoch": 2.4727148703956345, - "grad_norm": 0.0024307845160365105, - "learning_rate": 2.477919632253845e-05, - "loss": 0.0009140795096755028, - "step": 3625 - }, - { - "epoch": 2.47612551159618, - "grad_norm": 0.0020758784376084805, - "learning_rate": 2.4682807863348583e-05, - "loss": 0.001236506924033165, - "step": 3630 - }, - { - "epoch": 2.479536152796726, - "grad_norm": 0.0006182460929267108, - "learning_rate": 2.4586515189959614e-05, - "loss": 0.00015565860085189342, - "step": 3635 - }, - { - "epoch": 2.4829467939972716, - "grad_norm": 0.05087731033563614, - "learning_rate": 2.4490319021988688e-05, - "loss": 0.00022137174382805825, - "step": 3640 - }, - { - "epoch": 2.4863574351978173, - "grad_norm": 0.03250613436102867, - "learning_rate": 2.4394220078331695e-05, - "loss": 0.00028696306981146336, - "step": 3645 - }, - { - "epoch": 2.489768076398363, - "grad_norm": 0.017168540507555008, - "learning_rate": 2.429821907715798e-05, - "loss": 0.0003641644492745399, - "step": 3650 - }, - { - "epoch": 2.4931787175989086, - "grad_norm": 0.0670199990272522, - "learning_rate": 2.420231673590491e-05, - "loss": 0.00015748695004731418, - "step": 3655 - }, - { - "epoch": 2.4965893587994543, - "grad_norm": 0.003998387139290571, - "learning_rate": 2.4106513771272585e-05, - "loss": 0.00026149852201342585, - "step": 3660 - }, - { - "epoch": 2.4965893587994543, - "eval_loss": 0.06822175532579422, - "eval_runtime": 0.9108, - "eval_samples_per_second": 82.345, - "eval_steps_per_second": 2.196, - "step": 3660 - }, - { - "eval_cer_subset": 0.01675977653631285, - "eval_cer_subset_edit_distance": 123, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3660 - }, - { - "epoch": 2.5, - "grad_norm": 0.0059893373399972916, - "learning_rate": 2.4010810899218384e-05, - "loss": 0.0037302006036043166, - "step": 3665 - }, - { - "epoch": 2.5034106412005457, - "grad_norm": 0.3968847692012787, - "learning_rate": 2.3915208834951736e-05, - "loss": 0.0008235686458647251, - "step": 3670 - }, - { - "epoch": 2.5068212824010914, - "grad_norm": 0.001170233590528369, - "learning_rate": 2.3819708292928645e-05, - "loss": 0.0021816927939653395, - "step": 3675 - }, - { - "epoch": 2.510231923601637, - "grad_norm": 0.0864306092262268, - "learning_rate": 2.3724309986846476e-05, - "loss": 0.00794672966003418, - "step": 3680 - }, - { - "epoch": 2.5136425648021827, - "grad_norm": 0.0012164375511929393, - "learning_rate": 2.362901462963851e-05, - "loss": 0.00014161464059725404, - "step": 3685 - }, - { - "epoch": 2.5170532060027284, - "grad_norm": 0.0047707995399832726, - "learning_rate": 2.353382293346872e-05, - "loss": 0.00012235456379130482, - "step": 3690 - }, - { - "epoch": 2.520463847203274, - "grad_norm": 0.0010226344456896186, - "learning_rate": 2.3438735609726346e-05, - "loss": 0.0006677288562059403, - "step": 3695 - }, - { - "epoch": 2.52387448840382, - "grad_norm": 0.01809096150100231, - "learning_rate": 2.334375336902067e-05, - "loss": 0.0004967927932739257, - "step": 3700 - }, - { - "epoch": 2.5272851296043655, - "grad_norm": 0.006922638975083828, - "learning_rate": 2.3248876921175613e-05, - "loss": 0.0012997164390981196, - "step": 3705 - }, - { - "epoch": 2.530695770804911, - "grad_norm": 0.0002996268740389496, - "learning_rate": 2.315410697522456e-05, - "loss": 5.4457224905490875e-05, - "step": 3710 - }, - { - "epoch": 2.534106412005457, - "grad_norm": 0.00561846699565649, - "learning_rate": 2.3059444239404896e-05, - "loss": 0.0002347052562981844, - "step": 3715 - }, - { - "epoch": 2.5375170532060025, - "grad_norm": 1.200972318649292, - "learning_rate": 2.296488942115287e-05, - "loss": 0.003510555624961853, - "step": 3720 - }, - { - "epoch": 2.540927694406548, - "grad_norm": 0.008847455494105816, - "learning_rate": 2.287044322709819e-05, - "loss": 0.00010497854091227055, - "step": 3725 - }, - { - "epoch": 2.544338335607094, - "grad_norm": 0.0026281927712261677, - "learning_rate": 2.277610636305883e-05, - "loss": 0.001988488808274269, - "step": 3730 - }, - { - "epoch": 2.5477489768076396, - "grad_norm": 0.008025784976780415, - "learning_rate": 2.268187953403568e-05, - "loss": 0.023679326474666595, - "step": 3735 - }, - { - "epoch": 2.5511596180081857, - "grad_norm": 0.03441132605075836, - "learning_rate": 2.258776344420735e-05, - "loss": 0.0004788160789757967, - "step": 3740 - }, - { - "epoch": 2.5545702592087314, - "grad_norm": 3.1458778381347656, - "learning_rate": 2.2493758796924816e-05, - "loss": 0.008043569326400758, - "step": 3745 - }, - { - "epoch": 2.557980900409277, - "grad_norm": 0.002775805303826928, - "learning_rate": 2.2399866294706302e-05, - "loss": 0.0013419794850051403, - "step": 3750 - }, - { - "epoch": 2.5613915416098227, - "grad_norm": 0.0030509193893522024, - "learning_rate": 2.2306086639231857e-05, - "loss": 0.03926021754741669, - "step": 3755 - }, - { - "epoch": 2.5648021828103684, - "grad_norm": 0.0024770230520516634, - "learning_rate": 2.2212420531338248e-05, - "loss": 0.0011906253173947334, - "step": 3760 - }, - { - "epoch": 2.568212824010914, - "grad_norm": 0.007617161609232426, - "learning_rate": 2.2118868671013692e-05, - "loss": 0.008073102682828903, - "step": 3765 - }, - { - "epoch": 2.57162346521146, - "grad_norm": 0.08999158442020416, - "learning_rate": 2.202543175739254e-05, - "loss": 0.0009835162200033665, - "step": 3770 - }, - { - "epoch": 2.5750341064120055, - "grad_norm": 0.010818173177540302, - "learning_rate": 2.193211048875022e-05, - "loss": 0.0019240962341427804, - "step": 3775 - }, - { - "epoch": 2.578444747612551, - "grad_norm": 0.01809680461883545, - "learning_rate": 2.183890556249781e-05, - "loss": 0.005013756453990936, - "step": 3780 - }, - { - "epoch": 2.581855388813097, - "grad_norm": 0.021501798182725906, - "learning_rate": 2.1745817675177027e-05, - "loss": 0.0005870801862329245, - "step": 3785 - }, - { - "epoch": 2.5852660300136425, - "grad_norm": 0.011130684986710548, - "learning_rate": 2.165284752245485e-05, - "loss": 0.00037821107544004916, - "step": 3790 - }, - { - "epoch": 2.588676671214188, - "grad_norm": 0.00624213507398963, - "learning_rate": 2.1559995799118496e-05, - "loss": 0.015400664508342743, - "step": 3795 - }, - { - "epoch": 2.592087312414734, - "grad_norm": 0.23763298988342285, - "learning_rate": 2.1467263199070018e-05, - "loss": 0.0006831173319369555, - "step": 3800 - }, - { - "epoch": 2.5954979536152796, - "grad_norm": 0.0056639909744262695, - "learning_rate": 2.137465041532133e-05, - "loss": 0.002130754478275776, - "step": 3805 - }, - { - "epoch": 2.5989085948158253, - "grad_norm": 0.42903369665145874, - "learning_rate": 2.1282158139988877e-05, - "loss": 0.0020006079226732253, - "step": 3810 - }, - { - "epoch": 2.602319236016371, - "grad_norm": 0.014364579692482948, - "learning_rate": 2.118978706428854e-05, - "loss": 0.0005437508225440979, - "step": 3815 - }, - { - "epoch": 2.6057298772169166, - "grad_norm": 0.017512807622551918, - "learning_rate": 2.1097537878530427e-05, - "loss": 0.00019666440784931182, - "step": 3820 - }, - { - "epoch": 2.6091405184174628, - "grad_norm": 0.00863230973482132, - "learning_rate": 2.100541127211379e-05, - "loss": 0.0001873808912932873, - "step": 3825 - }, - { - "epoch": 2.6125511596180084, - "grad_norm": 0.006781345698982477, - "learning_rate": 2.0913407933521714e-05, - "loss": 0.00018554476555436849, - "step": 3830 - }, - { - "epoch": 2.615961800818554, - "grad_norm": 0.004758995026350021, - "learning_rate": 2.082152855031618e-05, - "loss": 0.0004659180995076895, - "step": 3835 - }, - { - "epoch": 2.6193724420191, - "grad_norm": 0.0036142354365438223, - "learning_rate": 2.0729773809132782e-05, - "loss": 0.00033613520208746194, - "step": 3840 - }, - { - "epoch": 2.6227830832196455, - "grad_norm": 0.0798744410276413, - "learning_rate": 2.0638144395675614e-05, - "loss": 0.00026304563507437704, - "step": 3845 - }, - { - "epoch": 2.626193724420191, - "grad_norm": 0.07229600101709366, - "learning_rate": 2.0546640994712183e-05, - "loss": 0.022786998748779298, - "step": 3850 - }, - { - "epoch": 2.629604365620737, - "grad_norm": 0.11226585507392883, - "learning_rate": 2.04552642900683e-05, - "loss": 0.0002580304862931371, - "step": 3855 - }, - { - "epoch": 2.6330150068212825, - "grad_norm": 0.0019689116161316633, - "learning_rate": 2.036401496462292e-05, - "loss": 0.005474040284752846, - "step": 3860 - }, - { - "epoch": 2.636425648021828, - "grad_norm": 0.08611829578876495, - "learning_rate": 2.027289370030307e-05, - "loss": 0.0007106051780283451, - "step": 3865 - }, - { - "epoch": 2.639836289222374, - "grad_norm": 0.06968124955892563, - "learning_rate": 2.0181901178078723e-05, - "loss": 0.00030497927218675613, - "step": 3870 - }, - { - "epoch": 2.6432469304229196, - "grad_norm": 0.002102258615195751, - "learning_rate": 2.0091038077957807e-05, - "loss": 0.00039041375275701285, - "step": 3875 - }, - { - "epoch": 2.6466575716234653, - "grad_norm": 0.01055186241865158, - "learning_rate": 2.000030507898094e-05, - "loss": 0.00028035915456712244, - "step": 3880 - }, - { - "epoch": 2.650068212824011, - "grad_norm": 0.013122744858264923, - "learning_rate": 1.990970285921656e-05, - "loss": 0.0002463514683768153, - "step": 3885 - }, - { - "epoch": 2.6534788540245566, - "grad_norm": 0.043785031884908676, - "learning_rate": 1.9819232095755712e-05, - "loss": 0.0006866191513836383, - "step": 3890 - }, - { - "epoch": 2.6568894952251023, - "grad_norm": 0.014347897842526436, - "learning_rate": 1.9728893464707063e-05, - "loss": 0.00304874274879694, - "step": 3895 - }, - { - "epoch": 2.660300136425648, - "grad_norm": 0.01495263073593378, - "learning_rate": 1.9638687641191784e-05, - "loss": 0.0027243653312325478, - "step": 3900 - }, - { - "epoch": 2.6637107776261937, - "grad_norm": 0.0025812601670622826, - "learning_rate": 1.954861529933862e-05, - "loss": 0.00015772593906149268, - "step": 3905 - }, - { - "epoch": 2.6671214188267394, - "grad_norm": 3.2978317737579346, - "learning_rate": 1.9458677112278677e-05, - "loss": 0.011941131204366684, - "step": 3910 - }, - { - "epoch": 2.670532060027285, - "grad_norm": 0.00819153431802988, - "learning_rate": 1.936887375214059e-05, - "loss": 0.0019363060593605042, - "step": 3915 - }, - { - "epoch": 2.6739427012278307, - "grad_norm": 0.3553819954395294, - "learning_rate": 1.9279205890045335e-05, - "loss": 0.001681213453412056, - "step": 3920 - }, - { - "epoch": 2.6773533424283764, - "grad_norm": 0.14068304002285004, - "learning_rate": 1.9189674196101303e-05, - "loss": 0.0004354804754257202, - "step": 3925 - }, - { - "epoch": 2.680763983628922, - "grad_norm": 0.05331770330667496, - "learning_rate": 1.9100279339399258e-05, - "loss": 0.0006728332955390215, - "step": 3930 - }, - { - "epoch": 2.684174624829468, - "grad_norm": 0.010825222358107567, - "learning_rate": 1.9011021988007387e-05, - "loss": 0.011760103702545165, - "step": 3935 - }, - { - "epoch": 2.6875852660300135, - "grad_norm": 0.02598944492638111, - "learning_rate": 1.892190280896622e-05, - "loss": 0.00020915823988616468, - "step": 3940 - }, - { - "epoch": 2.690995907230559, - "grad_norm": 0.00512358546257019, - "learning_rate": 1.8832922468283724e-05, - "loss": 0.000882271584123373, - "step": 3945 - }, - { - "epoch": 2.694406548431105, - "grad_norm": 0.05068441852927208, - "learning_rate": 1.874408163093028e-05, - "loss": 0.000997264590114355, - "step": 3950 - }, - { - "epoch": 2.6978171896316505, - "grad_norm": 0.0038104017730802298, - "learning_rate": 1.8655380960833724e-05, - "loss": 0.001553349569439888, - "step": 3955 - }, - { - "epoch": 2.701227830832196, - "grad_norm": 0.0013087299885228276, - "learning_rate": 1.8566821120874394e-05, - "loss": 0.006560490280389786, - "step": 3960 - }, - { - "epoch": 2.704638472032742, - "grad_norm": 1.1628080606460571, - "learning_rate": 1.8478402772880208e-05, - "loss": 0.0015312742441892623, - "step": 3965 - }, - { - "epoch": 2.708049113233288, - "grad_norm": 0.0020620303694158792, - "learning_rate": 1.8390126577621636e-05, - "loss": 0.013011389970779419, - "step": 3970 - }, - { - "epoch": 2.7114597544338337, - "grad_norm": 0.0014427551068365574, - "learning_rate": 1.830199319480682e-05, - "loss": 0.0008381184190511704, - "step": 3975 - }, - { - "epoch": 2.7148703956343794, - "grad_norm": 0.0005985202733427286, - "learning_rate": 1.821400328307663e-05, - "loss": 0.0005598202813416719, - "step": 3980 - }, - { - "epoch": 2.718281036834925, - "grad_norm": 0.0016122297383844852, - "learning_rate": 1.8126157499999783e-05, - "loss": 0.006013911962509155, - "step": 3985 - }, - { - "epoch": 2.7216916780354707, - "grad_norm": 0.0028895260766148567, - "learning_rate": 1.8038456502067822e-05, - "loss": 0.00017103723948821425, - "step": 3990 - }, - { - "epoch": 2.7251023192360164, - "grad_norm": 0.09400962293148041, - "learning_rate": 1.7950900944690308e-05, - "loss": 0.07410463690757751, - "step": 3995 - }, - { - "epoch": 2.728512960436562, - "grad_norm": 0.015019465237855911, - "learning_rate": 1.786349148218993e-05, - "loss": 0.004524913057684899, - "step": 4000 - }, - { - "epoch": 2.731923601637108, - "grad_norm": 0.000663114245980978, - "learning_rate": 1.7776228767797522e-05, - "loss": 0.0212590754032135, - "step": 4005 - }, - { - "epoch": 2.7353342428376535, - "grad_norm": 0.0029272777028381824, - "learning_rate": 1.768911345364726e-05, - "loss": 0.000913316011428833, - "step": 4010 - }, - { - "epoch": 2.738744884038199, - "grad_norm": 0.03791525587439537, - "learning_rate": 1.7602146190771743e-05, - "loss": 0.0018313366919755936, - "step": 4015 - }, - { - "epoch": 2.742155525238745, - "grad_norm": 0.041133999824523926, - "learning_rate": 1.7515327629097217e-05, - "loss": 0.0006253012455999851, - "step": 4020 - }, - { - "epoch": 2.7455661664392905, - "grad_norm": 0.01035034004598856, - "learning_rate": 1.7428658417438534e-05, - "loss": 0.005944912880659103, - "step": 4025 - }, - { - "epoch": 2.7462482946793996, - "eval_loss": 0.06961391866207123, - "eval_runtime": 0.9223, - "eval_samples_per_second": 81.321, - "eval_steps_per_second": 2.169, - "step": 4026 - }, - { - "eval_cer_subset": 0.015397193078076032, - "eval_cer_subset_edit_distance": 113, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4026 - }, - { - "epoch": 2.748976807639836, - "grad_norm": 0.9081467986106873, - "learning_rate": 1.7342139203494537e-05, - "loss": 0.0036753010004758834, - "step": 4030 - }, - { - "epoch": 2.752387448840382, - "grad_norm": 0.07232939451932907, - "learning_rate": 1.7255770633843028e-05, - "loss": 0.0029829120263457297, - "step": 4035 - }, - { - "epoch": 2.7557980900409276, - "grad_norm": 0.005813417956233025, - "learning_rate": 1.7169553353936035e-05, - "loss": 0.00047225411981344223, - "step": 4040 - }, - { - "epoch": 2.7592087312414733, - "grad_norm": 0.2354760318994522, - "learning_rate": 1.7083488008094945e-05, - "loss": 0.0015884984284639358, - "step": 4045 - }, - { - "epoch": 2.762619372442019, - "grad_norm": 0.1201627105474472, - "learning_rate": 1.699757523950577e-05, - "loss": 0.0010768620297312737, - "step": 4050 - }, - { - "epoch": 2.766030013642565, - "grad_norm": 0.033547814935445786, - "learning_rate": 1.6911815690214166e-05, - "loss": 0.01052093282341957, - "step": 4055 - }, - { - "epoch": 2.7694406548431107, - "grad_norm": 0.01014826912432909, - "learning_rate": 1.682621000112085e-05, - "loss": 0.0003362501040101051, - "step": 4060 - }, - { - "epoch": 2.7728512960436564, - "grad_norm": 0.004405386745929718, - "learning_rate": 1.6740758811976665e-05, - "loss": 0.00044001247733831405, - "step": 4065 - }, - { - "epoch": 2.776261937244202, - "grad_norm": 3.621004104614258, - "learning_rate": 1.665546276137783e-05, - "loss": 0.18638403415679933, - "step": 4070 - }, - { - "epoch": 2.779672578444748, - "grad_norm": 0.01203183177858591, - "learning_rate": 1.6570322486761184e-05, - "loss": 0.00013435594737529755, - "step": 4075 - }, - { - "epoch": 2.7830832196452935, - "grad_norm": 0.23053398728370667, - "learning_rate": 1.6485338624399445e-05, - "loss": 0.0010434269905090332, - "step": 4080 - }, - { - "epoch": 2.786493860845839, - "grad_norm": 0.0109801534563303, - "learning_rate": 1.6400511809396394e-05, - "loss": 0.00012704560067504643, - "step": 4085 - }, - { - "epoch": 2.789904502046385, - "grad_norm": 0.8463883399963379, - "learning_rate": 1.631584267568217e-05, - "loss": 0.007707947492599487, - "step": 4090 - }, - { - "epoch": 2.7933151432469305, - "grad_norm": 0.05444789677858353, - "learning_rate": 1.623133185600852e-05, - "loss": 0.0008411366492509842, - "step": 4095 - }, - { - "epoch": 2.796725784447476, - "grad_norm": 0.007274657487869263, - "learning_rate": 1.6146979981944095e-05, - "loss": 0.0002061120932921767, - "step": 4100 - }, - { - "epoch": 2.800136425648022, - "grad_norm": 0.013262615539133549, - "learning_rate": 1.6062787683869667e-05, - "loss": 0.0003235015319660306, - "step": 4105 - }, - { - "epoch": 2.8035470668485676, - "grad_norm": 0.0030244409572333097, - "learning_rate": 1.597875559097352e-05, - "loss": 0.0007286245469003916, - "step": 4110 - }, - { - "epoch": 2.8069577080491133, - "grad_norm": 0.0021644949447363615, - "learning_rate": 1.5894884331246632e-05, - "loss": 0.0003056209534406662, - "step": 4115 - }, - { - "epoch": 2.810368349249659, - "grad_norm": 0.014495057985186577, - "learning_rate": 1.5811174531478074e-05, - "loss": 0.0014359142631292343, - "step": 4120 - }, - { - "epoch": 2.8137789904502046, - "grad_norm": 0.05203676223754883, - "learning_rate": 1.5727626817250255e-05, - "loss": 0.0006342739332467317, - "step": 4125 - }, - { - "epoch": 2.8171896316507503, - "grad_norm": 0.004998183809220791, - "learning_rate": 1.564424181293435e-05, - "loss": 0.00013386564096435906, - "step": 4130 - }, - { - "epoch": 2.820600272851296, - "grad_norm": 0.05428452417254448, - "learning_rate": 1.556102014168546e-05, - "loss": 0.00014423681423068047, - "step": 4135 - }, - { - "epoch": 2.8240109140518417, - "grad_norm": 0.0013511159922927618, - "learning_rate": 1.5477962425438164e-05, - "loss": 6.5605464624241e-05, - "step": 4140 - }, - { - "epoch": 2.8274215552523874, - "grad_norm": 0.9757132530212402, - "learning_rate": 1.539506928490171e-05, - "loss": 0.00839839205145836, - "step": 4145 - }, - { - "epoch": 2.830832196452933, - "grad_norm": 0.0018282996024936438, - "learning_rate": 1.5312341339555445e-05, - "loss": 0.005959897115826607, - "step": 4150 - }, - { - "epoch": 2.8342428376534787, - "grad_norm": 0.0022353942040354013, - "learning_rate": 1.5229779207644171e-05, - "loss": 0.000250368332490325, - "step": 4155 - }, - { - "epoch": 2.8376534788540244, - "grad_norm": 0.006538774352520704, - "learning_rate": 1.5147383506173572e-05, - "loss": 0.0004641829524189234, - "step": 4160 - }, - { - "epoch": 2.84106412005457, - "grad_norm": 0.003464010776951909, - "learning_rate": 1.5065154850905465e-05, - "loss": 0.008368657529354095, - "step": 4165 - }, - { - "epoch": 2.844474761255116, - "grad_norm": 0.0020767974201589823, - "learning_rate": 1.4983093856353398e-05, - "loss": 0.00010023106588050724, - "step": 4170 - }, - { - "epoch": 2.8478854024556615, - "grad_norm": 0.0025670777540653944, - "learning_rate": 1.4901201135777887e-05, - "loss": 0.0006715046241879463, - "step": 4175 - }, - { - "epoch": 2.851296043656207, - "grad_norm": 0.0006886612391099334, - "learning_rate": 1.4819477301181915e-05, - "loss": 0.0008357623592019081, - "step": 4180 - }, - { - "epoch": 2.854706684856753, - "grad_norm": 0.0023747060913592577, - "learning_rate": 1.4737922963306332e-05, - "loss": 0.00488339364528656, - "step": 4185 - }, - { - "epoch": 2.8581173260572985, - "grad_norm": 0.0030493123922497034, - "learning_rate": 1.4656538731625333e-05, - "loss": 0.017219077050685882, - "step": 4190 - }, - { - "epoch": 2.8615279672578446, - "grad_norm": 0.0030935786198824644, - "learning_rate": 1.457532521434184e-05, - "loss": 0.00014684826601296663, - "step": 4195 - }, - { - "epoch": 2.8649386084583903, - "grad_norm": 0.0004102849052287638, - "learning_rate": 1.4494283018382991e-05, - "loss": 0.0002242558402940631, - "step": 4200 - }, - { - "epoch": 2.868349249658936, - "grad_norm": 1.6695232391357422, - "learning_rate": 1.4413412749395593e-05, - "loss": 0.01916976124048233, - "step": 4205 - }, - { - "epoch": 2.8717598908594817, - "grad_norm": 0.003357100998982787, - "learning_rate": 1.4332715011741656e-05, - "loss": 0.0036146264523267747, - "step": 4210 - }, - { - "epoch": 2.8751705320600274, - "grad_norm": 0.002434425987303257, - "learning_rate": 1.425219040849373e-05, - "loss": 0.0001186407171189785, - "step": 4215 - }, - { - "epoch": 2.878581173260573, - "grad_norm": 0.0272241048514843, - "learning_rate": 1.4171839541430586e-05, - "loss": 0.003773893415927887, - "step": 4220 - }, - { - "epoch": 2.8819918144611187, - "grad_norm": 0.00031364246387965977, - "learning_rate": 1.409166301103257e-05, - "loss": 3.483370819594711e-05, - "step": 4225 - }, - { - "epoch": 2.8854024556616644, - "grad_norm": 0.008605693466961384, - "learning_rate": 1.4011661416477186e-05, - "loss": 0.005898609757423401, - "step": 4230 - }, - { - "epoch": 2.88881309686221, - "grad_norm": 0.0003439450520090759, - "learning_rate": 1.3931835355634601e-05, - "loss": 0.0017080994322896004, - "step": 4235 - }, - { - "epoch": 2.892223738062756, - "grad_norm": 0.004427058156579733, - "learning_rate": 1.3852185425063181e-05, - "loss": 0.00010978456120938062, - "step": 4240 - }, - { - "epoch": 2.8956343792633015, - "grad_norm": 0.02229383960366249, - "learning_rate": 1.377271222000503e-05, - "loss": 0.0012531550601124763, - "step": 4245 - }, - { - "epoch": 2.899045020463847, - "grad_norm": 0.004171700682491064, - "learning_rate": 1.3693416334381517e-05, - "loss": 0.0012122373096644878, - "step": 4250 - }, - { - "epoch": 2.902455661664393, - "grad_norm": 0.0016778658609837294, - "learning_rate": 1.3614298360788924e-05, - "loss": 0.0006234514527022839, - "step": 4255 - }, - { - "epoch": 2.9058663028649385, - "grad_norm": 0.00403103232383728, - "learning_rate": 1.3535358890493897e-05, - "loss": 0.00030033572111278775, - "step": 4260 - }, - { - "epoch": 2.909276944065484, - "grad_norm": 0.006126928608864546, - "learning_rate": 1.3456598513429111e-05, - "loss": 0.014299000799655914, - "step": 4265 - }, - { - "epoch": 2.91268758526603, - "grad_norm": 0.0007882033823989332, - "learning_rate": 1.3378017818188815e-05, - "loss": 0.001049484871327877, - "step": 4270 - }, - { - "epoch": 2.9160982264665756, - "grad_norm": 0.0004515725013334304, - "learning_rate": 1.329961739202451e-05, - "loss": 6.776668014936149e-05, - "step": 4275 - }, - { - "epoch": 2.9195088676671213, - "grad_norm": 0.3738904297351837, - "learning_rate": 1.3221397820840419e-05, - "loss": 0.0010396759025752544, - "step": 4280 - }, - { - "epoch": 2.9229195088676674, - "grad_norm": 0.035881806164979935, - "learning_rate": 1.3143359689189279e-05, - "loss": 0.0003127899952232838, - "step": 4285 - }, - { - "epoch": 2.926330150068213, - "grad_norm": 0.010501476936042309, - "learning_rate": 1.306550358026784e-05, - "loss": 0.013835662603378296, - "step": 4290 - }, - { - "epoch": 2.9297407912687587, - "grad_norm": 0.002153329784050584, - "learning_rate": 1.2987830075912565e-05, - "loss": 0.007172297686338425, - "step": 4295 - }, - { - "epoch": 2.9331514324693044, - "grad_norm": 0.000594582874327898, - "learning_rate": 1.2910339756595254e-05, - "loss": 6.662132800556719e-05, - "step": 4300 - }, - { - "epoch": 2.93656207366985, - "grad_norm": 1.2802950143814087, - "learning_rate": 1.283303320141879e-05, - "loss": 0.0013225926086306572, - "step": 4305 - }, - { - "epoch": 2.939972714870396, - "grad_norm": 0.0010621119290590286, - "learning_rate": 1.2755910988112639e-05, - "loss": 0.0001950544072315097, - "step": 4310 - }, - { - "epoch": 2.9433833560709415, - "grad_norm": 0.0004629544273484498, - "learning_rate": 1.2678973693028735e-05, - "loss": 0.0002407266292721033, - "step": 4315 - }, - { - "epoch": 2.946793997271487, - "grad_norm": 0.00353289395570755, - "learning_rate": 1.2602221891137021e-05, - "loss": 0.006271860748529434, - "step": 4320 - }, - { - "epoch": 2.950204638472033, - "grad_norm": 0.021108930930495262, - "learning_rate": 1.2525656156021227e-05, - "loss": 0.007909800857305527, - "step": 4325 - }, - { - "epoch": 2.9536152796725785, - "grad_norm": 0.007604979444295168, - "learning_rate": 1.2449277059874547e-05, - "loss": 0.00022456045262515546, - "step": 4330 - }, - { - "epoch": 2.957025920873124, - "grad_norm": 0.0007357494323514402, - "learning_rate": 1.2373085173495411e-05, - "loss": 0.0010289529338479042, - "step": 4335 - }, - { - "epoch": 2.96043656207367, - "grad_norm": 0.0004920829669572413, - "learning_rate": 1.229708106628316e-05, - "loss": 8.303072536364198e-05, - "step": 4340 - }, - { - "epoch": 2.9638472032742156, - "grad_norm": 0.0014207189669832587, - "learning_rate": 1.2221265306233824e-05, - "loss": 0.001923336647450924, - "step": 4345 - }, - { - "epoch": 2.9672578444747613, - "grad_norm": 0.00898696668446064, - "learning_rate": 1.2145638459935863e-05, - "loss": 0.01918397843837738, - "step": 4350 - }, - { - "epoch": 2.970668485675307, - "grad_norm": 0.08236313611268997, - "learning_rate": 1.2070201092565988e-05, - "loss": 0.0005465132184326648, - "step": 4355 - }, - { - "epoch": 2.9740791268758526, - "grad_norm": 0.00796231534332037, - "learning_rate": 1.199495376788481e-05, - "loss": 4.8452542978338896e-05, - "step": 4360 - }, - { - "epoch": 2.9774897680763983, - "grad_norm": 0.026452092453837395, - "learning_rate": 1.1919897048232791e-05, - "loss": 0.0006576513405889273, - "step": 4365 - }, - { - "epoch": 2.980900409276944, - "grad_norm": 0.08616425842046738, - "learning_rate": 1.1845031494525901e-05, - "loss": 0.0013777482323348521, - "step": 4370 - }, - { - "epoch": 2.9843110504774897, - "grad_norm": 0.00048112327931448817, - "learning_rate": 1.1770357666251509e-05, - "loss": 0.0018716825172305108, - "step": 4375 - }, - { - "epoch": 2.9877216916780354, - "grad_norm": 0.0013632692862302065, - "learning_rate": 1.1695876121464154e-05, - "loss": 4.9980584299191834e-05, - "step": 4380 - }, - { - "epoch": 2.991132332878581, - "grad_norm": 1.318718671798706, - "learning_rate": 1.1621587416781445e-05, - "loss": 0.013328136503696441, - "step": 4385 - }, - { - "epoch": 2.9945429740791267, - "grad_norm": 0.00219643022865057, - "learning_rate": 1.1547492107379782e-05, - "loss": 9.902374586090446e-05, - "step": 4390 - }, - { - "epoch": 2.9959072305593453, - "eval_loss": 0.07537060230970383, - "eval_runtime": 0.95, - "eval_samples_per_second": 78.945, - "eval_steps_per_second": 2.105, - "step": 4392 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4392 - }, - { - "epoch": 2.9979536152796724, - "grad_norm": 0.003875225316733122, - "learning_rate": 1.1473590746990342e-05, - "loss": 0.0025784535333514213, - "step": 4395 - }, - { - "epoch": 3.001364256480218, - "grad_norm": 0.011882650665938854, - "learning_rate": 1.1399883887894846e-05, - "loss": 6.913430406711996e-05, - "step": 4400 - }, - { - "epoch": 3.004774897680764, - "grad_norm": 0.002775507280603051, - "learning_rate": 1.1326372080921464e-05, - "loss": 0.00019087132532149553, - "step": 4405 - }, - { - "epoch": 3.00818553888131, - "grad_norm": 0.06267738342285156, - "learning_rate": 1.125305587544069e-05, - "loss": 0.0007432831451296806, - "step": 4410 - }, - { - "epoch": 3.0115961800818556, - "grad_norm": 0.026564784348011017, - "learning_rate": 1.1179935819361272e-05, - "loss": 0.00015866300091147423, - "step": 4415 - }, - { - "epoch": 3.0150068212824013, - "grad_norm": 0.0011134434025734663, - "learning_rate": 1.1107012459126064e-05, - "loss": 0.0005193403456360101, - "step": 4420 - }, - { - "epoch": 3.018417462482947, - "grad_norm": 0.0006882239249534905, - "learning_rate": 1.1034286339707975e-05, - "loss": 0.0002253461629152298, - "step": 4425 - }, - { - "epoch": 3.0218281036834926, - "grad_norm": 0.008092721924185753, - "learning_rate": 1.0961758004605873e-05, - "loss": 0.0003596893046051264, - "step": 4430 - }, - { - "epoch": 3.0252387448840383, - "grad_norm": 0.01083564292639494, - "learning_rate": 1.0889427995840585e-05, - "loss": 0.010776457190513612, - "step": 4435 - }, - { - "epoch": 3.028649386084584, - "grad_norm": 0.040309611707925797, - "learning_rate": 1.0817296853950724e-05, - "loss": 0.0002762762364000082, - "step": 4440 - }, - { - "epoch": 3.0320600272851297, - "grad_norm": 0.0026077954098582268, - "learning_rate": 1.0745365117988804e-05, - "loss": 0.00011963967699557543, - "step": 4445 - }, - { - "epoch": 3.0354706684856754, - "grad_norm": 0.0013187731383368373, - "learning_rate": 1.0673633325517088e-05, - "loss": 0.0022230114787817, - "step": 4450 - }, - { - "epoch": 3.038881309686221, - "grad_norm": 0.08760128915309906, - "learning_rate": 1.060210201260362e-05, - "loss": 0.00029163951985538005, - "step": 4455 - }, - { - "epoch": 3.0422919508867667, - "grad_norm": 0.2557981610298157, - "learning_rate": 1.0530771713818229e-05, - "loss": 0.0014749299734830856, - "step": 4460 - }, - { - "epoch": 3.0457025920873124, - "grad_norm": 0.009532714262604713, - "learning_rate": 1.0459642962228502e-05, - "loss": 0.00023887362331151963, - "step": 4465 - }, - { - "epoch": 3.049113233287858, - "grad_norm": 0.002977812895551324, - "learning_rate": 1.0388716289395833e-05, - "loss": 3.836472751572728e-05, - "step": 4470 - }, - { - "epoch": 3.052523874488404, - "grad_norm": 0.0007074066670611501, - "learning_rate": 1.0317992225371411e-05, - "loss": 7.527543348260224e-05, - "step": 4475 - }, - { - "epoch": 3.0559345156889495, - "grad_norm": 0.020919082686305046, - "learning_rate": 1.0247471298692336e-05, - "loss": 0.00021068421192467214, - "step": 4480 - }, - { - "epoch": 3.059345156889495, - "grad_norm": 0.0012850193306803703, - "learning_rate": 1.0177154036377557e-05, - "loss": 0.00027780483942478894, - "step": 4485 - }, - { - "epoch": 3.062755798090041, - "grad_norm": 0.030514074489474297, - "learning_rate": 1.0107040963924027e-05, - "loss": 0.00020711682736873627, - "step": 4490 - }, - { - "epoch": 3.0661664392905865, - "grad_norm": 0.001317308866418898, - "learning_rate": 1.0037132605302716e-05, - "loss": 0.00024969261139631274, - "step": 4495 - }, - { - "epoch": 3.069577080491132, - "grad_norm": 0.0012978437589481473, - "learning_rate": 9.967429482954768e-06, - "loss": 0.0001504249172285199, - "step": 4500 - }, - { - "epoch": 3.072987721691678, - "grad_norm": 0.0003166501992382109, - "learning_rate": 9.897932117787476e-06, - "loss": 0.0002773872809484601, - "step": 4505 - }, - { - "epoch": 3.0763983628922236, - "grad_norm": 0.0029924868140369654, - "learning_rate": 9.828641029170544e-06, - "loss": 0.0005509680602699518, - "step": 4510 - }, - { - "epoch": 3.0798090040927693, - "grad_norm": 0.003611995605751872, - "learning_rate": 9.759556734932064e-06, - "loss": 0.0004832141101360321, - "step": 4515 - }, - { - "epoch": 3.083219645293315, - "grad_norm": 0.003923687152564526, - "learning_rate": 9.690679751354736e-06, - "loss": 0.00014967764727771282, - "step": 4520 - }, - { - "epoch": 3.086630286493861, - "grad_norm": 0.012491169385612011, - "learning_rate": 9.62201059317195e-06, - "loss": 8.488112362101675e-05, - "step": 4525 - }, - { - "epoch": 3.0900409276944067, - "grad_norm": 0.00011767258547479287, - "learning_rate": 9.553549773564035e-06, - "loss": 5.874955677427351e-05, - "step": 4530 - }, - { - "epoch": 3.0934515688949524, - "grad_norm": 0.021170401945710182, - "learning_rate": 9.48529780415427e-06, - "loss": 0.00013614417985081674, - "step": 4535 - }, - { - "epoch": 3.096862210095498, - "grad_norm": 0.005378399509936571, - "learning_rate": 9.417255195005218e-06, - "loss": 0.00010139571968466043, - "step": 4540 - }, - { - "epoch": 3.100272851296044, - "grad_norm": 0.0013061281060799956, - "learning_rate": 9.349422454614815e-06, - "loss": 0.0004999907687306404, - "step": 4545 - }, - { - "epoch": 3.1036834924965895, - "grad_norm": 0.000693993701133877, - "learning_rate": 9.281800089912605e-06, - "loss": 0.0001406701048836112, - "step": 4550 - }, - { - "epoch": 3.107094133697135, - "grad_norm": 0.003168008290231228, - "learning_rate": 9.214388606255934e-06, - "loss": 0.0002458775183185935, - "step": 4555 - }, - { - "epoch": 3.110504774897681, - "grad_norm": 0.001224424922838807, - "learning_rate": 9.147188507426224e-06, - "loss": 5.902486154809594e-05, - "step": 4560 - }, - { - "epoch": 3.1139154160982265, - "grad_norm": 0.0006458029965870082, - "learning_rate": 9.080200295625125e-06, - "loss": 6.971908733248711e-05, - "step": 4565 - }, - { - "epoch": 3.117326057298772, - "grad_norm": 0.0004500233626458794, - "learning_rate": 9.013424471470832e-06, - "loss": 4.827580996789038e-05, - "step": 4570 - }, - { - "epoch": 3.120736698499318, - "grad_norm": 0.008253362029790878, - "learning_rate": 8.946861533994316e-06, - "loss": 0.00010066803079098464, - "step": 4575 - }, - { - "epoch": 3.1241473396998636, - "grad_norm": 0.0027607178781181574, - "learning_rate": 8.88051198063559e-06, - "loss": 0.00010761913144961, - "step": 4580 - }, - { - "epoch": 3.1275579809004093, - "grad_norm": 0.0012132832780480385, - "learning_rate": 8.81437630723999e-06, - "loss": 0.00010583751136437059, - "step": 4585 - }, - { - "epoch": 3.130968622100955, - "grad_norm": 0.013205752708017826, - "learning_rate": 8.748455008054519e-06, - "loss": 7.872265996411443e-05, - "step": 4590 - }, - { - "epoch": 3.1343792633015006, - "grad_norm": 0.010380366817116737, - "learning_rate": 8.682748575724071e-06, - "loss": 0.00027635702863335607, - "step": 4595 - }, - { - "epoch": 3.1377899045020463, - "grad_norm": 0.012770955450832844, - "learning_rate": 8.617257501287805e-06, - "loss": 0.00028360043652355673, - "step": 4600 - }, - { - "epoch": 3.141200545702592, - "grad_norm": 0.012632913887500763, - "learning_rate": 8.551982274175449e-06, - "loss": 4.925676621496678e-05, - "step": 4605 - }, - { - "epoch": 3.1446111869031377, - "grad_norm": 0.0028189525473862886, - "learning_rate": 8.486923382203703e-06, - "loss": 0.0039628144353628155, - "step": 4610 - }, - { - "epoch": 3.1480218281036834, - "grad_norm": 0.11367341130971909, - "learning_rate": 8.422081311572464e-06, - "loss": 0.000568081671372056, - "step": 4615 - }, - { - "epoch": 3.151432469304229, - "grad_norm": 0.00095061567844823, - "learning_rate": 8.35745654686135e-06, - "loss": 0.00036408030427992344, - "step": 4620 - }, - { - "epoch": 3.1548431105047747, - "grad_norm": 0.05772553011775017, - "learning_rate": 8.29304957102596e-06, - "loss": 0.0006427288055419922, - "step": 4625 - }, - { - "epoch": 3.1582537517053204, - "grad_norm": 0.017082368955016136, - "learning_rate": 8.22886086539432e-06, - "loss": 0.00015330149326473475, - "step": 4630 - }, - { - "epoch": 3.161664392905866, - "grad_norm": 0.0033851212356239557, - "learning_rate": 8.164890909663256e-06, - "loss": 0.00012121353065595031, - "step": 4635 - }, - { - "epoch": 3.1650750341064118, - "grad_norm": 0.000577523373067379, - "learning_rate": 8.101140181894868e-06, - "loss": 7.793278782628477e-05, - "step": 4640 - }, - { - "epoch": 3.168485675306958, - "grad_norm": 0.00039884017314761877, - "learning_rate": 8.037609158512875e-06, - "loss": 0.0014324543066322804, - "step": 4645 - }, - { - "epoch": 3.1718963165075036, - "grad_norm": 0.004100058693438768, - "learning_rate": 7.97429831429911e-06, - "loss": 0.00015990985557436943, - "step": 4650 - }, - { - "epoch": 3.1753069577080493, - "grad_norm": 0.005648438818752766, - "learning_rate": 7.911208122389956e-06, - "loss": 8.149745990522206e-05, - "step": 4655 - }, - { - "epoch": 3.178717598908595, - "grad_norm": 0.005042952951043844, - "learning_rate": 7.848339054272808e-06, - "loss": 9.191314456984401e-05, - "step": 4660 - }, - { - "epoch": 3.1821282401091406, - "grad_norm": 0.0007950706058181822, - "learning_rate": 7.785691579782546e-06, - "loss": 7.44381221011281e-05, - "step": 4665 - }, - { - "epoch": 3.1855388813096863, - "grad_norm": 0.0717335194349289, - "learning_rate": 7.723266167098058e-06, - "loss": 0.00012347951997071505, - "step": 4670 - }, - { - "epoch": 3.188949522510232, - "grad_norm": 0.011817894876003265, - "learning_rate": 7.661063282738685e-06, - "loss": 0.000311569613404572, - "step": 4675 - }, - { - "epoch": 3.1923601637107777, - "grad_norm": 0.011100267991423607, - "learning_rate": 7.599083391560774e-06, - "loss": 0.00023061195388436319, - "step": 4680 - }, - { - "epoch": 3.1957708049113234, - "grad_norm": 0.025753796100616455, - "learning_rate": 7.5373269567541776e-06, - "loss": 0.00013706330209970475, - "step": 4685 - }, - { - "epoch": 3.199181446111869, - "grad_norm": 0.0529993437230587, - "learning_rate": 7.47579443983886e-06, - "loss": 0.00025801956653594973, - "step": 4690 - }, - { - "epoch": 3.2025920873124147, - "grad_norm": 0.01164156198501587, - "learning_rate": 7.41448630066132e-06, - "loss": 0.0001246333820745349, - "step": 4695 - }, - { - "epoch": 3.2060027285129604, - "grad_norm": 0.0022943434305489063, - "learning_rate": 7.353402997391271e-06, - "loss": 4.788096994161606e-05, - "step": 4700 - }, - { - "epoch": 3.209413369713506, - "grad_norm": 0.0016377613646909595, - "learning_rate": 7.292544986518198e-06, - "loss": 0.0014273281209170817, - "step": 4705 - }, - { - "epoch": 3.212824010914052, - "grad_norm": 0.046027738600969315, - "learning_rate": 7.231912722847881e-06, - "loss": 0.00015070366207510234, - "step": 4710 - }, - { - "epoch": 3.2162346521145975, - "grad_norm": 0.12626095116138458, - "learning_rate": 7.171506659499067e-06, - "loss": 0.0002223264891654253, - "step": 4715 - }, - { - "epoch": 3.219645293315143, - "grad_norm": 0.004387991968542337, - "learning_rate": 7.1113272479000465e-06, - "loss": 5.1431613974273206e-05, - "step": 4720 - }, - { - "epoch": 3.223055934515689, - "grad_norm": 0.0005260159377939999, - "learning_rate": 7.051374937785289e-06, - "loss": 9.967307560145854e-05, - "step": 4725 - }, - { - "epoch": 3.2264665757162345, - "grad_norm": 0.001557494280859828, - "learning_rate": 6.9916501771920795e-06, - "loss": 3.6639469908550384e-05, - "step": 4730 - }, - { - "epoch": 3.22987721691678, - "grad_norm": 0.0013014579890295863, - "learning_rate": 6.932153412457195e-06, - "loss": 0.00015290889423340558, - "step": 4735 - }, - { - "epoch": 3.233287858117326, - "grad_norm": 0.0005693956045433879, - "learning_rate": 6.872885088213522e-06, - "loss": 9.23092185985297e-05, - "step": 4740 - }, - { - "epoch": 3.2366984993178716, - "grad_norm": 0.0008315684972330928, - "learning_rate": 6.813845647386771e-06, - "loss": 0.00010763210011646151, - "step": 4745 - }, - { - "epoch": 3.2401091405184177, - "grad_norm": 0.0021727036219090223, - "learning_rate": 6.755035531192148e-06, - "loss": 3.423129383008927e-05, - "step": 4750 - }, - { - "epoch": 3.2435197817189634, - "grad_norm": 0.0001480428036302328, - "learning_rate": 6.696455179131084e-06, - "loss": 0.023981352150440217, - "step": 4755 - }, - { - "epoch": 3.2455661664392905, - "eval_loss": 0.074391670525074, - "eval_runtime": 0.9318, - "eval_samples_per_second": 80.487, - "eval_steps_per_second": 2.146, - "step": 4758 - }, - { - "eval_cer_subset": 0.015260934732252351, - "eval_cer_subset_edit_distance": 112, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4758 - }, - { - "epoch": 3.246930422919509, - "grad_norm": 0.007659688591957092, - "learning_rate": 6.638105028987886e-06, - "loss": 0.0004011324606835842, - "step": 4760 - }, - { - "epoch": 3.2503410641200547, - "grad_norm": 0.033579885959625244, - "learning_rate": 6.579985516826564e-06, - "loss": 0.000267721782438457, - "step": 4765 - }, - { - "epoch": 3.2537517053206004, - "grad_norm": 0.01196813303977251, - "learning_rate": 6.52209707698748e-06, - "loss": 0.00014587611658498645, - "step": 4770 - }, - { - "epoch": 3.257162346521146, - "grad_norm": 0.022396638989448547, - "learning_rate": 6.464440142084156e-06, - "loss": 0.00038080187514424326, - "step": 4775 - }, - { - "epoch": 3.260572987721692, - "grad_norm": 0.002048628870397806, - "learning_rate": 6.407015143000002e-06, - "loss": 0.007085768878459931, - "step": 4780 - }, - { - "epoch": 3.2639836289222375, - "grad_norm": 0.0019598486833274364, - "learning_rate": 6.3498225088851686e-06, - "loss": 0.0010610194876790047, - "step": 4785 - }, - { - "epoch": 3.267394270122783, - "grad_norm": 0.02006545290350914, - "learning_rate": 6.29286266715324e-06, - "loss": 0.00014606654876843096, - "step": 4790 - }, - { - "epoch": 3.270804911323329, - "grad_norm": 0.0005910536856390536, - "learning_rate": 6.236136043478106e-06, - "loss": 6.873804377391935e-05, - "step": 4795 - }, - { - "epoch": 3.2742155525238745, - "grad_norm": 0.021028559654951096, - "learning_rate": 6.179643061790775e-06, - "loss": 0.0013180674985051155, - "step": 4800 - }, - { - "epoch": 3.27762619372442, - "grad_norm": 0.0004542934475466609, - "learning_rate": 6.123384144276183e-06, - "loss": 4.965414409525692e-05, - "step": 4805 - }, - { - "epoch": 3.281036834924966, - "grad_norm": 0.041615571826696396, - "learning_rate": 6.067359711370047e-06, - "loss": 0.019744729995727538, - "step": 4810 - }, - { - "epoch": 3.2844474761255116, - "grad_norm": 0.002755318768322468, - "learning_rate": 6.011570181755754e-06, - "loss": 0.00010759549913927913, - "step": 4815 - }, - { - "epoch": 3.2878581173260573, - "grad_norm": 0.0032264923211187124, - "learning_rate": 5.956015972361171e-06, - "loss": 0.0007094295229762793, - "step": 4820 - }, - { - "epoch": 3.291268758526603, - "grad_norm": 1.4725555181503296, - "learning_rate": 5.900697498355589e-06, - "loss": 0.010635539144277572, - "step": 4825 - }, - { - "epoch": 3.2946793997271486, - "grad_norm": 0.017544033005833626, - "learning_rate": 5.84561517314657e-06, - "loss": 0.0003255015704780817, - "step": 4830 - }, - { - "epoch": 3.2980900409276943, - "grad_norm": 0.0035306529607623816, - "learning_rate": 5.790769408376922e-06, - "loss": 0.00013384120538830757, - "step": 4835 - }, - { - "epoch": 3.30150068212824, - "grad_norm": 0.0018269309075549245, - "learning_rate": 5.736160613921528e-06, - "loss": 0.0007960126735270023, - "step": 4840 - }, - { - "epoch": 3.3049113233287857, - "grad_norm": 0.010024248622357845, - "learning_rate": 5.6817891978843855e-06, - "loss": 0.0003233390394598246, - "step": 4845 - }, - { - "epoch": 3.3083219645293314, - "grad_norm": 0.004048422910273075, - "learning_rate": 5.627655566595489e-06, - "loss": 0.00011264056665822863, - "step": 4850 - }, - { - "epoch": 3.311732605729877, - "grad_norm": 0.0034091162960976362, - "learning_rate": 5.573760124607812e-06, - "loss": 0.00048314151354134083, - "step": 4855 - }, - { - "epoch": 3.3151432469304227, - "grad_norm": 0.009309383109211922, - "learning_rate": 5.5201032746942796e-06, - "loss": 0.0002101475838571787, - "step": 4860 - }, - { - "epoch": 3.3185538881309684, - "grad_norm": 0.06538962572813034, - "learning_rate": 5.466685417844797e-06, - "loss": 0.0003499136073514819, - "step": 4865 - }, - { - "epoch": 3.321964529331514, - "grad_norm": 0.002885893452912569, - "learning_rate": 5.413506953263162e-06, - "loss": 0.00048564458265900614, - "step": 4870 - }, - { - "epoch": 3.32537517053206, - "grad_norm": 0.004836421925574541, - "learning_rate": 5.3605682783642e-06, - "loss": 6.691411836072803e-05, - "step": 4875 - }, - { - "epoch": 3.328785811732606, - "grad_norm": 0.00475132092833519, - "learning_rate": 5.307869788770694e-06, - "loss": 0.0006194526329636573, - "step": 4880 - }, - { - "epoch": 3.3321964529331516, - "grad_norm": 0.005688230507075787, - "learning_rate": 5.255411878310482e-06, - "loss": 9.07582463696599e-05, - "step": 4885 - }, - { - "epoch": 3.3356070941336973, - "grad_norm": 0.0037306994199752808, - "learning_rate": 5.2031949390134856e-06, - "loss": 0.00012413164367899298, - "step": 4890 - }, - { - "epoch": 3.339017735334243, - "grad_norm": 0.011166035197675228, - "learning_rate": 5.15121936110882e-06, - "loss": 7.776234415359795e-05, - "step": 4895 - }, - { - "epoch": 3.3424283765347886, - "grad_norm": 0.0012123408960178494, - "learning_rate": 5.099485533021836e-06, - "loss": 2.784754615277052e-05, - "step": 4900 - }, - { - "epoch": 3.3458390177353343, - "grad_norm": 0.002357951132580638, - "learning_rate": 5.047993841371223e-06, - "loss": 0.00029555323999375105, - "step": 4905 - }, - { - "epoch": 3.34924965893588, - "grad_norm": 0.00030636831070296466, - "learning_rate": 4.996744670966138e-06, - "loss": 0.00010590272722765803, - "step": 4910 - }, - { - "epoch": 3.3526603001364257, - "grad_norm": 0.0058077736757695675, - "learning_rate": 4.945738404803348e-06, - "loss": 0.0001937644206918776, - "step": 4915 - }, - { - "epoch": 3.3560709413369714, - "grad_norm": 0.0007610859465785325, - "learning_rate": 4.8949754240642775e-06, - "loss": 0.00011190775549039245, - "step": 4920 - }, - { - "epoch": 3.359481582537517, - "grad_norm": 0.6333717107772827, - "learning_rate": 4.844456108112297e-06, - "loss": 0.0008945153094828129, - "step": 4925 - }, - { - "epoch": 3.3628922237380627, - "grad_norm": 0.006114002782851458, - "learning_rate": 4.794180834489772e-06, - "loss": 9.972437983378768e-05, - "step": 4930 - }, - { - "epoch": 3.3663028649386084, - "grad_norm": 0.0017828121781349182, - "learning_rate": 4.744149978915274e-06, - "loss": 7.763381581753493e-05, - "step": 4935 - }, - { - "epoch": 3.369713506139154, - "grad_norm": 0.0009099289891310036, - "learning_rate": 4.694363915280814e-06, - "loss": 0.05654715895652771, - "step": 4940 - }, - { - "epoch": 3.3731241473397, - "grad_norm": 0.013658248819410801, - "learning_rate": 4.644823015649009e-06, - "loss": 0.0013173201121389865, - "step": 4945 - }, - { - "epoch": 3.3765347885402455, - "grad_norm": 0.6665285229682922, - "learning_rate": 4.5955276502502944e-06, - "loss": 0.0018493477255105972, - "step": 4950 - }, - { - "epoch": 3.379945429740791, - "grad_norm": 0.001095029292628169, - "learning_rate": 4.546478187480176e-06, - "loss": 0.0005424355156719684, - "step": 4955 - }, - { - "epoch": 3.383356070941337, - "grad_norm": 0.015392723493278027, - "learning_rate": 4.497674993896503e-06, - "loss": 9.325146675109863e-05, - "step": 4960 - }, - { - "epoch": 3.3867667121418825, - "grad_norm": 0.08219064027070999, - "learning_rate": 4.449118434216653e-06, - "loss": 0.0004451565444469452, - "step": 4965 - }, - { - "epoch": 3.390177353342428, - "grad_norm": 0.003267089370638132, - "learning_rate": 4.4008088713148845e-06, - "loss": 0.00016891954001039267, - "step": 4970 - }, - { - "epoch": 3.3935879945429743, - "grad_norm": 0.008226803503930569, - "learning_rate": 4.35274666621957e-06, - "loss": 0.0001009777537547052, - "step": 4975 - }, - { - "epoch": 3.39699863574352, - "grad_norm": 0.01762073114514351, - "learning_rate": 4.304932178110558e-06, - "loss": 0.006487253308296204, - "step": 4980 - }, - { - "epoch": 3.4004092769440657, - "grad_norm": 0.0021151783876121044, - "learning_rate": 4.257365764316395e-06, - "loss": 0.00011428899597376585, - "step": 4985 - }, - { - "epoch": 3.4038199181446114, - "grad_norm": 0.0008960114791989326, - "learning_rate": 4.210047780311768e-06, - "loss": 3.825195599347353e-05, - "step": 4990 - }, - { - "epoch": 3.407230559345157, - "grad_norm": 0.058672014623880386, - "learning_rate": 4.162978579714753e-06, - "loss": 0.0005217622965574265, - "step": 4995 - }, - { - "epoch": 3.4106412005457027, - "grad_norm": 6.555901927640662e-05, - "learning_rate": 4.11615851428423e-06, - "loss": 0.00011362402001395822, - "step": 5000 - }, - { - "epoch": 3.4140518417462484, - "grad_norm": 0.21014879643917084, - "learning_rate": 4.069587933917221e-06, - "loss": 0.00179185438901186, - "step": 5005 - }, - { - "epoch": 3.417462482946794, - "grad_norm": 0.1315806359052658, - "learning_rate": 4.023267186646317e-06, - "loss": 0.0014887897297739983, - "step": 5010 - }, - { - "epoch": 3.42087312414734, - "grad_norm": 0.018509764224290848, - "learning_rate": 3.977196618637e-06, - "loss": 0.00016269356710836292, - "step": 5015 - }, - { - "epoch": 3.4242837653478855, - "grad_norm": 0.0007501108921132982, - "learning_rate": 3.931376574185166e-06, - "loss": 3.563327190931886e-05, - "step": 5020 - }, - { - "epoch": 3.427694406548431, - "grad_norm": 0.0036405418068170547, - "learning_rate": 3.885807395714441e-06, - "loss": 0.0009133132174611091, - "step": 5025 - }, - { - "epoch": 3.431105047748977, - "grad_norm": 0.01853407360613346, - "learning_rate": 3.840489423773698e-06, - "loss": 0.00011221827007830143, - "step": 5030 - }, - { - "epoch": 3.4345156889495225, - "grad_norm": 0.004317726474255323, - "learning_rate": 3.7954229970344725e-06, - "loss": 0.00033687916584312916, - "step": 5035 - }, - { - "epoch": 3.437926330150068, - "grad_norm": 0.003909669350832701, - "learning_rate": 3.7506084522884684e-06, - "loss": 7.524496177211404e-05, - "step": 5040 - }, - { - "epoch": 3.441336971350614, - "grad_norm": 0.016738831996917725, - "learning_rate": 3.7060461244449945e-06, - "loss": 9.533832781016827e-05, - "step": 5045 - }, - { - "epoch": 3.4447476125511596, - "grad_norm": 0.0012303644325584173, - "learning_rate": 3.6617363465284875e-06, - "loss": 0.00011376941110938788, - "step": 5050 - }, - { - "epoch": 3.4481582537517053, - "grad_norm": 0.005091819446533918, - "learning_rate": 3.617679449676028e-06, - "loss": 0.000578709552064538, - "step": 5055 - }, - { - "epoch": 3.451568894952251, - "grad_norm": 0.0007091189618222415, - "learning_rate": 3.5738757631348744e-06, - "loss": 7.042675861157477e-05, - "step": 5060 - }, - { - "epoch": 3.4549795361527966, - "grad_norm": 0.004033643286675215, - "learning_rate": 3.5303256142599407e-06, - "loss": 5.9417390730232e-05, - "step": 5065 - }, - { - "epoch": 3.4583901773533423, - "grad_norm": 0.018419573083519936, - "learning_rate": 3.487029328511444e-06, - "loss": 0.0001460162689909339, - "step": 5070 - }, - { - "epoch": 3.461800818553888, - "grad_norm": 0.0035185501910746098, - "learning_rate": 3.4439872294524025e-06, - "loss": 0.0001254791859537363, - "step": 5075 - }, - { - "epoch": 3.4652114597544337, - "grad_norm": 0.0007496779435314238, - "learning_rate": 3.401199638746241e-06, - "loss": 4.7221675049513576e-05, - "step": 5080 - }, - { - "epoch": 3.4686221009549794, - "grad_norm": 2.67924165725708, - "learning_rate": 3.3586668761543813e-06, - "loss": 0.0016780177131295205, - "step": 5085 - }, - { - "epoch": 3.472032742155525, - "grad_norm": 0.011002305895090103, - "learning_rate": 3.316389259533876e-06, - "loss": 9.696125634945929e-05, - "step": 5090 - }, - { - "epoch": 3.4754433833560707, - "grad_norm": 0.0005470504984259605, - "learning_rate": 3.2743671048349755e-06, - "loss": 3.457541752140969e-05, - "step": 5095 - }, - { - "epoch": 3.4788540245566164, - "grad_norm": 0.014639006927609444, - "learning_rate": 3.232600726098851e-06, - "loss": 0.005385900661349296, - "step": 5100 - }, - { - "epoch": 3.4822646657571625, - "grad_norm": 0.02914433367550373, - "learning_rate": 3.191090435455171e-06, - "loss": 0.00018561827018857003, - "step": 5105 - }, - { - "epoch": 3.485675306957708, - "grad_norm": 0.0009303450351580977, - "learning_rate": 3.1498365431198048e-06, - "loss": 8.976480457931758e-05, - "step": 5110 - }, - { - "epoch": 3.489085948158254, - "grad_norm": 0.001520369085483253, - "learning_rate": 3.1088393573924966e-06, - "loss": 0.00012106491485610604, - "step": 5115 - }, - { - "epoch": 3.4924965893587996, - "grad_norm": 0.0004795770801138133, - "learning_rate": 3.0680991846545836e-06, - "loss": 5.365515244193375e-05, - "step": 5120 - }, - { - "epoch": 3.495225102319236, - "eval_loss": 0.07684502005577087, - "eval_runtime": 0.9207, - "eval_samples_per_second": 81.46, - "eval_steps_per_second": 2.172, - "step": 5124 - }, - { - "eval_cer_subset": 0.014852159694781306, - "eval_cer_subset_edit_distance": 109, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 5124 - }, - { - "epoch": 3.4959072305593453, - "grad_norm": 0.0028329631313681602, - "learning_rate": 3.027616329366664e-06, - "loss": 0.00016044501680880784, - "step": 5125 - }, - { - "epoch": 3.499317871759891, - "grad_norm": 0.00026357462047599256, - "learning_rate": 2.987391094066345e-06, - "loss": 0.00023950175382196902, - "step": 5130 - }, - { - "epoch": 3.5027285129604366, - "grad_norm": 0.05449860543012619, - "learning_rate": 2.9474237793659956e-06, - "loss": 0.0002613885561004281, - "step": 5135 - }, - { - "epoch": 3.5061391541609823, - "grad_norm": 0.0011948413448408246, - "learning_rate": 2.907714683950471e-06, - "loss": 5.1718176109716296e-05, - "step": 5140 - }, - { - "epoch": 3.509549795361528, - "grad_norm": 0.005827807355672121, - "learning_rate": 2.8682641045748964e-06, - "loss": 0.002432660385966301, - "step": 5145 - }, - { - "epoch": 3.5129604365620737, - "grad_norm": 0.0015064133331179619, - "learning_rate": 2.829072336062463e-06, - "loss": 3.671176382340491e-05, - "step": 5150 - }, - { - "epoch": 3.5163710777626194, - "grad_norm": 0.0009614004520699382, - "learning_rate": 2.790139671302191e-06, - "loss": 8.640417945571244e-05, - "step": 5155 - }, - { - "epoch": 3.519781718963165, - "grad_norm": 0.0023597145918756723, - "learning_rate": 2.7514664012467727e-06, - "loss": 7.012838032096624e-05, - "step": 5160 - }, - { - "epoch": 3.5231923601637107, - "grad_norm": 0.0007319003925658762, - "learning_rate": 2.7130528149103657e-06, - "loss": 2.3347024398390204e-05, - "step": 5165 - }, - { - "epoch": 3.5266030013642564, - "grad_norm": 0.006225161254405975, - "learning_rate": 2.6748991993664774e-06, - "loss": 0.00018728851573541762, - "step": 5170 - }, - { - "epoch": 3.530013642564802, - "grad_norm": 0.5371012687683105, - "learning_rate": 2.637005839745772e-06, - "loss": 0.004397168383002281, - "step": 5175 - }, - { - "epoch": 3.533424283765348, - "grad_norm": 0.0009830278577283025, - "learning_rate": 2.5993730192339663e-06, - "loss": 5.1807129057124254e-05, - "step": 5180 - }, - { - "epoch": 3.5368349249658935, - "grad_norm": 0.002657173899933696, - "learning_rate": 2.562001019069726e-06, - "loss": 0.00013663843274116516, - "step": 5185 - }, - { - "epoch": 3.540245566166439, - "grad_norm": 0.005280365701764822, - "learning_rate": 2.5248901185425345e-06, - "loss": 0.00618811659514904, - "step": 5190 - }, - { - "epoch": 3.543656207366985, - "grad_norm": 0.5160993933677673, - "learning_rate": 2.488040594990606e-06, - "loss": 0.0008973299525678157, - "step": 5195 - }, - { - "epoch": 3.547066848567531, - "grad_norm": 0.00302395923063159, - "learning_rate": 2.451452723798844e-06, - "loss": 0.00022636731155216693, - "step": 5200 - }, - { - "epoch": 3.5504774897680766, - "grad_norm": 0.010127821005880833, - "learning_rate": 2.415126778396764e-06, - "loss": 0.0002638277132064104, - "step": 5205 - }, - { - "epoch": 3.5538881309686223, - "grad_norm": 0.0024330182932317257, - "learning_rate": 2.379063030256424e-06, - "loss": 0.0007223737891763448, - "step": 5210 - }, - { - "epoch": 3.557298772169168, - "grad_norm": 0.006238727364689112, - "learning_rate": 2.343261748890457e-06, - "loss": 0.0010717549361288548, - "step": 5215 - }, - { - "epoch": 3.5607094133697137, - "grad_norm": 0.0005047949962317944, - "learning_rate": 2.3077232018500024e-06, - "loss": 7.232563220895826e-05, - "step": 5220 - }, - { - "epoch": 3.5641200545702594, - "grad_norm": 0.0017868287395685911, - "learning_rate": 2.2724476547227325e-06, - "loss": 5.702247726731002e-05, - "step": 5225 - }, - { - "epoch": 3.567530695770805, - "grad_norm": 0.0005412849714048207, - "learning_rate": 2.2374353711308467e-06, - "loss": 4.539011861197651e-05, - "step": 5230 - }, - { - "epoch": 3.5709413369713507, - "grad_norm": 0.0008777446346357465, - "learning_rate": 2.2026866127291516e-06, - "loss": 0.0010734600946307182, - "step": 5235 - }, - { - "epoch": 3.5743519781718964, - "grad_norm": 0.020784150809049606, - "learning_rate": 2.1682016392030198e-06, - "loss": 0.0001352211693301797, - "step": 5240 - }, - { - "epoch": 3.577762619372442, - "grad_norm": 0.0032426826655864716, - "learning_rate": 2.133980708266539e-06, - "loss": 0.0001363346236757934, - "step": 5245 - }, - { - "epoch": 3.581173260572988, - "grad_norm": 0.00047889159759506583, - "learning_rate": 2.100024075660538e-06, - "loss": 0.00017863117391243578, - "step": 5250 - }, - { - "epoch": 3.5845839017735335, - "grad_norm": 0.00031558674527332187, - "learning_rate": 2.066331995150666e-06, - "loss": 0.00021245412062853575, - "step": 5255 - }, - { - "epoch": 3.587994542974079, - "grad_norm": 0.0009245559340342879, - "learning_rate": 2.032904718525531e-06, - "loss": 0.0003619278548285365, - "step": 5260 - }, - { - "epoch": 3.591405184174625, - "grad_norm": 0.0013650038745254278, - "learning_rate": 1.9997424955948054e-06, - "loss": 3.983181086368859e-05, - "step": 5265 - }, - { - "epoch": 3.5948158253751705, - "grad_norm": 0.004131193272769451, - "learning_rate": 1.9668455741873424e-06, - "loss": 5.411377060227096e-05, - "step": 5270 - }, - { - "epoch": 3.598226466575716, - "grad_norm": 0.0061623696237802505, - "learning_rate": 1.9342142001493394e-06, - "loss": 5.686166696250439e-05, - "step": 5275 - }, - { - "epoch": 3.601637107776262, - "grad_norm": 0.0031696371734142303, - "learning_rate": 1.9018486173424982e-06, - "loss": 0.00025364819448441266, - "step": 5280 - }, - { - "epoch": 3.6050477489768076, - "grad_norm": 0.0009586649248376489, - "learning_rate": 1.8697490676422052e-06, - "loss": 0.0001193733187392354, - "step": 5285 - }, - { - "epoch": 3.6084583901773533, - "grad_norm": 0.04152340814471245, - "learning_rate": 1.837915790935708e-06, - "loss": 0.00011144874151796103, - "step": 5290 - }, - { - "epoch": 3.611869031377899, - "grad_norm": 0.0054172491654753685, - "learning_rate": 1.8063490251203628e-06, - "loss": 0.0011931947432458402, - "step": 5295 - }, - { - "epoch": 3.6152796725784446, - "grad_norm": 0.009695466607809067, - "learning_rate": 1.7750490061018058e-06, - "loss": 0.00018604533979669214, - "step": 5300 - }, - { - "epoch": 3.6186903137789903, - "grad_norm": 0.021507805213332176, - "learning_rate": 1.7440159677922186e-06, - "loss": 0.00013907469110563397, - "step": 5305 - }, - { - "epoch": 3.622100954979536, - "grad_norm": 0.006461102515459061, - "learning_rate": 1.7132501421085659e-06, - "loss": 0.0003493543481454253, - "step": 5310 - }, - { - "epoch": 3.6255115961800817, - "grad_norm": 0.0105512710288167, - "learning_rate": 1.6827517589709057e-06, - "loss": 7.155602797865867e-05, - "step": 5315 - }, - { - "epoch": 3.6289222373806274, - "grad_norm": 0.0008970944327302277, - "learning_rate": 1.6525210463005868e-06, - "loss": 0.00011632050154730678, - "step": 5320 - }, - { - "epoch": 3.632332878581173, - "grad_norm": 0.00177982565946877, - "learning_rate": 1.6225582300186405e-06, - "loss": 0.002909584902226925, - "step": 5325 - }, - { - "epoch": 3.6357435197817187, - "grad_norm": 0.001438588253222406, - "learning_rate": 1.5928635340440255e-06, - "loss": 0.00019459464820101858, - "step": 5330 - }, - { - "epoch": 3.6391541609822644, - "grad_norm": 0.00501625519245863, - "learning_rate": 1.5634371802919696e-06, - "loss": 6.730342865921557e-05, - "step": 5335 - }, - { - "epoch": 3.64256480218281, - "grad_norm": 0.003331825602799654, - "learning_rate": 1.534279388672334e-06, - "loss": 4.0749041363596913e-05, - "step": 5340 - }, - { - "epoch": 3.645975443383356, - "grad_norm": 0.000979799311608076, - "learning_rate": 1.5053903770879518e-06, - "loss": 3.709651937242597e-05, - "step": 5345 - }, - { - "epoch": 3.649386084583902, - "grad_norm": 0.000521843961905688, - "learning_rate": 1.4767703614329884e-06, - "loss": 8.522009593434632e-05, - "step": 5350 - }, - { - "epoch": 3.6527967257844476, - "grad_norm": 0.0004526655247900635, - "learning_rate": 1.4484195555913582e-06, - "loss": 2.5001828907988967e-05, - "step": 5355 - }, - { - "epoch": 3.6562073669849933, - "grad_norm": 0.0029042293317615986, - "learning_rate": 1.420338171435094e-06, - "loss": 0.00018259206553921102, - "step": 5360 - }, - { - "epoch": 3.659618008185539, - "grad_norm": 0.003727864008396864, - "learning_rate": 1.3925264188227925e-06, - "loss": 0.00011327432002872229, - "step": 5365 - }, - { - "epoch": 3.6630286493860846, - "grad_norm": 0.0013012837152928114, - "learning_rate": 1.3649845055980172e-06, - "loss": 3.7070104735903445e-05, - "step": 5370 - }, - { - "epoch": 3.6664392905866303, - "grad_norm": 0.013001679442822933, - "learning_rate": 1.3377126375877817e-06, - "loss": 5.2633637096732855e-05, - "step": 5375 - }, - { - "epoch": 3.669849931787176, - "grad_norm": 0.0007804427877999842, - "learning_rate": 1.3107110186009643e-06, - "loss": 3.320692339912057e-05, - "step": 5380 - }, - { - "epoch": 3.6732605729877217, - "grad_norm": 0.002156102331355214, - "learning_rate": 1.2839798504268333e-06, - "loss": 0.0010122624225914478, - "step": 5385 - }, - { - "epoch": 3.6766712141882674, - "grad_norm": 0.014592370949685574, - "learning_rate": 1.2575193328334995e-06, - "loss": 8.401789236813783e-05, - "step": 5390 - }, - { - "epoch": 3.680081855388813, - "grad_norm": 0.0031752304639667273, - "learning_rate": 1.2313296635664576e-06, - "loss": 3.530957619659603e-05, - "step": 5395 - }, - { - "epoch": 3.6834924965893587, - "grad_norm": 0.0010194798232987523, - "learning_rate": 1.205411038347072e-06, - "loss": 3.8149964530020954e-05, - "step": 5400 - }, - { - "epoch": 3.6869031377899044, - "grad_norm": 0.015784459188580513, - "learning_rate": 1.1797636508711433e-06, - "loss": 0.0003640792798250914, - "step": 5405 - }, - { - "epoch": 3.69031377899045, - "grad_norm": 0.044826168566942215, - "learning_rate": 1.1543876928074485e-06, - "loss": 9.083467884920537e-05, - "step": 5410 - }, - { - "epoch": 3.693724420190996, - "grad_norm": 0.00281776231713593, - "learning_rate": 1.1292833537963162e-06, - "loss": 7.455614977516233e-05, - "step": 5415 - }, - { - "epoch": 3.6971350613915415, - "grad_norm": 0.0028589575085788965, - "learning_rate": 1.1044508214481981e-06, - "loss": 0.00014598952839151025, - "step": 5420 - }, - { - "epoch": 3.700545702592087, - "grad_norm": 0.22950734198093414, - "learning_rate": 1.0798902813422674e-06, - "loss": 0.0015467993915081024, - "step": 5425 - }, - { - "epoch": 3.7039563437926333, - "grad_norm": 0.00023460436204914004, - "learning_rate": 1.055601917025052e-06, - "loss": 9.705557604320348e-05, - "step": 5430 - }, - { - "epoch": 3.707366984993179, - "grad_norm": 0.001205096603371203, - "learning_rate": 1.0315859100090278e-06, - "loss": 0.00016396433347836136, - "step": 5435 - }, - { - "epoch": 3.7107776261937246, - "grad_norm": 0.001592331798747182, - "learning_rate": 1.0078424397713072e-06, - "loss": 5.8903940953314304e-05, - "step": 5440 - }, - { - "epoch": 3.7141882673942703, - "grad_norm": 0.006218360736966133, - "learning_rate": 9.843716837522524e-07, - "loss": 0.00036285766400396825, - "step": 5445 - }, - { - "epoch": 3.717598908594816, - "grad_norm": 0.0005638732109218836, - "learning_rate": 9.611738173541856e-07, - "loss": 9.058183059096336e-05, - "step": 5450 - }, - { - "epoch": 3.7210095497953617, - "grad_norm": 0.0001975312188733369, - "learning_rate": 9.382490139400386e-07, - "loss": 0.0006882708985358477, - "step": 5455 - }, - { - "epoch": 3.7244201909959074, - "grad_norm": 0.0014521559933200479, - "learning_rate": 9.155974448321182e-07, - "loss": 4.348080838099122e-05, - "step": 5460 - }, - { - "epoch": 3.727830832196453, - "grad_norm": 0.0007730235811322927, - "learning_rate": 8.932192793107515e-07, - "loss": 6.59460376482457e-05, - "step": 5465 - }, - { - "epoch": 3.7312414733969987, - "grad_norm": 0.0017008045688271523, - "learning_rate": 8.711146846130834e-07, - "loss": 4.6517132432200015e-05, - "step": 5470 - }, - { - "epoch": 3.7346521145975444, - "grad_norm": 0.000825837894808501, - "learning_rate": 8.492838259317902e-07, - "loss": 5.0807202933356165e-05, - "step": 5475 - }, - { - "epoch": 3.73806275579809, - "grad_norm": 0.0004800660244654864, - "learning_rate": 8.277268664138553e-07, - "loss": 7.82750197686255e-05, - "step": 5480 - }, - { - "epoch": 3.741473396998636, - "grad_norm": 0.024822546169161797, - "learning_rate": 8.06443967159362e-07, - "loss": 6.785190780647098e-05, - "step": 5485 - }, - { - "epoch": 3.7448840381991815, - "grad_norm": 0.0010685365414246917, - "learning_rate": 7.854352872202735e-07, - "loss": 6.689840811304748e-05, - "step": 5490 - }, - { - "epoch": 3.7448840381991815, - "eval_loss": 0.07814140617847443, - "eval_runtime": 0.8865, - "eval_samples_per_second": 84.605, - "eval_steps_per_second": 2.256, - "step": 5490 - }, - { - "eval_cer_subset": 0.014852159694781306, - "eval_cer_subset_edit_distance": 109, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 5490 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 5.725543779053568e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5490/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/adapter_model.safetensors deleted file mode 100644 index 71f24d4684f3ab2a104d57efca212dd0575247f1..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bb81dae3f82fbe0987ed2f0f0c6eb141e72a2812e59f5622c4ade933f01b3da6 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/optimizer.pt deleted file mode 100644 index 2ede6d529fdcf91fa32c2418f0170b487245900b..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3379a942fd0ab444980378201ca0f6fd8e6553879a36b7ad8950c0b29ad44835 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/rng_state.pth deleted file mode 100644 index 4669d52b4df192bc91f2676391b1233756a07f32..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a2df993afae23610a148cbb90d0d2fcfd8b5e755873f80c30cdbe027d950c3c -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/scheduler.pt deleted file mode 100644 index 1183c964b5ce2920f309052fda8d7593a076a148..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f56e25a4ca4110995d13945841eaaa8482f7f17d6850ed7bd02b052e21c1dd6d -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/trainer_state.json deleted file mode 100644 index 61d10980ca3a1040c744bf5eae0db90b57443981..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/trainer_state.json +++ /dev/null @@ -1,8487 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 3.9945429740791267, - "eval_steps": 366, - "global_step": 5856, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - }, - { - "epoch": 1.7496589358799455, - "grad_norm": 0.1577913612127304, - "learning_rate": 4.617543195717702e-05, - "loss": 0.008352726697921753, - "step": 2565 - }, - { - "epoch": 1.7530695770804912, - "grad_norm": 0.0097503075376153, - "learning_rate": 4.607566614572297e-05, - "loss": 0.0051550988107919695, - "step": 2570 - }, - { - "epoch": 1.7564802182810368, - "grad_norm": 0.0745258778333664, - "learning_rate": 4.5975836246359376e-05, - "loss": 0.021304388344287873, - "step": 2575 - }, - { - "epoch": 1.7598908594815825, - "grad_norm": 0.06805134564638138, - "learning_rate": 4.5875943005137875e-05, - "loss": 0.002654948644340038, - "step": 2580 - }, - { - "epoch": 1.7633015006821282, - "grad_norm": 0.005402611568570137, - "learning_rate": 4.577598716858342e-05, - "loss": 0.0005614575464278459, - "step": 2585 - }, - { - "epoch": 1.766712141882674, - "grad_norm": 1.909899115562439, - "learning_rate": 4.5675969483688796e-05, - "loss": 0.07116572856903076, - "step": 2590 - }, - { - "epoch": 1.7701227830832198, - "grad_norm": 0.31900784373283386, - "learning_rate": 4.557589069790897e-05, - "loss": 0.00657682865858078, - "step": 2595 - }, - { - "epoch": 1.7735334242837655, - "grad_norm": 0.029744356870651245, - "learning_rate": 4.547575155915556e-05, - "loss": 0.022768501937389374, - "step": 2600 - }, - { - "epoch": 1.7769440654843112, - "grad_norm": 0.3033762276172638, - "learning_rate": 4.537555281579118e-05, - "loss": 0.06703370213508605, - "step": 2605 - }, - { - "epoch": 1.7803547066848568, - "grad_norm": 0.1135796457529068, - "learning_rate": 4.5275295216623895e-05, - "loss": 0.02318609356880188, - "step": 2610 - }, - { - "epoch": 1.7837653478854025, - "grad_norm": 0.03714317828416824, - "learning_rate": 4.517497951090163e-05, - "loss": 0.0006621151696890593, - "step": 2615 - }, - { - "epoch": 1.7871759890859482, - "grad_norm": 0.022167189046740532, - "learning_rate": 4.507460644830652e-05, - "loss": 0.02861899733543396, - "step": 2620 - }, - { - "epoch": 1.790586630286494, - "grad_norm": 0.85112464427948, - "learning_rate": 4.497417677894937e-05, - "loss": 0.006332498788833618, - "step": 2625 - }, - { - "epoch": 1.7939972714870396, - "grad_norm": 0.35604724287986755, - "learning_rate": 4.487369125336402e-05, - "loss": 0.0009636864997446537, - "step": 2630 - }, - { - "epoch": 1.7974079126875853, - "grad_norm": 1.309139370918274, - "learning_rate": 4.477315062250173e-05, - "loss": 0.015147030353546143, - "step": 2635 - }, - { - "epoch": 1.800818553888131, - "grad_norm": 0.00445478456094861, - "learning_rate": 4.467255563772554e-05, - "loss": 0.0033130761235952376, - "step": 2640 - }, - { - "epoch": 1.8042291950886766, - "grad_norm": 0.6081591248512268, - "learning_rate": 4.457190705080476e-05, - "loss": 0.05884039998054504, - "step": 2645 - }, - { - "epoch": 1.8076398362892223, - "grad_norm": 0.021707098931074142, - "learning_rate": 4.4471205613909215e-05, - "loss": 0.011024921387434005, - "step": 2650 - }, - { - "epoch": 1.811050477489768, - "grad_norm": 0.07584571093320847, - "learning_rate": 4.437045207960372e-05, - "loss": 0.0034543585032224657, - "step": 2655 - }, - { - "epoch": 1.8144611186903137, - "grad_norm": 0.09392323344945908, - "learning_rate": 4.4269647200842444e-05, - "loss": 0.020604337751865386, - "step": 2660 - }, - { - "epoch": 1.8178717598908594, - "grad_norm": 0.01986370049417019, - "learning_rate": 4.4168791730963214e-05, - "loss": 0.002114328555762768, - "step": 2665 - }, - { - "epoch": 1.821282401091405, - "grad_norm": 0.09479828178882599, - "learning_rate": 4.406788642368199e-05, - "loss": 0.025032815337181092, - "step": 2670 - }, - { - "epoch": 1.8246930422919507, - "grad_norm": 0.049587834626436234, - "learning_rate": 4.396693203308714e-05, - "loss": 0.010216309875249862, - "step": 2675 - }, - { - "epoch": 1.8281036834924966, - "grad_norm": 0.8442233204841614, - "learning_rate": 4.3865929313633846e-05, - "loss": 0.028453707695007324, - "step": 2680 - }, - { - "epoch": 1.8315143246930423, - "grad_norm": 1.1214258670806885, - "learning_rate": 4.37648790201385e-05, - "loss": 0.030564960837364197, - "step": 2685 - }, - { - "epoch": 1.834924965893588, - "grad_norm": 0.0033480043057352304, - "learning_rate": 4.3663781907772984e-05, - "loss": 0.0031189337372779847, - "step": 2690 - }, - { - "epoch": 1.8383356070941337, - "grad_norm": 1.5848685503005981, - "learning_rate": 4.356263873205908e-05, - "loss": 0.006724410504102707, - "step": 2695 - }, - { - "epoch": 1.8417462482946794, - "grad_norm": 2.099224090576172, - "learning_rate": 4.346145024886282e-05, - "loss": 0.02901224195957184, - "step": 2700 - }, - { - "epoch": 1.845156889495225, - "grad_norm": 0.009933914057910442, - "learning_rate": 4.336021721438881e-05, - "loss": 0.01628301590681076, - "step": 2705 - }, - { - "epoch": 1.848567530695771, - "grad_norm": 0.36502084136009216, - "learning_rate": 4.325894038517463e-05, - "loss": 0.0063889890909194945, - "step": 2710 - }, - { - "epoch": 1.8519781718963166, - "grad_norm": 0.5338266491889954, - "learning_rate": 4.3157620518085123e-05, - "loss": 0.0017654186114668847, - "step": 2715 - }, - { - "epoch": 1.8553888130968623, - "grad_norm": 0.019173067063093185, - "learning_rate": 4.3056258370306773e-05, - "loss": 0.026314160227775572, - "step": 2720 - }, - { - "epoch": 1.858799454297408, - "grad_norm": 0.0022459065075963736, - "learning_rate": 4.295485469934203e-05, - "loss": 0.034506088495254515, - "step": 2725 - }, - { - "epoch": 1.8622100954979537, - "grad_norm": 0.009851609356701374, - "learning_rate": 4.285341026300366e-05, - "loss": 0.037691861391067505, - "step": 2730 - }, - { - "epoch": 1.8656207366984994, - "grad_norm": 0.037228964269161224, - "learning_rate": 4.275192581940908e-05, - "loss": 0.019535519182682037, - "step": 2735 - }, - { - "epoch": 1.869031377899045, - "grad_norm": 0.03244785964488983, - "learning_rate": 4.2650402126974704e-05, - "loss": 0.004778595641255379, - "step": 2740 - }, - { - "epoch": 1.8724420190995907, - "grad_norm": 0.027707895264029503, - "learning_rate": 4.254883994441023e-05, - "loss": 0.014376556873321534, - "step": 2745 - }, - { - "epoch": 1.8758526603001364, - "grad_norm": 0.0010299253044649959, - "learning_rate": 4.244724003071302e-05, - "loss": 0.03207484483718872, - "step": 2750 - }, - { - "epoch": 1.879263301500682, - "grad_norm": 0.09466377645730972, - "learning_rate": 4.234560314516241e-05, - "loss": 0.0034012068063020706, - "step": 2755 - }, - { - "epoch": 1.8826739427012278, - "grad_norm": 0.010737070813775063, - "learning_rate": 4.224393004731403e-05, - "loss": 0.0015277769416570663, - "step": 2760 - }, - { - "epoch": 1.8860845839017735, - "grad_norm": 0.0126175656914711, - "learning_rate": 4.2142221496994144e-05, - "loss": 0.0010683752596378326, - "step": 2765 - }, - { - "epoch": 1.8894952251023192, - "grad_norm": 0.03981072083115578, - "learning_rate": 4.2040478254293946e-05, - "loss": 0.013066369295120239, - "step": 2770 - }, - { - "epoch": 1.8929058663028648, - "grad_norm": 1.678425669670105, - "learning_rate": 4.193870107956391e-05, - "loss": 0.04245063066482544, - "step": 2775 - }, - { - "epoch": 1.8963165075034105, - "grad_norm": 0.21114972233772278, - "learning_rate": 4.1836890733408063e-05, - "loss": 0.006565409898757935, - "step": 2780 - }, - { - "epoch": 1.8997271487039562, - "grad_norm": 0.13557757437229156, - "learning_rate": 4.173504797667836e-05, - "loss": 0.0032049473375082016, - "step": 2785 - }, - { - "epoch": 1.9031377899045019, - "grad_norm": 0.03560245409607887, - "learning_rate": 4.163317357046896e-05, - "loss": 0.036527630686759946, - "step": 2790 - }, - { - "epoch": 1.9065484311050478, - "grad_norm": 0.016457395628094673, - "learning_rate": 4.1531268276110534e-05, - "loss": 0.004363229870796204, - "step": 2795 - }, - { - "epoch": 1.9099590723055935, - "grad_norm": 0.15892116725444794, - "learning_rate": 4.142933285516459e-05, - "loss": 0.0009642043150961399, - "step": 2800 - }, - { - "epoch": 1.9133697135061392, - "grad_norm": 1.4503952264785767, - "learning_rate": 4.1327368069417805e-05, - "loss": 0.016029161214828492, - "step": 2805 - }, - { - "epoch": 1.9167803547066848, - "grad_norm": 0.05091691017150879, - "learning_rate": 4.122537468087626e-05, - "loss": 0.008691015094518662, - "step": 2810 - }, - { - "epoch": 1.9201909959072305, - "grad_norm": 0.33131423592567444, - "learning_rate": 4.1123353451759843e-05, - "loss": 0.026498579978942872, - "step": 2815 - }, - { - "epoch": 1.9236016371077762, - "grad_norm": 0.011024169623851776, - "learning_rate": 4.1021305144496455e-05, - "loss": 0.005746996402740479, - "step": 2820 - }, - { - "epoch": 1.9270122783083221, - "grad_norm": 0.003868364728987217, - "learning_rate": 4.091923052171637e-05, - "loss": 0.010599984973669051, - "step": 2825 - }, - { - "epoch": 1.9304229195088678, - "grad_norm": 0.06572377681732178, - "learning_rate": 4.081713034624656e-05, - "loss": 0.0074796505272388455, - "step": 2830 - }, - { - "epoch": 1.9338335607094135, - "grad_norm": 0.004749608226120472, - "learning_rate": 4.07150053811049e-05, - "loss": 0.001074880175292492, - "step": 2835 - }, - { - "epoch": 1.9372442019099592, - "grad_norm": 1.0662771463394165, - "learning_rate": 4.061285638949456e-05, - "loss": 0.012685902416706085, - "step": 2840 - }, - { - "epoch": 1.9406548431105048, - "grad_norm": 0.05863015726208687, - "learning_rate": 4.0510684134798275e-05, - "loss": 0.02307589054107666, - "step": 2845 - }, - { - "epoch": 1.9440654843110505, - "grad_norm": 0.1672995239496231, - "learning_rate": 4.0408489380572595e-05, - "loss": 0.0009137367829680443, - "step": 2850 - }, - { - "epoch": 1.9474761255115962, - "grad_norm": 0.37154069542884827, - "learning_rate": 4.030627289054224e-05, - "loss": 0.03070145845413208, - "step": 2855 - }, - { - "epoch": 1.950886766712142, - "grad_norm": 1.7240241765975952, - "learning_rate": 4.020403542859436e-05, - "loss": 0.02094976305961609, - "step": 2860 - }, - { - "epoch": 1.9542974079126876, - "grad_norm": 0.019670270383358, - "learning_rate": 4.0101777758772826e-05, - "loss": 0.002072345092892647, - "step": 2865 - }, - { - "epoch": 1.9577080491132333, - "grad_norm": 0.004318730439990759, - "learning_rate": 3.999950064527255e-05, - "loss": 0.004361823201179504, - "step": 2870 - }, - { - "epoch": 1.961118690313779, - "grad_norm": 0.01702667959034443, - "learning_rate": 3.989720485243371e-05, - "loss": 0.0062848575413227085, - "step": 2875 - }, - { - "epoch": 1.9645293315143246, - "grad_norm": 0.011724979616701603, - "learning_rate": 3.9794891144736114e-05, - "loss": 0.010996624082326888, - "step": 2880 - }, - { - "epoch": 1.9679399727148703, - "grad_norm": 0.05676786229014397, - "learning_rate": 3.9692560286793454e-05, - "loss": 0.0015414996072649957, - "step": 2885 - }, - { - "epoch": 1.971350613915416, - "grad_norm": 0.0819210484623909, - "learning_rate": 3.959021304334756e-05, - "loss": 0.0008444737643003464, - "step": 2890 - }, - { - "epoch": 1.9747612551159617, - "grad_norm": 2.654984951019287, - "learning_rate": 3.948785017926274e-05, - "loss": 0.029948851466178893, - "step": 2895 - }, - { - "epoch": 1.9781718963165074, - "grad_norm": 1.9272631406784058, - "learning_rate": 3.938547245952003e-05, - "loss": 0.028752812743186952, - "step": 2900 - }, - { - "epoch": 1.981582537517053, - "grad_norm": 1.8656548261642456, - "learning_rate": 3.9283080649211474e-05, - "loss": 0.013515619933605194, - "step": 2905 - }, - { - "epoch": 1.984993178717599, - "grad_norm": 0.12653613090515137, - "learning_rate": 3.918067551353445e-05, - "loss": 0.0011569897644221783, - "step": 2910 - }, - { - "epoch": 1.9884038199181446, - "grad_norm": 0.07956118136644363, - "learning_rate": 3.9078257817785886e-05, - "loss": 0.024067461490631104, - "step": 2915 - }, - { - "epoch": 1.9918144611186903, - "grad_norm": 0.05314113199710846, - "learning_rate": 3.897582832735658e-05, - "loss": 0.008826743811368942, - "step": 2920 - }, - { - "epoch": 1.995225102319236, - "grad_norm": 0.05015930160880089, - "learning_rate": 3.887338780772551e-05, - "loss": 0.017050875723361968, - "step": 2925 - }, - { - "epoch": 1.9972714870395634, - "eval_loss": 0.051675114780664444, - "eval_runtime": 0.9019, - "eval_samples_per_second": 83.154, - "eval_steps_per_second": 2.217, - "step": 2928 - }, - { - "eval_cer_subset": 0.016214743153018123, - "eval_cer_subset_edit_distance": 119, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2928 - }, - { - "epoch": 1.9986357435197817, - "grad_norm": 0.0063513885252177715, - "learning_rate": 3.8770937024454024e-05, - "loss": 0.06096935272216797, - "step": 2930 - }, - { - "epoch": 2.0020463847203276, - "grad_norm": 0.3572078347206116, - "learning_rate": 3.86684767431802e-05, - "loss": 0.006809630990028381, - "step": 2935 - }, - { - "epoch": 2.0054570259208733, - "grad_norm": 0.06565147638320923, - "learning_rate": 3.8566007729613116e-05, - "loss": 0.0028434153646230698, - "step": 2940 - }, - { - "epoch": 2.008867667121419, - "grad_norm": 0.842980682849884, - "learning_rate": 3.846353074952705e-05, - "loss": 0.009484797716140747, - "step": 2945 - }, - { - "epoch": 2.0122783083219646, - "grad_norm": 0.2072802633047104, - "learning_rate": 3.83610465687559e-05, - "loss": 0.002631821669638157, - "step": 2950 - }, - { - "epoch": 2.0156889495225103, - "grad_norm": 0.010441784746944904, - "learning_rate": 3.8258555953187294e-05, - "loss": 0.0031868770718574526, - "step": 2955 - }, - { - "epoch": 2.019099590723056, - "grad_norm": 0.01504182443022728, - "learning_rate": 3.8156059668756994e-05, - "loss": 0.0008036898449063301, - "step": 2960 - }, - { - "epoch": 2.0225102319236017, - "grad_norm": 0.1884382665157318, - "learning_rate": 3.805355848144312e-05, - "loss": 0.0035643450915813445, - "step": 2965 - }, - { - "epoch": 2.0259208731241474, - "grad_norm": 0.0791923776268959, - "learning_rate": 3.795105315726042e-05, - "loss": 0.006217213720083237, - "step": 2970 - }, - { - "epoch": 2.029331514324693, - "grad_norm": 0.3660128116607666, - "learning_rate": 3.7848544462254586e-05, - "loss": 0.01521536111831665, - "step": 2975 - }, - { - "epoch": 2.0327421555252387, - "grad_norm": 1.1506773233413696, - "learning_rate": 3.774603316249646e-05, - "loss": 0.016655027866363525, - "step": 2980 - }, - { - "epoch": 2.0361527967257844, - "grad_norm": 0.010742763057351112, - "learning_rate": 3.764352002407638e-05, - "loss": 0.0031856697052717207, - "step": 2985 - }, - { - "epoch": 2.03956343792633, - "grad_norm": 0.008567198179662228, - "learning_rate": 3.754100581309843e-05, - "loss": 0.006546307355165482, - "step": 2990 - }, - { - "epoch": 2.042974079126876, - "grad_norm": 0.242637500166893, - "learning_rate": 3.743849129567467e-05, - "loss": 0.01844196617603302, - "step": 2995 - }, - { - "epoch": 2.0463847203274215, - "grad_norm": 0.0021864245645701885, - "learning_rate": 3.7335977237919486e-05, - "loss": 0.0010665619745850563, - "step": 3000 - }, - { - "epoch": 2.049795361527967, - "grad_norm": 0.0823604166507721, - "learning_rate": 3.723346440594384e-05, - "loss": 0.007866787165403366, - "step": 3005 - }, - { - "epoch": 2.053206002728513, - "grad_norm": 1.950189471244812, - "learning_rate": 3.713095356584948e-05, - "loss": 0.008237652480602264, - "step": 3010 - }, - { - "epoch": 2.0566166439290585, - "grad_norm": 0.0031665184069424868, - "learning_rate": 3.702844548372333e-05, - "loss": 0.0026241108775138856, - "step": 3015 - }, - { - "epoch": 2.060027285129604, - "grad_norm": 0.000985809718258679, - "learning_rate": 3.692594092563164e-05, - "loss": 0.0006582608446478843, - "step": 3020 - }, - { - "epoch": 2.06343792633015, - "grad_norm": 0.005130598321557045, - "learning_rate": 3.682344065761439e-05, - "loss": 0.003146781027317047, - "step": 3025 - }, - { - "epoch": 2.0668485675306956, - "grad_norm": 0.37476831674575806, - "learning_rate": 3.6720945445679456e-05, - "loss": 0.0038135331124067307, - "step": 3030 - }, - { - "epoch": 2.0702592087312413, - "grad_norm": 0.7057031989097595, - "learning_rate": 3.661845605579694e-05, - "loss": 0.01638354957103729, - "step": 3035 - }, - { - "epoch": 2.0736698499317874, - "grad_norm": 0.002361712511628866, - "learning_rate": 3.651597325389343e-05, - "loss": 0.008126144856214523, - "step": 3040 - }, - { - "epoch": 2.077080491132333, - "grad_norm": 0.14359615743160248, - "learning_rate": 3.641349780584628e-05, - "loss": 0.0010236113332211972, - "step": 3045 - }, - { - "epoch": 2.0804911323328787, - "grad_norm": 0.02165267802774906, - "learning_rate": 3.631103047747791e-05, - "loss": 0.001835300400853157, - "step": 3050 - }, - { - "epoch": 2.0839017735334244, - "grad_norm": 0.08726503700017929, - "learning_rate": 3.6208572034550014e-05, - "loss": 0.0011202219873666763, - "step": 3055 - }, - { - "epoch": 2.08731241473397, - "grad_norm": 0.011694432236254215, - "learning_rate": 3.6106123242757934e-05, - "loss": 0.0012682409957051276, - "step": 3060 - }, - { - "epoch": 2.090723055934516, - "grad_norm": 0.011882166378200054, - "learning_rate": 3.6003684867724856e-05, - "loss": 0.0017605546861886978, - "step": 3065 - }, - { - "epoch": 2.0941336971350615, - "grad_norm": 0.04591360688209534, - "learning_rate": 3.5901257674996135e-05, - "loss": 0.005101381987333298, - "step": 3070 - }, - { - "epoch": 2.097544338335607, - "grad_norm": 0.004696133080869913, - "learning_rate": 3.579884243003353e-05, - "loss": 0.0011597291566431522, - "step": 3075 - }, - { - "epoch": 2.100954979536153, - "grad_norm": 0.15893827378749847, - "learning_rate": 3.5696439898209546e-05, - "loss": 0.004860148951411247, - "step": 3080 - }, - { - "epoch": 2.1043656207366985, - "grad_norm": 0.0052610537968575954, - "learning_rate": 3.559405084480166e-05, - "loss": 0.0022133996710181235, - "step": 3085 - }, - { - "epoch": 2.107776261937244, - "grad_norm": 0.04163965955376625, - "learning_rate": 3.5491676034986634e-05, - "loss": 0.0011653171852231026, - "step": 3090 - }, - { - "epoch": 2.11118690313779, - "grad_norm": 0.03646431118249893, - "learning_rate": 3.538931623383477e-05, - "loss": 0.0010974819771945477, - "step": 3095 - }, - { - "epoch": 2.1145975443383356, - "grad_norm": 0.0027346035931259394, - "learning_rate": 3.5286972206304225e-05, - "loss": 0.0002963356673717499, - "step": 3100 - }, - { - "epoch": 2.1180081855388813, - "grad_norm": 0.030314767733216286, - "learning_rate": 3.5184644717235233e-05, - "loss": 0.0013646245934069157, - "step": 3105 - }, - { - "epoch": 2.121418826739427, - "grad_norm": 0.07366184145212173, - "learning_rate": 3.5082334531344514e-05, - "loss": 0.0018215376883745193, - "step": 3110 - }, - { - "epoch": 2.1248294679399726, - "grad_norm": 0.5131163597106934, - "learning_rate": 3.498004241321938e-05, - "loss": 0.011788859963417053, - "step": 3115 - }, - { - "epoch": 2.1282401091405183, - "grad_norm": 0.005606099497526884, - "learning_rate": 3.48777691273122e-05, - "loss": 0.00023315094877034425, - "step": 3120 - }, - { - "epoch": 2.131650750341064, - "grad_norm": 0.218208909034729, - "learning_rate": 3.4775515437934554e-05, - "loss": 0.0018167193979024888, - "step": 3125 - }, - { - "epoch": 2.1350613915416097, - "grad_norm": 0.026605455204844475, - "learning_rate": 3.467328210925161e-05, - "loss": 0.0012846079654991627, - "step": 3130 - }, - { - "epoch": 2.1384720327421554, - "grad_norm": 1.4139975309371948, - "learning_rate": 3.457106990527632e-05, - "loss": 0.00508112832903862, - "step": 3135 - }, - { - "epoch": 2.141882673942701, - "grad_norm": 0.019414646551012993, - "learning_rate": 3.446887958986385e-05, - "loss": 0.00019377884455025197, - "step": 3140 - }, - { - "epoch": 2.1452933151432467, - "grad_norm": 0.0009443740709684789, - "learning_rate": 3.4366711926705694e-05, - "loss": 0.0002384019084274769, - "step": 3145 - }, - { - "epoch": 2.148703956343793, - "grad_norm": 0.004274521954357624, - "learning_rate": 3.426456767932416e-05, - "loss": 0.0001209982088766992, - "step": 3150 - }, - { - "epoch": 2.1521145975443385, - "grad_norm": 1.536423683166504, - "learning_rate": 3.416244761106645e-05, - "loss": 0.008635792136192321, - "step": 3155 - }, - { - "epoch": 2.155525238744884, - "grad_norm": 0.0005219463491812348, - "learning_rate": 3.406035248509918e-05, - "loss": 0.009876561909914016, - "step": 3160 - }, - { - "epoch": 2.15893587994543, - "grad_norm": 0.0003459984145592898, - "learning_rate": 3.395828306440249e-05, - "loss": 0.00039457883685827254, - "step": 3165 - }, - { - "epoch": 2.1623465211459756, - "grad_norm": 0.001981241861358285, - "learning_rate": 3.3856240111764465e-05, - "loss": 0.0015202089212834834, - "step": 3170 - }, - { - "epoch": 2.1657571623465213, - "grad_norm": 2.1673197746276855, - "learning_rate": 3.375422438977536e-05, - "loss": 0.006145225092768669, - "step": 3175 - }, - { - "epoch": 2.169167803547067, - "grad_norm": 1.1808840036392212, - "learning_rate": 3.365223666082195e-05, - "loss": 0.013607437908649444, - "step": 3180 - }, - { - "epoch": 2.1725784447476126, - "grad_norm": 0.028271734714508057, - "learning_rate": 3.3550277687081766e-05, - "loss": 0.0013325599022209645, - "step": 3185 - }, - { - "epoch": 2.1759890859481583, - "grad_norm": 0.24461407959461212, - "learning_rate": 3.344834823051754e-05, - "loss": 0.0011271734721958638, - "step": 3190 - }, - { - "epoch": 2.179399727148704, - "grad_norm": 0.870476245880127, - "learning_rate": 3.334644905287129e-05, - "loss": 0.006120540574193001, - "step": 3195 - }, - { - "epoch": 2.1828103683492497, - "grad_norm": 0.24630939960479736, - "learning_rate": 3.324458091565887e-05, - "loss": 0.006894994527101517, - "step": 3200 - }, - { - "epoch": 2.1862210095497954, - "grad_norm": 0.035023678094148636, - "learning_rate": 3.314274458016407e-05, - "loss": 0.0012451894581317902, - "step": 3205 - }, - { - "epoch": 2.189631650750341, - "grad_norm": 0.0890582948923111, - "learning_rate": 3.3040940807433086e-05, - "loss": 0.0024930678308010103, - "step": 3210 - }, - { - "epoch": 2.1930422919508867, - "grad_norm": 0.30489957332611084, - "learning_rate": 3.2939170358268715e-05, - "loss": 0.023087967932224274, - "step": 3215 - }, - { - "epoch": 2.1964529331514324, - "grad_norm": 0.0017311271512880921, - "learning_rate": 3.283743399322477e-05, - "loss": 0.010184342414140702, - "step": 3220 - }, - { - "epoch": 2.199863574351978, - "grad_norm": 0.08493661880493164, - "learning_rate": 3.273573247260027e-05, - "loss": 0.02805263102054596, - "step": 3225 - }, - { - "epoch": 2.203274215552524, - "grad_norm": 0.09135634452104568, - "learning_rate": 3.2634066556433934e-05, - "loss": 0.0009638279676437378, - "step": 3230 - }, - { - "epoch": 2.2066848567530695, - "grad_norm": 0.05967738851904869, - "learning_rate": 3.2532437004498316e-05, - "loss": 0.007517358660697937, - "step": 3235 - }, - { - "epoch": 2.210095497953615, - "grad_norm": 0.03030387870967388, - "learning_rate": 3.243084457629425e-05, - "loss": 0.0009239451959729194, - "step": 3240 - }, - { - "epoch": 2.213506139154161, - "grad_norm": 0.004568960517644882, - "learning_rate": 3.2329290031045114e-05, - "loss": 0.00045777852647006513, - "step": 3245 - }, - { - "epoch": 2.2169167803547065, - "grad_norm": 0.20760250091552734, - "learning_rate": 3.2227774127691225e-05, - "loss": 0.0007285364903509617, - "step": 3250 - }, - { - "epoch": 2.220327421555252, - "grad_norm": 0.0028919128235429525, - "learning_rate": 3.2126297624884065e-05, - "loss": 0.001136382296681404, - "step": 3255 - }, - { - "epoch": 2.223738062755798, - "grad_norm": 0.0026204732712358236, - "learning_rate": 3.20248612809807e-05, - "loss": 0.0001833780319429934, - "step": 3260 - }, - { - "epoch": 2.2271487039563436, - "grad_norm": 0.054305560886859894, - "learning_rate": 3.192346585403805e-05, - "loss": 0.003986500948667526, - "step": 3265 - }, - { - "epoch": 2.2305593451568897, - "grad_norm": 0.024538133293390274, - "learning_rate": 3.182211210180732e-05, - "loss": 0.0003968174569308758, - "step": 3270 - }, - { - "epoch": 2.2339699863574354, - "grad_norm": 0.003948847763240337, - "learning_rate": 3.172080078172817e-05, - "loss": 0.0004363433923572302, - "step": 3275 - }, - { - "epoch": 2.237380627557981, - "grad_norm": 0.0024372704792767763, - "learning_rate": 3.161953265092322e-05, - "loss": 0.001003190129995346, - "step": 3280 - }, - { - "epoch": 2.2407912687585267, - "grad_norm": 0.05504141375422478, - "learning_rate": 3.1518308466192346e-05, - "loss": 0.00043217958882451056, - "step": 3285 - }, - { - "epoch": 2.2442019099590724, - "grad_norm": 0.003660411573946476, - "learning_rate": 3.141712898400692e-05, - "loss": 0.0005229417700320482, - "step": 3290 - }, - { - "epoch": 2.246930422919509, - "eval_loss": 0.07013023644685745, - "eval_runtime": 0.915, - "eval_samples_per_second": 81.971, - "eval_steps_per_second": 2.186, - "step": 3294 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3294 - }, - { - "epoch": 2.247612551159618, - "grad_norm": 0.0009039652650244534, - "learning_rate": 3.1315994960504354e-05, - "loss": 0.0009505398571491242, - "step": 3295 - }, - { - "epoch": 2.251023192360164, - "grad_norm": 0.0008299718610942364, - "learning_rate": 3.121490715148224e-05, - "loss": 0.006436178088188171, - "step": 3300 - }, - { - "epoch": 2.2544338335607095, - "grad_norm": 0.003360757604241371, - "learning_rate": 3.1113866312392846e-05, - "loss": 0.0004931201227009296, - "step": 3305 - }, - { - "epoch": 2.257844474761255, - "grad_norm": 0.0006958392332307994, - "learning_rate": 3.1012873198337415e-05, - "loss": 0.0008634727448225022, - "step": 3310 - }, - { - "epoch": 2.261255115961801, - "grad_norm": 0.0006489035440608859, - "learning_rate": 3.0911928564060525e-05, - "loss": 0.02126412242650986, - "step": 3315 - }, - { - "epoch": 2.2646657571623465, - "grad_norm": 0.05853112041950226, - "learning_rate": 3.081103316394446e-05, - "loss": 0.0011481027118861674, - "step": 3320 - }, - { - "epoch": 2.268076398362892, - "grad_norm": 0.018470890820026398, - "learning_rate": 3.0710187752003576e-05, - "loss": 0.0005085847340524196, - "step": 3325 - }, - { - "epoch": 2.271487039563438, - "grad_norm": 0.002904064953327179, - "learning_rate": 3.06093930818786e-05, - "loss": 0.0011355782859027385, - "step": 3330 - }, - { - "epoch": 2.2748976807639836, - "grad_norm": 0.006562090013176203, - "learning_rate": 3.0508649906831165e-05, - "loss": 0.0017314480617642402, - "step": 3335 - }, - { - "epoch": 2.2783083219645293, - "grad_norm": 0.012832654640078545, - "learning_rate": 3.040795897973794e-05, - "loss": 0.014564378559589386, - "step": 3340 - }, - { - "epoch": 2.281718963165075, - "grad_norm": 0.40248075127601624, - "learning_rate": 3.030732105308523e-05, - "loss": 0.013111820816993714, - "step": 3345 - }, - { - "epoch": 2.2851296043656206, - "grad_norm": 0.009625518694519997, - "learning_rate": 3.0206736878963198e-05, - "loss": 0.0003735888050869107, - "step": 3350 - }, - { - "epoch": 2.2885402455661663, - "grad_norm": 0.05925761163234711, - "learning_rate": 3.010620720906034e-05, - "loss": 0.0005200970452278852, - "step": 3355 - }, - { - "epoch": 2.291950886766712, - "grad_norm": 0.04488271474838257, - "learning_rate": 3.0005732794657804e-05, - "loss": 0.0017546603456139564, - "step": 3360 - }, - { - "epoch": 2.2953615279672577, - "grad_norm": 0.0013143798569217324, - "learning_rate": 2.990531438662383e-05, - "loss": 0.0006482157856225968, - "step": 3365 - }, - { - "epoch": 2.2987721691678034, - "grad_norm": 0.0018280980875715613, - "learning_rate": 2.980495273540805e-05, - "loss": 0.002798055298626423, - "step": 3370 - }, - { - "epoch": 2.3021828103683495, - "grad_norm": 0.0068644145503640175, - "learning_rate": 2.9704648591036028e-05, - "loss": 0.0010916708968579769, - "step": 3375 - }, - { - "epoch": 2.305593451568895, - "grad_norm": 0.006140770856291056, - "learning_rate": 2.9604402703103482e-05, - "loss": 0.0003204951295629144, - "step": 3380 - }, - { - "epoch": 2.309004092769441, - "grad_norm": 0.01666918210685253, - "learning_rate": 2.9504215820770825e-05, - "loss": 0.002915392816066742, - "step": 3385 - }, - { - "epoch": 2.3124147339699865, - "grad_norm": 0.001569412648677826, - "learning_rate": 2.9404088692757462e-05, - "loss": 0.00282623004168272, - "step": 3390 - }, - { - "epoch": 2.315825375170532, - "grad_norm": 2.6985678672790527, - "learning_rate": 2.930402206733629e-05, - "loss": 0.056363034248352054, - "step": 3395 - }, - { - "epoch": 2.319236016371078, - "grad_norm": 0.061534252017736435, - "learning_rate": 2.9204016692328008e-05, - "loss": 0.002193786948919296, - "step": 3400 - }, - { - "epoch": 2.3226466575716236, - "grad_norm": 0.00724546005949378, - "learning_rate": 2.9104073315095624e-05, - "loss": 0.0027640098705887794, - "step": 3405 - }, - { - "epoch": 2.3260572987721693, - "grad_norm": 0.0014935819199308753, - "learning_rate": 2.900419268253876e-05, - "loss": 0.0014965098351240158, - "step": 3410 - }, - { - "epoch": 2.329467939972715, - "grad_norm": 0.2667955458164215, - "learning_rate": 2.89043755410882e-05, - "loss": 0.0009135601110756397, - "step": 3415 - }, - { - "epoch": 2.3328785811732606, - "grad_norm": 0.015711264684796333, - "learning_rate": 2.8804622636700195e-05, - "loss": 0.0004993634298443794, - "step": 3420 - }, - { - "epoch": 2.3362892223738063, - "grad_norm": 0.000695803901180625, - "learning_rate": 2.8704934714850972e-05, - "loss": 0.0010460540652275085, - "step": 3425 - }, - { - "epoch": 2.339699863574352, - "grad_norm": 0.00030175631400197744, - "learning_rate": 2.8605312520531102e-05, - "loss": 0.0011491063050925732, - "step": 3430 - }, - { - "epoch": 2.3431105047748977, - "grad_norm": 0.0008360512438230217, - "learning_rate": 2.850575679823998e-05, - "loss": 0.005195276811718941, - "step": 3435 - }, - { - "epoch": 2.3465211459754434, - "grad_norm": 0.07670744508504868, - "learning_rate": 2.840626829198022e-05, - "loss": 0.001102046575397253, - "step": 3440 - }, - { - "epoch": 2.349931787175989, - "grad_norm": 0.0048200939781963825, - "learning_rate": 2.8306847745252154e-05, - "loss": 0.00011967071332037449, - "step": 3445 - }, - { - "epoch": 2.3533424283765347, - "grad_norm": 0.0036802536342293024, - "learning_rate": 2.8207495901048164e-05, - "loss": 0.003212982416152954, - "step": 3450 - }, - { - "epoch": 2.3567530695770804, - "grad_norm": 0.0017565820598974824, - "learning_rate": 2.8108213501847284e-05, - "loss": 3.878590650856495e-05, - "step": 3455 - }, - { - "epoch": 2.360163710777626, - "grad_norm": 0.07837986201047897, - "learning_rate": 2.8009001289609514e-05, - "loss": 0.00035386246163398026, - "step": 3460 - }, - { - "epoch": 2.363574351978172, - "grad_norm": 0.035858154296875, - "learning_rate": 2.7909860005770364e-05, - "loss": 0.0020171813666820526, - "step": 3465 - }, - { - "epoch": 2.3669849931787175, - "grad_norm": 0.001313618617132306, - "learning_rate": 2.781079039123525e-05, - "loss": 0.0077533811330795285, - "step": 3470 - }, - { - "epoch": 2.370395634379263, - "grad_norm": 0.022166471928358078, - "learning_rate": 2.771179318637402e-05, - "loss": 0.00021515686530619859, - "step": 3475 - }, - { - "epoch": 2.373806275579809, - "grad_norm": 0.0037807885091751814, - "learning_rate": 2.7612869131015353e-05, - "loss": 0.008334387093782425, - "step": 3480 - }, - { - "epoch": 2.3772169167803545, - "grad_norm": 0.02126333676278591, - "learning_rate": 2.7514018964441313e-05, - "loss": 0.0012980472296476365, - "step": 3485 - }, - { - "epoch": 2.3806275579809, - "grad_norm": 0.0007329506915993989, - "learning_rate": 2.7415243425381707e-05, - "loss": 0.000131706683896482, - "step": 3490 - }, - { - "epoch": 2.384038199181446, - "grad_norm": 0.008962417021393776, - "learning_rate": 2.73165432520087e-05, - "loss": 0.0001407766016200185, - "step": 3495 - }, - { - "epoch": 2.3874488403819916, - "grad_norm": 0.06224314495921135, - "learning_rate": 2.721791918193119e-05, - "loss": 0.0005040234886109829, - "step": 3500 - }, - { - "epoch": 2.3908594815825377, - "grad_norm": 0.007790696807205677, - "learning_rate": 2.7119371952189368e-05, - "loss": 0.00020941467955708503, - "step": 3505 - }, - { - "epoch": 2.3942701227830834, - "grad_norm": 0.1999143660068512, - "learning_rate": 2.7020902299249144e-05, - "loss": 0.0005157966166734696, - "step": 3510 - }, - { - "epoch": 2.397680763983629, - "grad_norm": 1.5223946571350098, - "learning_rate": 2.692251095899673e-05, - "loss": 0.004808775335550308, - "step": 3515 - }, - { - "epoch": 2.4010914051841747, - "grad_norm": 0.0005383774405345321, - "learning_rate": 2.6824198666733024e-05, - "loss": 0.0007459132932126522, - "step": 3520 - }, - { - "epoch": 2.4045020463847204, - "grad_norm": 0.02152041345834732, - "learning_rate": 2.672596615716823e-05, - "loss": 0.010163982212543488, - "step": 3525 - }, - { - "epoch": 2.407912687585266, - "grad_norm": 0.1950986683368683, - "learning_rate": 2.6627814164416303e-05, - "loss": 0.002464359626173973, - "step": 3530 - }, - { - "epoch": 2.411323328785812, - "grad_norm": 0.21561792492866516, - "learning_rate": 2.652974342198947e-05, - "loss": 0.0010975897312164307, - "step": 3535 - }, - { - "epoch": 2.4147339699863575, - "grad_norm": 0.0007951174047775567, - "learning_rate": 2.6431754662792775e-05, - "loss": 6.033455138094723e-05, - "step": 3540 - }, - { - "epoch": 2.418144611186903, - "grad_norm": 0.0016590118175372481, - "learning_rate": 2.633384861911856e-05, - "loss": 0.00012161724735051393, - "step": 3545 - }, - { - "epoch": 2.421555252387449, - "grad_norm": 0.004098537378013134, - "learning_rate": 2.6236026022641047e-05, - "loss": 0.0006160829216241837, - "step": 3550 - }, - { - "epoch": 2.4249658935879945, - "grad_norm": 0.0009240853250958025, - "learning_rate": 2.6138287604410772e-05, - "loss": 8.804704993963242e-05, - "step": 3555 - }, - { - "epoch": 2.42837653478854, - "grad_norm": 0.005952226463705301, - "learning_rate": 2.604063409484928e-05, - "loss": 0.0006035147234797478, - "step": 3560 - }, - { - "epoch": 2.431787175989086, - "grad_norm": 0.03809252381324768, - "learning_rate": 2.5943066223743488e-05, - "loss": 0.00727783590555191, - "step": 3565 - }, - { - "epoch": 2.4351978171896316, - "grad_norm": 0.05054875835776329, - "learning_rate": 2.5845584720240384e-05, - "loss": 0.0082052581012249, - "step": 3570 - }, - { - "epoch": 2.4386084583901773, - "grad_norm": 0.0147855868563056, - "learning_rate": 2.5748190312841466e-05, - "loss": 0.011614852398633958, - "step": 3575 - }, - { - "epoch": 2.442019099590723, - "grad_norm": 0.011641742661595345, - "learning_rate": 2.5650883729397373e-05, - "loss": 0.0002830417361110449, - "step": 3580 - }, - { - "epoch": 2.4454297407912686, - "grad_norm": 0.04626445844769478, - "learning_rate": 2.5553665697102386e-05, - "loss": 0.0003774407086893916, - "step": 3585 - }, - { - "epoch": 2.4488403819918143, - "grad_norm": 0.3234706521034241, - "learning_rate": 2.5456536942489065e-05, - "loss": 0.0009496832266449928, - "step": 3590 - }, - { - "epoch": 2.45225102319236, - "grad_norm": 0.029156841337680817, - "learning_rate": 2.535949819142272e-05, - "loss": 0.0016127176582813262, - "step": 3595 - }, - { - "epoch": 2.4556616643929057, - "grad_norm": 0.0015022120205685496, - "learning_rate": 2.52625501690961e-05, - "loss": 0.00010128046851605177, - "step": 3600 - }, - { - "epoch": 2.459072305593452, - "grad_norm": 0.12954266369342804, - "learning_rate": 2.5165693600023872e-05, - "loss": 0.004440005496144294, - "step": 3605 - }, - { - "epoch": 2.4624829467939975, - "grad_norm": 0.022409839555621147, - "learning_rate": 2.5068929208037295e-05, - "loss": 0.0019246777519583702, - "step": 3610 - }, - { - "epoch": 2.465893587994543, - "grad_norm": 0.0018720730440691113, - "learning_rate": 2.497225771627873e-05, - "loss": 0.004561808705329895, - "step": 3615 - }, - { - "epoch": 2.469304229195089, - "grad_norm": 0.0021158247254788876, - "learning_rate": 2.4875679847196312e-05, - "loss": 0.005481125041842461, - "step": 3620 - }, - { - "epoch": 2.4727148703956345, - "grad_norm": 0.0024307845160365105, - "learning_rate": 2.477919632253845e-05, - "loss": 0.0009140795096755028, - "step": 3625 - }, - { - "epoch": 2.47612551159618, - "grad_norm": 0.0020758784376084805, - "learning_rate": 2.4682807863348583e-05, - "loss": 0.001236506924033165, - "step": 3630 - }, - { - "epoch": 2.479536152796726, - "grad_norm": 0.0006182460929267108, - "learning_rate": 2.4586515189959614e-05, - "loss": 0.00015565860085189342, - "step": 3635 - }, - { - "epoch": 2.4829467939972716, - "grad_norm": 0.05087731033563614, - "learning_rate": 2.4490319021988688e-05, - "loss": 0.00022137174382805825, - "step": 3640 - }, - { - "epoch": 2.4863574351978173, - "grad_norm": 0.03250613436102867, - "learning_rate": 2.4394220078331695e-05, - "loss": 0.00028696306981146336, - "step": 3645 - }, - { - "epoch": 2.489768076398363, - "grad_norm": 0.017168540507555008, - "learning_rate": 2.429821907715798e-05, - "loss": 0.0003641644492745399, - "step": 3650 - }, - { - "epoch": 2.4931787175989086, - "grad_norm": 0.0670199990272522, - "learning_rate": 2.420231673590491e-05, - "loss": 0.00015748695004731418, - "step": 3655 - }, - { - "epoch": 2.4965893587994543, - "grad_norm": 0.003998387139290571, - "learning_rate": 2.4106513771272585e-05, - "loss": 0.00026149852201342585, - "step": 3660 - }, - { - "epoch": 2.4965893587994543, - "eval_loss": 0.06822175532579422, - "eval_runtime": 0.9108, - "eval_samples_per_second": 82.345, - "eval_steps_per_second": 2.196, - "step": 3660 - }, - { - "eval_cer_subset": 0.01675977653631285, - "eval_cer_subset_edit_distance": 123, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3660 - }, - { - "epoch": 2.5, - "grad_norm": 0.0059893373399972916, - "learning_rate": 2.4010810899218384e-05, - "loss": 0.0037302006036043166, - "step": 3665 - }, - { - "epoch": 2.5034106412005457, - "grad_norm": 0.3968847692012787, - "learning_rate": 2.3915208834951736e-05, - "loss": 0.0008235686458647251, - "step": 3670 - }, - { - "epoch": 2.5068212824010914, - "grad_norm": 0.001170233590528369, - "learning_rate": 2.3819708292928645e-05, - "loss": 0.0021816927939653395, - "step": 3675 - }, - { - "epoch": 2.510231923601637, - "grad_norm": 0.0864306092262268, - "learning_rate": 2.3724309986846476e-05, - "loss": 0.00794672966003418, - "step": 3680 - }, - { - "epoch": 2.5136425648021827, - "grad_norm": 0.0012164375511929393, - "learning_rate": 2.362901462963851e-05, - "loss": 0.00014161464059725404, - "step": 3685 - }, - { - "epoch": 2.5170532060027284, - "grad_norm": 0.0047707995399832726, - "learning_rate": 2.353382293346872e-05, - "loss": 0.00012235456379130482, - "step": 3690 - }, - { - "epoch": 2.520463847203274, - "grad_norm": 0.0010226344456896186, - "learning_rate": 2.3438735609726346e-05, - "loss": 0.0006677288562059403, - "step": 3695 - }, - { - "epoch": 2.52387448840382, - "grad_norm": 0.01809096150100231, - "learning_rate": 2.334375336902067e-05, - "loss": 0.0004967927932739257, - "step": 3700 - }, - { - "epoch": 2.5272851296043655, - "grad_norm": 0.006922638975083828, - "learning_rate": 2.3248876921175613e-05, - "loss": 0.0012997164390981196, - "step": 3705 - }, - { - "epoch": 2.530695770804911, - "grad_norm": 0.0002996268740389496, - "learning_rate": 2.315410697522456e-05, - "loss": 5.4457224905490875e-05, - "step": 3710 - }, - { - "epoch": 2.534106412005457, - "grad_norm": 0.00561846699565649, - "learning_rate": 2.3059444239404896e-05, - "loss": 0.0002347052562981844, - "step": 3715 - }, - { - "epoch": 2.5375170532060025, - "grad_norm": 1.200972318649292, - "learning_rate": 2.296488942115287e-05, - "loss": 0.003510555624961853, - "step": 3720 - }, - { - "epoch": 2.540927694406548, - "grad_norm": 0.008847455494105816, - "learning_rate": 2.287044322709819e-05, - "loss": 0.00010497854091227055, - "step": 3725 - }, - { - "epoch": 2.544338335607094, - "grad_norm": 0.0026281927712261677, - "learning_rate": 2.277610636305883e-05, - "loss": 0.001988488808274269, - "step": 3730 - }, - { - "epoch": 2.5477489768076396, - "grad_norm": 0.008025784976780415, - "learning_rate": 2.268187953403568e-05, - "loss": 0.023679326474666595, - "step": 3735 - }, - { - "epoch": 2.5511596180081857, - "grad_norm": 0.03441132605075836, - "learning_rate": 2.258776344420735e-05, - "loss": 0.0004788160789757967, - "step": 3740 - }, - { - "epoch": 2.5545702592087314, - "grad_norm": 3.1458778381347656, - "learning_rate": 2.2493758796924816e-05, - "loss": 0.008043569326400758, - "step": 3745 - }, - { - "epoch": 2.557980900409277, - "grad_norm": 0.002775805303826928, - "learning_rate": 2.2399866294706302e-05, - "loss": 0.0013419794850051403, - "step": 3750 - }, - { - "epoch": 2.5613915416098227, - "grad_norm": 0.0030509193893522024, - "learning_rate": 2.2306086639231857e-05, - "loss": 0.03926021754741669, - "step": 3755 - }, - { - "epoch": 2.5648021828103684, - "grad_norm": 0.0024770230520516634, - "learning_rate": 2.2212420531338248e-05, - "loss": 0.0011906253173947334, - "step": 3760 - }, - { - "epoch": 2.568212824010914, - "grad_norm": 0.007617161609232426, - "learning_rate": 2.2118868671013692e-05, - "loss": 0.008073102682828903, - "step": 3765 - }, - { - "epoch": 2.57162346521146, - "grad_norm": 0.08999158442020416, - "learning_rate": 2.202543175739254e-05, - "loss": 0.0009835162200033665, - "step": 3770 - }, - { - "epoch": 2.5750341064120055, - "grad_norm": 0.010818173177540302, - "learning_rate": 2.193211048875022e-05, - "loss": 0.0019240962341427804, - "step": 3775 - }, - { - "epoch": 2.578444747612551, - "grad_norm": 0.01809680461883545, - "learning_rate": 2.183890556249781e-05, - "loss": 0.005013756453990936, - "step": 3780 - }, - { - "epoch": 2.581855388813097, - "grad_norm": 0.021501798182725906, - "learning_rate": 2.1745817675177027e-05, - "loss": 0.0005870801862329245, - "step": 3785 - }, - { - "epoch": 2.5852660300136425, - "grad_norm": 0.011130684986710548, - "learning_rate": 2.165284752245485e-05, - "loss": 0.00037821107544004916, - "step": 3790 - }, - { - "epoch": 2.588676671214188, - "grad_norm": 0.00624213507398963, - "learning_rate": 2.1559995799118496e-05, - "loss": 0.015400664508342743, - "step": 3795 - }, - { - "epoch": 2.592087312414734, - "grad_norm": 0.23763298988342285, - "learning_rate": 2.1467263199070018e-05, - "loss": 0.0006831173319369555, - "step": 3800 - }, - { - "epoch": 2.5954979536152796, - "grad_norm": 0.0056639909744262695, - "learning_rate": 2.137465041532133e-05, - "loss": 0.002130754478275776, - "step": 3805 - }, - { - "epoch": 2.5989085948158253, - "grad_norm": 0.42903369665145874, - "learning_rate": 2.1282158139988877e-05, - "loss": 0.0020006079226732253, - "step": 3810 - }, - { - "epoch": 2.602319236016371, - "grad_norm": 0.014364579692482948, - "learning_rate": 2.118978706428854e-05, - "loss": 0.0005437508225440979, - "step": 3815 - }, - { - "epoch": 2.6057298772169166, - "grad_norm": 0.017512807622551918, - "learning_rate": 2.1097537878530427e-05, - "loss": 0.00019666440784931182, - "step": 3820 - }, - { - "epoch": 2.6091405184174628, - "grad_norm": 0.00863230973482132, - "learning_rate": 2.100541127211379e-05, - "loss": 0.0001873808912932873, - "step": 3825 - }, - { - "epoch": 2.6125511596180084, - "grad_norm": 0.006781345698982477, - "learning_rate": 2.0913407933521714e-05, - "loss": 0.00018554476555436849, - "step": 3830 - }, - { - "epoch": 2.615961800818554, - "grad_norm": 0.004758995026350021, - "learning_rate": 2.082152855031618e-05, - "loss": 0.0004659180995076895, - "step": 3835 - }, - { - "epoch": 2.6193724420191, - "grad_norm": 0.0036142354365438223, - "learning_rate": 2.0729773809132782e-05, - "loss": 0.00033613520208746194, - "step": 3840 - }, - { - "epoch": 2.6227830832196455, - "grad_norm": 0.0798744410276413, - "learning_rate": 2.0638144395675614e-05, - "loss": 0.00026304563507437704, - "step": 3845 - }, - { - "epoch": 2.626193724420191, - "grad_norm": 0.07229600101709366, - "learning_rate": 2.0546640994712183e-05, - "loss": 0.022786998748779298, - "step": 3850 - }, - { - "epoch": 2.629604365620737, - "grad_norm": 0.11226585507392883, - "learning_rate": 2.04552642900683e-05, - "loss": 0.0002580304862931371, - "step": 3855 - }, - { - "epoch": 2.6330150068212825, - "grad_norm": 0.0019689116161316633, - "learning_rate": 2.036401496462292e-05, - "loss": 0.005474040284752846, - "step": 3860 - }, - { - "epoch": 2.636425648021828, - "grad_norm": 0.08611829578876495, - "learning_rate": 2.027289370030307e-05, - "loss": 0.0007106051780283451, - "step": 3865 - }, - { - "epoch": 2.639836289222374, - "grad_norm": 0.06968124955892563, - "learning_rate": 2.0181901178078723e-05, - "loss": 0.00030497927218675613, - "step": 3870 - }, - { - "epoch": 2.6432469304229196, - "grad_norm": 0.002102258615195751, - "learning_rate": 2.0091038077957807e-05, - "loss": 0.00039041375275701285, - "step": 3875 - }, - { - "epoch": 2.6466575716234653, - "grad_norm": 0.01055186241865158, - "learning_rate": 2.000030507898094e-05, - "loss": 0.00028035915456712244, - "step": 3880 - }, - { - "epoch": 2.650068212824011, - "grad_norm": 0.013122744858264923, - "learning_rate": 1.990970285921656e-05, - "loss": 0.0002463514683768153, - "step": 3885 - }, - { - "epoch": 2.6534788540245566, - "grad_norm": 0.043785031884908676, - "learning_rate": 1.9819232095755712e-05, - "loss": 0.0006866191513836383, - "step": 3890 - }, - { - "epoch": 2.6568894952251023, - "grad_norm": 0.014347897842526436, - "learning_rate": 1.9728893464707063e-05, - "loss": 0.00304874274879694, - "step": 3895 - }, - { - "epoch": 2.660300136425648, - "grad_norm": 0.01495263073593378, - "learning_rate": 1.9638687641191784e-05, - "loss": 0.0027243653312325478, - "step": 3900 - }, - { - "epoch": 2.6637107776261937, - "grad_norm": 0.0025812601670622826, - "learning_rate": 1.954861529933862e-05, - "loss": 0.00015772593906149268, - "step": 3905 - }, - { - "epoch": 2.6671214188267394, - "grad_norm": 3.2978317737579346, - "learning_rate": 1.9458677112278677e-05, - "loss": 0.011941131204366684, - "step": 3910 - }, - { - "epoch": 2.670532060027285, - "grad_norm": 0.00819153431802988, - "learning_rate": 1.936887375214059e-05, - "loss": 0.0019363060593605042, - "step": 3915 - }, - { - "epoch": 2.6739427012278307, - "grad_norm": 0.3553819954395294, - "learning_rate": 1.9279205890045335e-05, - "loss": 0.001681213453412056, - "step": 3920 - }, - { - "epoch": 2.6773533424283764, - "grad_norm": 0.14068304002285004, - "learning_rate": 1.9189674196101303e-05, - "loss": 0.0004354804754257202, - "step": 3925 - }, - { - "epoch": 2.680763983628922, - "grad_norm": 0.05331770330667496, - "learning_rate": 1.9100279339399258e-05, - "loss": 0.0006728332955390215, - "step": 3930 - }, - { - "epoch": 2.684174624829468, - "grad_norm": 0.010825222358107567, - "learning_rate": 1.9011021988007387e-05, - "loss": 0.011760103702545165, - "step": 3935 - }, - { - "epoch": 2.6875852660300135, - "grad_norm": 0.02598944492638111, - "learning_rate": 1.892190280896622e-05, - "loss": 0.00020915823988616468, - "step": 3940 - }, - { - "epoch": 2.690995907230559, - "grad_norm": 0.00512358546257019, - "learning_rate": 1.8832922468283724e-05, - "loss": 0.000882271584123373, - "step": 3945 - }, - { - "epoch": 2.694406548431105, - "grad_norm": 0.05068441852927208, - "learning_rate": 1.874408163093028e-05, - "loss": 0.000997264590114355, - "step": 3950 - }, - { - "epoch": 2.6978171896316505, - "grad_norm": 0.0038104017730802298, - "learning_rate": 1.8655380960833724e-05, - "loss": 0.001553349569439888, - "step": 3955 - }, - { - "epoch": 2.701227830832196, - "grad_norm": 0.0013087299885228276, - "learning_rate": 1.8566821120874394e-05, - "loss": 0.006560490280389786, - "step": 3960 - }, - { - "epoch": 2.704638472032742, - "grad_norm": 1.1628080606460571, - "learning_rate": 1.8478402772880208e-05, - "loss": 0.0015312742441892623, - "step": 3965 - }, - { - "epoch": 2.708049113233288, - "grad_norm": 0.0020620303694158792, - "learning_rate": 1.8390126577621636e-05, - "loss": 0.013011389970779419, - "step": 3970 - }, - { - "epoch": 2.7114597544338337, - "grad_norm": 0.0014427551068365574, - "learning_rate": 1.830199319480682e-05, - "loss": 0.0008381184190511704, - "step": 3975 - }, - { - "epoch": 2.7148703956343794, - "grad_norm": 0.0005985202733427286, - "learning_rate": 1.821400328307663e-05, - "loss": 0.0005598202813416719, - "step": 3980 - }, - { - "epoch": 2.718281036834925, - "grad_norm": 0.0016122297383844852, - "learning_rate": 1.8126157499999783e-05, - "loss": 0.006013911962509155, - "step": 3985 - }, - { - "epoch": 2.7216916780354707, - "grad_norm": 0.0028895260766148567, - "learning_rate": 1.8038456502067822e-05, - "loss": 0.00017103723948821425, - "step": 3990 - }, - { - "epoch": 2.7251023192360164, - "grad_norm": 0.09400962293148041, - "learning_rate": 1.7950900944690308e-05, - "loss": 0.07410463690757751, - "step": 3995 - }, - { - "epoch": 2.728512960436562, - "grad_norm": 0.015019465237855911, - "learning_rate": 1.786349148218993e-05, - "loss": 0.004524913057684899, - "step": 4000 - }, - { - "epoch": 2.731923601637108, - "grad_norm": 0.000663114245980978, - "learning_rate": 1.7776228767797522e-05, - "loss": 0.0212590754032135, - "step": 4005 - }, - { - "epoch": 2.7353342428376535, - "grad_norm": 0.0029272777028381824, - "learning_rate": 1.768911345364726e-05, - "loss": 0.000913316011428833, - "step": 4010 - }, - { - "epoch": 2.738744884038199, - "grad_norm": 0.03791525587439537, - "learning_rate": 1.7602146190771743e-05, - "loss": 0.0018313366919755936, - "step": 4015 - }, - { - "epoch": 2.742155525238745, - "grad_norm": 0.041133999824523926, - "learning_rate": 1.7515327629097217e-05, - "loss": 0.0006253012455999851, - "step": 4020 - }, - { - "epoch": 2.7455661664392905, - "grad_norm": 0.01035034004598856, - "learning_rate": 1.7428658417438534e-05, - "loss": 0.005944912880659103, - "step": 4025 - }, - { - "epoch": 2.7462482946793996, - "eval_loss": 0.06961391866207123, - "eval_runtime": 0.9223, - "eval_samples_per_second": 81.321, - "eval_steps_per_second": 2.169, - "step": 4026 - }, - { - "eval_cer_subset": 0.015397193078076032, - "eval_cer_subset_edit_distance": 113, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4026 - }, - { - "epoch": 2.748976807639836, - "grad_norm": 0.9081467986106873, - "learning_rate": 1.7342139203494537e-05, - "loss": 0.0036753010004758834, - "step": 4030 - }, - { - "epoch": 2.752387448840382, - "grad_norm": 0.07232939451932907, - "learning_rate": 1.7255770633843028e-05, - "loss": 0.0029829120263457297, - "step": 4035 - }, - { - "epoch": 2.7557980900409276, - "grad_norm": 0.005813417956233025, - "learning_rate": 1.7169553353936035e-05, - "loss": 0.00047225411981344223, - "step": 4040 - }, - { - "epoch": 2.7592087312414733, - "grad_norm": 0.2354760318994522, - "learning_rate": 1.7083488008094945e-05, - "loss": 0.0015884984284639358, - "step": 4045 - }, - { - "epoch": 2.762619372442019, - "grad_norm": 0.1201627105474472, - "learning_rate": 1.699757523950577e-05, - "loss": 0.0010768620297312737, - "step": 4050 - }, - { - "epoch": 2.766030013642565, - "grad_norm": 0.033547814935445786, - "learning_rate": 1.6911815690214166e-05, - "loss": 0.01052093282341957, - "step": 4055 - }, - { - "epoch": 2.7694406548431107, - "grad_norm": 0.01014826912432909, - "learning_rate": 1.682621000112085e-05, - "loss": 0.0003362501040101051, - "step": 4060 - }, - { - "epoch": 2.7728512960436564, - "grad_norm": 0.004405386745929718, - "learning_rate": 1.6740758811976665e-05, - "loss": 0.00044001247733831405, - "step": 4065 - }, - { - "epoch": 2.776261937244202, - "grad_norm": 3.621004104614258, - "learning_rate": 1.665546276137783e-05, - "loss": 0.18638403415679933, - "step": 4070 - }, - { - "epoch": 2.779672578444748, - "grad_norm": 0.01203183177858591, - "learning_rate": 1.6570322486761184e-05, - "loss": 0.00013435594737529755, - "step": 4075 - }, - { - "epoch": 2.7830832196452935, - "grad_norm": 0.23053398728370667, - "learning_rate": 1.6485338624399445e-05, - "loss": 0.0010434269905090332, - "step": 4080 - }, - { - "epoch": 2.786493860845839, - "grad_norm": 0.0109801534563303, - "learning_rate": 1.6400511809396394e-05, - "loss": 0.00012704560067504643, - "step": 4085 - }, - { - "epoch": 2.789904502046385, - "grad_norm": 0.8463883399963379, - "learning_rate": 1.631584267568217e-05, - "loss": 0.007707947492599487, - "step": 4090 - }, - { - "epoch": 2.7933151432469305, - "grad_norm": 0.05444789677858353, - "learning_rate": 1.623133185600852e-05, - "loss": 0.0008411366492509842, - "step": 4095 - }, - { - "epoch": 2.796725784447476, - "grad_norm": 0.007274657487869263, - "learning_rate": 1.6146979981944095e-05, - "loss": 0.0002061120932921767, - "step": 4100 - }, - { - "epoch": 2.800136425648022, - "grad_norm": 0.013262615539133549, - "learning_rate": 1.6062787683869667e-05, - "loss": 0.0003235015319660306, - "step": 4105 - }, - { - "epoch": 2.8035470668485676, - "grad_norm": 0.0030244409572333097, - "learning_rate": 1.597875559097352e-05, - "loss": 0.0007286245469003916, - "step": 4110 - }, - { - "epoch": 2.8069577080491133, - "grad_norm": 0.0021644949447363615, - "learning_rate": 1.5894884331246632e-05, - "loss": 0.0003056209534406662, - "step": 4115 - }, - { - "epoch": 2.810368349249659, - "grad_norm": 0.014495057985186577, - "learning_rate": 1.5811174531478074e-05, - "loss": 0.0014359142631292343, - "step": 4120 - }, - { - "epoch": 2.8137789904502046, - "grad_norm": 0.05203676223754883, - "learning_rate": 1.5727626817250255e-05, - "loss": 0.0006342739332467317, - "step": 4125 - }, - { - "epoch": 2.8171896316507503, - "grad_norm": 0.004998183809220791, - "learning_rate": 1.564424181293435e-05, - "loss": 0.00013386564096435906, - "step": 4130 - }, - { - "epoch": 2.820600272851296, - "grad_norm": 0.05428452417254448, - "learning_rate": 1.556102014168546e-05, - "loss": 0.00014423681423068047, - "step": 4135 - }, - { - "epoch": 2.8240109140518417, - "grad_norm": 0.0013511159922927618, - "learning_rate": 1.5477962425438164e-05, - "loss": 6.5605464624241e-05, - "step": 4140 - }, - { - "epoch": 2.8274215552523874, - "grad_norm": 0.9757132530212402, - "learning_rate": 1.539506928490171e-05, - "loss": 0.00839839205145836, - "step": 4145 - }, - { - "epoch": 2.830832196452933, - "grad_norm": 0.0018282996024936438, - "learning_rate": 1.5312341339555445e-05, - "loss": 0.005959897115826607, - "step": 4150 - }, - { - "epoch": 2.8342428376534787, - "grad_norm": 0.0022353942040354013, - "learning_rate": 1.5229779207644171e-05, - "loss": 0.000250368332490325, - "step": 4155 - }, - { - "epoch": 2.8376534788540244, - "grad_norm": 0.006538774352520704, - "learning_rate": 1.5147383506173572e-05, - "loss": 0.0004641829524189234, - "step": 4160 - }, - { - "epoch": 2.84106412005457, - "grad_norm": 0.003464010776951909, - "learning_rate": 1.5065154850905465e-05, - "loss": 0.008368657529354095, - "step": 4165 - }, - { - "epoch": 2.844474761255116, - "grad_norm": 0.0020767974201589823, - "learning_rate": 1.4983093856353398e-05, - "loss": 0.00010023106588050724, - "step": 4170 - }, - { - "epoch": 2.8478854024556615, - "grad_norm": 0.0025670777540653944, - "learning_rate": 1.4901201135777887e-05, - "loss": 0.0006715046241879463, - "step": 4175 - }, - { - "epoch": 2.851296043656207, - "grad_norm": 0.0006886612391099334, - "learning_rate": 1.4819477301181915e-05, - "loss": 0.0008357623592019081, - "step": 4180 - }, - { - "epoch": 2.854706684856753, - "grad_norm": 0.0023747060913592577, - "learning_rate": 1.4737922963306332e-05, - "loss": 0.00488339364528656, - "step": 4185 - }, - { - "epoch": 2.8581173260572985, - "grad_norm": 0.0030493123922497034, - "learning_rate": 1.4656538731625333e-05, - "loss": 0.017219077050685882, - "step": 4190 - }, - { - "epoch": 2.8615279672578446, - "grad_norm": 0.0030935786198824644, - "learning_rate": 1.457532521434184e-05, - "loss": 0.00014684826601296663, - "step": 4195 - }, - { - "epoch": 2.8649386084583903, - "grad_norm": 0.0004102849052287638, - "learning_rate": 1.4494283018382991e-05, - "loss": 0.0002242558402940631, - "step": 4200 - }, - { - "epoch": 2.868349249658936, - "grad_norm": 1.6695232391357422, - "learning_rate": 1.4413412749395593e-05, - "loss": 0.01916976124048233, - "step": 4205 - }, - { - "epoch": 2.8717598908594817, - "grad_norm": 0.003357100998982787, - "learning_rate": 1.4332715011741656e-05, - "loss": 0.0036146264523267747, - "step": 4210 - }, - { - "epoch": 2.8751705320600274, - "grad_norm": 0.002434425987303257, - "learning_rate": 1.425219040849373e-05, - "loss": 0.0001186407171189785, - "step": 4215 - }, - { - "epoch": 2.878581173260573, - "grad_norm": 0.0272241048514843, - "learning_rate": 1.4171839541430586e-05, - "loss": 0.003773893415927887, - "step": 4220 - }, - { - "epoch": 2.8819918144611187, - "grad_norm": 0.00031364246387965977, - "learning_rate": 1.409166301103257e-05, - "loss": 3.483370819594711e-05, - "step": 4225 - }, - { - "epoch": 2.8854024556616644, - "grad_norm": 0.008605693466961384, - "learning_rate": 1.4011661416477186e-05, - "loss": 0.005898609757423401, - "step": 4230 - }, - { - "epoch": 2.88881309686221, - "grad_norm": 0.0003439450520090759, - "learning_rate": 1.3931835355634601e-05, - "loss": 0.0017080994322896004, - "step": 4235 - }, - { - "epoch": 2.892223738062756, - "grad_norm": 0.004427058156579733, - "learning_rate": 1.3852185425063181e-05, - "loss": 0.00010978456120938062, - "step": 4240 - }, - { - "epoch": 2.8956343792633015, - "grad_norm": 0.02229383960366249, - "learning_rate": 1.377271222000503e-05, - "loss": 0.0012531550601124763, - "step": 4245 - }, - { - "epoch": 2.899045020463847, - "grad_norm": 0.004171700682491064, - "learning_rate": 1.3693416334381517e-05, - "loss": 0.0012122373096644878, - "step": 4250 - }, - { - "epoch": 2.902455661664393, - "grad_norm": 0.0016778658609837294, - "learning_rate": 1.3614298360788924e-05, - "loss": 0.0006234514527022839, - "step": 4255 - }, - { - "epoch": 2.9058663028649385, - "grad_norm": 0.00403103232383728, - "learning_rate": 1.3535358890493897e-05, - "loss": 0.00030033572111278775, - "step": 4260 - }, - { - "epoch": 2.909276944065484, - "grad_norm": 0.006126928608864546, - "learning_rate": 1.3456598513429111e-05, - "loss": 0.014299000799655914, - "step": 4265 - }, - { - "epoch": 2.91268758526603, - "grad_norm": 0.0007882033823989332, - "learning_rate": 1.3378017818188815e-05, - "loss": 0.001049484871327877, - "step": 4270 - }, - { - "epoch": 2.9160982264665756, - "grad_norm": 0.0004515725013334304, - "learning_rate": 1.329961739202451e-05, - "loss": 6.776668014936149e-05, - "step": 4275 - }, - { - "epoch": 2.9195088676671213, - "grad_norm": 0.3738904297351837, - "learning_rate": 1.3221397820840419e-05, - "loss": 0.0010396759025752544, - "step": 4280 - }, - { - "epoch": 2.9229195088676674, - "grad_norm": 0.035881806164979935, - "learning_rate": 1.3143359689189279e-05, - "loss": 0.0003127899952232838, - "step": 4285 - }, - { - "epoch": 2.926330150068213, - "grad_norm": 0.010501476936042309, - "learning_rate": 1.306550358026784e-05, - "loss": 0.013835662603378296, - "step": 4290 - }, - { - "epoch": 2.9297407912687587, - "grad_norm": 0.002153329784050584, - "learning_rate": 1.2987830075912565e-05, - "loss": 0.007172297686338425, - "step": 4295 - }, - { - "epoch": 2.9331514324693044, - "grad_norm": 0.000594582874327898, - "learning_rate": 1.2910339756595254e-05, - "loss": 6.662132800556719e-05, - "step": 4300 - }, - { - "epoch": 2.93656207366985, - "grad_norm": 1.2802950143814087, - "learning_rate": 1.283303320141879e-05, - "loss": 0.0013225926086306572, - "step": 4305 - }, - { - "epoch": 2.939972714870396, - "grad_norm": 0.0010621119290590286, - "learning_rate": 1.2755910988112639e-05, - "loss": 0.0001950544072315097, - "step": 4310 - }, - { - "epoch": 2.9433833560709415, - "grad_norm": 0.0004629544273484498, - "learning_rate": 1.2678973693028735e-05, - "loss": 0.0002407266292721033, - "step": 4315 - }, - { - "epoch": 2.946793997271487, - "grad_norm": 0.00353289395570755, - "learning_rate": 1.2602221891137021e-05, - "loss": 0.006271860748529434, - "step": 4320 - }, - { - "epoch": 2.950204638472033, - "grad_norm": 0.021108930930495262, - "learning_rate": 1.2525656156021227e-05, - "loss": 0.007909800857305527, - "step": 4325 - }, - { - "epoch": 2.9536152796725785, - "grad_norm": 0.007604979444295168, - "learning_rate": 1.2449277059874547e-05, - "loss": 0.00022456045262515546, - "step": 4330 - }, - { - "epoch": 2.957025920873124, - "grad_norm": 0.0007357494323514402, - "learning_rate": 1.2373085173495411e-05, - "loss": 0.0010289529338479042, - "step": 4335 - }, - { - "epoch": 2.96043656207367, - "grad_norm": 0.0004920829669572413, - "learning_rate": 1.229708106628316e-05, - "loss": 8.303072536364198e-05, - "step": 4340 - }, - { - "epoch": 2.9638472032742156, - "grad_norm": 0.0014207189669832587, - "learning_rate": 1.2221265306233824e-05, - "loss": 0.001923336647450924, - "step": 4345 - }, - { - "epoch": 2.9672578444747613, - "grad_norm": 0.00898696668446064, - "learning_rate": 1.2145638459935863e-05, - "loss": 0.01918397843837738, - "step": 4350 - }, - { - "epoch": 2.970668485675307, - "grad_norm": 0.08236313611268997, - "learning_rate": 1.2070201092565988e-05, - "loss": 0.0005465132184326648, - "step": 4355 - }, - { - "epoch": 2.9740791268758526, - "grad_norm": 0.00796231534332037, - "learning_rate": 1.199495376788481e-05, - "loss": 4.8452542978338896e-05, - "step": 4360 - }, - { - "epoch": 2.9774897680763983, - "grad_norm": 0.026452092453837395, - "learning_rate": 1.1919897048232791e-05, - "loss": 0.0006576513405889273, - "step": 4365 - }, - { - "epoch": 2.980900409276944, - "grad_norm": 0.08616425842046738, - "learning_rate": 1.1845031494525901e-05, - "loss": 0.0013777482323348521, - "step": 4370 - }, - { - "epoch": 2.9843110504774897, - "grad_norm": 0.00048112327931448817, - "learning_rate": 1.1770357666251509e-05, - "loss": 0.0018716825172305108, - "step": 4375 - }, - { - "epoch": 2.9877216916780354, - "grad_norm": 0.0013632692862302065, - "learning_rate": 1.1695876121464154e-05, - "loss": 4.9980584299191834e-05, - "step": 4380 - }, - { - "epoch": 2.991132332878581, - "grad_norm": 1.318718671798706, - "learning_rate": 1.1621587416781445e-05, - "loss": 0.013328136503696441, - "step": 4385 - }, - { - "epoch": 2.9945429740791267, - "grad_norm": 0.00219643022865057, - "learning_rate": 1.1547492107379782e-05, - "loss": 9.902374586090446e-05, - "step": 4390 - }, - { - "epoch": 2.9959072305593453, - "eval_loss": 0.07537060230970383, - "eval_runtime": 0.95, - "eval_samples_per_second": 78.945, - "eval_steps_per_second": 2.105, - "step": 4392 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4392 - }, - { - "epoch": 2.9979536152796724, - "grad_norm": 0.003875225316733122, - "learning_rate": 1.1473590746990342e-05, - "loss": 0.0025784535333514213, - "step": 4395 - }, - { - "epoch": 3.001364256480218, - "grad_norm": 0.011882650665938854, - "learning_rate": 1.1399883887894846e-05, - "loss": 6.913430406711996e-05, - "step": 4400 - }, - { - "epoch": 3.004774897680764, - "grad_norm": 0.002775507280603051, - "learning_rate": 1.1326372080921464e-05, - "loss": 0.00019087132532149553, - "step": 4405 - }, - { - "epoch": 3.00818553888131, - "grad_norm": 0.06267738342285156, - "learning_rate": 1.125305587544069e-05, - "loss": 0.0007432831451296806, - "step": 4410 - }, - { - "epoch": 3.0115961800818556, - "grad_norm": 0.026564784348011017, - "learning_rate": 1.1179935819361272e-05, - "loss": 0.00015866300091147423, - "step": 4415 - }, - { - "epoch": 3.0150068212824013, - "grad_norm": 0.0011134434025734663, - "learning_rate": 1.1107012459126064e-05, - "loss": 0.0005193403456360101, - "step": 4420 - }, - { - "epoch": 3.018417462482947, - "grad_norm": 0.0006882239249534905, - "learning_rate": 1.1034286339707975e-05, - "loss": 0.0002253461629152298, - "step": 4425 - }, - { - "epoch": 3.0218281036834926, - "grad_norm": 0.008092721924185753, - "learning_rate": 1.0961758004605873e-05, - "loss": 0.0003596893046051264, - "step": 4430 - }, - { - "epoch": 3.0252387448840383, - "grad_norm": 0.01083564292639494, - "learning_rate": 1.0889427995840585e-05, - "loss": 0.010776457190513612, - "step": 4435 - }, - { - "epoch": 3.028649386084584, - "grad_norm": 0.040309611707925797, - "learning_rate": 1.0817296853950724e-05, - "loss": 0.0002762762364000082, - "step": 4440 - }, - { - "epoch": 3.0320600272851297, - "grad_norm": 0.0026077954098582268, - "learning_rate": 1.0745365117988804e-05, - "loss": 0.00011963967699557543, - "step": 4445 - }, - { - "epoch": 3.0354706684856754, - "grad_norm": 0.0013187731383368373, - "learning_rate": 1.0673633325517088e-05, - "loss": 0.0022230114787817, - "step": 4450 - }, - { - "epoch": 3.038881309686221, - "grad_norm": 0.08760128915309906, - "learning_rate": 1.060210201260362e-05, - "loss": 0.00029163951985538005, - "step": 4455 - }, - { - "epoch": 3.0422919508867667, - "grad_norm": 0.2557981610298157, - "learning_rate": 1.0530771713818229e-05, - "loss": 0.0014749299734830856, - "step": 4460 - }, - { - "epoch": 3.0457025920873124, - "grad_norm": 0.009532714262604713, - "learning_rate": 1.0459642962228502e-05, - "loss": 0.00023887362331151963, - "step": 4465 - }, - { - "epoch": 3.049113233287858, - "grad_norm": 0.002977812895551324, - "learning_rate": 1.0388716289395833e-05, - "loss": 3.836472751572728e-05, - "step": 4470 - }, - { - "epoch": 3.052523874488404, - "grad_norm": 0.0007074066670611501, - "learning_rate": 1.0317992225371411e-05, - "loss": 7.527543348260224e-05, - "step": 4475 - }, - { - "epoch": 3.0559345156889495, - "grad_norm": 0.020919082686305046, - "learning_rate": 1.0247471298692336e-05, - "loss": 0.00021068421192467214, - "step": 4480 - }, - { - "epoch": 3.059345156889495, - "grad_norm": 0.0012850193306803703, - "learning_rate": 1.0177154036377557e-05, - "loss": 0.00027780483942478894, - "step": 4485 - }, - { - "epoch": 3.062755798090041, - "grad_norm": 0.030514074489474297, - "learning_rate": 1.0107040963924027e-05, - "loss": 0.00020711682736873627, - "step": 4490 - }, - { - "epoch": 3.0661664392905865, - "grad_norm": 0.001317308866418898, - "learning_rate": 1.0037132605302716e-05, - "loss": 0.00024969261139631274, - "step": 4495 - }, - { - "epoch": 3.069577080491132, - "grad_norm": 0.0012978437589481473, - "learning_rate": 9.967429482954768e-06, - "loss": 0.0001504249172285199, - "step": 4500 - }, - { - "epoch": 3.072987721691678, - "grad_norm": 0.0003166501992382109, - "learning_rate": 9.897932117787476e-06, - "loss": 0.0002773872809484601, - "step": 4505 - }, - { - "epoch": 3.0763983628922236, - "grad_norm": 0.0029924868140369654, - "learning_rate": 9.828641029170544e-06, - "loss": 0.0005509680602699518, - "step": 4510 - }, - { - "epoch": 3.0798090040927693, - "grad_norm": 0.003611995605751872, - "learning_rate": 9.759556734932064e-06, - "loss": 0.0004832141101360321, - "step": 4515 - }, - { - "epoch": 3.083219645293315, - "grad_norm": 0.003923687152564526, - "learning_rate": 9.690679751354736e-06, - "loss": 0.00014967764727771282, - "step": 4520 - }, - { - "epoch": 3.086630286493861, - "grad_norm": 0.012491169385612011, - "learning_rate": 9.62201059317195e-06, - "loss": 8.488112362101675e-05, - "step": 4525 - }, - { - "epoch": 3.0900409276944067, - "grad_norm": 0.00011767258547479287, - "learning_rate": 9.553549773564035e-06, - "loss": 5.874955677427351e-05, - "step": 4530 - }, - { - "epoch": 3.0934515688949524, - "grad_norm": 0.021170401945710182, - "learning_rate": 9.48529780415427e-06, - "loss": 0.00013614417985081674, - "step": 4535 - }, - { - "epoch": 3.096862210095498, - "grad_norm": 0.005378399509936571, - "learning_rate": 9.417255195005218e-06, - "loss": 0.00010139571968466043, - "step": 4540 - }, - { - "epoch": 3.100272851296044, - "grad_norm": 0.0013061281060799956, - "learning_rate": 9.349422454614815e-06, - "loss": 0.0004999907687306404, - "step": 4545 - }, - { - "epoch": 3.1036834924965895, - "grad_norm": 0.000693993701133877, - "learning_rate": 9.281800089912605e-06, - "loss": 0.0001406701048836112, - "step": 4550 - }, - { - "epoch": 3.107094133697135, - "grad_norm": 0.003168008290231228, - "learning_rate": 9.214388606255934e-06, - "loss": 0.0002458775183185935, - "step": 4555 - }, - { - "epoch": 3.110504774897681, - "grad_norm": 0.001224424922838807, - "learning_rate": 9.147188507426224e-06, - "loss": 5.902486154809594e-05, - "step": 4560 - }, - { - "epoch": 3.1139154160982265, - "grad_norm": 0.0006458029965870082, - "learning_rate": 9.080200295625125e-06, - "loss": 6.971908733248711e-05, - "step": 4565 - }, - { - "epoch": 3.117326057298772, - "grad_norm": 0.0004500233626458794, - "learning_rate": 9.013424471470832e-06, - "loss": 4.827580996789038e-05, - "step": 4570 - }, - { - "epoch": 3.120736698499318, - "grad_norm": 0.008253362029790878, - "learning_rate": 8.946861533994316e-06, - "loss": 0.00010066803079098464, - "step": 4575 - }, - { - "epoch": 3.1241473396998636, - "grad_norm": 0.0027607178781181574, - "learning_rate": 8.88051198063559e-06, - "loss": 0.00010761913144961, - "step": 4580 - }, - { - "epoch": 3.1275579809004093, - "grad_norm": 0.0012132832780480385, - "learning_rate": 8.81437630723999e-06, - "loss": 0.00010583751136437059, - "step": 4585 - }, - { - "epoch": 3.130968622100955, - "grad_norm": 0.013205752708017826, - "learning_rate": 8.748455008054519e-06, - "loss": 7.872265996411443e-05, - "step": 4590 - }, - { - "epoch": 3.1343792633015006, - "grad_norm": 0.010380366817116737, - "learning_rate": 8.682748575724071e-06, - "loss": 0.00027635702863335607, - "step": 4595 - }, - { - "epoch": 3.1377899045020463, - "grad_norm": 0.012770955450832844, - "learning_rate": 8.617257501287805e-06, - "loss": 0.00028360043652355673, - "step": 4600 - }, - { - "epoch": 3.141200545702592, - "grad_norm": 0.012632913887500763, - "learning_rate": 8.551982274175449e-06, - "loss": 4.925676621496678e-05, - "step": 4605 - }, - { - "epoch": 3.1446111869031377, - "grad_norm": 0.0028189525473862886, - "learning_rate": 8.486923382203703e-06, - "loss": 0.0039628144353628155, - "step": 4610 - }, - { - "epoch": 3.1480218281036834, - "grad_norm": 0.11367341130971909, - "learning_rate": 8.422081311572464e-06, - "loss": 0.000568081671372056, - "step": 4615 - }, - { - "epoch": 3.151432469304229, - "grad_norm": 0.00095061567844823, - "learning_rate": 8.35745654686135e-06, - "loss": 0.00036408030427992344, - "step": 4620 - }, - { - "epoch": 3.1548431105047747, - "grad_norm": 0.05772553011775017, - "learning_rate": 8.29304957102596e-06, - "loss": 0.0006427288055419922, - "step": 4625 - }, - { - "epoch": 3.1582537517053204, - "grad_norm": 0.017082368955016136, - "learning_rate": 8.22886086539432e-06, - "loss": 0.00015330149326473475, - "step": 4630 - }, - { - "epoch": 3.161664392905866, - "grad_norm": 0.0033851212356239557, - "learning_rate": 8.164890909663256e-06, - "loss": 0.00012121353065595031, - "step": 4635 - }, - { - "epoch": 3.1650750341064118, - "grad_norm": 0.000577523373067379, - "learning_rate": 8.101140181894868e-06, - "loss": 7.793278782628477e-05, - "step": 4640 - }, - { - "epoch": 3.168485675306958, - "grad_norm": 0.00039884017314761877, - "learning_rate": 8.037609158512875e-06, - "loss": 0.0014324543066322804, - "step": 4645 - }, - { - "epoch": 3.1718963165075036, - "grad_norm": 0.004100058693438768, - "learning_rate": 7.97429831429911e-06, - "loss": 0.00015990985557436943, - "step": 4650 - }, - { - "epoch": 3.1753069577080493, - "grad_norm": 0.005648438818752766, - "learning_rate": 7.911208122389956e-06, - "loss": 8.149745990522206e-05, - "step": 4655 - }, - { - "epoch": 3.178717598908595, - "grad_norm": 0.005042952951043844, - "learning_rate": 7.848339054272808e-06, - "loss": 9.191314456984401e-05, - "step": 4660 - }, - { - "epoch": 3.1821282401091406, - "grad_norm": 0.0007950706058181822, - "learning_rate": 7.785691579782546e-06, - "loss": 7.44381221011281e-05, - "step": 4665 - }, - { - "epoch": 3.1855388813096863, - "grad_norm": 0.0717335194349289, - "learning_rate": 7.723266167098058e-06, - "loss": 0.00012347951997071505, - "step": 4670 - }, - { - "epoch": 3.188949522510232, - "grad_norm": 0.011817894876003265, - "learning_rate": 7.661063282738685e-06, - "loss": 0.000311569613404572, - "step": 4675 - }, - { - "epoch": 3.1923601637107777, - "grad_norm": 0.011100267991423607, - "learning_rate": 7.599083391560774e-06, - "loss": 0.00023061195388436319, - "step": 4680 - }, - { - "epoch": 3.1957708049113234, - "grad_norm": 0.025753796100616455, - "learning_rate": 7.5373269567541776e-06, - "loss": 0.00013706330209970475, - "step": 4685 - }, - { - "epoch": 3.199181446111869, - "grad_norm": 0.0529993437230587, - "learning_rate": 7.47579443983886e-06, - "loss": 0.00025801956653594973, - "step": 4690 - }, - { - "epoch": 3.2025920873124147, - "grad_norm": 0.01164156198501587, - "learning_rate": 7.41448630066132e-06, - "loss": 0.0001246333820745349, - "step": 4695 - }, - { - "epoch": 3.2060027285129604, - "grad_norm": 0.0022943434305489063, - "learning_rate": 7.353402997391271e-06, - "loss": 4.788096994161606e-05, - "step": 4700 - }, - { - "epoch": 3.209413369713506, - "grad_norm": 0.0016377613646909595, - "learning_rate": 7.292544986518198e-06, - "loss": 0.0014273281209170817, - "step": 4705 - }, - { - "epoch": 3.212824010914052, - "grad_norm": 0.046027738600969315, - "learning_rate": 7.231912722847881e-06, - "loss": 0.00015070366207510234, - "step": 4710 - }, - { - "epoch": 3.2162346521145975, - "grad_norm": 0.12626095116138458, - "learning_rate": 7.171506659499067e-06, - "loss": 0.0002223264891654253, - "step": 4715 - }, - { - "epoch": 3.219645293315143, - "grad_norm": 0.004387991968542337, - "learning_rate": 7.1113272479000465e-06, - "loss": 5.1431613974273206e-05, - "step": 4720 - }, - { - "epoch": 3.223055934515689, - "grad_norm": 0.0005260159377939999, - "learning_rate": 7.051374937785289e-06, - "loss": 9.967307560145854e-05, - "step": 4725 - }, - { - "epoch": 3.2264665757162345, - "grad_norm": 0.001557494280859828, - "learning_rate": 6.9916501771920795e-06, - "loss": 3.6639469908550384e-05, - "step": 4730 - }, - { - "epoch": 3.22987721691678, - "grad_norm": 0.0013014579890295863, - "learning_rate": 6.932153412457195e-06, - "loss": 0.00015290889423340558, - "step": 4735 - }, - { - "epoch": 3.233287858117326, - "grad_norm": 0.0005693956045433879, - "learning_rate": 6.872885088213522e-06, - "loss": 9.23092185985297e-05, - "step": 4740 - }, - { - "epoch": 3.2366984993178716, - "grad_norm": 0.0008315684972330928, - "learning_rate": 6.813845647386771e-06, - "loss": 0.00010763210011646151, - "step": 4745 - }, - { - "epoch": 3.2401091405184177, - "grad_norm": 0.0021727036219090223, - "learning_rate": 6.755035531192148e-06, - "loss": 3.423129383008927e-05, - "step": 4750 - }, - { - "epoch": 3.2435197817189634, - "grad_norm": 0.0001480428036302328, - "learning_rate": 6.696455179131084e-06, - "loss": 0.023981352150440217, - "step": 4755 - }, - { - "epoch": 3.2455661664392905, - "eval_loss": 0.074391670525074, - "eval_runtime": 0.9318, - "eval_samples_per_second": 80.487, - "eval_steps_per_second": 2.146, - "step": 4758 - }, - { - "eval_cer_subset": 0.015260934732252351, - "eval_cer_subset_edit_distance": 112, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4758 - }, - { - "epoch": 3.246930422919509, - "grad_norm": 0.007659688591957092, - "learning_rate": 6.638105028987886e-06, - "loss": 0.0004011324606835842, - "step": 4760 - }, - { - "epoch": 3.2503410641200547, - "grad_norm": 0.033579885959625244, - "learning_rate": 6.579985516826564e-06, - "loss": 0.000267721782438457, - "step": 4765 - }, - { - "epoch": 3.2537517053206004, - "grad_norm": 0.01196813303977251, - "learning_rate": 6.52209707698748e-06, - "loss": 0.00014587611658498645, - "step": 4770 - }, - { - "epoch": 3.257162346521146, - "grad_norm": 0.022396638989448547, - "learning_rate": 6.464440142084156e-06, - "loss": 0.00038080187514424326, - "step": 4775 - }, - { - "epoch": 3.260572987721692, - "grad_norm": 0.002048628870397806, - "learning_rate": 6.407015143000002e-06, - "loss": 0.007085768878459931, - "step": 4780 - }, - { - "epoch": 3.2639836289222375, - "grad_norm": 0.0019598486833274364, - "learning_rate": 6.3498225088851686e-06, - "loss": 0.0010610194876790047, - "step": 4785 - }, - { - "epoch": 3.267394270122783, - "grad_norm": 0.02006545290350914, - "learning_rate": 6.29286266715324e-06, - "loss": 0.00014606654876843096, - "step": 4790 - }, - { - "epoch": 3.270804911323329, - "grad_norm": 0.0005910536856390536, - "learning_rate": 6.236136043478106e-06, - "loss": 6.873804377391935e-05, - "step": 4795 - }, - { - "epoch": 3.2742155525238745, - "grad_norm": 0.021028559654951096, - "learning_rate": 6.179643061790775e-06, - "loss": 0.0013180674985051155, - "step": 4800 - }, - { - "epoch": 3.27762619372442, - "grad_norm": 0.0004542934475466609, - "learning_rate": 6.123384144276183e-06, - "loss": 4.965414409525692e-05, - "step": 4805 - }, - { - "epoch": 3.281036834924966, - "grad_norm": 0.041615571826696396, - "learning_rate": 6.067359711370047e-06, - "loss": 0.019744729995727538, - "step": 4810 - }, - { - "epoch": 3.2844474761255116, - "grad_norm": 0.002755318768322468, - "learning_rate": 6.011570181755754e-06, - "loss": 0.00010759549913927913, - "step": 4815 - }, - { - "epoch": 3.2878581173260573, - "grad_norm": 0.0032264923211187124, - "learning_rate": 5.956015972361171e-06, - "loss": 0.0007094295229762793, - "step": 4820 - }, - { - "epoch": 3.291268758526603, - "grad_norm": 1.4725555181503296, - "learning_rate": 5.900697498355589e-06, - "loss": 0.010635539144277572, - "step": 4825 - }, - { - "epoch": 3.2946793997271486, - "grad_norm": 0.017544033005833626, - "learning_rate": 5.84561517314657e-06, - "loss": 0.0003255015704780817, - "step": 4830 - }, - { - "epoch": 3.2980900409276943, - "grad_norm": 0.0035306529607623816, - "learning_rate": 5.790769408376922e-06, - "loss": 0.00013384120538830757, - "step": 4835 - }, - { - "epoch": 3.30150068212824, - "grad_norm": 0.0018269309075549245, - "learning_rate": 5.736160613921528e-06, - "loss": 0.0007960126735270023, - "step": 4840 - }, - { - "epoch": 3.3049113233287857, - "grad_norm": 0.010024248622357845, - "learning_rate": 5.6817891978843855e-06, - "loss": 0.0003233390394598246, - "step": 4845 - }, - { - "epoch": 3.3083219645293314, - "grad_norm": 0.004048422910273075, - "learning_rate": 5.627655566595489e-06, - "loss": 0.00011264056665822863, - "step": 4850 - }, - { - "epoch": 3.311732605729877, - "grad_norm": 0.0034091162960976362, - "learning_rate": 5.573760124607812e-06, - "loss": 0.00048314151354134083, - "step": 4855 - }, - { - "epoch": 3.3151432469304227, - "grad_norm": 0.009309383109211922, - "learning_rate": 5.5201032746942796e-06, - "loss": 0.0002101475838571787, - "step": 4860 - }, - { - "epoch": 3.3185538881309684, - "grad_norm": 0.06538962572813034, - "learning_rate": 5.466685417844797e-06, - "loss": 0.0003499136073514819, - "step": 4865 - }, - { - "epoch": 3.321964529331514, - "grad_norm": 0.002885893452912569, - "learning_rate": 5.413506953263162e-06, - "loss": 0.00048564458265900614, - "step": 4870 - }, - { - "epoch": 3.32537517053206, - "grad_norm": 0.004836421925574541, - "learning_rate": 5.3605682783642e-06, - "loss": 6.691411836072803e-05, - "step": 4875 - }, - { - "epoch": 3.328785811732606, - "grad_norm": 0.00475132092833519, - "learning_rate": 5.307869788770694e-06, - "loss": 0.0006194526329636573, - "step": 4880 - }, - { - "epoch": 3.3321964529331516, - "grad_norm": 0.005688230507075787, - "learning_rate": 5.255411878310482e-06, - "loss": 9.07582463696599e-05, - "step": 4885 - }, - { - "epoch": 3.3356070941336973, - "grad_norm": 0.0037306994199752808, - "learning_rate": 5.2031949390134856e-06, - "loss": 0.00012413164367899298, - "step": 4890 - }, - { - "epoch": 3.339017735334243, - "grad_norm": 0.011166035197675228, - "learning_rate": 5.15121936110882e-06, - "loss": 7.776234415359795e-05, - "step": 4895 - }, - { - "epoch": 3.3424283765347886, - "grad_norm": 0.0012123408960178494, - "learning_rate": 5.099485533021836e-06, - "loss": 2.784754615277052e-05, - "step": 4900 - }, - { - "epoch": 3.3458390177353343, - "grad_norm": 0.002357951132580638, - "learning_rate": 5.047993841371223e-06, - "loss": 0.00029555323999375105, - "step": 4905 - }, - { - "epoch": 3.34924965893588, - "grad_norm": 0.00030636831070296466, - "learning_rate": 4.996744670966138e-06, - "loss": 0.00010590272722765803, - "step": 4910 - }, - { - "epoch": 3.3526603001364257, - "grad_norm": 0.0058077736757695675, - "learning_rate": 4.945738404803348e-06, - "loss": 0.0001937644206918776, - "step": 4915 - }, - { - "epoch": 3.3560709413369714, - "grad_norm": 0.0007610859465785325, - "learning_rate": 4.8949754240642775e-06, - "loss": 0.00011190775549039245, - "step": 4920 - }, - { - "epoch": 3.359481582537517, - "grad_norm": 0.6333717107772827, - "learning_rate": 4.844456108112297e-06, - "loss": 0.0008945153094828129, - "step": 4925 - }, - { - "epoch": 3.3628922237380627, - "grad_norm": 0.006114002782851458, - "learning_rate": 4.794180834489772e-06, - "loss": 9.972437983378768e-05, - "step": 4930 - }, - { - "epoch": 3.3663028649386084, - "grad_norm": 0.0017828121781349182, - "learning_rate": 4.744149978915274e-06, - "loss": 7.763381581753493e-05, - "step": 4935 - }, - { - "epoch": 3.369713506139154, - "grad_norm": 0.0009099289891310036, - "learning_rate": 4.694363915280814e-06, - "loss": 0.05654715895652771, - "step": 4940 - }, - { - "epoch": 3.3731241473397, - "grad_norm": 0.013658248819410801, - "learning_rate": 4.644823015649009e-06, - "loss": 0.0013173201121389865, - "step": 4945 - }, - { - "epoch": 3.3765347885402455, - "grad_norm": 0.6665285229682922, - "learning_rate": 4.5955276502502944e-06, - "loss": 0.0018493477255105972, - "step": 4950 - }, - { - "epoch": 3.379945429740791, - "grad_norm": 0.001095029292628169, - "learning_rate": 4.546478187480176e-06, - "loss": 0.0005424355156719684, - "step": 4955 - }, - { - "epoch": 3.383356070941337, - "grad_norm": 0.015392723493278027, - "learning_rate": 4.497674993896503e-06, - "loss": 9.325146675109863e-05, - "step": 4960 - }, - { - "epoch": 3.3867667121418825, - "grad_norm": 0.08219064027070999, - "learning_rate": 4.449118434216653e-06, - "loss": 0.0004451565444469452, - "step": 4965 - }, - { - "epoch": 3.390177353342428, - "grad_norm": 0.003267089370638132, - "learning_rate": 4.4008088713148845e-06, - "loss": 0.00016891954001039267, - "step": 4970 - }, - { - "epoch": 3.3935879945429743, - "grad_norm": 0.008226803503930569, - "learning_rate": 4.35274666621957e-06, - "loss": 0.0001009777537547052, - "step": 4975 - }, - { - "epoch": 3.39699863574352, - "grad_norm": 0.01762073114514351, - "learning_rate": 4.304932178110558e-06, - "loss": 0.006487253308296204, - "step": 4980 - }, - { - "epoch": 3.4004092769440657, - "grad_norm": 0.0021151783876121044, - "learning_rate": 4.257365764316395e-06, - "loss": 0.00011428899597376585, - "step": 4985 - }, - { - "epoch": 3.4038199181446114, - "grad_norm": 0.0008960114791989326, - "learning_rate": 4.210047780311768e-06, - "loss": 3.825195599347353e-05, - "step": 4990 - }, - { - "epoch": 3.407230559345157, - "grad_norm": 0.058672014623880386, - "learning_rate": 4.162978579714753e-06, - "loss": 0.0005217622965574265, - "step": 4995 - }, - { - "epoch": 3.4106412005457027, - "grad_norm": 6.555901927640662e-05, - "learning_rate": 4.11615851428423e-06, - "loss": 0.00011362402001395822, - "step": 5000 - }, - { - "epoch": 3.4140518417462484, - "grad_norm": 0.21014879643917084, - "learning_rate": 4.069587933917221e-06, - "loss": 0.00179185438901186, - "step": 5005 - }, - { - "epoch": 3.417462482946794, - "grad_norm": 0.1315806359052658, - "learning_rate": 4.023267186646317e-06, - "loss": 0.0014887897297739983, - "step": 5010 - }, - { - "epoch": 3.42087312414734, - "grad_norm": 0.018509764224290848, - "learning_rate": 3.977196618637e-06, - "loss": 0.00016269356710836292, - "step": 5015 - }, - { - "epoch": 3.4242837653478855, - "grad_norm": 0.0007501108921132982, - "learning_rate": 3.931376574185166e-06, - "loss": 3.563327190931886e-05, - "step": 5020 - }, - { - "epoch": 3.427694406548431, - "grad_norm": 0.0036405418068170547, - "learning_rate": 3.885807395714441e-06, - "loss": 0.0009133132174611091, - "step": 5025 - }, - { - "epoch": 3.431105047748977, - "grad_norm": 0.01853407360613346, - "learning_rate": 3.840489423773698e-06, - "loss": 0.00011221827007830143, - "step": 5030 - }, - { - "epoch": 3.4345156889495225, - "grad_norm": 0.004317726474255323, - "learning_rate": 3.7954229970344725e-06, - "loss": 0.00033687916584312916, - "step": 5035 - }, - { - "epoch": 3.437926330150068, - "grad_norm": 0.003909669350832701, - "learning_rate": 3.7506084522884684e-06, - "loss": 7.524496177211404e-05, - "step": 5040 - }, - { - "epoch": 3.441336971350614, - "grad_norm": 0.016738831996917725, - "learning_rate": 3.7060461244449945e-06, - "loss": 9.533832781016827e-05, - "step": 5045 - }, - { - "epoch": 3.4447476125511596, - "grad_norm": 0.0012303644325584173, - "learning_rate": 3.6617363465284875e-06, - "loss": 0.00011376941110938788, - "step": 5050 - }, - { - "epoch": 3.4481582537517053, - "grad_norm": 0.005091819446533918, - "learning_rate": 3.617679449676028e-06, - "loss": 0.000578709552064538, - "step": 5055 - }, - { - "epoch": 3.451568894952251, - "grad_norm": 0.0007091189618222415, - "learning_rate": 3.5738757631348744e-06, - "loss": 7.042675861157477e-05, - "step": 5060 - }, - { - "epoch": 3.4549795361527966, - "grad_norm": 0.004033643286675215, - "learning_rate": 3.5303256142599407e-06, - "loss": 5.9417390730232e-05, - "step": 5065 - }, - { - "epoch": 3.4583901773533423, - "grad_norm": 0.018419573083519936, - "learning_rate": 3.487029328511444e-06, - "loss": 0.0001460162689909339, - "step": 5070 - }, - { - "epoch": 3.461800818553888, - "grad_norm": 0.0035185501910746098, - "learning_rate": 3.4439872294524025e-06, - "loss": 0.0001254791859537363, - "step": 5075 - }, - { - "epoch": 3.4652114597544337, - "grad_norm": 0.0007496779435314238, - "learning_rate": 3.401199638746241e-06, - "loss": 4.7221675049513576e-05, - "step": 5080 - }, - { - "epoch": 3.4686221009549794, - "grad_norm": 2.67924165725708, - "learning_rate": 3.3586668761543813e-06, - "loss": 0.0016780177131295205, - "step": 5085 - }, - { - "epoch": 3.472032742155525, - "grad_norm": 0.011002305895090103, - "learning_rate": 3.316389259533876e-06, - "loss": 9.696125634945929e-05, - "step": 5090 - }, - { - "epoch": 3.4754433833560707, - "grad_norm": 0.0005470504984259605, - "learning_rate": 3.2743671048349755e-06, - "loss": 3.457541752140969e-05, - "step": 5095 - }, - { - "epoch": 3.4788540245566164, - "grad_norm": 0.014639006927609444, - "learning_rate": 3.232600726098851e-06, - "loss": 0.005385900661349296, - "step": 5100 - }, - { - "epoch": 3.4822646657571625, - "grad_norm": 0.02914433367550373, - "learning_rate": 3.191090435455171e-06, - "loss": 0.00018561827018857003, - "step": 5105 - }, - { - "epoch": 3.485675306957708, - "grad_norm": 0.0009303450351580977, - "learning_rate": 3.1498365431198048e-06, - "loss": 8.976480457931758e-05, - "step": 5110 - }, - { - "epoch": 3.489085948158254, - "grad_norm": 0.001520369085483253, - "learning_rate": 3.1088393573924966e-06, - "loss": 0.00012106491485610604, - "step": 5115 - }, - { - "epoch": 3.4924965893587996, - "grad_norm": 0.0004795770801138133, - "learning_rate": 3.0680991846545836e-06, - "loss": 5.365515244193375e-05, - "step": 5120 - }, - { - "epoch": 3.495225102319236, - "eval_loss": 0.07684502005577087, - "eval_runtime": 0.9207, - "eval_samples_per_second": 81.46, - "eval_steps_per_second": 2.172, - "step": 5124 - }, - { - "eval_cer_subset": 0.014852159694781306, - "eval_cer_subset_edit_distance": 109, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 5124 - }, - { - "epoch": 3.4959072305593453, - "grad_norm": 0.0028329631313681602, - "learning_rate": 3.027616329366664e-06, - "loss": 0.00016044501680880784, - "step": 5125 - }, - { - "epoch": 3.499317871759891, - "grad_norm": 0.00026357462047599256, - "learning_rate": 2.987391094066345e-06, - "loss": 0.00023950175382196902, - "step": 5130 - }, - { - "epoch": 3.5027285129604366, - "grad_norm": 0.05449860543012619, - "learning_rate": 2.9474237793659956e-06, - "loss": 0.0002613885561004281, - "step": 5135 - }, - { - "epoch": 3.5061391541609823, - "grad_norm": 0.0011948413448408246, - "learning_rate": 2.907714683950471e-06, - "loss": 5.1718176109716296e-05, - "step": 5140 - }, - { - "epoch": 3.509549795361528, - "grad_norm": 0.005827807355672121, - "learning_rate": 2.8682641045748964e-06, - "loss": 0.002432660385966301, - "step": 5145 - }, - { - "epoch": 3.5129604365620737, - "grad_norm": 0.0015064133331179619, - "learning_rate": 2.829072336062463e-06, - "loss": 3.671176382340491e-05, - "step": 5150 - }, - { - "epoch": 3.5163710777626194, - "grad_norm": 0.0009614004520699382, - "learning_rate": 2.790139671302191e-06, - "loss": 8.640417945571244e-05, - "step": 5155 - }, - { - "epoch": 3.519781718963165, - "grad_norm": 0.0023597145918756723, - "learning_rate": 2.7514664012467727e-06, - "loss": 7.012838032096624e-05, - "step": 5160 - }, - { - "epoch": 3.5231923601637107, - "grad_norm": 0.0007319003925658762, - "learning_rate": 2.7130528149103657e-06, - "loss": 2.3347024398390204e-05, - "step": 5165 - }, - { - "epoch": 3.5266030013642564, - "grad_norm": 0.006225161254405975, - "learning_rate": 2.6748991993664774e-06, - "loss": 0.00018728851573541762, - "step": 5170 - }, - { - "epoch": 3.530013642564802, - "grad_norm": 0.5371012687683105, - "learning_rate": 2.637005839745772e-06, - "loss": 0.004397168383002281, - "step": 5175 - }, - { - "epoch": 3.533424283765348, - "grad_norm": 0.0009830278577283025, - "learning_rate": 2.5993730192339663e-06, - "loss": 5.1807129057124254e-05, - "step": 5180 - }, - { - "epoch": 3.5368349249658935, - "grad_norm": 0.002657173899933696, - "learning_rate": 2.562001019069726e-06, - "loss": 0.00013663843274116516, - "step": 5185 - }, - { - "epoch": 3.540245566166439, - "grad_norm": 0.005280365701764822, - "learning_rate": 2.5248901185425345e-06, - "loss": 0.00618811659514904, - "step": 5190 - }, - { - "epoch": 3.543656207366985, - "grad_norm": 0.5160993933677673, - "learning_rate": 2.488040594990606e-06, - "loss": 0.0008973299525678157, - "step": 5195 - }, - { - "epoch": 3.547066848567531, - "grad_norm": 0.00302395923063159, - "learning_rate": 2.451452723798844e-06, - "loss": 0.00022636731155216693, - "step": 5200 - }, - { - "epoch": 3.5504774897680766, - "grad_norm": 0.010127821005880833, - "learning_rate": 2.415126778396764e-06, - "loss": 0.0002638277132064104, - "step": 5205 - }, - { - "epoch": 3.5538881309686223, - "grad_norm": 0.0024330182932317257, - "learning_rate": 2.379063030256424e-06, - "loss": 0.0007223737891763448, - "step": 5210 - }, - { - "epoch": 3.557298772169168, - "grad_norm": 0.006238727364689112, - "learning_rate": 2.343261748890457e-06, - "loss": 0.0010717549361288548, - "step": 5215 - }, - { - "epoch": 3.5607094133697137, - "grad_norm": 0.0005047949962317944, - "learning_rate": 2.3077232018500024e-06, - "loss": 7.232563220895826e-05, - "step": 5220 - }, - { - "epoch": 3.5641200545702594, - "grad_norm": 0.0017868287395685911, - "learning_rate": 2.2724476547227325e-06, - "loss": 5.702247726731002e-05, - "step": 5225 - }, - { - "epoch": 3.567530695770805, - "grad_norm": 0.0005412849714048207, - "learning_rate": 2.2374353711308467e-06, - "loss": 4.539011861197651e-05, - "step": 5230 - }, - { - "epoch": 3.5709413369713507, - "grad_norm": 0.0008777446346357465, - "learning_rate": 2.2026866127291516e-06, - "loss": 0.0010734600946307182, - "step": 5235 - }, - { - "epoch": 3.5743519781718964, - "grad_norm": 0.020784150809049606, - "learning_rate": 2.1682016392030198e-06, - "loss": 0.0001352211693301797, - "step": 5240 - }, - { - "epoch": 3.577762619372442, - "grad_norm": 0.0032426826655864716, - "learning_rate": 2.133980708266539e-06, - "loss": 0.0001363346236757934, - "step": 5245 - }, - { - "epoch": 3.581173260572988, - "grad_norm": 0.00047889159759506583, - "learning_rate": 2.100024075660538e-06, - "loss": 0.00017863117391243578, - "step": 5250 - }, - { - "epoch": 3.5845839017735335, - "grad_norm": 0.00031558674527332187, - "learning_rate": 2.066331995150666e-06, - "loss": 0.00021245412062853575, - "step": 5255 - }, - { - "epoch": 3.587994542974079, - "grad_norm": 0.0009245559340342879, - "learning_rate": 2.032904718525531e-06, - "loss": 0.0003619278548285365, - "step": 5260 - }, - { - "epoch": 3.591405184174625, - "grad_norm": 0.0013650038745254278, - "learning_rate": 1.9997424955948054e-06, - "loss": 3.983181086368859e-05, - "step": 5265 - }, - { - "epoch": 3.5948158253751705, - "grad_norm": 0.004131193272769451, - "learning_rate": 1.9668455741873424e-06, - "loss": 5.411377060227096e-05, - "step": 5270 - }, - { - "epoch": 3.598226466575716, - "grad_norm": 0.0061623696237802505, - "learning_rate": 1.9342142001493394e-06, - "loss": 5.686166696250439e-05, - "step": 5275 - }, - { - "epoch": 3.601637107776262, - "grad_norm": 0.0031696371734142303, - "learning_rate": 1.9018486173424982e-06, - "loss": 0.00025364819448441266, - "step": 5280 - }, - { - "epoch": 3.6050477489768076, - "grad_norm": 0.0009586649248376489, - "learning_rate": 1.8697490676422052e-06, - "loss": 0.0001193733187392354, - "step": 5285 - }, - { - "epoch": 3.6084583901773533, - "grad_norm": 0.04152340814471245, - "learning_rate": 1.837915790935708e-06, - "loss": 0.00011144874151796103, - "step": 5290 - }, - { - "epoch": 3.611869031377899, - "grad_norm": 0.0054172491654753685, - "learning_rate": 1.8063490251203628e-06, - "loss": 0.0011931947432458402, - "step": 5295 - }, - { - "epoch": 3.6152796725784446, - "grad_norm": 0.009695466607809067, - "learning_rate": 1.7750490061018058e-06, - "loss": 0.00018604533979669214, - "step": 5300 - }, - { - "epoch": 3.6186903137789903, - "grad_norm": 0.021507805213332176, - "learning_rate": 1.7440159677922186e-06, - "loss": 0.00013907469110563397, - "step": 5305 - }, - { - "epoch": 3.622100954979536, - "grad_norm": 0.006461102515459061, - "learning_rate": 1.7132501421085659e-06, - "loss": 0.0003493543481454253, - "step": 5310 - }, - { - "epoch": 3.6255115961800817, - "grad_norm": 0.0105512710288167, - "learning_rate": 1.6827517589709057e-06, - "loss": 7.155602797865867e-05, - "step": 5315 - }, - { - "epoch": 3.6289222373806274, - "grad_norm": 0.0008970944327302277, - "learning_rate": 1.6525210463005868e-06, - "loss": 0.00011632050154730678, - "step": 5320 - }, - { - "epoch": 3.632332878581173, - "grad_norm": 0.00177982565946877, - "learning_rate": 1.6225582300186405e-06, - "loss": 0.002909584902226925, - "step": 5325 - }, - { - "epoch": 3.6357435197817187, - "grad_norm": 0.001438588253222406, - "learning_rate": 1.5928635340440255e-06, - "loss": 0.00019459464820101858, - "step": 5330 - }, - { - "epoch": 3.6391541609822644, - "grad_norm": 0.00501625519245863, - "learning_rate": 1.5634371802919696e-06, - "loss": 6.730342865921557e-05, - "step": 5335 - }, - { - "epoch": 3.64256480218281, - "grad_norm": 0.003331825602799654, - "learning_rate": 1.534279388672334e-06, - "loss": 4.0749041363596913e-05, - "step": 5340 - }, - { - "epoch": 3.645975443383356, - "grad_norm": 0.000979799311608076, - "learning_rate": 1.5053903770879518e-06, - "loss": 3.709651937242597e-05, - "step": 5345 - }, - { - "epoch": 3.649386084583902, - "grad_norm": 0.000521843961905688, - "learning_rate": 1.4767703614329884e-06, - "loss": 8.522009593434632e-05, - "step": 5350 - }, - { - "epoch": 3.6527967257844476, - "grad_norm": 0.0004526655247900635, - "learning_rate": 1.4484195555913582e-06, - "loss": 2.5001828907988967e-05, - "step": 5355 - }, - { - "epoch": 3.6562073669849933, - "grad_norm": 0.0029042293317615986, - "learning_rate": 1.420338171435094e-06, - "loss": 0.00018259206553921102, - "step": 5360 - }, - { - "epoch": 3.659618008185539, - "grad_norm": 0.003727864008396864, - "learning_rate": 1.3925264188227925e-06, - "loss": 0.00011327432002872229, - "step": 5365 - }, - { - "epoch": 3.6630286493860846, - "grad_norm": 0.0013012837152928114, - "learning_rate": 1.3649845055980172e-06, - "loss": 3.7070104735903445e-05, - "step": 5370 - }, - { - "epoch": 3.6664392905866303, - "grad_norm": 0.013001679442822933, - "learning_rate": 1.3377126375877817e-06, - "loss": 5.2633637096732855e-05, - "step": 5375 - }, - { - "epoch": 3.669849931787176, - "grad_norm": 0.0007804427877999842, - "learning_rate": 1.3107110186009643e-06, - "loss": 3.320692339912057e-05, - "step": 5380 - }, - { - "epoch": 3.6732605729877217, - "grad_norm": 0.002156102331355214, - "learning_rate": 1.2839798504268333e-06, - "loss": 0.0010122624225914478, - "step": 5385 - }, - { - "epoch": 3.6766712141882674, - "grad_norm": 0.014592370949685574, - "learning_rate": 1.2575193328334995e-06, - "loss": 8.401789236813783e-05, - "step": 5390 - }, - { - "epoch": 3.680081855388813, - "grad_norm": 0.0031752304639667273, - "learning_rate": 1.2313296635664576e-06, - "loss": 3.530957619659603e-05, - "step": 5395 - }, - { - "epoch": 3.6834924965893587, - "grad_norm": 0.0010194798232987523, - "learning_rate": 1.205411038347072e-06, - "loss": 3.8149964530020954e-05, - "step": 5400 - }, - { - "epoch": 3.6869031377899044, - "grad_norm": 0.015784459188580513, - "learning_rate": 1.1797636508711433e-06, - "loss": 0.0003640792798250914, - "step": 5405 - }, - { - "epoch": 3.69031377899045, - "grad_norm": 0.044826168566942215, - "learning_rate": 1.1543876928074485e-06, - "loss": 9.083467884920537e-05, - "step": 5410 - }, - { - "epoch": 3.693724420190996, - "grad_norm": 0.00281776231713593, - "learning_rate": 1.1292833537963162e-06, - "loss": 7.455614977516233e-05, - "step": 5415 - }, - { - "epoch": 3.6971350613915415, - "grad_norm": 0.0028589575085788965, - "learning_rate": 1.1044508214481981e-06, - "loss": 0.00014598952839151025, - "step": 5420 - }, - { - "epoch": 3.700545702592087, - "grad_norm": 0.22950734198093414, - "learning_rate": 1.0798902813422674e-06, - "loss": 0.0015467993915081024, - "step": 5425 - }, - { - "epoch": 3.7039563437926333, - "grad_norm": 0.00023460436204914004, - "learning_rate": 1.055601917025052e-06, - "loss": 9.705557604320348e-05, - "step": 5430 - }, - { - "epoch": 3.707366984993179, - "grad_norm": 0.001205096603371203, - "learning_rate": 1.0315859100090278e-06, - "loss": 0.00016396433347836136, - "step": 5435 - }, - { - "epoch": 3.7107776261937246, - "grad_norm": 0.001592331798747182, - "learning_rate": 1.0078424397713072e-06, - "loss": 5.8903940953314304e-05, - "step": 5440 - }, - { - "epoch": 3.7141882673942703, - "grad_norm": 0.006218360736966133, - "learning_rate": 9.843716837522524e-07, - "loss": 0.00036285766400396825, - "step": 5445 - }, - { - "epoch": 3.717598908594816, - "grad_norm": 0.0005638732109218836, - "learning_rate": 9.611738173541856e-07, - "loss": 9.058183059096336e-05, - "step": 5450 - }, - { - "epoch": 3.7210095497953617, - "grad_norm": 0.0001975312188733369, - "learning_rate": 9.382490139400386e-07, - "loss": 0.0006882708985358477, - "step": 5455 - }, - { - "epoch": 3.7244201909959074, - "grad_norm": 0.0014521559933200479, - "learning_rate": 9.155974448321182e-07, - "loss": 4.348080838099122e-05, - "step": 5460 - }, - { - "epoch": 3.727830832196453, - "grad_norm": 0.0007730235811322927, - "learning_rate": 8.932192793107515e-07, - "loss": 6.59460376482457e-05, - "step": 5465 - }, - { - "epoch": 3.7312414733969987, - "grad_norm": 0.0017008045688271523, - "learning_rate": 8.711146846130834e-07, - "loss": 4.6517132432200015e-05, - "step": 5470 - }, - { - "epoch": 3.7346521145975444, - "grad_norm": 0.000825837894808501, - "learning_rate": 8.492838259317902e-07, - "loss": 5.0807202933356165e-05, - "step": 5475 - }, - { - "epoch": 3.73806275579809, - "grad_norm": 0.0004800660244654864, - "learning_rate": 8.277268664138553e-07, - "loss": 7.82750197686255e-05, - "step": 5480 - }, - { - "epoch": 3.741473396998636, - "grad_norm": 0.024822546169161797, - "learning_rate": 8.06443967159362e-07, - "loss": 6.785190780647098e-05, - "step": 5485 - }, - { - "epoch": 3.7448840381991815, - "grad_norm": 0.0010685365414246917, - "learning_rate": 7.854352872202735e-07, - "loss": 6.689840811304748e-05, - "step": 5490 - }, - { - "epoch": 3.7448840381991815, - "eval_loss": 0.07814140617847443, - "eval_runtime": 0.8865, - "eval_samples_per_second": 84.605, - "eval_steps_per_second": 2.256, - "step": 5490 - }, - { - "eval_cer_subset": 0.014852159694781306, - "eval_cer_subset_edit_distance": 109, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 5490 - }, - { - "epoch": 3.748294679399727, - "grad_norm": 0.00021385519357863814, - "learning_rate": 7.64700983599234e-07, - "loss": 2.2849228116683662e-05, - "step": 5495 - }, - { - "epoch": 3.751705320600273, - "grad_norm": 0.0010771763045340776, - "learning_rate": 7.442412112484406e-07, - "loss": 0.0004126048181205988, - "step": 5500 - }, - { - "epoch": 3.7551159618008185, - "grad_norm": 0.0018533640541136265, - "learning_rate": 7.240561230684311e-07, - "loss": 5.3862365894019605e-05, - "step": 5505 - }, - { - "epoch": 3.758526603001364, - "grad_norm": 0.005947180092334747, - "learning_rate": 7.041458699069774e-07, - "loss": 0.00014215235132724046, - "step": 5510 - }, - { - "epoch": 3.76193724420191, - "grad_norm": 0.0012127163354307413, - "learning_rate": 6.845106005579401e-07, - "loss": 8.649011724628508e-05, - "step": 5515 - }, - { - "epoch": 3.7653478854024556, - "grad_norm": 0.008430424146354198, - "learning_rate": 6.651504617601736e-07, - "loss": 4.855484585277736e-05, - "step": 5520 - }, - { - "epoch": 3.7687585266030013, - "grad_norm": 0.0005919627728872001, - "learning_rate": 6.460655981964061e-07, - "loss": 9.893624810501933e-05, - "step": 5525 - }, - { - "epoch": 3.772169167803547, - "grad_norm": 0.0023226868361234665, - "learning_rate": 6.272561524921782e-07, - "loss": 0.00013384660705924035, - "step": 5530 - }, - { - "epoch": 3.7755798090040926, - "grad_norm": 0.0009323696722276509, - "learning_rate": 6.087222652147644e-07, - "loss": 0.000312839774414897, - "step": 5535 - }, - { - "epoch": 3.7789904502046383, - "grad_norm": 0.0005047317827120423, - "learning_rate": 5.904640748721448e-07, - "loss": 7.338747382164001e-05, - "step": 5540 - }, - { - "epoch": 3.782401091405184, - "grad_norm": 0.0005660484894178808, - "learning_rate": 5.724817179119268e-07, - "loss": 6.945489440113306e-05, - "step": 5545 - }, - { - "epoch": 3.7858117326057297, - "grad_norm": 0.1210128664970398, - "learning_rate": 5.54775328720379e-07, - "loss": 0.0005033219233155251, - "step": 5550 - }, - { - "epoch": 3.7892223738062754, - "grad_norm": 0.0032783239148557186, - "learning_rate": 5.373450396213785e-07, - "loss": 7.25714024156332e-05, - "step": 5555 - }, - { - "epoch": 3.792633015006821, - "grad_norm": 0.022924935445189476, - "learning_rate": 5.201909808754567e-07, - "loss": 0.0003666009521111846, - "step": 5560 - }, - { - "epoch": 3.7960436562073667, - "grad_norm": 0.0006960026803426445, - "learning_rate": 5.033132806788048e-07, - "loss": 2.8438412118703128e-05, - "step": 5565 - }, - { - "epoch": 3.799454297407913, - "grad_norm": 0.0023685309570282698, - "learning_rate": 4.867120651623327e-07, - "loss": 4.687009786721319e-05, - "step": 5570 - }, - { - "epoch": 3.8028649386084585, - "grad_norm": 0.000736989954020828, - "learning_rate": 4.703874583906989e-07, - "loss": 0.00036185523495078085, - "step": 5575 - }, - { - "epoch": 3.806275579809004, - "grad_norm": 0.002065070439130068, - "learning_rate": 4.5433958236142795e-07, - "loss": 4.816077707801014e-05, - "step": 5580 - }, - { - "epoch": 3.80968622100955, - "grad_norm": 0.006012174300849438, - "learning_rate": 4.3856855700395294e-07, - "loss": 0.003127726539969444, - "step": 5585 - }, - { - "epoch": 3.8130968622100956, - "grad_norm": 0.0058719078078866005, - "learning_rate": 4.230745001787453e-07, - "loss": 0.00023026710841804742, - "step": 5590 - }, - { - "epoch": 3.8165075034106413, - "grad_norm": 0.001011750428006053, - "learning_rate": 4.078575276764237e-07, - "loss": 4.3114786967635156e-05, - "step": 5595 - }, - { - "epoch": 3.819918144611187, - "grad_norm": 0.0018015744863077998, - "learning_rate": 3.929177532168967e-07, - "loss": 6.310571334324777e-05, - "step": 5600 - }, - { - "epoch": 3.8233287858117326, - "grad_norm": 0.005773304495960474, - "learning_rate": 3.7825528844850903e-07, - "loss": 0.008211637288331986, - "step": 5605 - }, - { - "epoch": 3.8267394270122783, - "grad_norm": 0.005971482954919338, - "learning_rate": 3.6387024294720483e-07, - "loss": 0.00014537961687892674, - "step": 5610 - }, - { - "epoch": 3.830150068212824, - "grad_norm": 0.0021432344801723957, - "learning_rate": 3.497627242157075e-07, - "loss": 0.0003147674491629004, - "step": 5615 - }, - { - "epoch": 3.8335607094133697, - "grad_norm": 0.01809293031692505, - "learning_rate": 3.359328376827286e-07, - "loss": 0.00021316034253686665, - "step": 5620 - }, - { - "epoch": 3.8369713506139154, - "grad_norm": 0.001072075916454196, - "learning_rate": 3.223806867021561e-07, - "loss": 0.0004111470188945532, - "step": 5625 - }, - { - "epoch": 3.840381991814461, - "grad_norm": 0.001951684826053679, - "learning_rate": 3.0910637255231325e-07, - "loss": 0.00011240362655371428, - "step": 5630 - }, - { - "epoch": 3.8437926330150067, - "grad_norm": 0.004423170816153288, - "learning_rate": 2.9610999443517996e-07, - "loss": 0.00011202857131138444, - "step": 5635 - }, - { - "epoch": 3.8472032742155524, - "grad_norm": 0.00013841276813764125, - "learning_rate": 2.8339164947564775e-07, - "loss": 3.643531817942858e-05, - "step": 5640 - }, - { - "epoch": 3.850613915416098, - "grad_norm": 0.00026944882120005786, - "learning_rate": 2.709514327208118e-07, - "loss": 6.067348294891417e-05, - "step": 5645 - }, - { - "epoch": 3.854024556616644, - "grad_norm": 0.000910903443582356, - "learning_rate": 2.587894371392549e-07, - "loss": 0.004593908041715622, - "step": 5650 - }, - { - "epoch": 3.85743519781719, - "grad_norm": 0.0011465694988146424, - "learning_rate": 2.4690575362034393e-07, - "loss": 6.263987743295729e-05, - "step": 5655 - }, - { - "epoch": 3.8608458390177356, - "grad_norm": 0.0008314873557537794, - "learning_rate": 2.3530047097354704e-07, - "loss": 0.00017264452762901782, - "step": 5660 - }, - { - "epoch": 3.8642564802182813, - "grad_norm": 0.0015781678957864642, - "learning_rate": 2.2397367592780074e-07, - "loss": 8.745520026423037e-05, - "step": 5665 - }, - { - "epoch": 3.867667121418827, - "grad_norm": 0.002430699532851577, - "learning_rate": 2.1292545313082721e-07, - "loss": 0.00019512665458023548, - "step": 5670 - }, - { - "epoch": 3.8710777626193726, - "grad_norm": 0.000959254743065685, - "learning_rate": 2.021558851485097e-07, - "loss": 7.31457956135273e-05, - "step": 5675 - }, - { - "epoch": 3.8744884038199183, - "grad_norm": 0.0041350554674863815, - "learning_rate": 1.9166505246428903e-07, - "loss": 5.959914997220039e-05, - "step": 5680 - }, - { - "epoch": 3.877899045020464, - "grad_norm": 0.0020819318015128374, - "learning_rate": 1.814530334785555e-07, - "loss": 9.297068463638425e-05, - "step": 5685 - }, - { - "epoch": 3.8813096862210097, - "grad_norm": 0.019831830635666847, - "learning_rate": 1.7151990450804953e-07, - "loss": 6.57864729873836e-05, - "step": 5690 - }, - { - "epoch": 3.8847203274215554, - "grad_norm": 0.006569270975887775, - "learning_rate": 1.6186573978531626e-07, - "loss": 0.0001178255770355463, - "step": 5695 - }, - { - "epoch": 3.888130968622101, - "grad_norm": 0.028108395636081696, - "learning_rate": 1.524906114581309e-07, - "loss": 0.00034154695458710194, - "step": 5700 - }, - { - "epoch": 3.8915416098226467, - "grad_norm": 0.016931869089603424, - "learning_rate": 1.4339458958896182e-07, - "loss": 0.00014965070877224207, - "step": 5705 - }, - { - "epoch": 3.8949522510231924, - "grad_norm": 0.004743209574371576, - "learning_rate": 1.3457774215445415e-07, - "loss": 9.899177821353078e-05, - "step": 5710 - }, - { - "epoch": 3.898362892223738, - "grad_norm": 0.005506719462573528, - "learning_rate": 1.2604013504493028e-07, - "loss": 8.313862490467727e-05, - "step": 5715 - }, - { - "epoch": 3.901773533424284, - "grad_norm": 0.031609803438186646, - "learning_rate": 1.1778183206386516e-07, - "loss": 0.0001896424451842904, - "step": 5720 - }, - { - "epoch": 3.9051841746248295, - "grad_norm": 0.004502533935010433, - "learning_rate": 1.0980289492744931e-07, - "loss": 0.00013039627810940146, - "step": 5725 - }, - { - "epoch": 3.908594815825375, - "grad_norm": 0.004705691710114479, - "learning_rate": 1.0210338326409329e-07, - "loss": 0.00019792017992585896, - "step": 5730 - }, - { - "epoch": 3.912005457025921, - "grad_norm": 0.0037855699192732573, - "learning_rate": 9.468335461400717e-08, - "loss": 0.0002551279263570905, - "step": 5735 - }, - { - "epoch": 3.9154160982264665, - "grad_norm": 0.10445121675729752, - "learning_rate": 8.754286442875929e-08, - "loss": 0.0006038970313966274, - "step": 5740 - }, - { - "epoch": 3.918826739427012, - "grad_norm": 0.001668531564064324, - "learning_rate": 8.068196607085992e-08, - "loss": 0.0005313832312822342, - "step": 5745 - }, - { - "epoch": 3.922237380627558, - "grad_norm": 0.00047295662807300687, - "learning_rate": 7.410071081336982e-08, - "loss": 9.868838824331761e-05, - "step": 5750 - }, - { - "epoch": 3.9256480218281036, - "grad_norm": 0.0025327601470053196, - "learning_rate": 6.779914783950902e-08, - "loss": 0.000324072758667171, - "step": 5755 - }, - { - "epoch": 3.9290586630286493, - "grad_norm": 0.01139845047146082, - "learning_rate": 6.177732424229448e-08, - "loss": 0.00017668742220848798, - "step": 5760 - }, - { - "epoch": 3.932469304229195, - "grad_norm": 0.04569331184029579, - "learning_rate": 5.6035285024186305e-08, - "loss": 0.0721853256225586, - "step": 5765 - }, - { - "epoch": 3.9358799454297406, - "grad_norm": 0.004628063179552555, - "learning_rate": 5.057307309675879e-08, - "loss": 4.649970214813948e-05, - "step": 5770 - }, - { - "epoch": 3.9392905866302863, - "grad_norm": 0.009363708086311817, - "learning_rate": 4.539072928036735e-08, - "loss": 0.001338627841323614, - "step": 5775 - }, - { - "epoch": 3.942701227830832, - "grad_norm": 0.0014635550323873758, - "learning_rate": 4.0488292303844647e-08, - "loss": 0.0005166372284293175, - "step": 5780 - }, - { - "epoch": 3.9461118690313777, - "grad_norm": 0.004816197324544191, - "learning_rate": 3.586579880422574e-08, - "loss": 0.00010666162706911564, - "step": 5785 - }, - { - "epoch": 3.9495225102319234, - "grad_norm": 0.0007145903073251247, - "learning_rate": 3.1523283326452544e-08, - "loss": 8.264535572379827e-05, - "step": 5790 - }, - { - "epoch": 3.952933151432469, - "grad_norm": 0.005964918527752161, - "learning_rate": 2.7460778323144805e-08, - "loss": 8.588915225118398e-05, - "step": 5795 - }, - { - "epoch": 3.956343792633015, - "grad_norm": 0.0041807787492871284, - "learning_rate": 2.3678314154317034e-08, - "loss": 0.00015257378108799458, - "step": 5800 - }, - { - "epoch": 3.959754433833561, - "grad_norm": 0.001413834630511701, - "learning_rate": 2.017591908719529e-08, - "loss": 0.00011678774608299136, - "step": 5805 - }, - { - "epoch": 3.9631650750341065, - "grad_norm": 0.003702461253851652, - "learning_rate": 1.6953619295971555e-08, - "loss": 0.00012639843625947834, - "step": 5810 - }, - { - "epoch": 3.966575716234652, - "grad_norm": 0.0013520204229280353, - "learning_rate": 1.4011438861633029e-08, - "loss": 0.00011726657394319772, - "step": 5815 - }, - { - "epoch": 3.969986357435198, - "grad_norm": 0.08151526749134064, - "learning_rate": 1.1349399771762302e-08, - "loss": 0.00021847528405487536, - "step": 5820 - }, - { - "epoch": 3.9733969986357436, - "grad_norm": 0.00278874090872705, - "learning_rate": 8.967521920383303e-09, - "loss": 4.5762333320453764e-05, - "step": 5825 - }, - { - "epoch": 3.9768076398362893, - "grad_norm": 0.0007764157489873469, - "learning_rate": 6.865823107811419e-09, - "loss": 0.004313124716281891, - "step": 5830 - }, - { - "epoch": 3.980218281036835, - "grad_norm": 0.024059277027845383, - "learning_rate": 5.0443190405285995e-09, - "loss": 0.00012323500122874976, - "step": 5835 - }, - { - "epoch": 3.9836289222373806, - "grad_norm": 0.04550166428089142, - "learning_rate": 3.5030233310417987e-09, - "loss": 0.00018079780275002122, - "step": 5840 - }, - { - "epoch": 3.9870395634379263, - "grad_norm": 0.0010700125712901354, - "learning_rate": 2.241947497808039e-09, - "loss": 0.0003132110228762031, - "step": 5845 - }, - { - "epoch": 3.990450204638472, - "grad_norm": 0.0018866128521040082, - "learning_rate": 1.2611009651386505e-09, - "loss": 9.605747763998806e-05, - "step": 5850 - }, - { - "epoch": 3.9938608458390177, - "grad_norm": 0.023178091272711754, - "learning_rate": 5.60491063120172e-10, - "loss": 0.00015867352485656738, - "step": 5855 - }, - { - "epoch": 3.9945429740791267, - "eval_loss": 0.07876446098089218, - "eval_runtime": 0.9242, - "eval_samples_per_second": 81.155, - "eval_steps_per_second": 2.164, - "step": 5856 - }, - { - "eval_cer_subset": 0.014852159694781306, - "eval_cer_subset_edit_distance": 109, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 5856 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 6.108465892840243e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5856/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/adapter_model.safetensors deleted file mode 100644 index 5ee07d33e23bd1bf5b50a7f47a1a9e25d7b24abc..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:63ed200008632a72ecf5f1c49f3162c8077f10e113e1cfe5e2c5172e9d8e49c1 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/optimizer.pt deleted file mode 100644 index 02b67fdfc942e53b8c2eb4c0dcdc1b0973c649a8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:21c5c2f3a786b5428eac85789f33e6b387c99842b1885678e132267001552d6b -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/rng_state.pth deleted file mode 100644 index 4669d52b4df192bc91f2676391b1233756a07f32..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a2df993afae23610a148cbb90d0d2fcfd8b5e755873f80c30cdbe027d950c3c -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/scheduler.pt deleted file mode 100644 index 4e41377a2792bdbcfbf68cc9ee6edbb78ba5bbbe..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d506f17dab2155fb813d265a9159693ebe483f329035970ccda3f9613a1b7ae0 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/trainer_state.json deleted file mode 100644 index 0d025b0846b3fe9869d1b28961b37b3b6fed8e60..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/trainer_state.json +++ /dev/null @@ -1,8494 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 4.0, - "eval_steps": 366, - "global_step": 5864, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - }, - { - "epoch": 0.5013642564802183, - "grad_norm": 0.49610504508018494, - "learning_rate": 7.28932057538939e-05, - "loss": 0.06440846920013428, - "step": 735 - }, - { - "epoch": 0.504774897680764, - "grad_norm": 0.5659550428390503, - "learning_rate": 7.285919624125732e-05, - "loss": 0.08426347374916077, - "step": 740 - }, - { - "epoch": 0.5081855388813097, - "grad_norm": 0.04723689705133438, - "learning_rate": 7.282492248128047e-05, - "loss": 0.07788341641426086, - "step": 745 - }, - { - "epoch": 0.5115961800818554, - "grad_norm": 0.720941960811615, - "learning_rate": 7.2790384730099e-05, - "loss": 0.03100808262825012, - "step": 750 - }, - { - "epoch": 0.515006821282401, - "grad_norm": 0.652988851070404, - "learning_rate": 7.275558324582138e-05, - "loss": 0.03954651951789856, - "step": 755 - }, - { - "epoch": 0.5184174624829468, - "grad_norm": 1.2214330434799194, - "learning_rate": 7.272051828852705e-05, - "loss": 0.019992084801197053, - "step": 760 - }, - { - "epoch": 0.5218281036834925, - "grad_norm": 1.292953372001648, - "learning_rate": 7.268519012026443e-05, - "loss": 0.07394988536834717, - "step": 765 - }, - { - "epoch": 0.5252387448840382, - "grad_norm": 1.1823278665542603, - "learning_rate": 7.264959900504901e-05, - "loss": 0.037449967861175534, - "step": 770 - }, - { - "epoch": 0.5286493860845839, - "grad_norm": 1.1314970254898071, - "learning_rate": 7.261374520886128e-05, - "loss": 0.04381995797157288, - "step": 775 - }, - { - "epoch": 0.5320600272851296, - "grad_norm": 0.02543286792933941, - "learning_rate": 7.257762899964486e-05, - "loss": 0.07130052447319031, - "step": 780 - }, - { - "epoch": 0.5354706684856753, - "grad_norm": 0.37440457940101624, - "learning_rate": 7.25412506473044e-05, - "loss": 0.050841158628463744, - "step": 785 - }, - { - "epoch": 0.538881309686221, - "grad_norm": 0.2532084882259369, - "learning_rate": 7.250461042370365e-05, - "loss": 0.03486245274543762, - "step": 790 - }, - { - "epoch": 0.5422919508867667, - "grad_norm": 1.0150209665298462, - "learning_rate": 7.246770860266333e-05, - "loss": 0.050749993324279784, - "step": 795 - }, - { - "epoch": 0.5457025920873124, - "grad_norm": 1.108716607093811, - "learning_rate": 7.24305454599592e-05, - "loss": 0.03166365325450897, - "step": 800 - }, - { - "epoch": 0.5491132332878581, - "grad_norm": 0.16657976806163788, - "learning_rate": 7.239312127331989e-05, - "loss": 0.05016656517982483, - "step": 805 - }, - { - "epoch": 0.5525238744884038, - "grad_norm": 0.005627671722322702, - "learning_rate": 7.235543632242488e-05, - "loss": 0.021701858937740327, - "step": 810 - }, - { - "epoch": 0.5559345156889495, - "grad_norm": 1.8796381950378418, - "learning_rate": 7.231749088890241e-05, - "loss": 0.061094462871551514, - "step": 815 - }, - { - "epoch": 0.5593451568894953, - "grad_norm": 0.020010950043797493, - "learning_rate": 7.227928525632737e-05, - "loss": 0.0238655224442482, - "step": 820 - }, - { - "epoch": 0.562755798090041, - "grad_norm": 1.9777793884277344, - "learning_rate": 7.224081971021914e-05, - "loss": 0.041665592789649965, - "step": 825 - }, - { - "epoch": 0.5661664392905866, - "grad_norm": 0.06644955277442932, - "learning_rate": 7.220209453803954e-05, - "loss": 0.016651667654514313, - "step": 830 - }, - { - "epoch": 0.5695770804911323, - "grad_norm": 0.8203716278076172, - "learning_rate": 7.216311002919064e-05, - "loss": 0.03519523441791535, - "step": 835 - }, - { - "epoch": 0.572987721691678, - "grad_norm": 1.957996129989624, - "learning_rate": 7.212386647501254e-05, - "loss": 0.03704521656036377, - "step": 840 - }, - { - "epoch": 0.5763983628922238, - "grad_norm": 0.2386065572500229, - "learning_rate": 7.208436416878125e-05, - "loss": 0.02845575213432312, - "step": 845 - }, - { - "epoch": 0.5798090040927695, - "grad_norm": 0.9101418256759644, - "learning_rate": 7.204460340570658e-05, - "loss": 0.01103741154074669, - "step": 850 - }, - { - "epoch": 0.5832196452933152, - "grad_norm": 0.22701004147529602, - "learning_rate": 7.200458448292972e-05, - "loss": 0.06184377670288086, - "step": 855 - }, - { - "epoch": 0.5866302864938608, - "grad_norm": 2.3091611862182617, - "learning_rate": 7.196430769952126e-05, - "loss": 0.0492431253194809, - "step": 860 - }, - { - "epoch": 0.5900409276944065, - "grad_norm": 0.6630973815917969, - "learning_rate": 7.192377335647876e-05, - "loss": 0.027876955270767213, - "step": 865 - }, - { - "epoch": 0.5934515688949522, - "grad_norm": 1.7400578260421753, - "learning_rate": 7.188298175672464e-05, - "loss": 0.023742210865020753, - "step": 870 - }, - { - "epoch": 0.596862210095498, - "grad_norm": 0.7121092081069946, - "learning_rate": 7.184193320510379e-05, - "loss": 0.02125793993473053, - "step": 875 - }, - { - "epoch": 0.6002728512960437, - "grad_norm": 0.429611474275589, - "learning_rate": 7.180062800838143e-05, - "loss": 0.06682519316673279, - "step": 880 - }, - { - "epoch": 0.6036834924965894, - "grad_norm": 0.018405891954898834, - "learning_rate": 7.17590664752407e-05, - "loss": 0.022519922256469725, - "step": 885 - }, - { - "epoch": 0.607094133697135, - "grad_norm": 0.9797202348709106, - "learning_rate": 7.171724891628046e-05, - "loss": 0.10513803958892823, - "step": 890 - }, - { - "epoch": 0.6105047748976807, - "grad_norm": 0.11931606382131577, - "learning_rate": 7.167517564401282e-05, - "loss": 0.055521953105926516, - "step": 895 - }, - { - "epoch": 0.6139154160982264, - "grad_norm": 0.04398813843727112, - "learning_rate": 7.163284697286097e-05, - "loss": 0.018342888355255126, - "step": 900 - }, - { - "epoch": 0.6173260572987722, - "grad_norm": 0.11505168676376343, - "learning_rate": 7.15902632191567e-05, - "loss": 0.026946401596069335, - "step": 905 - }, - { - "epoch": 0.6207366984993179, - "grad_norm": 0.3042922019958496, - "learning_rate": 7.154742470113816e-05, - "loss": 0.02314314842224121, - "step": 910 - }, - { - "epoch": 0.6241473396998636, - "grad_norm": 0.09922346472740173, - "learning_rate": 7.150433173894733e-05, - "loss": 0.06378543972969056, - "step": 915 - }, - { - "epoch": 0.6275579809004093, - "grad_norm": 0.6513009667396545, - "learning_rate": 7.146098465462776e-05, - "loss": 0.036993378400802614, - "step": 920 - }, - { - "epoch": 0.630968622100955, - "grad_norm": 1.131602168083191, - "learning_rate": 7.14173837721221e-05, - "loss": 0.09491010308265686, - "step": 925 - }, - { - "epoch": 0.6343792633015006, - "grad_norm": 0.1672857254743576, - "learning_rate": 7.137352941726969e-05, - "loss": 0.03719751834869385, - "step": 930 - }, - { - "epoch": 0.6377899045020464, - "grad_norm": 0.7450887560844421, - "learning_rate": 7.132942191780414e-05, - "loss": 0.028598809242248537, - "step": 935 - }, - { - "epoch": 0.6412005457025921, - "grad_norm": 2.3537657260894775, - "learning_rate": 7.128506160335084e-05, - "loss": 0.06678735613822936, - "step": 940 - }, - { - "epoch": 0.6446111869031378, - "grad_norm": 0.014428210444748402, - "learning_rate": 7.124044880542455e-05, - "loss": 0.018354634940624236, - "step": 945 - }, - { - "epoch": 0.6480218281036835, - "grad_norm": 2.9239041805267334, - "learning_rate": 7.119558385742688e-05, - "loss": 0.08242651224136352, - "step": 950 - }, - { - "epoch": 0.6514324693042292, - "grad_norm": 0.39032718539237976, - "learning_rate": 7.115046709464383e-05, - "loss": 0.023772728443145753, - "step": 955 - }, - { - "epoch": 0.654843110504775, - "grad_norm": 0.3000798523426056, - "learning_rate": 7.110509885424326e-05, - "loss": 0.03464276790618896, - "step": 960 - }, - { - "epoch": 0.6582537517053206, - "grad_norm": 0.3980049192905426, - "learning_rate": 7.105947947527238e-05, - "loss": 0.08540127277374268, - "step": 965 - }, - { - "epoch": 0.6616643929058663, - "grad_norm": 0.9492272734642029, - "learning_rate": 7.10136092986552e-05, - "loss": 0.02300785481929779, - "step": 970 - }, - { - "epoch": 0.665075034106412, - "grad_norm": 1.9585710763931274, - "learning_rate": 7.096748866719005e-05, - "loss": 0.034704044461250305, - "step": 975 - }, - { - "epoch": 0.6684856753069577, - "grad_norm": 1.142238974571228, - "learning_rate": 7.092111792554689e-05, - "loss": 0.01860647052526474, - "step": 980 - }, - { - "epoch": 0.6718963165075034, - "grad_norm": 0.8443534970283508, - "learning_rate": 7.087449742026488e-05, - "loss": 0.03302992284297943, - "step": 985 - }, - { - "epoch": 0.6753069577080492, - "grad_norm": 1.0402863025665283, - "learning_rate": 7.082762749974968e-05, - "loss": 0.03963000178337097, - "step": 990 - }, - { - "epoch": 0.6787175989085948, - "grad_norm": 0.11959892511367798, - "learning_rate": 7.078050851427089e-05, - "loss": 0.0187692254781723, - "step": 995 - }, - { - "epoch": 0.6821282401091405, - "grad_norm": 1.495011329650879, - "learning_rate": 7.073314081595945e-05, - "loss": 0.050608736276626584, - "step": 1000 - }, - { - "epoch": 0.6855388813096862, - "grad_norm": 0.2534504234790802, - "learning_rate": 7.068552475880499e-05, - "loss": 0.0076520174741745, - "step": 1005 - }, - { - "epoch": 0.6889495225102319, - "grad_norm": 0.17387191951274872, - "learning_rate": 7.063766069865314e-05, - "loss": 0.028283193707466125, - "step": 1010 - }, - { - "epoch": 0.6923601637107776, - "grad_norm": 0.07424458116292953, - "learning_rate": 7.058954899320297e-05, - "loss": 0.03078552782535553, - "step": 1015 - }, - { - "epoch": 0.6957708049113234, - "grad_norm": 0.5854848027229309, - "learning_rate": 7.05411900020042e-05, - "loss": 0.02033105492591858, - "step": 1020 - }, - { - "epoch": 0.699181446111869, - "grad_norm": 0.09108871966600418, - "learning_rate": 7.049258408645463e-05, - "loss": 0.0510578989982605, - "step": 1025 - }, - { - "epoch": 0.7025920873124147, - "grad_norm": 0.2851751148700714, - "learning_rate": 7.044373160979734e-05, - "loss": 0.10413439273834228, - "step": 1030 - }, - { - "epoch": 0.7060027285129604, - "grad_norm": 0.8032590746879578, - "learning_rate": 7.039463293711804e-05, - "loss": 0.05853385329246521, - "step": 1035 - }, - { - "epoch": 0.7094133697135061, - "grad_norm": 0.1301775723695755, - "learning_rate": 7.03452884353423e-05, - "loss": 0.051472657918930055, - "step": 1040 - }, - { - "epoch": 0.7128240109140518, - "grad_norm": 0.46156957745552063, - "learning_rate": 7.029569847323287e-05, - "loss": 0.034115567803382874, - "step": 1045 - }, - { - "epoch": 0.7162346521145976, - "grad_norm": 1.081560730934143, - "learning_rate": 7.02458634213868e-05, - "loss": 0.059652507305145264, - "step": 1050 - }, - { - "epoch": 0.7196452933151433, - "grad_norm": 0.721208930015564, - "learning_rate": 7.019578365223286e-05, - "loss": 0.061070340871810916, - "step": 1055 - }, - { - "epoch": 0.723055934515689, - "grad_norm": 0.5738947987556458, - "learning_rate": 7.014545954002855e-05, - "loss": 0.03556577265262604, - "step": 1060 - }, - { - "epoch": 0.7264665757162346, - "grad_norm": 0.15053460001945496, - "learning_rate": 7.009489146085744e-05, - "loss": 0.03284372091293335, - "step": 1065 - }, - { - "epoch": 0.7298772169167803, - "grad_norm": 0.4496553838253021, - "learning_rate": 7.004407979262635e-05, - "loss": 0.07945018410682678, - "step": 1070 - }, - { - "epoch": 0.7332878581173261, - "grad_norm": 1.1821213960647583, - "learning_rate": 6.999302491506245e-05, - "loss": 0.033741748332977294, - "step": 1075 - }, - { - "epoch": 0.7366984993178718, - "grad_norm": 0.2809429168701172, - "learning_rate": 6.994172720971047e-05, - "loss": 0.023005199432373048, - "step": 1080 - }, - { - "epoch": 0.7401091405184175, - "grad_norm": 0.14925819635391235, - "learning_rate": 6.989018705992991e-05, - "loss": 0.01791207939386368, - "step": 1085 - }, - { - "epoch": 0.7435197817189632, - "grad_norm": 1.1947131156921387, - "learning_rate": 6.983840485089203e-05, - "loss": 0.03395574688911438, - "step": 1090 - }, - { - "epoch": 0.7469304229195088, - "grad_norm": 1.336547613143921, - "learning_rate": 6.978638096957712e-05, - "loss": 0.02712726593017578, - "step": 1095 - }, - { - "epoch": 0.7489768076398363, - "eval_loss": 0.05635881423950195, - "eval_runtime": 0.8951, - "eval_samples_per_second": 83.789, - "eval_steps_per_second": 2.234, - "step": 1098 - }, - { - "eval_cer_subset": 0.023981468864967978, - "eval_cer_subset_edit_distance": 176, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1098 - }, - { - "epoch": 0.7503410641200545, - "grad_norm": 0.015995435416698456, - "learning_rate": 6.973411580477149e-05, - "loss": 0.018527305126190184, - "step": 1100 - }, - { - "epoch": 0.7537517053206003, - "grad_norm": 2.3465819358825684, - "learning_rate": 6.968160974706465e-05, - "loss": 0.03113352656364441, - "step": 1105 - }, - { - "epoch": 0.757162346521146, - "grad_norm": 8.048991203308105, - "learning_rate": 6.962886318884633e-05, - "loss": 0.01905607581138611, - "step": 1110 - }, - { - "epoch": 0.7605729877216917, - "grad_norm": 2.0705132484436035, - "learning_rate": 6.957587652430363e-05, - "loss": 0.08121066093444824, - "step": 1115 - }, - { - "epoch": 0.7639836289222374, - "grad_norm": 0.6205260157585144, - "learning_rate": 6.952265014941796e-05, - "loss": 0.030836066603660582, - "step": 1120 - }, - { - "epoch": 0.767394270122783, - "grad_norm": 0.02030811458826065, - "learning_rate": 6.946918446196215e-05, - "loss": 0.0715795874595642, - "step": 1125 - }, - { - "epoch": 0.7708049113233287, - "grad_norm": 0.20006906986236572, - "learning_rate": 6.94154798614975e-05, - "loss": 0.09267728328704834, - "step": 1130 - }, - { - "epoch": 0.7742155525238745, - "grad_norm": 1.3217955827713013, - "learning_rate": 6.936153674937074e-05, - "loss": 0.057087546586990355, - "step": 1135 - }, - { - "epoch": 0.7776261937244202, - "grad_norm": 0.97421795129776, - "learning_rate": 6.930735552871105e-05, - "loss": 0.02381356656551361, - "step": 1140 - }, - { - "epoch": 0.7810368349249659, - "grad_norm": 1.1994248628616333, - "learning_rate": 6.925293660442705e-05, - "loss": 0.03957775831222534, - "step": 1145 - }, - { - "epoch": 0.7844474761255116, - "grad_norm": 1.0654077529907227, - "learning_rate": 6.919828038320378e-05, - "loss": 0.04088171124458313, - "step": 1150 - }, - { - "epoch": 0.7878581173260573, - "grad_norm": 0.6241940855979919, - "learning_rate": 6.914338727349963e-05, - "loss": 0.0698228895664215, - "step": 1155 - }, - { - "epoch": 0.791268758526603, - "grad_norm": 1.4655344486236572, - "learning_rate": 6.908825768554337e-05, - "loss": 0.0430897206068039, - "step": 1160 - }, - { - "epoch": 0.7946793997271487, - "grad_norm": 0.3493589162826538, - "learning_rate": 6.903289203133096e-05, - "loss": 0.05850836634635925, - "step": 1165 - }, - { - "epoch": 0.7980900409276944, - "grad_norm": 1.1164323091506958, - "learning_rate": 6.897729072462257e-05, - "loss": 0.02702825963497162, - "step": 1170 - }, - { - "epoch": 0.8015006821282401, - "grad_norm": 0.056947700679302216, - "learning_rate": 6.892145418093947e-05, - "loss": 0.11043190956115723, - "step": 1175 - }, - { - "epoch": 0.8049113233287858, - "grad_norm": 1.2450393438339233, - "learning_rate": 6.886538281756085e-05, - "loss": 0.06005706787109375, - "step": 1180 - }, - { - "epoch": 0.8083219645293315, - "grad_norm": 0.10885969549417496, - "learning_rate": 6.880907705352083e-05, - "loss": 0.022018623352050782, - "step": 1185 - }, - { - "epoch": 0.8117326057298773, - "grad_norm": 0.17044247686862946, - "learning_rate": 6.875253730960522e-05, - "loss": 0.024727573990821837, - "step": 1190 - }, - { - "epoch": 0.815143246930423, - "grad_norm": 0.22665634751319885, - "learning_rate": 6.869576400834843e-05, - "loss": 0.014500510692596436, - "step": 1195 - }, - { - "epoch": 0.8185538881309686, - "grad_norm": 0.6808337569236755, - "learning_rate": 6.863875757403028e-05, - "loss": 0.040313297510147096, - "step": 1200 - }, - { - "epoch": 0.8219645293315143, - "grad_norm": 0.37714090943336487, - "learning_rate": 6.858151843267289e-05, - "loss": 0.02225676029920578, - "step": 1205 - }, - { - "epoch": 0.82537517053206, - "grad_norm": 0.28732752799987793, - "learning_rate": 6.852404701203738e-05, - "loss": 0.017132116854190825, - "step": 1210 - }, - { - "epoch": 0.8287858117326057, - "grad_norm": 0.011728805489838123, - "learning_rate": 6.846634374162082e-05, - "loss": 0.043106210231781, - "step": 1215 - }, - { - "epoch": 0.8321964529331515, - "grad_norm": 0.06264204531908035, - "learning_rate": 6.84084090526529e-05, - "loss": 0.09258266687393188, - "step": 1220 - }, - { - "epoch": 0.8356070941336972, - "grad_norm": 0.024779995903372765, - "learning_rate": 6.835024337809278e-05, - "loss": 0.08440889716148377, - "step": 1225 - }, - { - "epoch": 0.8390177353342428, - "grad_norm": 0.3760814964771271, - "learning_rate": 6.829184715262579e-05, - "loss": 0.046157938241958615, - "step": 1230 - }, - { - "epoch": 0.8424283765347885, - "grad_norm": 0.41763243079185486, - "learning_rate": 6.823322081266027e-05, - "loss": 0.007576120644807815, - "step": 1235 - }, - { - "epoch": 0.8458390177353342, - "grad_norm": 0.20451563596725464, - "learning_rate": 6.817436479632423e-05, - "loss": 0.00685272142291069, - "step": 1240 - }, - { - "epoch": 0.8492496589358799, - "grad_norm": 0.19664883613586426, - "learning_rate": 6.811527954346208e-05, - "loss": 0.029371869564056397, - "step": 1245 - }, - { - "epoch": 0.8526603001364257, - "grad_norm": 0.18445859849452972, - "learning_rate": 6.805596549563143e-05, - "loss": 0.013169947266578674, - "step": 1250 - }, - { - "epoch": 0.8560709413369714, - "grad_norm": 0.25306227803230286, - "learning_rate": 6.799642309609968e-05, - "loss": 0.04840644598007202, - "step": 1255 - }, - { - "epoch": 0.859481582537517, - "grad_norm": 0.013680736534297466, - "learning_rate": 6.793665278984076e-05, - "loss": 0.005744677782058716, - "step": 1260 - }, - { - "epoch": 0.8628922237380627, - "grad_norm": 0.896000862121582, - "learning_rate": 6.78766550235318e-05, - "loss": 0.06524677276611328, - "step": 1265 - }, - { - "epoch": 0.8663028649386084, - "grad_norm": 0.028516558930277824, - "learning_rate": 6.781643024554982e-05, - "loss": 0.011343669146299362, - "step": 1270 - }, - { - "epoch": 0.8697135061391542, - "grad_norm": 0.8379983305931091, - "learning_rate": 6.775597890596829e-05, - "loss": 0.022122929990291595, - "step": 1275 - }, - { - "epoch": 0.8731241473396999, - "grad_norm": 1.4136202335357666, - "learning_rate": 6.769530145655389e-05, - "loss": 0.0679425597190857, - "step": 1280 - }, - { - "epoch": 0.8765347885402456, - "grad_norm": 1.3402701616287231, - "learning_rate": 6.763439835076303e-05, - "loss": 0.04459039568901062, - "step": 1285 - }, - { - "epoch": 0.8799454297407913, - "grad_norm": 1.0337740182876587, - "learning_rate": 6.757327004373852e-05, - "loss": 0.03138587772846222, - "step": 1290 - }, - { - "epoch": 0.883356070941337, - "grad_norm": 0.0886356458067894, - "learning_rate": 6.751191699230613e-05, - "loss": 0.008893608301877975, - "step": 1295 - }, - { - "epoch": 0.8867667121418826, - "grad_norm": 0.2752978801727295, - "learning_rate": 6.745033965497122e-05, - "loss": 0.036550650000572206, - "step": 1300 - }, - { - "epoch": 0.8901773533424284, - "grad_norm": 0.17057837545871735, - "learning_rate": 6.73885384919153e-05, - "loss": 0.34759960174560545, - "step": 1305 - }, - { - "epoch": 0.8935879945429741, - "grad_norm": 0.9223344326019287, - "learning_rate": 6.732651396499253e-05, - "loss": 0.017408999800682067, - "step": 1310 - }, - { - "epoch": 0.8969986357435198, - "grad_norm": 0.3093169033527374, - "learning_rate": 6.726426653772635e-05, - "loss": 0.05584460496902466, - "step": 1315 - }, - { - "epoch": 0.9004092769440655, - "grad_norm": 1.0157201290130615, - "learning_rate": 6.7201796675306e-05, - "loss": 0.023202185332775117, - "step": 1320 - }, - { - "epoch": 0.9038199181446112, - "grad_norm": 0.9780434370040894, - "learning_rate": 6.713910484458302e-05, - "loss": 0.029402348399162292, - "step": 1325 - }, - { - "epoch": 0.9072305593451568, - "grad_norm": 0.07956309616565704, - "learning_rate": 6.707619151406774e-05, - "loss": 0.02493150979280472, - "step": 1330 - }, - { - "epoch": 0.9106412005457026, - "grad_norm": 0.18366064131259918, - "learning_rate": 6.701305715392586e-05, - "loss": 0.06721556782722474, - "step": 1335 - }, - { - "epoch": 0.9140518417462483, - "grad_norm": 0.8265342116355896, - "learning_rate": 6.694970223597483e-05, - "loss": 0.03872359693050385, - "step": 1340 - }, - { - "epoch": 0.917462482946794, - "grad_norm": 1.9715158939361572, - "learning_rate": 6.688612723368042e-05, - "loss": 0.05604517459869385, - "step": 1345 - }, - { - "epoch": 0.9208731241473397, - "grad_norm": 0.06577733904123306, - "learning_rate": 6.682233262215312e-05, - "loss": 0.04941270649433136, - "step": 1350 - }, - { - "epoch": 0.9242837653478854, - "grad_norm": 0.2798021733760834, - "learning_rate": 6.67583188781446e-05, - "loss": 0.011715996265411376, - "step": 1355 - }, - { - "epoch": 0.927694406548431, - "grad_norm": 0.7599299550056458, - "learning_rate": 6.669408648004423e-05, - "loss": 0.030529171228408813, - "step": 1360 - }, - { - "epoch": 0.9311050477489768, - "grad_norm": 0.3893057405948639, - "learning_rate": 6.662963590787532e-05, - "loss": 0.06623916625976563, - "step": 1365 - }, - { - "epoch": 0.9345156889495225, - "grad_norm": 0.3796108365058899, - "learning_rate": 6.656496764329171e-05, - "loss": 0.02021588236093521, - "step": 1370 - }, - { - "epoch": 0.9379263301500682, - "grad_norm": 0.9708864688873291, - "learning_rate": 6.65000821695741e-05, - "loss": 0.05283964872360229, - "step": 1375 - }, - { - "epoch": 0.9413369713506139, - "grad_norm": 1.2553836107254028, - "learning_rate": 6.643497997162645e-05, - "loss": 0.11128103733062744, - "step": 1380 - }, - { - "epoch": 0.9447476125511596, - "grad_norm": 1.7390260696411133, - "learning_rate": 6.636966153597231e-05, - "loss": 0.051687347888946536, - "step": 1385 - }, - { - "epoch": 0.9481582537517054, - "grad_norm": 0.575423538684845, - "learning_rate": 6.630412735075128e-05, - "loss": 0.03732641041278839, - "step": 1390 - }, - { - "epoch": 0.951568894952251, - "grad_norm": 0.8504135608673096, - "learning_rate": 6.623837790571525e-05, - "loss": 0.038179832696914676, - "step": 1395 - }, - { - "epoch": 0.9549795361527967, - "grad_norm": 0.3777940273284912, - "learning_rate": 6.617241369222483e-05, - "loss": 0.01817839443683624, - "step": 1400 - }, - { - "epoch": 0.9583901773533424, - "grad_norm": 1.1219260692596436, - "learning_rate": 6.610623520324567e-05, - "loss": 0.0864151120185852, - "step": 1405 - }, - { - "epoch": 0.9618008185538881, - "grad_norm": 0.2320811152458191, - "learning_rate": 6.603984293334466e-05, - "loss": 0.0041168566793203356, - "step": 1410 - }, - { - "epoch": 0.9652114597544338, - "grad_norm": 0.5541130304336548, - "learning_rate": 6.597323737868642e-05, - "loss": 0.06572380065917968, - "step": 1415 - }, - { - "epoch": 0.9686221009549796, - "grad_norm": 0.1585462987422943, - "learning_rate": 6.590641903702944e-05, - "loss": 0.03849715292453766, - "step": 1420 - }, - { - "epoch": 0.9720327421555253, - "grad_norm": 1.7849252223968506, - "learning_rate": 6.583938840772245e-05, - "loss": 0.10304104089736939, - "step": 1425 - }, - { - "epoch": 0.975443383356071, - "grad_norm": 2.1307897567749023, - "learning_rate": 6.57721459917006e-05, - "loss": 0.036449754238128663, - "step": 1430 - }, - { - "epoch": 0.9788540245566166, - "grad_norm": 1.857226848602295, - "learning_rate": 6.570469229148184e-05, - "loss": 0.04441194534301758, - "step": 1435 - }, - { - "epoch": 0.9822646657571623, - "grad_norm": 0.27433109283447266, - "learning_rate": 6.563702781116302e-05, - "loss": 0.028930380940437317, - "step": 1440 - }, - { - "epoch": 0.985675306957708, - "grad_norm": 0.314373642206192, - "learning_rate": 6.556915305641629e-05, - "loss": 0.04347030222415924, - "step": 1445 - }, - { - "epoch": 0.9890859481582538, - "grad_norm": 0.3977321982383728, - "learning_rate": 6.550106853448513e-05, - "loss": 0.0386979341506958, - "step": 1450 - }, - { - "epoch": 0.9924965893587995, - "grad_norm": 1.6032801866531372, - "learning_rate": 6.543277475418074e-05, - "loss": 0.03199186325073242, - "step": 1455 - }, - { - "epoch": 0.9959072305593452, - "grad_norm": 1.1229087114334106, - "learning_rate": 6.53642722258781e-05, - "loss": 0.046002286672592166, - "step": 1460 - }, - { - "epoch": 0.9986357435197817, - "eval_loss": 0.05529617890715599, - "eval_runtime": 0.9152, - "eval_samples_per_second": 81.948, - "eval_steps_per_second": 2.185, - "step": 1464 - }, - { - "eval_cer_subset": 0.024662760594086387, - "eval_cer_subset_edit_distance": 181, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1464 - }, - { - "epoch": 0.9993178717598908, - "grad_norm": 0.8476057648658752, - "learning_rate": 6.529556146151224e-05, - "loss": 0.01695575416088104, - "step": 1465 - }, - { - "epoch": 1.0027285129604366, - "grad_norm": 0.0731077790260315, - "learning_rate": 6.522664297457437e-05, - "loss": 0.0027170730754733086, - "step": 1470 - }, - { - "epoch": 1.0061391541609823, - "grad_norm": 0.05015791580080986, - "learning_rate": 6.515751728010807e-05, - "loss": 0.015530210733413697, - "step": 1475 - }, - { - "epoch": 1.009549795361528, - "grad_norm": 1.0450271368026733, - "learning_rate": 6.508818489470543e-05, - "loss": 0.028301748633384704, - "step": 1480 - }, - { - "epoch": 1.0129604365620737, - "grad_norm": 0.007203489542007446, - "learning_rate": 6.501864633650318e-05, - "loss": 0.013968841731548309, - "step": 1485 - }, - { - "epoch": 1.0163710777626194, - "grad_norm": 0.02691703289747238, - "learning_rate": 6.494890212517883e-05, - "loss": 0.005682830139994622, - "step": 1490 - }, - { - "epoch": 1.019781718963165, - "grad_norm": 0.6411488652229309, - "learning_rate": 6.487895278194678e-05, - "loss": 0.005073993653059006, - "step": 1495 - }, - { - "epoch": 1.0231923601637107, - "grad_norm": 0.13043923676013947, - "learning_rate": 6.480879882955443e-05, - "loss": 0.034558257460594176, - "step": 1500 - }, - { - "epoch": 1.0266030013642564, - "grad_norm": 0.42835143208503723, - "learning_rate": 6.473844079227828e-05, - "loss": 0.008179995417594909, - "step": 1505 - }, - { - "epoch": 1.030013642564802, - "grad_norm": 0.08968371897935867, - "learning_rate": 6.466787919591997e-05, - "loss": 0.06659101843833923, - "step": 1510 - }, - { - "epoch": 1.0334242837653478, - "grad_norm": 0.028647486120462418, - "learning_rate": 6.459711456780243e-05, - "loss": 0.002646555379033089, - "step": 1515 - }, - { - "epoch": 1.0368349249658937, - "grad_norm": 0.49801063537597656, - "learning_rate": 6.452614743676588e-05, - "loss": 0.014094460010528564, - "step": 1520 - }, - { - "epoch": 1.0402455661664394, - "grad_norm": 1.1292961835861206, - "learning_rate": 6.445497833316385e-05, - "loss": 0.03136319518089294, - "step": 1525 - }, - { - "epoch": 1.043656207366985, - "grad_norm": 0.07291857898235321, - "learning_rate": 6.438360778885929e-05, - "loss": 0.013663257658481597, - "step": 1530 - }, - { - "epoch": 1.0470668485675307, - "grad_norm": 0.4733221232891083, - "learning_rate": 6.43120363372206e-05, - "loss": 0.011823048442602157, - "step": 1535 - }, - { - "epoch": 1.0504774897680764, - "grad_norm": 0.03299790620803833, - "learning_rate": 6.424026451311753e-05, - "loss": 0.017025044560432433, - "step": 1540 - }, - { - "epoch": 1.053888130968622, - "grad_norm": 2.0730843544006348, - "learning_rate": 6.416829285291728e-05, - "loss": 0.022110742330551148, - "step": 1545 - }, - { - "epoch": 1.0572987721691678, - "grad_norm": 0.004568230360746384, - "learning_rate": 6.409612189448053e-05, - "loss": 0.03601303398609161, - "step": 1550 - }, - { - "epoch": 1.0607094133697135, - "grad_norm": 1.453091025352478, - "learning_rate": 6.402375217715729e-05, - "loss": 0.014915446937084197, - "step": 1555 - }, - { - "epoch": 1.0641200545702592, - "grad_norm": 0.5859401226043701, - "learning_rate": 6.395118424178299e-05, - "loss": 0.03671925961971283, - "step": 1560 - }, - { - "epoch": 1.0675306957708048, - "grad_norm": 0.007798578590154648, - "learning_rate": 6.387841863067433e-05, - "loss": 0.024042302370071413, - "step": 1565 - }, - { - "epoch": 1.0709413369713505, - "grad_norm": 0.05673844739794731, - "learning_rate": 6.380545588762534e-05, - "loss": 0.014150387048721314, - "step": 1570 - }, - { - "epoch": 1.0743519781718964, - "grad_norm": 0.6972388029098511, - "learning_rate": 6.373229655790325e-05, - "loss": 0.03881770372390747, - "step": 1575 - }, - { - "epoch": 1.077762619372442, - "grad_norm": 0.8956314325332642, - "learning_rate": 6.365894118824444e-05, - "loss": 0.021957725286483765, - "step": 1580 - }, - { - "epoch": 1.0811732605729878, - "grad_norm": 1.0231817960739136, - "learning_rate": 6.358539032685029e-05, - "loss": 0.03818466663360596, - "step": 1585 - }, - { - "epoch": 1.0845839017735335, - "grad_norm": 0.32065045833587646, - "learning_rate": 6.351164452338316e-05, - "loss": 0.017974501848220824, - "step": 1590 - }, - { - "epoch": 1.0879945429740792, - "grad_norm": 0.052197907119989395, - "learning_rate": 6.34377043289623e-05, - "loss": 0.34247732162475586, - "step": 1595 - }, - { - "epoch": 1.0914051841746248, - "grad_norm": 0.1314801126718521, - "learning_rate": 6.336357029615964e-05, - "loss": 0.007924404740333558, - "step": 1600 - }, - { - "epoch": 1.0948158253751705, - "grad_norm": 0.013010814785957336, - "learning_rate": 6.32892429789957e-05, - "loss": 0.013722099363803864, - "step": 1605 - }, - { - "epoch": 1.0982264665757162, - "grad_norm": 0.07680380344390869, - "learning_rate": 6.321472293293549e-05, - "loss": 0.020718716084957123, - "step": 1610 - }, - { - "epoch": 1.101637107776262, - "grad_norm": 0.3573530912399292, - "learning_rate": 6.314001071488434e-05, - "loss": 0.017910942435264587, - "step": 1615 - }, - { - "epoch": 1.1050477489768076, - "grad_norm": 0.0073904613964259624, - "learning_rate": 6.306510688318365e-05, - "loss": 0.009220895171165467, - "step": 1620 - }, - { - "epoch": 1.1084583901773533, - "grad_norm": 0.0597323514521122, - "learning_rate": 6.299001199760687e-05, - "loss": 0.010637883096933365, - "step": 1625 - }, - { - "epoch": 1.111869031377899, - "grad_norm": 0.07265184819698334, - "learning_rate": 6.291472661935522e-05, - "loss": 0.00596662349998951, - "step": 1630 - }, - { - "epoch": 1.1152796725784448, - "grad_norm": 5.774064064025879, - "learning_rate": 6.283925131105348e-05, - "loss": 0.032669514417648315, - "step": 1635 - }, - { - "epoch": 1.1186903137789905, - "grad_norm": 0.42285504937171936, - "learning_rate": 6.276358663674589e-05, - "loss": 0.028756555914878846, - "step": 1640 - }, - { - "epoch": 1.1221009549795362, - "grad_norm": 0.014294124208390713, - "learning_rate": 6.268773316189178e-05, - "loss": 0.0047109007835388185, - "step": 1645 - }, - { - "epoch": 1.125511596180082, - "grad_norm": 0.34502843022346497, - "learning_rate": 6.261169145336151e-05, - "loss": 0.015978309512138366, - "step": 1650 - }, - { - "epoch": 1.1289222373806276, - "grad_norm": 0.02683340758085251, - "learning_rate": 6.253546207943209e-05, - "loss": 0.014604611694812775, - "step": 1655 - }, - { - "epoch": 1.1323328785811733, - "grad_norm": 0.02333923988044262, - "learning_rate": 6.245904560978302e-05, - "loss": 0.021287524700164796, - "step": 1660 - }, - { - "epoch": 1.135743519781719, - "grad_norm": 0.2537156939506531, - "learning_rate": 6.238244261549203e-05, - "loss": 0.01746509373188019, - "step": 1665 - }, - { - "epoch": 1.1391541609822646, - "grad_norm": 0.2644752860069275, - "learning_rate": 6.230565366903075e-05, - "loss": 0.021785764396190642, - "step": 1670 - }, - { - "epoch": 1.1425648021828103, - "grad_norm": 0.3956565260887146, - "learning_rate": 6.222867934426052e-05, - "loss": 0.003387744724750519, - "step": 1675 - }, - { - "epoch": 1.145975443383356, - "grad_norm": 0.5124446153640747, - "learning_rate": 6.215152021642801e-05, - "loss": 0.013412350416183471, - "step": 1680 - }, - { - "epoch": 1.1493860845839017, - "grad_norm": 0.0077205742709338665, - "learning_rate": 6.2074176862161e-05, - "loss": 0.06386477947235107, - "step": 1685 - }, - { - "epoch": 1.1527967257844476, - "grad_norm": 0.044003162533044815, - "learning_rate": 6.1996649859464e-05, - "loss": 0.002909707650542259, - "step": 1690 - }, - { - "epoch": 1.1562073669849933, - "grad_norm": 0.358694463968277, - "learning_rate": 6.191893978771402e-05, - "loss": 0.021670857071876527, - "step": 1695 - }, - { - "epoch": 1.159618008185539, - "grad_norm": 0.09412632882595062, - "learning_rate": 6.184104722765613e-05, - "loss": 0.019501471519470216, - "step": 1700 - }, - { - "epoch": 1.1630286493860846, - "grad_norm": 0.8273324370384216, - "learning_rate": 6.17629727613992e-05, - "loss": 0.034558022022247316, - "step": 1705 - }, - { - "epoch": 1.1664392905866303, - "grad_norm": 0.3224208950996399, - "learning_rate": 6.168471697241155e-05, - "loss": 0.022453221678733825, - "step": 1710 - }, - { - "epoch": 1.169849931787176, - "grad_norm": 0.27806228399276733, - "learning_rate": 6.160628044551652e-05, - "loss": 0.028394827246665956, - "step": 1715 - }, - { - "epoch": 1.1732605729877217, - "grad_norm": 0.05991955101490021, - "learning_rate": 6.152766376688818e-05, - "loss": 0.02458793669939041, - "step": 1720 - }, - { - "epoch": 1.1766712141882674, - "grad_norm": 0.5648256540298462, - "learning_rate": 6.14488675240469e-05, - "loss": 0.019231566786766054, - "step": 1725 - }, - { - "epoch": 1.180081855388813, - "grad_norm": 0.5112252831459045, - "learning_rate": 6.1369892305855e-05, - "loss": 0.00729234516620636, - "step": 1730 - }, - { - "epoch": 1.1834924965893587, - "grad_norm": 0.10093241930007935, - "learning_rate": 6.129073870251228e-05, - "loss": 0.026474124193191527, - "step": 1735 - }, - { - "epoch": 1.1869031377899044, - "grad_norm": 0.007164946291595697, - "learning_rate": 6.12114073055517e-05, - "loss": 0.011781595647335052, - "step": 1740 - }, - { - "epoch": 1.19031377899045, - "grad_norm": 0.0596928596496582, - "learning_rate": 6.113189870783484e-05, - "loss": 0.022979708015918733, - "step": 1745 - }, - { - "epoch": 1.1937244201909958, - "grad_norm": 0.07026170939207077, - "learning_rate": 6.105221350354764e-05, - "loss": 0.021880485117435455, - "step": 1750 - }, - { - "epoch": 1.1971350613915417, - "grad_norm": 0.4536992311477661, - "learning_rate": 6.097235228819578e-05, - "loss": 0.0312265545129776, - "step": 1755 - }, - { - "epoch": 1.2005457025920874, - "grad_norm": 0.013953015208244324, - "learning_rate": 6.089231565860035e-05, - "loss": 0.006989015638828278, - "step": 1760 - }, - { - "epoch": 1.203956343792633, - "grad_norm": 0.12421152740716934, - "learning_rate": 6.0812104212893353e-05, - "loss": 0.010978200286626816, - "step": 1765 - }, - { - "epoch": 1.2073669849931787, - "grad_norm": 0.0044771963730454445, - "learning_rate": 6.073171855051322e-05, - "loss": 0.029409635066986083, - "step": 1770 - }, - { - "epoch": 1.2107776261937244, - "grad_norm": 0.07688756287097931, - "learning_rate": 6.065115927220032e-05, - "loss": 0.013059450685977936, - "step": 1775 - }, - { - "epoch": 1.21418826739427, - "grad_norm": 0.3248527944087982, - "learning_rate": 6.0570426979992546e-05, - "loss": 0.005089572072029114, - "step": 1780 - }, - { - "epoch": 1.2175989085948158, - "grad_norm": 0.13590829074382782, - "learning_rate": 6.048952227722073e-05, - "loss": 0.013443182408809661, - "step": 1785 - }, - { - "epoch": 1.2210095497953615, - "grad_norm": 0.8500165343284607, - "learning_rate": 6.040844576850416e-05, - "loss": 0.05245064496994019, - "step": 1790 - }, - { - "epoch": 1.2244201909959072, - "grad_norm": 0.009083034470677376, - "learning_rate": 6.0327198059746115e-05, - "loss": 0.02519877254962921, - "step": 1795 - }, - { - "epoch": 1.2278308321964528, - "grad_norm": 0.010473054833710194, - "learning_rate": 6.024577975812922e-05, - "loss": 0.0116177037358284, - "step": 1800 - }, - { - "epoch": 1.2312414733969987, - "grad_norm": 0.01996340975165367, - "learning_rate": 6.016419147211102e-05, - "loss": 0.002756960690021515, - "step": 1805 - }, - { - "epoch": 1.2346521145975444, - "grad_norm": 0.046663638204336166, - "learning_rate": 6.008243381141942e-05, - "loss": 0.006187716126441955, - "step": 1810 - }, - { - "epoch": 1.23806275579809, - "grad_norm": 0.5459519624710083, - "learning_rate": 6.000050738704805e-05, - "loss": 0.032647940516471866, - "step": 1815 - }, - { - "epoch": 1.2414733969986358, - "grad_norm": 0.24907033145427704, - "learning_rate": 5.991841281125177e-05, - "loss": 0.007942546159029007, - "step": 1820 - }, - { - "epoch": 1.2448840381991815, - "grad_norm": 0.05362895876169205, - "learning_rate": 5.9836150697542086e-05, - "loss": 0.009079506993293763, - "step": 1825 - }, - { - "epoch": 1.2482946793997272, - "grad_norm": 0.34499797224998474, - "learning_rate": 5.9753721660682515e-05, - "loss": 0.0033931449055671693, - "step": 1830 - }, - { - "epoch": 1.2482946793997272, - "eval_loss": 0.05778764560818672, - "eval_runtime": 0.8976, - "eval_samples_per_second": 83.554, - "eval_steps_per_second": 2.228, - "step": 1830 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 1830 - }, - { - "epoch": 1.2517053206002728, - "grad_norm": 4.153449058532715, - "learning_rate": 5.967112631668409e-05, - "loss": 0.012082754075527192, - "step": 1835 - }, - { - "epoch": 1.2551159618008185, - "grad_norm": 3.670395612716675, - "learning_rate": 5.958836528280062e-05, - "loss": 0.009738349914550781, - "step": 1840 - }, - { - "epoch": 1.2585266030013642, - "grad_norm": 0.10426812618970871, - "learning_rate": 5.950543917752422e-05, - "loss": 0.0026493463665246964, - "step": 1845 - }, - { - "epoch": 1.26193724420191, - "grad_norm": 0.022981934249401093, - "learning_rate": 5.942234862058059e-05, - "loss": 0.005860285460948944, - "step": 1850 - }, - { - "epoch": 1.2653478854024556, - "grad_norm": 0.0007500631618313491, - "learning_rate": 5.933909423292441e-05, - "loss": 0.05660415887832641, - "step": 1855 - }, - { - "epoch": 1.2687585266030013, - "grad_norm": 0.37151023745536804, - "learning_rate": 5.925567663673472e-05, - "loss": 0.0029373887926340105, - "step": 1860 - }, - { - "epoch": 1.272169167803547, - "grad_norm": 0.036931321024894714, - "learning_rate": 5.9172096455410244e-05, - "loss": 0.007140242308378219, - "step": 1865 - }, - { - "epoch": 1.2755798090040928, - "grad_norm": 0.008696082048118114, - "learning_rate": 5.908835431356475e-05, - "loss": 0.03127997815608978, - "step": 1870 - }, - { - "epoch": 1.2789904502046385, - "grad_norm": 1.442112684249878, - "learning_rate": 5.900445083702235e-05, - "loss": 0.05517878532409668, - "step": 1875 - }, - { - "epoch": 1.2824010914051842, - "grad_norm": 0.5617618560791016, - "learning_rate": 5.8920386652812894e-05, - "loss": 0.018300823867321014, - "step": 1880 - }, - { - "epoch": 1.28581173260573, - "grad_norm": 0.7440968751907349, - "learning_rate": 5.883616238916718e-05, - "loss": 0.025746351480484007, - "step": 1885 - }, - { - "epoch": 1.2892223738062756, - "grad_norm": 0.23570798337459564, - "learning_rate": 5.875177867551236e-05, - "loss": 0.009138952195644378, - "step": 1890 - }, - { - "epoch": 1.2926330150068213, - "grad_norm": 0.8084076046943665, - "learning_rate": 5.866723614246718e-05, - "loss": 0.029955285787582397, - "step": 1895 - }, - { - "epoch": 1.296043656207367, - "grad_norm": 0.40341874957084656, - "learning_rate": 5.858253542183727e-05, - "loss": 0.03340296447277069, - "step": 1900 - }, - { - "epoch": 1.2994542974079126, - "grad_norm": 1.6409776210784912, - "learning_rate": 5.8497677146610444e-05, - "loss": 0.037276041507720944, - "step": 1905 - }, - { - "epoch": 1.3028649386084583, - "grad_norm": 0.1204327791929245, - "learning_rate": 5.841266195095195e-05, - "loss": 0.008522054553031922, - "step": 1910 - }, - { - "epoch": 1.3062755798090042, - "grad_norm": 0.07556264847517014, - "learning_rate": 5.832749047019973e-05, - "loss": 0.0024863181635737417, - "step": 1915 - }, - { - "epoch": 1.30968622100955, - "grad_norm": 0.03532743453979492, - "learning_rate": 5.824216334085971e-05, - "loss": 0.004078276455402374, - "step": 1920 - }, - { - "epoch": 1.3130968622100956, - "grad_norm": 0.8705688118934631, - "learning_rate": 5.815668120060098e-05, - "loss": 0.02017311155796051, - "step": 1925 - }, - { - "epoch": 1.3165075034106413, - "grad_norm": 0.0009952038526535034, - "learning_rate": 5.8071044688251086e-05, - "loss": 0.008325689285993577, - "step": 1930 - }, - { - "epoch": 1.319918144611187, - "grad_norm": 2.50828218460083, - "learning_rate": 5.7985254443791206e-05, - "loss": 0.006225927546620369, - "step": 1935 - }, - { - "epoch": 1.3233287858117326, - "grad_norm": 0.6447598934173584, - "learning_rate": 5.789931110835142e-05, - "loss": 0.005092256143689156, - "step": 1940 - }, - { - "epoch": 1.3267394270122783, - "grad_norm": 0.00778482249006629, - "learning_rate": 5.781321532420588e-05, - "loss": 0.003357316926121712, - "step": 1945 - }, - { - "epoch": 1.330150068212824, - "grad_norm": 0.5357232689857483, - "learning_rate": 5.772696773476801e-05, - "loss": 0.01329006552696228, - "step": 1950 - }, - { - "epoch": 1.3335607094133697, - "grad_norm": 0.8402428030967712, - "learning_rate": 5.7640568984585756e-05, - "loss": 0.05447224378585815, - "step": 1955 - }, - { - "epoch": 1.3369713506139154, - "grad_norm": 1.4458378553390503, - "learning_rate": 5.755401971933664e-05, - "loss": 0.017956557869911193, - "step": 1960 - }, - { - "epoch": 1.340381991814461, - "grad_norm": 0.3120212256908417, - "learning_rate": 5.746732058582311e-05, - "loss": 0.025589832663536073, - "step": 1965 - }, - { - "epoch": 1.3437926330150067, - "grad_norm": 0.5558730363845825, - "learning_rate": 5.738047223196755e-05, - "loss": 0.012551702558994293, - "step": 1970 - }, - { - "epoch": 1.3472032742155524, - "grad_norm": 1.4269353151321411, - "learning_rate": 5.729347530680753e-05, - "loss": 0.056649971008300784, - "step": 1975 - }, - { - "epoch": 1.350613915416098, - "grad_norm": 0.2933025062084198, - "learning_rate": 5.720633046049091e-05, - "loss": 0.008710381388664246, - "step": 1980 - }, - { - "epoch": 1.354024556616644, - "grad_norm": 0.9624903798103333, - "learning_rate": 5.7119038344271e-05, - "loss": 0.014256273210048676, - "step": 1985 - }, - { - "epoch": 1.3574351978171897, - "grad_norm": 1.7939856052398682, - "learning_rate": 5.703159961050172e-05, - "loss": 0.02518789768218994, - "step": 1990 - }, - { - "epoch": 1.3608458390177354, - "grad_norm": 0.49102070927619934, - "learning_rate": 5.694401491263267e-05, - "loss": 0.019194112718105318, - "step": 1995 - }, - { - "epoch": 1.364256480218281, - "grad_norm": 0.04536288231611252, - "learning_rate": 5.6856284905204266e-05, - "loss": 0.0032343052327632902, - "step": 2000 - }, - { - "epoch": 1.3676671214188267, - "grad_norm": 1.0203709602355957, - "learning_rate": 5.676841024384287e-05, - "loss": 0.03292953073978424, - "step": 2005 - }, - { - "epoch": 1.3710777626193724, - "grad_norm": 0.9504344463348389, - "learning_rate": 5.6680391585255886e-05, - "loss": 0.005538972094655037, - "step": 2010 - }, - { - "epoch": 1.374488403819918, - "grad_norm": 0.0067398096434772015, - "learning_rate": 5.6592229587226823e-05, - "loss": 0.0005991209298372268, - "step": 2015 - }, - { - "epoch": 1.3778990450204638, - "grad_norm": 1.5757488012313843, - "learning_rate": 5.6503924908610405e-05, - "loss": 0.012159132212400437, - "step": 2020 - }, - { - "epoch": 1.3813096862210095, - "grad_norm": 0.5669568181037903, - "learning_rate": 5.641547820932765e-05, - "loss": 0.010695122182369232, - "step": 2025 - }, - { - "epoch": 1.3847203274215554, - "grad_norm": 0.23352237045764923, - "learning_rate": 5.63268901503609e-05, - "loss": 0.012255635857582093, - "step": 2030 - }, - { - "epoch": 1.388130968622101, - "grad_norm": 1.5650484561920166, - "learning_rate": 5.623816139374895e-05, - "loss": 0.05114143490791321, - "step": 2035 - }, - { - "epoch": 1.3915416098226467, - "grad_norm": 0.05920939892530441, - "learning_rate": 5.614929260258202e-05, - "loss": 0.002235228754580021, - "step": 2040 - }, - { - "epoch": 1.3949522510231924, - "grad_norm": 0.015943612903356552, - "learning_rate": 5.606028444099689e-05, - "loss": 0.00463336743414402, - "step": 2045 - }, - { - "epoch": 1.398362892223738, - "grad_norm": 1.171777606010437, - "learning_rate": 5.597113757417183e-05, - "loss": 0.010701537132263184, - "step": 2050 - }, - { - "epoch": 1.4017735334242838, - "grad_norm": 1.0710582733154297, - "learning_rate": 5.588185266832173e-05, - "loss": 0.0072472900152206424, - "step": 2055 - }, - { - "epoch": 1.4051841746248295, - "grad_norm": 0.0567881241440773, - "learning_rate": 5.5792430390693046e-05, - "loss": 0.0031896911561489103, - "step": 2060 - }, - { - "epoch": 1.4085948158253752, - "grad_norm": 0.5927397608757019, - "learning_rate": 5.570287140955886e-05, - "loss": 0.004600095748901367, - "step": 2065 - }, - { - "epoch": 1.4120054570259208, - "grad_norm": 1.6821774244308472, - "learning_rate": 5.5613176394213896e-05, - "loss": 0.010283125936985016, - "step": 2070 - }, - { - "epoch": 1.4154160982264665, - "grad_norm": 0.3331978917121887, - "learning_rate": 5.552334601496944e-05, - "loss": 0.00821176841855049, - "step": 2075 - }, - { - "epoch": 1.4188267394270122, - "grad_norm": 0.0056209871545434, - "learning_rate": 5.5433380943148414e-05, - "loss": 0.00954909473657608, - "step": 2080 - }, - { - "epoch": 1.422237380627558, - "grad_norm": 0.059546198695898056, - "learning_rate": 5.534328185108033e-05, - "loss": 0.004072617739439011, - "step": 2085 - }, - { - "epoch": 1.4256480218281036, - "grad_norm": 2.5227770805358887, - "learning_rate": 5.525304941209626e-05, - "loss": 0.006328310817480087, - "step": 2090 - }, - { - "epoch": 1.4290586630286493, - "grad_norm": 0.012882530689239502, - "learning_rate": 5.5162684300523796e-05, - "loss": 0.0069836869835853575, - "step": 2095 - }, - { - "epoch": 1.4324693042291952, - "grad_norm": 0.44880563020706177, - "learning_rate": 5.507218719168204e-05, - "loss": 0.006641192734241486, - "step": 2100 - }, - { - "epoch": 1.4358799454297408, - "grad_norm": 0.03564910218119621, - "learning_rate": 5.498155876187654e-05, - "loss": 0.03126881420612335, - "step": 2105 - }, - { - "epoch": 1.4392905866302865, - "grad_norm": 0.12222933769226074, - "learning_rate": 5.48907996883942e-05, - "loss": 0.0010469739325344562, - "step": 2110 - }, - { - "epoch": 1.4427012278308322, - "grad_norm": 0.2652647793292999, - "learning_rate": 5.4799910649498316e-05, - "loss": 0.004861130565404892, - "step": 2115 - }, - { - "epoch": 1.446111869031378, - "grad_norm": 0.45194199681282043, - "learning_rate": 5.4708892324423375e-05, - "loss": 0.03450656533241272, - "step": 2120 - }, - { - "epoch": 1.4495225102319236, - "grad_norm": 7.245598316192627, - "learning_rate": 5.4617745393370124e-05, - "loss": 0.020797762274742126, - "step": 2125 - }, - { - "epoch": 1.4529331514324693, - "grad_norm": 0.003704208880662918, - "learning_rate": 5.452647053750035e-05, - "loss": 0.0023305635899305344, - "step": 2130 - }, - { - "epoch": 1.456343792633015, - "grad_norm": 0.1513793021440506, - "learning_rate": 5.4435068438931866e-05, - "loss": 0.004804682731628418, - "step": 2135 - }, - { - "epoch": 1.4597544338335606, - "grad_norm": 0.8121495246887207, - "learning_rate": 5.434353978073342e-05, - "loss": 0.012413371354341507, - "step": 2140 - }, - { - "epoch": 1.4631650750341065, - "grad_norm": 0.01748906634747982, - "learning_rate": 5.425188524691956e-05, - "loss": 0.004680215567350388, - "step": 2145 - }, - { - "epoch": 1.4665757162346522, - "grad_norm": 0.8548330068588257, - "learning_rate": 5.4160105522445514e-05, - "loss": 0.023970893025398253, - "step": 2150 - }, - { - "epoch": 1.469986357435198, - "grad_norm": 0.005144836381077766, - "learning_rate": 5.406820129320212e-05, - "loss": 0.031422802805900575, - "step": 2155 - }, - { - "epoch": 1.4733969986357436, - "grad_norm": 0.5495908856391907, - "learning_rate": 5.397617324601062e-05, - "loss": 0.019562892615795135, - "step": 2160 - }, - { - "epoch": 1.4768076398362893, - "grad_norm": 0.0021735087502747774, - "learning_rate": 5.388402206861764e-05, - "loss": 0.01983659714460373, - "step": 2165 - }, - { - "epoch": 1.480218281036835, - "grad_norm": 0.06112198159098625, - "learning_rate": 5.3791748449689934e-05, - "loss": 0.0020502086728811262, - "step": 2170 - }, - { - "epoch": 1.4836289222373806, - "grad_norm": 0.7758064866065979, - "learning_rate": 5.36993530788093e-05, - "loss": 0.05557147264480591, - "step": 2175 - }, - { - "epoch": 1.4870395634379263, - "grad_norm": 0.03924431651830673, - "learning_rate": 5.360683664646744e-05, - "loss": 0.0065080620348453525, - "step": 2180 - }, - { - "epoch": 1.490450204638472, - "grad_norm": 0.03558855503797531, - "learning_rate": 5.351419984406074e-05, - "loss": 0.013747562468051911, - "step": 2185 - }, - { - "epoch": 1.4938608458390177, - "grad_norm": 0.018586739897727966, - "learning_rate": 5.3421443363885186e-05, - "loss": 0.006936004757881165, - "step": 2190 - }, - { - "epoch": 1.4972714870395634, - "grad_norm": 0.02448296919465065, - "learning_rate": 5.332856789913109e-05, - "loss": 0.0032054468989372253, - "step": 2195 - }, - { - "epoch": 1.4979536152796726, - "eval_loss": 0.05913596600294113, - "eval_runtime": 0.906, - "eval_samples_per_second": 82.784, - "eval_steps_per_second": 2.208, - "step": 2196 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2196 - }, - { - "epoch": 1.500682128240109, - "grad_norm": 0.5655078291893005, - "learning_rate": 5.3235574143878004e-05, - "loss": 0.02095775157213211, - "step": 2200 - }, - { - "epoch": 1.5040927694406547, - "grad_norm": 0.3196074366569519, - "learning_rate": 5.314246279308946e-05, - "loss": 0.03257318735122681, - "step": 2205 - }, - { - "epoch": 1.5075034106412004, - "grad_norm": 0.9191421866416931, - "learning_rate": 5.304923454260784e-05, - "loss": 0.019658437371253966, - "step": 2210 - }, - { - "epoch": 1.510914051841746, - "grad_norm": 0.13027027249336243, - "learning_rate": 5.2955890089149125e-05, - "loss": 0.007904764264822006, - "step": 2215 - }, - { - "epoch": 1.514324693042292, - "grad_norm": 0.4703820049762726, - "learning_rate": 5.286243013029772e-05, - "loss": 0.041682767868041995, - "step": 2220 - }, - { - "epoch": 1.5177353342428377, - "grad_norm": 0.03042331151664257, - "learning_rate": 5.2768855364501194e-05, - "loss": 0.000719944667071104, - "step": 2225 - }, - { - "epoch": 1.5211459754433834, - "grad_norm": 0.049632977694272995, - "learning_rate": 5.267516649106514e-05, - "loss": 0.01883329451084137, - "step": 2230 - }, - { - "epoch": 1.524556616643929, - "grad_norm": 0.5063273310661316, - "learning_rate": 5.258136421014788e-05, - "loss": 0.01596176326274872, - "step": 2235 - }, - { - "epoch": 1.5279672578444747, - "grad_norm": 1.134919285774231, - "learning_rate": 5.248744922275524e-05, - "loss": 0.011541023850440979, - "step": 2240 - }, - { - "epoch": 1.5313778990450204, - "grad_norm": 0.28322428464889526, - "learning_rate": 5.2393422230735386e-05, - "loss": 0.004992625117301941, - "step": 2245 - }, - { - "epoch": 1.5347885402455663, - "grad_norm": 2.1087021827697754, - "learning_rate": 5.2299283936773434e-05, - "loss": 0.028424540162086488, - "step": 2250 - }, - { - "epoch": 1.538199181446112, - "grad_norm": 0.022644788026809692, - "learning_rate": 5.2205035044386364e-05, - "loss": 0.0054498337209224704, - "step": 2255 - }, - { - "epoch": 1.5416098226466577, - "grad_norm": 0.06426636874675751, - "learning_rate": 5.2110676257917646e-05, - "loss": 0.012433752417564392, - "step": 2260 - }, - { - "epoch": 1.5450204638472034, - "grad_norm": 0.24290472269058228, - "learning_rate": 5.2016208282532014e-05, - "loss": 0.004430773109197617, - "step": 2265 - }, - { - "epoch": 1.548431105047749, - "grad_norm": 0.2007899135351181, - "learning_rate": 5.19216318242102e-05, - "loss": 0.0474501758813858, - "step": 2270 - }, - { - "epoch": 1.5518417462482947, - "grad_norm": 0.09467290341854095, - "learning_rate": 5.182694758974365e-05, - "loss": 0.0030128231272101404, - "step": 2275 - }, - { - "epoch": 1.5552523874488404, - "grad_norm": 0.03324189782142639, - "learning_rate": 5.173215628672925e-05, - "loss": 0.009206626564264297, - "step": 2280 - }, - { - "epoch": 1.558663028649386, - "grad_norm": 0.004646006040275097, - "learning_rate": 5.163725862356403e-05, - "loss": 0.0049092542380094525, - "step": 2285 - }, - { - "epoch": 1.5620736698499318, - "grad_norm": 0.5874412655830383, - "learning_rate": 5.1542255309439885e-05, - "loss": 0.004077530652284622, - "step": 2290 - }, - { - "epoch": 1.5654843110504775, - "grad_norm": 0.0026035842020064592, - "learning_rate": 5.1447147054338254e-05, - "loss": 0.00545373484492302, - "step": 2295 - }, - { - "epoch": 1.5688949522510232, - "grad_norm": 0.012637780047953129, - "learning_rate": 5.135193456902482e-05, - "loss": 0.010747735947370529, - "step": 2300 - }, - { - "epoch": 1.5723055934515688, - "grad_norm": 0.3359721302986145, - "learning_rate": 5.12566185650442e-05, - "loss": 0.007188577950000763, - "step": 2305 - }, - { - "epoch": 1.5757162346521145, - "grad_norm": 0.020812660455703735, - "learning_rate": 5.116119975471467e-05, - "loss": 0.011499383300542832, - "step": 2310 - }, - { - "epoch": 1.5791268758526602, - "grad_norm": 3.0099117755889893, - "learning_rate": 5.106567885112272e-05, - "loss": 0.022649967670440675, - "step": 2315 - }, - { - "epoch": 1.5825375170532059, - "grad_norm": 0.012876384891569614, - "learning_rate": 5.097005656811788e-05, - "loss": 0.009297363460063934, - "step": 2320 - }, - { - "epoch": 1.5859481582537516, - "grad_norm": 1.328519344329834, - "learning_rate": 5.0874333620307305e-05, - "loss": 0.017031437158584593, - "step": 2325 - }, - { - "epoch": 1.5893587994542973, - "grad_norm": 1.3355894088745117, - "learning_rate": 5.0778510723050386e-05, - "loss": 0.004430291429162026, - "step": 2330 - }, - { - "epoch": 1.5927694406548432, - "grad_norm": 1.1440656185150146, - "learning_rate": 5.068258859245352e-05, - "loss": 0.003388546034693718, - "step": 2335 - }, - { - "epoch": 1.5961800818553888, - "grad_norm": 1.7381155490875244, - "learning_rate": 5.0586567945364654e-05, - "loss": 0.012762372195720673, - "step": 2340 - }, - { - "epoch": 1.5995907230559345, - "grad_norm": 0.7628080248832703, - "learning_rate": 5.0490449499368e-05, - "loss": 0.02783372700214386, - "step": 2345 - }, - { - "epoch": 1.6030013642564802, - "grad_norm": 0.12800562381744385, - "learning_rate": 5.039423397277864e-05, - "loss": 0.01945704221725464, - "step": 2350 - }, - { - "epoch": 1.606412005457026, - "grad_norm": 0.39633259177207947, - "learning_rate": 5.029792208463714e-05, - "loss": 0.054477882385253903, - "step": 2355 - }, - { - "epoch": 1.6098226466575716, - "grad_norm": 3.504084587097168, - "learning_rate": 5.0201514554704213e-05, - "loss": 0.0472748190164566, - "step": 2360 - }, - { - "epoch": 1.6132332878581175, - "grad_norm": 0.20941582322120667, - "learning_rate": 5.0105012103455346e-05, - "loss": 0.007487633824348449, - "step": 2365 - }, - { - "epoch": 1.6166439290586632, - "grad_norm": 0.03847242146730423, - "learning_rate": 5.000841545207534e-05, - "loss": 0.003787395358085632, - "step": 2370 - }, - { - "epoch": 1.6200545702592088, - "grad_norm": 0.001856139744631946, - "learning_rate": 4.9911725322453036e-05, - "loss": 0.011801865696907044, - "step": 2375 - }, - { - "epoch": 1.6234652114597545, - "grad_norm": 1.0232354402542114, - "learning_rate": 4.981494243717581e-05, - "loss": 0.004162260890007019, - "step": 2380 - }, - { - "epoch": 1.6268758526603002, - "grad_norm": 0.005511613562703133, - "learning_rate": 4.971806751952427e-05, - "loss": 0.003771597146987915, - "step": 2385 - }, - { - "epoch": 1.630286493860846, - "grad_norm": 0.01450763177126646, - "learning_rate": 4.962110129346675e-05, - "loss": 0.06707316637039185, - "step": 2390 - }, - { - "epoch": 1.6336971350613916, - "grad_norm": 0.03073696792125702, - "learning_rate": 4.952404448365399e-05, - "loss": 0.05193127393722534, - "step": 2395 - }, - { - "epoch": 1.6371077762619373, - "grad_norm": 0.10307765007019043, - "learning_rate": 4.9426897815413666e-05, - "loss": 0.0354169100522995, - "step": 2400 - }, - { - "epoch": 1.640518417462483, - "grad_norm": 0.01193988136947155, - "learning_rate": 4.9329662014745006e-05, - "loss": 0.042882445454597476, - "step": 2405 - }, - { - "epoch": 1.6439290586630286, - "grad_norm": 0.3846999704837799, - "learning_rate": 4.923233780831333e-05, - "loss": 0.017827124893665315, - "step": 2410 - }, - { - "epoch": 1.6473396998635743, - "grad_norm": 3.323974847793579, - "learning_rate": 4.9134925923444614e-05, - "loss": 0.05941871404647827, - "step": 2415 - }, - { - "epoch": 1.65075034106412, - "grad_norm": 0.036679740995168686, - "learning_rate": 4.9037427088120124e-05, - "loss": 0.006155381351709366, - "step": 2420 - }, - { - "epoch": 1.6541609822646657, - "grad_norm": 0.5567948222160339, - "learning_rate": 4.8939842030970876e-05, - "loss": 0.029164910316467285, - "step": 2425 - }, - { - "epoch": 1.6575716234652114, - "grad_norm": 0.046739183366298676, - "learning_rate": 4.884217148127228e-05, - "loss": 0.00716848075389862, - "step": 2430 - }, - { - "epoch": 1.660982264665757, - "grad_norm": 0.13504035770893097, - "learning_rate": 4.8744416168938645e-05, - "loss": 0.003317892923951149, - "step": 2435 - }, - { - "epoch": 1.6643929058663027, - "grad_norm": 0.06312979012727737, - "learning_rate": 4.864657682451769e-05, - "loss": 0.01881844997406006, - "step": 2440 - }, - { - "epoch": 1.6678035470668484, - "grad_norm": 0.09641221910715103, - "learning_rate": 4.8548654179185184e-05, - "loss": 0.005701170116662979, - "step": 2445 - }, - { - "epoch": 1.6712141882673943, - "grad_norm": 0.2802797555923462, - "learning_rate": 4.84506489647394e-05, - "loss": 0.03824037313461304, - "step": 2450 - }, - { - "epoch": 1.67462482946794, - "grad_norm": 0.12466538697481155, - "learning_rate": 4.8352561913595644e-05, - "loss": 0.11009746789932251, - "step": 2455 - }, - { - "epoch": 1.6780354706684857, - "grad_norm": 0.07567616552114487, - "learning_rate": 4.825439375878083e-05, - "loss": 0.005253869295120239, - "step": 2460 - }, - { - "epoch": 1.6814461118690314, - "grad_norm": 1.185194492340088, - "learning_rate": 4.815614523392798e-05, - "loss": 0.025198251008987427, - "step": 2465 - }, - { - "epoch": 1.684856753069577, - "grad_norm": 1.530340552330017, - "learning_rate": 4.805781707327073e-05, - "loss": 0.010728911310434342, - "step": 2470 - }, - { - "epoch": 1.6882673942701227, - "grad_norm": 0.006984261330217123, - "learning_rate": 4.795941001163787e-05, - "loss": 0.006418578326702118, - "step": 2475 - }, - { - "epoch": 1.6916780354706686, - "grad_norm": 0.027223482728004456, - "learning_rate": 4.78609247844478e-05, - "loss": 0.0029812423512339593, - "step": 2480 - }, - { - "epoch": 1.6950886766712143, - "grad_norm": 0.030092809349298477, - "learning_rate": 4.776236212770311e-05, - "loss": 0.02785273492336273, - "step": 2485 - }, - { - "epoch": 1.69849931787176, - "grad_norm": 0.5638413429260254, - "learning_rate": 4.7663722777985006e-05, - "loss": 0.033540394902229306, - "step": 2490 - }, - { - "epoch": 1.7019099590723057, - "grad_norm": 0.20694783329963684, - "learning_rate": 4.756500747244786e-05, - "loss": 0.01764456629753113, - "step": 2495 - }, - { - "epoch": 1.7053206002728514, - "grad_norm": 2.3641645908355713, - "learning_rate": 4.746621694881367e-05, - "loss": 0.05572155714035034, - "step": 2500 - }, - { - "epoch": 1.708731241473397, - "grad_norm": 0.007526062428951263, - "learning_rate": 4.736735194536656e-05, - "loss": 0.003397466242313385, - "step": 2505 - }, - { - "epoch": 1.7121418826739427, - "grad_norm": 0.006733228452503681, - "learning_rate": 4.7268413200947256e-05, - "loss": 0.016803480684757233, - "step": 2510 - }, - { - "epoch": 1.7155525238744884, - "grad_norm": 0.6736524105072021, - "learning_rate": 4.716940145494756e-05, - "loss": 0.02161557227373123, - "step": 2515 - }, - { - "epoch": 1.718963165075034, - "grad_norm": 0.5968140959739685, - "learning_rate": 4.7070317447304834e-05, - "loss": 0.010566375404596328, - "step": 2520 - }, - { - "epoch": 1.7223738062755798, - "grad_norm": 0.7741436958312988, - "learning_rate": 4.697116191849649e-05, - "loss": 0.007695452868938446, - "step": 2525 - }, - { - "epoch": 1.7257844474761255, - "grad_norm": 2.7030420303344727, - "learning_rate": 4.6871935609534385e-05, - "loss": 0.01793634444475174, - "step": 2530 - }, - { - "epoch": 1.7291950886766712, - "grad_norm": 0.20148785412311554, - "learning_rate": 4.67726392619594e-05, - "loss": 0.008627812564373016, - "step": 2535 - }, - { - "epoch": 1.7326057298772168, - "grad_norm": 1.2200349569320679, - "learning_rate": 4.667327361783577e-05, - "loss": 0.024143791198730467, - "step": 2540 - }, - { - "epoch": 1.7360163710777625, - "grad_norm": 0.293761670589447, - "learning_rate": 4.657383941974562e-05, - "loss": 0.0015484040603041648, - "step": 2545 - }, - { - "epoch": 1.7394270122783082, - "grad_norm": 1.093536376953125, - "learning_rate": 4.647433741078341e-05, - "loss": 0.03945474326610565, - "step": 2550 - }, - { - "epoch": 1.7428376534788539, - "grad_norm": 0.719284176826477, - "learning_rate": 4.637476833455036e-05, - "loss": 0.013909505307674408, - "step": 2555 - }, - { - "epoch": 1.7462482946793996, - "grad_norm": 0.6357909440994263, - "learning_rate": 4.6275132935148864e-05, - "loss": 0.0144243061542511, - "step": 2560 - }, - { - "epoch": 1.747612551159618, - "eval_loss": 0.06399191915988922, - "eval_runtime": 0.8814, - "eval_samples_per_second": 85.088, - "eval_steps_per_second": 2.269, - "step": 2562 - }, - { - "eval_cer_subset": 0.019484943452786483, - "eval_cer_subset_edit_distance": 143, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2562 - }, - { - "epoch": 1.7496589358799455, - "grad_norm": 0.1577913612127304, - "learning_rate": 4.617543195717702e-05, - "loss": 0.008352726697921753, - "step": 2565 - }, - { - "epoch": 1.7530695770804912, - "grad_norm": 0.0097503075376153, - "learning_rate": 4.607566614572297e-05, - "loss": 0.0051550988107919695, - "step": 2570 - }, - { - "epoch": 1.7564802182810368, - "grad_norm": 0.0745258778333664, - "learning_rate": 4.5975836246359376e-05, - "loss": 0.021304388344287873, - "step": 2575 - }, - { - "epoch": 1.7598908594815825, - "grad_norm": 0.06805134564638138, - "learning_rate": 4.5875943005137875e-05, - "loss": 0.002654948644340038, - "step": 2580 - }, - { - "epoch": 1.7633015006821282, - "grad_norm": 0.005402611568570137, - "learning_rate": 4.577598716858342e-05, - "loss": 0.0005614575464278459, - "step": 2585 - }, - { - "epoch": 1.766712141882674, - "grad_norm": 1.909899115562439, - "learning_rate": 4.5675969483688796e-05, - "loss": 0.07116572856903076, - "step": 2590 - }, - { - "epoch": 1.7701227830832198, - "grad_norm": 0.31900784373283386, - "learning_rate": 4.557589069790897e-05, - "loss": 0.00657682865858078, - "step": 2595 - }, - { - "epoch": 1.7735334242837655, - "grad_norm": 0.029744356870651245, - "learning_rate": 4.547575155915556e-05, - "loss": 0.022768501937389374, - "step": 2600 - }, - { - "epoch": 1.7769440654843112, - "grad_norm": 0.3033762276172638, - "learning_rate": 4.537555281579118e-05, - "loss": 0.06703370213508605, - "step": 2605 - }, - { - "epoch": 1.7803547066848568, - "grad_norm": 0.1135796457529068, - "learning_rate": 4.5275295216623895e-05, - "loss": 0.02318609356880188, - "step": 2610 - }, - { - "epoch": 1.7837653478854025, - "grad_norm": 0.03714317828416824, - "learning_rate": 4.517497951090163e-05, - "loss": 0.0006621151696890593, - "step": 2615 - }, - { - "epoch": 1.7871759890859482, - "grad_norm": 0.022167189046740532, - "learning_rate": 4.507460644830652e-05, - "loss": 0.02861899733543396, - "step": 2620 - }, - { - "epoch": 1.790586630286494, - "grad_norm": 0.85112464427948, - "learning_rate": 4.497417677894937e-05, - "loss": 0.006332498788833618, - "step": 2625 - }, - { - "epoch": 1.7939972714870396, - "grad_norm": 0.35604724287986755, - "learning_rate": 4.487369125336402e-05, - "loss": 0.0009636864997446537, - "step": 2630 - }, - { - "epoch": 1.7974079126875853, - "grad_norm": 1.309139370918274, - "learning_rate": 4.477315062250173e-05, - "loss": 0.015147030353546143, - "step": 2635 - }, - { - "epoch": 1.800818553888131, - "grad_norm": 0.00445478456094861, - "learning_rate": 4.467255563772554e-05, - "loss": 0.0033130761235952376, - "step": 2640 - }, - { - "epoch": 1.8042291950886766, - "grad_norm": 0.6081591248512268, - "learning_rate": 4.457190705080476e-05, - "loss": 0.05884039998054504, - "step": 2645 - }, - { - "epoch": 1.8076398362892223, - "grad_norm": 0.021707098931074142, - "learning_rate": 4.4471205613909215e-05, - "loss": 0.011024921387434005, - "step": 2650 - }, - { - "epoch": 1.811050477489768, - "grad_norm": 0.07584571093320847, - "learning_rate": 4.437045207960372e-05, - "loss": 0.0034543585032224657, - "step": 2655 - }, - { - "epoch": 1.8144611186903137, - "grad_norm": 0.09392323344945908, - "learning_rate": 4.4269647200842444e-05, - "loss": 0.020604337751865386, - "step": 2660 - }, - { - "epoch": 1.8178717598908594, - "grad_norm": 0.01986370049417019, - "learning_rate": 4.4168791730963214e-05, - "loss": 0.002114328555762768, - "step": 2665 - }, - { - "epoch": 1.821282401091405, - "grad_norm": 0.09479828178882599, - "learning_rate": 4.406788642368199e-05, - "loss": 0.025032815337181092, - "step": 2670 - }, - { - "epoch": 1.8246930422919507, - "grad_norm": 0.049587834626436234, - "learning_rate": 4.396693203308714e-05, - "loss": 0.010216309875249862, - "step": 2675 - }, - { - "epoch": 1.8281036834924966, - "grad_norm": 0.8442233204841614, - "learning_rate": 4.3865929313633846e-05, - "loss": 0.028453707695007324, - "step": 2680 - }, - { - "epoch": 1.8315143246930423, - "grad_norm": 1.1214258670806885, - "learning_rate": 4.37648790201385e-05, - "loss": 0.030564960837364197, - "step": 2685 - }, - { - "epoch": 1.834924965893588, - "grad_norm": 0.0033480043057352304, - "learning_rate": 4.3663781907772984e-05, - "loss": 0.0031189337372779847, - "step": 2690 - }, - { - "epoch": 1.8383356070941337, - "grad_norm": 1.5848685503005981, - "learning_rate": 4.356263873205908e-05, - "loss": 0.006724410504102707, - "step": 2695 - }, - { - "epoch": 1.8417462482946794, - "grad_norm": 2.099224090576172, - "learning_rate": 4.346145024886282e-05, - "loss": 0.02901224195957184, - "step": 2700 - }, - { - "epoch": 1.845156889495225, - "grad_norm": 0.009933914057910442, - "learning_rate": 4.336021721438881e-05, - "loss": 0.01628301590681076, - "step": 2705 - }, - { - "epoch": 1.848567530695771, - "grad_norm": 0.36502084136009216, - "learning_rate": 4.325894038517463e-05, - "loss": 0.0063889890909194945, - "step": 2710 - }, - { - "epoch": 1.8519781718963166, - "grad_norm": 0.5338266491889954, - "learning_rate": 4.3157620518085123e-05, - "loss": 0.0017654186114668847, - "step": 2715 - }, - { - "epoch": 1.8553888130968623, - "grad_norm": 0.019173067063093185, - "learning_rate": 4.3056258370306773e-05, - "loss": 0.026314160227775572, - "step": 2720 - }, - { - "epoch": 1.858799454297408, - "grad_norm": 0.0022459065075963736, - "learning_rate": 4.295485469934203e-05, - "loss": 0.034506088495254515, - "step": 2725 - }, - { - "epoch": 1.8622100954979537, - "grad_norm": 0.009851609356701374, - "learning_rate": 4.285341026300366e-05, - "loss": 0.037691861391067505, - "step": 2730 - }, - { - "epoch": 1.8656207366984994, - "grad_norm": 0.037228964269161224, - "learning_rate": 4.275192581940908e-05, - "loss": 0.019535519182682037, - "step": 2735 - }, - { - "epoch": 1.869031377899045, - "grad_norm": 0.03244785964488983, - "learning_rate": 4.2650402126974704e-05, - "loss": 0.004778595641255379, - "step": 2740 - }, - { - "epoch": 1.8724420190995907, - "grad_norm": 0.027707895264029503, - "learning_rate": 4.254883994441023e-05, - "loss": 0.014376556873321534, - "step": 2745 - }, - { - "epoch": 1.8758526603001364, - "grad_norm": 0.0010299253044649959, - "learning_rate": 4.244724003071302e-05, - "loss": 0.03207484483718872, - "step": 2750 - }, - { - "epoch": 1.879263301500682, - "grad_norm": 0.09466377645730972, - "learning_rate": 4.234560314516241e-05, - "loss": 0.0034012068063020706, - "step": 2755 - }, - { - "epoch": 1.8826739427012278, - "grad_norm": 0.010737070813775063, - "learning_rate": 4.224393004731403e-05, - "loss": 0.0015277769416570663, - "step": 2760 - }, - { - "epoch": 1.8860845839017735, - "grad_norm": 0.0126175656914711, - "learning_rate": 4.2142221496994144e-05, - "loss": 0.0010683752596378326, - "step": 2765 - }, - { - "epoch": 1.8894952251023192, - "grad_norm": 0.03981072083115578, - "learning_rate": 4.2040478254293946e-05, - "loss": 0.013066369295120239, - "step": 2770 - }, - { - "epoch": 1.8929058663028648, - "grad_norm": 1.678425669670105, - "learning_rate": 4.193870107956391e-05, - "loss": 0.04245063066482544, - "step": 2775 - }, - { - "epoch": 1.8963165075034105, - "grad_norm": 0.21114972233772278, - "learning_rate": 4.1836890733408063e-05, - "loss": 0.006565409898757935, - "step": 2780 - }, - { - "epoch": 1.8997271487039562, - "grad_norm": 0.13557757437229156, - "learning_rate": 4.173504797667836e-05, - "loss": 0.0032049473375082016, - "step": 2785 - }, - { - "epoch": 1.9031377899045019, - "grad_norm": 0.03560245409607887, - "learning_rate": 4.163317357046896e-05, - "loss": 0.036527630686759946, - "step": 2790 - }, - { - "epoch": 1.9065484311050478, - "grad_norm": 0.016457395628094673, - "learning_rate": 4.1531268276110534e-05, - "loss": 0.004363229870796204, - "step": 2795 - }, - { - "epoch": 1.9099590723055935, - "grad_norm": 0.15892116725444794, - "learning_rate": 4.142933285516459e-05, - "loss": 0.0009642043150961399, - "step": 2800 - }, - { - "epoch": 1.9133697135061392, - "grad_norm": 1.4503952264785767, - "learning_rate": 4.1327368069417805e-05, - "loss": 0.016029161214828492, - "step": 2805 - }, - { - "epoch": 1.9167803547066848, - "grad_norm": 0.05091691017150879, - "learning_rate": 4.122537468087626e-05, - "loss": 0.008691015094518662, - "step": 2810 - }, - { - "epoch": 1.9201909959072305, - "grad_norm": 0.33131423592567444, - "learning_rate": 4.1123353451759843e-05, - "loss": 0.026498579978942872, - "step": 2815 - }, - { - "epoch": 1.9236016371077762, - "grad_norm": 0.011024169623851776, - "learning_rate": 4.1021305144496455e-05, - "loss": 0.005746996402740479, - "step": 2820 - }, - { - "epoch": 1.9270122783083221, - "grad_norm": 0.003868364728987217, - "learning_rate": 4.091923052171637e-05, - "loss": 0.010599984973669051, - "step": 2825 - }, - { - "epoch": 1.9304229195088678, - "grad_norm": 0.06572377681732178, - "learning_rate": 4.081713034624656e-05, - "loss": 0.0074796505272388455, - "step": 2830 - }, - { - "epoch": 1.9338335607094135, - "grad_norm": 0.004749608226120472, - "learning_rate": 4.07150053811049e-05, - "loss": 0.001074880175292492, - "step": 2835 - }, - { - "epoch": 1.9372442019099592, - "grad_norm": 1.0662771463394165, - "learning_rate": 4.061285638949456e-05, - "loss": 0.012685902416706085, - "step": 2840 - }, - { - "epoch": 1.9406548431105048, - "grad_norm": 0.05863015726208687, - "learning_rate": 4.0510684134798275e-05, - "loss": 0.02307589054107666, - "step": 2845 - }, - { - "epoch": 1.9440654843110505, - "grad_norm": 0.1672995239496231, - "learning_rate": 4.0408489380572595e-05, - "loss": 0.0009137367829680443, - "step": 2850 - }, - { - "epoch": 1.9474761255115962, - "grad_norm": 0.37154069542884827, - "learning_rate": 4.030627289054224e-05, - "loss": 0.03070145845413208, - "step": 2855 - }, - { - "epoch": 1.950886766712142, - "grad_norm": 1.7240241765975952, - "learning_rate": 4.020403542859436e-05, - "loss": 0.02094976305961609, - "step": 2860 - }, - { - "epoch": 1.9542974079126876, - "grad_norm": 0.019670270383358, - "learning_rate": 4.0101777758772826e-05, - "loss": 0.002072345092892647, - "step": 2865 - }, - { - "epoch": 1.9577080491132333, - "grad_norm": 0.004318730439990759, - "learning_rate": 3.999950064527255e-05, - "loss": 0.004361823201179504, - "step": 2870 - }, - { - "epoch": 1.961118690313779, - "grad_norm": 0.01702667959034443, - "learning_rate": 3.989720485243371e-05, - "loss": 0.0062848575413227085, - "step": 2875 - }, - { - "epoch": 1.9645293315143246, - "grad_norm": 0.011724979616701603, - "learning_rate": 3.9794891144736114e-05, - "loss": 0.010996624082326888, - "step": 2880 - }, - { - "epoch": 1.9679399727148703, - "grad_norm": 0.05676786229014397, - "learning_rate": 3.9692560286793454e-05, - "loss": 0.0015414996072649957, - "step": 2885 - }, - { - "epoch": 1.971350613915416, - "grad_norm": 0.0819210484623909, - "learning_rate": 3.959021304334756e-05, - "loss": 0.0008444737643003464, - "step": 2890 - }, - { - "epoch": 1.9747612551159617, - "grad_norm": 2.654984951019287, - "learning_rate": 3.948785017926274e-05, - "loss": 0.029948851466178893, - "step": 2895 - }, - { - "epoch": 1.9781718963165074, - "grad_norm": 1.9272631406784058, - "learning_rate": 3.938547245952003e-05, - "loss": 0.028752812743186952, - "step": 2900 - }, - { - "epoch": 1.981582537517053, - "grad_norm": 1.8656548261642456, - "learning_rate": 3.9283080649211474e-05, - "loss": 0.013515619933605194, - "step": 2905 - }, - { - "epoch": 1.984993178717599, - "grad_norm": 0.12653613090515137, - "learning_rate": 3.918067551353445e-05, - "loss": 0.0011569897644221783, - "step": 2910 - }, - { - "epoch": 1.9884038199181446, - "grad_norm": 0.07956118136644363, - "learning_rate": 3.9078257817785886e-05, - "loss": 0.024067461490631104, - "step": 2915 - }, - { - "epoch": 1.9918144611186903, - "grad_norm": 0.05314113199710846, - "learning_rate": 3.897582832735658e-05, - "loss": 0.008826743811368942, - "step": 2920 - }, - { - "epoch": 1.995225102319236, - "grad_norm": 0.05015930160880089, - "learning_rate": 3.887338780772551e-05, - "loss": 0.017050875723361968, - "step": 2925 - }, - { - "epoch": 1.9972714870395634, - "eval_loss": 0.051675114780664444, - "eval_runtime": 0.9019, - "eval_samples_per_second": 83.154, - "eval_steps_per_second": 2.217, - "step": 2928 - }, - { - "eval_cer_subset": 0.016214743153018123, - "eval_cer_subset_edit_distance": 119, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 2928 - }, - { - "epoch": 1.9986357435197817, - "grad_norm": 0.0063513885252177715, - "learning_rate": 3.8770937024454024e-05, - "loss": 0.06096935272216797, - "step": 2930 - }, - { - "epoch": 2.0020463847203276, - "grad_norm": 0.3572078347206116, - "learning_rate": 3.86684767431802e-05, - "loss": 0.006809630990028381, - "step": 2935 - }, - { - "epoch": 2.0054570259208733, - "grad_norm": 0.06565147638320923, - "learning_rate": 3.8566007729613116e-05, - "loss": 0.0028434153646230698, - "step": 2940 - }, - { - "epoch": 2.008867667121419, - "grad_norm": 0.842980682849884, - "learning_rate": 3.846353074952705e-05, - "loss": 0.009484797716140747, - "step": 2945 - }, - { - "epoch": 2.0122783083219646, - "grad_norm": 0.2072802633047104, - "learning_rate": 3.83610465687559e-05, - "loss": 0.002631821669638157, - "step": 2950 - }, - { - "epoch": 2.0156889495225103, - "grad_norm": 0.010441784746944904, - "learning_rate": 3.8258555953187294e-05, - "loss": 0.0031868770718574526, - "step": 2955 - }, - { - "epoch": 2.019099590723056, - "grad_norm": 0.01504182443022728, - "learning_rate": 3.8156059668756994e-05, - "loss": 0.0008036898449063301, - "step": 2960 - }, - { - "epoch": 2.0225102319236017, - "grad_norm": 0.1884382665157318, - "learning_rate": 3.805355848144312e-05, - "loss": 0.0035643450915813445, - "step": 2965 - }, - { - "epoch": 2.0259208731241474, - "grad_norm": 0.0791923776268959, - "learning_rate": 3.795105315726042e-05, - "loss": 0.006217213720083237, - "step": 2970 - }, - { - "epoch": 2.029331514324693, - "grad_norm": 0.3660128116607666, - "learning_rate": 3.7848544462254586e-05, - "loss": 0.01521536111831665, - "step": 2975 - }, - { - "epoch": 2.0327421555252387, - "grad_norm": 1.1506773233413696, - "learning_rate": 3.774603316249646e-05, - "loss": 0.016655027866363525, - "step": 2980 - }, - { - "epoch": 2.0361527967257844, - "grad_norm": 0.010742763057351112, - "learning_rate": 3.764352002407638e-05, - "loss": 0.0031856697052717207, - "step": 2985 - }, - { - "epoch": 2.03956343792633, - "grad_norm": 0.008567198179662228, - "learning_rate": 3.754100581309843e-05, - "loss": 0.006546307355165482, - "step": 2990 - }, - { - "epoch": 2.042974079126876, - "grad_norm": 0.242637500166893, - "learning_rate": 3.743849129567467e-05, - "loss": 0.01844196617603302, - "step": 2995 - }, - { - "epoch": 2.0463847203274215, - "grad_norm": 0.0021864245645701885, - "learning_rate": 3.7335977237919486e-05, - "loss": 0.0010665619745850563, - "step": 3000 - }, - { - "epoch": 2.049795361527967, - "grad_norm": 0.0823604166507721, - "learning_rate": 3.723346440594384e-05, - "loss": 0.007866787165403366, - "step": 3005 - }, - { - "epoch": 2.053206002728513, - "grad_norm": 1.950189471244812, - "learning_rate": 3.713095356584948e-05, - "loss": 0.008237652480602264, - "step": 3010 - }, - { - "epoch": 2.0566166439290585, - "grad_norm": 0.0031665184069424868, - "learning_rate": 3.702844548372333e-05, - "loss": 0.0026241108775138856, - "step": 3015 - }, - { - "epoch": 2.060027285129604, - "grad_norm": 0.000985809718258679, - "learning_rate": 3.692594092563164e-05, - "loss": 0.0006582608446478843, - "step": 3020 - }, - { - "epoch": 2.06343792633015, - "grad_norm": 0.005130598321557045, - "learning_rate": 3.682344065761439e-05, - "loss": 0.003146781027317047, - "step": 3025 - }, - { - "epoch": 2.0668485675306956, - "grad_norm": 0.37476831674575806, - "learning_rate": 3.6720945445679456e-05, - "loss": 0.0038135331124067307, - "step": 3030 - }, - { - "epoch": 2.0702592087312413, - "grad_norm": 0.7057031989097595, - "learning_rate": 3.661845605579694e-05, - "loss": 0.01638354957103729, - "step": 3035 - }, - { - "epoch": 2.0736698499317874, - "grad_norm": 0.002361712511628866, - "learning_rate": 3.651597325389343e-05, - "loss": 0.008126144856214523, - "step": 3040 - }, - { - "epoch": 2.077080491132333, - "grad_norm": 0.14359615743160248, - "learning_rate": 3.641349780584628e-05, - "loss": 0.0010236113332211972, - "step": 3045 - }, - { - "epoch": 2.0804911323328787, - "grad_norm": 0.02165267802774906, - "learning_rate": 3.631103047747791e-05, - "loss": 0.001835300400853157, - "step": 3050 - }, - { - "epoch": 2.0839017735334244, - "grad_norm": 0.08726503700017929, - "learning_rate": 3.6208572034550014e-05, - "loss": 0.0011202219873666763, - "step": 3055 - }, - { - "epoch": 2.08731241473397, - "grad_norm": 0.011694432236254215, - "learning_rate": 3.6106123242757934e-05, - "loss": 0.0012682409957051276, - "step": 3060 - }, - { - "epoch": 2.090723055934516, - "grad_norm": 0.011882166378200054, - "learning_rate": 3.6003684867724856e-05, - "loss": 0.0017605546861886978, - "step": 3065 - }, - { - "epoch": 2.0941336971350615, - "grad_norm": 0.04591360688209534, - "learning_rate": 3.5901257674996135e-05, - "loss": 0.005101381987333298, - "step": 3070 - }, - { - "epoch": 2.097544338335607, - "grad_norm": 0.004696133080869913, - "learning_rate": 3.579884243003353e-05, - "loss": 0.0011597291566431522, - "step": 3075 - }, - { - "epoch": 2.100954979536153, - "grad_norm": 0.15893827378749847, - "learning_rate": 3.5696439898209546e-05, - "loss": 0.004860148951411247, - "step": 3080 - }, - { - "epoch": 2.1043656207366985, - "grad_norm": 0.0052610537968575954, - "learning_rate": 3.559405084480166e-05, - "loss": 0.0022133996710181235, - "step": 3085 - }, - { - "epoch": 2.107776261937244, - "grad_norm": 0.04163965955376625, - "learning_rate": 3.5491676034986634e-05, - "loss": 0.0011653171852231026, - "step": 3090 - }, - { - "epoch": 2.11118690313779, - "grad_norm": 0.03646431118249893, - "learning_rate": 3.538931623383477e-05, - "loss": 0.0010974819771945477, - "step": 3095 - }, - { - "epoch": 2.1145975443383356, - "grad_norm": 0.0027346035931259394, - "learning_rate": 3.5286972206304225e-05, - "loss": 0.0002963356673717499, - "step": 3100 - }, - { - "epoch": 2.1180081855388813, - "grad_norm": 0.030314767733216286, - "learning_rate": 3.5184644717235233e-05, - "loss": 0.0013646245934069157, - "step": 3105 - }, - { - "epoch": 2.121418826739427, - "grad_norm": 0.07366184145212173, - "learning_rate": 3.5082334531344514e-05, - "loss": 0.0018215376883745193, - "step": 3110 - }, - { - "epoch": 2.1248294679399726, - "grad_norm": 0.5131163597106934, - "learning_rate": 3.498004241321938e-05, - "loss": 0.011788859963417053, - "step": 3115 - }, - { - "epoch": 2.1282401091405183, - "grad_norm": 0.005606099497526884, - "learning_rate": 3.48777691273122e-05, - "loss": 0.00023315094877034425, - "step": 3120 - }, - { - "epoch": 2.131650750341064, - "grad_norm": 0.218208909034729, - "learning_rate": 3.4775515437934554e-05, - "loss": 0.0018167193979024888, - "step": 3125 - }, - { - "epoch": 2.1350613915416097, - "grad_norm": 0.026605455204844475, - "learning_rate": 3.467328210925161e-05, - "loss": 0.0012846079654991627, - "step": 3130 - }, - { - "epoch": 2.1384720327421554, - "grad_norm": 1.4139975309371948, - "learning_rate": 3.457106990527632e-05, - "loss": 0.00508112832903862, - "step": 3135 - }, - { - "epoch": 2.141882673942701, - "grad_norm": 0.019414646551012993, - "learning_rate": 3.446887958986385e-05, - "loss": 0.00019377884455025197, - "step": 3140 - }, - { - "epoch": 2.1452933151432467, - "grad_norm": 0.0009443740709684789, - "learning_rate": 3.4366711926705694e-05, - "loss": 0.0002384019084274769, - "step": 3145 - }, - { - "epoch": 2.148703956343793, - "grad_norm": 0.004274521954357624, - "learning_rate": 3.426456767932416e-05, - "loss": 0.0001209982088766992, - "step": 3150 - }, - { - "epoch": 2.1521145975443385, - "grad_norm": 1.536423683166504, - "learning_rate": 3.416244761106645e-05, - "loss": 0.008635792136192321, - "step": 3155 - }, - { - "epoch": 2.155525238744884, - "grad_norm": 0.0005219463491812348, - "learning_rate": 3.406035248509918e-05, - "loss": 0.009876561909914016, - "step": 3160 - }, - { - "epoch": 2.15893587994543, - "grad_norm": 0.0003459984145592898, - "learning_rate": 3.395828306440249e-05, - "loss": 0.00039457883685827254, - "step": 3165 - }, - { - "epoch": 2.1623465211459756, - "grad_norm": 0.001981241861358285, - "learning_rate": 3.3856240111764465e-05, - "loss": 0.0015202089212834834, - "step": 3170 - }, - { - "epoch": 2.1657571623465213, - "grad_norm": 2.1673197746276855, - "learning_rate": 3.375422438977536e-05, - "loss": 0.006145225092768669, - "step": 3175 - }, - { - "epoch": 2.169167803547067, - "grad_norm": 1.1808840036392212, - "learning_rate": 3.365223666082195e-05, - "loss": 0.013607437908649444, - "step": 3180 - }, - { - "epoch": 2.1725784447476126, - "grad_norm": 0.028271734714508057, - "learning_rate": 3.3550277687081766e-05, - "loss": 0.0013325599022209645, - "step": 3185 - }, - { - "epoch": 2.1759890859481583, - "grad_norm": 0.24461407959461212, - "learning_rate": 3.344834823051754e-05, - "loss": 0.0011271734721958638, - "step": 3190 - }, - { - "epoch": 2.179399727148704, - "grad_norm": 0.870476245880127, - "learning_rate": 3.334644905287129e-05, - "loss": 0.006120540574193001, - "step": 3195 - }, - { - "epoch": 2.1828103683492497, - "grad_norm": 0.24630939960479736, - "learning_rate": 3.324458091565887e-05, - "loss": 0.006894994527101517, - "step": 3200 - }, - { - "epoch": 2.1862210095497954, - "grad_norm": 0.035023678094148636, - "learning_rate": 3.314274458016407e-05, - "loss": 0.0012451894581317902, - "step": 3205 - }, - { - "epoch": 2.189631650750341, - "grad_norm": 0.0890582948923111, - "learning_rate": 3.3040940807433086e-05, - "loss": 0.0024930678308010103, - "step": 3210 - }, - { - "epoch": 2.1930422919508867, - "grad_norm": 0.30489957332611084, - "learning_rate": 3.2939170358268715e-05, - "loss": 0.023087967932224274, - "step": 3215 - }, - { - "epoch": 2.1964529331514324, - "grad_norm": 0.0017311271512880921, - "learning_rate": 3.283743399322477e-05, - "loss": 0.010184342414140702, - "step": 3220 - }, - { - "epoch": 2.199863574351978, - "grad_norm": 0.08493661880493164, - "learning_rate": 3.273573247260027e-05, - "loss": 0.02805263102054596, - "step": 3225 - }, - { - "epoch": 2.203274215552524, - "grad_norm": 0.09135634452104568, - "learning_rate": 3.2634066556433934e-05, - "loss": 0.0009638279676437378, - "step": 3230 - }, - { - "epoch": 2.2066848567530695, - "grad_norm": 0.05967738851904869, - "learning_rate": 3.2532437004498316e-05, - "loss": 0.007517358660697937, - "step": 3235 - }, - { - "epoch": 2.210095497953615, - "grad_norm": 0.03030387870967388, - "learning_rate": 3.243084457629425e-05, - "loss": 0.0009239451959729194, - "step": 3240 - }, - { - "epoch": 2.213506139154161, - "grad_norm": 0.004568960517644882, - "learning_rate": 3.2329290031045114e-05, - "loss": 0.00045777852647006513, - "step": 3245 - }, - { - "epoch": 2.2169167803547065, - "grad_norm": 0.20760250091552734, - "learning_rate": 3.2227774127691225e-05, - "loss": 0.0007285364903509617, - "step": 3250 - }, - { - "epoch": 2.220327421555252, - "grad_norm": 0.0028919128235429525, - "learning_rate": 3.2126297624884065e-05, - "loss": 0.001136382296681404, - "step": 3255 - }, - { - "epoch": 2.223738062755798, - "grad_norm": 0.0026204732712358236, - "learning_rate": 3.20248612809807e-05, - "loss": 0.0001833780319429934, - "step": 3260 - }, - { - "epoch": 2.2271487039563436, - "grad_norm": 0.054305560886859894, - "learning_rate": 3.192346585403805e-05, - "loss": 0.003986500948667526, - "step": 3265 - }, - { - "epoch": 2.2305593451568897, - "grad_norm": 0.024538133293390274, - "learning_rate": 3.182211210180732e-05, - "loss": 0.0003968174569308758, - "step": 3270 - }, - { - "epoch": 2.2339699863574354, - "grad_norm": 0.003948847763240337, - "learning_rate": 3.172080078172817e-05, - "loss": 0.0004363433923572302, - "step": 3275 - }, - { - "epoch": 2.237380627557981, - "grad_norm": 0.0024372704792767763, - "learning_rate": 3.161953265092322e-05, - "loss": 0.001003190129995346, - "step": 3280 - }, - { - "epoch": 2.2407912687585267, - "grad_norm": 0.05504141375422478, - "learning_rate": 3.1518308466192346e-05, - "loss": 0.00043217958882451056, - "step": 3285 - }, - { - "epoch": 2.2442019099590724, - "grad_norm": 0.003660411573946476, - "learning_rate": 3.141712898400692e-05, - "loss": 0.0005229417700320482, - "step": 3290 - }, - { - "epoch": 2.246930422919509, - "eval_loss": 0.07013023644685745, - "eval_runtime": 0.915, - "eval_samples_per_second": 81.971, - "eval_steps_per_second": 2.186, - "step": 3294 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3294 - }, - { - "epoch": 2.247612551159618, - "grad_norm": 0.0009039652650244534, - "learning_rate": 3.1315994960504354e-05, - "loss": 0.0009505398571491242, - "step": 3295 - }, - { - "epoch": 2.251023192360164, - "grad_norm": 0.0008299718610942364, - "learning_rate": 3.121490715148224e-05, - "loss": 0.006436178088188171, - "step": 3300 - }, - { - "epoch": 2.2544338335607095, - "grad_norm": 0.003360757604241371, - "learning_rate": 3.1113866312392846e-05, - "loss": 0.0004931201227009296, - "step": 3305 - }, - { - "epoch": 2.257844474761255, - "grad_norm": 0.0006958392332307994, - "learning_rate": 3.1012873198337415e-05, - "loss": 0.0008634727448225022, - "step": 3310 - }, - { - "epoch": 2.261255115961801, - "grad_norm": 0.0006489035440608859, - "learning_rate": 3.0911928564060525e-05, - "loss": 0.02126412242650986, - "step": 3315 - }, - { - "epoch": 2.2646657571623465, - "grad_norm": 0.05853112041950226, - "learning_rate": 3.081103316394446e-05, - "loss": 0.0011481027118861674, - "step": 3320 - }, - { - "epoch": 2.268076398362892, - "grad_norm": 0.018470890820026398, - "learning_rate": 3.0710187752003576e-05, - "loss": 0.0005085847340524196, - "step": 3325 - }, - { - "epoch": 2.271487039563438, - "grad_norm": 0.002904064953327179, - "learning_rate": 3.06093930818786e-05, - "loss": 0.0011355782859027385, - "step": 3330 - }, - { - "epoch": 2.2748976807639836, - "grad_norm": 0.006562090013176203, - "learning_rate": 3.0508649906831165e-05, - "loss": 0.0017314480617642402, - "step": 3335 - }, - { - "epoch": 2.2783083219645293, - "grad_norm": 0.012832654640078545, - "learning_rate": 3.040795897973794e-05, - "loss": 0.014564378559589386, - "step": 3340 - }, - { - "epoch": 2.281718963165075, - "grad_norm": 0.40248075127601624, - "learning_rate": 3.030732105308523e-05, - "loss": 0.013111820816993714, - "step": 3345 - }, - { - "epoch": 2.2851296043656206, - "grad_norm": 0.009625518694519997, - "learning_rate": 3.0206736878963198e-05, - "loss": 0.0003735888050869107, - "step": 3350 - }, - { - "epoch": 2.2885402455661663, - "grad_norm": 0.05925761163234711, - "learning_rate": 3.010620720906034e-05, - "loss": 0.0005200970452278852, - "step": 3355 - }, - { - "epoch": 2.291950886766712, - "grad_norm": 0.04488271474838257, - "learning_rate": 3.0005732794657804e-05, - "loss": 0.0017546603456139564, - "step": 3360 - }, - { - "epoch": 2.2953615279672577, - "grad_norm": 0.0013143798569217324, - "learning_rate": 2.990531438662383e-05, - "loss": 0.0006482157856225968, - "step": 3365 - }, - { - "epoch": 2.2987721691678034, - "grad_norm": 0.0018280980875715613, - "learning_rate": 2.980495273540805e-05, - "loss": 0.002798055298626423, - "step": 3370 - }, - { - "epoch": 2.3021828103683495, - "grad_norm": 0.0068644145503640175, - "learning_rate": 2.9704648591036028e-05, - "loss": 0.0010916708968579769, - "step": 3375 - }, - { - "epoch": 2.305593451568895, - "grad_norm": 0.006140770856291056, - "learning_rate": 2.9604402703103482e-05, - "loss": 0.0003204951295629144, - "step": 3380 - }, - { - "epoch": 2.309004092769441, - "grad_norm": 0.01666918210685253, - "learning_rate": 2.9504215820770825e-05, - "loss": 0.002915392816066742, - "step": 3385 - }, - { - "epoch": 2.3124147339699865, - "grad_norm": 0.001569412648677826, - "learning_rate": 2.9404088692757462e-05, - "loss": 0.00282623004168272, - "step": 3390 - }, - { - "epoch": 2.315825375170532, - "grad_norm": 2.6985678672790527, - "learning_rate": 2.930402206733629e-05, - "loss": 0.056363034248352054, - "step": 3395 - }, - { - "epoch": 2.319236016371078, - "grad_norm": 0.061534252017736435, - "learning_rate": 2.9204016692328008e-05, - "loss": 0.002193786948919296, - "step": 3400 - }, - { - "epoch": 2.3226466575716236, - "grad_norm": 0.00724546005949378, - "learning_rate": 2.9104073315095624e-05, - "loss": 0.0027640098705887794, - "step": 3405 - }, - { - "epoch": 2.3260572987721693, - "grad_norm": 0.0014935819199308753, - "learning_rate": 2.900419268253876e-05, - "loss": 0.0014965098351240158, - "step": 3410 - }, - { - "epoch": 2.329467939972715, - "grad_norm": 0.2667955458164215, - "learning_rate": 2.89043755410882e-05, - "loss": 0.0009135601110756397, - "step": 3415 - }, - { - "epoch": 2.3328785811732606, - "grad_norm": 0.015711264684796333, - "learning_rate": 2.8804622636700195e-05, - "loss": 0.0004993634298443794, - "step": 3420 - }, - { - "epoch": 2.3362892223738063, - "grad_norm": 0.000695803901180625, - "learning_rate": 2.8704934714850972e-05, - "loss": 0.0010460540652275085, - "step": 3425 - }, - { - "epoch": 2.339699863574352, - "grad_norm": 0.00030175631400197744, - "learning_rate": 2.8605312520531102e-05, - "loss": 0.0011491063050925732, - "step": 3430 - }, - { - "epoch": 2.3431105047748977, - "grad_norm": 0.0008360512438230217, - "learning_rate": 2.850575679823998e-05, - "loss": 0.005195276811718941, - "step": 3435 - }, - { - "epoch": 2.3465211459754434, - "grad_norm": 0.07670744508504868, - "learning_rate": 2.840626829198022e-05, - "loss": 0.001102046575397253, - "step": 3440 - }, - { - "epoch": 2.349931787175989, - "grad_norm": 0.0048200939781963825, - "learning_rate": 2.8306847745252154e-05, - "loss": 0.00011967071332037449, - "step": 3445 - }, - { - "epoch": 2.3533424283765347, - "grad_norm": 0.0036802536342293024, - "learning_rate": 2.8207495901048164e-05, - "loss": 0.003212982416152954, - "step": 3450 - }, - { - "epoch": 2.3567530695770804, - "grad_norm": 0.0017565820598974824, - "learning_rate": 2.8108213501847284e-05, - "loss": 3.878590650856495e-05, - "step": 3455 - }, - { - "epoch": 2.360163710777626, - "grad_norm": 0.07837986201047897, - "learning_rate": 2.8009001289609514e-05, - "loss": 0.00035386246163398026, - "step": 3460 - }, - { - "epoch": 2.363574351978172, - "grad_norm": 0.035858154296875, - "learning_rate": 2.7909860005770364e-05, - "loss": 0.0020171813666820526, - "step": 3465 - }, - { - "epoch": 2.3669849931787175, - "grad_norm": 0.001313618617132306, - "learning_rate": 2.781079039123525e-05, - "loss": 0.0077533811330795285, - "step": 3470 - }, - { - "epoch": 2.370395634379263, - "grad_norm": 0.022166471928358078, - "learning_rate": 2.771179318637402e-05, - "loss": 0.00021515686530619859, - "step": 3475 - }, - { - "epoch": 2.373806275579809, - "grad_norm": 0.0037807885091751814, - "learning_rate": 2.7612869131015353e-05, - "loss": 0.008334387093782425, - "step": 3480 - }, - { - "epoch": 2.3772169167803545, - "grad_norm": 0.02126333676278591, - "learning_rate": 2.7514018964441313e-05, - "loss": 0.0012980472296476365, - "step": 3485 - }, - { - "epoch": 2.3806275579809, - "grad_norm": 0.0007329506915993989, - "learning_rate": 2.7415243425381707e-05, - "loss": 0.000131706683896482, - "step": 3490 - }, - { - "epoch": 2.384038199181446, - "grad_norm": 0.008962417021393776, - "learning_rate": 2.73165432520087e-05, - "loss": 0.0001407766016200185, - "step": 3495 - }, - { - "epoch": 2.3874488403819916, - "grad_norm": 0.06224314495921135, - "learning_rate": 2.721791918193119e-05, - "loss": 0.0005040234886109829, - "step": 3500 - }, - { - "epoch": 2.3908594815825377, - "grad_norm": 0.007790696807205677, - "learning_rate": 2.7119371952189368e-05, - "loss": 0.00020941467955708503, - "step": 3505 - }, - { - "epoch": 2.3942701227830834, - "grad_norm": 0.1999143660068512, - "learning_rate": 2.7020902299249144e-05, - "loss": 0.0005157966166734696, - "step": 3510 - }, - { - "epoch": 2.397680763983629, - "grad_norm": 1.5223946571350098, - "learning_rate": 2.692251095899673e-05, - "loss": 0.004808775335550308, - "step": 3515 - }, - { - "epoch": 2.4010914051841747, - "grad_norm": 0.0005383774405345321, - "learning_rate": 2.6824198666733024e-05, - "loss": 0.0007459132932126522, - "step": 3520 - }, - { - "epoch": 2.4045020463847204, - "grad_norm": 0.02152041345834732, - "learning_rate": 2.672596615716823e-05, - "loss": 0.010163982212543488, - "step": 3525 - }, - { - "epoch": 2.407912687585266, - "grad_norm": 0.1950986683368683, - "learning_rate": 2.6627814164416303e-05, - "loss": 0.002464359626173973, - "step": 3530 - }, - { - "epoch": 2.411323328785812, - "grad_norm": 0.21561792492866516, - "learning_rate": 2.652974342198947e-05, - "loss": 0.0010975897312164307, - "step": 3535 - }, - { - "epoch": 2.4147339699863575, - "grad_norm": 0.0007951174047775567, - "learning_rate": 2.6431754662792775e-05, - "loss": 6.033455138094723e-05, - "step": 3540 - }, - { - "epoch": 2.418144611186903, - "grad_norm": 0.0016590118175372481, - "learning_rate": 2.633384861911856e-05, - "loss": 0.00012161724735051393, - "step": 3545 - }, - { - "epoch": 2.421555252387449, - "grad_norm": 0.004098537378013134, - "learning_rate": 2.6236026022641047e-05, - "loss": 0.0006160829216241837, - "step": 3550 - }, - { - "epoch": 2.4249658935879945, - "grad_norm": 0.0009240853250958025, - "learning_rate": 2.6138287604410772e-05, - "loss": 8.804704993963242e-05, - "step": 3555 - }, - { - "epoch": 2.42837653478854, - "grad_norm": 0.005952226463705301, - "learning_rate": 2.604063409484928e-05, - "loss": 0.0006035147234797478, - "step": 3560 - }, - { - "epoch": 2.431787175989086, - "grad_norm": 0.03809252381324768, - "learning_rate": 2.5943066223743488e-05, - "loss": 0.00727783590555191, - "step": 3565 - }, - { - "epoch": 2.4351978171896316, - "grad_norm": 0.05054875835776329, - "learning_rate": 2.5845584720240384e-05, - "loss": 0.0082052581012249, - "step": 3570 - }, - { - "epoch": 2.4386084583901773, - "grad_norm": 0.0147855868563056, - "learning_rate": 2.5748190312841466e-05, - "loss": 0.011614852398633958, - "step": 3575 - }, - { - "epoch": 2.442019099590723, - "grad_norm": 0.011641742661595345, - "learning_rate": 2.5650883729397373e-05, - "loss": 0.0002830417361110449, - "step": 3580 - }, - { - "epoch": 2.4454297407912686, - "grad_norm": 0.04626445844769478, - "learning_rate": 2.5553665697102386e-05, - "loss": 0.0003774407086893916, - "step": 3585 - }, - { - "epoch": 2.4488403819918143, - "grad_norm": 0.3234706521034241, - "learning_rate": 2.5456536942489065e-05, - "loss": 0.0009496832266449928, - "step": 3590 - }, - { - "epoch": 2.45225102319236, - "grad_norm": 0.029156841337680817, - "learning_rate": 2.535949819142272e-05, - "loss": 0.0016127176582813262, - "step": 3595 - }, - { - "epoch": 2.4556616643929057, - "grad_norm": 0.0015022120205685496, - "learning_rate": 2.52625501690961e-05, - "loss": 0.00010128046851605177, - "step": 3600 - }, - { - "epoch": 2.459072305593452, - "grad_norm": 0.12954266369342804, - "learning_rate": 2.5165693600023872e-05, - "loss": 0.004440005496144294, - "step": 3605 - }, - { - "epoch": 2.4624829467939975, - "grad_norm": 0.022409839555621147, - "learning_rate": 2.5068929208037295e-05, - "loss": 0.0019246777519583702, - "step": 3610 - }, - { - "epoch": 2.465893587994543, - "grad_norm": 0.0018720730440691113, - "learning_rate": 2.497225771627873e-05, - "loss": 0.004561808705329895, - "step": 3615 - }, - { - "epoch": 2.469304229195089, - "grad_norm": 0.0021158247254788876, - "learning_rate": 2.4875679847196312e-05, - "loss": 0.005481125041842461, - "step": 3620 - }, - { - "epoch": 2.4727148703956345, - "grad_norm": 0.0024307845160365105, - "learning_rate": 2.477919632253845e-05, - "loss": 0.0009140795096755028, - "step": 3625 - }, - { - "epoch": 2.47612551159618, - "grad_norm": 0.0020758784376084805, - "learning_rate": 2.4682807863348583e-05, - "loss": 0.001236506924033165, - "step": 3630 - }, - { - "epoch": 2.479536152796726, - "grad_norm": 0.0006182460929267108, - "learning_rate": 2.4586515189959614e-05, - "loss": 0.00015565860085189342, - "step": 3635 - }, - { - "epoch": 2.4829467939972716, - "grad_norm": 0.05087731033563614, - "learning_rate": 2.4490319021988688e-05, - "loss": 0.00022137174382805825, - "step": 3640 - }, - { - "epoch": 2.4863574351978173, - "grad_norm": 0.03250613436102867, - "learning_rate": 2.4394220078331695e-05, - "loss": 0.00028696306981146336, - "step": 3645 - }, - { - "epoch": 2.489768076398363, - "grad_norm": 0.017168540507555008, - "learning_rate": 2.429821907715798e-05, - "loss": 0.0003641644492745399, - "step": 3650 - }, - { - "epoch": 2.4931787175989086, - "grad_norm": 0.0670199990272522, - "learning_rate": 2.420231673590491e-05, - "loss": 0.00015748695004731418, - "step": 3655 - }, - { - "epoch": 2.4965893587994543, - "grad_norm": 0.003998387139290571, - "learning_rate": 2.4106513771272585e-05, - "loss": 0.00026149852201342585, - "step": 3660 - }, - { - "epoch": 2.4965893587994543, - "eval_loss": 0.06822175532579422, - "eval_runtime": 0.9108, - "eval_samples_per_second": 82.345, - "eval_steps_per_second": 2.196, - "step": 3660 - }, - { - "eval_cer_subset": 0.01675977653631285, - "eval_cer_subset_edit_distance": 123, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 3660 - }, - { - "epoch": 2.5, - "grad_norm": 0.0059893373399972916, - "learning_rate": 2.4010810899218384e-05, - "loss": 0.0037302006036043166, - "step": 3665 - }, - { - "epoch": 2.5034106412005457, - "grad_norm": 0.3968847692012787, - "learning_rate": 2.3915208834951736e-05, - "loss": 0.0008235686458647251, - "step": 3670 - }, - { - "epoch": 2.5068212824010914, - "grad_norm": 0.001170233590528369, - "learning_rate": 2.3819708292928645e-05, - "loss": 0.0021816927939653395, - "step": 3675 - }, - { - "epoch": 2.510231923601637, - "grad_norm": 0.0864306092262268, - "learning_rate": 2.3724309986846476e-05, - "loss": 0.00794672966003418, - "step": 3680 - }, - { - "epoch": 2.5136425648021827, - "grad_norm": 0.0012164375511929393, - "learning_rate": 2.362901462963851e-05, - "loss": 0.00014161464059725404, - "step": 3685 - }, - { - "epoch": 2.5170532060027284, - "grad_norm": 0.0047707995399832726, - "learning_rate": 2.353382293346872e-05, - "loss": 0.00012235456379130482, - "step": 3690 - }, - { - "epoch": 2.520463847203274, - "grad_norm": 0.0010226344456896186, - "learning_rate": 2.3438735609726346e-05, - "loss": 0.0006677288562059403, - "step": 3695 - }, - { - "epoch": 2.52387448840382, - "grad_norm": 0.01809096150100231, - "learning_rate": 2.334375336902067e-05, - "loss": 0.0004967927932739257, - "step": 3700 - }, - { - "epoch": 2.5272851296043655, - "grad_norm": 0.006922638975083828, - "learning_rate": 2.3248876921175613e-05, - "loss": 0.0012997164390981196, - "step": 3705 - }, - { - "epoch": 2.530695770804911, - "grad_norm": 0.0002996268740389496, - "learning_rate": 2.315410697522456e-05, - "loss": 5.4457224905490875e-05, - "step": 3710 - }, - { - "epoch": 2.534106412005457, - "grad_norm": 0.00561846699565649, - "learning_rate": 2.3059444239404896e-05, - "loss": 0.0002347052562981844, - "step": 3715 - }, - { - "epoch": 2.5375170532060025, - "grad_norm": 1.200972318649292, - "learning_rate": 2.296488942115287e-05, - "loss": 0.003510555624961853, - "step": 3720 - }, - { - "epoch": 2.540927694406548, - "grad_norm": 0.008847455494105816, - "learning_rate": 2.287044322709819e-05, - "loss": 0.00010497854091227055, - "step": 3725 - }, - { - "epoch": 2.544338335607094, - "grad_norm": 0.0026281927712261677, - "learning_rate": 2.277610636305883e-05, - "loss": 0.001988488808274269, - "step": 3730 - }, - { - "epoch": 2.5477489768076396, - "grad_norm": 0.008025784976780415, - "learning_rate": 2.268187953403568e-05, - "loss": 0.023679326474666595, - "step": 3735 - }, - { - "epoch": 2.5511596180081857, - "grad_norm": 0.03441132605075836, - "learning_rate": 2.258776344420735e-05, - "loss": 0.0004788160789757967, - "step": 3740 - }, - { - "epoch": 2.5545702592087314, - "grad_norm": 3.1458778381347656, - "learning_rate": 2.2493758796924816e-05, - "loss": 0.008043569326400758, - "step": 3745 - }, - { - "epoch": 2.557980900409277, - "grad_norm": 0.002775805303826928, - "learning_rate": 2.2399866294706302e-05, - "loss": 0.0013419794850051403, - "step": 3750 - }, - { - "epoch": 2.5613915416098227, - "grad_norm": 0.0030509193893522024, - "learning_rate": 2.2306086639231857e-05, - "loss": 0.03926021754741669, - "step": 3755 - }, - { - "epoch": 2.5648021828103684, - "grad_norm": 0.0024770230520516634, - "learning_rate": 2.2212420531338248e-05, - "loss": 0.0011906253173947334, - "step": 3760 - }, - { - "epoch": 2.568212824010914, - "grad_norm": 0.007617161609232426, - "learning_rate": 2.2118868671013692e-05, - "loss": 0.008073102682828903, - "step": 3765 - }, - { - "epoch": 2.57162346521146, - "grad_norm": 0.08999158442020416, - "learning_rate": 2.202543175739254e-05, - "loss": 0.0009835162200033665, - "step": 3770 - }, - { - "epoch": 2.5750341064120055, - "grad_norm": 0.010818173177540302, - "learning_rate": 2.193211048875022e-05, - "loss": 0.0019240962341427804, - "step": 3775 - }, - { - "epoch": 2.578444747612551, - "grad_norm": 0.01809680461883545, - "learning_rate": 2.183890556249781e-05, - "loss": 0.005013756453990936, - "step": 3780 - }, - { - "epoch": 2.581855388813097, - "grad_norm": 0.021501798182725906, - "learning_rate": 2.1745817675177027e-05, - "loss": 0.0005870801862329245, - "step": 3785 - }, - { - "epoch": 2.5852660300136425, - "grad_norm": 0.011130684986710548, - "learning_rate": 2.165284752245485e-05, - "loss": 0.00037821107544004916, - "step": 3790 - }, - { - "epoch": 2.588676671214188, - "grad_norm": 0.00624213507398963, - "learning_rate": 2.1559995799118496e-05, - "loss": 0.015400664508342743, - "step": 3795 - }, - { - "epoch": 2.592087312414734, - "grad_norm": 0.23763298988342285, - "learning_rate": 2.1467263199070018e-05, - "loss": 0.0006831173319369555, - "step": 3800 - }, - { - "epoch": 2.5954979536152796, - "grad_norm": 0.0056639909744262695, - "learning_rate": 2.137465041532133e-05, - "loss": 0.002130754478275776, - "step": 3805 - }, - { - "epoch": 2.5989085948158253, - "grad_norm": 0.42903369665145874, - "learning_rate": 2.1282158139988877e-05, - "loss": 0.0020006079226732253, - "step": 3810 - }, - { - "epoch": 2.602319236016371, - "grad_norm": 0.014364579692482948, - "learning_rate": 2.118978706428854e-05, - "loss": 0.0005437508225440979, - "step": 3815 - }, - { - "epoch": 2.6057298772169166, - "grad_norm": 0.017512807622551918, - "learning_rate": 2.1097537878530427e-05, - "loss": 0.00019666440784931182, - "step": 3820 - }, - { - "epoch": 2.6091405184174628, - "grad_norm": 0.00863230973482132, - "learning_rate": 2.100541127211379e-05, - "loss": 0.0001873808912932873, - "step": 3825 - }, - { - "epoch": 2.6125511596180084, - "grad_norm": 0.006781345698982477, - "learning_rate": 2.0913407933521714e-05, - "loss": 0.00018554476555436849, - "step": 3830 - }, - { - "epoch": 2.615961800818554, - "grad_norm": 0.004758995026350021, - "learning_rate": 2.082152855031618e-05, - "loss": 0.0004659180995076895, - "step": 3835 - }, - { - "epoch": 2.6193724420191, - "grad_norm": 0.0036142354365438223, - "learning_rate": 2.0729773809132782e-05, - "loss": 0.00033613520208746194, - "step": 3840 - }, - { - "epoch": 2.6227830832196455, - "grad_norm": 0.0798744410276413, - "learning_rate": 2.0638144395675614e-05, - "loss": 0.00026304563507437704, - "step": 3845 - }, - { - "epoch": 2.626193724420191, - "grad_norm": 0.07229600101709366, - "learning_rate": 2.0546640994712183e-05, - "loss": 0.022786998748779298, - "step": 3850 - }, - { - "epoch": 2.629604365620737, - "grad_norm": 0.11226585507392883, - "learning_rate": 2.04552642900683e-05, - "loss": 0.0002580304862931371, - "step": 3855 - }, - { - "epoch": 2.6330150068212825, - "grad_norm": 0.0019689116161316633, - "learning_rate": 2.036401496462292e-05, - "loss": 0.005474040284752846, - "step": 3860 - }, - { - "epoch": 2.636425648021828, - "grad_norm": 0.08611829578876495, - "learning_rate": 2.027289370030307e-05, - "loss": 0.0007106051780283451, - "step": 3865 - }, - { - "epoch": 2.639836289222374, - "grad_norm": 0.06968124955892563, - "learning_rate": 2.0181901178078723e-05, - "loss": 0.00030497927218675613, - "step": 3870 - }, - { - "epoch": 2.6432469304229196, - "grad_norm": 0.002102258615195751, - "learning_rate": 2.0091038077957807e-05, - "loss": 0.00039041375275701285, - "step": 3875 - }, - { - "epoch": 2.6466575716234653, - "grad_norm": 0.01055186241865158, - "learning_rate": 2.000030507898094e-05, - "loss": 0.00028035915456712244, - "step": 3880 - }, - { - "epoch": 2.650068212824011, - "grad_norm": 0.013122744858264923, - "learning_rate": 1.990970285921656e-05, - "loss": 0.0002463514683768153, - "step": 3885 - }, - { - "epoch": 2.6534788540245566, - "grad_norm": 0.043785031884908676, - "learning_rate": 1.9819232095755712e-05, - "loss": 0.0006866191513836383, - "step": 3890 - }, - { - "epoch": 2.6568894952251023, - "grad_norm": 0.014347897842526436, - "learning_rate": 1.9728893464707063e-05, - "loss": 0.00304874274879694, - "step": 3895 - }, - { - "epoch": 2.660300136425648, - "grad_norm": 0.01495263073593378, - "learning_rate": 1.9638687641191784e-05, - "loss": 0.0027243653312325478, - "step": 3900 - }, - { - "epoch": 2.6637107776261937, - "grad_norm": 0.0025812601670622826, - "learning_rate": 1.954861529933862e-05, - "loss": 0.00015772593906149268, - "step": 3905 - }, - { - "epoch": 2.6671214188267394, - "grad_norm": 3.2978317737579346, - "learning_rate": 1.9458677112278677e-05, - "loss": 0.011941131204366684, - "step": 3910 - }, - { - "epoch": 2.670532060027285, - "grad_norm": 0.00819153431802988, - "learning_rate": 1.936887375214059e-05, - "loss": 0.0019363060593605042, - "step": 3915 - }, - { - "epoch": 2.6739427012278307, - "grad_norm": 0.3553819954395294, - "learning_rate": 1.9279205890045335e-05, - "loss": 0.001681213453412056, - "step": 3920 - }, - { - "epoch": 2.6773533424283764, - "grad_norm": 0.14068304002285004, - "learning_rate": 1.9189674196101303e-05, - "loss": 0.0004354804754257202, - "step": 3925 - }, - { - "epoch": 2.680763983628922, - "grad_norm": 0.05331770330667496, - "learning_rate": 1.9100279339399258e-05, - "loss": 0.0006728332955390215, - "step": 3930 - }, - { - "epoch": 2.684174624829468, - "grad_norm": 0.010825222358107567, - "learning_rate": 1.9011021988007387e-05, - "loss": 0.011760103702545165, - "step": 3935 - }, - { - "epoch": 2.6875852660300135, - "grad_norm": 0.02598944492638111, - "learning_rate": 1.892190280896622e-05, - "loss": 0.00020915823988616468, - "step": 3940 - }, - { - "epoch": 2.690995907230559, - "grad_norm": 0.00512358546257019, - "learning_rate": 1.8832922468283724e-05, - "loss": 0.000882271584123373, - "step": 3945 - }, - { - "epoch": 2.694406548431105, - "grad_norm": 0.05068441852927208, - "learning_rate": 1.874408163093028e-05, - "loss": 0.000997264590114355, - "step": 3950 - }, - { - "epoch": 2.6978171896316505, - "grad_norm": 0.0038104017730802298, - "learning_rate": 1.8655380960833724e-05, - "loss": 0.001553349569439888, - "step": 3955 - }, - { - "epoch": 2.701227830832196, - "grad_norm": 0.0013087299885228276, - "learning_rate": 1.8566821120874394e-05, - "loss": 0.006560490280389786, - "step": 3960 - }, - { - "epoch": 2.704638472032742, - "grad_norm": 1.1628080606460571, - "learning_rate": 1.8478402772880208e-05, - "loss": 0.0015312742441892623, - "step": 3965 - }, - { - "epoch": 2.708049113233288, - "grad_norm": 0.0020620303694158792, - "learning_rate": 1.8390126577621636e-05, - "loss": 0.013011389970779419, - "step": 3970 - }, - { - "epoch": 2.7114597544338337, - "grad_norm": 0.0014427551068365574, - "learning_rate": 1.830199319480682e-05, - "loss": 0.0008381184190511704, - "step": 3975 - }, - { - "epoch": 2.7148703956343794, - "grad_norm": 0.0005985202733427286, - "learning_rate": 1.821400328307663e-05, - "loss": 0.0005598202813416719, - "step": 3980 - }, - { - "epoch": 2.718281036834925, - "grad_norm": 0.0016122297383844852, - "learning_rate": 1.8126157499999783e-05, - "loss": 0.006013911962509155, - "step": 3985 - }, - { - "epoch": 2.7216916780354707, - "grad_norm": 0.0028895260766148567, - "learning_rate": 1.8038456502067822e-05, - "loss": 0.00017103723948821425, - "step": 3990 - }, - { - "epoch": 2.7251023192360164, - "grad_norm": 0.09400962293148041, - "learning_rate": 1.7950900944690308e-05, - "loss": 0.07410463690757751, - "step": 3995 - }, - { - "epoch": 2.728512960436562, - "grad_norm": 0.015019465237855911, - "learning_rate": 1.786349148218993e-05, - "loss": 0.004524913057684899, - "step": 4000 - }, - { - "epoch": 2.731923601637108, - "grad_norm": 0.000663114245980978, - "learning_rate": 1.7776228767797522e-05, - "loss": 0.0212590754032135, - "step": 4005 - }, - { - "epoch": 2.7353342428376535, - "grad_norm": 0.0029272777028381824, - "learning_rate": 1.768911345364726e-05, - "loss": 0.000913316011428833, - "step": 4010 - }, - { - "epoch": 2.738744884038199, - "grad_norm": 0.03791525587439537, - "learning_rate": 1.7602146190771743e-05, - "loss": 0.0018313366919755936, - "step": 4015 - }, - { - "epoch": 2.742155525238745, - "grad_norm": 0.041133999824523926, - "learning_rate": 1.7515327629097217e-05, - "loss": 0.0006253012455999851, - "step": 4020 - }, - { - "epoch": 2.7455661664392905, - "grad_norm": 0.01035034004598856, - "learning_rate": 1.7428658417438534e-05, - "loss": 0.005944912880659103, - "step": 4025 - }, - { - "epoch": 2.7462482946793996, - "eval_loss": 0.06961391866207123, - "eval_runtime": 0.9223, - "eval_samples_per_second": 81.321, - "eval_steps_per_second": 2.169, - "step": 4026 - }, - { - "eval_cer_subset": 0.015397193078076032, - "eval_cer_subset_edit_distance": 113, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4026 - }, - { - "epoch": 2.748976807639836, - "grad_norm": 0.9081467986106873, - "learning_rate": 1.7342139203494537e-05, - "loss": 0.0036753010004758834, - "step": 4030 - }, - { - "epoch": 2.752387448840382, - "grad_norm": 0.07232939451932907, - "learning_rate": 1.7255770633843028e-05, - "loss": 0.0029829120263457297, - "step": 4035 - }, - { - "epoch": 2.7557980900409276, - "grad_norm": 0.005813417956233025, - "learning_rate": 1.7169553353936035e-05, - "loss": 0.00047225411981344223, - "step": 4040 - }, - { - "epoch": 2.7592087312414733, - "grad_norm": 0.2354760318994522, - "learning_rate": 1.7083488008094945e-05, - "loss": 0.0015884984284639358, - "step": 4045 - }, - { - "epoch": 2.762619372442019, - "grad_norm": 0.1201627105474472, - "learning_rate": 1.699757523950577e-05, - "loss": 0.0010768620297312737, - "step": 4050 - }, - { - "epoch": 2.766030013642565, - "grad_norm": 0.033547814935445786, - "learning_rate": 1.6911815690214166e-05, - "loss": 0.01052093282341957, - "step": 4055 - }, - { - "epoch": 2.7694406548431107, - "grad_norm": 0.01014826912432909, - "learning_rate": 1.682621000112085e-05, - "loss": 0.0003362501040101051, - "step": 4060 - }, - { - "epoch": 2.7728512960436564, - "grad_norm": 0.004405386745929718, - "learning_rate": 1.6740758811976665e-05, - "loss": 0.00044001247733831405, - "step": 4065 - }, - { - "epoch": 2.776261937244202, - "grad_norm": 3.621004104614258, - "learning_rate": 1.665546276137783e-05, - "loss": 0.18638403415679933, - "step": 4070 - }, - { - "epoch": 2.779672578444748, - "grad_norm": 0.01203183177858591, - "learning_rate": 1.6570322486761184e-05, - "loss": 0.00013435594737529755, - "step": 4075 - }, - { - "epoch": 2.7830832196452935, - "grad_norm": 0.23053398728370667, - "learning_rate": 1.6485338624399445e-05, - "loss": 0.0010434269905090332, - "step": 4080 - }, - { - "epoch": 2.786493860845839, - "grad_norm": 0.0109801534563303, - "learning_rate": 1.6400511809396394e-05, - "loss": 0.00012704560067504643, - "step": 4085 - }, - { - "epoch": 2.789904502046385, - "grad_norm": 0.8463883399963379, - "learning_rate": 1.631584267568217e-05, - "loss": 0.007707947492599487, - "step": 4090 - }, - { - "epoch": 2.7933151432469305, - "grad_norm": 0.05444789677858353, - "learning_rate": 1.623133185600852e-05, - "loss": 0.0008411366492509842, - "step": 4095 - }, - { - "epoch": 2.796725784447476, - "grad_norm": 0.007274657487869263, - "learning_rate": 1.6146979981944095e-05, - "loss": 0.0002061120932921767, - "step": 4100 - }, - { - "epoch": 2.800136425648022, - "grad_norm": 0.013262615539133549, - "learning_rate": 1.6062787683869667e-05, - "loss": 0.0003235015319660306, - "step": 4105 - }, - { - "epoch": 2.8035470668485676, - "grad_norm": 0.0030244409572333097, - "learning_rate": 1.597875559097352e-05, - "loss": 0.0007286245469003916, - "step": 4110 - }, - { - "epoch": 2.8069577080491133, - "grad_norm": 0.0021644949447363615, - "learning_rate": 1.5894884331246632e-05, - "loss": 0.0003056209534406662, - "step": 4115 - }, - { - "epoch": 2.810368349249659, - "grad_norm": 0.014495057985186577, - "learning_rate": 1.5811174531478074e-05, - "loss": 0.0014359142631292343, - "step": 4120 - }, - { - "epoch": 2.8137789904502046, - "grad_norm": 0.05203676223754883, - "learning_rate": 1.5727626817250255e-05, - "loss": 0.0006342739332467317, - "step": 4125 - }, - { - "epoch": 2.8171896316507503, - "grad_norm": 0.004998183809220791, - "learning_rate": 1.564424181293435e-05, - "loss": 0.00013386564096435906, - "step": 4130 - }, - { - "epoch": 2.820600272851296, - "grad_norm": 0.05428452417254448, - "learning_rate": 1.556102014168546e-05, - "loss": 0.00014423681423068047, - "step": 4135 - }, - { - "epoch": 2.8240109140518417, - "grad_norm": 0.0013511159922927618, - "learning_rate": 1.5477962425438164e-05, - "loss": 6.5605464624241e-05, - "step": 4140 - }, - { - "epoch": 2.8274215552523874, - "grad_norm": 0.9757132530212402, - "learning_rate": 1.539506928490171e-05, - "loss": 0.00839839205145836, - "step": 4145 - }, - { - "epoch": 2.830832196452933, - "grad_norm": 0.0018282996024936438, - "learning_rate": 1.5312341339555445e-05, - "loss": 0.005959897115826607, - "step": 4150 - }, - { - "epoch": 2.8342428376534787, - "grad_norm": 0.0022353942040354013, - "learning_rate": 1.5229779207644171e-05, - "loss": 0.000250368332490325, - "step": 4155 - }, - { - "epoch": 2.8376534788540244, - "grad_norm": 0.006538774352520704, - "learning_rate": 1.5147383506173572e-05, - "loss": 0.0004641829524189234, - "step": 4160 - }, - { - "epoch": 2.84106412005457, - "grad_norm": 0.003464010776951909, - "learning_rate": 1.5065154850905465e-05, - "loss": 0.008368657529354095, - "step": 4165 - }, - { - "epoch": 2.844474761255116, - "grad_norm": 0.0020767974201589823, - "learning_rate": 1.4983093856353398e-05, - "loss": 0.00010023106588050724, - "step": 4170 - }, - { - "epoch": 2.8478854024556615, - "grad_norm": 0.0025670777540653944, - "learning_rate": 1.4901201135777887e-05, - "loss": 0.0006715046241879463, - "step": 4175 - }, - { - "epoch": 2.851296043656207, - "grad_norm": 0.0006886612391099334, - "learning_rate": 1.4819477301181915e-05, - "loss": 0.0008357623592019081, - "step": 4180 - }, - { - "epoch": 2.854706684856753, - "grad_norm": 0.0023747060913592577, - "learning_rate": 1.4737922963306332e-05, - "loss": 0.00488339364528656, - "step": 4185 - }, - { - "epoch": 2.8581173260572985, - "grad_norm": 0.0030493123922497034, - "learning_rate": 1.4656538731625333e-05, - "loss": 0.017219077050685882, - "step": 4190 - }, - { - "epoch": 2.8615279672578446, - "grad_norm": 0.0030935786198824644, - "learning_rate": 1.457532521434184e-05, - "loss": 0.00014684826601296663, - "step": 4195 - }, - { - "epoch": 2.8649386084583903, - "grad_norm": 0.0004102849052287638, - "learning_rate": 1.4494283018382991e-05, - "loss": 0.0002242558402940631, - "step": 4200 - }, - { - "epoch": 2.868349249658936, - "grad_norm": 1.6695232391357422, - "learning_rate": 1.4413412749395593e-05, - "loss": 0.01916976124048233, - "step": 4205 - }, - { - "epoch": 2.8717598908594817, - "grad_norm": 0.003357100998982787, - "learning_rate": 1.4332715011741656e-05, - "loss": 0.0036146264523267747, - "step": 4210 - }, - { - "epoch": 2.8751705320600274, - "grad_norm": 0.002434425987303257, - "learning_rate": 1.425219040849373e-05, - "loss": 0.0001186407171189785, - "step": 4215 - }, - { - "epoch": 2.878581173260573, - "grad_norm": 0.0272241048514843, - "learning_rate": 1.4171839541430586e-05, - "loss": 0.003773893415927887, - "step": 4220 - }, - { - "epoch": 2.8819918144611187, - "grad_norm": 0.00031364246387965977, - "learning_rate": 1.409166301103257e-05, - "loss": 3.483370819594711e-05, - "step": 4225 - }, - { - "epoch": 2.8854024556616644, - "grad_norm": 0.008605693466961384, - "learning_rate": 1.4011661416477186e-05, - "loss": 0.005898609757423401, - "step": 4230 - }, - { - "epoch": 2.88881309686221, - "grad_norm": 0.0003439450520090759, - "learning_rate": 1.3931835355634601e-05, - "loss": 0.0017080994322896004, - "step": 4235 - }, - { - "epoch": 2.892223738062756, - "grad_norm": 0.004427058156579733, - "learning_rate": 1.3852185425063181e-05, - "loss": 0.00010978456120938062, - "step": 4240 - }, - { - "epoch": 2.8956343792633015, - "grad_norm": 0.02229383960366249, - "learning_rate": 1.377271222000503e-05, - "loss": 0.0012531550601124763, - "step": 4245 - }, - { - "epoch": 2.899045020463847, - "grad_norm": 0.004171700682491064, - "learning_rate": 1.3693416334381517e-05, - "loss": 0.0012122373096644878, - "step": 4250 - }, - { - "epoch": 2.902455661664393, - "grad_norm": 0.0016778658609837294, - "learning_rate": 1.3614298360788924e-05, - "loss": 0.0006234514527022839, - "step": 4255 - }, - { - "epoch": 2.9058663028649385, - "grad_norm": 0.00403103232383728, - "learning_rate": 1.3535358890493897e-05, - "loss": 0.00030033572111278775, - "step": 4260 - }, - { - "epoch": 2.909276944065484, - "grad_norm": 0.006126928608864546, - "learning_rate": 1.3456598513429111e-05, - "loss": 0.014299000799655914, - "step": 4265 - }, - { - "epoch": 2.91268758526603, - "grad_norm": 0.0007882033823989332, - "learning_rate": 1.3378017818188815e-05, - "loss": 0.001049484871327877, - "step": 4270 - }, - { - "epoch": 2.9160982264665756, - "grad_norm": 0.0004515725013334304, - "learning_rate": 1.329961739202451e-05, - "loss": 6.776668014936149e-05, - "step": 4275 - }, - { - "epoch": 2.9195088676671213, - "grad_norm": 0.3738904297351837, - "learning_rate": 1.3221397820840419e-05, - "loss": 0.0010396759025752544, - "step": 4280 - }, - { - "epoch": 2.9229195088676674, - "grad_norm": 0.035881806164979935, - "learning_rate": 1.3143359689189279e-05, - "loss": 0.0003127899952232838, - "step": 4285 - }, - { - "epoch": 2.926330150068213, - "grad_norm": 0.010501476936042309, - "learning_rate": 1.306550358026784e-05, - "loss": 0.013835662603378296, - "step": 4290 - }, - { - "epoch": 2.9297407912687587, - "grad_norm": 0.002153329784050584, - "learning_rate": 1.2987830075912565e-05, - "loss": 0.007172297686338425, - "step": 4295 - }, - { - "epoch": 2.9331514324693044, - "grad_norm": 0.000594582874327898, - "learning_rate": 1.2910339756595254e-05, - "loss": 6.662132800556719e-05, - "step": 4300 - }, - { - "epoch": 2.93656207366985, - "grad_norm": 1.2802950143814087, - "learning_rate": 1.283303320141879e-05, - "loss": 0.0013225926086306572, - "step": 4305 - }, - { - "epoch": 2.939972714870396, - "grad_norm": 0.0010621119290590286, - "learning_rate": 1.2755910988112639e-05, - "loss": 0.0001950544072315097, - "step": 4310 - }, - { - "epoch": 2.9433833560709415, - "grad_norm": 0.0004629544273484498, - "learning_rate": 1.2678973693028735e-05, - "loss": 0.0002407266292721033, - "step": 4315 - }, - { - "epoch": 2.946793997271487, - "grad_norm": 0.00353289395570755, - "learning_rate": 1.2602221891137021e-05, - "loss": 0.006271860748529434, - "step": 4320 - }, - { - "epoch": 2.950204638472033, - "grad_norm": 0.021108930930495262, - "learning_rate": 1.2525656156021227e-05, - "loss": 0.007909800857305527, - "step": 4325 - }, - { - "epoch": 2.9536152796725785, - "grad_norm": 0.007604979444295168, - "learning_rate": 1.2449277059874547e-05, - "loss": 0.00022456045262515546, - "step": 4330 - }, - { - "epoch": 2.957025920873124, - "grad_norm": 0.0007357494323514402, - "learning_rate": 1.2373085173495411e-05, - "loss": 0.0010289529338479042, - "step": 4335 - }, - { - "epoch": 2.96043656207367, - "grad_norm": 0.0004920829669572413, - "learning_rate": 1.229708106628316e-05, - "loss": 8.303072536364198e-05, - "step": 4340 - }, - { - "epoch": 2.9638472032742156, - "grad_norm": 0.0014207189669832587, - "learning_rate": 1.2221265306233824e-05, - "loss": 0.001923336647450924, - "step": 4345 - }, - { - "epoch": 2.9672578444747613, - "grad_norm": 0.00898696668446064, - "learning_rate": 1.2145638459935863e-05, - "loss": 0.01918397843837738, - "step": 4350 - }, - { - "epoch": 2.970668485675307, - "grad_norm": 0.08236313611268997, - "learning_rate": 1.2070201092565988e-05, - "loss": 0.0005465132184326648, - "step": 4355 - }, - { - "epoch": 2.9740791268758526, - "grad_norm": 0.00796231534332037, - "learning_rate": 1.199495376788481e-05, - "loss": 4.8452542978338896e-05, - "step": 4360 - }, - { - "epoch": 2.9774897680763983, - "grad_norm": 0.026452092453837395, - "learning_rate": 1.1919897048232791e-05, - "loss": 0.0006576513405889273, - "step": 4365 - }, - { - "epoch": 2.980900409276944, - "grad_norm": 0.08616425842046738, - "learning_rate": 1.1845031494525901e-05, - "loss": 0.0013777482323348521, - "step": 4370 - }, - { - "epoch": 2.9843110504774897, - "grad_norm": 0.00048112327931448817, - "learning_rate": 1.1770357666251509e-05, - "loss": 0.0018716825172305108, - "step": 4375 - }, - { - "epoch": 2.9877216916780354, - "grad_norm": 0.0013632692862302065, - "learning_rate": 1.1695876121464154e-05, - "loss": 4.9980584299191834e-05, - "step": 4380 - }, - { - "epoch": 2.991132332878581, - "grad_norm": 1.318718671798706, - "learning_rate": 1.1621587416781445e-05, - "loss": 0.013328136503696441, - "step": 4385 - }, - { - "epoch": 2.9945429740791267, - "grad_norm": 0.00219643022865057, - "learning_rate": 1.1547492107379782e-05, - "loss": 9.902374586090446e-05, - "step": 4390 - }, - { - "epoch": 2.9959072305593453, - "eval_loss": 0.07537060230970383, - "eval_runtime": 0.95, - "eval_samples_per_second": 78.945, - "eval_steps_per_second": 2.105, - "step": 4392 - }, - { - "eval_cer_subset": 0.018803651723668074, - "eval_cer_subset_edit_distance": 138, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4392 - }, - { - "epoch": 2.9979536152796724, - "grad_norm": 0.003875225316733122, - "learning_rate": 1.1473590746990342e-05, - "loss": 0.0025784535333514213, - "step": 4395 - }, - { - "epoch": 3.001364256480218, - "grad_norm": 0.011882650665938854, - "learning_rate": 1.1399883887894846e-05, - "loss": 6.913430406711996e-05, - "step": 4400 - }, - { - "epoch": 3.004774897680764, - "grad_norm": 0.002775507280603051, - "learning_rate": 1.1326372080921464e-05, - "loss": 0.00019087132532149553, - "step": 4405 - }, - { - "epoch": 3.00818553888131, - "grad_norm": 0.06267738342285156, - "learning_rate": 1.125305587544069e-05, - "loss": 0.0007432831451296806, - "step": 4410 - }, - { - "epoch": 3.0115961800818556, - "grad_norm": 0.026564784348011017, - "learning_rate": 1.1179935819361272e-05, - "loss": 0.00015866300091147423, - "step": 4415 - }, - { - "epoch": 3.0150068212824013, - "grad_norm": 0.0011134434025734663, - "learning_rate": 1.1107012459126064e-05, - "loss": 0.0005193403456360101, - "step": 4420 - }, - { - "epoch": 3.018417462482947, - "grad_norm": 0.0006882239249534905, - "learning_rate": 1.1034286339707975e-05, - "loss": 0.0002253461629152298, - "step": 4425 - }, - { - "epoch": 3.0218281036834926, - "grad_norm": 0.008092721924185753, - "learning_rate": 1.0961758004605873e-05, - "loss": 0.0003596893046051264, - "step": 4430 - }, - { - "epoch": 3.0252387448840383, - "grad_norm": 0.01083564292639494, - "learning_rate": 1.0889427995840585e-05, - "loss": 0.010776457190513612, - "step": 4435 - }, - { - "epoch": 3.028649386084584, - "grad_norm": 0.040309611707925797, - "learning_rate": 1.0817296853950724e-05, - "loss": 0.0002762762364000082, - "step": 4440 - }, - { - "epoch": 3.0320600272851297, - "grad_norm": 0.0026077954098582268, - "learning_rate": 1.0745365117988804e-05, - "loss": 0.00011963967699557543, - "step": 4445 - }, - { - "epoch": 3.0354706684856754, - "grad_norm": 0.0013187731383368373, - "learning_rate": 1.0673633325517088e-05, - "loss": 0.0022230114787817, - "step": 4450 - }, - { - "epoch": 3.038881309686221, - "grad_norm": 0.08760128915309906, - "learning_rate": 1.060210201260362e-05, - "loss": 0.00029163951985538005, - "step": 4455 - }, - { - "epoch": 3.0422919508867667, - "grad_norm": 0.2557981610298157, - "learning_rate": 1.0530771713818229e-05, - "loss": 0.0014749299734830856, - "step": 4460 - }, - { - "epoch": 3.0457025920873124, - "grad_norm": 0.009532714262604713, - "learning_rate": 1.0459642962228502e-05, - "loss": 0.00023887362331151963, - "step": 4465 - }, - { - "epoch": 3.049113233287858, - "grad_norm": 0.002977812895551324, - "learning_rate": 1.0388716289395833e-05, - "loss": 3.836472751572728e-05, - "step": 4470 - }, - { - "epoch": 3.052523874488404, - "grad_norm": 0.0007074066670611501, - "learning_rate": 1.0317992225371411e-05, - "loss": 7.527543348260224e-05, - "step": 4475 - }, - { - "epoch": 3.0559345156889495, - "grad_norm": 0.020919082686305046, - "learning_rate": 1.0247471298692336e-05, - "loss": 0.00021068421192467214, - "step": 4480 - }, - { - "epoch": 3.059345156889495, - "grad_norm": 0.0012850193306803703, - "learning_rate": 1.0177154036377557e-05, - "loss": 0.00027780483942478894, - "step": 4485 - }, - { - "epoch": 3.062755798090041, - "grad_norm": 0.030514074489474297, - "learning_rate": 1.0107040963924027e-05, - "loss": 0.00020711682736873627, - "step": 4490 - }, - { - "epoch": 3.0661664392905865, - "grad_norm": 0.001317308866418898, - "learning_rate": 1.0037132605302716e-05, - "loss": 0.00024969261139631274, - "step": 4495 - }, - { - "epoch": 3.069577080491132, - "grad_norm": 0.0012978437589481473, - "learning_rate": 9.967429482954768e-06, - "loss": 0.0001504249172285199, - "step": 4500 - }, - { - "epoch": 3.072987721691678, - "grad_norm": 0.0003166501992382109, - "learning_rate": 9.897932117787476e-06, - "loss": 0.0002773872809484601, - "step": 4505 - }, - { - "epoch": 3.0763983628922236, - "grad_norm": 0.0029924868140369654, - "learning_rate": 9.828641029170544e-06, - "loss": 0.0005509680602699518, - "step": 4510 - }, - { - "epoch": 3.0798090040927693, - "grad_norm": 0.003611995605751872, - "learning_rate": 9.759556734932064e-06, - "loss": 0.0004832141101360321, - "step": 4515 - }, - { - "epoch": 3.083219645293315, - "grad_norm": 0.003923687152564526, - "learning_rate": 9.690679751354736e-06, - "loss": 0.00014967764727771282, - "step": 4520 - }, - { - "epoch": 3.086630286493861, - "grad_norm": 0.012491169385612011, - "learning_rate": 9.62201059317195e-06, - "loss": 8.488112362101675e-05, - "step": 4525 - }, - { - "epoch": 3.0900409276944067, - "grad_norm": 0.00011767258547479287, - "learning_rate": 9.553549773564035e-06, - "loss": 5.874955677427351e-05, - "step": 4530 - }, - { - "epoch": 3.0934515688949524, - "grad_norm": 0.021170401945710182, - "learning_rate": 9.48529780415427e-06, - "loss": 0.00013614417985081674, - "step": 4535 - }, - { - "epoch": 3.096862210095498, - "grad_norm": 0.005378399509936571, - "learning_rate": 9.417255195005218e-06, - "loss": 0.00010139571968466043, - "step": 4540 - }, - { - "epoch": 3.100272851296044, - "grad_norm": 0.0013061281060799956, - "learning_rate": 9.349422454614815e-06, - "loss": 0.0004999907687306404, - "step": 4545 - }, - { - "epoch": 3.1036834924965895, - "grad_norm": 0.000693993701133877, - "learning_rate": 9.281800089912605e-06, - "loss": 0.0001406701048836112, - "step": 4550 - }, - { - "epoch": 3.107094133697135, - "grad_norm": 0.003168008290231228, - "learning_rate": 9.214388606255934e-06, - "loss": 0.0002458775183185935, - "step": 4555 - }, - { - "epoch": 3.110504774897681, - "grad_norm": 0.001224424922838807, - "learning_rate": 9.147188507426224e-06, - "loss": 5.902486154809594e-05, - "step": 4560 - }, - { - "epoch": 3.1139154160982265, - "grad_norm": 0.0006458029965870082, - "learning_rate": 9.080200295625125e-06, - "loss": 6.971908733248711e-05, - "step": 4565 - }, - { - "epoch": 3.117326057298772, - "grad_norm": 0.0004500233626458794, - "learning_rate": 9.013424471470832e-06, - "loss": 4.827580996789038e-05, - "step": 4570 - }, - { - "epoch": 3.120736698499318, - "grad_norm": 0.008253362029790878, - "learning_rate": 8.946861533994316e-06, - "loss": 0.00010066803079098464, - "step": 4575 - }, - { - "epoch": 3.1241473396998636, - "grad_norm": 0.0027607178781181574, - "learning_rate": 8.88051198063559e-06, - "loss": 0.00010761913144961, - "step": 4580 - }, - { - "epoch": 3.1275579809004093, - "grad_norm": 0.0012132832780480385, - "learning_rate": 8.81437630723999e-06, - "loss": 0.00010583751136437059, - "step": 4585 - }, - { - "epoch": 3.130968622100955, - "grad_norm": 0.013205752708017826, - "learning_rate": 8.748455008054519e-06, - "loss": 7.872265996411443e-05, - "step": 4590 - }, - { - "epoch": 3.1343792633015006, - "grad_norm": 0.010380366817116737, - "learning_rate": 8.682748575724071e-06, - "loss": 0.00027635702863335607, - "step": 4595 - }, - { - "epoch": 3.1377899045020463, - "grad_norm": 0.012770955450832844, - "learning_rate": 8.617257501287805e-06, - "loss": 0.00028360043652355673, - "step": 4600 - }, - { - "epoch": 3.141200545702592, - "grad_norm": 0.012632913887500763, - "learning_rate": 8.551982274175449e-06, - "loss": 4.925676621496678e-05, - "step": 4605 - }, - { - "epoch": 3.1446111869031377, - "grad_norm": 0.0028189525473862886, - "learning_rate": 8.486923382203703e-06, - "loss": 0.0039628144353628155, - "step": 4610 - }, - { - "epoch": 3.1480218281036834, - "grad_norm": 0.11367341130971909, - "learning_rate": 8.422081311572464e-06, - "loss": 0.000568081671372056, - "step": 4615 - }, - { - "epoch": 3.151432469304229, - "grad_norm": 0.00095061567844823, - "learning_rate": 8.35745654686135e-06, - "loss": 0.00036408030427992344, - "step": 4620 - }, - { - "epoch": 3.1548431105047747, - "grad_norm": 0.05772553011775017, - "learning_rate": 8.29304957102596e-06, - "loss": 0.0006427288055419922, - "step": 4625 - }, - { - "epoch": 3.1582537517053204, - "grad_norm": 0.017082368955016136, - "learning_rate": 8.22886086539432e-06, - "loss": 0.00015330149326473475, - "step": 4630 - }, - { - "epoch": 3.161664392905866, - "grad_norm": 0.0033851212356239557, - "learning_rate": 8.164890909663256e-06, - "loss": 0.00012121353065595031, - "step": 4635 - }, - { - "epoch": 3.1650750341064118, - "grad_norm": 0.000577523373067379, - "learning_rate": 8.101140181894868e-06, - "loss": 7.793278782628477e-05, - "step": 4640 - }, - { - "epoch": 3.168485675306958, - "grad_norm": 0.00039884017314761877, - "learning_rate": 8.037609158512875e-06, - "loss": 0.0014324543066322804, - "step": 4645 - }, - { - "epoch": 3.1718963165075036, - "grad_norm": 0.004100058693438768, - "learning_rate": 7.97429831429911e-06, - "loss": 0.00015990985557436943, - "step": 4650 - }, - { - "epoch": 3.1753069577080493, - "grad_norm": 0.005648438818752766, - "learning_rate": 7.911208122389956e-06, - "loss": 8.149745990522206e-05, - "step": 4655 - }, - { - "epoch": 3.178717598908595, - "grad_norm": 0.005042952951043844, - "learning_rate": 7.848339054272808e-06, - "loss": 9.191314456984401e-05, - "step": 4660 - }, - { - "epoch": 3.1821282401091406, - "grad_norm": 0.0007950706058181822, - "learning_rate": 7.785691579782546e-06, - "loss": 7.44381221011281e-05, - "step": 4665 - }, - { - "epoch": 3.1855388813096863, - "grad_norm": 0.0717335194349289, - "learning_rate": 7.723266167098058e-06, - "loss": 0.00012347951997071505, - "step": 4670 - }, - { - "epoch": 3.188949522510232, - "grad_norm": 0.011817894876003265, - "learning_rate": 7.661063282738685e-06, - "loss": 0.000311569613404572, - "step": 4675 - }, - { - "epoch": 3.1923601637107777, - "grad_norm": 0.011100267991423607, - "learning_rate": 7.599083391560774e-06, - "loss": 0.00023061195388436319, - "step": 4680 - }, - { - "epoch": 3.1957708049113234, - "grad_norm": 0.025753796100616455, - "learning_rate": 7.5373269567541776e-06, - "loss": 0.00013706330209970475, - "step": 4685 - }, - { - "epoch": 3.199181446111869, - "grad_norm": 0.0529993437230587, - "learning_rate": 7.47579443983886e-06, - "loss": 0.00025801956653594973, - "step": 4690 - }, - { - "epoch": 3.2025920873124147, - "grad_norm": 0.01164156198501587, - "learning_rate": 7.41448630066132e-06, - "loss": 0.0001246333820745349, - "step": 4695 - }, - { - "epoch": 3.2060027285129604, - "grad_norm": 0.0022943434305489063, - "learning_rate": 7.353402997391271e-06, - "loss": 4.788096994161606e-05, - "step": 4700 - }, - { - "epoch": 3.209413369713506, - "grad_norm": 0.0016377613646909595, - "learning_rate": 7.292544986518198e-06, - "loss": 0.0014273281209170817, - "step": 4705 - }, - { - "epoch": 3.212824010914052, - "grad_norm": 0.046027738600969315, - "learning_rate": 7.231912722847881e-06, - "loss": 0.00015070366207510234, - "step": 4710 - }, - { - "epoch": 3.2162346521145975, - "grad_norm": 0.12626095116138458, - "learning_rate": 7.171506659499067e-06, - "loss": 0.0002223264891654253, - "step": 4715 - }, - { - "epoch": 3.219645293315143, - "grad_norm": 0.004387991968542337, - "learning_rate": 7.1113272479000465e-06, - "loss": 5.1431613974273206e-05, - "step": 4720 - }, - { - "epoch": 3.223055934515689, - "grad_norm": 0.0005260159377939999, - "learning_rate": 7.051374937785289e-06, - "loss": 9.967307560145854e-05, - "step": 4725 - }, - { - "epoch": 3.2264665757162345, - "grad_norm": 0.001557494280859828, - "learning_rate": 6.9916501771920795e-06, - "loss": 3.6639469908550384e-05, - "step": 4730 - }, - { - "epoch": 3.22987721691678, - "grad_norm": 0.0013014579890295863, - "learning_rate": 6.932153412457195e-06, - "loss": 0.00015290889423340558, - "step": 4735 - }, - { - "epoch": 3.233287858117326, - "grad_norm": 0.0005693956045433879, - "learning_rate": 6.872885088213522e-06, - "loss": 9.23092185985297e-05, - "step": 4740 - }, - { - "epoch": 3.2366984993178716, - "grad_norm": 0.0008315684972330928, - "learning_rate": 6.813845647386771e-06, - "loss": 0.00010763210011646151, - "step": 4745 - }, - { - "epoch": 3.2401091405184177, - "grad_norm": 0.0021727036219090223, - "learning_rate": 6.755035531192148e-06, - "loss": 3.423129383008927e-05, - "step": 4750 - }, - { - "epoch": 3.2435197817189634, - "grad_norm": 0.0001480428036302328, - "learning_rate": 6.696455179131084e-06, - "loss": 0.023981352150440217, - "step": 4755 - }, - { - "epoch": 3.2455661664392905, - "eval_loss": 0.074391670525074, - "eval_runtime": 0.9318, - "eval_samples_per_second": 80.487, - "eval_steps_per_second": 2.146, - "step": 4758 - }, - { - "eval_cer_subset": 0.015260934732252351, - "eval_cer_subset_edit_distance": 112, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 4758 - }, - { - "epoch": 3.246930422919509, - "grad_norm": 0.007659688591957092, - "learning_rate": 6.638105028987886e-06, - "loss": 0.0004011324606835842, - "step": 4760 - }, - { - "epoch": 3.2503410641200547, - "grad_norm": 0.033579885959625244, - "learning_rate": 6.579985516826564e-06, - "loss": 0.000267721782438457, - "step": 4765 - }, - { - "epoch": 3.2537517053206004, - "grad_norm": 0.01196813303977251, - "learning_rate": 6.52209707698748e-06, - "loss": 0.00014587611658498645, - "step": 4770 - }, - { - "epoch": 3.257162346521146, - "grad_norm": 0.022396638989448547, - "learning_rate": 6.464440142084156e-06, - "loss": 0.00038080187514424326, - "step": 4775 - }, - { - "epoch": 3.260572987721692, - "grad_norm": 0.002048628870397806, - "learning_rate": 6.407015143000002e-06, - "loss": 0.007085768878459931, - "step": 4780 - }, - { - "epoch": 3.2639836289222375, - "grad_norm": 0.0019598486833274364, - "learning_rate": 6.3498225088851686e-06, - "loss": 0.0010610194876790047, - "step": 4785 - }, - { - "epoch": 3.267394270122783, - "grad_norm": 0.02006545290350914, - "learning_rate": 6.29286266715324e-06, - "loss": 0.00014606654876843096, - "step": 4790 - }, - { - "epoch": 3.270804911323329, - "grad_norm": 0.0005910536856390536, - "learning_rate": 6.236136043478106e-06, - "loss": 6.873804377391935e-05, - "step": 4795 - }, - { - "epoch": 3.2742155525238745, - "grad_norm": 0.021028559654951096, - "learning_rate": 6.179643061790775e-06, - "loss": 0.0013180674985051155, - "step": 4800 - }, - { - "epoch": 3.27762619372442, - "grad_norm": 0.0004542934475466609, - "learning_rate": 6.123384144276183e-06, - "loss": 4.965414409525692e-05, - "step": 4805 - }, - { - "epoch": 3.281036834924966, - "grad_norm": 0.041615571826696396, - "learning_rate": 6.067359711370047e-06, - "loss": 0.019744729995727538, - "step": 4810 - }, - { - "epoch": 3.2844474761255116, - "grad_norm": 0.002755318768322468, - "learning_rate": 6.011570181755754e-06, - "loss": 0.00010759549913927913, - "step": 4815 - }, - { - "epoch": 3.2878581173260573, - "grad_norm": 0.0032264923211187124, - "learning_rate": 5.956015972361171e-06, - "loss": 0.0007094295229762793, - "step": 4820 - }, - { - "epoch": 3.291268758526603, - "grad_norm": 1.4725555181503296, - "learning_rate": 5.900697498355589e-06, - "loss": 0.010635539144277572, - "step": 4825 - }, - { - "epoch": 3.2946793997271486, - "grad_norm": 0.017544033005833626, - "learning_rate": 5.84561517314657e-06, - "loss": 0.0003255015704780817, - "step": 4830 - }, - { - "epoch": 3.2980900409276943, - "grad_norm": 0.0035306529607623816, - "learning_rate": 5.790769408376922e-06, - "loss": 0.00013384120538830757, - "step": 4835 - }, - { - "epoch": 3.30150068212824, - "grad_norm": 0.0018269309075549245, - "learning_rate": 5.736160613921528e-06, - "loss": 0.0007960126735270023, - "step": 4840 - }, - { - "epoch": 3.3049113233287857, - "grad_norm": 0.010024248622357845, - "learning_rate": 5.6817891978843855e-06, - "loss": 0.0003233390394598246, - "step": 4845 - }, - { - "epoch": 3.3083219645293314, - "grad_norm": 0.004048422910273075, - "learning_rate": 5.627655566595489e-06, - "loss": 0.00011264056665822863, - "step": 4850 - }, - { - "epoch": 3.311732605729877, - "grad_norm": 0.0034091162960976362, - "learning_rate": 5.573760124607812e-06, - "loss": 0.00048314151354134083, - "step": 4855 - }, - { - "epoch": 3.3151432469304227, - "grad_norm": 0.009309383109211922, - "learning_rate": 5.5201032746942796e-06, - "loss": 0.0002101475838571787, - "step": 4860 - }, - { - "epoch": 3.3185538881309684, - "grad_norm": 0.06538962572813034, - "learning_rate": 5.466685417844797e-06, - "loss": 0.0003499136073514819, - "step": 4865 - }, - { - "epoch": 3.321964529331514, - "grad_norm": 0.002885893452912569, - "learning_rate": 5.413506953263162e-06, - "loss": 0.00048564458265900614, - "step": 4870 - }, - { - "epoch": 3.32537517053206, - "grad_norm": 0.004836421925574541, - "learning_rate": 5.3605682783642e-06, - "loss": 6.691411836072803e-05, - "step": 4875 - }, - { - "epoch": 3.328785811732606, - "grad_norm": 0.00475132092833519, - "learning_rate": 5.307869788770694e-06, - "loss": 0.0006194526329636573, - "step": 4880 - }, - { - "epoch": 3.3321964529331516, - "grad_norm": 0.005688230507075787, - "learning_rate": 5.255411878310482e-06, - "loss": 9.07582463696599e-05, - "step": 4885 - }, - { - "epoch": 3.3356070941336973, - "grad_norm": 0.0037306994199752808, - "learning_rate": 5.2031949390134856e-06, - "loss": 0.00012413164367899298, - "step": 4890 - }, - { - "epoch": 3.339017735334243, - "grad_norm": 0.011166035197675228, - "learning_rate": 5.15121936110882e-06, - "loss": 7.776234415359795e-05, - "step": 4895 - }, - { - "epoch": 3.3424283765347886, - "grad_norm": 0.0012123408960178494, - "learning_rate": 5.099485533021836e-06, - "loss": 2.784754615277052e-05, - "step": 4900 - }, - { - "epoch": 3.3458390177353343, - "grad_norm": 0.002357951132580638, - "learning_rate": 5.047993841371223e-06, - "loss": 0.00029555323999375105, - "step": 4905 - }, - { - "epoch": 3.34924965893588, - "grad_norm": 0.00030636831070296466, - "learning_rate": 4.996744670966138e-06, - "loss": 0.00010590272722765803, - "step": 4910 - }, - { - "epoch": 3.3526603001364257, - "grad_norm": 0.0058077736757695675, - "learning_rate": 4.945738404803348e-06, - "loss": 0.0001937644206918776, - "step": 4915 - }, - { - "epoch": 3.3560709413369714, - "grad_norm": 0.0007610859465785325, - "learning_rate": 4.8949754240642775e-06, - "loss": 0.00011190775549039245, - "step": 4920 - }, - { - "epoch": 3.359481582537517, - "grad_norm": 0.6333717107772827, - "learning_rate": 4.844456108112297e-06, - "loss": 0.0008945153094828129, - "step": 4925 - }, - { - "epoch": 3.3628922237380627, - "grad_norm": 0.006114002782851458, - "learning_rate": 4.794180834489772e-06, - "loss": 9.972437983378768e-05, - "step": 4930 - }, - { - "epoch": 3.3663028649386084, - "grad_norm": 0.0017828121781349182, - "learning_rate": 4.744149978915274e-06, - "loss": 7.763381581753493e-05, - "step": 4935 - }, - { - "epoch": 3.369713506139154, - "grad_norm": 0.0009099289891310036, - "learning_rate": 4.694363915280814e-06, - "loss": 0.05654715895652771, - "step": 4940 - }, - { - "epoch": 3.3731241473397, - "grad_norm": 0.013658248819410801, - "learning_rate": 4.644823015649009e-06, - "loss": 0.0013173201121389865, - "step": 4945 - }, - { - "epoch": 3.3765347885402455, - "grad_norm": 0.6665285229682922, - "learning_rate": 4.5955276502502944e-06, - "loss": 0.0018493477255105972, - "step": 4950 - }, - { - "epoch": 3.379945429740791, - "grad_norm": 0.001095029292628169, - "learning_rate": 4.546478187480176e-06, - "loss": 0.0005424355156719684, - "step": 4955 - }, - { - "epoch": 3.383356070941337, - "grad_norm": 0.015392723493278027, - "learning_rate": 4.497674993896503e-06, - "loss": 9.325146675109863e-05, - "step": 4960 - }, - { - "epoch": 3.3867667121418825, - "grad_norm": 0.08219064027070999, - "learning_rate": 4.449118434216653e-06, - "loss": 0.0004451565444469452, - "step": 4965 - }, - { - "epoch": 3.390177353342428, - "grad_norm": 0.003267089370638132, - "learning_rate": 4.4008088713148845e-06, - "loss": 0.00016891954001039267, - "step": 4970 - }, - { - "epoch": 3.3935879945429743, - "grad_norm": 0.008226803503930569, - "learning_rate": 4.35274666621957e-06, - "loss": 0.0001009777537547052, - "step": 4975 - }, - { - "epoch": 3.39699863574352, - "grad_norm": 0.01762073114514351, - "learning_rate": 4.304932178110558e-06, - "loss": 0.006487253308296204, - "step": 4980 - }, - { - "epoch": 3.4004092769440657, - "grad_norm": 0.0021151783876121044, - "learning_rate": 4.257365764316395e-06, - "loss": 0.00011428899597376585, - "step": 4985 - }, - { - "epoch": 3.4038199181446114, - "grad_norm": 0.0008960114791989326, - "learning_rate": 4.210047780311768e-06, - "loss": 3.825195599347353e-05, - "step": 4990 - }, - { - "epoch": 3.407230559345157, - "grad_norm": 0.058672014623880386, - "learning_rate": 4.162978579714753e-06, - "loss": 0.0005217622965574265, - "step": 4995 - }, - { - "epoch": 3.4106412005457027, - "grad_norm": 6.555901927640662e-05, - "learning_rate": 4.11615851428423e-06, - "loss": 0.00011362402001395822, - "step": 5000 - }, - { - "epoch": 3.4140518417462484, - "grad_norm": 0.21014879643917084, - "learning_rate": 4.069587933917221e-06, - "loss": 0.00179185438901186, - "step": 5005 - }, - { - "epoch": 3.417462482946794, - "grad_norm": 0.1315806359052658, - "learning_rate": 4.023267186646317e-06, - "loss": 0.0014887897297739983, - "step": 5010 - }, - { - "epoch": 3.42087312414734, - "grad_norm": 0.018509764224290848, - "learning_rate": 3.977196618637e-06, - "loss": 0.00016269356710836292, - "step": 5015 - }, - { - "epoch": 3.4242837653478855, - "grad_norm": 0.0007501108921132982, - "learning_rate": 3.931376574185166e-06, - "loss": 3.563327190931886e-05, - "step": 5020 - }, - { - "epoch": 3.427694406548431, - "grad_norm": 0.0036405418068170547, - "learning_rate": 3.885807395714441e-06, - "loss": 0.0009133132174611091, - "step": 5025 - }, - { - "epoch": 3.431105047748977, - "grad_norm": 0.01853407360613346, - "learning_rate": 3.840489423773698e-06, - "loss": 0.00011221827007830143, - "step": 5030 - }, - { - "epoch": 3.4345156889495225, - "grad_norm": 0.004317726474255323, - "learning_rate": 3.7954229970344725e-06, - "loss": 0.00033687916584312916, - "step": 5035 - }, - { - "epoch": 3.437926330150068, - "grad_norm": 0.003909669350832701, - "learning_rate": 3.7506084522884684e-06, - "loss": 7.524496177211404e-05, - "step": 5040 - }, - { - "epoch": 3.441336971350614, - "grad_norm": 0.016738831996917725, - "learning_rate": 3.7060461244449945e-06, - "loss": 9.533832781016827e-05, - "step": 5045 - }, - { - "epoch": 3.4447476125511596, - "grad_norm": 0.0012303644325584173, - "learning_rate": 3.6617363465284875e-06, - "loss": 0.00011376941110938788, - "step": 5050 - }, - { - "epoch": 3.4481582537517053, - "grad_norm": 0.005091819446533918, - "learning_rate": 3.617679449676028e-06, - "loss": 0.000578709552064538, - "step": 5055 - }, - { - "epoch": 3.451568894952251, - "grad_norm": 0.0007091189618222415, - "learning_rate": 3.5738757631348744e-06, - "loss": 7.042675861157477e-05, - "step": 5060 - }, - { - "epoch": 3.4549795361527966, - "grad_norm": 0.004033643286675215, - "learning_rate": 3.5303256142599407e-06, - "loss": 5.9417390730232e-05, - "step": 5065 - }, - { - "epoch": 3.4583901773533423, - "grad_norm": 0.018419573083519936, - "learning_rate": 3.487029328511444e-06, - "loss": 0.0001460162689909339, - "step": 5070 - }, - { - "epoch": 3.461800818553888, - "grad_norm": 0.0035185501910746098, - "learning_rate": 3.4439872294524025e-06, - "loss": 0.0001254791859537363, - "step": 5075 - }, - { - "epoch": 3.4652114597544337, - "grad_norm": 0.0007496779435314238, - "learning_rate": 3.401199638746241e-06, - "loss": 4.7221675049513576e-05, - "step": 5080 - }, - { - "epoch": 3.4686221009549794, - "grad_norm": 2.67924165725708, - "learning_rate": 3.3586668761543813e-06, - "loss": 0.0016780177131295205, - "step": 5085 - }, - { - "epoch": 3.472032742155525, - "grad_norm": 0.011002305895090103, - "learning_rate": 3.316389259533876e-06, - "loss": 9.696125634945929e-05, - "step": 5090 - }, - { - "epoch": 3.4754433833560707, - "grad_norm": 0.0005470504984259605, - "learning_rate": 3.2743671048349755e-06, - "loss": 3.457541752140969e-05, - "step": 5095 - }, - { - "epoch": 3.4788540245566164, - "grad_norm": 0.014639006927609444, - "learning_rate": 3.232600726098851e-06, - "loss": 0.005385900661349296, - "step": 5100 - }, - { - "epoch": 3.4822646657571625, - "grad_norm": 0.02914433367550373, - "learning_rate": 3.191090435455171e-06, - "loss": 0.00018561827018857003, - "step": 5105 - }, - { - "epoch": 3.485675306957708, - "grad_norm": 0.0009303450351580977, - "learning_rate": 3.1498365431198048e-06, - "loss": 8.976480457931758e-05, - "step": 5110 - }, - { - "epoch": 3.489085948158254, - "grad_norm": 0.001520369085483253, - "learning_rate": 3.1088393573924966e-06, - "loss": 0.00012106491485610604, - "step": 5115 - }, - { - "epoch": 3.4924965893587996, - "grad_norm": 0.0004795770801138133, - "learning_rate": 3.0680991846545836e-06, - "loss": 5.365515244193375e-05, - "step": 5120 - }, - { - "epoch": 3.495225102319236, - "eval_loss": 0.07684502005577087, - "eval_runtime": 0.9207, - "eval_samples_per_second": 81.46, - "eval_steps_per_second": 2.172, - "step": 5124 - }, - { - "eval_cer_subset": 0.014852159694781306, - "eval_cer_subset_edit_distance": 109, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 5124 - }, - { - "epoch": 3.4959072305593453, - "grad_norm": 0.0028329631313681602, - "learning_rate": 3.027616329366664e-06, - "loss": 0.00016044501680880784, - "step": 5125 - }, - { - "epoch": 3.499317871759891, - "grad_norm": 0.00026357462047599256, - "learning_rate": 2.987391094066345e-06, - "loss": 0.00023950175382196902, - "step": 5130 - }, - { - "epoch": 3.5027285129604366, - "grad_norm": 0.05449860543012619, - "learning_rate": 2.9474237793659956e-06, - "loss": 0.0002613885561004281, - "step": 5135 - }, - { - "epoch": 3.5061391541609823, - "grad_norm": 0.0011948413448408246, - "learning_rate": 2.907714683950471e-06, - "loss": 5.1718176109716296e-05, - "step": 5140 - }, - { - "epoch": 3.509549795361528, - "grad_norm": 0.005827807355672121, - "learning_rate": 2.8682641045748964e-06, - "loss": 0.002432660385966301, - "step": 5145 - }, - { - "epoch": 3.5129604365620737, - "grad_norm": 0.0015064133331179619, - "learning_rate": 2.829072336062463e-06, - "loss": 3.671176382340491e-05, - "step": 5150 - }, - { - "epoch": 3.5163710777626194, - "grad_norm": 0.0009614004520699382, - "learning_rate": 2.790139671302191e-06, - "loss": 8.640417945571244e-05, - "step": 5155 - }, - { - "epoch": 3.519781718963165, - "grad_norm": 0.0023597145918756723, - "learning_rate": 2.7514664012467727e-06, - "loss": 7.012838032096624e-05, - "step": 5160 - }, - { - "epoch": 3.5231923601637107, - "grad_norm": 0.0007319003925658762, - "learning_rate": 2.7130528149103657e-06, - "loss": 2.3347024398390204e-05, - "step": 5165 - }, - { - "epoch": 3.5266030013642564, - "grad_norm": 0.006225161254405975, - "learning_rate": 2.6748991993664774e-06, - "loss": 0.00018728851573541762, - "step": 5170 - }, - { - "epoch": 3.530013642564802, - "grad_norm": 0.5371012687683105, - "learning_rate": 2.637005839745772e-06, - "loss": 0.004397168383002281, - "step": 5175 - }, - { - "epoch": 3.533424283765348, - "grad_norm": 0.0009830278577283025, - "learning_rate": 2.5993730192339663e-06, - "loss": 5.1807129057124254e-05, - "step": 5180 - }, - { - "epoch": 3.5368349249658935, - "grad_norm": 0.002657173899933696, - "learning_rate": 2.562001019069726e-06, - "loss": 0.00013663843274116516, - "step": 5185 - }, - { - "epoch": 3.540245566166439, - "grad_norm": 0.005280365701764822, - "learning_rate": 2.5248901185425345e-06, - "loss": 0.00618811659514904, - "step": 5190 - }, - { - "epoch": 3.543656207366985, - "grad_norm": 0.5160993933677673, - "learning_rate": 2.488040594990606e-06, - "loss": 0.0008973299525678157, - "step": 5195 - }, - { - "epoch": 3.547066848567531, - "grad_norm": 0.00302395923063159, - "learning_rate": 2.451452723798844e-06, - "loss": 0.00022636731155216693, - "step": 5200 - }, - { - "epoch": 3.5504774897680766, - "grad_norm": 0.010127821005880833, - "learning_rate": 2.415126778396764e-06, - "loss": 0.0002638277132064104, - "step": 5205 - }, - { - "epoch": 3.5538881309686223, - "grad_norm": 0.0024330182932317257, - "learning_rate": 2.379063030256424e-06, - "loss": 0.0007223737891763448, - "step": 5210 - }, - { - "epoch": 3.557298772169168, - "grad_norm": 0.006238727364689112, - "learning_rate": 2.343261748890457e-06, - "loss": 0.0010717549361288548, - "step": 5215 - }, - { - "epoch": 3.5607094133697137, - "grad_norm": 0.0005047949962317944, - "learning_rate": 2.3077232018500024e-06, - "loss": 7.232563220895826e-05, - "step": 5220 - }, - { - "epoch": 3.5641200545702594, - "grad_norm": 0.0017868287395685911, - "learning_rate": 2.2724476547227325e-06, - "loss": 5.702247726731002e-05, - "step": 5225 - }, - { - "epoch": 3.567530695770805, - "grad_norm": 0.0005412849714048207, - "learning_rate": 2.2374353711308467e-06, - "loss": 4.539011861197651e-05, - "step": 5230 - }, - { - "epoch": 3.5709413369713507, - "grad_norm": 0.0008777446346357465, - "learning_rate": 2.2026866127291516e-06, - "loss": 0.0010734600946307182, - "step": 5235 - }, - { - "epoch": 3.5743519781718964, - "grad_norm": 0.020784150809049606, - "learning_rate": 2.1682016392030198e-06, - "loss": 0.0001352211693301797, - "step": 5240 - }, - { - "epoch": 3.577762619372442, - "grad_norm": 0.0032426826655864716, - "learning_rate": 2.133980708266539e-06, - "loss": 0.0001363346236757934, - "step": 5245 - }, - { - "epoch": 3.581173260572988, - "grad_norm": 0.00047889159759506583, - "learning_rate": 2.100024075660538e-06, - "loss": 0.00017863117391243578, - "step": 5250 - }, - { - "epoch": 3.5845839017735335, - "grad_norm": 0.00031558674527332187, - "learning_rate": 2.066331995150666e-06, - "loss": 0.00021245412062853575, - "step": 5255 - }, - { - "epoch": 3.587994542974079, - "grad_norm": 0.0009245559340342879, - "learning_rate": 2.032904718525531e-06, - "loss": 0.0003619278548285365, - "step": 5260 - }, - { - "epoch": 3.591405184174625, - "grad_norm": 0.0013650038745254278, - "learning_rate": 1.9997424955948054e-06, - "loss": 3.983181086368859e-05, - "step": 5265 - }, - { - "epoch": 3.5948158253751705, - "grad_norm": 0.004131193272769451, - "learning_rate": 1.9668455741873424e-06, - "loss": 5.411377060227096e-05, - "step": 5270 - }, - { - "epoch": 3.598226466575716, - "grad_norm": 0.0061623696237802505, - "learning_rate": 1.9342142001493394e-06, - "loss": 5.686166696250439e-05, - "step": 5275 - }, - { - "epoch": 3.601637107776262, - "grad_norm": 0.0031696371734142303, - "learning_rate": 1.9018486173424982e-06, - "loss": 0.00025364819448441266, - "step": 5280 - }, - { - "epoch": 3.6050477489768076, - "grad_norm": 0.0009586649248376489, - "learning_rate": 1.8697490676422052e-06, - "loss": 0.0001193733187392354, - "step": 5285 - }, - { - "epoch": 3.6084583901773533, - "grad_norm": 0.04152340814471245, - "learning_rate": 1.837915790935708e-06, - "loss": 0.00011144874151796103, - "step": 5290 - }, - { - "epoch": 3.611869031377899, - "grad_norm": 0.0054172491654753685, - "learning_rate": 1.8063490251203628e-06, - "loss": 0.0011931947432458402, - "step": 5295 - }, - { - "epoch": 3.6152796725784446, - "grad_norm": 0.009695466607809067, - "learning_rate": 1.7750490061018058e-06, - "loss": 0.00018604533979669214, - "step": 5300 - }, - { - "epoch": 3.6186903137789903, - "grad_norm": 0.021507805213332176, - "learning_rate": 1.7440159677922186e-06, - "loss": 0.00013907469110563397, - "step": 5305 - }, - { - "epoch": 3.622100954979536, - "grad_norm": 0.006461102515459061, - "learning_rate": 1.7132501421085659e-06, - "loss": 0.0003493543481454253, - "step": 5310 - }, - { - "epoch": 3.6255115961800817, - "grad_norm": 0.0105512710288167, - "learning_rate": 1.6827517589709057e-06, - "loss": 7.155602797865867e-05, - "step": 5315 - }, - { - "epoch": 3.6289222373806274, - "grad_norm": 0.0008970944327302277, - "learning_rate": 1.6525210463005868e-06, - "loss": 0.00011632050154730678, - "step": 5320 - }, - { - "epoch": 3.632332878581173, - "grad_norm": 0.00177982565946877, - "learning_rate": 1.6225582300186405e-06, - "loss": 0.002909584902226925, - "step": 5325 - }, - { - "epoch": 3.6357435197817187, - "grad_norm": 0.001438588253222406, - "learning_rate": 1.5928635340440255e-06, - "loss": 0.00019459464820101858, - "step": 5330 - }, - { - "epoch": 3.6391541609822644, - "grad_norm": 0.00501625519245863, - "learning_rate": 1.5634371802919696e-06, - "loss": 6.730342865921557e-05, - "step": 5335 - }, - { - "epoch": 3.64256480218281, - "grad_norm": 0.003331825602799654, - "learning_rate": 1.534279388672334e-06, - "loss": 4.0749041363596913e-05, - "step": 5340 - }, - { - "epoch": 3.645975443383356, - "grad_norm": 0.000979799311608076, - "learning_rate": 1.5053903770879518e-06, - "loss": 3.709651937242597e-05, - "step": 5345 - }, - { - "epoch": 3.649386084583902, - "grad_norm": 0.000521843961905688, - "learning_rate": 1.4767703614329884e-06, - "loss": 8.522009593434632e-05, - "step": 5350 - }, - { - "epoch": 3.6527967257844476, - "grad_norm": 0.0004526655247900635, - "learning_rate": 1.4484195555913582e-06, - "loss": 2.5001828907988967e-05, - "step": 5355 - }, - { - "epoch": 3.6562073669849933, - "grad_norm": 0.0029042293317615986, - "learning_rate": 1.420338171435094e-06, - "loss": 0.00018259206553921102, - "step": 5360 - }, - { - "epoch": 3.659618008185539, - "grad_norm": 0.003727864008396864, - "learning_rate": 1.3925264188227925e-06, - "loss": 0.00011327432002872229, - "step": 5365 - }, - { - "epoch": 3.6630286493860846, - "grad_norm": 0.0013012837152928114, - "learning_rate": 1.3649845055980172e-06, - "loss": 3.7070104735903445e-05, - "step": 5370 - }, - { - "epoch": 3.6664392905866303, - "grad_norm": 0.013001679442822933, - "learning_rate": 1.3377126375877817e-06, - "loss": 5.2633637096732855e-05, - "step": 5375 - }, - { - "epoch": 3.669849931787176, - "grad_norm": 0.0007804427877999842, - "learning_rate": 1.3107110186009643e-06, - "loss": 3.320692339912057e-05, - "step": 5380 - }, - { - "epoch": 3.6732605729877217, - "grad_norm": 0.002156102331355214, - "learning_rate": 1.2839798504268333e-06, - "loss": 0.0010122624225914478, - "step": 5385 - }, - { - "epoch": 3.6766712141882674, - "grad_norm": 0.014592370949685574, - "learning_rate": 1.2575193328334995e-06, - "loss": 8.401789236813783e-05, - "step": 5390 - }, - { - "epoch": 3.680081855388813, - "grad_norm": 0.0031752304639667273, - "learning_rate": 1.2313296635664576e-06, - "loss": 3.530957619659603e-05, - "step": 5395 - }, - { - "epoch": 3.6834924965893587, - "grad_norm": 0.0010194798232987523, - "learning_rate": 1.205411038347072e-06, - "loss": 3.8149964530020954e-05, - "step": 5400 - }, - { - "epoch": 3.6869031377899044, - "grad_norm": 0.015784459188580513, - "learning_rate": 1.1797636508711433e-06, - "loss": 0.0003640792798250914, - "step": 5405 - }, - { - "epoch": 3.69031377899045, - "grad_norm": 0.044826168566942215, - "learning_rate": 1.1543876928074485e-06, - "loss": 9.083467884920537e-05, - "step": 5410 - }, - { - "epoch": 3.693724420190996, - "grad_norm": 0.00281776231713593, - "learning_rate": 1.1292833537963162e-06, - "loss": 7.455614977516233e-05, - "step": 5415 - }, - { - "epoch": 3.6971350613915415, - "grad_norm": 0.0028589575085788965, - "learning_rate": 1.1044508214481981e-06, - "loss": 0.00014598952839151025, - "step": 5420 - }, - { - "epoch": 3.700545702592087, - "grad_norm": 0.22950734198093414, - "learning_rate": 1.0798902813422674e-06, - "loss": 0.0015467993915081024, - "step": 5425 - }, - { - "epoch": 3.7039563437926333, - "grad_norm": 0.00023460436204914004, - "learning_rate": 1.055601917025052e-06, - "loss": 9.705557604320348e-05, - "step": 5430 - }, - { - "epoch": 3.707366984993179, - "grad_norm": 0.001205096603371203, - "learning_rate": 1.0315859100090278e-06, - "loss": 0.00016396433347836136, - "step": 5435 - }, - { - "epoch": 3.7107776261937246, - "grad_norm": 0.001592331798747182, - "learning_rate": 1.0078424397713072e-06, - "loss": 5.8903940953314304e-05, - "step": 5440 - }, - { - "epoch": 3.7141882673942703, - "grad_norm": 0.006218360736966133, - "learning_rate": 9.843716837522524e-07, - "loss": 0.00036285766400396825, - "step": 5445 - }, - { - "epoch": 3.717598908594816, - "grad_norm": 0.0005638732109218836, - "learning_rate": 9.611738173541856e-07, - "loss": 9.058183059096336e-05, - "step": 5450 - }, - { - "epoch": 3.7210095497953617, - "grad_norm": 0.0001975312188733369, - "learning_rate": 9.382490139400386e-07, - "loss": 0.0006882708985358477, - "step": 5455 - }, - { - "epoch": 3.7244201909959074, - "grad_norm": 0.0014521559933200479, - "learning_rate": 9.155974448321182e-07, - "loss": 4.348080838099122e-05, - "step": 5460 - }, - { - "epoch": 3.727830832196453, - "grad_norm": 0.0007730235811322927, - "learning_rate": 8.932192793107515e-07, - "loss": 6.59460376482457e-05, - "step": 5465 - }, - { - "epoch": 3.7312414733969987, - "grad_norm": 0.0017008045688271523, - "learning_rate": 8.711146846130834e-07, - "loss": 4.6517132432200015e-05, - "step": 5470 - }, - { - "epoch": 3.7346521145975444, - "grad_norm": 0.000825837894808501, - "learning_rate": 8.492838259317902e-07, - "loss": 5.0807202933356165e-05, - "step": 5475 - }, - { - "epoch": 3.73806275579809, - "grad_norm": 0.0004800660244654864, - "learning_rate": 8.277268664138553e-07, - "loss": 7.82750197686255e-05, - "step": 5480 - }, - { - "epoch": 3.741473396998636, - "grad_norm": 0.024822546169161797, - "learning_rate": 8.06443967159362e-07, - "loss": 6.785190780647098e-05, - "step": 5485 - }, - { - "epoch": 3.7448840381991815, - "grad_norm": 0.0010685365414246917, - "learning_rate": 7.854352872202735e-07, - "loss": 6.689840811304748e-05, - "step": 5490 - }, - { - "epoch": 3.7448840381991815, - "eval_loss": 0.07814140617847443, - "eval_runtime": 0.8865, - "eval_samples_per_second": 84.605, - "eval_steps_per_second": 2.256, - "step": 5490 - }, - { - "eval_cer_subset": 0.014852159694781306, - "eval_cer_subset_edit_distance": 109, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 5490 - }, - { - "epoch": 3.748294679399727, - "grad_norm": 0.00021385519357863814, - "learning_rate": 7.64700983599234e-07, - "loss": 2.2849228116683662e-05, - "step": 5495 - }, - { - "epoch": 3.751705320600273, - "grad_norm": 0.0010771763045340776, - "learning_rate": 7.442412112484406e-07, - "loss": 0.0004126048181205988, - "step": 5500 - }, - { - "epoch": 3.7551159618008185, - "grad_norm": 0.0018533640541136265, - "learning_rate": 7.240561230684311e-07, - "loss": 5.3862365894019605e-05, - "step": 5505 - }, - { - "epoch": 3.758526603001364, - "grad_norm": 0.005947180092334747, - "learning_rate": 7.041458699069774e-07, - "loss": 0.00014215235132724046, - "step": 5510 - }, - { - "epoch": 3.76193724420191, - "grad_norm": 0.0012127163354307413, - "learning_rate": 6.845106005579401e-07, - "loss": 8.649011724628508e-05, - "step": 5515 - }, - { - "epoch": 3.7653478854024556, - "grad_norm": 0.008430424146354198, - "learning_rate": 6.651504617601736e-07, - "loss": 4.855484585277736e-05, - "step": 5520 - }, - { - "epoch": 3.7687585266030013, - "grad_norm": 0.0005919627728872001, - "learning_rate": 6.460655981964061e-07, - "loss": 9.893624810501933e-05, - "step": 5525 - }, - { - "epoch": 3.772169167803547, - "grad_norm": 0.0023226868361234665, - "learning_rate": 6.272561524921782e-07, - "loss": 0.00013384660705924035, - "step": 5530 - }, - { - "epoch": 3.7755798090040926, - "grad_norm": 0.0009323696722276509, - "learning_rate": 6.087222652147644e-07, - "loss": 0.000312839774414897, - "step": 5535 - }, - { - "epoch": 3.7789904502046383, - "grad_norm": 0.0005047317827120423, - "learning_rate": 5.904640748721448e-07, - "loss": 7.338747382164001e-05, - "step": 5540 - }, - { - "epoch": 3.782401091405184, - "grad_norm": 0.0005660484894178808, - "learning_rate": 5.724817179119268e-07, - "loss": 6.945489440113306e-05, - "step": 5545 - }, - { - "epoch": 3.7858117326057297, - "grad_norm": 0.1210128664970398, - "learning_rate": 5.54775328720379e-07, - "loss": 0.0005033219233155251, - "step": 5550 - }, - { - "epoch": 3.7892223738062754, - "grad_norm": 0.0032783239148557186, - "learning_rate": 5.373450396213785e-07, - "loss": 7.25714024156332e-05, - "step": 5555 - }, - { - "epoch": 3.792633015006821, - "grad_norm": 0.022924935445189476, - "learning_rate": 5.201909808754567e-07, - "loss": 0.0003666009521111846, - "step": 5560 - }, - { - "epoch": 3.7960436562073667, - "grad_norm": 0.0006960026803426445, - "learning_rate": 5.033132806788048e-07, - "loss": 2.8438412118703128e-05, - "step": 5565 - }, - { - "epoch": 3.799454297407913, - "grad_norm": 0.0023685309570282698, - "learning_rate": 4.867120651623327e-07, - "loss": 4.687009786721319e-05, - "step": 5570 - }, - { - "epoch": 3.8028649386084585, - "grad_norm": 0.000736989954020828, - "learning_rate": 4.703874583906989e-07, - "loss": 0.00036185523495078085, - "step": 5575 - }, - { - "epoch": 3.806275579809004, - "grad_norm": 0.002065070439130068, - "learning_rate": 4.5433958236142795e-07, - "loss": 4.816077707801014e-05, - "step": 5580 - }, - { - "epoch": 3.80968622100955, - "grad_norm": 0.006012174300849438, - "learning_rate": 4.3856855700395294e-07, - "loss": 0.003127726539969444, - "step": 5585 - }, - { - "epoch": 3.8130968622100956, - "grad_norm": 0.0058719078078866005, - "learning_rate": 4.230745001787453e-07, - "loss": 0.00023026710841804742, - "step": 5590 - }, - { - "epoch": 3.8165075034106413, - "grad_norm": 0.001011750428006053, - "learning_rate": 4.078575276764237e-07, - "loss": 4.3114786967635156e-05, - "step": 5595 - }, - { - "epoch": 3.819918144611187, - "grad_norm": 0.0018015744863077998, - "learning_rate": 3.929177532168967e-07, - "loss": 6.310571334324777e-05, - "step": 5600 - }, - { - "epoch": 3.8233287858117326, - "grad_norm": 0.005773304495960474, - "learning_rate": 3.7825528844850903e-07, - "loss": 0.008211637288331986, - "step": 5605 - }, - { - "epoch": 3.8267394270122783, - "grad_norm": 0.005971482954919338, - "learning_rate": 3.6387024294720483e-07, - "loss": 0.00014537961687892674, - "step": 5610 - }, - { - "epoch": 3.830150068212824, - "grad_norm": 0.0021432344801723957, - "learning_rate": 3.497627242157075e-07, - "loss": 0.0003147674491629004, - "step": 5615 - }, - { - "epoch": 3.8335607094133697, - "grad_norm": 0.01809293031692505, - "learning_rate": 3.359328376827286e-07, - "loss": 0.00021316034253686665, - "step": 5620 - }, - { - "epoch": 3.8369713506139154, - "grad_norm": 0.001072075916454196, - "learning_rate": 3.223806867021561e-07, - "loss": 0.0004111470188945532, - "step": 5625 - }, - { - "epoch": 3.840381991814461, - "grad_norm": 0.001951684826053679, - "learning_rate": 3.0910637255231325e-07, - "loss": 0.00011240362655371428, - "step": 5630 - }, - { - "epoch": 3.8437926330150067, - "grad_norm": 0.004423170816153288, - "learning_rate": 2.9610999443517996e-07, - "loss": 0.00011202857131138444, - "step": 5635 - }, - { - "epoch": 3.8472032742155524, - "grad_norm": 0.00013841276813764125, - "learning_rate": 2.8339164947564775e-07, - "loss": 3.643531817942858e-05, - "step": 5640 - }, - { - "epoch": 3.850613915416098, - "grad_norm": 0.00026944882120005786, - "learning_rate": 2.709514327208118e-07, - "loss": 6.067348294891417e-05, - "step": 5645 - }, - { - "epoch": 3.854024556616644, - "grad_norm": 0.000910903443582356, - "learning_rate": 2.587894371392549e-07, - "loss": 0.004593908041715622, - "step": 5650 - }, - { - "epoch": 3.85743519781719, - "grad_norm": 0.0011465694988146424, - "learning_rate": 2.4690575362034393e-07, - "loss": 6.263987743295729e-05, - "step": 5655 - }, - { - "epoch": 3.8608458390177356, - "grad_norm": 0.0008314873557537794, - "learning_rate": 2.3530047097354704e-07, - "loss": 0.00017264452762901782, - "step": 5660 - }, - { - "epoch": 3.8642564802182813, - "grad_norm": 0.0015781678957864642, - "learning_rate": 2.2397367592780074e-07, - "loss": 8.745520026423037e-05, - "step": 5665 - }, - { - "epoch": 3.867667121418827, - "grad_norm": 0.002430699532851577, - "learning_rate": 2.1292545313082721e-07, - "loss": 0.00019512665458023548, - "step": 5670 - }, - { - "epoch": 3.8710777626193726, - "grad_norm": 0.000959254743065685, - "learning_rate": 2.021558851485097e-07, - "loss": 7.31457956135273e-05, - "step": 5675 - }, - { - "epoch": 3.8744884038199183, - "grad_norm": 0.0041350554674863815, - "learning_rate": 1.9166505246428903e-07, - "loss": 5.959914997220039e-05, - "step": 5680 - }, - { - "epoch": 3.877899045020464, - "grad_norm": 0.0020819318015128374, - "learning_rate": 1.814530334785555e-07, - "loss": 9.297068463638425e-05, - "step": 5685 - }, - { - "epoch": 3.8813096862210097, - "grad_norm": 0.019831830635666847, - "learning_rate": 1.7151990450804953e-07, - "loss": 6.57864729873836e-05, - "step": 5690 - }, - { - "epoch": 3.8847203274215554, - "grad_norm": 0.006569270975887775, - "learning_rate": 1.6186573978531626e-07, - "loss": 0.0001178255770355463, - "step": 5695 - }, - { - "epoch": 3.888130968622101, - "grad_norm": 0.028108395636081696, - "learning_rate": 1.524906114581309e-07, - "loss": 0.00034154695458710194, - "step": 5700 - }, - { - "epoch": 3.8915416098226467, - "grad_norm": 0.016931869089603424, - "learning_rate": 1.4339458958896182e-07, - "loss": 0.00014965070877224207, - "step": 5705 - }, - { - "epoch": 3.8949522510231924, - "grad_norm": 0.004743209574371576, - "learning_rate": 1.3457774215445415e-07, - "loss": 9.899177821353078e-05, - "step": 5710 - }, - { - "epoch": 3.898362892223738, - "grad_norm": 0.005506719462573528, - "learning_rate": 1.2604013504493028e-07, - "loss": 8.313862490467727e-05, - "step": 5715 - }, - { - "epoch": 3.901773533424284, - "grad_norm": 0.031609803438186646, - "learning_rate": 1.1778183206386516e-07, - "loss": 0.0001896424451842904, - "step": 5720 - }, - { - "epoch": 3.9051841746248295, - "grad_norm": 0.004502533935010433, - "learning_rate": 1.0980289492744931e-07, - "loss": 0.00013039627810940146, - "step": 5725 - }, - { - "epoch": 3.908594815825375, - "grad_norm": 0.004705691710114479, - "learning_rate": 1.0210338326409329e-07, - "loss": 0.00019792017992585896, - "step": 5730 - }, - { - "epoch": 3.912005457025921, - "grad_norm": 0.0037855699192732573, - "learning_rate": 9.468335461400717e-08, - "loss": 0.0002551279263570905, - "step": 5735 - }, - { - "epoch": 3.9154160982264665, - "grad_norm": 0.10445121675729752, - "learning_rate": 8.754286442875929e-08, - "loss": 0.0006038970313966274, - "step": 5740 - }, - { - "epoch": 3.918826739427012, - "grad_norm": 0.001668531564064324, - "learning_rate": 8.068196607085992e-08, - "loss": 0.0005313832312822342, - "step": 5745 - }, - { - "epoch": 3.922237380627558, - "grad_norm": 0.00047295662807300687, - "learning_rate": 7.410071081336982e-08, - "loss": 9.868838824331761e-05, - "step": 5750 - }, - { - "epoch": 3.9256480218281036, - "grad_norm": 0.0025327601470053196, - "learning_rate": 6.779914783950902e-08, - "loss": 0.000324072758667171, - "step": 5755 - }, - { - "epoch": 3.9290586630286493, - "grad_norm": 0.01139845047146082, - "learning_rate": 6.177732424229448e-08, - "loss": 0.00017668742220848798, - "step": 5760 - }, - { - "epoch": 3.932469304229195, - "grad_norm": 0.04569331184029579, - "learning_rate": 5.6035285024186305e-08, - "loss": 0.0721853256225586, - "step": 5765 - }, - { - "epoch": 3.9358799454297406, - "grad_norm": 0.004628063179552555, - "learning_rate": 5.057307309675879e-08, - "loss": 4.649970214813948e-05, - "step": 5770 - }, - { - "epoch": 3.9392905866302863, - "grad_norm": 0.009363708086311817, - "learning_rate": 4.539072928036735e-08, - "loss": 0.001338627841323614, - "step": 5775 - }, - { - "epoch": 3.942701227830832, - "grad_norm": 0.0014635550323873758, - "learning_rate": 4.0488292303844647e-08, - "loss": 0.0005166372284293175, - "step": 5780 - }, - { - "epoch": 3.9461118690313777, - "grad_norm": 0.004816197324544191, - "learning_rate": 3.586579880422574e-08, - "loss": 0.00010666162706911564, - "step": 5785 - }, - { - "epoch": 3.9495225102319234, - "grad_norm": 0.0007145903073251247, - "learning_rate": 3.1523283326452544e-08, - "loss": 8.264535572379827e-05, - "step": 5790 - }, - { - "epoch": 3.952933151432469, - "grad_norm": 0.005964918527752161, - "learning_rate": 2.7460778323144805e-08, - "loss": 8.588915225118398e-05, - "step": 5795 - }, - { - "epoch": 3.956343792633015, - "grad_norm": 0.0041807787492871284, - "learning_rate": 2.3678314154317034e-08, - "loss": 0.00015257378108799458, - "step": 5800 - }, - { - "epoch": 3.959754433833561, - "grad_norm": 0.001413834630511701, - "learning_rate": 2.017591908719529e-08, - "loss": 0.00011678774608299136, - "step": 5805 - }, - { - "epoch": 3.9631650750341065, - "grad_norm": 0.003702461253851652, - "learning_rate": 1.6953619295971555e-08, - "loss": 0.00012639843625947834, - "step": 5810 - }, - { - "epoch": 3.966575716234652, - "grad_norm": 0.0013520204229280353, - "learning_rate": 1.4011438861633029e-08, - "loss": 0.00011726657394319772, - "step": 5815 - }, - { - "epoch": 3.969986357435198, - "grad_norm": 0.08151526749134064, - "learning_rate": 1.1349399771762302e-08, - "loss": 0.00021847528405487536, - "step": 5820 - }, - { - "epoch": 3.9733969986357436, - "grad_norm": 0.00278874090872705, - "learning_rate": 8.967521920383303e-09, - "loss": 4.5762333320453764e-05, - "step": 5825 - }, - { - "epoch": 3.9768076398362893, - "grad_norm": 0.0007764157489873469, - "learning_rate": 6.865823107811419e-09, - "loss": 0.004313124716281891, - "step": 5830 - }, - { - "epoch": 3.980218281036835, - "grad_norm": 0.024059277027845383, - "learning_rate": 5.0443190405285995e-09, - "loss": 0.00012323500122874976, - "step": 5835 - }, - { - "epoch": 3.9836289222373806, - "grad_norm": 0.04550166428089142, - "learning_rate": 3.5030233310417987e-09, - "loss": 0.00018079780275002122, - "step": 5840 - }, - { - "epoch": 3.9870395634379263, - "grad_norm": 0.0010700125712901354, - "learning_rate": 2.241947497808039e-09, - "loss": 0.0003132110228762031, - "step": 5845 - }, - { - "epoch": 3.990450204638472, - "grad_norm": 0.0018866128521040082, - "learning_rate": 1.2611009651386505e-09, - "loss": 9.605747763998806e-05, - "step": 5850 - }, - { - "epoch": 3.9938608458390177, - "grad_norm": 0.023178091272711754, - "learning_rate": 5.60491063120172e-10, - "loss": 0.00015867352485656738, - "step": 5855 - }, - { - "epoch": 3.9945429740791267, - "eval_loss": 0.07876446098089218, - "eval_runtime": 0.9242, - "eval_samples_per_second": 81.155, - "eval_steps_per_second": 2.164, - "step": 5856 - }, - { - "eval_cer_subset": 0.014852159694781306, - "eval_cer_subset_edit_distance": 109, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 5856 - }, - { - "epoch": 3.9972714870395634, - "grad_norm": 0.024283738806843758, - "learning_rate": 1.4012302757271388e-10, - "loss": 9.466741466894746e-05, - "step": 5860 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 6.115573349203968e+16, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-5864/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/adapter_config.json deleted file mode 100644 index 434760415b669853f06b2a616d415df01cc3f177..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "up_proj", - "gate_proj", - "down_proj", - "o_proj", - "v_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/adapter_model.safetensors deleted file mode 100644 index 1baab9105c129a634342263395e1c68774a8aa1e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:190ea47addc604be2e1cd959860c305670d8b735a85de646c2b8bacfa6edafb9 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/optimizer.pt deleted file mode 100644 index 9e207b4a7ea38527013941e48277d261b67506f9..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:19239c8519befd022e830fd7939462404cd780b68f855948892c8b8d82282278 -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/rng_state.pth deleted file mode 100644 index 567b652f4426334d6573c42dfcfb408434537684..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ec2cf52e4c3f15759e904ba93b5a10165bb850df54a226c7977bcfa5b79f76fb -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/scheduler.pt deleted file mode 100644 index e2a0feedc0262f951c05f13c6b8cf4740328e707..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ce57785080eda2ead7c151998c630b491013bd31fd338b06d64bde9f0f287969 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/trainer_state.json deleted file mode 100644 index e694fa0e245295c94ae196833bd3e07834d15203..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/trainer_state.json +++ /dev/null @@ -1,1088 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.49931787175989084, - "eval_steps": 366, - "global_step": 732, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0034106412005457027, - "grad_norm": 3.1571648120880127, - "learning_rate": 2.542372881355932e-06, - "loss": 1.6981744766235352, - "step": 5 - }, - { - "epoch": 0.0068212824010914054, - "grad_norm": 9.80073070526123, - "learning_rate": 5.720338983050847e-06, - "loss": 1.5801130294799806, - "step": 10 - }, - { - "epoch": 0.010231923601637109, - "grad_norm": 4.481558799743652, - "learning_rate": 8.898305084745763e-06, - "loss": 2.1718717575073243, - "step": 15 - }, - { - "epoch": 0.013642564802182811, - "grad_norm": 7.221553802490234, - "learning_rate": 1.2076271186440677e-05, - "loss": 1.5270480155944823, - "step": 20 - }, - { - "epoch": 0.017053206002728513, - "grad_norm": 5.330269813537598, - "learning_rate": 1.5254237288135592e-05, - "loss": 0.9586630821228027, - "step": 25 - }, - { - "epoch": 0.020463847203274217, - "grad_norm": 1.7404323816299438, - "learning_rate": 1.8432203389830506e-05, - "loss": 0.48505802154541017, - "step": 30 - }, - { - "epoch": 0.023874488403819918, - "grad_norm": 1.2418887615203857, - "learning_rate": 2.1610169491525424e-05, - "loss": 0.24452033042907714, - "step": 35 - }, - { - "epoch": 0.027285129604365622, - "grad_norm": 1.5928819179534912, - "learning_rate": 2.4788135593220338e-05, - "loss": 0.25337533950805663, - "step": 40 - }, - { - "epoch": 0.030695770804911322, - "grad_norm": 2.0335769653320312, - "learning_rate": 2.796610169491525e-05, - "loss": 0.1938886046409607, - "step": 45 - }, - { - "epoch": 0.034106412005457026, - "grad_norm": 0.18029411137104034, - "learning_rate": 3.114406779661017e-05, - "loss": 0.1834133505821228, - "step": 50 - }, - { - "epoch": 0.03751705320600273, - "grad_norm": 1.7757192850112915, - "learning_rate": 3.432203389830508e-05, - "loss": 0.15151114463806153, - "step": 55 - }, - { - "epoch": 0.040927694406548434, - "grad_norm": 2.1376893520355225, - "learning_rate": 3.75e-05, - "loss": 0.21634674072265625, - "step": 60 - }, - { - "epoch": 0.04433833560709413, - "grad_norm": 2.196387767791748, - "learning_rate": 4.067796610169491e-05, - "loss": 0.11320395469665527, - "step": 65 - }, - { - "epoch": 0.047748976807639835, - "grad_norm": 1.5888807773590088, - "learning_rate": 4.3855932203389825e-05, - "loss": 0.11050152778625488, - "step": 70 - }, - { - "epoch": 0.05115961800818554, - "grad_norm": 4.606944561004639, - "learning_rate": 4.703389830508474e-05, - "loss": 0.23255281448364257, - "step": 75 - }, - { - "epoch": 0.054570259208731244, - "grad_norm": 1.7308639287948608, - "learning_rate": 5.021186440677966e-05, - "loss": 0.08995866179466247, - "step": 80 - }, - { - "epoch": 0.05798090040927694, - "grad_norm": 2.622438430786133, - "learning_rate": 5.338983050847457e-05, - "loss": 0.05715223550796509, - "step": 85 - }, - { - "epoch": 0.061391541609822645, - "grad_norm": 0.34140461683273315, - "learning_rate": 5.656779661016949e-05, - "loss": 0.06703727245330811, - "step": 90 - }, - { - "epoch": 0.06480218281036836, - "grad_norm": 2.7264959812164307, - "learning_rate": 5.97457627118644e-05, - "loss": 0.10092716217041016, - "step": 95 - }, - { - "epoch": 0.06821282401091405, - "grad_norm": 2.0234780311584473, - "learning_rate": 6.292372881355932e-05, - "loss": 0.03579937815666199, - "step": 100 - }, - { - "epoch": 0.07162346521145975, - "grad_norm": 1.3716133832931519, - "learning_rate": 6.610169491525423e-05, - "loss": 0.07851418852806091, - "step": 105 - }, - { - "epoch": 0.07503410641200546, - "grad_norm": 1.2307227849960327, - "learning_rate": 6.927966101694914e-05, - "loss": 0.07370938658714295, - "step": 110 - }, - { - "epoch": 0.07844474761255116, - "grad_norm": 1.7142690420150757, - "learning_rate": 7.245762711864406e-05, - "loss": 0.08064572811126709, - "step": 115 - }, - { - "epoch": 0.08185538881309687, - "grad_norm": 1.343595266342163, - "learning_rate": 7.499999439507554e-05, - "loss": 0.11841402053833008, - "step": 120 - }, - { - "epoch": 0.08526603001364257, - "grad_norm": 1.2511327266693115, - "learning_rate": 7.499979822289558e-05, - "loss": 0.06974860429763793, - "step": 125 - }, - { - "epoch": 0.08867667121418826, - "grad_norm": 1.5642656087875366, - "learning_rate": 7.49993218061684e-05, - "loss": 0.10972714424133301, - "step": 130 - }, - { - "epoch": 0.09208731241473397, - "grad_norm": 1.6669789552688599, - "learning_rate": 7.499856514845436e-05, - "loss": 0.09864612817764282, - "step": 135 - }, - { - "epoch": 0.09549795361527967, - "grad_norm": 0.9789333343505859, - "learning_rate": 7.499752825540815e-05, - "loss": 0.0699621558189392, - "step": 140 - }, - { - "epoch": 0.09890859481582538, - "grad_norm": 0.41792476177215576, - "learning_rate": 7.499621113477873e-05, - "loss": 0.06734349727630615, - "step": 145 - }, - { - "epoch": 0.10231923601637108, - "grad_norm": 1.5968533754348755, - "learning_rate": 7.499461379640919e-05, - "loss": 0.060729533433914185, - "step": 150 - }, - { - "epoch": 0.10572987721691678, - "grad_norm": 0.8512193560600281, - "learning_rate": 7.499273625223683e-05, - "loss": 0.052730172872543335, - "step": 155 - }, - { - "epoch": 0.10914051841746249, - "grad_norm": 1.3257211446762085, - "learning_rate": 7.499057851629299e-05, - "loss": 0.10094538927078248, - "step": 160 - }, - { - "epoch": 0.11255115961800818, - "grad_norm": 0.659118115901947, - "learning_rate": 7.498814060470288e-05, - "loss": 0.01604635864496231, - "step": 165 - }, - { - "epoch": 0.11596180081855388, - "grad_norm": 2.454979181289673, - "learning_rate": 7.49854225356856e-05, - "loss": 0.13272385597229003, - "step": 170 - }, - { - "epoch": 0.11937244201909959, - "grad_norm": 2.510040521621704, - "learning_rate": 7.498242432955388e-05, - "loss": 0.08396227955818177, - "step": 175 - }, - { - "epoch": 0.12278308321964529, - "grad_norm": 0.2637259364128113, - "learning_rate": 7.4979146008714e-05, - "loss": 0.05852065086364746, - "step": 180 - }, - { - "epoch": 0.126193724420191, - "grad_norm": 1.4320851564407349, - "learning_rate": 7.497558759766564e-05, - "loss": 0.10392802953720093, - "step": 185 - }, - { - "epoch": 0.1296043656207367, - "grad_norm": 1.273027777671814, - "learning_rate": 7.497174912300156e-05, - "loss": 0.08062989711761474, - "step": 190 - }, - { - "epoch": 0.1330150068212824, - "grad_norm": 0.8102871179580688, - "learning_rate": 7.496763061340759e-05, - "loss": 0.07294153571128845, - "step": 195 - }, - { - "epoch": 0.1364256480218281, - "grad_norm": 1.951328992843628, - "learning_rate": 7.496323209966228e-05, - "loss": 0.07738351821899414, - "step": 200 - }, - { - "epoch": 0.13983628922237382, - "grad_norm": 0.3880983889102936, - "learning_rate": 7.495855361463674e-05, - "loss": 0.07225048542022705, - "step": 205 - }, - { - "epoch": 0.1432469304229195, - "grad_norm": 3.3205058574676514, - "learning_rate": 7.495359519329433e-05, - "loss": 0.05682974457740784, - "step": 210 - }, - { - "epoch": 0.1466575716234652, - "grad_norm": 0.9203559160232544, - "learning_rate": 7.49483568726905e-05, - "loss": 0.08767472505569458, - "step": 215 - }, - { - "epoch": 0.15006821282401092, - "grad_norm": 0.585360586643219, - "learning_rate": 7.494283869197239e-05, - "loss": 0.039227068424224854, - "step": 220 - }, - { - "epoch": 0.1534788540245566, - "grad_norm": 1.7096059322357178, - "learning_rate": 7.493704069237862e-05, - "loss": 0.10281096696853638, - "step": 225 - }, - { - "epoch": 0.15688949522510232, - "grad_norm": 0.4110204875469208, - "learning_rate": 7.493096291723898e-05, - "loss": 0.04346161186695099, - "step": 230 - }, - { - "epoch": 0.16030013642564803, - "grad_norm": 1.3272292613983154, - "learning_rate": 7.492460541197404e-05, - "loss": 0.049719154834747314, - "step": 235 - }, - { - "epoch": 0.16371077762619374, - "grad_norm": 1.1005016565322876, - "learning_rate": 7.491796822409494e-05, - "loss": 0.09335108399391175, - "step": 240 - }, - { - "epoch": 0.16712141882673942, - "grad_norm": 0.7811501026153564, - "learning_rate": 7.491105140320285e-05, - "loss": 0.05943926572799683, - "step": 245 - }, - { - "epoch": 0.17053206002728513, - "grad_norm": 1.4607417583465576, - "learning_rate": 7.490385500098879e-05, - "loss": 0.04385361075401306, - "step": 250 - }, - { - "epoch": 0.17394270122783084, - "grad_norm": 0.394960880279541, - "learning_rate": 7.489637907123308e-05, - "loss": 0.04446137547492981, - "step": 255 - }, - { - "epoch": 0.17735334242837653, - "grad_norm": 0.8768635988235474, - "learning_rate": 7.488862366980505e-05, - "loss": 0.04143576025962829, - "step": 260 - }, - { - "epoch": 0.18076398362892224, - "grad_norm": 1.9996010065078735, - "learning_rate": 7.488058885466262e-05, - "loss": 0.07952215671539306, - "step": 265 - }, - { - "epoch": 0.18417462482946795, - "grad_norm": 0.03770223259925842, - "learning_rate": 7.487227468585178e-05, - "loss": 0.02531362771987915, - "step": 270 - }, - { - "epoch": 0.18758526603001363, - "grad_norm": 0.26082542538642883, - "learning_rate": 7.486368122550619e-05, - "loss": 0.09930967688560485, - "step": 275 - }, - { - "epoch": 0.19099590723055934, - "grad_norm": 5.622270584106445, - "learning_rate": 7.485480853784677e-05, - "loss": 0.06534865498542786, - "step": 280 - }, - { - "epoch": 0.19440654843110505, - "grad_norm": 0.5298851132392883, - "learning_rate": 7.484565668918111e-05, - "loss": 0.06109699010848999, - "step": 285 - }, - { - "epoch": 0.19781718963165076, - "grad_norm": 1.4887421131134033, - "learning_rate": 7.483622574790308e-05, - "loss": 0.048966211080551145, - "step": 290 - }, - { - "epoch": 0.20122783083219645, - "grad_norm": 0.5699282884597778, - "learning_rate": 7.482651578449223e-05, - "loss": 0.05427658557891846, - "step": 295 - }, - { - "epoch": 0.20463847203274216, - "grad_norm": 1.6645292043685913, - "learning_rate": 7.481652687151339e-05, - "loss": 0.037466832995414735, - "step": 300 - }, - { - "epoch": 0.20804911323328787, - "grad_norm": 0.4979431629180908, - "learning_rate": 7.480625908361593e-05, - "loss": 0.019084173440933227, - "step": 305 - }, - { - "epoch": 0.21145975443383355, - "grad_norm": 2.73081636428833, - "learning_rate": 7.479571249753339e-05, - "loss": 0.07597044706344605, - "step": 310 - }, - { - "epoch": 0.21487039563437926, - "grad_norm": 0.009097559377551079, - "learning_rate": 7.478488719208281e-05, - "loss": 0.017771795392036438, - "step": 315 - }, - { - "epoch": 0.21828103683492497, - "grad_norm": 1.5284112691879272, - "learning_rate": 7.477378324816419e-05, - "loss": 0.07524526119232178, - "step": 320 - }, - { - "epoch": 0.22169167803547066, - "grad_norm": 1.400959849357605, - "learning_rate": 7.47624007487598e-05, - "loss": 0.0357323557138443, - "step": 325 - }, - { - "epoch": 0.22510231923601637, - "grad_norm": 0.5988397598266602, - "learning_rate": 7.47507397789337e-05, - "loss": 0.06072888970375061, - "step": 330 - }, - { - "epoch": 0.22851296043656208, - "grad_norm": 0.18309183418750763, - "learning_rate": 7.473880042583092e-05, - "loss": 0.03904334008693695, - "step": 335 - }, - { - "epoch": 0.23192360163710776, - "grad_norm": 0.7360084056854248, - "learning_rate": 7.472658277867702e-05, - "loss": 0.05045387148857117, - "step": 340 - }, - { - "epoch": 0.23533424283765347, - "grad_norm": 2.315072536468506, - "learning_rate": 7.471408692877724e-05, - "loss": 0.07920202016830444, - "step": 345 - }, - { - "epoch": 0.23874488403819918, - "grad_norm": 1.2811086177825928, - "learning_rate": 7.470131296951592e-05, - "loss": 0.05552580952644348, - "step": 350 - }, - { - "epoch": 0.2421555252387449, - "grad_norm": 4.006563186645508, - "learning_rate": 7.468826099635578e-05, - "loss": 0.1419215679168701, - "step": 355 - }, - { - "epoch": 0.24556616643929058, - "grad_norm": 1.1540688276290894, - "learning_rate": 7.467493110683718e-05, - "loss": 0.03980849981307984, - "step": 360 - }, - { - "epoch": 0.2489768076398363, - "grad_norm": 1.5472272634506226, - "learning_rate": 7.466132340057742e-05, - "loss": 0.020862475037574768, - "step": 365 - }, - { - "epoch": 0.24965893587994542, - "eval_loss": 0.11654457449913025, - "eval_runtime": 1.0333, - "eval_samples_per_second": 72.584, - "eval_steps_per_second": 1.936, - "step": 366 - }, - { - "eval_cer_subset": 0.05232320479629377, - "eval_cer_subset_edit_distance": 384, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 366 - }, - { - "epoch": 0.252387448840382, - "grad_norm": 1.730627417564392, - "learning_rate": 7.464743797927002e-05, - "loss": 0.11239330768585205, - "step": 370 - }, - { - "epoch": 0.2557980900409277, - "grad_norm": 0.1921682506799698, - "learning_rate": 7.463327494668388e-05, - "loss": 0.09260941743850708, - "step": 375 - }, - { - "epoch": 0.2592087312414734, - "grad_norm": 1.9259151220321655, - "learning_rate": 7.461883440866259e-05, - "loss": 0.03999299705028534, - "step": 380 - }, - { - "epoch": 0.2626193724420191, - "grad_norm": 0.0488249845802784, - "learning_rate": 7.460411647312358e-05, - "loss": 0.01459498256444931, - "step": 385 - }, - { - "epoch": 0.2660300136425648, - "grad_norm": 1.1967735290527344, - "learning_rate": 7.458912125005732e-05, - "loss": 0.17716412544250487, - "step": 390 - }, - { - "epoch": 0.2694406548431105, - "grad_norm": 1.0083205699920654, - "learning_rate": 7.457384885152655e-05, - "loss": 0.08738511800765991, - "step": 395 - }, - { - "epoch": 0.2728512960436562, - "grad_norm": 0.6705593466758728, - "learning_rate": 7.455829939166539e-05, - "loss": 0.026945650577545166, - "step": 400 - }, - { - "epoch": 0.2762619372442019, - "grad_norm": 1.0791361331939697, - "learning_rate": 7.45424729866785e-05, - "loss": 0.03804046213626862, - "step": 405 - }, - { - "epoch": 0.27967257844474763, - "grad_norm": 0.7377910017967224, - "learning_rate": 7.452636975484021e-05, - "loss": 0.0464675635099411, - "step": 410 - }, - { - "epoch": 0.2830832196452933, - "grad_norm": 0.8061625957489014, - "learning_rate": 7.450998981649365e-05, - "loss": 0.02737331986427307, - "step": 415 - }, - { - "epoch": 0.286493860845839, - "grad_norm": 0.2580685019493103, - "learning_rate": 7.449333329404982e-05, - "loss": 0.018785761296749116, - "step": 420 - }, - { - "epoch": 0.28990450204638474, - "grad_norm": 0.017052194103598595, - "learning_rate": 7.447640031198675e-05, - "loss": 0.11424320936203003, - "step": 425 - }, - { - "epoch": 0.2933151432469304, - "grad_norm": 0.2855948805809021, - "learning_rate": 7.445919099684845e-05, - "loss": 0.012821969389915467, - "step": 430 - }, - { - "epoch": 0.2967257844474761, - "grad_norm": 1.5070558786392212, - "learning_rate": 7.444170547724405e-05, - "loss": 0.037783479690551756, - "step": 435 - }, - { - "epoch": 0.30013642564802184, - "grad_norm": 0.18241967260837555, - "learning_rate": 7.442394388384684e-05, - "loss": 0.03347713351249695, - "step": 440 - }, - { - "epoch": 0.3035470668485675, - "grad_norm": 0.37319424748420715, - "learning_rate": 7.440590634939327e-05, - "loss": 0.05499382615089417, - "step": 445 - }, - { - "epoch": 0.3069577080491132, - "grad_norm": 0.11083097755908966, - "learning_rate": 7.438759300868193e-05, - "loss": 0.021977408230304717, - "step": 450 - }, - { - "epoch": 0.31036834924965895, - "grad_norm": 0.24985608458518982, - "learning_rate": 7.436900399857261e-05, - "loss": 0.07826730608940125, - "step": 455 - }, - { - "epoch": 0.31377899045020463, - "grad_norm": 2.5360186100006104, - "learning_rate": 7.43501394579852e-05, - "loss": 0.080865478515625, - "step": 460 - }, - { - "epoch": 0.3171896316507503, - "grad_norm": 0.7053658366203308, - "learning_rate": 7.433099952789876e-05, - "loss": 0.012464526295661926, - "step": 465 - }, - { - "epoch": 0.32060027285129605, - "grad_norm": 0.3297032117843628, - "learning_rate": 7.43115843513503e-05, - "loss": 0.05623521208763123, - "step": 470 - }, - { - "epoch": 0.32401091405184174, - "grad_norm": 1.3899471759796143, - "learning_rate": 7.42918940734339e-05, - "loss": 0.07306370735168458, - "step": 475 - }, - { - "epoch": 0.3274215552523875, - "grad_norm": 0.9437380433082581, - "learning_rate": 7.427192884129948e-05, - "loss": 0.058290761709213254, - "step": 480 - }, - { - "epoch": 0.33083219645293316, - "grad_norm": 1.8157323598861694, - "learning_rate": 7.42516888041518e-05, - "loss": 0.058532989025115965, - "step": 485 - }, - { - "epoch": 0.33424283765347884, - "grad_norm": 0.6275774836540222, - "learning_rate": 7.423117411324924e-05, - "loss": 0.0624964714050293, - "step": 490 - }, - { - "epoch": 0.3376534788540246, - "grad_norm": 0.6674565672874451, - "learning_rate": 7.421038492190278e-05, - "loss": 0.020014175772666933, - "step": 495 - }, - { - "epoch": 0.34106412005457026, - "grad_norm": 0.6229842901229858, - "learning_rate": 7.418932138547481e-05, - "loss": 0.03286575376987457, - "step": 500 - }, - { - "epoch": 0.34447476125511595, - "grad_norm": 1.441677212715149, - "learning_rate": 7.41679836613779e-05, - "loss": 0.04557921886444092, - "step": 505 - }, - { - "epoch": 0.3478854024556617, - "grad_norm": 0.8439592719078064, - "learning_rate": 7.414637190907379e-05, - "loss": 0.027792316675186158, - "step": 510 - }, - { - "epoch": 0.35129604365620737, - "grad_norm": 0.1357346475124359, - "learning_rate": 7.412448629007198e-05, - "loss": 0.024153730273246764, - "step": 515 - }, - { - "epoch": 0.35470668485675305, - "grad_norm": 0.11876281350851059, - "learning_rate": 7.41023269679287e-05, - "loss": 0.11651531457901002, - "step": 520 - }, - { - "epoch": 0.3581173260572988, - "grad_norm": 0.8576210737228394, - "learning_rate": 7.407989410824566e-05, - "loss": 0.045156928896903994, - "step": 525 - }, - { - "epoch": 0.3615279672578445, - "grad_norm": 0.39947113394737244, - "learning_rate": 7.40571878786687e-05, - "loss": 0.02562606930732727, - "step": 530 - }, - { - "epoch": 0.36493860845839016, - "grad_norm": 0.8822716474533081, - "learning_rate": 7.403420844888668e-05, - "loss": 0.05394383668899536, - "step": 535 - }, - { - "epoch": 0.3683492496589359, - "grad_norm": 1.8026832342147827, - "learning_rate": 7.40109559906301e-05, - "loss": 0.07706952095031738, - "step": 540 - }, - { - "epoch": 0.3717598908594816, - "grad_norm": 0.28902706503868103, - "learning_rate": 7.398743067766987e-05, - "loss": 0.0352792352437973, - "step": 545 - }, - { - "epoch": 0.37517053206002726, - "grad_norm": 0.2759908437728882, - "learning_rate": 7.396363268581609e-05, - "loss": 0.038266700506210324, - "step": 550 - }, - { - "epoch": 0.378581173260573, - "grad_norm": 1.0520153045654297, - "learning_rate": 7.39395621929165e-05, - "loss": 0.04206843376159668, - "step": 555 - }, - { - "epoch": 0.3819918144611187, - "grad_norm": 0.29290419816970825, - "learning_rate": 7.391521937885543e-05, - "loss": 0.04060278534889221, - "step": 560 - }, - { - "epoch": 0.38540245566166437, - "grad_norm": 0.11243477463722229, - "learning_rate": 7.389060442555228e-05, - "loss": 0.05412468910217285, - "step": 565 - }, - { - "epoch": 0.3888130968622101, - "grad_norm": 1.9416879415512085, - "learning_rate": 7.386571751696019e-05, - "loss": 0.02231921851634979, - "step": 570 - }, - { - "epoch": 0.3922237380627558, - "grad_norm": 0.5937671661376953, - "learning_rate": 7.384055883906474e-05, - "loss": 0.032561862468719484, - "step": 575 - }, - { - "epoch": 0.3956343792633015, - "grad_norm": 0.026148535311222076, - "learning_rate": 7.381512857988244e-05, - "loss": 0.07647547125816345, - "step": 580 - }, - { - "epoch": 0.3990450204638472, - "grad_norm": 0.7875772714614868, - "learning_rate": 7.378942692945944e-05, - "loss": 0.031203645467758178, - "step": 585 - }, - { - "epoch": 0.4024556616643929, - "grad_norm": 0.45512810349464417, - "learning_rate": 7.376345407987002e-05, - "loss": 0.04238590002059937, - "step": 590 - }, - { - "epoch": 0.40586630286493863, - "grad_norm": 1.66355299949646, - "learning_rate": 7.373721022521521e-05, - "loss": 0.052533066272735594, - "step": 595 - }, - { - "epoch": 0.4092769440654843, - "grad_norm": 0.08107655495405197, - "learning_rate": 7.371069556162133e-05, - "loss": 0.017715978622436523, - "step": 600 - }, - { - "epoch": 0.41268758526603, - "grad_norm": 0.32274800539016724, - "learning_rate": 7.368391028723851e-05, - "loss": 0.1379294991493225, - "step": 605 - }, - { - "epoch": 0.41609822646657574, - "grad_norm": 1.8197475671768188, - "learning_rate": 7.365685460223922e-05, - "loss": 0.03312918543815613, - "step": 610 - }, - { - "epoch": 0.4195088676671214, - "grad_norm": 0.1390945166349411, - "learning_rate": 7.362952870881677e-05, - "loss": 0.027584537863731384, - "step": 615 - }, - { - "epoch": 0.4229195088676671, - "grad_norm": 0.9081276655197144, - "learning_rate": 7.360193281118378e-05, - "loss": 0.06143233776092529, - "step": 620 - }, - { - "epoch": 0.42633015006821284, - "grad_norm": 0.07777975499629974, - "learning_rate": 7.35740671155707e-05, - "loss": 0.053598570823669436, - "step": 625 - }, - { - "epoch": 0.4297407912687585, - "grad_norm": 0.9314269423484802, - "learning_rate": 7.354593183022422e-05, - "loss": 0.05946495532989502, - "step": 630 - }, - { - "epoch": 0.4331514324693042, - "grad_norm": 0.5312000513076782, - "learning_rate": 7.351752716540575e-05, - "loss": 0.030707958340644836, - "step": 635 - }, - { - "epoch": 0.43656207366984995, - "grad_norm": 0.855117917060852, - "learning_rate": 7.348885333338984e-05, - "loss": 0.09321808815002441, - "step": 640 - }, - { - "epoch": 0.43997271487039563, - "grad_norm": 0.12914253771305084, - "learning_rate": 7.345991054846257e-05, - "loss": 0.010356919467449188, - "step": 645 - }, - { - "epoch": 0.4433833560709413, - "grad_norm": 0.4129096567630768, - "learning_rate": 7.343069902691999e-05, - "loss": 0.054264682531356814, - "step": 650 - }, - { - "epoch": 0.44679399727148705, - "grad_norm": 1.8499324321746826, - "learning_rate": 7.340121898706643e-05, - "loss": 0.050659948587417604, - "step": 655 - }, - { - "epoch": 0.45020463847203274, - "grad_norm": 0.5490806698799133, - "learning_rate": 7.337147064921299e-05, - "loss": 0.07158003449440002, - "step": 660 - }, - { - "epoch": 0.4536152796725784, - "grad_norm": 1.1408376693725586, - "learning_rate": 7.334145423567575e-05, - "loss": 0.08845412135124206, - "step": 665 - }, - { - "epoch": 0.45702592087312416, - "grad_norm": 1.5242546796798706, - "learning_rate": 7.331116997077426e-05, - "loss": 0.07773985266685486, - "step": 670 - }, - { - "epoch": 0.46043656207366984, - "grad_norm": 0.7061560153961182, - "learning_rate": 7.32806180808297e-05, - "loss": 0.047228410840034485, - "step": 675 - }, - { - "epoch": 0.4638472032742155, - "grad_norm": 0.8088539838790894, - "learning_rate": 7.324979879416333e-05, - "loss": 0.03726888597011566, - "step": 680 - }, - { - "epoch": 0.46725784447476126, - "grad_norm": 0.5670620799064636, - "learning_rate": 7.321871234109472e-05, - "loss": 0.02899191677570343, - "step": 685 - }, - { - "epoch": 0.47066848567530695, - "grad_norm": 2.3821427822113037, - "learning_rate": 7.318735895394e-05, - "loss": 0.033483856916427614, - "step": 690 - }, - { - "epoch": 0.4740791268758527, - "grad_norm": 0.8073883652687073, - "learning_rate": 7.315573886701023e-05, - "loss": 0.05756385326385498, - "step": 695 - }, - { - "epoch": 0.47748976807639837, - "grad_norm": 0.09120920300483704, - "learning_rate": 7.31238523166095e-05, - "loss": 0.0338085800409317, - "step": 700 - }, - { - "epoch": 0.48090040927694405, - "grad_norm": 0.33443862199783325, - "learning_rate": 7.309169954103326e-05, - "loss": 0.00844155102968216, - "step": 705 - }, - { - "epoch": 0.4843110504774898, - "grad_norm": 0.4880702793598175, - "learning_rate": 7.305928078056657e-05, - "loss": 0.09532383680343628, - "step": 710 - }, - { - "epoch": 0.4877216916780355, - "grad_norm": 0.11862733215093613, - "learning_rate": 7.302659627748221e-05, - "loss": 0.01845739334821701, - "step": 715 - }, - { - "epoch": 0.49113233287858116, - "grad_norm": 0.03655651956796646, - "learning_rate": 7.299364627603892e-05, - "loss": 0.030477851629257202, - "step": 720 - }, - { - "epoch": 0.4945429740791269, - "grad_norm": 1.481441617012024, - "learning_rate": 7.29604310224796e-05, - "loss": 0.07586092352867127, - "step": 725 - }, - { - "epoch": 0.4979536152796726, - "grad_norm": 0.8510580658912659, - "learning_rate": 7.292695076502938e-05, - "loss": 0.03589251637458801, - "step": 730 - }, - { - "epoch": 0.49931787175989084, - "eval_loss": 0.061700768768787384, - "eval_runtime": 0.8886, - "eval_samples_per_second": 84.399, - "eval_steps_per_second": 2.251, - "step": 732 - }, - { - "eval_cer_subset": 0.021256301948494344, - "eval_cer_subset_edit_distance": 156, - "eval_cer_subset_groups": 75, - "eval_cer_subset_items": 75, - "eval_cer_subset_ref_chars": 7339, - "step": 732 - } - ], - "logging_steps": 5, - "max_steps": 5864, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 366, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 7560364796559360.0, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/training_args.bin deleted file mode 100644 index 4b2caf110aea0ff545882448056d16e2bf7ea427..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-732/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc6915d8d9dd9b5c9c17756c87ba7ec8221fd06789232d79225f9518167f0aa1 -size 5841 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/README.md b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/README.md deleted file mode 100644 index 524237c97921c94c7e104e9d9473c86acdc3587f..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/Meta-Llama-3.1-8B -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/Meta-Llama-3.1-8B -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/adapter_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/adapter_config.json deleted file mode 100644 index dcdb6b17c195950cd12709b48b32f4a0c173c74e..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 16, - "lora_bias": false, - "lora_dropout": 0.0, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 8, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "down_proj", - "v_proj", - "gate_proj", - "up_proj", - "o_proj", - "q_proj", - "k_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/adapter_model.safetensors b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/adapter_model.safetensors deleted file mode 100644 index 8b64401c64b4291a3a05cf04fb08cac4f5e6b0c6..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd9ea284a3065881527d3b5988c88814835c3fd6ccd78d16b803053020e10950 -size 83945296 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/chat_template.jinja b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/chat_template.jinja deleted file mode 100644 index b44c6edb96b4a9deb59115ac3e418dc3d6ab30ca..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/chat_template.jinja +++ /dev/null @@ -1,12 +0,0 @@ -{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + ' - -' }}{% set loop_messages = messages[1:] %}{% else %}{{ '' + ' - -' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction: -' + message['content'] + ' - -' }}{% elif message['role'] == 'assistant' %}{{ '### Response: -' + message['content'] + eos_token + ' - -' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response: -' }}{% endif %} \ No newline at end of file diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/optimizer.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/optimizer.pt deleted file mode 100644 index 0c022cbeb3a5329d8d5ae843316066c2a6d5ebfb..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:252c960a9b9066f5bb8068ae000edeb068c75e1c5f878e8141ab988bb1a9544a -size 43127973 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/rng_state.pth b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/rng_state.pth deleted file mode 100644 index 48d3b0862789a8279291084fafa92eaa06b9c1fd..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:827270dbeb2e04c837d1c35958cd7372837c48e711d15c2963ea3917cc5c7b4a -size 14645 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/scheduler.pt b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/scheduler.pt deleted file mode 100644 index 9db4beb900aad9272db15f4178a29dd0b354a5be..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:61e00a62946ed72c71ff8117dac7946594558e92a3644ad06a04c1247eec5a62 -size 1465 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/tokenizer.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/tokenizer_config.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/tokenizer_config.json deleted file mode 100644 index f507e387a7a6fbf56356356db2d8bf588d34a30d..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/trainer_state.json b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/trainer_state.json deleted file mode 100644 index e129fb1a9cd20ae2b9b68025735fe10f034197c3..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/trainer_state.json +++ /dev/null @@ -1,12443 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.4997442455242966, - "eval_steps": 1466, - "global_step": 8796, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0008525149190110827, - "grad_norm": 1.6653118133544922, - "learning_rate": 6.382978723404255e-07, - "loss": 0.6878558158874511, - "step": 5 - }, - { - "epoch": 0.0017050298380221654, - "grad_norm": 1.6072094440460205, - "learning_rate": 1.4361702127659573e-06, - "loss": 0.7222867965698242, - "step": 10 - }, - { - "epoch": 0.0025575447570332483, - "grad_norm": 1.8327608108520508, - "learning_rate": 2.2340425531914894e-06, - "loss": 0.6977188110351562, - "step": 15 - }, - { - "epoch": 0.0034100596760443308, - "grad_norm": 1.7263766527175903, - "learning_rate": 3.0319148936170214e-06, - "loss": 0.6928215026855469, - "step": 20 - }, - { - "epoch": 0.004262574595055414, - "grad_norm": 1.8482989072799683, - "learning_rate": 3.829787234042553e-06, - "loss": 0.6887872695922852, - "step": 25 - }, - { - "epoch": 0.005115089514066497, - "grad_norm": 2.0272440910339355, - "learning_rate": 4.627659574468085e-06, - "loss": 0.6714544296264648, - "step": 30 - }, - { - "epoch": 0.005967604433077579, - "grad_norm": 2.4286959171295166, - "learning_rate": 5.425531914893616e-06, - "loss": 0.6025971412658692, - "step": 35 - }, - { - "epoch": 0.0068201193520886615, - "grad_norm": 2.26863694190979, - "learning_rate": 6.223404255319148e-06, - "loss": 0.5200423240661621, - "step": 40 - }, - { - "epoch": 0.0076726342710997444, - "grad_norm": 2.0572476387023926, - "learning_rate": 7.02127659574468e-06, - "loss": 0.3741515874862671, - "step": 45 - }, - { - "epoch": 0.008525149190110827, - "grad_norm": 0.9048103094100952, - "learning_rate": 7.819148936170211e-06, - "loss": 0.25923154354095457, - "step": 50 - }, - { - "epoch": 0.00937766410912191, - "grad_norm": 0.5942133069038391, - "learning_rate": 8.617021276595744e-06, - "loss": 0.18589640855789186, - "step": 55 - }, - { - "epoch": 0.010230179028132993, - "grad_norm": 0.40469691157341003, - "learning_rate": 9.414893617021275e-06, - "loss": 0.14508966207504273, - "step": 60 - }, - { - "epoch": 0.011082693947144074, - "grad_norm": 0.319180428981781, - "learning_rate": 1.0212765957446808e-05, - "loss": 0.1287898302078247, - "step": 65 - }, - { - "epoch": 0.011935208866155157, - "grad_norm": 0.2620982229709625, - "learning_rate": 1.101063829787234e-05, - "loss": 0.10825083255767823, - "step": 70 - }, - { - "epoch": 0.01278772378516624, - "grad_norm": 0.2702063024044037, - "learning_rate": 1.1808510638297872e-05, - "loss": 0.09140915870666504, - "step": 75 - }, - { - "epoch": 0.013640238704177323, - "grad_norm": 0.21019567549228668, - "learning_rate": 1.2606382978723403e-05, - "loss": 0.07847741842269898, - "step": 80 - }, - { - "epoch": 0.014492753623188406, - "grad_norm": 0.27865487337112427, - "learning_rate": 1.3404255319148936e-05, - "loss": 0.0681579053401947, - "step": 85 - }, - { - "epoch": 0.015345268542199489, - "grad_norm": 0.2561706006526947, - "learning_rate": 1.4202127659574466e-05, - "loss": 0.06208043098449707, - "step": 90 - }, - { - "epoch": 0.01619778346121057, - "grad_norm": 0.33415308594703674, - "learning_rate": 1.4999999999999999e-05, - "loss": 0.05436077117919922, - "step": 95 - }, - { - "epoch": 0.017050298380221655, - "grad_norm": 0.20705723762512207, - "learning_rate": 1.579787234042553e-05, - "loss": 0.061427068710327146, - "step": 100 - }, - { - "epoch": 0.017902813299232736, - "grad_norm": 0.26269420981407166, - "learning_rate": 1.659574468085106e-05, - "loss": 0.04944279193878174, - "step": 105 - }, - { - "epoch": 0.01875532821824382, - "grad_norm": 0.256533682346344, - "learning_rate": 1.7393617021276596e-05, - "loss": 0.0463131994009018, - "step": 110 - }, - { - "epoch": 0.0196078431372549, - "grad_norm": 0.25077348947525024, - "learning_rate": 1.8191489361702127e-05, - "loss": 0.036723154783248904, - "step": 115 - }, - { - "epoch": 0.020460358056265986, - "grad_norm": 0.2801378071308136, - "learning_rate": 1.8989361702127655e-05, - "loss": 0.04308137893676758, - "step": 120 - }, - { - "epoch": 0.021312872975277068, - "grad_norm": 0.23140908777713776, - "learning_rate": 1.978723404255319e-05, - "loss": 0.0391487717628479, - "step": 125 - }, - { - "epoch": 0.02216538789428815, - "grad_norm": 0.4250975251197815, - "learning_rate": 2.0585106382978724e-05, - "loss": 0.03745899498462677, - "step": 130 - }, - { - "epoch": 0.023017902813299233, - "grad_norm": 0.24533668160438538, - "learning_rate": 2.1382978723404255e-05, - "loss": 0.03649275898933411, - "step": 135 - }, - { - "epoch": 0.023870417732310314, - "grad_norm": 0.2309182733297348, - "learning_rate": 2.2180851063829783e-05, - "loss": 0.036090463399887085, - "step": 140 - }, - { - "epoch": 0.0247229326513214, - "grad_norm": 0.28087928891181946, - "learning_rate": 2.2978723404255317e-05, - "loss": 0.03179733753204346, - "step": 145 - }, - { - "epoch": 0.02557544757033248, - "grad_norm": 0.2785134017467499, - "learning_rate": 2.377659574468085e-05, - "loss": 0.02794267237186432, - "step": 150 - }, - { - "epoch": 0.026427962489343565, - "grad_norm": 0.20468176901340485, - "learning_rate": 2.457446808510638e-05, - "loss": 0.025854837894439698, - "step": 155 - }, - { - "epoch": 0.027280477408354646, - "grad_norm": 0.4069538116455078, - "learning_rate": 2.537234042553191e-05, - "loss": 0.03182780146598816, - "step": 160 - }, - { - "epoch": 0.028132992327365727, - "grad_norm": 0.3025060296058655, - "learning_rate": 2.6170212765957446e-05, - "loss": 0.027975103259086607, - "step": 165 - }, - { - "epoch": 0.028985507246376812, - "grad_norm": 0.22387781739234924, - "learning_rate": 2.6968085106382977e-05, - "loss": 0.025766396522521974, - "step": 170 - }, - { - "epoch": 0.029838022165387893, - "grad_norm": 0.15060213208198547, - "learning_rate": 2.7765957446808508e-05, - "loss": 0.025661858916282653, - "step": 175 - }, - { - "epoch": 0.030690537084398978, - "grad_norm": 0.2912445366382599, - "learning_rate": 2.856382978723404e-05, - "loss": 0.02397846579551697, - "step": 180 - }, - { - "epoch": 0.03154305200341006, - "grad_norm": 0.19390830397605896, - "learning_rate": 2.9361702127659574e-05, - "loss": 0.023744897544384004, - "step": 185 - }, - { - "epoch": 0.03239556692242114, - "grad_norm": 0.24036464095115662, - "learning_rate": 3.0159574468085105e-05, - "loss": 0.0238590270280838, - "step": 190 - }, - { - "epoch": 0.03324808184143223, - "grad_norm": 0.2793026566505432, - "learning_rate": 3.0957446808510636e-05, - "loss": 0.025464403629302978, - "step": 195 - }, - { - "epoch": 0.03410059676044331, - "grad_norm": 0.21164429187774658, - "learning_rate": 3.175531914893617e-05, - "loss": 0.020692554116249085, - "step": 200 - }, - { - "epoch": 0.03495311167945439, - "grad_norm": 0.3742266297340393, - "learning_rate": 3.25531914893617e-05, - "loss": 0.02315486818552017, - "step": 205 - }, - { - "epoch": 0.03580562659846547, - "grad_norm": 0.2690248191356659, - "learning_rate": 3.3351063829787226e-05, - "loss": 0.0174571692943573, - "step": 210 - }, - { - "epoch": 0.03665814151747655, - "grad_norm": 0.3727855980396271, - "learning_rate": 3.414893617021276e-05, - "loss": 0.021705827116966246, - "step": 215 - }, - { - "epoch": 0.03751065643648764, - "grad_norm": 0.27461397647857666, - "learning_rate": 3.4946808510638296e-05, - "loss": 0.028134092688560486, - "step": 220 - }, - { - "epoch": 0.03836317135549872, - "grad_norm": 0.1723722219467163, - "learning_rate": 3.574468085106383e-05, - "loss": 0.02093944847583771, - "step": 225 - }, - { - "epoch": 0.0392156862745098, - "grad_norm": 0.17161321640014648, - "learning_rate": 3.654255319148936e-05, - "loss": 0.021984823048114777, - "step": 230 - }, - { - "epoch": 0.040068201193520885, - "grad_norm": 0.203886479139328, - "learning_rate": 3.734042553191489e-05, - "loss": 0.02234097272157669, - "step": 235 - }, - { - "epoch": 0.04092071611253197, - "grad_norm": 0.2021923065185547, - "learning_rate": 3.813829787234042e-05, - "loss": 0.02671273946762085, - "step": 240 - }, - { - "epoch": 0.041773231031543054, - "grad_norm": 0.24510027468204498, - "learning_rate": 3.8936170212765955e-05, - "loss": 0.016925157606601716, - "step": 245 - }, - { - "epoch": 0.042625745950554135, - "grad_norm": 0.191350057721138, - "learning_rate": 3.973404255319149e-05, - "loss": 0.02067229151725769, - "step": 250 - }, - { - "epoch": 0.043478260869565216, - "grad_norm": 0.20248189568519592, - "learning_rate": 4.053191489361702e-05, - "loss": 0.01805101931095123, - "step": 255 - }, - { - "epoch": 0.0443307757885763, - "grad_norm": 0.22623269259929657, - "learning_rate": 4.1329787234042545e-05, - "loss": 0.019811727106571198, - "step": 260 - }, - { - "epoch": 0.045183290707587385, - "grad_norm": 0.2050849199295044, - "learning_rate": 4.2127659574468086e-05, - "loss": 0.017767781019210817, - "step": 265 - }, - { - "epoch": 0.04603580562659847, - "grad_norm": 0.24867363274097443, - "learning_rate": 4.2925531914893614e-05, - "loss": 0.02127009928226471, - "step": 270 - }, - { - "epoch": 0.04688832054560955, - "grad_norm": 0.20721694827079773, - "learning_rate": 4.372340425531914e-05, - "loss": 0.022770658135414124, - "step": 275 - }, - { - "epoch": 0.04774083546462063, - "grad_norm": 0.19511370360851288, - "learning_rate": 4.452127659574468e-05, - "loss": 0.02165296971797943, - "step": 280 - }, - { - "epoch": 0.04859335038363171, - "grad_norm": 0.19066740572452545, - "learning_rate": 4.5319148936170204e-05, - "loss": 0.020857495069503785, - "step": 285 - }, - { - "epoch": 0.0494458653026428, - "grad_norm": 0.14217901229858398, - "learning_rate": 4.6117021276595746e-05, - "loss": 0.016413891315460206, - "step": 290 - }, - { - "epoch": 0.05029838022165388, - "grad_norm": 0.24177870154380798, - "learning_rate": 4.6914893617021274e-05, - "loss": 0.01902117133140564, - "step": 295 - }, - { - "epoch": 0.05115089514066496, - "grad_norm": 0.2742858827114105, - "learning_rate": 4.77127659574468e-05, - "loss": 0.016554126143455507, - "step": 300 - }, - { - "epoch": 0.05200341005967604, - "grad_norm": 0.24519385397434235, - "learning_rate": 4.851063829787234e-05, - "loss": 0.022036357223987578, - "step": 305 - }, - { - "epoch": 0.05285592497868713, - "grad_norm": 0.296572208404541, - "learning_rate": 4.930851063829787e-05, - "loss": 0.0196581095457077, - "step": 310 - }, - { - "epoch": 0.05370843989769821, - "grad_norm": 0.17092008888721466, - "learning_rate": 5.01063829787234e-05, - "loss": 0.015453743934631347, - "step": 315 - }, - { - "epoch": 0.05456095481670929, - "grad_norm": 0.17074067890644073, - "learning_rate": 5.090425531914893e-05, - "loss": 0.013983796536922454, - "step": 320 - }, - { - "epoch": 0.05541346973572037, - "grad_norm": 0.27302470803260803, - "learning_rate": 5.170212765957446e-05, - "loss": 0.015387402474880218, - "step": 325 - }, - { - "epoch": 0.056265984654731455, - "grad_norm": 0.2603592872619629, - "learning_rate": 5.2499999999999995e-05, - "loss": 0.013557825982570649, - "step": 330 - }, - { - "epoch": 0.05711849957374254, - "grad_norm": 0.23079948127269745, - "learning_rate": 5.329787234042553e-05, - "loss": 0.014809322357177735, - "step": 335 - }, - { - "epoch": 0.057971014492753624, - "grad_norm": 0.15029627084732056, - "learning_rate": 5.409574468085106e-05, - "loss": 0.018204084038734435, - "step": 340 - }, - { - "epoch": 0.058823529411764705, - "grad_norm": 0.2930934429168701, - "learning_rate": 5.4893617021276586e-05, - "loss": 0.02035558819770813, - "step": 345 - }, - { - "epoch": 0.059676044330775786, - "grad_norm": 0.19710905849933624, - "learning_rate": 5.569148936170213e-05, - "loss": 0.015025021135807037, - "step": 350 - }, - { - "epoch": 0.060528559249786874, - "grad_norm": 0.13316969573497772, - "learning_rate": 5.6489361702127655e-05, - "loss": 0.01258893460035324, - "step": 355 - }, - { - "epoch": 0.061381074168797956, - "grad_norm": 0.30591729283332825, - "learning_rate": 5.728723404255319e-05, - "loss": 0.0187319278717041, - "step": 360 - }, - { - "epoch": 0.06223358908780904, - "grad_norm": 0.22696663439273834, - "learning_rate": 5.808510638297872e-05, - "loss": 0.015805599093437196, - "step": 365 - }, - { - "epoch": 0.06308610400682012, - "grad_norm": 0.2800130546092987, - "learning_rate": 5.8882978723404245e-05, - "loss": 0.01719983071088791, - "step": 370 - }, - { - "epoch": 0.0639386189258312, - "grad_norm": 0.20671235024929047, - "learning_rate": 5.9680851063829786e-05, - "loss": 0.014449423551559449, - "step": 375 - }, - { - "epoch": 0.06479113384484228, - "grad_norm": 0.1630883365869522, - "learning_rate": 6.0478723404255314e-05, - "loss": 0.017153808474540712, - "step": 380 - }, - { - "epoch": 0.06564364876385337, - "grad_norm": 0.18699344992637634, - "learning_rate": 6.127659574468084e-05, - "loss": 0.015558701753616334, - "step": 385 - }, - { - "epoch": 0.06649616368286446, - "grad_norm": 0.15257929265499115, - "learning_rate": 6.207446808510638e-05, - "loss": 0.018240168690681458, - "step": 390 - }, - { - "epoch": 0.06734867860187553, - "grad_norm": 0.19658304750919342, - "learning_rate": 6.287234042553191e-05, - "loss": 0.015907022356987, - "step": 395 - }, - { - "epoch": 0.06820119352088662, - "grad_norm": 0.3743384778499603, - "learning_rate": 6.367021276595743e-05, - "loss": 0.014921861886978149, - "step": 400 - }, - { - "epoch": 0.06905370843989769, - "grad_norm": 0.254724383354187, - "learning_rate": 6.446808510638298e-05, - "loss": 0.018739913403987885, - "step": 405 - }, - { - "epoch": 0.06990622335890878, - "grad_norm": 0.19365151226520538, - "learning_rate": 6.52659574468085e-05, - "loss": 0.018984711170196532, - "step": 410 - }, - { - "epoch": 0.07075873827791987, - "grad_norm": 0.20728597044944763, - "learning_rate": 6.606382978723404e-05, - "loss": 0.01524859368801117, - "step": 415 - }, - { - "epoch": 0.07161125319693094, - "grad_norm": 0.1415344476699829, - "learning_rate": 6.686170212765957e-05, - "loss": 0.014004582166671753, - "step": 420 - }, - { - "epoch": 0.07246376811594203, - "grad_norm": 0.14169375598430634, - "learning_rate": 6.765957446808509e-05, - "loss": 0.013503608107566834, - "step": 425 - }, - { - "epoch": 0.0733162830349531, - "grad_norm": 0.22518369555473328, - "learning_rate": 6.845744680851064e-05, - "loss": 0.017587104439735414, - "step": 430 - }, - { - "epoch": 0.0741687979539642, - "grad_norm": 0.1091267541050911, - "learning_rate": 6.925531914893616e-05, - "loss": 0.013890406489372254, - "step": 435 - }, - { - "epoch": 0.07502131287297528, - "grad_norm": 0.24982722103595734, - "learning_rate": 7.00531914893617e-05, - "loss": 0.01599075198173523, - "step": 440 - }, - { - "epoch": 0.07587382779198636, - "grad_norm": 0.22311007976531982, - "learning_rate": 7.085106382978723e-05, - "loss": 0.014870230853557587, - "step": 445 - }, - { - "epoch": 0.07672634271099744, - "grad_norm": 0.27064985036849976, - "learning_rate": 7.164893617021276e-05, - "loss": 0.019213163852691652, - "step": 450 - }, - { - "epoch": 0.07757885763000852, - "grad_norm": 0.16876882314682007, - "learning_rate": 7.244680851063829e-05, - "loss": 0.014935044944286347, - "step": 455 - }, - { - "epoch": 0.0784313725490196, - "grad_norm": 0.18644963204860687, - "learning_rate": 7.324468085106382e-05, - "loss": 0.013823372125625611, - "step": 460 - }, - { - "epoch": 0.0792838874680307, - "grad_norm": 0.13067972660064697, - "learning_rate": 7.404255319148935e-05, - "loss": 0.013839980959892273, - "step": 465 - }, - { - "epoch": 0.08013640238704177, - "grad_norm": 0.0976850613951683, - "learning_rate": 7.484042553191489e-05, - "loss": 0.012782379984855652, - "step": 470 - }, - { - "epoch": 0.08098891730605286, - "grad_norm": 0.2523496150970459, - "learning_rate": 7.499999439800074e-05, - "loss": 0.013075053691864014, - "step": 475 - }, - { - "epoch": 0.08184143222506395, - "grad_norm": 0.18349343538284302, - "learning_rate": 7.499997163988164e-05, - "loss": 0.01753528565168381, - "step": 480 - }, - { - "epoch": 0.08269394714407502, - "grad_norm": 0.16528834402561188, - "learning_rate": 7.499993137552834e-05, - "loss": 0.012987489998340606, - "step": 485 - }, - { - "epoch": 0.08354646206308611, - "grad_norm": 0.1918938159942627, - "learning_rate": 7.499987360495964e-05, - "loss": 0.018496687710285186, - "step": 490 - }, - { - "epoch": 0.08439897698209718, - "grad_norm": 0.11452502012252808, - "learning_rate": 7.499979832820255e-05, - "loss": 0.015578755736351013, - "step": 495 - }, - { - "epoch": 0.08525149190110827, - "grad_norm": 0.2084985226392746, - "learning_rate": 7.499970554529216e-05, - "loss": 0.014264023303985596, - "step": 500 - }, - { - "epoch": 0.08610400682011936, - "grad_norm": 0.13777600228786469, - "learning_rate": 7.49995952562718e-05, - "loss": 0.014527246356010437, - "step": 505 - }, - { - "epoch": 0.08695652173913043, - "grad_norm": 0.2515336275100708, - "learning_rate": 7.499946746119296e-05, - "loss": 0.019042417407035828, - "step": 510 - }, - { - "epoch": 0.08780903665814152, - "grad_norm": 0.12859591841697693, - "learning_rate": 7.499932216011531e-05, - "loss": 0.01340993344783783, - "step": 515 - }, - { - "epoch": 0.0886615515771526, - "grad_norm": 0.15603038668632507, - "learning_rate": 7.499915935310667e-05, - "loss": 0.01645509898662567, - "step": 520 - }, - { - "epoch": 0.08951406649616368, - "grad_norm": 0.1493784785270691, - "learning_rate": 7.499897904024303e-05, - "loss": 0.016503919661045075, - "step": 525 - }, - { - "epoch": 0.09036658141517477, - "grad_norm": 0.14551150798797607, - "learning_rate": 7.499878122160858e-05, - "loss": 0.013891169428825378, - "step": 530 - }, - { - "epoch": 0.09121909633418585, - "grad_norm": 0.12197990715503693, - "learning_rate": 7.499856589729569e-05, - "loss": 0.01531532108783722, - "step": 535 - }, - { - "epoch": 0.09207161125319693, - "grad_norm": 0.24787496030330658, - "learning_rate": 7.499833306740485e-05, - "loss": 0.016374409198760986, - "step": 540 - }, - { - "epoch": 0.09292412617220801, - "grad_norm": 0.10902808606624603, - "learning_rate": 7.499808273204476e-05, - "loss": 0.013505718111991883, - "step": 545 - }, - { - "epoch": 0.0937766410912191, - "grad_norm": 0.16540755331516266, - "learning_rate": 7.499781489133228e-05, - "loss": 0.016257329285144805, - "step": 550 - }, - { - "epoch": 0.09462915601023018, - "grad_norm": 0.1228717565536499, - "learning_rate": 7.499752954539245e-05, - "loss": 0.011345921456813813, - "step": 555 - }, - { - "epoch": 0.09548167092924126, - "grad_norm": 0.12704797089099884, - "learning_rate": 7.49972266943585e-05, - "loss": 0.014449910819530487, - "step": 560 - }, - { - "epoch": 0.09633418584825235, - "grad_norm": 0.2014201581478119, - "learning_rate": 7.499690633837178e-05, - "loss": 0.01456935852766037, - "step": 565 - }, - { - "epoch": 0.09718670076726342, - "grad_norm": 0.17480657994747162, - "learning_rate": 7.499656847758187e-05, - "loss": 0.014413098990917205, - "step": 570 - }, - { - "epoch": 0.09803921568627451, - "grad_norm": 0.18946893513202667, - "learning_rate": 7.499621311214646e-05, - "loss": 0.0139508917927742, - "step": 575 - }, - { - "epoch": 0.0988917306052856, - "grad_norm": 0.15367820858955383, - "learning_rate": 7.499584024223149e-05, - "loss": 0.014091435074806213, - "step": 580 - }, - { - "epoch": 0.09974424552429667, - "grad_norm": 0.15018688142299652, - "learning_rate": 7.499544986801099e-05, - "loss": 0.016737687587738036, - "step": 585 - }, - { - "epoch": 0.10059676044330776, - "grad_norm": 0.17522579431533813, - "learning_rate": 7.499504198966722e-05, - "loss": 0.015822765231132508, - "step": 590 - }, - { - "epoch": 0.10144927536231885, - "grad_norm": 0.12634818255901337, - "learning_rate": 7.499461660739059e-05, - "loss": 0.015363042056560517, - "step": 595 - }, - { - "epoch": 0.10230179028132992, - "grad_norm": 0.12466495484113693, - "learning_rate": 7.499417372137968e-05, - "loss": 0.013208043575286866, - "step": 600 - }, - { - "epoch": 0.10315430520034101, - "grad_norm": 0.18877951800823212, - "learning_rate": 7.499371333184125e-05, - "loss": 0.016261917352676392, - "step": 605 - }, - { - "epoch": 0.10400682011935208, - "grad_norm": 0.21521399915218353, - "learning_rate": 7.49932354389902e-05, - "loss": 0.013861049711704255, - "step": 610 - }, - { - "epoch": 0.10485933503836317, - "grad_norm": 0.1375613659620285, - "learning_rate": 7.499274004304964e-05, - "loss": 0.015597744286060334, - "step": 615 - }, - { - "epoch": 0.10571184995737426, - "grad_norm": 0.16078200936317444, - "learning_rate": 7.499222714425087e-05, - "loss": 0.018308353424072266, - "step": 620 - }, - { - "epoch": 0.10656436487638533, - "grad_norm": 0.15485632419586182, - "learning_rate": 7.499169674283328e-05, - "loss": 0.015836401283740996, - "step": 625 - }, - { - "epoch": 0.10741687979539642, - "grad_norm": 0.14019452035427094, - "learning_rate": 7.499114883904451e-05, - "loss": 0.014591602981090546, - "step": 630 - }, - { - "epoch": 0.1082693947144075, - "grad_norm": 0.2042212039232254, - "learning_rate": 7.499058343314031e-05, - "loss": 0.01718664914369583, - "step": 635 - }, - { - "epoch": 0.10912190963341858, - "grad_norm": 0.13801656663417816, - "learning_rate": 7.499000052538467e-05, - "loss": 0.015579727292060853, - "step": 640 - }, - { - "epoch": 0.10997442455242967, - "grad_norm": 0.12787523865699768, - "learning_rate": 7.498940011604968e-05, - "loss": 0.012525486946105956, - "step": 645 - }, - { - "epoch": 0.11082693947144075, - "grad_norm": 0.1752539724111557, - "learning_rate": 7.498878220541564e-05, - "loss": 0.014286568760871888, - "step": 650 - }, - { - "epoch": 0.11167945439045183, - "grad_norm": 0.12485212087631226, - "learning_rate": 7.498814679377101e-05, - "loss": 0.01482405811548233, - "step": 655 - }, - { - "epoch": 0.11253196930946291, - "grad_norm": 0.10980220139026642, - "learning_rate": 7.498749388141243e-05, - "loss": 0.01597980558872223, - "step": 660 - }, - { - "epoch": 0.113384484228474, - "grad_norm": 0.09144977480173111, - "learning_rate": 7.498682346864469e-05, - "loss": 0.011583130061626434, - "step": 665 - }, - { - "epoch": 0.11423699914748509, - "grad_norm": 0.11955336481332779, - "learning_rate": 7.498613555578076e-05, - "loss": 0.013009116053581238, - "step": 670 - }, - { - "epoch": 0.11508951406649616, - "grad_norm": 0.14505070447921753, - "learning_rate": 7.49854301431418e-05, - "loss": 0.013878281414508819, - "step": 675 - }, - { - "epoch": 0.11594202898550725, - "grad_norm": 0.15674598515033722, - "learning_rate": 7.49847072310571e-05, - "loss": 0.014717698097229004, - "step": 680 - }, - { - "epoch": 0.11679454390451834, - "grad_norm": 0.16684003174304962, - "learning_rate": 7.498396681986413e-05, - "loss": 0.015567027032375336, - "step": 685 - }, - { - "epoch": 0.11764705882352941, - "grad_norm": 0.13314439356327057, - "learning_rate": 7.498320890990857e-05, - "loss": 0.017679129540920258, - "step": 690 - }, - { - "epoch": 0.1184995737425405, - "grad_norm": 0.1099700927734375, - "learning_rate": 7.498243350154423e-05, - "loss": 0.015660777688026428, - "step": 695 - }, - { - "epoch": 0.11935208866155157, - "grad_norm": 0.09305288642644882, - "learning_rate": 7.498164059513307e-05, - "loss": 0.011864218115806579, - "step": 700 - }, - { - "epoch": 0.12020460358056266, - "grad_norm": 0.1284978687763214, - "learning_rate": 7.498083019104527e-05, - "loss": 0.013128276169300079, - "step": 705 - }, - { - "epoch": 0.12105711849957375, - "grad_norm": 0.19205476343631744, - "learning_rate": 7.498000228965913e-05, - "loss": 0.014518238604068756, - "step": 710 - }, - { - "epoch": 0.12190963341858482, - "grad_norm": 0.25064221024513245, - "learning_rate": 7.497915689136119e-05, - "loss": 0.014235696196556092, - "step": 715 - }, - { - "epoch": 0.12276214833759591, - "grad_norm": 0.11866044998168945, - "learning_rate": 7.497829399654607e-05, - "loss": 0.013622967898845673, - "step": 720 - }, - { - "epoch": 0.12361466325660699, - "grad_norm": 0.1366252452135086, - "learning_rate": 7.49774136056166e-05, - "loss": 0.013592693209648132, - "step": 725 - }, - { - "epoch": 0.12446717817561807, - "grad_norm": 0.12722773849964142, - "learning_rate": 7.497651571898379e-05, - "loss": 0.02055167257785797, - "step": 730 - }, - { - "epoch": 0.12531969309462915, - "grad_norm": 0.07723517715930939, - "learning_rate": 7.49756003370668e-05, - "loss": 0.012377069890499115, - "step": 735 - }, - { - "epoch": 0.12617220801364024, - "grad_norm": 0.11081967502832413, - "learning_rate": 7.497466746029293e-05, - "loss": 0.013797640800476074, - "step": 740 - }, - { - "epoch": 0.12702472293265132, - "grad_norm": 0.1117677390575409, - "learning_rate": 7.497371708909771e-05, - "loss": 0.01338990479707718, - "step": 745 - }, - { - "epoch": 0.1278772378516624, - "grad_norm": 0.24659112095832825, - "learning_rate": 7.497274922392483e-05, - "loss": 0.012844511866569519, - "step": 750 - }, - { - "epoch": 0.1287297527706735, - "grad_norm": 0.24900244176387787, - "learning_rate": 7.497176386522606e-05, - "loss": 0.015656352043151855, - "step": 755 - }, - { - "epoch": 0.12958226768968456, - "grad_norm": 0.1331390142440796, - "learning_rate": 7.497076101346144e-05, - "loss": 0.013722085952758789, - "step": 760 - }, - { - "epoch": 0.13043478260869565, - "grad_norm": 0.1224697008728981, - "learning_rate": 7.496974066909913e-05, - "loss": 0.011186304688453674, - "step": 765 - }, - { - "epoch": 0.13128729752770674, - "grad_norm": 0.11053820699453354, - "learning_rate": 7.496870283261546e-05, - "loss": 0.012894454598426818, - "step": 770 - }, - { - "epoch": 0.13213981244671782, - "grad_norm": 0.09663277864456177, - "learning_rate": 7.49676475044949e-05, - "loss": 0.015331995487213135, - "step": 775 - }, - { - "epoch": 0.1329923273657289, - "grad_norm": 0.1483219712972641, - "learning_rate": 7.496657468523014e-05, - "loss": 0.015070399641990662, - "step": 780 - }, - { - "epoch": 0.13384484228473997, - "grad_norm": 0.12375539541244507, - "learning_rate": 7.496548437532202e-05, - "loss": 0.013722005486488342, - "step": 785 - }, - { - "epoch": 0.13469735720375106, - "grad_norm": 0.14662130177021027, - "learning_rate": 7.496437657527949e-05, - "loss": 0.012852996587753296, - "step": 790 - }, - { - "epoch": 0.13554987212276215, - "grad_norm": 0.1740300953388214, - "learning_rate": 7.496325128561975e-05, - "loss": 0.014796209335327149, - "step": 795 - }, - { - "epoch": 0.13640238704177324, - "grad_norm": 0.10670243203639984, - "learning_rate": 7.496210850686809e-05, - "loss": 0.013983005285263061, - "step": 800 - }, - { - "epoch": 0.13725490196078433, - "grad_norm": 0.1362515240907669, - "learning_rate": 7.496094823955801e-05, - "loss": 0.014472618699073792, - "step": 805 - }, - { - "epoch": 0.13810741687979539, - "grad_norm": 0.11403689533472061, - "learning_rate": 7.495977048423117e-05, - "loss": 0.01294848918914795, - "step": 810 - }, - { - "epoch": 0.13895993179880647, - "grad_norm": 0.11532565206289291, - "learning_rate": 7.495857524143738e-05, - "loss": 0.011346425861120224, - "step": 815 - }, - { - "epoch": 0.13981244671781756, - "grad_norm": 0.1506761759519577, - "learning_rate": 7.495736251173463e-05, - "loss": 0.016532811522483825, - "step": 820 - }, - { - "epoch": 0.14066496163682865, - "grad_norm": 0.10915899276733398, - "learning_rate": 7.495613229568903e-05, - "loss": 0.01278679072856903, - "step": 825 - }, - { - "epoch": 0.14151747655583974, - "grad_norm": 0.10394130647182465, - "learning_rate": 7.49548845938749e-05, - "loss": 0.011449626088142395, - "step": 830 - }, - { - "epoch": 0.1423699914748508, - "grad_norm": 0.08730677515268326, - "learning_rate": 7.495361940687475e-05, - "loss": 0.011916645616292954, - "step": 835 - }, - { - "epoch": 0.1432225063938619, - "grad_norm": 0.08257348835468292, - "learning_rate": 7.495233673527914e-05, - "loss": 0.013689276576042176, - "step": 840 - }, - { - "epoch": 0.14407502131287298, - "grad_norm": 0.182078555226326, - "learning_rate": 7.495103657968692e-05, - "loss": 0.019141729176044463, - "step": 845 - }, - { - "epoch": 0.14492753623188406, - "grad_norm": 0.105568528175354, - "learning_rate": 7.494971894070501e-05, - "loss": 0.015522226691246033, - "step": 850 - }, - { - "epoch": 0.14578005115089515, - "grad_norm": 0.09030122309923172, - "learning_rate": 7.494838381894856e-05, - "loss": 0.012900182604789734, - "step": 855 - }, - { - "epoch": 0.1466325660699062, - "grad_norm": 0.11199921369552612, - "learning_rate": 7.494703121504082e-05, - "loss": 0.013011230528354645, - "step": 860 - }, - { - "epoch": 0.1474850809889173, - "grad_norm": 0.11342430859804153, - "learning_rate": 7.494566112961325e-05, - "loss": 0.015477502346038818, - "step": 865 - }, - { - "epoch": 0.1483375959079284, - "grad_norm": 0.07956613600254059, - "learning_rate": 7.494427356330544e-05, - "loss": 0.011270551383495331, - "step": 870 - }, - { - "epoch": 0.14919011082693948, - "grad_norm": 0.13356897234916687, - "learning_rate": 7.494286851676515e-05, - "loss": 0.012481572479009629, - "step": 875 - }, - { - "epoch": 0.15004262574595056, - "grad_norm": 0.14493827521800995, - "learning_rate": 7.494144599064833e-05, - "loss": 0.015318951010704041, - "step": 880 - }, - { - "epoch": 0.15089514066496162, - "grad_norm": 0.13254614174365997, - "learning_rate": 7.494000598561902e-05, - "loss": 0.012697519361972808, - "step": 885 - }, - { - "epoch": 0.1517476555839727, - "grad_norm": 0.12164720892906189, - "learning_rate": 7.49385485023495e-05, - "loss": 0.011361779272556305, - "step": 890 - }, - { - "epoch": 0.1526001705029838, - "grad_norm": 0.10743863880634308, - "learning_rate": 7.493707354152015e-05, - "loss": 0.012824115157127381, - "step": 895 - }, - { - "epoch": 0.1534526854219949, - "grad_norm": 0.0933203473687172, - "learning_rate": 7.493558110381954e-05, - "loss": 0.012234293669462205, - "step": 900 - }, - { - "epoch": 0.15430520034100598, - "grad_norm": 0.10252948850393295, - "learning_rate": 7.493407118994437e-05, - "loss": 0.01874733567237854, - "step": 905 - }, - { - "epoch": 0.15515771526001704, - "grad_norm": 0.1184248998761177, - "learning_rate": 7.493254380059954e-05, - "loss": 0.012014241516590118, - "step": 910 - }, - { - "epoch": 0.15601023017902813, - "grad_norm": 0.10140799731016159, - "learning_rate": 7.49309989364981e-05, - "loss": 0.013288506865501403, - "step": 915 - }, - { - "epoch": 0.1568627450980392, - "grad_norm": 0.15704941749572754, - "learning_rate": 7.492943659836121e-05, - "loss": 0.012907981872558594, - "step": 920 - }, - { - "epoch": 0.1577152600170503, - "grad_norm": 0.10368761420249939, - "learning_rate": 7.492785678691822e-05, - "loss": 0.01384827345609665, - "step": 925 - }, - { - "epoch": 0.1585677749360614, - "grad_norm": 0.13517284393310547, - "learning_rate": 7.492625950290668e-05, - "loss": 0.014644764363765717, - "step": 930 - }, - { - "epoch": 0.15942028985507245, - "grad_norm": 0.08189263194799423, - "learning_rate": 7.492464474707222e-05, - "loss": 0.011974713951349258, - "step": 935 - }, - { - "epoch": 0.16027280477408354, - "grad_norm": 0.11737149208784103, - "learning_rate": 7.492301252016867e-05, - "loss": 0.014023615419864655, - "step": 940 - }, - { - "epoch": 0.16112531969309463, - "grad_norm": 0.15778031945228577, - "learning_rate": 7.492136282295801e-05, - "loss": 0.01533726304769516, - "step": 945 - }, - { - "epoch": 0.16197783461210571, - "grad_norm": 0.14194118976593018, - "learning_rate": 7.491969565621036e-05, - "loss": 0.01569782793521881, - "step": 950 - }, - { - "epoch": 0.1628303495311168, - "grad_norm": 0.12579558789730072, - "learning_rate": 7.491801102070403e-05, - "loss": 0.012844063341617584, - "step": 955 - }, - { - "epoch": 0.1636828644501279, - "grad_norm": 0.10578597337007523, - "learning_rate": 7.491630891722547e-05, - "loss": 0.011186198890209198, - "step": 960 - }, - { - "epoch": 0.16453537936913895, - "grad_norm": 0.12398207187652588, - "learning_rate": 7.491458934656925e-05, - "loss": 0.014328254759311676, - "step": 965 - }, - { - "epoch": 0.16538789428815004, - "grad_norm": 0.09390626102685928, - "learning_rate": 7.491285230953814e-05, - "loss": 0.013881708681583404, - "step": 970 - }, - { - "epoch": 0.16624040920716113, - "grad_norm": 0.17196106910705566, - "learning_rate": 7.491109780694303e-05, - "loss": 0.014424234628677368, - "step": 975 - }, - { - "epoch": 0.16709292412617222, - "grad_norm": 0.06994624435901642, - "learning_rate": 7.490932583960302e-05, - "loss": 0.010434426367282867, - "step": 980 - }, - { - "epoch": 0.1679454390451833, - "grad_norm": 0.09414499998092651, - "learning_rate": 7.490753640834527e-05, - "loss": 0.010483803600072861, - "step": 985 - }, - { - "epoch": 0.16879795396419436, - "grad_norm": 0.11002875864505768, - "learning_rate": 7.490572951400518e-05, - "loss": 0.009266073256731034, - "step": 990 - }, - { - "epoch": 0.16965046888320545, - "grad_norm": 0.14956022799015045, - "learning_rate": 7.490390515742626e-05, - "loss": 0.015529079735279084, - "step": 995 - }, - { - "epoch": 0.17050298380221654, - "grad_norm": 0.13828666508197784, - "learning_rate": 7.490206333946019e-05, - "loss": 0.011511271446943283, - "step": 1000 - }, - { - "epoch": 0.17135549872122763, - "grad_norm": 0.1265997439622879, - "learning_rate": 7.490020406096678e-05, - "loss": 0.010465707629919052, - "step": 1005 - }, - { - "epoch": 0.17220801364023872, - "grad_norm": 0.07772056013345718, - "learning_rate": 7.489832732281401e-05, - "loss": 0.013828210532665253, - "step": 1010 - }, - { - "epoch": 0.17306052855924978, - "grad_norm": 0.08055376261472702, - "learning_rate": 7.489643312587799e-05, - "loss": 0.013010218739509583, - "step": 1015 - }, - { - "epoch": 0.17391304347826086, - "grad_norm": 0.09059367328882217, - "learning_rate": 7.489452147104301e-05, - "loss": 0.013864235579967498, - "step": 1020 - }, - { - "epoch": 0.17476555839727195, - "grad_norm": 0.15273553133010864, - "learning_rate": 7.489259235920149e-05, - "loss": 0.013432112336158753, - "step": 1025 - }, - { - "epoch": 0.17561807331628304, - "grad_norm": 0.11343793570995331, - "learning_rate": 7.489064579125399e-05, - "loss": 0.01213686615228653, - "step": 1030 - }, - { - "epoch": 0.17647058823529413, - "grad_norm": 0.11880069226026535, - "learning_rate": 7.488868176810926e-05, - "loss": 0.012188264727592468, - "step": 1035 - }, - { - "epoch": 0.1773231031543052, - "grad_norm": 0.16270127892494202, - "learning_rate": 7.488670029068415e-05, - "loss": 0.015294317901134492, - "step": 1040 - }, - { - "epoch": 0.17817561807331628, - "grad_norm": 0.11542797088623047, - "learning_rate": 7.488470135990369e-05, - "loss": 0.013500772416591644, - "step": 1045 - }, - { - "epoch": 0.17902813299232737, - "grad_norm": 0.09579882025718689, - "learning_rate": 7.488268497670103e-05, - "loss": 0.013453315198421478, - "step": 1050 - }, - { - "epoch": 0.17988064791133845, - "grad_norm": 0.08847666531801224, - "learning_rate": 7.488065114201752e-05, - "loss": 0.0153861403465271, - "step": 1055 - }, - { - "epoch": 0.18073316283034954, - "grad_norm": 0.11167823523283005, - "learning_rate": 7.487859985680257e-05, - "loss": 0.011502107977867127, - "step": 1060 - }, - { - "epoch": 0.1815856777493606, - "grad_norm": 0.11255413293838501, - "learning_rate": 7.487653112201385e-05, - "loss": 0.012194579839706421, - "step": 1065 - }, - { - "epoch": 0.1824381926683717, - "grad_norm": 0.10929680615663528, - "learning_rate": 7.487444493861705e-05, - "loss": 0.015874122083187104, - "step": 1070 - }, - { - "epoch": 0.18329070758738278, - "grad_norm": 0.10753592848777771, - "learning_rate": 7.487234130758613e-05, - "loss": 0.009445396810770034, - "step": 1075 - }, - { - "epoch": 0.18414322250639387, - "grad_norm": 0.11368737369775772, - "learning_rate": 7.487022022990309e-05, - "loss": 0.011674505472183228, - "step": 1080 - }, - { - "epoch": 0.18499573742540495, - "grad_norm": 0.12944836914539337, - "learning_rate": 7.486808170655813e-05, - "loss": 0.011856313049793243, - "step": 1085 - }, - { - "epoch": 0.18584825234441602, - "grad_norm": 0.0833214819431305, - "learning_rate": 7.48659257385496e-05, - "loss": 0.014119544625282287, - "step": 1090 - }, - { - "epoch": 0.1867007672634271, - "grad_norm": 0.11955790221691132, - "learning_rate": 7.486375232688397e-05, - "loss": 0.012977911531925202, - "step": 1095 - }, - { - "epoch": 0.1875532821824382, - "grad_norm": 0.1452455222606659, - "learning_rate": 7.486156147257584e-05, - "loss": 0.015410827100276947, - "step": 1100 - }, - { - "epoch": 0.18840579710144928, - "grad_norm": 0.09630092978477478, - "learning_rate": 7.485935317664801e-05, - "loss": 0.013698874413967133, - "step": 1105 - }, - { - "epoch": 0.18925831202046037, - "grad_norm": 0.11687257140874863, - "learning_rate": 7.485712744013137e-05, - "loss": 0.013196484744548797, - "step": 1110 - }, - { - "epoch": 0.19011082693947143, - "grad_norm": 0.08894632011651993, - "learning_rate": 7.485488426406495e-05, - "loss": 0.01540881097316742, - "step": 1115 - }, - { - "epoch": 0.19096334185848252, - "grad_norm": 0.09196458756923676, - "learning_rate": 7.485262364949597e-05, - "loss": 0.012018527090549468, - "step": 1120 - }, - { - "epoch": 0.1918158567774936, - "grad_norm": 0.12328553944826126, - "learning_rate": 7.485034559747974e-05, - "loss": 0.014925773441791534, - "step": 1125 - }, - { - "epoch": 0.1926683716965047, - "grad_norm": 0.12566202878952026, - "learning_rate": 7.484805010907975e-05, - "loss": 0.01104198843240738, - "step": 1130 - }, - { - "epoch": 0.19352088661551578, - "grad_norm": 0.1022765263915062, - "learning_rate": 7.484573718536758e-05, - "loss": 0.0118367500603199, - "step": 1135 - }, - { - "epoch": 0.19437340153452684, - "grad_norm": 0.09682224690914154, - "learning_rate": 7.4843406827423e-05, - "loss": 0.011423001438379288, - "step": 1140 - }, - { - "epoch": 0.19522591645353793, - "grad_norm": 0.15166254341602325, - "learning_rate": 7.484105903633388e-05, - "loss": 0.014048118889331818, - "step": 1145 - }, - { - "epoch": 0.19607843137254902, - "grad_norm": 0.09285032004117966, - "learning_rate": 7.483869381319627e-05, - "loss": 0.012882034480571746, - "step": 1150 - }, - { - "epoch": 0.1969309462915601, - "grad_norm": 0.09011214971542358, - "learning_rate": 7.483631115911434e-05, - "loss": 0.01419239491224289, - "step": 1155 - }, - { - "epoch": 0.1977834612105712, - "grad_norm": 0.14540383219718933, - "learning_rate": 7.483391107520037e-05, - "loss": 0.014623096585273743, - "step": 1160 - }, - { - "epoch": 0.19863597612958228, - "grad_norm": 0.12326448410749435, - "learning_rate": 7.483149356257479e-05, - "loss": 0.013109591603279114, - "step": 1165 - }, - { - "epoch": 0.19948849104859334, - "grad_norm": 0.07067535817623138, - "learning_rate": 7.482905862236622e-05, - "loss": 0.013740380108356477, - "step": 1170 - }, - { - "epoch": 0.20034100596760443, - "grad_norm": 0.10170529782772064, - "learning_rate": 7.482660625571134e-05, - "loss": 0.011151721328496933, - "step": 1175 - }, - { - "epoch": 0.20119352088661552, - "grad_norm": 0.10074683278799057, - "learning_rate": 7.482413646375498e-05, - "loss": 0.01180294007062912, - "step": 1180 - }, - { - "epoch": 0.2020460358056266, - "grad_norm": 0.047992780804634094, - "learning_rate": 7.482164924765016e-05, - "loss": 0.01065710186958313, - "step": 1185 - }, - { - "epoch": 0.2028985507246377, - "grad_norm": 0.1279197484254837, - "learning_rate": 7.481914460855796e-05, - "loss": 0.012219739705324173, - "step": 1190 - }, - { - "epoch": 0.20375106564364875, - "grad_norm": 0.11339649558067322, - "learning_rate": 7.481662254764765e-05, - "loss": 0.012664712965488434, - "step": 1195 - }, - { - "epoch": 0.20460358056265984, - "grad_norm": 0.10576959699392319, - "learning_rate": 7.481408306609662e-05, - "loss": 0.010009048134088516, - "step": 1200 - }, - { - "epoch": 0.20545609548167093, - "grad_norm": 0.08073283731937408, - "learning_rate": 7.481152616509037e-05, - "loss": 0.011126396805047989, - "step": 1205 - }, - { - "epoch": 0.20630861040068202, - "grad_norm": 0.09888558834791183, - "learning_rate": 7.480895184582253e-05, - "loss": 0.013096305727958679, - "step": 1210 - }, - { - "epoch": 0.2071611253196931, - "grad_norm": 0.09703118354082108, - "learning_rate": 7.48063601094949e-05, - "loss": 0.010653502494096755, - "step": 1215 - }, - { - "epoch": 0.20801364023870417, - "grad_norm": 0.10693266987800598, - "learning_rate": 7.48037509573174e-05, - "loss": 0.012283077836036682, - "step": 1220 - }, - { - "epoch": 0.20886615515771526, - "grad_norm": 0.1024889275431633, - "learning_rate": 7.480112439050804e-05, - "loss": 0.012971282005310059, - "step": 1225 - }, - { - "epoch": 0.20971867007672634, - "grad_norm": 0.10043203830718994, - "learning_rate": 7.4798480410293e-05, - "loss": 0.011451539397239686, - "step": 1230 - }, - { - "epoch": 0.21057118499573743, - "grad_norm": 0.11248672753572464, - "learning_rate": 7.47958190179066e-05, - "loss": 0.012805460393428803, - "step": 1235 - }, - { - "epoch": 0.21142369991474852, - "grad_norm": 0.08651525527238846, - "learning_rate": 7.479314021459123e-05, - "loss": 0.013016811013221741, - "step": 1240 - }, - { - "epoch": 0.21227621483375958, - "grad_norm": 0.06062095984816551, - "learning_rate": 7.479044400159746e-05, - "loss": 0.01299230456352234, - "step": 1245 - }, - { - "epoch": 0.21312872975277067, - "grad_norm": 0.1589215248823166, - "learning_rate": 7.478773038018397e-05, - "loss": 0.012607543170452118, - "step": 1250 - }, - { - "epoch": 0.21398124467178176, - "grad_norm": 0.12105076014995575, - "learning_rate": 7.478499935161758e-05, - "loss": 0.012772174179553985, - "step": 1255 - }, - { - "epoch": 0.21483375959079284, - "grad_norm": 0.0846070721745491, - "learning_rate": 7.478225091717323e-05, - "loss": 0.009387130290269852, - "step": 1260 - }, - { - "epoch": 0.21568627450980393, - "grad_norm": 0.054560381919145584, - "learning_rate": 7.477948507813396e-05, - "loss": 0.013299009203910828, - "step": 1265 - }, - { - "epoch": 0.216538789428815, - "grad_norm": 0.10125566273927689, - "learning_rate": 7.477670183579094e-05, - "loss": 0.013010343909263611, - "step": 1270 - }, - { - "epoch": 0.21739130434782608, - "grad_norm": 0.10493458807468414, - "learning_rate": 7.477390119144353e-05, - "loss": 0.013531042635440827, - "step": 1275 - }, - { - "epoch": 0.21824381926683717, - "grad_norm": 0.07453935593366623, - "learning_rate": 7.477108314639913e-05, - "loss": 0.01330820918083191, - "step": 1280 - }, - { - "epoch": 0.21909633418584826, - "grad_norm": 0.14430643618106842, - "learning_rate": 7.47682477019733e-05, - "loss": 0.015061555802822113, - "step": 1285 - }, - { - "epoch": 0.21994884910485935, - "grad_norm": 0.1073407456278801, - "learning_rate": 7.476539485948973e-05, - "loss": 0.011137319356203079, - "step": 1290 - }, - { - "epoch": 0.2208013640238704, - "grad_norm": 0.16794899106025696, - "learning_rate": 7.476252462028021e-05, - "loss": 0.013223762810230254, - "step": 1295 - }, - { - "epoch": 0.2216538789428815, - "grad_norm": 0.11509175598621368, - "learning_rate": 7.475963698568468e-05, - "loss": 0.012790821492671967, - "step": 1300 - }, - { - "epoch": 0.22250639386189258, - "grad_norm": 0.08615118265151978, - "learning_rate": 7.475673195705116e-05, - "loss": 0.011050455272197723, - "step": 1305 - }, - { - "epoch": 0.22335890878090367, - "grad_norm": 0.1186874732375145, - "learning_rate": 7.475380953573583e-05, - "loss": 0.011253353953361512, - "step": 1310 - }, - { - "epoch": 0.22421142369991476, - "grad_norm": 0.08680208027362823, - "learning_rate": 7.475086972310297e-05, - "loss": 0.010736887156963349, - "step": 1315 - }, - { - "epoch": 0.22506393861892582, - "grad_norm": 0.08204677700996399, - "learning_rate": 7.474791252052498e-05, - "loss": 0.011695361882448196, - "step": 1320 - }, - { - "epoch": 0.2259164535379369, - "grad_norm": 0.08945680409669876, - "learning_rate": 7.47449379293824e-05, - "loss": 0.01684057414531708, - "step": 1325 - }, - { - "epoch": 0.226768968456948, - "grad_norm": 0.12350911647081375, - "learning_rate": 7.474194595106384e-05, - "loss": 0.012560060620307923, - "step": 1330 - }, - { - "epoch": 0.22762148337595908, - "grad_norm": 0.09201455116271973, - "learning_rate": 7.473893658696605e-05, - "loss": 0.01128845140337944, - "step": 1335 - }, - { - "epoch": 0.22847399829497017, - "grad_norm": 0.12938448786735535, - "learning_rate": 7.473590983849396e-05, - "loss": 0.011510471999645232, - "step": 1340 - }, - { - "epoch": 0.22932651321398123, - "grad_norm": 0.0837291032075882, - "learning_rate": 7.473286570706047e-05, - "loss": 0.011677465587854385, - "step": 1345 - }, - { - "epoch": 0.23017902813299232, - "grad_norm": 0.12514546513557434, - "learning_rate": 7.472980419408675e-05, - "loss": 0.01308433711528778, - "step": 1350 - }, - { - "epoch": 0.2310315430520034, - "grad_norm": 0.05483829975128174, - "learning_rate": 7.472672530100199e-05, - "loss": 0.007203156501054764, - "step": 1355 - }, - { - "epoch": 0.2318840579710145, - "grad_norm": 0.13903425633907318, - "learning_rate": 7.472362902924352e-05, - "loss": 0.013231572508811951, - "step": 1360 - }, - { - "epoch": 0.23273657289002558, - "grad_norm": 0.09622041881084442, - "learning_rate": 7.472051538025678e-05, - "loss": 0.013325902819633483, - "step": 1365 - }, - { - "epoch": 0.23358908780903667, - "grad_norm": 0.0792275071144104, - "learning_rate": 7.471738435549533e-05, - "loss": 0.011098403483629227, - "step": 1370 - }, - { - "epoch": 0.23444160272804773, - "grad_norm": 0.11833506077528, - "learning_rate": 7.471423595642084e-05, - "loss": 0.014845246076583862, - "step": 1375 - }, - { - "epoch": 0.23529411764705882, - "grad_norm": 0.07531571388244629, - "learning_rate": 7.471107018450309e-05, - "loss": 0.011498528718948364, - "step": 1380 - }, - { - "epoch": 0.2361466325660699, - "grad_norm": 0.08739249408245087, - "learning_rate": 7.470788704121995e-05, - "loss": 0.013241058588027954, - "step": 1385 - }, - { - "epoch": 0.236999147485081, - "grad_norm": 0.07113732397556305, - "learning_rate": 7.470468652805743e-05, - "loss": 0.009632241725921632, - "step": 1390 - }, - { - "epoch": 0.23785166240409208, - "grad_norm": 0.07838430255651474, - "learning_rate": 7.470146864650965e-05, - "loss": 0.009344339370727539, - "step": 1395 - }, - { - "epoch": 0.23870417732310314, - "grad_norm": 0.1086372509598732, - "learning_rate": 7.46982333980788e-05, - "loss": 0.014708395302295684, - "step": 1400 - }, - { - "epoch": 0.23955669224211423, - "grad_norm": 0.06628046184778214, - "learning_rate": 7.469498078427519e-05, - "loss": 0.011472882330417633, - "step": 1405 - }, - { - "epoch": 0.24040920716112532, - "grad_norm": 0.13003386557102203, - "learning_rate": 7.46917108066173e-05, - "loss": 0.011933800578117371, - "step": 1410 - }, - { - "epoch": 0.2412617220801364, - "grad_norm": 0.07365831732749939, - "learning_rate": 7.468842346663162e-05, - "loss": 0.0102902352809906, - "step": 1415 - }, - { - "epoch": 0.2421142369991475, - "grad_norm": 0.11540158838033676, - "learning_rate": 7.468511876585279e-05, - "loss": 0.012016136944293977, - "step": 1420 - }, - { - "epoch": 0.24296675191815856, - "grad_norm": 0.11687944084405899, - "learning_rate": 7.468179670582359e-05, - "loss": 0.01773642897605896, - "step": 1425 - }, - { - "epoch": 0.24381926683716965, - "grad_norm": 0.10464506596326828, - "learning_rate": 7.467845728809483e-05, - "loss": 0.009476778656244278, - "step": 1430 - }, - { - "epoch": 0.24467178175618073, - "grad_norm": 0.11479093879461288, - "learning_rate": 7.46751005142255e-05, - "loss": 0.014100676774978638, - "step": 1435 - }, - { - "epoch": 0.24552429667519182, - "grad_norm": 0.1388610154390335, - "learning_rate": 7.46717263857826e-05, - "loss": 0.012735754251480103, - "step": 1440 - }, - { - "epoch": 0.2463768115942029, - "grad_norm": 0.08022485673427582, - "learning_rate": 7.466833490434132e-05, - "loss": 0.014391189813613892, - "step": 1445 - }, - { - "epoch": 0.24722932651321397, - "grad_norm": 0.12174725532531738, - "learning_rate": 7.466492607148492e-05, - "loss": 0.013387931883335114, - "step": 1450 - }, - { - "epoch": 0.24808184143222506, - "grad_norm": 0.1102161779999733, - "learning_rate": 7.466149988880474e-05, - "loss": 0.01143224760890007, - "step": 1455 - }, - { - "epoch": 0.24893435635123615, - "grad_norm": 0.14878468215465546, - "learning_rate": 7.465805635790024e-05, - "loss": 0.010428830236196517, - "step": 1460 - }, - { - "epoch": 0.24978687127024723, - "grad_norm": 0.3046579658985138, - "learning_rate": 7.4654595480379e-05, - "loss": 0.012648795545101166, - "step": 1465 - }, - { - "epoch": 0.24995737425404946, - "eval_loss": 0.036103956401348114, - "eval_runtime": 3.6524, - "eval_samples_per_second": 68.995, - "eval_steps_per_second": 1.095, - "step": 1466 - }, - { - "eval_cer_subset": 0.01393977885257381, - "eval_cer_subset_edit_distance": 856, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 1466 - }, - { - "epoch": 0.2506393861892583, - "grad_norm": 0.10660448670387268, - "learning_rate": 7.465111725785664e-05, - "loss": 0.013486798107624053, - "step": 1470 - }, - { - "epoch": 0.2514919011082694, - "grad_norm": 0.13888458907604218, - "learning_rate": 7.464762169195693e-05, - "loss": 0.015365575253963471, - "step": 1475 - }, - { - "epoch": 0.25234441602728047, - "grad_norm": 0.14945067465305328, - "learning_rate": 7.464410878431169e-05, - "loss": 0.01226709708571434, - "step": 1480 - }, - { - "epoch": 0.2531969309462916, - "grad_norm": 0.09638198465108871, - "learning_rate": 7.464057853656089e-05, - "loss": 0.012688608467578888, - "step": 1485 - }, - { - "epoch": 0.25404944586530265, - "grad_norm": 0.05725576728582382, - "learning_rate": 7.463703095035256e-05, - "loss": 0.011445847153663636, - "step": 1490 - }, - { - "epoch": 0.2549019607843137, - "grad_norm": 0.08474720269441605, - "learning_rate": 7.463346602734283e-05, - "loss": 0.01112249493598938, - "step": 1495 - }, - { - "epoch": 0.2557544757033248, - "grad_norm": 0.08283067494630814, - "learning_rate": 7.462988376919592e-05, - "loss": 0.01144670695066452, - "step": 1500 - }, - { - "epoch": 0.2566069906223359, - "grad_norm": 0.13687758147716522, - "learning_rate": 7.462628417758415e-05, - "loss": 0.012893360853195191, - "step": 1505 - }, - { - "epoch": 0.257459505541347, - "grad_norm": 0.16319195926189423, - "learning_rate": 7.462266725418793e-05, - "loss": 0.014364737272262573, - "step": 1510 - }, - { - "epoch": 0.25831202046035806, - "grad_norm": 0.0693240761756897, - "learning_rate": 7.461903300069576e-05, - "loss": 0.011550360918045044, - "step": 1515 - }, - { - "epoch": 0.2591645353793691, - "grad_norm": 0.0994478389620781, - "learning_rate": 7.461538141880423e-05, - "loss": 0.011711706221103669, - "step": 1520 - }, - { - "epoch": 0.26001705029838024, - "grad_norm": 0.20310325920581818, - "learning_rate": 7.461171251021802e-05, - "loss": 0.013178233802318574, - "step": 1525 - }, - { - "epoch": 0.2608695652173913, - "grad_norm": 0.07798318564891815, - "learning_rate": 7.460802627664991e-05, - "loss": 0.011273499578237534, - "step": 1530 - }, - { - "epoch": 0.2617220801364024, - "grad_norm": 0.1308072805404663, - "learning_rate": 7.460432271982073e-05, - "loss": 0.008084958046674728, - "step": 1535 - }, - { - "epoch": 0.2625745950554135, - "grad_norm": 0.08926808834075928, - "learning_rate": 7.460060184145944e-05, - "loss": 0.011974562704563142, - "step": 1540 - }, - { - "epoch": 0.26342710997442453, - "grad_norm": 0.07462260872125626, - "learning_rate": 7.459686364330307e-05, - "loss": 0.007739155739545822, - "step": 1545 - }, - { - "epoch": 0.26427962489343565, - "grad_norm": 0.10904734581708908, - "learning_rate": 7.459310812709675e-05, - "loss": 0.012024204432964324, - "step": 1550 - }, - { - "epoch": 0.2651321398124467, - "grad_norm": 0.11935116350650787, - "learning_rate": 7.458933529459364e-05, - "loss": 0.012462839484214783, - "step": 1555 - }, - { - "epoch": 0.2659846547314578, - "grad_norm": 0.08920887112617493, - "learning_rate": 7.458554514755506e-05, - "loss": 0.01472131609916687, - "step": 1560 - }, - { - "epoch": 0.2668371696504689, - "grad_norm": 0.12231490015983582, - "learning_rate": 7.458173768775036e-05, - "loss": 0.014967297017574311, - "step": 1565 - }, - { - "epoch": 0.26768968456947995, - "grad_norm": 0.10691904276609421, - "learning_rate": 7.4577912916957e-05, - "loss": 0.013200350105762482, - "step": 1570 - }, - { - "epoch": 0.26854219948849106, - "grad_norm": 0.06267247349023819, - "learning_rate": 7.457407083696049e-05, - "loss": 0.011946959048509597, - "step": 1575 - }, - { - "epoch": 0.2693947144075021, - "grad_norm": 0.10732340067625046, - "learning_rate": 7.457021144955448e-05, - "loss": 0.012722471356391906, - "step": 1580 - }, - { - "epoch": 0.27024722932651324, - "grad_norm": 0.08628841489553452, - "learning_rate": 7.456633475654061e-05, - "loss": 0.010444843024015427, - "step": 1585 - }, - { - "epoch": 0.2710997442455243, - "grad_norm": 0.1017296314239502, - "learning_rate": 7.456244075972866e-05, - "loss": 0.017299896478652953, - "step": 1590 - }, - { - "epoch": 0.27195225916453536, - "grad_norm": 0.07065381854772568, - "learning_rate": 7.455852946093652e-05, - "loss": 0.01379164457321167, - "step": 1595 - }, - { - "epoch": 0.2728047740835465, - "grad_norm": 0.08550920337438583, - "learning_rate": 7.455460086199008e-05, - "loss": 0.011976235359907151, - "step": 1600 - }, - { - "epoch": 0.27365728900255754, - "grad_norm": 0.08075132966041565, - "learning_rate": 7.455065496472335e-05, - "loss": 0.012481977045536042, - "step": 1605 - }, - { - "epoch": 0.27450980392156865, - "grad_norm": 0.08838896453380585, - "learning_rate": 7.454669177097839e-05, - "loss": 0.011825743317604064, - "step": 1610 - }, - { - "epoch": 0.2753623188405797, - "grad_norm": 0.06823412328958511, - "learning_rate": 7.454271128260537e-05, - "loss": 0.014278222620487214, - "step": 1615 - }, - { - "epoch": 0.27621483375959077, - "grad_norm": 0.09612765908241272, - "learning_rate": 7.45387135014625e-05, - "loss": 0.009220580756664275, - "step": 1620 - }, - { - "epoch": 0.2770673486786019, - "grad_norm": 0.08564051240682602, - "learning_rate": 7.45346984294161e-05, - "loss": 0.015146958827972411, - "step": 1625 - }, - { - "epoch": 0.27791986359761295, - "grad_norm": 0.0729006826877594, - "learning_rate": 7.453066606834052e-05, - "loss": 0.012136349081993103, - "step": 1630 - }, - { - "epoch": 0.27877237851662406, - "grad_norm": 0.10457300394773483, - "learning_rate": 7.452661642011818e-05, - "loss": 0.014803081750869751, - "step": 1635 - }, - { - "epoch": 0.2796248934356351, - "grad_norm": 0.09881619364023209, - "learning_rate": 7.452254948663964e-05, - "loss": 0.012653107941150665, - "step": 1640 - }, - { - "epoch": 0.2804774083546462, - "grad_norm": 0.12094103544950485, - "learning_rate": 7.451846526980343e-05, - "loss": 0.011742380261421204, - "step": 1645 - }, - { - "epoch": 0.2813299232736573, - "grad_norm": 0.06668030470609665, - "learning_rate": 7.451436377151624e-05, - "loss": 0.01095641851425171, - "step": 1650 - }, - { - "epoch": 0.28218243819266836, - "grad_norm": 0.06907116621732712, - "learning_rate": 7.451024499369278e-05, - "loss": 0.01093050017952919, - "step": 1655 - }, - { - "epoch": 0.2830349531116795, - "grad_norm": 0.13372033834457397, - "learning_rate": 7.45061089382558e-05, - "loss": 0.012350015342235565, - "step": 1660 - }, - { - "epoch": 0.28388746803069054, - "grad_norm": 0.06432037055492401, - "learning_rate": 7.450195560713617e-05, - "loss": 0.010150979459285735, - "step": 1665 - }, - { - "epoch": 0.2847399829497016, - "grad_norm": 0.10098759829998016, - "learning_rate": 7.449778500227281e-05, - "loss": 0.01070861890912056, - "step": 1670 - }, - { - "epoch": 0.2855924978687127, - "grad_norm": 0.1708894968032837, - "learning_rate": 7.449359712561269e-05, - "loss": 0.01218695342540741, - "step": 1675 - }, - { - "epoch": 0.2864450127877238, - "grad_norm": 0.15045367181301117, - "learning_rate": 7.448939197911084e-05, - "loss": 0.012416082620620727, - "step": 1680 - }, - { - "epoch": 0.2872975277067349, - "grad_norm": 0.08867572993040085, - "learning_rate": 7.44851695647304e-05, - "loss": 0.011927373707294464, - "step": 1685 - }, - { - "epoch": 0.28815004262574595, - "grad_norm": 0.1402040272951126, - "learning_rate": 7.448092988444247e-05, - "loss": 0.011733450740575791, - "step": 1690 - }, - { - "epoch": 0.289002557544757, - "grad_norm": 0.10436082631349564, - "learning_rate": 7.447667294022631e-05, - "loss": 0.013171072304248809, - "step": 1695 - }, - { - "epoch": 0.2898550724637681, - "grad_norm": 0.10628762096166611, - "learning_rate": 7.447239873406923e-05, - "loss": 0.012366896122694015, - "step": 1700 - }, - { - "epoch": 0.2907075873827792, - "grad_norm": 0.09782184660434723, - "learning_rate": 7.446810726796653e-05, - "loss": 0.011275313794612885, - "step": 1705 - }, - { - "epoch": 0.2915601023017903, - "grad_norm": 0.08403825014829636, - "learning_rate": 7.446379854392162e-05, - "loss": 0.010051032900810242, - "step": 1710 - }, - { - "epoch": 0.29241261722080136, - "grad_norm": 0.07938918471336365, - "learning_rate": 7.445947256394596e-05, - "loss": 0.00972949042916298, - "step": 1715 - }, - { - "epoch": 0.2932651321398124, - "grad_norm": 0.09250234067440033, - "learning_rate": 7.445512933005906e-05, - "loss": 0.009316288679838181, - "step": 1720 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 0.08939237147569656, - "learning_rate": 7.445076884428848e-05, - "loss": 0.007942373305559159, - "step": 1725 - }, - { - "epoch": 0.2949701619778346, - "grad_norm": 0.06440749019384384, - "learning_rate": 7.444639110866985e-05, - "loss": 0.008772502094507218, - "step": 1730 - }, - { - "epoch": 0.2958226768968457, - "grad_norm": 0.0980759784579277, - "learning_rate": 7.444199612524684e-05, - "loss": 0.0127939835190773, - "step": 1735 - }, - { - "epoch": 0.2966751918158568, - "grad_norm": 0.133849635720253, - "learning_rate": 7.443758389607117e-05, - "loss": 0.011026865988969802, - "step": 1740 - }, - { - "epoch": 0.29752770673486784, - "grad_norm": 0.08664857596158981, - "learning_rate": 7.443315442320263e-05, - "loss": 0.010273561626672745, - "step": 1745 - }, - { - "epoch": 0.29838022165387895, - "grad_norm": 0.11462656408548355, - "learning_rate": 7.442870770870902e-05, - "loss": 0.012825533747673035, - "step": 1750 - }, - { - "epoch": 0.29923273657289, - "grad_norm": 0.12586012482643127, - "learning_rate": 7.442424375466624e-05, - "loss": 0.01315489411354065, - "step": 1755 - }, - { - "epoch": 0.30008525149190113, - "grad_norm": 0.07139981538057327, - "learning_rate": 7.441976256315819e-05, - "loss": 0.010728174448013305, - "step": 1760 - }, - { - "epoch": 0.3009377664109122, - "grad_norm": 0.06837856024503708, - "learning_rate": 7.441526413627685e-05, - "loss": 0.012408022582530976, - "step": 1765 - }, - { - "epoch": 0.30179028132992325, - "grad_norm": 0.05851417034864426, - "learning_rate": 7.441074847612224e-05, - "loss": 0.009401807188987732, - "step": 1770 - }, - { - "epoch": 0.30264279624893436, - "grad_norm": 0.09595180302858353, - "learning_rate": 7.44062155848024e-05, - "loss": 0.010888323932886124, - "step": 1775 - }, - { - "epoch": 0.3034953111679454, - "grad_norm": 0.0811101421713829, - "learning_rate": 7.440166546443347e-05, - "loss": 0.00998341292142868, - "step": 1780 - }, - { - "epoch": 0.30434782608695654, - "grad_norm": 0.13257169723510742, - "learning_rate": 7.439709811713958e-05, - "loss": 0.014603719115257263, - "step": 1785 - }, - { - "epoch": 0.3052003410059676, - "grad_norm": 0.1428811252117157, - "learning_rate": 7.439251354505289e-05, - "loss": 0.01388871967792511, - "step": 1790 - }, - { - "epoch": 0.30605285592497866, - "grad_norm": 0.08253402262926102, - "learning_rate": 7.438791175031367e-05, - "loss": 0.010171836614608765, - "step": 1795 - }, - { - "epoch": 0.3069053708439898, - "grad_norm": 0.05991052836179733, - "learning_rate": 7.438329273507019e-05, - "loss": 0.01470649391412735, - "step": 1800 - }, - { - "epoch": 0.30775788576300084, - "grad_norm": 0.10785503685474396, - "learning_rate": 7.437865650147873e-05, - "loss": 0.012740308046340942, - "step": 1805 - }, - { - "epoch": 0.30861040068201195, - "grad_norm": 0.093068428337574, - "learning_rate": 7.437400305170367e-05, - "loss": 0.01183861643075943, - "step": 1810 - }, - { - "epoch": 0.309462915601023, - "grad_norm": 0.08662707358598709, - "learning_rate": 7.436933238791737e-05, - "loss": 0.011762722581624984, - "step": 1815 - }, - { - "epoch": 0.3103154305200341, - "grad_norm": 0.07826617360115051, - "learning_rate": 7.436464451230027e-05, - "loss": 0.009368828684091567, - "step": 1820 - }, - { - "epoch": 0.3111679454390452, - "grad_norm": 0.1295643448829651, - "learning_rate": 7.435993942704082e-05, - "loss": 0.010699732601642609, - "step": 1825 - }, - { - "epoch": 0.31202046035805625, - "grad_norm": 0.1412370651960373, - "learning_rate": 7.43552171343355e-05, - "loss": 0.0124404676258564, - "step": 1830 - }, - { - "epoch": 0.31287297527706737, - "grad_norm": 0.07793306559324265, - "learning_rate": 7.435047763638885e-05, - "loss": 0.010793016105890275, - "step": 1835 - }, - { - "epoch": 0.3137254901960784, - "grad_norm": 0.1273961067199707, - "learning_rate": 7.434572093541341e-05, - "loss": 0.012959575653076172, - "step": 1840 - }, - { - "epoch": 0.3145780051150895, - "grad_norm": 0.10340052098035812, - "learning_rate": 7.434094703362978e-05, - "loss": 0.011804693937301635, - "step": 1845 - }, - { - "epoch": 0.3154305200341006, - "grad_norm": 0.07878883183002472, - "learning_rate": 7.433615593326657e-05, - "loss": 0.011087532341480254, - "step": 1850 - }, - { - "epoch": 0.31628303495311166, - "grad_norm": 0.08166638761758804, - "learning_rate": 7.433134763656042e-05, - "loss": 0.010111966729164123, - "step": 1855 - }, - { - "epoch": 0.3171355498721228, - "grad_norm": 0.12048157304525375, - "learning_rate": 7.432652214575603e-05, - "loss": 0.013003784418106078, - "step": 1860 - }, - { - "epoch": 0.31798806479113384, - "grad_norm": 0.08009333908557892, - "learning_rate": 7.432167946310605e-05, - "loss": 0.01212536245584488, - "step": 1865 - }, - { - "epoch": 0.3188405797101449, - "grad_norm": 0.07344945520162582, - "learning_rate": 7.431681959087126e-05, - "loss": 0.011613032221794129, - "step": 1870 - }, - { - "epoch": 0.319693094629156, - "grad_norm": 0.09358638525009155, - "learning_rate": 7.431194253132037e-05, - "loss": 0.011946377158164979, - "step": 1875 - }, - { - "epoch": 0.3205456095481671, - "grad_norm": 0.14091502130031586, - "learning_rate": 7.430704828673016e-05, - "loss": 0.012845572829246522, - "step": 1880 - }, - { - "epoch": 0.3213981244671782, - "grad_norm": 0.0754130631685257, - "learning_rate": 7.430213685938543e-05, - "loss": 0.011171463876962662, - "step": 1885 - }, - { - "epoch": 0.32225063938618925, - "grad_norm": 0.10210556536912918, - "learning_rate": 7.429720825157901e-05, - "loss": 0.010276605188846589, - "step": 1890 - }, - { - "epoch": 0.32310315430520037, - "grad_norm": 0.10094697028398514, - "learning_rate": 7.429226246561173e-05, - "loss": 0.01233583763241768, - "step": 1895 - }, - { - "epoch": 0.32395566922421143, - "grad_norm": 0.0673881471157074, - "learning_rate": 7.428729950379244e-05, - "loss": 0.008631937205791473, - "step": 1900 - }, - { - "epoch": 0.3248081841432225, - "grad_norm": 0.11807650327682495, - "learning_rate": 7.428231936843803e-05, - "loss": 0.012879209220409393, - "step": 1905 - }, - { - "epoch": 0.3256606990622336, - "grad_norm": 0.0627446100115776, - "learning_rate": 7.427732206187338e-05, - "loss": 0.011548225581645966, - "step": 1910 - }, - { - "epoch": 0.32651321398124467, - "grad_norm": 0.09312627464532852, - "learning_rate": 7.427230758643139e-05, - "loss": 0.012763653695583344, - "step": 1915 - }, - { - "epoch": 0.3273657289002558, - "grad_norm": 0.12694048881530762, - "learning_rate": 7.426727594445302e-05, - "loss": 0.014219759404659272, - "step": 1920 - }, - { - "epoch": 0.32821824381926684, - "grad_norm": 0.09415233880281448, - "learning_rate": 7.426222713828717e-05, - "loss": 0.01088135689496994, - "step": 1925 - }, - { - "epoch": 0.3290707587382779, - "grad_norm": 0.1079363226890564, - "learning_rate": 7.425716117029082e-05, - "loss": 0.013090427219867706, - "step": 1930 - }, - { - "epoch": 0.329923273657289, - "grad_norm": 0.10847736895084381, - "learning_rate": 7.42520780428289e-05, - "loss": 0.011184506118297577, - "step": 1935 - }, - { - "epoch": 0.3307757885763001, - "grad_norm": 0.12416253983974457, - "learning_rate": 7.424697775827442e-05, - "loss": 0.012871085107326508, - "step": 1940 - }, - { - "epoch": 0.3316283034953112, - "grad_norm": 0.08419755846261978, - "learning_rate": 7.424186031900833e-05, - "loss": 0.01026538610458374, - "step": 1945 - }, - { - "epoch": 0.33248081841432225, - "grad_norm": 0.06923236697912216, - "learning_rate": 7.423672572741965e-05, - "loss": 0.012079264223575591, - "step": 1950 - }, - { - "epoch": 0.3333333333333333, - "grad_norm": 0.08516070991754532, - "learning_rate": 7.423157398590534e-05, - "loss": 0.011150284111499787, - "step": 1955 - }, - { - "epoch": 0.33418584825234443, - "grad_norm": 0.054969049990177155, - "learning_rate": 7.422640509687045e-05, - "loss": 0.008261225372552871, - "step": 1960 - }, - { - "epoch": 0.3350383631713555, - "grad_norm": 0.09037495404481888, - "learning_rate": 7.422121906272795e-05, - "loss": 0.015576986968517304, - "step": 1965 - }, - { - "epoch": 0.3358908780903666, - "grad_norm": 0.08676491677761078, - "learning_rate": 7.421601588589889e-05, - "loss": 0.01942193806171417, - "step": 1970 - }, - { - "epoch": 0.33674339300937767, - "grad_norm": 0.09090764820575714, - "learning_rate": 7.421079556881224e-05, - "loss": 0.012568703293800354, - "step": 1975 - }, - { - "epoch": 0.3375959079283887, - "grad_norm": 0.07859542965888977, - "learning_rate": 7.420555811390505e-05, - "loss": 0.011662108451128006, - "step": 1980 - }, - { - "epoch": 0.33844842284739984, - "grad_norm": 0.06368016451597214, - "learning_rate": 7.420030352362235e-05, - "loss": 0.010762494802474976, - "step": 1985 - }, - { - "epoch": 0.3393009377664109, - "grad_norm": 0.10950745642185211, - "learning_rate": 7.419503180041712e-05, - "loss": 0.012577894330024719, - "step": 1990 - }, - { - "epoch": 0.340153452685422, - "grad_norm": 0.07888182997703552, - "learning_rate": 7.41897429467504e-05, - "loss": 0.009134671837091445, - "step": 1995 - }, - { - "epoch": 0.3410059676044331, - "grad_norm": 0.08978903293609619, - "learning_rate": 7.41844369650912e-05, - "loss": 0.011774566024541855, - "step": 2000 - }, - { - "epoch": 0.34185848252344414, - "grad_norm": 0.07103633135557175, - "learning_rate": 7.417911385791653e-05, - "loss": 0.011116493493318558, - "step": 2005 - }, - { - "epoch": 0.34271099744245526, - "grad_norm": 0.07445147633552551, - "learning_rate": 7.417377362771138e-05, - "loss": 0.012135914713144302, - "step": 2010 - }, - { - "epoch": 0.3435635123614663, - "grad_norm": 0.09372841566801071, - "learning_rate": 7.416841627696876e-05, - "loss": 0.014101208746433258, - "step": 2015 - }, - { - "epoch": 0.34441602728047743, - "grad_norm": 0.10181085020303726, - "learning_rate": 7.416304180818966e-05, - "loss": 0.010917666554450988, - "step": 2020 - }, - { - "epoch": 0.3452685421994885, - "grad_norm": 0.08702226728200912, - "learning_rate": 7.415765022388305e-05, - "loss": 0.012508213520050049, - "step": 2025 - }, - { - "epoch": 0.34612105711849955, - "grad_norm": 0.11725348234176636, - "learning_rate": 7.415224152656591e-05, - "loss": 0.012123394012451171, - "step": 2030 - }, - { - "epoch": 0.34697357203751067, - "grad_norm": 0.10797812044620514, - "learning_rate": 7.414681571876321e-05, - "loss": 0.011308898031711579, - "step": 2035 - }, - { - "epoch": 0.34782608695652173, - "grad_norm": 0.07944193482398987, - "learning_rate": 7.414137280300787e-05, - "loss": 0.008828282356262207, - "step": 2040 - }, - { - "epoch": 0.34867860187553285, - "grad_norm": 0.09413408488035202, - "learning_rate": 7.413591278184086e-05, - "loss": 0.010974615067243575, - "step": 2045 - }, - { - "epoch": 0.3495311167945439, - "grad_norm": 0.13984905183315277, - "learning_rate": 7.413043565781107e-05, - "loss": 0.013567428290843963, - "step": 2050 - }, - { - "epoch": 0.35038363171355497, - "grad_norm": 0.09445049613714218, - "learning_rate": 7.41249414334754e-05, - "loss": 0.011122822761535645, - "step": 2055 - }, - { - "epoch": 0.3512361466325661, - "grad_norm": 0.07995510846376419, - "learning_rate": 7.411943011139877e-05, - "loss": 0.009908045828342437, - "step": 2060 - }, - { - "epoch": 0.35208866155157714, - "grad_norm": 0.1185273677110672, - "learning_rate": 7.411390169415402e-05, - "loss": 0.012709785997867585, - "step": 2065 - }, - { - "epoch": 0.35294117647058826, - "grad_norm": 0.11713512986898422, - "learning_rate": 7.4108356184322e-05, - "loss": 0.009765231609344482, - "step": 2070 - }, - { - "epoch": 0.3537936913895993, - "grad_norm": 0.06523539125919342, - "learning_rate": 7.410279358449155e-05, - "loss": 0.0113253653049469, - "step": 2075 - }, - { - "epoch": 0.3546462063086104, - "grad_norm": 0.07587762176990509, - "learning_rate": 7.409721389725948e-05, - "loss": 0.009385265409946442, - "step": 2080 - }, - { - "epoch": 0.3554987212276215, - "grad_norm": 0.05211614444851875, - "learning_rate": 7.409161712523056e-05, - "loss": 0.012498895078897477, - "step": 2085 - }, - { - "epoch": 0.35635123614663256, - "grad_norm": 0.12545894086360931, - "learning_rate": 7.408600327101755e-05, - "loss": 0.012212803959846497, - "step": 2090 - }, - { - "epoch": 0.35720375106564367, - "grad_norm": 0.10047369450330734, - "learning_rate": 7.40803723372412e-05, - "loss": 0.012341489642858505, - "step": 2095 - }, - { - "epoch": 0.35805626598465473, - "grad_norm": 0.13728737831115723, - "learning_rate": 7.40747243265302e-05, - "loss": 0.011351624131202697, - "step": 2100 - }, - { - "epoch": 0.3589087809036658, - "grad_norm": 0.1251213699579239, - "learning_rate": 7.406905924152125e-05, - "loss": 0.013545188307762145, - "step": 2105 - }, - { - "epoch": 0.3597612958226769, - "grad_norm": 0.07805601507425308, - "learning_rate": 7.406337708485897e-05, - "loss": 0.010711775720119476, - "step": 2110 - }, - { - "epoch": 0.36061381074168797, - "grad_norm": 0.08311845362186432, - "learning_rate": 7.405767785919598e-05, - "loss": 0.01128876730799675, - "step": 2115 - }, - { - "epoch": 0.3614663256606991, - "grad_norm": 0.09670841693878174, - "learning_rate": 7.405196156719291e-05, - "loss": 0.013085599243640899, - "step": 2120 - }, - { - "epoch": 0.36231884057971014, - "grad_norm": 0.10827390104532242, - "learning_rate": 7.404622821151829e-05, - "loss": 0.011315967142581939, - "step": 2125 - }, - { - "epoch": 0.3631713554987212, - "grad_norm": 0.08578862994909286, - "learning_rate": 7.404047779484862e-05, - "loss": 0.01172153502702713, - "step": 2130 - }, - { - "epoch": 0.3640238704177323, - "grad_norm": 0.08786064386367798, - "learning_rate": 7.403471031986841e-05, - "loss": 0.010834509134292602, - "step": 2135 - }, - { - "epoch": 0.3648763853367434, - "grad_norm": 0.07956185191869736, - "learning_rate": 7.402892578927012e-05, - "loss": 0.01222250759601593, - "step": 2140 - }, - { - "epoch": 0.3657289002557545, - "grad_norm": 0.10179547220468521, - "learning_rate": 7.402312420575414e-05, - "loss": 0.010667824000120164, - "step": 2145 - }, - { - "epoch": 0.36658141517476556, - "grad_norm": 0.10311263799667358, - "learning_rate": 7.401730557202884e-05, - "loss": 0.014463961124420166, - "step": 2150 - }, - { - "epoch": 0.3674339300937766, - "grad_norm": 0.0935206189751625, - "learning_rate": 7.401146989081058e-05, - "loss": 0.010451390594244003, - "step": 2155 - }, - { - "epoch": 0.36828644501278773, - "grad_norm": 0.08164738863706589, - "learning_rate": 7.400561716482362e-05, - "loss": 0.013009518384933472, - "step": 2160 - }, - { - "epoch": 0.3691389599317988, - "grad_norm": 0.0638088807463646, - "learning_rate": 7.399974739680022e-05, - "loss": 0.0109320767223835, - "step": 2165 - }, - { - "epoch": 0.3699914748508099, - "grad_norm": 0.09591665863990784, - "learning_rate": 7.399386058948057e-05, - "loss": 0.01293652206659317, - "step": 2170 - }, - { - "epoch": 0.37084398976982097, - "grad_norm": 0.08929681777954102, - "learning_rate": 7.398795674561285e-05, - "loss": 0.011034403741359711, - "step": 2175 - }, - { - "epoch": 0.37169650468883203, - "grad_norm": 0.07356081902980804, - "learning_rate": 7.398203586795315e-05, - "loss": 0.010477699339389801, - "step": 2180 - }, - { - "epoch": 0.37254901960784315, - "grad_norm": 0.1117938682436943, - "learning_rate": 7.397609795926555e-05, - "loss": 0.008920109272003174, - "step": 2185 - }, - { - "epoch": 0.3734015345268542, - "grad_norm": 0.10849595069885254, - "learning_rate": 7.397014302232204e-05, - "loss": 0.01170756369829178, - "step": 2190 - }, - { - "epoch": 0.3742540494458653, - "grad_norm": 0.08509895205497742, - "learning_rate": 7.396417105990261e-05, - "loss": 0.010042114555835724, - "step": 2195 - }, - { - "epoch": 0.3751065643648764, - "grad_norm": 0.10500915348529816, - "learning_rate": 7.395818207479515e-05, - "loss": 0.011792914569377899, - "step": 2200 - }, - { - "epoch": 0.37595907928388744, - "grad_norm": 0.06618086993694305, - "learning_rate": 7.395217606979553e-05, - "loss": 0.011084456741809846, - "step": 2205 - }, - { - "epoch": 0.37681159420289856, - "grad_norm": 0.08622384816408157, - "learning_rate": 7.394615304770756e-05, - "loss": 0.010955430567264557, - "step": 2210 - }, - { - "epoch": 0.3776641091219096, - "grad_norm": 0.10002898424863815, - "learning_rate": 7.394011301134296e-05, - "loss": 0.011026810109615325, - "step": 2215 - }, - { - "epoch": 0.37851662404092073, - "grad_norm": 0.16406390070915222, - "learning_rate": 7.393405596352144e-05, - "loss": 0.010511884093284607, - "step": 2220 - }, - { - "epoch": 0.3793691389599318, - "grad_norm": 0.077234648168087, - "learning_rate": 7.392798190707062e-05, - "loss": 0.011723069101572036, - "step": 2225 - }, - { - "epoch": 0.38022165387894286, - "grad_norm": 0.09080372750759125, - "learning_rate": 7.392189084482609e-05, - "loss": 0.010011065006256103, - "step": 2230 - }, - { - "epoch": 0.38107416879795397, - "grad_norm": 0.08161097019910812, - "learning_rate": 7.391578277963134e-05, - "loss": 0.012426529079675674, - "step": 2235 - }, - { - "epoch": 0.38192668371696503, - "grad_norm": 0.09220891445875168, - "learning_rate": 7.390965771433783e-05, - "loss": 0.011983324587345124, - "step": 2240 - }, - { - "epoch": 0.38277919863597615, - "grad_norm": 0.10752015560865402, - "learning_rate": 7.390351565180495e-05, - "loss": 0.014156198501586914, - "step": 2245 - }, - { - "epoch": 0.3836317135549872, - "grad_norm": 0.05059373378753662, - "learning_rate": 7.38973565949e-05, - "loss": 0.00998034030199051, - "step": 2250 - }, - { - "epoch": 0.38448422847399827, - "grad_norm": 0.11214456707239151, - "learning_rate": 7.389118054649824e-05, - "loss": 0.01075390875339508, - "step": 2255 - }, - { - "epoch": 0.3853367433930094, - "grad_norm": 0.07631754130125046, - "learning_rate": 7.388498750948286e-05, - "loss": 0.014638753235340118, - "step": 2260 - }, - { - "epoch": 0.38618925831202044, - "grad_norm": 0.07249671965837479, - "learning_rate": 7.387877748674499e-05, - "loss": 0.011368723213672638, - "step": 2265 - }, - { - "epoch": 0.38704177323103156, - "grad_norm": 0.11984748393297195, - "learning_rate": 7.387255048118364e-05, - "loss": 0.011021500825881958, - "step": 2270 - }, - { - "epoch": 0.3878942881500426, - "grad_norm": 0.08478229492902756, - "learning_rate": 7.386630649570581e-05, - "loss": 0.009952519088983536, - "step": 2275 - }, - { - "epoch": 0.3887468030690537, - "grad_norm": 0.11780049651861191, - "learning_rate": 7.386004553322639e-05, - "loss": 0.009453963488340378, - "step": 2280 - }, - { - "epoch": 0.3895993179880648, - "grad_norm": 0.06949981302022934, - "learning_rate": 7.38537675966682e-05, - "loss": 0.009042493999004364, - "step": 2285 - }, - { - "epoch": 0.39045183290707586, - "grad_norm": 0.11411654949188232, - "learning_rate": 7.3847472688962e-05, - "loss": 0.013985235989093781, - "step": 2290 - }, - { - "epoch": 0.391304347826087, - "grad_norm": 0.11030828952789307, - "learning_rate": 7.384116081304647e-05, - "loss": 0.01135389506816864, - "step": 2295 - }, - { - "epoch": 0.39215686274509803, - "grad_norm": 0.0808996856212616, - "learning_rate": 7.38348319718682e-05, - "loss": 0.011089587211608886, - "step": 2300 - }, - { - "epoch": 0.39300937766410915, - "grad_norm": 0.11319196224212646, - "learning_rate": 7.382848616838167e-05, - "loss": 0.01407056450843811, - "step": 2305 - }, - { - "epoch": 0.3938618925831202, - "grad_norm": 0.09301812201738358, - "learning_rate": 7.382212340554937e-05, - "loss": 0.012283433228731155, - "step": 2310 - }, - { - "epoch": 0.39471440750213127, - "grad_norm": 0.08611076325178146, - "learning_rate": 7.381574368634159e-05, - "loss": 0.012206315249204635, - "step": 2315 - }, - { - "epoch": 0.3955669224211424, - "grad_norm": 0.08104816824197769, - "learning_rate": 7.380934701373665e-05, - "loss": 0.01059889942407608, - "step": 2320 - }, - { - "epoch": 0.39641943734015345, - "grad_norm": 0.09607693552970886, - "learning_rate": 7.380293339072067e-05, - "loss": 0.010189100354909896, - "step": 2325 - }, - { - "epoch": 0.39727195225916456, - "grad_norm": 0.08985438197851181, - "learning_rate": 7.37965028202878e-05, - "loss": 0.01145355924963951, - "step": 2330 - }, - { - "epoch": 0.3981244671781756, - "grad_norm": 0.0767461284995079, - "learning_rate": 7.379005530544e-05, - "loss": 0.012533161044120788, - "step": 2335 - }, - { - "epoch": 0.3989769820971867, - "grad_norm": 0.17541736364364624, - "learning_rate": 7.378359084918724e-05, - "loss": 0.011619434505701066, - "step": 2340 - }, - { - "epoch": 0.3998294970161978, - "grad_norm": 0.07870234549045563, - "learning_rate": 7.377710945454728e-05, - "loss": 0.013362208008766174, - "step": 2345 - }, - { - "epoch": 0.40068201193520886, - "grad_norm": 0.08661636710166931, - "learning_rate": 7.377061112454589e-05, - "loss": 0.011086350679397583, - "step": 2350 - }, - { - "epoch": 0.40153452685422, - "grad_norm": 0.08467904478311539, - "learning_rate": 7.376409586221668e-05, - "loss": 0.008972878754138946, - "step": 2355 - }, - { - "epoch": 0.40238704177323104, - "grad_norm": 0.09345834702253342, - "learning_rate": 7.375756367060121e-05, - "loss": 0.01281469464302063, - "step": 2360 - }, - { - "epoch": 0.4032395566922421, - "grad_norm": 0.10789518058300018, - "learning_rate": 7.375101455274893e-05, - "loss": 0.012343473732471466, - "step": 2365 - }, - { - "epoch": 0.4040920716112532, - "grad_norm": 0.06546701490879059, - "learning_rate": 7.374444851171716e-05, - "loss": 0.012971158325672149, - "step": 2370 - }, - { - "epoch": 0.40494458653026427, - "grad_norm": 0.08968871831893921, - "learning_rate": 7.373786555057117e-05, - "loss": 0.012170027941465378, - "step": 2375 - }, - { - "epoch": 0.4057971014492754, - "grad_norm": 0.058557040989398956, - "learning_rate": 7.373126567238412e-05, - "loss": 0.009915658086538316, - "step": 2380 - }, - { - "epoch": 0.40664961636828645, - "grad_norm": 0.08734243363142014, - "learning_rate": 7.3724648880237e-05, - "loss": 0.009043127298355103, - "step": 2385 - }, - { - "epoch": 0.4075021312872975, - "grad_norm": 0.09249505400657654, - "learning_rate": 7.371801517721879e-05, - "loss": 0.008064758032560349, - "step": 2390 - }, - { - "epoch": 0.4083546462063086, - "grad_norm": 0.09015105664730072, - "learning_rate": 7.371136456642631e-05, - "loss": 0.007721304893493652, - "step": 2395 - }, - { - "epoch": 0.4092071611253197, - "grad_norm": 0.08557724207639694, - "learning_rate": 7.37046970509643e-05, - "loss": 0.010766822844743729, - "step": 2400 - }, - { - "epoch": 0.4100596760443308, - "grad_norm": 0.08001160621643066, - "learning_rate": 7.369801263394536e-05, - "loss": 0.00953015759587288, - "step": 2405 - }, - { - "epoch": 0.41091219096334186, - "grad_norm": 0.08470463752746582, - "learning_rate": 7.369131131849e-05, - "loss": 0.010154610127210617, - "step": 2410 - }, - { - "epoch": 0.4117647058823529, - "grad_norm": 0.07110592722892761, - "learning_rate": 7.368459310772664e-05, - "loss": 0.010146965831518173, - "step": 2415 - }, - { - "epoch": 0.41261722080136404, - "grad_norm": 0.06808072328567505, - "learning_rate": 7.367785800479152e-05, - "loss": 0.01043560653924942, - "step": 2420 - }, - { - "epoch": 0.4134697357203751, - "grad_norm": 0.09226541966199875, - "learning_rate": 7.367110601282884e-05, - "loss": 0.011138775944709777, - "step": 2425 - }, - { - "epoch": 0.4143222506393862, - "grad_norm": 0.08650510013103485, - "learning_rate": 7.366433713499067e-05, - "loss": 0.011451859772205353, - "step": 2430 - }, - { - "epoch": 0.4151747655583973, - "grad_norm": 0.11477349698543549, - "learning_rate": 7.365755137443691e-05, - "loss": 0.013105396926403046, - "step": 2435 - }, - { - "epoch": 0.41602728047740833, - "grad_norm": 0.1117088794708252, - "learning_rate": 7.365074873433541e-05, - "loss": 0.01190647780895233, - "step": 2440 - }, - { - "epoch": 0.41687979539641945, - "grad_norm": 0.058514054864645004, - "learning_rate": 7.364392921786185e-05, - "loss": 0.011006749421358108, - "step": 2445 - }, - { - "epoch": 0.4177323103154305, - "grad_norm": 0.0925084576010704, - "learning_rate": 7.363709282819981e-05, - "loss": 0.011449025571346283, - "step": 2450 - }, - { - "epoch": 0.4185848252344416, - "grad_norm": 0.10087555646896362, - "learning_rate": 7.363023956854074e-05, - "loss": 0.011715477705001831, - "step": 2455 - }, - { - "epoch": 0.4194373401534527, - "grad_norm": 0.08760760724544525, - "learning_rate": 7.362336944208399e-05, - "loss": 0.011089532822370528, - "step": 2460 - }, - { - "epoch": 0.42028985507246375, - "grad_norm": 0.09802501648664474, - "learning_rate": 7.361648245203674e-05, - "loss": 0.012181267142295837, - "step": 2465 - }, - { - "epoch": 0.42114236999147486, - "grad_norm": 0.06908553838729858, - "learning_rate": 7.36095786016141e-05, - "loss": 0.010319410264492035, - "step": 2470 - }, - { - "epoch": 0.4219948849104859, - "grad_norm": 0.07190519571304321, - "learning_rate": 7.360265789403896e-05, - "loss": 0.013445201516151428, - "step": 2475 - }, - { - "epoch": 0.42284739982949704, - "grad_norm": 0.06683836877346039, - "learning_rate": 7.359572033254219e-05, - "loss": 0.008111725002527237, - "step": 2480 - }, - { - "epoch": 0.4236999147485081, - "grad_norm": 0.07094739377498627, - "learning_rate": 7.358876592036245e-05, - "loss": 0.012130254507064819, - "step": 2485 - }, - { - "epoch": 0.42455242966751916, - "grad_norm": 0.11974254250526428, - "learning_rate": 7.358179466074629e-05, - "loss": 0.011426160484552384, - "step": 2490 - }, - { - "epoch": 0.4254049445865303, - "grad_norm": 0.07710634917020798, - "learning_rate": 7.357480655694814e-05, - "loss": 0.010044369101524352, - "step": 2495 - }, - { - "epoch": 0.42625745950554134, - "grad_norm": 0.08417962491512299, - "learning_rate": 7.356780161223026e-05, - "loss": 0.010821688175201415, - "step": 2500 - }, - { - "epoch": 0.42710997442455245, - "grad_norm": 0.11058598011732101, - "learning_rate": 7.35607798298628e-05, - "loss": 0.012949730455875396, - "step": 2505 - }, - { - "epoch": 0.4279624893435635, - "grad_norm": 0.08686384558677673, - "learning_rate": 7.355374121312377e-05, - "loss": 0.009096769988536835, - "step": 2510 - }, - { - "epoch": 0.4288150042625746, - "grad_norm": 0.11153281480073929, - "learning_rate": 7.354668576529903e-05, - "loss": 0.010433172434568405, - "step": 2515 - }, - { - "epoch": 0.4296675191815857, - "grad_norm": 0.08490245044231415, - "learning_rate": 7.353961348968229e-05, - "loss": 0.008478586375713349, - "step": 2520 - }, - { - "epoch": 0.43052003410059675, - "grad_norm": 0.06651579588651657, - "learning_rate": 7.353252438957511e-05, - "loss": 0.012342555820941925, - "step": 2525 - }, - { - "epoch": 0.43137254901960786, - "grad_norm": 0.08961665630340576, - "learning_rate": 7.352541846828694e-05, - "loss": 0.010387994349002838, - "step": 2530 - }, - { - "epoch": 0.4322250639386189, - "grad_norm": 0.08726584166288376, - "learning_rate": 7.351829572913505e-05, - "loss": 0.009760166704654693, - "step": 2535 - }, - { - "epoch": 0.43307757885763, - "grad_norm": 0.06280151754617691, - "learning_rate": 7.351115617544459e-05, - "loss": 0.01087048500776291, - "step": 2540 - }, - { - "epoch": 0.4339300937766411, - "grad_norm": 0.09519831836223602, - "learning_rate": 7.350399981054851e-05, - "loss": 0.011516393721103668, - "step": 2545 - }, - { - "epoch": 0.43478260869565216, - "grad_norm": 0.09179427474737167, - "learning_rate": 7.349682663778766e-05, - "loss": 0.013757939636707305, - "step": 2550 - }, - { - "epoch": 0.4356351236146633, - "grad_norm": 0.10378465801477432, - "learning_rate": 7.34896366605107e-05, - "loss": 0.011337973177433014, - "step": 2555 - }, - { - "epoch": 0.43648763853367434, - "grad_norm": 0.14043129980564117, - "learning_rate": 7.348242988207418e-05, - "loss": 0.01203509122133255, - "step": 2560 - }, - { - "epoch": 0.4373401534526854, - "grad_norm": 0.06442756950855255, - "learning_rate": 7.347520630584243e-05, - "loss": 0.007210708409547806, - "step": 2565 - }, - { - "epoch": 0.4381926683716965, - "grad_norm": 0.05981998145580292, - "learning_rate": 7.346796593518768e-05, - "loss": 0.009825873374938964, - "step": 2570 - }, - { - "epoch": 0.4390451832907076, - "grad_norm": 0.10198855400085449, - "learning_rate": 7.346070877348996e-05, - "loss": 0.013066151738166809, - "step": 2575 - }, - { - "epoch": 0.4398976982097187, - "grad_norm": 0.12545716762542725, - "learning_rate": 7.345343482413716e-05, - "loss": 0.008229418843984603, - "step": 2580 - }, - { - "epoch": 0.44075021312872975, - "grad_norm": 0.1352240890264511, - "learning_rate": 7.344614409052501e-05, - "loss": 0.013183671236038207, - "step": 2585 - }, - { - "epoch": 0.4416027280477408, - "grad_norm": 0.07198570668697357, - "learning_rate": 7.343883657605704e-05, - "loss": 0.010311058908700942, - "step": 2590 - }, - { - "epoch": 0.4424552429667519, - "grad_norm": 0.08454001694917679, - "learning_rate": 7.343151228414469e-05, - "loss": 0.009928110986948013, - "step": 2595 - }, - { - "epoch": 0.443307757885763, - "grad_norm": 0.07289708405733109, - "learning_rate": 7.342417121820714e-05, - "loss": 0.011071844398975373, - "step": 2600 - }, - { - "epoch": 0.4441602728047741, - "grad_norm": 0.12291301786899567, - "learning_rate": 7.341681338167145e-05, - "loss": 0.011248499900102616, - "step": 2605 - }, - { - "epoch": 0.44501278772378516, - "grad_norm": 0.14277565479278564, - "learning_rate": 7.340943877797252e-05, - "loss": 0.010025183856487273, - "step": 2610 - }, - { - "epoch": 0.4458653026427962, - "grad_norm": 0.07569251209497452, - "learning_rate": 7.340204741055304e-05, - "loss": 0.009996208548545837, - "step": 2615 - }, - { - "epoch": 0.44671781756180734, - "grad_norm": 0.10494589060544968, - "learning_rate": 7.339463928286357e-05, - "loss": 0.01392391324043274, - "step": 2620 - }, - { - "epoch": 0.4475703324808184, - "grad_norm": 0.14377856254577637, - "learning_rate": 7.338721439836245e-05, - "loss": 0.012823046743869781, - "step": 2625 - }, - { - "epoch": 0.4484228473998295, - "grad_norm": 0.06943785399198532, - "learning_rate": 7.337977276051586e-05, - "loss": 0.009452010691165923, - "step": 2630 - }, - { - "epoch": 0.4492753623188406, - "grad_norm": 0.09933419525623322, - "learning_rate": 7.337231437279783e-05, - "loss": 0.008945996314287186, - "step": 2635 - }, - { - "epoch": 0.45012787723785164, - "grad_norm": 0.09861225634813309, - "learning_rate": 7.336483923869016e-05, - "loss": 0.010671885311603546, - "step": 2640 - }, - { - "epoch": 0.45098039215686275, - "grad_norm": 0.08303772658109665, - "learning_rate": 7.335734736168249e-05, - "loss": 0.009589634835720062, - "step": 2645 - }, - { - "epoch": 0.4518329070758738, - "grad_norm": 0.08657588064670563, - "learning_rate": 7.334983874527231e-05, - "loss": 0.008064036071300507, - "step": 2650 - }, - { - "epoch": 0.45268542199488493, - "grad_norm": 0.10513710975646973, - "learning_rate": 7.334231339296485e-05, - "loss": 0.01647743284702301, - "step": 2655 - }, - { - "epoch": 0.453537936913896, - "grad_norm": 0.10341943055391312, - "learning_rate": 7.333477130827322e-05, - "loss": 0.009101226180791854, - "step": 2660 - }, - { - "epoch": 0.45439045183290705, - "grad_norm": 0.09740681946277618, - "learning_rate": 7.33272124947183e-05, - "loss": 0.011460770666599274, - "step": 2665 - }, - { - "epoch": 0.45524296675191817, - "grad_norm": 0.06477998197078705, - "learning_rate": 7.331963695582881e-05, - "loss": 0.011711791157722473, - "step": 2670 - }, - { - "epoch": 0.4560954816709292, - "grad_norm": 0.0881948322057724, - "learning_rate": 7.331204469514127e-05, - "loss": 0.009621420502662658, - "step": 2675 - }, - { - "epoch": 0.45694799658994034, - "grad_norm": 0.09553391486406326, - "learning_rate": 7.330443571619998e-05, - "loss": 0.011725078523159026, - "step": 2680 - }, - { - "epoch": 0.4578005115089514, - "grad_norm": 0.10480209439992905, - "learning_rate": 7.329681002255706e-05, - "loss": 0.012353558838367463, - "step": 2685 - }, - { - "epoch": 0.45865302642796246, - "grad_norm": 0.08409439772367477, - "learning_rate": 7.328916761777247e-05, - "loss": 0.01114615797996521, - "step": 2690 - }, - { - "epoch": 0.4595055413469736, - "grad_norm": 0.07166923582553864, - "learning_rate": 7.32815085054139e-05, - "loss": 0.008672221004962921, - "step": 2695 - }, - { - "epoch": 0.46035805626598464, - "grad_norm": 0.07308658212423325, - "learning_rate": 7.327383268905691e-05, - "loss": 0.012448658794164657, - "step": 2700 - }, - { - "epoch": 0.46121057118499575, - "grad_norm": 0.14019793272018433, - "learning_rate": 7.32661401722848e-05, - "loss": 0.013477186858654022, - "step": 2705 - }, - { - "epoch": 0.4620630861040068, - "grad_norm": 0.0753963515162468, - "learning_rate": 7.325843095868872e-05, - "loss": 0.011373884975910187, - "step": 2710 - }, - { - "epoch": 0.4629156010230179, - "grad_norm": 0.07312130182981491, - "learning_rate": 7.325070505186756e-05, - "loss": 0.012329152971506118, - "step": 2715 - }, - { - "epoch": 0.463768115942029, - "grad_norm": 0.06200556829571724, - "learning_rate": 7.324296245542806e-05, - "loss": 0.008847354352474213, - "step": 2720 - }, - { - "epoch": 0.46462063086104005, - "grad_norm": 0.11015846580266953, - "learning_rate": 7.32352031729847e-05, - "loss": 0.013304698467254638, - "step": 2725 - }, - { - "epoch": 0.46547314578005117, - "grad_norm": 0.05926821380853653, - "learning_rate": 7.322742720815978e-05, - "loss": 0.011919337511062621, - "step": 2730 - }, - { - "epoch": 0.4663256606990622, - "grad_norm": 0.102846160531044, - "learning_rate": 7.321963456458337e-05, - "loss": 0.010952814668416976, - "step": 2735 - }, - { - "epoch": 0.46717817561807334, - "grad_norm": 0.10767021775245667, - "learning_rate": 7.321182524589334e-05, - "loss": 0.012438956648111343, - "step": 2740 - }, - { - "epoch": 0.4680306905370844, - "grad_norm": 0.08611919730901718, - "learning_rate": 7.320399925573534e-05, - "loss": 0.008686845004558564, - "step": 2745 - }, - { - "epoch": 0.46888320545609546, - "grad_norm": 0.07483147829771042, - "learning_rate": 7.31961565977628e-05, - "loss": 0.011065713316202163, - "step": 2750 - }, - { - "epoch": 0.4697357203751066, - "grad_norm": 0.08029857277870178, - "learning_rate": 7.318829727563696e-05, - "loss": 0.012208929657936097, - "step": 2755 - }, - { - "epoch": 0.47058823529411764, - "grad_norm": 0.09076030552387238, - "learning_rate": 7.318042129302676e-05, - "loss": 0.010283030569553375, - "step": 2760 - }, - { - "epoch": 0.47144075021312876, - "grad_norm": 0.07009804993867874, - "learning_rate": 7.317252865360902e-05, - "loss": 0.010625988245010376, - "step": 2765 - }, - { - "epoch": 0.4722932651321398, - "grad_norm": 0.07213665544986725, - "learning_rate": 7.316461936106826e-05, - "loss": 0.010299822688102723, - "step": 2770 - }, - { - "epoch": 0.4731457800511509, - "grad_norm": 0.08464398980140686, - "learning_rate": 7.315669341909679e-05, - "loss": 0.010440715402364732, - "step": 2775 - }, - { - "epoch": 0.473998294970162, - "grad_norm": 0.08878160268068314, - "learning_rate": 7.314875083139475e-05, - "loss": 0.01015128344297409, - "step": 2780 - }, - { - "epoch": 0.47485080988917305, - "grad_norm": 0.05885029211640358, - "learning_rate": 7.314079160166996e-05, - "loss": 0.00943310335278511, - "step": 2785 - }, - { - "epoch": 0.47570332480818417, - "grad_norm": 0.07288813591003418, - "learning_rate": 7.313281573363809e-05, - "loss": 0.009116576611995697, - "step": 2790 - }, - { - "epoch": 0.47655583972719523, - "grad_norm": 0.09088344126939774, - "learning_rate": 7.31248232310225e-05, - "loss": 0.010344403237104416, - "step": 2795 - }, - { - "epoch": 0.4774083546462063, - "grad_norm": 0.08182916790246964, - "learning_rate": 7.311681409755437e-05, - "loss": 0.010874876379966735, - "step": 2800 - }, - { - "epoch": 0.4782608695652174, - "grad_norm": 0.08280645310878754, - "learning_rate": 7.310878833697264e-05, - "loss": 0.007568147033452988, - "step": 2805 - }, - { - "epoch": 0.47911338448422847, - "grad_norm": 0.10462478548288345, - "learning_rate": 7.3100745953024e-05, - "loss": 0.011740683764219283, - "step": 2810 - }, - { - "epoch": 0.4799658994032396, - "grad_norm": 0.07685881853103638, - "learning_rate": 7.30926869494629e-05, - "loss": 0.009284010529518128, - "step": 2815 - }, - { - "epoch": 0.48081841432225064, - "grad_norm": 0.05211766064167023, - "learning_rate": 7.308461133005156e-05, - "loss": 0.009633362293243408, - "step": 2820 - }, - { - "epoch": 0.4816709292412617, - "grad_norm": 0.07862114161252975, - "learning_rate": 7.307651909855993e-05, - "loss": 0.012355846166610718, - "step": 2825 - }, - { - "epoch": 0.4825234441602728, - "grad_norm": 0.09950421750545502, - "learning_rate": 7.306841025876573e-05, - "loss": 0.010842062532901764, - "step": 2830 - }, - { - "epoch": 0.4833759590792839, - "grad_norm": 0.08446205407381058, - "learning_rate": 7.306028481445446e-05, - "loss": 0.008424797654151916, - "step": 2835 - }, - { - "epoch": 0.484228473998295, - "grad_norm": 0.1424778699874878, - "learning_rate": 7.305214276941934e-05, - "loss": 0.01177324503660202, - "step": 2840 - }, - { - "epoch": 0.48508098891730606, - "grad_norm": 0.07312945276498795, - "learning_rate": 7.304398412746134e-05, - "loss": 0.010038022696971894, - "step": 2845 - }, - { - "epoch": 0.4859335038363171, - "grad_norm": 0.07043888419866562, - "learning_rate": 7.303580889238917e-05, - "loss": 0.008848214149475097, - "step": 2850 - }, - { - "epoch": 0.48678601875532823, - "grad_norm": 0.09851706773042679, - "learning_rate": 7.302761706801934e-05, - "loss": 0.011452250182628632, - "step": 2855 - }, - { - "epoch": 0.4876385336743393, - "grad_norm": 0.07379815727472305, - "learning_rate": 7.301940865817604e-05, - "loss": 0.010087071359157563, - "step": 2860 - }, - { - "epoch": 0.4884910485933504, - "grad_norm": 0.12832187116146088, - "learning_rate": 7.301118366669123e-05, - "loss": 0.013372799754142762, - "step": 2865 - }, - { - "epoch": 0.48934356351236147, - "grad_norm": 0.06776788830757141, - "learning_rate": 7.300294209740462e-05, - "loss": 0.010031795501708985, - "step": 2870 - }, - { - "epoch": 0.49019607843137253, - "grad_norm": 0.06495808809995651, - "learning_rate": 7.299468395416364e-05, - "loss": 0.011152566224336625, - "step": 2875 - }, - { - "epoch": 0.49104859335038364, - "grad_norm": 0.06433792412281036, - "learning_rate": 7.298640924082346e-05, - "loss": 0.012774203717708588, - "step": 2880 - }, - { - "epoch": 0.4919011082693947, - "grad_norm": 0.066926009953022, - "learning_rate": 7.2978117961247e-05, - "loss": 0.011111211776733399, - "step": 2885 - }, - { - "epoch": 0.4927536231884058, - "grad_norm": 0.08211687207221985, - "learning_rate": 7.296981011930493e-05, - "loss": 0.009508269280195237, - "step": 2890 - }, - { - "epoch": 0.4936061381074169, - "grad_norm": 0.09815993160009384, - "learning_rate": 7.296148571887558e-05, - "loss": 0.0117066890001297, - "step": 2895 - }, - { - "epoch": 0.49445865302642794, - "grad_norm": 0.07543535530567169, - "learning_rate": 7.295314476384508e-05, - "loss": 0.008867967873811722, - "step": 2900 - }, - { - "epoch": 0.49531116794543906, - "grad_norm": 0.07558202743530273, - "learning_rate": 7.294478725810728e-05, - "loss": 0.01093400940299034, - "step": 2905 - }, - { - "epoch": 0.4961636828644501, - "grad_norm": 0.06642191112041473, - "learning_rate": 7.293641320556371e-05, - "loss": 0.008366364240646362, - "step": 2910 - }, - { - "epoch": 0.49701619778346123, - "grad_norm": 0.07226760685443878, - "learning_rate": 7.292802261012368e-05, - "loss": 0.012197307497262954, - "step": 2915 - }, - { - "epoch": 0.4978687127024723, - "grad_norm": 0.08546584844589233, - "learning_rate": 7.29196154757042e-05, - "loss": 0.010272269695997238, - "step": 2920 - }, - { - "epoch": 0.49872122762148335, - "grad_norm": 0.0559270940721035, - "learning_rate": 7.291119180622998e-05, - "loss": 0.009690707921981812, - "step": 2925 - }, - { - "epoch": 0.49957374254049447, - "grad_norm": 0.11211635917425156, - "learning_rate": 7.290275160563349e-05, - "loss": 0.01505405604839325, - "step": 2930 - }, - { - "epoch": 0.4999147485080989, - "eval_loss": 0.035044603049755096, - "eval_runtime": 3.5861, - "eval_samples_per_second": 70.272, - "eval_steps_per_second": 1.115, - "step": 2932 - }, - { - "eval_cer_subset": 0.01374436139202371, - "eval_cer_subset_edit_distance": 844, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 2932 - }, - { - "epoch": 0.5004262574595055, - "grad_norm": 0.08485773205757141, - "learning_rate": 7.289429487785488e-05, - "loss": 0.01260426789522171, - "step": 2935 - }, - { - "epoch": 0.5012787723785166, - "grad_norm": 0.08039058744907379, - "learning_rate": 7.288582162684203e-05, - "loss": 0.012322144955396653, - "step": 2940 - }, - { - "epoch": 0.5021312872975278, - "grad_norm": 0.16017615795135498, - "learning_rate": 7.287733185655057e-05, - "loss": 0.009620364010334014, - "step": 2945 - }, - { - "epoch": 0.5029838022165388, - "grad_norm": 0.06721053272485733, - "learning_rate": 7.286882557094376e-05, - "loss": 0.009893904626369476, - "step": 2950 - }, - { - "epoch": 0.5038363171355499, - "grad_norm": 0.08132930099964142, - "learning_rate": 7.286030277399264e-05, - "loss": 0.012833705544471741, - "step": 2955 - }, - { - "epoch": 0.5046888320545609, - "grad_norm": 0.09076893329620361, - "learning_rate": 7.285176346967595e-05, - "loss": 0.011492121219635009, - "step": 2960 - }, - { - "epoch": 0.505541346973572, - "grad_norm": 0.1023377999663353, - "learning_rate": 7.284320766198008e-05, - "loss": 0.01216188371181488, - "step": 2965 - }, - { - "epoch": 0.5063938618925832, - "grad_norm": 0.07568195462226868, - "learning_rate": 7.283463535489921e-05, - "loss": 0.014794780313968659, - "step": 2970 - }, - { - "epoch": 0.5072463768115942, - "grad_norm": 0.11283870786428452, - "learning_rate": 7.282604655243515e-05, - "loss": 0.012774300575256348, - "step": 2975 - }, - { - "epoch": 0.5080988917306053, - "grad_norm": 0.07101167738437653, - "learning_rate": 7.281744125859746e-05, - "loss": 0.010759322345256806, - "step": 2980 - }, - { - "epoch": 0.5089514066496164, - "grad_norm": 0.07677409052848816, - "learning_rate": 7.280881947740336e-05, - "loss": 0.010482230037450791, - "step": 2985 - }, - { - "epoch": 0.5098039215686274, - "grad_norm": 0.08568017184734344, - "learning_rate": 7.280018121287777e-05, - "loss": 0.012674462795257569, - "step": 2990 - }, - { - "epoch": 0.5106564364876386, - "grad_norm": 0.07830876111984253, - "learning_rate": 7.279152646905336e-05, - "loss": 0.009349775314331055, - "step": 2995 - }, - { - "epoch": 0.5115089514066496, - "grad_norm": 0.07408280670642853, - "learning_rate": 7.278285524997044e-05, - "loss": 0.010303238779306412, - "step": 3000 - }, - { - "epoch": 0.5123614663256607, - "grad_norm": 0.09053376317024231, - "learning_rate": 7.277416755967698e-05, - "loss": 0.012187518179416656, - "step": 3005 - }, - { - "epoch": 0.5132139812446718, - "grad_norm": 0.07432437688112259, - "learning_rate": 7.276546340222875e-05, - "loss": 0.009504207968711853, - "step": 3010 - }, - { - "epoch": 0.5140664961636828, - "grad_norm": 0.09075863659381866, - "learning_rate": 7.275674278168908e-05, - "loss": 0.010764679312705994, - "step": 3015 - }, - { - "epoch": 0.514919011082694, - "grad_norm": 0.08363319933414459, - "learning_rate": 7.274800570212909e-05, - "loss": 0.011034657061100007, - "step": 3020 - }, - { - "epoch": 0.5157715260017051, - "grad_norm": 0.08179081231355667, - "learning_rate": 7.273925216762753e-05, - "loss": 0.012276624888181686, - "step": 3025 - }, - { - "epoch": 0.5166240409207161, - "grad_norm": 0.10797501355409622, - "learning_rate": 7.273048218227083e-05, - "loss": 0.008887678384780884, - "step": 3030 - }, - { - "epoch": 0.5174765558397272, - "grad_norm": 0.08237873017787933, - "learning_rate": 7.27216957501531e-05, - "loss": 0.010879174619913102, - "step": 3035 - }, - { - "epoch": 0.5183290707587382, - "grad_norm": 0.10010047256946564, - "learning_rate": 7.271289287537616e-05, - "loss": 0.0103249654173851, - "step": 3040 - }, - { - "epoch": 0.5191815856777494, - "grad_norm": 0.06411991268396378, - "learning_rate": 7.270407356204948e-05, - "loss": 0.006414853036403656, - "step": 3045 - }, - { - "epoch": 0.5200341005967605, - "grad_norm": 0.09925824403762817, - "learning_rate": 7.26952378142902e-05, - "loss": 0.010811964422464371, - "step": 3050 - }, - { - "epoch": 0.5208866155157715, - "grad_norm": 0.07986702769994736, - "learning_rate": 7.268638563622317e-05, - "loss": 0.011965467780828475, - "step": 3055 - }, - { - "epoch": 0.5217391304347826, - "grad_norm": 0.07426656037569046, - "learning_rate": 7.267751703198082e-05, - "loss": 0.0093523807823658, - "step": 3060 - }, - { - "epoch": 0.5225916453537937, - "grad_norm": 0.11460934579372406, - "learning_rate": 7.266863200570338e-05, - "loss": 0.01224176660180092, - "step": 3065 - }, - { - "epoch": 0.5234441602728048, - "grad_norm": 0.10174648463726044, - "learning_rate": 7.265973056153864e-05, - "loss": 0.011203842610120774, - "step": 3070 - }, - { - "epoch": 0.5242966751918159, - "grad_norm": 0.06445316970348358, - "learning_rate": 7.265081270364209e-05, - "loss": 0.010346656292676925, - "step": 3075 - }, - { - "epoch": 0.525149190110827, - "grad_norm": 0.08397547155618668, - "learning_rate": 7.264187843617688e-05, - "loss": 0.011372068524360656, - "step": 3080 - }, - { - "epoch": 0.526001705029838, - "grad_norm": 0.07325135916471481, - "learning_rate": 7.263292776331384e-05, - "loss": 0.01116851419210434, - "step": 3085 - }, - { - "epoch": 0.5268542199488491, - "grad_norm": 0.1034390926361084, - "learning_rate": 7.262396068923144e-05, - "loss": 0.011953853815793992, - "step": 3090 - }, - { - "epoch": 0.5277067348678602, - "grad_norm": 0.08395690470933914, - "learning_rate": 7.26149772181158e-05, - "loss": 0.011437299847602844, - "step": 3095 - }, - { - "epoch": 0.5285592497868713, - "grad_norm": 0.09495387226343155, - "learning_rate": 7.260597735416068e-05, - "loss": 0.009634804725646973, - "step": 3100 - }, - { - "epoch": 0.5294117647058824, - "grad_norm": 0.07444775849580765, - "learning_rate": 7.259696110156756e-05, - "loss": 0.009771790355443954, - "step": 3105 - }, - { - "epoch": 0.5302642796248934, - "grad_norm": 0.061964571475982666, - "learning_rate": 7.258792846454551e-05, - "loss": 0.007979755848646164, - "step": 3110 - }, - { - "epoch": 0.5311167945439045, - "grad_norm": 0.11025935411453247, - "learning_rate": 7.257887944731125e-05, - "loss": 0.012162110209465027, - "step": 3115 - }, - { - "epoch": 0.5319693094629157, - "grad_norm": 0.07793140411376953, - "learning_rate": 7.256981405408918e-05, - "loss": 0.00897146388888359, - "step": 3120 - }, - { - "epoch": 0.5328218243819267, - "grad_norm": 0.0773436427116394, - "learning_rate": 7.256073228911132e-05, - "loss": 0.009621264040470123, - "step": 3125 - }, - { - "epoch": 0.5336743393009378, - "grad_norm": 0.07340693473815918, - "learning_rate": 7.255163415661735e-05, - "loss": 0.01072111278772354, - "step": 3130 - }, - { - "epoch": 0.5345268542199488, - "grad_norm": 0.0971943810582161, - "learning_rate": 7.254251966085455e-05, - "loss": 0.009457825869321822, - "step": 3135 - }, - { - "epoch": 0.5353793691389599, - "grad_norm": 0.08840794116258621, - "learning_rate": 7.25333888060779e-05, - "loss": 0.015866565704345702, - "step": 3140 - }, - { - "epoch": 0.5362318840579711, - "grad_norm": 0.07126007229089737, - "learning_rate": 7.252424159654999e-05, - "loss": 0.012925322353839874, - "step": 3145 - }, - { - "epoch": 0.5370843989769821, - "grad_norm": 0.05989958345890045, - "learning_rate": 7.251507803654103e-05, - "loss": 0.007374878972768784, - "step": 3150 - }, - { - "epoch": 0.5379369138959932, - "grad_norm": 0.0661931037902832, - "learning_rate": 7.250589813032885e-05, - "loss": 0.009713394194841385, - "step": 3155 - }, - { - "epoch": 0.5387894288150042, - "grad_norm": 0.0813523456454277, - "learning_rate": 7.2496701882199e-05, - "loss": 0.007980254292488099, - "step": 3160 - }, - { - "epoch": 0.5396419437340153, - "grad_norm": 0.0565156452357769, - "learning_rate": 7.248748929644453e-05, - "loss": 0.010806798934936523, - "step": 3165 - }, - { - "epoch": 0.5404944586530265, - "grad_norm": 0.045107364654541016, - "learning_rate": 7.247826037736621e-05, - "loss": 0.013011330366134643, - "step": 3170 - }, - { - "epoch": 0.5413469735720375, - "grad_norm": 0.0623495988547802, - "learning_rate": 7.246901512927241e-05, - "loss": 0.012109772115945817, - "step": 3175 - }, - { - "epoch": 0.5421994884910486, - "grad_norm": 0.09943851083517075, - "learning_rate": 7.24597535564791e-05, - "loss": 0.011384092271327972, - "step": 3180 - }, - { - "epoch": 0.5430520034100597, - "grad_norm": 0.12090208381414413, - "learning_rate": 7.245047566330991e-05, - "loss": 0.011156149953603745, - "step": 3185 - }, - { - "epoch": 0.5439045183290707, - "grad_norm": 0.10226333141326904, - "learning_rate": 7.244118145409607e-05, - "loss": 0.01164291426539421, - "step": 3190 - }, - { - "epoch": 0.5447570332480819, - "grad_norm": 0.09011051058769226, - "learning_rate": 7.24318709331764e-05, - "loss": 0.009608177840709687, - "step": 3195 - }, - { - "epoch": 0.545609548167093, - "grad_norm": 0.08180241286754608, - "learning_rate": 7.24225441048974e-05, - "loss": 0.010098953545093537, - "step": 3200 - }, - { - "epoch": 0.546462063086104, - "grad_norm": 0.08325407654047012, - "learning_rate": 7.241320097361312e-05, - "loss": 0.012687146663665771, - "step": 3205 - }, - { - "epoch": 0.5473145780051151, - "grad_norm": 0.11662351340055466, - "learning_rate": 7.240384154368523e-05, - "loss": 0.012003959715366363, - "step": 3210 - }, - { - "epoch": 0.5481670929241261, - "grad_norm": 0.05904731899499893, - "learning_rate": 7.239446581948306e-05, - "loss": 0.012311330437660218, - "step": 3215 - }, - { - "epoch": 0.5490196078431373, - "grad_norm": 0.12498651444911957, - "learning_rate": 7.238507380538347e-05, - "loss": 0.011272794008255005, - "step": 3220 - }, - { - "epoch": 0.5498721227621484, - "grad_norm": 0.06047634035348892, - "learning_rate": 7.2375665505771e-05, - "loss": 0.010353527963161469, - "step": 3225 - }, - { - "epoch": 0.5507246376811594, - "grad_norm": 0.07596508413553238, - "learning_rate": 7.236624092503774e-05, - "loss": 0.011058451980352402, - "step": 3230 - }, - { - "epoch": 0.5515771526001705, - "grad_norm": 0.10000273585319519, - "learning_rate": 7.235680006758339e-05, - "loss": 0.012288159132003785, - "step": 3235 - }, - { - "epoch": 0.5524296675191815, - "grad_norm": 0.08154033869504929, - "learning_rate": 7.234734293781527e-05, - "loss": 0.015510989725589753, - "step": 3240 - }, - { - "epoch": 0.5532821824381927, - "grad_norm": 0.10024677217006683, - "learning_rate": 7.233786954014828e-05, - "loss": 0.010542219877243042, - "step": 3245 - }, - { - "epoch": 0.5541346973572038, - "grad_norm": 0.08001844584941864, - "learning_rate": 7.232837987900492e-05, - "loss": 0.009433221817016602, - "step": 3250 - }, - { - "epoch": 0.5549872122762148, - "grad_norm": 0.05274324119091034, - "learning_rate": 7.231887395881528e-05, - "loss": 0.010475738346576691, - "step": 3255 - }, - { - "epoch": 0.5558397271952259, - "grad_norm": 0.08753672242164612, - "learning_rate": 7.230935178401703e-05, - "loss": 0.007628431916236878, - "step": 3260 - }, - { - "epoch": 0.556692242114237, - "grad_norm": 0.10221699625253677, - "learning_rate": 7.229981335905545e-05, - "loss": 0.011822684109210968, - "step": 3265 - }, - { - "epoch": 0.5575447570332481, - "grad_norm": 0.07665866613388062, - "learning_rate": 7.229025868838336e-05, - "loss": 0.010916930437088013, - "step": 3270 - }, - { - "epoch": 0.5583972719522592, - "grad_norm": 0.08861260861158371, - "learning_rate": 7.228068777646125e-05, - "loss": 0.008925830572843551, - "step": 3275 - }, - { - "epoch": 0.5592497868712702, - "grad_norm": 0.08963657170534134, - "learning_rate": 7.227110062775712e-05, - "loss": 0.014812195301055908, - "step": 3280 - }, - { - "epoch": 0.5601023017902813, - "grad_norm": 0.28550851345062256, - "learning_rate": 7.226149724674655e-05, - "loss": 0.009522277861833572, - "step": 3285 - }, - { - "epoch": 0.5609548167092924, - "grad_norm": 0.057680875062942505, - "learning_rate": 7.225187763791273e-05, - "loss": 0.012893497943878174, - "step": 3290 - }, - { - "epoch": 0.5618073316283035, - "grad_norm": 0.08956284821033478, - "learning_rate": 7.224224180574642e-05, - "loss": 0.012499228864908219, - "step": 3295 - }, - { - "epoch": 0.5626598465473146, - "grad_norm": 0.11929965764284134, - "learning_rate": 7.223258975474596e-05, - "loss": 0.010640453547239304, - "step": 3300 - }, - { - "epoch": 0.5635123614663257, - "grad_norm": 0.09788426756858826, - "learning_rate": 7.222292148941722e-05, - "loss": 0.014677588641643525, - "step": 3305 - }, - { - "epoch": 0.5643648763853367, - "grad_norm": 0.08845673501491547, - "learning_rate": 7.221323701427368e-05, - "loss": 0.009266233444213868, - "step": 3310 - }, - { - "epoch": 0.5652173913043478, - "grad_norm": 0.07864493131637573, - "learning_rate": 7.220353633383636e-05, - "loss": 0.01019999384880066, - "step": 3315 - }, - { - "epoch": 0.566069906223359, - "grad_norm": 0.07658441364765167, - "learning_rate": 7.21938194526339e-05, - "loss": 0.010098284482955933, - "step": 3320 - }, - { - "epoch": 0.56692242114237, - "grad_norm": 0.058863960206508636, - "learning_rate": 7.218408637520243e-05, - "loss": 0.01043831706047058, - "step": 3325 - }, - { - "epoch": 0.5677749360613811, - "grad_norm": 0.05992535129189491, - "learning_rate": 7.217433710608567e-05, - "loss": 0.010804108530282974, - "step": 3330 - }, - { - "epoch": 0.5686274509803921, - "grad_norm": 0.10607994347810745, - "learning_rate": 7.216457164983494e-05, - "loss": 0.01115414798259735, - "step": 3335 - }, - { - "epoch": 0.5694799658994032, - "grad_norm": 0.07557345181703568, - "learning_rate": 7.215479001100904e-05, - "loss": 0.01279982328414917, - "step": 3340 - }, - { - "epoch": 0.5703324808184144, - "grad_norm": 0.064768947660923, - "learning_rate": 7.214499219417439e-05, - "loss": 0.01112583726644516, - "step": 3345 - }, - { - "epoch": 0.5711849957374254, - "grad_norm": 0.08013112843036652, - "learning_rate": 7.213517820390492e-05, - "loss": 0.01265912652015686, - "step": 3350 - }, - { - "epoch": 0.5720375106564365, - "grad_norm": 0.06619428843259811, - "learning_rate": 7.212534804478214e-05, - "loss": 0.01231289878487587, - "step": 3355 - }, - { - "epoch": 0.5728900255754475, - "grad_norm": 0.06123036891222, - "learning_rate": 7.211550172139507e-05, - "loss": 0.012096628546714783, - "step": 3360 - }, - { - "epoch": 0.5737425404944586, - "grad_norm": 0.10050475597381592, - "learning_rate": 7.210563923834034e-05, - "loss": 0.014050082862377166, - "step": 3365 - }, - { - "epoch": 0.5745950554134698, - "grad_norm": 0.05243556201457977, - "learning_rate": 7.209576060022207e-05, - "loss": 0.009351913630962373, - "step": 3370 - }, - { - "epoch": 0.5754475703324808, - "grad_norm": 0.12591946125030518, - "learning_rate": 7.208586581165192e-05, - "loss": 0.012423963844776153, - "step": 3375 - }, - { - "epoch": 0.5763000852514919, - "grad_norm": 0.11871001869440079, - "learning_rate": 7.207595487724912e-05, - "loss": 0.014398403465747833, - "step": 3380 - }, - { - "epoch": 0.577152600170503, - "grad_norm": 0.09194283187389374, - "learning_rate": 7.206602780164044e-05, - "loss": 0.009020231664180756, - "step": 3385 - }, - { - "epoch": 0.578005115089514, - "grad_norm": 0.1465149074792862, - "learning_rate": 7.205608458946013e-05, - "loss": 0.009870749711990357, - "step": 3390 - }, - { - "epoch": 0.5788576300085252, - "grad_norm": 0.07948209345340729, - "learning_rate": 7.204612524535006e-05, - "loss": 0.013135011494159698, - "step": 3395 - }, - { - "epoch": 0.5797101449275363, - "grad_norm": 0.07187635451555252, - "learning_rate": 7.203614977395952e-05, - "loss": 0.010598786920309067, - "step": 3400 - }, - { - "epoch": 0.5805626598465473, - "grad_norm": 0.05511854961514473, - "learning_rate": 7.202615817994545e-05, - "loss": 0.009227041155099869, - "step": 3405 - }, - { - "epoch": 0.5814151747655584, - "grad_norm": 0.05830230563879013, - "learning_rate": 7.201615046797224e-05, - "loss": 0.008167321979999542, - "step": 3410 - }, - { - "epoch": 0.5822676896845694, - "grad_norm": 0.08624587953090668, - "learning_rate": 7.200612664271184e-05, - "loss": 0.012134125083684921, - "step": 3415 - }, - { - "epoch": 0.5831202046035806, - "grad_norm": 0.0744808092713356, - "learning_rate": 7.199608670884366e-05, - "loss": 0.012493259459733962, - "step": 3420 - }, - { - "epoch": 0.5839727195225917, - "grad_norm": 0.07272766530513763, - "learning_rate": 7.19860306710547e-05, - "loss": 0.00806736946105957, - "step": 3425 - }, - { - "epoch": 0.5848252344416027, - "grad_norm": 0.0804983377456665, - "learning_rate": 7.197595853403946e-05, - "loss": 0.01102890819311142, - "step": 3430 - }, - { - "epoch": 0.5856777493606138, - "grad_norm": 0.05326579511165619, - "learning_rate": 7.196587030249994e-05, - "loss": 0.009381016343832016, - "step": 3435 - }, - { - "epoch": 0.5865302642796248, - "grad_norm": 0.07588013261556625, - "learning_rate": 7.195576598114567e-05, - "loss": 0.010961712896823883, - "step": 3440 - }, - { - "epoch": 0.587382779198636, - "grad_norm": 0.09725244343280792, - "learning_rate": 7.194564557469368e-05, - "loss": 0.012034715712070465, - "step": 3445 - }, - { - "epoch": 0.5882352941176471, - "grad_norm": 0.0938539057970047, - "learning_rate": 7.193550908786851e-05, - "loss": 0.012069541215896606, - "step": 3450 - }, - { - "epoch": 0.5890878090366581, - "grad_norm": 0.052410729229450226, - "learning_rate": 7.19253565254022e-05, - "loss": 0.011174223572015762, - "step": 3455 - }, - { - "epoch": 0.5899403239556692, - "grad_norm": 0.08317258954048157, - "learning_rate": 7.191518789203432e-05, - "loss": 0.014452503621578216, - "step": 3460 - }, - { - "epoch": 0.5907928388746803, - "grad_norm": 0.062619149684906, - "learning_rate": 7.190500319251193e-05, - "loss": 0.012830793857574463, - "step": 3465 - }, - { - "epoch": 0.5916453537936914, - "grad_norm": 0.06287284195423126, - "learning_rate": 7.189480243158956e-05, - "loss": 0.013282649219036102, - "step": 3470 - }, - { - "epoch": 0.5924978687127025, - "grad_norm": 0.07136182487010956, - "learning_rate": 7.188458561402928e-05, - "loss": 0.009024892747402192, - "step": 3475 - }, - { - "epoch": 0.5933503836317136, - "grad_norm": 0.09081269055604935, - "learning_rate": 7.187435274460064e-05, - "loss": 0.012044035643339158, - "step": 3480 - }, - { - "epoch": 0.5942028985507246, - "grad_norm": 0.08475978672504425, - "learning_rate": 7.18641038280807e-05, - "loss": 0.010785829275846481, - "step": 3485 - }, - { - "epoch": 0.5950554134697357, - "grad_norm": 0.06322979927062988, - "learning_rate": 7.185383886925397e-05, - "loss": 0.011609486490488052, - "step": 3490 - }, - { - "epoch": 0.5959079283887468, - "grad_norm": 0.07065978646278381, - "learning_rate": 7.18435578729125e-05, - "loss": 0.01128239706158638, - "step": 3495 - }, - { - "epoch": 0.5967604433077579, - "grad_norm": 0.057962607592344284, - "learning_rate": 7.183326084385577e-05, - "loss": 0.009382489323616027, - "step": 3500 - }, - { - "epoch": 0.597612958226769, - "grad_norm": 0.05717672407627106, - "learning_rate": 7.182294778689079e-05, - "loss": 0.010072773694992066, - "step": 3505 - }, - { - "epoch": 0.59846547314578, - "grad_norm": 0.07161569595336914, - "learning_rate": 7.181261870683205e-05, - "loss": 0.011324245482683182, - "step": 3510 - }, - { - "epoch": 0.5993179880647911, - "grad_norm": 0.07468906790018082, - "learning_rate": 7.180227360850148e-05, - "loss": 0.00984283909201622, - "step": 3515 - }, - { - "epoch": 0.6001705029838023, - "grad_norm": 0.071560800075531, - "learning_rate": 7.179191249672855e-05, - "loss": 0.011276674270629884, - "step": 3520 - }, - { - "epoch": 0.6010230179028133, - "grad_norm": 0.05588390305638313, - "learning_rate": 7.178153537635014e-05, - "loss": 0.008921106159687043, - "step": 3525 - }, - { - "epoch": 0.6018755328218244, - "grad_norm": 0.11065732687711716, - "learning_rate": 7.177114225221066e-05, - "loss": 0.0122377447783947, - "step": 3530 - }, - { - "epoch": 0.6027280477408354, - "grad_norm": 0.10121116787195206, - "learning_rate": 7.176073312916194e-05, - "loss": 0.007999545335769654, - "step": 3535 - }, - { - "epoch": 0.6035805626598465, - "grad_norm": 0.06102030724287033, - "learning_rate": 7.175030801206335e-05, - "loss": 0.008767658472061157, - "step": 3540 - }, - { - "epoch": 0.6044330775788577, - "grad_norm": 0.08233699947595596, - "learning_rate": 7.173986690578164e-05, - "loss": 0.010089017450809479, - "step": 3545 - }, - { - "epoch": 0.6052855924978687, - "grad_norm": 0.1655152440071106, - "learning_rate": 7.172940981519108e-05, - "loss": 0.012077460438013077, - "step": 3550 - }, - { - "epoch": 0.6061381074168798, - "grad_norm": 0.11178915202617645, - "learning_rate": 7.171893674517337e-05, - "loss": 0.009319285303354264, - "step": 3555 - }, - { - "epoch": 0.6069906223358909, - "grad_norm": 0.0778600424528122, - "learning_rate": 7.170844770061772e-05, - "loss": 0.012114962190389633, - "step": 3560 - }, - { - "epoch": 0.6078431372549019, - "grad_norm": 0.08708171546459198, - "learning_rate": 7.169794268642075e-05, - "loss": 0.011569589376449585, - "step": 3565 - }, - { - "epoch": 0.6086956521739131, - "grad_norm": 0.06438080966472626, - "learning_rate": 7.168742170748654e-05, - "loss": 0.010296766459941865, - "step": 3570 - }, - { - "epoch": 0.6095481670929241, - "grad_norm": 0.10569975525140762, - "learning_rate": 7.167688476872664e-05, - "loss": 0.008922196924686432, - "step": 3575 - }, - { - "epoch": 0.6104006820119352, - "grad_norm": 0.07466918975114822, - "learning_rate": 7.166633187506004e-05, - "loss": 0.009365256130695342, - "step": 3580 - }, - { - "epoch": 0.6112531969309463, - "grad_norm": 0.1070641577243805, - "learning_rate": 7.16557630314132e-05, - "loss": 0.011525402963161468, - "step": 3585 - }, - { - "epoch": 0.6121057118499573, - "grad_norm": 0.09534542262554169, - "learning_rate": 7.164517824271999e-05, - "loss": 0.010068083554506302, - "step": 3590 - }, - { - "epoch": 0.6129582267689685, - "grad_norm": 0.0643506869673729, - "learning_rate": 7.163457751392175e-05, - "loss": 0.010679592937231063, - "step": 3595 - }, - { - "epoch": 0.6138107416879796, - "grad_norm": 0.11610018461942673, - "learning_rate": 7.162396084996723e-05, - "loss": 0.010074391961097717, - "step": 3600 - }, - { - "epoch": 0.6146632566069906, - "grad_norm": 0.07560709863901138, - "learning_rate": 7.161332825581269e-05, - "loss": 0.013245916366577149, - "step": 3605 - }, - { - "epoch": 0.6155157715260017, - "grad_norm": 0.06540799885988235, - "learning_rate": 7.160267973642173e-05, - "loss": 0.01055695340037346, - "step": 3610 - }, - { - "epoch": 0.6163682864450127, - "grad_norm": 0.05610837787389755, - "learning_rate": 7.159201529676546e-05, - "loss": 0.010231484472751618, - "step": 3615 - }, - { - "epoch": 0.6172208013640239, - "grad_norm": 0.11630856245756149, - "learning_rate": 7.158133494182237e-05, - "loss": 0.01117742881178856, - "step": 3620 - }, - { - "epoch": 0.618073316283035, - "grad_norm": 0.08508500456809998, - "learning_rate": 7.157063867657844e-05, - "loss": 0.010253986716270447, - "step": 3625 - }, - { - "epoch": 0.618925831202046, - "grad_norm": 0.067935511469841, - "learning_rate": 7.155992650602702e-05, - "loss": 0.009731527417898178, - "step": 3630 - }, - { - "epoch": 0.6197783461210571, - "grad_norm": 0.0784364566206932, - "learning_rate": 7.154919843516892e-05, - "loss": 0.009552852809429168, - "step": 3635 - }, - { - "epoch": 0.6206308610400681, - "grad_norm": 0.10788855701684952, - "learning_rate": 7.153845446901234e-05, - "loss": 0.011269643902778625, - "step": 3640 - }, - { - "epoch": 0.6214833759590793, - "grad_norm": 0.08664087951183319, - "learning_rate": 7.152769461257294e-05, - "loss": 0.010251335799694061, - "step": 3645 - }, - { - "epoch": 0.6223358908780904, - "grad_norm": 0.06885403394699097, - "learning_rate": 7.151691887087377e-05, - "loss": 0.008078257739543914, - "step": 3650 - }, - { - "epoch": 0.6231884057971014, - "grad_norm": 0.09345501661300659, - "learning_rate": 7.150612724894531e-05, - "loss": 0.012022207677364349, - "step": 3655 - }, - { - "epoch": 0.6240409207161125, - "grad_norm": 0.08502865582704544, - "learning_rate": 7.149531975182543e-05, - "loss": 0.00932946428656578, - "step": 3660 - }, - { - "epoch": 0.6248934356351236, - "grad_norm": 0.06249995157122612, - "learning_rate": 7.148449638455947e-05, - "loss": 0.011525212973356246, - "step": 3665 - }, - { - "epoch": 0.6257459505541347, - "grad_norm": 0.04836896434426308, - "learning_rate": 7.14736571522001e-05, - "loss": 0.010347714275121688, - "step": 3670 - }, - { - "epoch": 0.6265984654731458, - "grad_norm": 0.06358285248279572, - "learning_rate": 7.146280205980745e-05, - "loss": 0.009779715538024902, - "step": 3675 - }, - { - "epoch": 0.6274509803921569, - "grad_norm": 0.07596850395202637, - "learning_rate": 7.145193111244903e-05, - "loss": 0.010838811099529267, - "step": 3680 - }, - { - "epoch": 0.6283034953111679, - "grad_norm": 0.05986448749899864, - "learning_rate": 7.144104431519977e-05, - "loss": 0.009671849757432937, - "step": 3685 - }, - { - "epoch": 0.629156010230179, - "grad_norm": 0.047881439328193665, - "learning_rate": 7.143014167314197e-05, - "loss": 0.007660867273807525, - "step": 3690 - }, - { - "epoch": 0.6300085251491901, - "grad_norm": 0.06409293413162231, - "learning_rate": 7.141922319136537e-05, - "loss": 0.013374905288219451, - "step": 3695 - }, - { - "epoch": 0.6308610400682012, - "grad_norm": 0.0767306461930275, - "learning_rate": 7.140828887496707e-05, - "loss": 0.006885652989149093, - "step": 3700 - }, - { - "epoch": 0.6317135549872123, - "grad_norm": 0.08192065358161926, - "learning_rate": 7.139733872905158e-05, - "loss": 0.013760556280612946, - "step": 3705 - }, - { - "epoch": 0.6325660699062233, - "grad_norm": 0.09693574160337448, - "learning_rate": 7.138637275873078e-05, - "loss": 0.009739194065332413, - "step": 3710 - }, - { - "epoch": 0.6334185848252344, - "grad_norm": 0.08232755959033966, - "learning_rate": 7.137539096912395e-05, - "loss": 0.010294197499752045, - "step": 3715 - }, - { - "epoch": 0.6342710997442456, - "grad_norm": 0.06582340598106384, - "learning_rate": 7.136439336535776e-05, - "loss": 0.010686574131250381, - "step": 3720 - }, - { - "epoch": 0.6351236146632566, - "grad_norm": 0.07385887205600739, - "learning_rate": 7.135337995256626e-05, - "loss": 0.011403677612543106, - "step": 3725 - }, - { - "epoch": 0.6359761295822677, - "grad_norm": 0.11854248493909836, - "learning_rate": 7.134235073589087e-05, - "loss": 0.01180308759212494, - "step": 3730 - }, - { - "epoch": 0.6368286445012787, - "grad_norm": 0.076481893658638, - "learning_rate": 7.133130572048041e-05, - "loss": 0.011076596379280091, - "step": 3735 - }, - { - "epoch": 0.6376811594202898, - "grad_norm": 0.09552651643753052, - "learning_rate": 7.132024491149103e-05, - "loss": 0.014420199394226074, - "step": 3740 - }, - { - "epoch": 0.638533674339301, - "grad_norm": 0.04855124279856682, - "learning_rate": 7.130916831408633e-05, - "loss": 0.008350960910320282, - "step": 3745 - }, - { - "epoch": 0.639386189258312, - "grad_norm": 0.0796368345618248, - "learning_rate": 7.12980759334372e-05, - "loss": 0.010764746367931366, - "step": 3750 - }, - { - "epoch": 0.6402387041773231, - "grad_norm": 0.07030697911977768, - "learning_rate": 7.128696777472193e-05, - "loss": 0.010386807471513748, - "step": 3755 - }, - { - "epoch": 0.6410912190963342, - "grad_norm": 0.05930609628558159, - "learning_rate": 7.127584384312619e-05, - "loss": 0.008884093910455703, - "step": 3760 - }, - { - "epoch": 0.6419437340153452, - "grad_norm": 0.07495228201150894, - "learning_rate": 7.126470414384299e-05, - "loss": 0.010249865800142288, - "step": 3765 - }, - { - "epoch": 0.6427962489343564, - "grad_norm": 0.12954963743686676, - "learning_rate": 7.125354868207275e-05, - "loss": 0.013017497956752777, - "step": 3770 - }, - { - "epoch": 0.6436487638533674, - "grad_norm": 0.08893310278654099, - "learning_rate": 7.124237746302317e-05, - "loss": 0.010649867355823517, - "step": 3775 - }, - { - "epoch": 0.6445012787723785, - "grad_norm": 0.08650866150856018, - "learning_rate": 7.123119049190935e-05, - "loss": 0.012544044852256775, - "step": 3780 - }, - { - "epoch": 0.6453537936913896, - "grad_norm": 0.06374052166938782, - "learning_rate": 7.121998777395375e-05, - "loss": 0.007669864594936371, - "step": 3785 - }, - { - "epoch": 0.6462063086104007, - "grad_norm": 0.08226713538169861, - "learning_rate": 7.120876931438618e-05, - "loss": 0.007969621568918228, - "step": 3790 - }, - { - "epoch": 0.6470588235294118, - "grad_norm": 0.10450884699821472, - "learning_rate": 7.119753511844377e-05, - "loss": 0.013088032603263855, - "step": 3795 - }, - { - "epoch": 0.6479113384484229, - "grad_norm": 0.08459076285362244, - "learning_rate": 7.118628519137104e-05, - "loss": 0.01125529408454895, - "step": 3800 - }, - { - "epoch": 0.6487638533674339, - "grad_norm": 0.09018636494874954, - "learning_rate": 7.11750195384198e-05, - "loss": 0.008683501929044723, - "step": 3805 - }, - { - "epoch": 0.649616368286445, - "grad_norm": 0.07949680835008621, - "learning_rate": 7.116373816484927e-05, - "loss": 0.008904790878295899, - "step": 3810 - }, - { - "epoch": 0.6504688832054561, - "grad_norm": 0.14297716319561005, - "learning_rate": 7.115244107592593e-05, - "loss": 0.01503775417804718, - "step": 3815 - }, - { - "epoch": 0.6513213981244672, - "grad_norm": 0.051478032022714615, - "learning_rate": 7.114112827692367e-05, - "loss": 0.011145923286676407, - "step": 3820 - }, - { - "epoch": 0.6521739130434783, - "grad_norm": 0.0686139240860939, - "learning_rate": 7.112979977312365e-05, - "loss": 0.009445450454950332, - "step": 3825 - }, - { - "epoch": 0.6530264279624893, - "grad_norm": 0.08674909919500351, - "learning_rate": 7.111845556981444e-05, - "loss": 0.009345399588346482, - "step": 3830 - }, - { - "epoch": 0.6538789428815004, - "grad_norm": 0.07799270749092102, - "learning_rate": 7.110709567229182e-05, - "loss": 0.009722919762134552, - "step": 3835 - }, - { - "epoch": 0.6547314578005116, - "grad_norm": 0.07891912013292313, - "learning_rate": 7.109572008585905e-05, - "loss": 0.009985177218914032, - "step": 3840 - }, - { - "epoch": 0.6555839727195226, - "grad_norm": 0.07315738499164581, - "learning_rate": 7.108432881582656e-05, - "loss": 0.011729113757610321, - "step": 3845 - }, - { - "epoch": 0.6564364876385337, - "grad_norm": 0.04961124807596207, - "learning_rate": 7.107292186751222e-05, - "loss": 0.008087723702192306, - "step": 3850 - }, - { - "epoch": 0.6572890025575447, - "grad_norm": 0.0745200589299202, - "learning_rate": 7.106149924624115e-05, - "loss": 0.010474404692649842, - "step": 3855 - }, - { - "epoch": 0.6581415174765558, - "grad_norm": 0.06290512531995773, - "learning_rate": 7.105006095734581e-05, - "loss": 0.009356130659580231, - "step": 3860 - }, - { - "epoch": 0.658994032395567, - "grad_norm": 0.058479905128479004, - "learning_rate": 7.1038607006166e-05, - "loss": 0.008637580275535583, - "step": 3865 - }, - { - "epoch": 0.659846547314578, - "grad_norm": 0.07301484048366547, - "learning_rate": 7.102713739804879e-05, - "loss": 0.015610474348068237, - "step": 3870 - }, - { - "epoch": 0.6606990622335891, - "grad_norm": 0.07421465218067169, - "learning_rate": 7.101565213834855e-05, - "loss": 0.011201824992895126, - "step": 3875 - }, - { - "epoch": 0.6615515771526002, - "grad_norm": 0.06928746402263641, - "learning_rate": 7.100415123242701e-05, - "loss": 0.007224821299314499, - "step": 3880 - }, - { - "epoch": 0.6624040920716112, - "grad_norm": 0.0669165551662445, - "learning_rate": 7.099263468565317e-05, - "loss": 0.007274401932954788, - "step": 3885 - }, - { - "epoch": 0.6632566069906224, - "grad_norm": 0.09326919168233871, - "learning_rate": 7.098110250340334e-05, - "loss": 0.008258016407489776, - "step": 3890 - }, - { - "epoch": 0.6641091219096334, - "grad_norm": 0.07563190162181854, - "learning_rate": 7.096955469106111e-05, - "loss": 0.01005811095237732, - "step": 3895 - }, - { - "epoch": 0.6649616368286445, - "grad_norm": 0.10135438293218613, - "learning_rate": 7.09579912540174e-05, - "loss": 0.009129725396633148, - "step": 3900 - }, - { - "epoch": 0.6658141517476556, - "grad_norm": 0.07946127653121948, - "learning_rate": 7.094641219767041e-05, - "loss": 0.013300496339797973, - "step": 3905 - }, - { - "epoch": 0.6666666666666666, - "grad_norm": 0.0727713331580162, - "learning_rate": 7.093481752742561e-05, - "loss": 0.01028701215982437, - "step": 3910 - }, - { - "epoch": 0.6675191815856778, - "grad_norm": 0.0718616396188736, - "learning_rate": 7.092320724869578e-05, - "loss": 0.009694813191890717, - "step": 3915 - }, - { - "epoch": 0.6683716965046889, - "grad_norm": 0.07789818942546844, - "learning_rate": 7.091158136690102e-05, - "loss": 0.009028838574886322, - "step": 3920 - }, - { - "epoch": 0.6692242114236999, - "grad_norm": 0.07319378852844238, - "learning_rate": 7.089993988746862e-05, - "loss": 0.008582034707069397, - "step": 3925 - }, - { - "epoch": 0.670076726342711, - "grad_norm": 0.086976557970047, - "learning_rate": 7.088828281583326e-05, - "loss": 0.013991822302341462, - "step": 3930 - }, - { - "epoch": 0.670929241261722, - "grad_norm": 0.07413294911384583, - "learning_rate": 7.087661015743681e-05, - "loss": 0.010896880924701691, - "step": 3935 - }, - { - "epoch": 0.6717817561807332, - "grad_norm": 0.12066303938627243, - "learning_rate": 7.08649219177285e-05, - "loss": 0.011574408411979676, - "step": 3940 - }, - { - "epoch": 0.6726342710997443, - "grad_norm": 0.11789914965629578, - "learning_rate": 7.085321810216474e-05, - "loss": 0.011523760855197906, - "step": 3945 - }, - { - "epoch": 0.6734867860187553, - "grad_norm": 0.07654725015163422, - "learning_rate": 7.084149871620929e-05, - "loss": 0.010388451814651489, - "step": 3950 - }, - { - "epoch": 0.6743393009377664, - "grad_norm": 0.05072671175003052, - "learning_rate": 7.082976376533315e-05, - "loss": 0.009674163907766343, - "step": 3955 - }, - { - "epoch": 0.6751918158567775, - "grad_norm": 0.08331634104251862, - "learning_rate": 7.081801325501458e-05, - "loss": 0.01096268892288208, - "step": 3960 - }, - { - "epoch": 0.6760443307757886, - "grad_norm": 0.03134739026427269, - "learning_rate": 7.08062471907391e-05, - "loss": 0.009522407501935958, - "step": 3965 - }, - { - "epoch": 0.6768968456947997, - "grad_norm": 0.09123755246400833, - "learning_rate": 7.079446557799951e-05, - "loss": 0.011530914902687072, - "step": 3970 - }, - { - "epoch": 0.6777493606138107, - "grad_norm": 0.04438139498233795, - "learning_rate": 7.078266842229585e-05, - "loss": 0.007757561653852463, - "step": 3975 - }, - { - "epoch": 0.6786018755328218, - "grad_norm": 0.06562457978725433, - "learning_rate": 7.077085572913543e-05, - "loss": 0.010243573784828186, - "step": 3980 - }, - { - "epoch": 0.6794543904518329, - "grad_norm": 0.08872365206480026, - "learning_rate": 7.075902750403283e-05, - "loss": 0.009365381300449371, - "step": 3985 - }, - { - "epoch": 0.680306905370844, - "grad_norm": 0.06977558881044388, - "learning_rate": 7.074718375250982e-05, - "loss": 0.010138784348964692, - "step": 3990 - }, - { - "epoch": 0.6811594202898551, - "grad_norm": 0.08196771889925003, - "learning_rate": 7.073532448009547e-05, - "loss": 0.011172623187303544, - "step": 3995 - }, - { - "epoch": 0.6820119352088662, - "grad_norm": 0.09635947644710541, - "learning_rate": 7.072344969232611e-05, - "loss": 0.011570926010608672, - "step": 4000 - }, - { - "epoch": 0.6828644501278772, - "grad_norm": 0.10703961551189423, - "learning_rate": 7.071155939474525e-05, - "loss": 0.010987398028373719, - "step": 4005 - }, - { - "epoch": 0.6837169650468883, - "grad_norm": 0.08124027401208878, - "learning_rate": 7.06996535929037e-05, - "loss": 0.009500280767679215, - "step": 4010 - }, - { - "epoch": 0.6845694799658995, - "grad_norm": 0.1272915005683899, - "learning_rate": 7.068773229235946e-05, - "loss": 0.011316341906785965, - "step": 4015 - }, - { - "epoch": 0.6854219948849105, - "grad_norm": 0.05040539428591728, - "learning_rate": 7.067579549867782e-05, - "loss": 0.009714095294475556, - "step": 4020 - }, - { - "epoch": 0.6862745098039216, - "grad_norm": 0.0954902172088623, - "learning_rate": 7.066384321743125e-05, - "loss": 0.01280902624130249, - "step": 4025 - }, - { - "epoch": 0.6871270247229326, - "grad_norm": 0.06131720915436745, - "learning_rate": 7.065187545419947e-05, - "loss": 0.00962383598089218, - "step": 4030 - }, - { - "epoch": 0.6879795396419437, - "grad_norm": 0.09720136225223541, - "learning_rate": 7.063989221456946e-05, - "loss": 0.00951121300458908, - "step": 4035 - }, - { - "epoch": 0.6888320545609549, - "grad_norm": 0.09116765856742859, - "learning_rate": 7.062789350413536e-05, - "loss": 0.012013505399227142, - "step": 4040 - }, - { - "epoch": 0.6896845694799659, - "grad_norm": 0.15665945410728455, - "learning_rate": 7.061587932849858e-05, - "loss": 0.012792985141277313, - "step": 4045 - }, - { - "epoch": 0.690537084398977, - "grad_norm": 0.05531725287437439, - "learning_rate": 7.060384969326775e-05, - "loss": 0.009135130047798156, - "step": 4050 - }, - { - "epoch": 0.691389599317988, - "grad_norm": 0.05050938203930855, - "learning_rate": 7.059180460405869e-05, - "loss": 0.01005997508764267, - "step": 4055 - }, - { - "epoch": 0.6922421142369991, - "grad_norm": 0.08903607726097107, - "learning_rate": 7.057974406649444e-05, - "loss": 0.008456438779830933, - "step": 4060 - }, - { - "epoch": 0.6930946291560103, - "grad_norm": 0.1395196169614792, - "learning_rate": 7.056766808620529e-05, - "loss": 0.012946255505084991, - "step": 4065 - }, - { - "epoch": 0.6939471440750213, - "grad_norm": 0.13844923675060272, - "learning_rate": 7.055557666882866e-05, - "loss": 0.008691005408763885, - "step": 4070 - }, - { - "epoch": 0.6947996589940324, - "grad_norm": 0.0776091143488884, - "learning_rate": 7.054346982000928e-05, - "loss": 0.011200450360774994, - "step": 4075 - }, - { - "epoch": 0.6956521739130435, - "grad_norm": 0.06444083154201508, - "learning_rate": 7.0531347545399e-05, - "loss": 0.010937537997961044, - "step": 4080 - }, - { - "epoch": 0.6965046888320545, - "grad_norm": 0.07561453431844711, - "learning_rate": 7.05192098506569e-05, - "loss": 0.00827426165342331, - "step": 4085 - }, - { - "epoch": 0.6973572037510657, - "grad_norm": 0.06595294177532196, - "learning_rate": 7.050705674144927e-05, - "loss": 0.007974696159362794, - "step": 4090 - }, - { - "epoch": 0.6982097186700768, - "grad_norm": 0.08887284994125366, - "learning_rate": 7.049488822344959e-05, - "loss": 0.009547770768404008, - "step": 4095 - }, - { - "epoch": 0.6990622335890878, - "grad_norm": 0.06858290731906891, - "learning_rate": 7.04827043023385e-05, - "loss": 0.012419018894433975, - "step": 4100 - }, - { - "epoch": 0.6999147485080989, - "grad_norm": 0.09107037633657455, - "learning_rate": 7.047050498380391e-05, - "loss": 0.008159243315458298, - "step": 4105 - }, - { - "epoch": 0.7007672634271099, - "grad_norm": 0.062287479639053345, - "learning_rate": 7.045829027354082e-05, - "loss": 0.00995248556137085, - "step": 4110 - }, - { - "epoch": 0.7016197783461211, - "grad_norm": 0.11668206751346588, - "learning_rate": 7.044606017725148e-05, - "loss": 0.012902414798736573, - "step": 4115 - }, - { - "epoch": 0.7024722932651322, - "grad_norm": 0.08674585819244385, - "learning_rate": 7.043381470064532e-05, - "loss": 0.010076310485601425, - "step": 4120 - }, - { - "epoch": 0.7033248081841432, - "grad_norm": 0.09690031409263611, - "learning_rate": 7.042155384943892e-05, - "loss": 0.011086124181747436, - "step": 4125 - }, - { - "epoch": 0.7041773231031543, - "grad_norm": 0.09527027606964111, - "learning_rate": 7.040927762935605e-05, - "loss": 0.010631310194730759, - "step": 4130 - }, - { - "epoch": 0.7050298380221653, - "grad_norm": 0.07089316844940186, - "learning_rate": 7.039698604612765e-05, - "loss": 0.010472215712070465, - "step": 4135 - }, - { - "epoch": 0.7058823529411765, - "grad_norm": 0.07314343005418777, - "learning_rate": 7.038467910549188e-05, - "loss": 0.011205179244279861, - "step": 4140 - }, - { - "epoch": 0.7067348678601876, - "grad_norm": 0.10004976391792297, - "learning_rate": 7.037235681319399e-05, - "loss": 0.011671188473701476, - "step": 4145 - }, - { - "epoch": 0.7075873827791986, - "grad_norm": 0.06655722856521606, - "learning_rate": 7.036001917498645e-05, - "loss": 0.008725546300411224, - "step": 4150 - }, - { - "epoch": 0.7084398976982097, - "grad_norm": 0.0563860684633255, - "learning_rate": 7.034766619662888e-05, - "loss": 0.009952855855226516, - "step": 4155 - }, - { - "epoch": 0.7092924126172208, - "grad_norm": 0.09032288193702698, - "learning_rate": 7.033529788388806e-05, - "loss": 0.010940121859312058, - "step": 4160 - }, - { - "epoch": 0.7101449275362319, - "grad_norm": 0.10090665519237518, - "learning_rate": 7.032291424253793e-05, - "loss": 0.0093452550470829, - "step": 4165 - }, - { - "epoch": 0.710997442455243, - "grad_norm": 0.08737532049417496, - "learning_rate": 7.03105152783596e-05, - "loss": 0.011567962169647217, - "step": 4170 - }, - { - "epoch": 0.711849957374254, - "grad_norm": 0.08184633404016495, - "learning_rate": 7.029810099714128e-05, - "loss": 0.011243235319852829, - "step": 4175 - }, - { - "epoch": 0.7127024722932651, - "grad_norm": 0.10227608680725098, - "learning_rate": 7.028567140467842e-05, - "loss": 0.01062348037958145, - "step": 4180 - }, - { - "epoch": 0.7135549872122762, - "grad_norm": 0.08998764306306839, - "learning_rate": 7.027322650677353e-05, - "loss": 0.01058843582868576, - "step": 4185 - }, - { - "epoch": 0.7144075021312873, - "grad_norm": 0.06969588994979858, - "learning_rate": 7.02607663092363e-05, - "loss": 0.009745591133832932, - "step": 4190 - }, - { - "epoch": 0.7152600170502984, - "grad_norm": 0.08256277441978455, - "learning_rate": 7.024829081788359e-05, - "loss": 0.009450466185808182, - "step": 4195 - }, - { - "epoch": 0.7161125319693095, - "grad_norm": 0.06720574200153351, - "learning_rate": 7.023580003853937e-05, - "loss": 0.006700781732797622, - "step": 4200 - }, - { - "epoch": 0.7169650468883205, - "grad_norm": 0.1428842693567276, - "learning_rate": 7.022329397703474e-05, - "loss": 0.009295140206813813, - "step": 4205 - }, - { - "epoch": 0.7178175618073316, - "grad_norm": 0.11677515506744385, - "learning_rate": 7.021077263920794e-05, - "loss": 0.011417123675346374, - "step": 4210 - }, - { - "epoch": 0.7186700767263428, - "grad_norm": 0.06874742358922958, - "learning_rate": 7.019823603090437e-05, - "loss": 0.013518881797790528, - "step": 4215 - }, - { - "epoch": 0.7195225916453538, - "grad_norm": 0.06695922464132309, - "learning_rate": 7.018568415797651e-05, - "loss": 0.008886832743883133, - "step": 4220 - }, - { - "epoch": 0.7203751065643649, - "grad_norm": 0.09428033232688904, - "learning_rate": 7.017311702628402e-05, - "loss": 0.009926854819059371, - "step": 4225 - }, - { - "epoch": 0.7212276214833759, - "grad_norm": 0.08420582115650177, - "learning_rate": 7.016053464169362e-05, - "loss": 0.011952979117631912, - "step": 4230 - }, - { - "epoch": 0.722080136402387, - "grad_norm": 0.07804932445287704, - "learning_rate": 7.014793701007922e-05, - "loss": 0.009345601499080657, - "step": 4235 - }, - { - "epoch": 0.7229326513213982, - "grad_norm": 0.10204415768384933, - "learning_rate": 7.013532413732179e-05, - "loss": 0.009596188366413117, - "step": 4240 - }, - { - "epoch": 0.7237851662404092, - "grad_norm": 0.06207561865448952, - "learning_rate": 7.012269602930946e-05, - "loss": 0.010864783823490144, - "step": 4245 - }, - { - "epoch": 0.7246376811594203, - "grad_norm": 0.07258995622396469, - "learning_rate": 7.011005269193743e-05, - "loss": 0.010417935252189637, - "step": 4250 - }, - { - "epoch": 0.7254901960784313, - "grad_norm": 0.09797866642475128, - "learning_rate": 7.009739413110803e-05, - "loss": 0.009975450485944748, - "step": 4255 - }, - { - "epoch": 0.7263427109974424, - "grad_norm": 0.14229723811149597, - "learning_rate": 7.008472035273071e-05, - "loss": 0.013093425333499909, - "step": 4260 - }, - { - "epoch": 0.7271952259164536, - "grad_norm": 0.10052912682294846, - "learning_rate": 7.007203136272202e-05, - "loss": 0.008044174313545227, - "step": 4265 - }, - { - "epoch": 0.7280477408354646, - "grad_norm": 0.06391840428113937, - "learning_rate": 7.005932716700558e-05, - "loss": 0.009895801544189453, - "step": 4270 - }, - { - "epoch": 0.7289002557544757, - "grad_norm": 0.08301703631877899, - "learning_rate": 7.004660777151213e-05, - "loss": 0.008399789780378341, - "step": 4275 - }, - { - "epoch": 0.7297527706734868, - "grad_norm": 0.09191301465034485, - "learning_rate": 7.003387318217954e-05, - "loss": 0.010163726657629013, - "step": 4280 - }, - { - "epoch": 0.7306052855924978, - "grad_norm": 0.06292443722486496, - "learning_rate": 7.00211234049527e-05, - "loss": 0.010847686976194381, - "step": 4285 - }, - { - "epoch": 0.731457800511509, - "grad_norm": 0.08020442724227905, - "learning_rate": 7.000835844578365e-05, - "loss": 0.010198664665222169, - "step": 4290 - }, - { - "epoch": 0.73231031543052, - "grad_norm": 0.08008337765932083, - "learning_rate": 6.999557831063152e-05, - "loss": 0.010532062500715256, - "step": 4295 - }, - { - "epoch": 0.7331628303495311, - "grad_norm": 0.09209048002958298, - "learning_rate": 6.998278300546245e-05, - "loss": 0.012655872106552123, - "step": 4300 - }, - { - "epoch": 0.7340153452685422, - "grad_norm": 0.1040966734290123, - "learning_rate": 6.996997253624974e-05, - "loss": 0.009482499212026596, - "step": 4305 - }, - { - "epoch": 0.7348678601875532, - "grad_norm": 0.06724270433187485, - "learning_rate": 6.995714690897376e-05, - "loss": 0.008470554649829865, - "step": 4310 - }, - { - "epoch": 0.7357203751065644, - "grad_norm": 0.050487734377384186, - "learning_rate": 6.994430612962192e-05, - "loss": 0.009740649163722992, - "step": 4315 - }, - { - "epoch": 0.7365728900255755, - "grad_norm": 0.07633031159639359, - "learning_rate": 6.993145020418873e-05, - "loss": 0.009455478191375733, - "step": 4320 - }, - { - "epoch": 0.7374254049445865, - "grad_norm": 0.11053632944822311, - "learning_rate": 6.991857913867575e-05, - "loss": 0.0111383855342865, - "step": 4325 - }, - { - "epoch": 0.7382779198635976, - "grad_norm": 0.07932359725236893, - "learning_rate": 6.990569293909165e-05, - "loss": 0.010170862078666687, - "step": 4330 - }, - { - "epoch": 0.7391304347826086, - "grad_norm": 0.06205928325653076, - "learning_rate": 6.98927916114521e-05, - "loss": 0.009742221236228943, - "step": 4335 - }, - { - "epoch": 0.7399829497016198, - "grad_norm": 0.07431669533252716, - "learning_rate": 6.987987516177989e-05, - "loss": 0.009026934206485749, - "step": 4340 - }, - { - "epoch": 0.7408354646206309, - "grad_norm": 0.06495700776576996, - "learning_rate": 6.986694359610486e-05, - "loss": 0.010289526730775832, - "step": 4345 - }, - { - "epoch": 0.7416879795396419, - "grad_norm": 0.07561559230089188, - "learning_rate": 6.985399692046387e-05, - "loss": 0.012499828636646271, - "step": 4350 - }, - { - "epoch": 0.742540494458653, - "grad_norm": 0.07708913832902908, - "learning_rate": 6.984103514090087e-05, - "loss": 0.01143759787082672, - "step": 4355 - }, - { - "epoch": 0.7433930093776641, - "grad_norm": 0.1315995305776596, - "learning_rate": 6.982805826346687e-05, - "loss": 0.010377982258796692, - "step": 4360 - }, - { - "epoch": 0.7442455242966752, - "grad_norm": 0.08346904814243317, - "learning_rate": 6.981506629421986e-05, - "loss": 0.008995984494686127, - "step": 4365 - }, - { - "epoch": 0.7450980392156863, - "grad_norm": 0.0814853310585022, - "learning_rate": 6.980205923922497e-05, - "loss": 0.009719532728195191, - "step": 4370 - }, - { - "epoch": 0.7459505541346974, - "grad_norm": 0.06594623625278473, - "learning_rate": 6.978903710455431e-05, - "loss": 0.008998245745897294, - "step": 4375 - }, - { - "epoch": 0.7468030690537084, - "grad_norm": 0.09526190906763077, - "learning_rate": 6.977599989628704e-05, - "loss": 0.010040522366762162, - "step": 4380 - }, - { - "epoch": 0.7476555839727195, - "grad_norm": 0.0938214361667633, - "learning_rate": 6.976294762050935e-05, - "loss": 0.010504753142595292, - "step": 4385 - }, - { - "epoch": 0.7485080988917306, - "grad_norm": 0.09816118329763412, - "learning_rate": 6.97498802833145e-05, - "loss": 0.011645899713039398, - "step": 4390 - }, - { - "epoch": 0.7493606138107417, - "grad_norm": 0.0780767872929573, - "learning_rate": 6.973679789080276e-05, - "loss": 0.011689887195825577, - "step": 4395 - }, - { - "epoch": 0.7498721227621483, - "eval_loss": 0.03396161273121834, - "eval_runtime": 3.6324, - "eval_samples_per_second": 69.376, - "eval_steps_per_second": 1.101, - "step": 4398 - }, - { - "eval_cer_subset": 0.01302783070334001, - "eval_cer_subset_edit_distance": 800, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 4398 - }, - { - "epoch": 0.7502131287297528, - "grad_norm": 0.061287231743335724, - "learning_rate": 6.972370044908141e-05, - "loss": 0.011720015108585358, - "step": 4400 - }, - { - "epoch": 0.7510656436487638, - "grad_norm": 0.0668778270483017, - "learning_rate": 6.971058796426478e-05, - "loss": 0.012064819037914277, - "step": 4405 - }, - { - "epoch": 0.7519181585677749, - "grad_norm": 0.07897942513227463, - "learning_rate": 6.969746044247421e-05, - "loss": 0.010592888295650481, - "step": 4410 - }, - { - "epoch": 0.7527706734867861, - "grad_norm": 0.09090534597635269, - "learning_rate": 6.968431788983806e-05, - "loss": 0.011600010097026825, - "step": 4415 - }, - { - "epoch": 0.7536231884057971, - "grad_norm": 0.080506332218647, - "learning_rate": 6.967116031249172e-05, - "loss": 0.013006125390529633, - "step": 4420 - }, - { - "epoch": 0.7544757033248082, - "grad_norm": 0.04851040989160538, - "learning_rate": 6.965798771657758e-05, - "loss": 0.010165790468454361, - "step": 4425 - }, - { - "epoch": 0.7553282182438192, - "grad_norm": 0.06298603117465973, - "learning_rate": 6.964480010824505e-05, - "loss": 0.007800602912902832, - "step": 4430 - }, - { - "epoch": 0.7561807331628303, - "grad_norm": 0.09919694811105728, - "learning_rate": 6.963159749365053e-05, - "loss": 0.010877586901187897, - "step": 4435 - }, - { - "epoch": 0.7570332480818415, - "grad_norm": 0.083896704018116, - "learning_rate": 6.961837987895747e-05, - "loss": 0.011114003509283066, - "step": 4440 - }, - { - "epoch": 0.7578857630008525, - "grad_norm": 0.0452699176967144, - "learning_rate": 6.960514727033626e-05, - "loss": 0.008609171956777573, - "step": 4445 - }, - { - "epoch": 0.7587382779198636, - "grad_norm": 0.08951374143362045, - "learning_rate": 6.959189967396435e-05, - "loss": 0.01193360835313797, - "step": 4450 - }, - { - "epoch": 0.7595907928388747, - "grad_norm": 0.08749551326036453, - "learning_rate": 6.957863709602611e-05, - "loss": 0.009163837879896164, - "step": 4455 - }, - { - "epoch": 0.7604433077578857, - "grad_norm": 0.09304409474134445, - "learning_rate": 6.956535954271301e-05, - "loss": 0.01038273349404335, - "step": 4460 - }, - { - "epoch": 0.7612958226768969, - "grad_norm": 0.06662629544734955, - "learning_rate": 6.955206702022342e-05, - "loss": 0.010570932179689407, - "step": 4465 - }, - { - "epoch": 0.7621483375959079, - "grad_norm": 0.07736595720052719, - "learning_rate": 6.953875953476276e-05, - "loss": 0.009856238961219788, - "step": 4470 - }, - { - "epoch": 0.763000852514919, - "grad_norm": 0.04692552238702774, - "learning_rate": 6.952543709254338e-05, - "loss": 0.006967573612928391, - "step": 4475 - }, - { - "epoch": 0.7638533674339301, - "grad_norm": 0.06901179254055023, - "learning_rate": 6.951209969978464e-05, - "loss": 0.008588603138923645, - "step": 4480 - }, - { - "epoch": 0.7647058823529411, - "grad_norm": 0.07733304053544998, - "learning_rate": 6.949874736271289e-05, - "loss": 0.012579981982707978, - "step": 4485 - }, - { - "epoch": 0.7655583972719523, - "grad_norm": 0.0693301409482956, - "learning_rate": 6.948538008756144e-05, - "loss": 0.009747470915317535, - "step": 4490 - }, - { - "epoch": 0.7664109121909634, - "grad_norm": 0.07054253667593002, - "learning_rate": 6.947199788057059e-05, - "loss": 0.008387601375579834, - "step": 4495 - }, - { - "epoch": 0.7672634271099744, - "grad_norm": 0.06526053696870804, - "learning_rate": 6.945860074798757e-05, - "loss": 0.008389735966920853, - "step": 4500 - }, - { - "epoch": 0.7681159420289855, - "grad_norm": 0.06862380355596542, - "learning_rate": 6.944518869606662e-05, - "loss": 0.008736115694046021, - "step": 4505 - }, - { - "epoch": 0.7689684569479965, - "grad_norm": 0.06233246996998787, - "learning_rate": 6.943176173106897e-05, - "loss": 0.008519527316093446, - "step": 4510 - }, - { - "epoch": 0.7698209718670077, - "grad_norm": 0.07696249336004257, - "learning_rate": 6.941831985926273e-05, - "loss": 0.011381441354751587, - "step": 4515 - }, - { - "epoch": 0.7706734867860188, - "grad_norm": 0.11450641602277756, - "learning_rate": 6.940486308692302e-05, - "loss": 0.012895810604095458, - "step": 4520 - }, - { - "epoch": 0.7715260017050298, - "grad_norm": 0.09141158312559128, - "learning_rate": 6.939139142033191e-05, - "loss": 0.009508632868528367, - "step": 4525 - }, - { - "epoch": 0.7723785166240409, - "grad_norm": 0.09469986706972122, - "learning_rate": 6.937790486577844e-05, - "loss": 0.014812557399272919, - "step": 4530 - }, - { - "epoch": 0.773231031543052, - "grad_norm": 0.08494299650192261, - "learning_rate": 6.936440342955855e-05, - "loss": 0.01355334222316742, - "step": 4535 - }, - { - "epoch": 0.7740835464620631, - "grad_norm": 0.09328251332044601, - "learning_rate": 6.93508871179752e-05, - "loss": 0.011529977619647979, - "step": 4540 - }, - { - "epoch": 0.7749360613810742, - "grad_norm": 0.06696850806474686, - "learning_rate": 6.933735593733821e-05, - "loss": 0.010230815410614014, - "step": 4545 - }, - { - "epoch": 0.7757885763000852, - "grad_norm": 0.07968153059482574, - "learning_rate": 6.932380989396442e-05, - "loss": 0.012129776924848557, - "step": 4550 - }, - { - "epoch": 0.7766410912190963, - "grad_norm": 0.07916650176048279, - "learning_rate": 6.931024899417756e-05, - "loss": 0.009455519914627075, - "step": 4555 - }, - { - "epoch": 0.7774936061381074, - "grad_norm": 0.05229945108294487, - "learning_rate": 6.92966732443083e-05, - "loss": 0.008516684174537659, - "step": 4560 - }, - { - "epoch": 0.7783461210571185, - "grad_norm": 0.08314234018325806, - "learning_rate": 6.928308265069428e-05, - "loss": 0.010914114117622376, - "step": 4565 - }, - { - "epoch": 0.7791986359761296, - "grad_norm": 0.05075672268867493, - "learning_rate": 6.926947721968001e-05, - "loss": 0.008188208192586898, - "step": 4570 - }, - { - "epoch": 0.7800511508951407, - "grad_norm": 0.09465362876653671, - "learning_rate": 6.925585695761697e-05, - "loss": 0.009074468165636063, - "step": 4575 - }, - { - "epoch": 0.7809036658141517, - "grad_norm": 0.09024044126272202, - "learning_rate": 6.924222187086356e-05, - "loss": 0.006571034342050553, - "step": 4580 - }, - { - "epoch": 0.7817561807331628, - "grad_norm": 0.050575681030750275, - "learning_rate": 6.922857196578507e-05, - "loss": 0.008829852938652039, - "step": 4585 - }, - { - "epoch": 0.782608695652174, - "grad_norm": 0.09888230264186859, - "learning_rate": 6.921490724875376e-05, - "loss": 0.01053793728351593, - "step": 4590 - }, - { - "epoch": 0.783461210571185, - "grad_norm": 0.042567264288663864, - "learning_rate": 6.920122772614875e-05, - "loss": 0.009682109951972962, - "step": 4595 - }, - { - "epoch": 0.7843137254901961, - "grad_norm": 0.10021623969078064, - "learning_rate": 6.91875334043561e-05, - "loss": 0.012160807102918624, - "step": 4600 - }, - { - "epoch": 0.7851662404092071, - "grad_norm": 0.07016255706548691, - "learning_rate": 6.917382428976878e-05, - "loss": 0.008590599894523621, - "step": 4605 - }, - { - "epoch": 0.7860187553282183, - "grad_norm": 0.06567320227622986, - "learning_rate": 6.916010038878667e-05, - "loss": 0.00809207409620285, - "step": 4610 - }, - { - "epoch": 0.7868712702472294, - "grad_norm": 0.05032164603471756, - "learning_rate": 6.914636170781652e-05, - "loss": 0.009291460365056991, - "step": 4615 - }, - { - "epoch": 0.7877237851662404, - "grad_norm": 0.06303273886442184, - "learning_rate": 6.913260825327204e-05, - "loss": 0.00837131291627884, - "step": 4620 - }, - { - "epoch": 0.7885763000852515, - "grad_norm": 0.05837355926632881, - "learning_rate": 6.911884003157376e-05, - "loss": 0.007800968736410141, - "step": 4625 - }, - { - "epoch": 0.7894288150042625, - "grad_norm": 0.07052712142467499, - "learning_rate": 6.910505704914916e-05, - "loss": 0.010577390342950821, - "step": 4630 - }, - { - "epoch": 0.7902813299232737, - "grad_norm": 0.08550997078418732, - "learning_rate": 6.909125931243259e-05, - "loss": 0.012821859121322632, - "step": 4635 - }, - { - "epoch": 0.7911338448422848, - "grad_norm": 0.060406558215618134, - "learning_rate": 6.90774468278653e-05, - "loss": 0.0065113060176372525, - "step": 4640 - }, - { - "epoch": 0.7919863597612958, - "grad_norm": 0.13999445736408234, - "learning_rate": 6.906361960189542e-05, - "loss": 0.012699820101261139, - "step": 4645 - }, - { - "epoch": 0.7928388746803069, - "grad_norm": 0.07585978507995605, - "learning_rate": 6.904977764097797e-05, - "loss": 0.008435635268688202, - "step": 4650 - }, - { - "epoch": 0.793691389599318, - "grad_norm": 0.07488108426332474, - "learning_rate": 6.90359209515748e-05, - "loss": 0.011925875395536422, - "step": 4655 - }, - { - "epoch": 0.7945439045183291, - "grad_norm": 0.12476535886526108, - "learning_rate": 6.902204954015471e-05, - "loss": 0.009086847305297852, - "step": 4660 - }, - { - "epoch": 0.7953964194373402, - "grad_norm": 0.08779732137918472, - "learning_rate": 6.900816341319331e-05, - "loss": 0.00962812826037407, - "step": 4665 - }, - { - "epoch": 0.7962489343563512, - "grad_norm": 0.15791405737400055, - "learning_rate": 6.899426257717312e-05, - "loss": 0.011767397075891495, - "step": 4670 - }, - { - "epoch": 0.7971014492753623, - "grad_norm": 0.11228909343481064, - "learning_rate": 6.898034703858352e-05, - "loss": 0.008271434903144836, - "step": 4675 - }, - { - "epoch": 0.7979539641943734, - "grad_norm": 0.07288003712892532, - "learning_rate": 6.896641680392073e-05, - "loss": 0.009384474158287049, - "step": 4680 - }, - { - "epoch": 0.7988064791133845, - "grad_norm": 0.08285173773765564, - "learning_rate": 6.895247187968784e-05, - "loss": 0.012600034475326538, - "step": 4685 - }, - { - "epoch": 0.7996589940323956, - "grad_norm": 0.07812397927045822, - "learning_rate": 6.893851227239484e-05, - "loss": 0.008935874700546265, - "step": 4690 - }, - { - "epoch": 0.8005115089514067, - "grad_norm": 0.07500546425580978, - "learning_rate": 6.892453798855852e-05, - "loss": 0.010619471222162247, - "step": 4695 - }, - { - "epoch": 0.8013640238704177, - "grad_norm": 0.05211177095770836, - "learning_rate": 6.891054903470251e-05, - "loss": 0.008601508289575576, - "step": 4700 - }, - { - "epoch": 0.8022165387894288, - "grad_norm": 0.029616642743349075, - "learning_rate": 6.889654541735738e-05, - "loss": 0.007921247184276581, - "step": 4705 - }, - { - "epoch": 0.80306905370844, - "grad_norm": 0.06894131749868393, - "learning_rate": 6.888252714306044e-05, - "loss": 0.010935742408037186, - "step": 4710 - }, - { - "epoch": 0.803921568627451, - "grad_norm": 0.0766182467341423, - "learning_rate": 6.886849421835587e-05, - "loss": 0.010556706041097642, - "step": 4715 - }, - { - "epoch": 0.8047740835464621, - "grad_norm": 0.09164462238550186, - "learning_rate": 6.885444664979477e-05, - "loss": 0.010812586545944214, - "step": 4720 - }, - { - "epoch": 0.8056265984654731, - "grad_norm": 0.06463408470153809, - "learning_rate": 6.884038444393496e-05, - "loss": 0.009179002791643142, - "step": 4725 - }, - { - "epoch": 0.8064791133844842, - "grad_norm": 0.06639672070741653, - "learning_rate": 6.882630760734118e-05, - "loss": 0.012755092978477479, - "step": 4730 - }, - { - "epoch": 0.8073316283034954, - "grad_norm": 0.062024496495723724, - "learning_rate": 6.881221614658493e-05, - "loss": 0.009655499458312988, - "step": 4735 - }, - { - "epoch": 0.8081841432225064, - "grad_norm": 0.06751494854688644, - "learning_rate": 6.879811006824459e-05, - "loss": 0.010860173404216767, - "step": 4740 - }, - { - "epoch": 0.8090366581415175, - "grad_norm": 0.05535218119621277, - "learning_rate": 6.878398937890535e-05, - "loss": 0.011440058052539826, - "step": 4745 - }, - { - "epoch": 0.8098891730605285, - "grad_norm": 0.08365204185247421, - "learning_rate": 6.876985408515922e-05, - "loss": 0.011058357357978821, - "step": 4750 - }, - { - "epoch": 0.8107416879795396, - "grad_norm": 0.06450537592172623, - "learning_rate": 6.875570419360501e-05, - "loss": 0.01046149879693985, - "step": 4755 - }, - { - "epoch": 0.8115942028985508, - "grad_norm": 0.08542726188898087, - "learning_rate": 6.874153971084837e-05, - "loss": 0.009869573265314102, - "step": 4760 - }, - { - "epoch": 0.8124467178175618, - "grad_norm": 0.08184531331062317, - "learning_rate": 6.872736064350176e-05, - "loss": 0.01054040789604187, - "step": 4765 - }, - { - "epoch": 0.8132992327365729, - "grad_norm": 0.07068512588739395, - "learning_rate": 6.871316699818442e-05, - "loss": 0.009573462605476379, - "step": 4770 - }, - { - "epoch": 0.814151747655584, - "grad_norm": 0.08866564929485321, - "learning_rate": 6.869895878152244e-05, - "loss": 0.008078956604003906, - "step": 4775 - }, - { - "epoch": 0.815004262574595, - "grad_norm": 0.08215270191431046, - "learning_rate": 6.868473600014867e-05, - "loss": 0.010586659610271453, - "step": 4780 - }, - { - "epoch": 0.8158567774936062, - "grad_norm": 0.0449003241956234, - "learning_rate": 6.867049866070278e-05, - "loss": 0.008572281152009965, - "step": 4785 - }, - { - "epoch": 0.8167092924126172, - "grad_norm": 0.0766722783446312, - "learning_rate": 6.865624676983124e-05, - "loss": 0.009015947580337524, - "step": 4790 - }, - { - "epoch": 0.8175618073316283, - "grad_norm": 0.07404733449220657, - "learning_rate": 6.864198033418732e-05, - "loss": 0.014639028906822204, - "step": 4795 - }, - { - "epoch": 0.8184143222506394, - "grad_norm": 0.10437514632940292, - "learning_rate": 6.862769936043102e-05, - "loss": 0.009333716332912445, - "step": 4800 - }, - { - "epoch": 0.8192668371696504, - "grad_norm": 0.06732609868049622, - "learning_rate": 6.861340385522921e-05, - "loss": 0.007169592380523682, - "step": 4805 - }, - { - "epoch": 0.8201193520886616, - "grad_norm": 0.06016068905591965, - "learning_rate": 6.859909382525552e-05, - "loss": 0.009211564064025879, - "step": 4810 - }, - { - "epoch": 0.8209718670076727, - "grad_norm": 0.07302942126989365, - "learning_rate": 6.858476927719031e-05, - "loss": 0.009643231332302094, - "step": 4815 - }, - { - "epoch": 0.8218243819266837, - "grad_norm": 0.07511111348867416, - "learning_rate": 6.857043021772079e-05, - "loss": 0.010751830041408538, - "step": 4820 - }, - { - "epoch": 0.8226768968456948, - "grad_norm": 0.04791528359055519, - "learning_rate": 6.855607665354088e-05, - "loss": 0.008413314074277877, - "step": 4825 - }, - { - "epoch": 0.8235294117647058, - "grad_norm": 0.08279003202915192, - "learning_rate": 6.854170859135132e-05, - "loss": 0.009260118752717972, - "step": 4830 - }, - { - "epoch": 0.824381926683717, - "grad_norm": 0.06907783448696136, - "learning_rate": 6.85273260378596e-05, - "loss": 0.009681220352649688, - "step": 4835 - }, - { - "epoch": 0.8252344416027281, - "grad_norm": 0.09847953170537949, - "learning_rate": 6.851292899977997e-05, - "loss": 0.009847448766231537, - "step": 4840 - }, - { - "epoch": 0.8260869565217391, - "grad_norm": 0.0683966800570488, - "learning_rate": 6.849851748383343e-05, - "loss": 0.007601346075534821, - "step": 4845 - }, - { - "epoch": 0.8269394714407502, - "grad_norm": 0.0523662269115448, - "learning_rate": 6.848409149674779e-05, - "loss": 0.00900915488600731, - "step": 4850 - }, - { - "epoch": 0.8277919863597613, - "grad_norm": 0.07112257927656174, - "learning_rate": 6.846965104525757e-05, - "loss": 0.011230588704347611, - "step": 4855 - }, - { - "epoch": 0.8286445012787724, - "grad_norm": 0.09305348247289658, - "learning_rate": 6.845519613610402e-05, - "loss": 0.01087992861866951, - "step": 4860 - }, - { - "epoch": 0.8294970161977835, - "grad_norm": 0.062347084283828735, - "learning_rate": 6.84407267760352e-05, - "loss": 0.00806276947259903, - "step": 4865 - }, - { - "epoch": 0.8303495311167945, - "grad_norm": 0.09091926366090775, - "learning_rate": 6.84262429718059e-05, - "loss": 0.010562103241682053, - "step": 4870 - }, - { - "epoch": 0.8312020460358056, - "grad_norm": 0.05807443708181381, - "learning_rate": 6.841174473017762e-05, - "loss": 0.010788433253765106, - "step": 4875 - }, - { - "epoch": 0.8320545609548167, - "grad_norm": 0.06664252281188965, - "learning_rate": 6.839723205791863e-05, - "loss": 0.01030244082212448, - "step": 4880 - }, - { - "epoch": 0.8329070758738278, - "grad_norm": 0.07469561696052551, - "learning_rate": 6.838270496180392e-05, - "loss": 0.01250479370355606, - "step": 4885 - }, - { - "epoch": 0.8337595907928389, - "grad_norm": 0.0469096302986145, - "learning_rate": 6.836816344861523e-05, - "loss": 0.010546717792749405, - "step": 4890 - }, - { - "epoch": 0.83461210571185, - "grad_norm": 0.0646355077624321, - "learning_rate": 6.835360752514104e-05, - "loss": 0.008491561561822892, - "step": 4895 - }, - { - "epoch": 0.835464620630861, - "grad_norm": 0.06006006523966789, - "learning_rate": 6.83390371981765e-05, - "loss": 0.010175065696239471, - "step": 4900 - }, - { - "epoch": 0.8363171355498721, - "grad_norm": 0.0595518983900547, - "learning_rate": 6.832445247452355e-05, - "loss": 0.009471315890550613, - "step": 4905 - }, - { - "epoch": 0.8371696504688833, - "grad_norm": 0.0722845196723938, - "learning_rate": 6.830985336099081e-05, - "loss": 0.011656039208173753, - "step": 4910 - }, - { - "epoch": 0.8380221653878943, - "grad_norm": 0.09830670058727264, - "learning_rate": 6.829523986439366e-05, - "loss": 0.0106172576546669, - "step": 4915 - }, - { - "epoch": 0.8388746803069054, - "grad_norm": 0.0725899264216423, - "learning_rate": 6.828061199155413e-05, - "loss": 0.00935768336057663, - "step": 4920 - }, - { - "epoch": 0.8397271952259164, - "grad_norm": 0.06721889227628708, - "learning_rate": 6.826596974930101e-05, - "loss": 0.010951700061559677, - "step": 4925 - }, - { - "epoch": 0.8405797101449275, - "grad_norm": 0.09289079904556274, - "learning_rate": 6.82513131444698e-05, - "loss": 0.010057362169027329, - "step": 4930 - }, - { - "epoch": 0.8414322250639387, - "grad_norm": 0.07667957991361618, - "learning_rate": 6.823664218390267e-05, - "loss": 0.012943412363529205, - "step": 4935 - }, - { - "epoch": 0.8422847399829497, - "grad_norm": 0.041785743087530136, - "learning_rate": 6.822195687444853e-05, - "loss": 0.009845246374607087, - "step": 4940 - }, - { - "epoch": 0.8431372549019608, - "grad_norm": 0.062134817242622375, - "learning_rate": 6.820725722296295e-05, - "loss": 0.010506168007850647, - "step": 4945 - }, - { - "epoch": 0.8439897698209718, - "grad_norm": 0.0870882049202919, - "learning_rate": 6.819254323630825e-05, - "loss": 0.008953387290239334, - "step": 4950 - }, - { - "epoch": 0.8448422847399829, - "grad_norm": 0.06943989545106888, - "learning_rate": 6.817781492135337e-05, - "loss": 0.009975537657737732, - "step": 4955 - }, - { - "epoch": 0.8456947996589941, - "grad_norm": 0.0961625948548317, - "learning_rate": 6.8163072284974e-05, - "loss": 0.010299818217754364, - "step": 4960 - }, - { - "epoch": 0.8465473145780051, - "grad_norm": 0.0685473084449768, - "learning_rate": 6.814831533405249e-05, - "loss": 0.007892660051584243, - "step": 4965 - }, - { - "epoch": 0.8473998294970162, - "grad_norm": 0.06370922178030014, - "learning_rate": 6.813354407547787e-05, - "loss": 0.011673354357481004, - "step": 4970 - }, - { - "epoch": 0.8482523444160273, - "grad_norm": 0.1212867870926857, - "learning_rate": 6.811875851614586e-05, - "loss": 0.01049395203590393, - "step": 4975 - }, - { - "epoch": 0.8491048593350383, - "grad_norm": 0.07590476423501968, - "learning_rate": 6.810395866295885e-05, - "loss": 0.011261900514364242, - "step": 4980 - }, - { - "epoch": 0.8499573742540495, - "grad_norm": 0.06342966109514236, - "learning_rate": 6.808914452282592e-05, - "loss": 0.011414043605327606, - "step": 4985 - }, - { - "epoch": 0.8508098891730606, - "grad_norm": 0.08939556777477264, - "learning_rate": 6.807431610266278e-05, - "loss": 0.008719882369041443, - "step": 4990 - }, - { - "epoch": 0.8516624040920716, - "grad_norm": 0.09418119490146637, - "learning_rate": 6.805947340939183e-05, - "loss": 0.011406099796295166, - "step": 4995 - }, - { - "epoch": 0.8525149190110827, - "grad_norm": 0.09113836288452148, - "learning_rate": 6.804461644994213e-05, - "loss": 0.011070792376995087, - "step": 5000 - }, - { - "epoch": 0.8533674339300937, - "grad_norm": 0.06545080989599228, - "learning_rate": 6.802974523124941e-05, - "loss": 0.009602059423923493, - "step": 5005 - }, - { - "epoch": 0.8542199488491049, - "grad_norm": 0.07779792696237564, - "learning_rate": 6.801485976025607e-05, - "loss": 0.008207190036773681, - "step": 5010 - }, - { - "epoch": 0.855072463768116, - "grad_norm": 0.038606271147727966, - "learning_rate": 6.799996004391113e-05, - "loss": 0.008772189915180206, - "step": 5015 - }, - { - "epoch": 0.855924978687127, - "grad_norm": 0.07329479604959488, - "learning_rate": 6.798504608917025e-05, - "loss": 0.011226999759674072, - "step": 5020 - }, - { - "epoch": 0.8567774936061381, - "grad_norm": 0.08631903678178787, - "learning_rate": 6.797011790299579e-05, - "loss": 0.012361649423837662, - "step": 5025 - }, - { - "epoch": 0.8576300085251491, - "grad_norm": 0.09041957557201385, - "learning_rate": 6.79551754923567e-05, - "loss": 0.010576151311397552, - "step": 5030 - }, - { - "epoch": 0.8584825234441603, - "grad_norm": 0.07003892213106155, - "learning_rate": 6.794021886422861e-05, - "loss": 0.008748160302639007, - "step": 5035 - }, - { - "epoch": 0.8593350383631714, - "grad_norm": 0.06976106762886047, - "learning_rate": 6.792524802559378e-05, - "loss": 0.010539846867322922, - "step": 5040 - }, - { - "epoch": 0.8601875532821824, - "grad_norm": 0.05501266196370125, - "learning_rate": 6.791026298344107e-05, - "loss": 0.01045292615890503, - "step": 5045 - }, - { - "epoch": 0.8610400682011935, - "grad_norm": 0.051503781229257584, - "learning_rate": 6.789526374476602e-05, - "loss": 0.009410140663385391, - "step": 5050 - }, - { - "epoch": 0.8618925831202046, - "grad_norm": 0.05674072727560997, - "learning_rate": 6.788025031657076e-05, - "loss": 0.00899135023355484, - "step": 5055 - }, - { - "epoch": 0.8627450980392157, - "grad_norm": 0.09688259661197662, - "learning_rate": 6.786522270586406e-05, - "loss": 0.010228607058525085, - "step": 5060 - }, - { - "epoch": 0.8635976129582268, - "grad_norm": 0.08542542159557343, - "learning_rate": 6.785018091966131e-05, - "loss": 0.010819461196660995, - "step": 5065 - }, - { - "epoch": 0.8644501278772379, - "grad_norm": 0.05703757330775261, - "learning_rate": 6.783512496498452e-05, - "loss": 0.011345957219600678, - "step": 5070 - }, - { - "epoch": 0.8653026427962489, - "grad_norm": 0.08096349984407425, - "learning_rate": 6.782005484886231e-05, - "loss": 0.011220332235097885, - "step": 5075 - }, - { - "epoch": 0.86615515771526, - "grad_norm": 0.07224266976118088, - "learning_rate": 6.780497057832988e-05, - "loss": 0.012557309865951539, - "step": 5080 - }, - { - "epoch": 0.8670076726342711, - "grad_norm": 0.08718731999397278, - "learning_rate": 6.778987216042912e-05, - "loss": 0.009770408272743225, - "step": 5085 - }, - { - "epoch": 0.8678601875532822, - "grad_norm": 0.05731170251965523, - "learning_rate": 6.777475960220846e-05, - "loss": 0.008567098528146744, - "step": 5090 - }, - { - "epoch": 0.8687127024722933, - "grad_norm": 0.07938708364963531, - "learning_rate": 6.775963291072292e-05, - "loss": 0.010664292424917222, - "step": 5095 - }, - { - "epoch": 0.8695652173913043, - "grad_norm": 0.028421485796570778, - "learning_rate": 6.774449209303416e-05, - "loss": 0.010194088518619537, - "step": 5100 - }, - { - "epoch": 0.8704177323103154, - "grad_norm": 0.06094631180167198, - "learning_rate": 6.772933715621042e-05, - "loss": 0.009481045603752136, - "step": 5105 - }, - { - "epoch": 0.8712702472293266, - "grad_norm": 0.08887558430433273, - "learning_rate": 6.771416810732653e-05, - "loss": 0.012845474481582641, - "step": 5110 - }, - { - "epoch": 0.8721227621483376, - "grad_norm": 0.07431238144636154, - "learning_rate": 6.76989849534639e-05, - "loss": 0.00815560668706894, - "step": 5115 - }, - { - "epoch": 0.8729752770673487, - "grad_norm": 0.09380137175321579, - "learning_rate": 6.768378770171052e-05, - "loss": 0.009580246359109878, - "step": 5120 - }, - { - "epoch": 0.8738277919863597, - "grad_norm": 0.08001488447189331, - "learning_rate": 6.766857635916099e-05, - "loss": 0.00857289507985115, - "step": 5125 - }, - { - "epoch": 0.8746803069053708, - "grad_norm": 0.049355555325746536, - "learning_rate": 6.765335093291647e-05, - "loss": 0.009263276308774947, - "step": 5130 - }, - { - "epoch": 0.875532821824382, - "grad_norm": 0.05873994901776314, - "learning_rate": 6.763811143008469e-05, - "loss": 0.008233514428138734, - "step": 5135 - }, - { - "epoch": 0.876385336743393, - "grad_norm": 0.10190756618976593, - "learning_rate": 6.762285785777995e-05, - "loss": 0.01529676467180252, - "step": 5140 - }, - { - "epoch": 0.8772378516624041, - "grad_norm": 0.08395158499479294, - "learning_rate": 6.760759022312313e-05, - "loss": 0.00961325541138649, - "step": 5145 - }, - { - "epoch": 0.8780903665814151, - "grad_norm": 0.07476748526096344, - "learning_rate": 6.759230853324169e-05, - "loss": 0.010477164387702942, - "step": 5150 - }, - { - "epoch": 0.8789428815004262, - "grad_norm": 0.07773051410913467, - "learning_rate": 6.757701279526961e-05, - "loss": 0.010389962792396545, - "step": 5155 - }, - { - "epoch": 0.8797953964194374, - "grad_norm": 0.07345708459615707, - "learning_rate": 6.756170301634745e-05, - "loss": 0.009174319356679917, - "step": 5160 - }, - { - "epoch": 0.8806479113384484, - "grad_norm": 0.07917368412017822, - "learning_rate": 6.754637920362233e-05, - "loss": 0.012756256759166718, - "step": 5165 - }, - { - "epoch": 0.8815004262574595, - "grad_norm": 0.06016271933913231, - "learning_rate": 6.75310413642479e-05, - "loss": 0.011058077961206437, - "step": 5170 - }, - { - "epoch": 0.8823529411764706, - "grad_norm": 0.06637005507946014, - "learning_rate": 6.751568950538441e-05, - "loss": 0.013590328395366669, - "step": 5175 - }, - { - "epoch": 0.8832054560954816, - "grad_norm": 0.06486016511917114, - "learning_rate": 6.750032363419857e-05, - "loss": 0.010195261240005494, - "step": 5180 - }, - { - "epoch": 0.8840579710144928, - "grad_norm": 0.09800687432289124, - "learning_rate": 6.748494375786372e-05, - "loss": 0.011106249690055848, - "step": 5185 - }, - { - "epoch": 0.8849104859335039, - "grad_norm": 0.04665162041783333, - "learning_rate": 6.746954988355967e-05, - "loss": 0.009880972653627395, - "step": 5190 - }, - { - "epoch": 0.8857630008525149, - "grad_norm": 0.05554487928748131, - "learning_rate": 6.745414201847282e-05, - "loss": 0.009480565786361694, - "step": 5195 - }, - { - "epoch": 0.886615515771526, - "grad_norm": 0.059967316687107086, - "learning_rate": 6.743872016979605e-05, - "loss": 0.010072766989469528, - "step": 5200 - }, - { - "epoch": 0.887468030690537, - "grad_norm": 0.08275031298398972, - "learning_rate": 6.74232843447288e-05, - "loss": 0.010208947211503982, - "step": 5205 - }, - { - "epoch": 0.8883205456095482, - "grad_norm": 0.07091715931892395, - "learning_rate": 6.740783455047704e-05, - "loss": 0.011881709098815918, - "step": 5210 - }, - { - "epoch": 0.8891730605285593, - "grad_norm": 0.1465480923652649, - "learning_rate": 6.739237079425322e-05, - "loss": 0.010970161855220794, - "step": 5215 - }, - { - "epoch": 0.8900255754475703, - "grad_norm": 0.07571437954902649, - "learning_rate": 6.737689308327636e-05, - "loss": 0.010722124576568603, - "step": 5220 - }, - { - "epoch": 0.8908780903665814, - "grad_norm": 0.06671100109815598, - "learning_rate": 6.736140142477194e-05, - "loss": 0.010463282465934753, - "step": 5225 - }, - { - "epoch": 0.8917306052855924, - "grad_norm": 0.05676295980811119, - "learning_rate": 6.734589582597204e-05, - "loss": 0.00933043509721756, - "step": 5230 - }, - { - "epoch": 0.8925831202046036, - "grad_norm": 0.09708777070045471, - "learning_rate": 6.733037629411514e-05, - "loss": 0.011712780594825745, - "step": 5235 - }, - { - "epoch": 0.8934356351236147, - "grad_norm": 0.07073090970516205, - "learning_rate": 6.731484283644626e-05, - "loss": 0.008112293481826783, - "step": 5240 - }, - { - "epoch": 0.8942881500426257, - "grad_norm": 0.08821752667427063, - "learning_rate": 6.7299295460217e-05, - "loss": 0.01026250645518303, - "step": 5245 - }, - { - "epoch": 0.8951406649616368, - "grad_norm": 0.05165687948465347, - "learning_rate": 6.728373417268533e-05, - "loss": 0.008761890232563019, - "step": 5250 - }, - { - "epoch": 0.8959931798806479, - "grad_norm": 0.06872246414422989, - "learning_rate": 6.726815898111581e-05, - "loss": 0.012551462650299073, - "step": 5255 - }, - { - "epoch": 0.896845694799659, - "grad_norm": 0.07550673931837082, - "learning_rate": 6.725256989277944e-05, - "loss": 0.010165071487426758, - "step": 5260 - }, - { - "epoch": 0.8976982097186701, - "grad_norm": 0.05931933969259262, - "learning_rate": 6.723696691495373e-05, - "loss": 0.009457996487617493, - "step": 5265 - }, - { - "epoch": 0.8985507246376812, - "grad_norm": 0.10838861763477325, - "learning_rate": 6.722135005492268e-05, - "loss": 0.012053199112415314, - "step": 5270 - }, - { - "epoch": 0.8994032395566922, - "grad_norm": 0.062102265655994415, - "learning_rate": 6.720571931997676e-05, - "loss": 0.007582514733076096, - "step": 5275 - }, - { - "epoch": 0.9002557544757033, - "grad_norm": 0.05997858941555023, - "learning_rate": 6.719007471741289e-05, - "loss": 0.009348342567682267, - "step": 5280 - }, - { - "epoch": 0.9011082693947144, - "grad_norm": 0.05405129864811897, - "learning_rate": 6.717441625453451e-05, - "loss": 0.009910254925489425, - "step": 5285 - }, - { - "epoch": 0.9019607843137255, - "grad_norm": 0.08071329444646835, - "learning_rate": 6.715874393865152e-05, - "loss": 0.010672248899936676, - "step": 5290 - }, - { - "epoch": 0.9028132992327366, - "grad_norm": 0.06863300502300262, - "learning_rate": 6.714305777708027e-05, - "loss": 0.008570954948663712, - "step": 5295 - }, - { - "epoch": 0.9036658141517476, - "grad_norm": 0.07818135619163513, - "learning_rate": 6.712735777714357e-05, - "loss": 0.008647527545690536, - "step": 5300 - }, - { - "epoch": 0.9045183290707587, - "grad_norm": 0.14757376909255981, - "learning_rate": 6.711164394617072e-05, - "loss": 0.010825049877166749, - "step": 5305 - }, - { - "epoch": 0.9053708439897699, - "grad_norm": 0.07376445829868317, - "learning_rate": 6.709591629149746e-05, - "loss": 0.01299697458744049, - "step": 5310 - }, - { - "epoch": 0.9062233589087809, - "grad_norm": 0.05860469490289688, - "learning_rate": 6.708017482046597e-05, - "loss": 0.009549598395824432, - "step": 5315 - }, - { - "epoch": 0.907075873827792, - "grad_norm": 0.0799872875213623, - "learning_rate": 6.706441954042488e-05, - "loss": 0.009733843803405761, - "step": 5320 - }, - { - "epoch": 0.907928388746803, - "grad_norm": 0.05245954543352127, - "learning_rate": 6.704865045872932e-05, - "loss": 0.009799794852733612, - "step": 5325 - }, - { - "epoch": 0.9087809036658141, - "grad_norm": 0.05515241622924805, - "learning_rate": 6.703286758274079e-05, - "loss": 0.007391643524169922, - "step": 5330 - }, - { - "epoch": 0.9096334185848253, - "grad_norm": 0.05900256708264351, - "learning_rate": 6.701707091982726e-05, - "loss": 0.009107303619384766, - "step": 5335 - }, - { - "epoch": 0.9104859335038363, - "grad_norm": 0.09559495002031326, - "learning_rate": 6.700126047736317e-05, - "loss": 0.009052158147096635, - "step": 5340 - }, - { - "epoch": 0.9113384484228474, - "grad_norm": 0.11189334839582443, - "learning_rate": 6.698543626272932e-05, - "loss": 0.011292549222707749, - "step": 5345 - }, - { - "epoch": 0.9121909633418585, - "grad_norm": 0.07031659781932831, - "learning_rate": 6.6969598283313e-05, - "loss": 0.008589480072259903, - "step": 5350 - }, - { - "epoch": 0.9130434782608695, - "grad_norm": 0.1652907431125641, - "learning_rate": 6.69537465465079e-05, - "loss": 0.010865563899278641, - "step": 5355 - }, - { - "epoch": 0.9138959931798807, - "grad_norm": 0.06157436594367027, - "learning_rate": 6.693788105971413e-05, - "loss": 0.012611952424049378, - "step": 5360 - }, - { - "epoch": 0.9147485080988917, - "grad_norm": 0.03928734362125397, - "learning_rate": 6.692200183033826e-05, - "loss": 0.009115418046712875, - "step": 5365 - }, - { - "epoch": 0.9156010230179028, - "grad_norm": 0.06604880094528198, - "learning_rate": 6.690610886579321e-05, - "loss": 0.010015038400888443, - "step": 5370 - }, - { - "epoch": 0.9164535379369139, - "grad_norm": 0.07625336199998856, - "learning_rate": 6.689020217349835e-05, - "loss": 0.010416677594184876, - "step": 5375 - }, - { - "epoch": 0.9173060528559249, - "grad_norm": 0.07674526423215866, - "learning_rate": 6.687428176087946e-05, - "loss": 0.01016802191734314, - "step": 5380 - }, - { - "epoch": 0.9181585677749361, - "grad_norm": 0.08422617614269257, - "learning_rate": 6.685834763536872e-05, - "loss": 0.011127004027366638, - "step": 5385 - }, - { - "epoch": 0.9190110826939472, - "grad_norm": 0.057719554752111435, - "learning_rate": 6.684239980440472e-05, - "loss": 0.008915853500366212, - "step": 5390 - }, - { - "epoch": 0.9198635976129582, - "grad_norm": 0.056555263698101044, - "learning_rate": 6.682643827543241e-05, - "loss": 0.0095272496342659, - "step": 5395 - }, - { - "epoch": 0.9207161125319693, - "grad_norm": 0.07605638355016708, - "learning_rate": 6.681046305590317e-05, - "loss": 0.010731159895658492, - "step": 5400 - }, - { - "epoch": 0.9215686274509803, - "grad_norm": 0.07499220222234726, - "learning_rate": 6.679447415327479e-05, - "loss": 0.010919998586177825, - "step": 5405 - }, - { - "epoch": 0.9224211423699915, - "grad_norm": 0.052863143384456635, - "learning_rate": 6.677847157501137e-05, - "loss": 0.011300939321517944, - "step": 5410 - }, - { - "epoch": 0.9232736572890026, - "grad_norm": 0.09210597723722458, - "learning_rate": 6.676245532858351e-05, - "loss": 0.013997772336006164, - "step": 5415 - }, - { - "epoch": 0.9241261722080136, - "grad_norm": 0.0746840238571167, - "learning_rate": 6.674642542146807e-05, - "loss": 0.012542533874511718, - "step": 5420 - }, - { - "epoch": 0.9249786871270247, - "grad_norm": 0.06137506663799286, - "learning_rate": 6.67303818611484e-05, - "loss": 0.008029398322105408, - "step": 5425 - }, - { - "epoch": 0.9258312020460358, - "grad_norm": 0.03867131471633911, - "learning_rate": 6.671432465511411e-05, - "loss": 0.009305672347545623, - "step": 5430 - }, - { - "epoch": 0.9266837169650469, - "grad_norm": 0.08291540294885635, - "learning_rate": 6.669825381086128e-05, - "loss": 0.011059926450252533, - "step": 5435 - }, - { - "epoch": 0.927536231884058, - "grad_norm": 0.0689411610364914, - "learning_rate": 6.668216933589228e-05, - "loss": 0.008350597321987152, - "step": 5440 - }, - { - "epoch": 0.928388746803069, - "grad_norm": 0.051505669951438904, - "learning_rate": 6.666607123771591e-05, - "loss": 0.011102759093046189, - "step": 5445 - }, - { - "epoch": 0.9292412617220801, - "grad_norm": 0.08774327486753464, - "learning_rate": 6.664995952384729e-05, - "loss": 0.009498609602451325, - "step": 5450 - }, - { - "epoch": 0.9300937766410913, - "grad_norm": 0.060566093772649765, - "learning_rate": 6.663383420180789e-05, - "loss": 0.007811173051595688, - "step": 5455 - }, - { - "epoch": 0.9309462915601023, - "grad_norm": 0.09952156245708466, - "learning_rate": 6.661769527912555e-05, - "loss": 0.010514630377292633, - "step": 5460 - }, - { - "epoch": 0.9317988064791134, - "grad_norm": 0.04871741309762001, - "learning_rate": 6.660154276333446e-05, - "loss": 0.006630983203649521, - "step": 5465 - }, - { - "epoch": 0.9326513213981245, - "grad_norm": 0.06254981458187103, - "learning_rate": 6.658537666197517e-05, - "loss": 0.009895097464323044, - "step": 5470 - }, - { - "epoch": 0.9335038363171355, - "grad_norm": 0.08351470530033112, - "learning_rate": 6.656919698259452e-05, - "loss": 0.010659988969564438, - "step": 5475 - }, - { - "epoch": 0.9343563512361467, - "grad_norm": 0.07085305452346802, - "learning_rate": 6.655300373274575e-05, - "loss": 0.008971457183361054, - "step": 5480 - }, - { - "epoch": 0.9352088661551577, - "grad_norm": 0.06461923569440842, - "learning_rate": 6.653679691998839e-05, - "loss": 0.009138958156108856, - "step": 5485 - }, - { - "epoch": 0.9360613810741688, - "grad_norm": 0.11675399541854858, - "learning_rate": 6.652057655188832e-05, - "loss": 0.008388948440551759, - "step": 5490 - }, - { - "epoch": 0.9369138959931799, - "grad_norm": 0.09698229283094406, - "learning_rate": 6.650434263601777e-05, - "loss": 0.011885351687669753, - "step": 5495 - }, - { - "epoch": 0.9377664109121909, - "grad_norm": 0.06786464154720306, - "learning_rate": 6.648809517995524e-05, - "loss": 0.012351768463850022, - "step": 5500 - }, - { - "epoch": 0.9386189258312021, - "grad_norm": 0.09192351251840591, - "learning_rate": 6.647183419128561e-05, - "loss": 0.010940471291542053, - "step": 5505 - }, - { - "epoch": 0.9394714407502132, - "grad_norm": 0.05660499259829521, - "learning_rate": 6.645555967760003e-05, - "loss": 0.01160380095243454, - "step": 5510 - }, - { - "epoch": 0.9403239556692242, - "grad_norm": 0.0829106792807579, - "learning_rate": 6.6439271646496e-05, - "loss": 0.009475469589233398, - "step": 5515 - }, - { - "epoch": 0.9411764705882353, - "grad_norm": 0.08007021248340607, - "learning_rate": 6.642297010557733e-05, - "loss": 0.010524801164865493, - "step": 5520 - }, - { - "epoch": 0.9420289855072463, - "grad_norm": 0.07578855752944946, - "learning_rate": 6.640665506245406e-05, - "loss": 0.008864742517471314, - "step": 5525 - }, - { - "epoch": 0.9428815004262575, - "grad_norm": 0.064674511551857, - "learning_rate": 6.639032652474265e-05, - "loss": 0.010619612783193589, - "step": 5530 - }, - { - "epoch": 0.9437340153452686, - "grad_norm": 0.07176528871059418, - "learning_rate": 6.637398450006579e-05, - "loss": 0.011696039140224457, - "step": 5535 - }, - { - "epoch": 0.9445865302642796, - "grad_norm": 0.16578713059425354, - "learning_rate": 6.635762899605248e-05, - "loss": 0.009867334365844726, - "step": 5540 - }, - { - "epoch": 0.9454390451832907, - "grad_norm": 0.069394052028656, - "learning_rate": 6.634126002033802e-05, - "loss": 0.012951886653900147, - "step": 5545 - }, - { - "epoch": 0.9462915601023018, - "grad_norm": 0.07247213274240494, - "learning_rate": 6.632487758056397e-05, - "loss": 0.009127721190452576, - "step": 5550 - }, - { - "epoch": 0.9471440750213129, - "grad_norm": 0.11084317415952682, - "learning_rate": 6.630848168437822e-05, - "loss": 0.009197863936424255, - "step": 5555 - }, - { - "epoch": 0.947996589940324, - "grad_norm": 0.08719248324632645, - "learning_rate": 6.629207233943492e-05, - "loss": 0.010768509656190871, - "step": 5560 - }, - { - "epoch": 0.948849104859335, - "grad_norm": 0.0857851505279541, - "learning_rate": 6.62756495533945e-05, - "loss": 0.009163270145654679, - "step": 5565 - }, - { - "epoch": 0.9497016197783461, - "grad_norm": 0.09808778762817383, - "learning_rate": 6.625921333392362e-05, - "loss": 0.01005362868309021, - "step": 5570 - }, - { - "epoch": 0.9505541346973572, - "grad_norm": 0.08482059836387634, - "learning_rate": 6.624276368869532e-05, - "loss": 0.008303509652614593, - "step": 5575 - }, - { - "epoch": 0.9514066496163683, - "grad_norm": 0.07460886240005493, - "learning_rate": 6.62263006253888e-05, - "loss": 0.00857923850417137, - "step": 5580 - }, - { - "epoch": 0.9522591645353794, - "grad_norm": 0.09494256228208542, - "learning_rate": 6.620982415168956e-05, - "loss": 0.009573552012443542, - "step": 5585 - }, - { - "epoch": 0.9531116794543905, - "grad_norm": 0.08753519505262375, - "learning_rate": 6.61933342752894e-05, - "loss": 0.010430536419153213, - "step": 5590 - }, - { - "epoch": 0.9539641943734015, - "grad_norm": 0.05132949724793434, - "learning_rate": 6.617683100388632e-05, - "loss": 0.009080658107995987, - "step": 5595 - }, - { - "epoch": 0.9548167092924126, - "grad_norm": 0.07516856491565704, - "learning_rate": 6.61603143451846e-05, - "loss": 0.009718524664640427, - "step": 5600 - }, - { - "epoch": 0.9556692242114238, - "grad_norm": 0.12723733484745026, - "learning_rate": 6.614378430689477e-05, - "loss": 0.01136334240436554, - "step": 5605 - }, - { - "epoch": 0.9565217391304348, - "grad_norm": 0.0905863493680954, - "learning_rate": 6.612724089673359e-05, - "loss": 0.013209307193756103, - "step": 5610 - }, - { - "epoch": 0.9573742540494459, - "grad_norm": 0.06084009259939194, - "learning_rate": 6.611068412242409e-05, - "loss": 0.01001257449388504, - "step": 5615 - }, - { - "epoch": 0.9582267689684569, - "grad_norm": 0.11799532175064087, - "learning_rate": 6.60941139916955e-05, - "loss": 0.008610795438289642, - "step": 5620 - }, - { - "epoch": 0.959079283887468, - "grad_norm": 0.08627504110336304, - "learning_rate": 6.607753051228333e-05, - "loss": 0.009049218893051148, - "step": 5625 - }, - { - "epoch": 0.9599317988064792, - "grad_norm": 0.0910186693072319, - "learning_rate": 6.60609336919293e-05, - "loss": 0.01068672090768814, - "step": 5630 - }, - { - "epoch": 0.9607843137254902, - "grad_norm": 0.06503022462129593, - "learning_rate": 6.604432353838134e-05, - "loss": 0.010604655742645264, - "step": 5635 - }, - { - "epoch": 0.9616368286445013, - "grad_norm": 0.07681523263454437, - "learning_rate": 6.602770005939363e-05, - "loss": 0.010527564585208893, - "step": 5640 - }, - { - "epoch": 0.9624893435635123, - "grad_norm": 0.0680806040763855, - "learning_rate": 6.601106326272659e-05, - "loss": 0.009375665336847305, - "step": 5645 - }, - { - "epoch": 0.9633418584825234, - "grad_norm": 0.06601905822753906, - "learning_rate": 6.599441315614678e-05, - "loss": 0.009470004588365555, - "step": 5650 - }, - { - "epoch": 0.9641943734015346, - "grad_norm": 0.06291890889406204, - "learning_rate": 6.597774974742706e-05, - "loss": 0.012320800870656966, - "step": 5655 - }, - { - "epoch": 0.9650468883205456, - "grad_norm": 0.0956176221370697, - "learning_rate": 6.596107304434645e-05, - "loss": 0.01018187329173088, - "step": 5660 - }, - { - "epoch": 0.9658994032395567, - "grad_norm": 0.06642715632915497, - "learning_rate": 6.59443830546902e-05, - "loss": 0.010271859169006348, - "step": 5665 - }, - { - "epoch": 0.9667519181585678, - "grad_norm": 0.06783592700958252, - "learning_rate": 6.592767978624973e-05, - "loss": 0.00986798033118248, - "step": 5670 - }, - { - "epoch": 0.9676044330775788, - "grad_norm": 0.05877846106886864, - "learning_rate": 6.591096324682272e-05, - "loss": 0.009708859026432037, - "step": 5675 - }, - { - "epoch": 0.96845694799659, - "grad_norm": 0.049258604645729065, - "learning_rate": 6.589423344421297e-05, - "loss": 0.008615868538618088, - "step": 5680 - }, - { - "epoch": 0.969309462915601, - "grad_norm": 0.05622515454888344, - "learning_rate": 6.587749038623052e-05, - "loss": 0.010138686001300811, - "step": 5685 - }, - { - "epoch": 0.9701619778346121, - "grad_norm": 0.05719893425703049, - "learning_rate": 6.586073408069159e-05, - "loss": 0.00869678258895874, - "step": 5690 - }, - { - "epoch": 0.9710144927536232, - "grad_norm": 0.07675095647573471, - "learning_rate": 6.584396453541856e-05, - "loss": 0.010557885468006133, - "step": 5695 - }, - { - "epoch": 0.9718670076726342, - "grad_norm": 0.0999779924750328, - "learning_rate": 6.582718175824006e-05, - "loss": 0.010667790472507478, - "step": 5700 - }, - { - "epoch": 0.9727195225916454, - "grad_norm": 0.04620293527841568, - "learning_rate": 6.58103857569908e-05, - "loss": 0.0078192800283432, - "step": 5705 - }, - { - "epoch": 0.9735720375106565, - "grad_norm": 0.06725125759840012, - "learning_rate": 6.579357653951174e-05, - "loss": 0.010441574454307555, - "step": 5710 - }, - { - "epoch": 0.9744245524296675, - "grad_norm": 0.09062530100345612, - "learning_rate": 6.577675411364997e-05, - "loss": 0.011757946014404297, - "step": 5715 - }, - { - "epoch": 0.9752770673486786, - "grad_norm": 0.050651032477617264, - "learning_rate": 6.575991848725876e-05, - "loss": 0.009817829728126526, - "step": 5720 - }, - { - "epoch": 0.9761295822676896, - "grad_norm": 0.06951560825109482, - "learning_rate": 6.574306966819755e-05, - "loss": 0.008903174102306366, - "step": 5725 - }, - { - "epoch": 0.9769820971867008, - "grad_norm": 0.0733589306473732, - "learning_rate": 6.57262076643319e-05, - "loss": 0.009786784648895264, - "step": 5730 - }, - { - "epoch": 0.9778346121057119, - "grad_norm": 0.0736282467842102, - "learning_rate": 6.570933248353359e-05, - "loss": 0.012176553905010223, - "step": 5735 - }, - { - "epoch": 0.9786871270247229, - "grad_norm": 0.069704569876194, - "learning_rate": 6.56924441336805e-05, - "loss": 0.008654942363500595, - "step": 5740 - }, - { - "epoch": 0.979539641943734, - "grad_norm": 0.07497496902942657, - "learning_rate": 6.567554262265668e-05, - "loss": 0.010902392119169236, - "step": 5745 - }, - { - "epoch": 0.9803921568627451, - "grad_norm": 0.07559038698673248, - "learning_rate": 6.56586279583523e-05, - "loss": 0.00921270027756691, - "step": 5750 - }, - { - "epoch": 0.9812446717817562, - "grad_norm": 0.05894545465707779, - "learning_rate": 6.56417001486637e-05, - "loss": 0.009073206037282944, - "step": 5755 - }, - { - "epoch": 0.9820971867007673, - "grad_norm": 0.06555377691984177, - "learning_rate": 6.562475920149335e-05, - "loss": 0.010872729122638702, - "step": 5760 - }, - { - "epoch": 0.9829497016197783, - "grad_norm": 0.15036429464817047, - "learning_rate": 6.560780512474984e-05, - "loss": 0.009879975020885468, - "step": 5765 - }, - { - "epoch": 0.9838022165387894, - "grad_norm": 0.06842299550771713, - "learning_rate": 6.559083792634791e-05, - "loss": 0.00965554341673851, - "step": 5770 - }, - { - "epoch": 0.9846547314578005, - "grad_norm": 0.0486510805785656, - "learning_rate": 6.557385761420839e-05, - "loss": 0.00872802734375, - "step": 5775 - }, - { - "epoch": 0.9855072463768116, - "grad_norm": 0.059796739369630814, - "learning_rate": 6.555686419625826e-05, - "loss": 0.009720289707183838, - "step": 5780 - }, - { - "epoch": 0.9863597612958227, - "grad_norm": 0.07971934229135513, - "learning_rate": 6.553985768043062e-05, - "loss": 0.008043316006660462, - "step": 5785 - }, - { - "epoch": 0.9872122762148338, - "grad_norm": 0.09556971490383148, - "learning_rate": 6.552283807466468e-05, - "loss": 0.009030704945325851, - "step": 5790 - }, - { - "epoch": 0.9880647911338448, - "grad_norm": 0.04631726071238518, - "learning_rate": 6.550580538690577e-05, - "loss": 0.007388583570718765, - "step": 5795 - }, - { - "epoch": 0.9889173060528559, - "grad_norm": 0.08172665536403656, - "learning_rate": 6.548875962510528e-05, - "loss": 0.007863265275955201, - "step": 5800 - }, - { - "epoch": 0.989769820971867, - "grad_norm": 0.11129096895456314, - "learning_rate": 6.547170079722076e-05, - "loss": 0.012218999862670898, - "step": 5805 - }, - { - "epoch": 0.9906223358908781, - "grad_norm": 0.06619804352521896, - "learning_rate": 6.545462891121584e-05, - "loss": 0.007535400986671448, - "step": 5810 - }, - { - "epoch": 0.9914748508098892, - "grad_norm": 0.11495351046323776, - "learning_rate": 6.543754397506025e-05, - "loss": 0.0121284119784832, - "step": 5815 - }, - { - "epoch": 0.9923273657289002, - "grad_norm": 0.06017669290304184, - "learning_rate": 6.542044599672978e-05, - "loss": 0.008776353299617767, - "step": 5820 - }, - { - "epoch": 0.9931798806479113, - "grad_norm": 0.08049561828374863, - "learning_rate": 6.540333498420637e-05, - "loss": 0.010460492223501205, - "step": 5825 - }, - { - "epoch": 0.9940323955669225, - "grad_norm": 0.07041274011135101, - "learning_rate": 6.538621094547798e-05, - "loss": 0.008290639519691468, - "step": 5830 - }, - { - "epoch": 0.9948849104859335, - "grad_norm": 0.061981480568647385, - "learning_rate": 6.53690738885387e-05, - "loss": 0.007011125236749649, - "step": 5835 - }, - { - "epoch": 0.9957374254049446, - "grad_norm": 0.09022640436887741, - "learning_rate": 6.535192382138867e-05, - "loss": 0.012455084919929504, - "step": 5840 - }, - { - "epoch": 0.9965899403239556, - "grad_norm": 0.05652628839015961, - "learning_rate": 6.53347607520341e-05, - "loss": 0.011704784631729127, - "step": 5845 - }, - { - "epoch": 0.9974424552429667, - "grad_norm": 0.0717577114701271, - "learning_rate": 6.531758468848732e-05, - "loss": 0.007738448679447174, - "step": 5850 - }, - { - "epoch": 0.9982949701619779, - "grad_norm": 0.04797588661313057, - "learning_rate": 6.530039563876665e-05, - "loss": 0.00894927978515625, - "step": 5855 - }, - { - "epoch": 0.9991474850809889, - "grad_norm": 0.06541015207767487, - "learning_rate": 6.528319361089651e-05, - "loss": 0.00731588676571846, - "step": 5860 - }, - { - "epoch": 0.9998294970161978, - "eval_loss": 0.03369956836104393, - "eval_runtime": 3.5892, - "eval_samples_per_second": 70.21, - "eval_steps_per_second": 1.114, - "step": 5864 - }, - { - "eval_cer_subset": 0.014444607292328236, - "eval_cer_subset_edit_distance": 887, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 5864 - }, - { - "epoch": 1.0, - "grad_norm": 0.06960473209619522, - "learning_rate": 6.52659786129074e-05, - "loss": 0.009423434734344482, - "step": 5865 - }, - { - "epoch": 1.000852514919011, - "grad_norm": 0.08438396453857422, - "learning_rate": 6.524875065283587e-05, - "loss": 0.009560108184814453, - "step": 5870 - }, - { - "epoch": 1.0017050298380221, - "grad_norm": 0.06281089037656784, - "learning_rate": 6.523150973872446e-05, - "loss": 0.007503298670053482, - "step": 5875 - }, - { - "epoch": 1.0025575447570332, - "grad_norm": 0.11766793578863144, - "learning_rate": 6.52142558786218e-05, - "loss": 0.008890827000141144, - "step": 5880 - }, - { - "epoch": 1.0034100596760442, - "grad_norm": 0.058115314692258835, - "learning_rate": 6.519698908058262e-05, - "loss": 0.006190531700849533, - "step": 5885 - }, - { - "epoch": 1.0042625745950555, - "grad_norm": 0.06857501715421677, - "learning_rate": 6.51797093526676e-05, - "loss": 0.007162582129240036, - "step": 5890 - }, - { - "epoch": 1.0051150895140666, - "grad_norm": 0.03517467528581619, - "learning_rate": 6.51624167029435e-05, - "loss": 0.0060476396232843396, - "step": 5895 - }, - { - "epoch": 1.0059676044330776, - "grad_norm": 0.10047292709350586, - "learning_rate": 6.514511113948307e-05, - "loss": 0.006416718661785126, - "step": 5900 - }, - { - "epoch": 1.0068201193520887, - "grad_norm": 0.07266796380281448, - "learning_rate": 6.512779267036518e-05, - "loss": 0.005519292503595352, - "step": 5905 - }, - { - "epoch": 1.0076726342710998, - "grad_norm": 0.05385264754295349, - "learning_rate": 6.511046130367464e-05, - "loss": 0.006731215119361878, - "step": 5910 - }, - { - "epoch": 1.0085251491901108, - "grad_norm": 0.0927869975566864, - "learning_rate": 6.50931170475023e-05, - "loss": 0.0073065564036369325, - "step": 5915 - }, - { - "epoch": 1.0093776641091219, - "grad_norm": 0.08416371792554855, - "learning_rate": 6.507575990994504e-05, - "loss": 0.005843915045261383, - "step": 5920 - }, - { - "epoch": 1.010230179028133, - "grad_norm": 0.06585095822811127, - "learning_rate": 6.505838989910576e-05, - "loss": 0.006345044076442719, - "step": 5925 - }, - { - "epoch": 1.011082693947144, - "grad_norm": 0.06341785192489624, - "learning_rate": 6.504100702309336e-05, - "loss": 0.005391617119312286, - "step": 5930 - }, - { - "epoch": 1.011935208866155, - "grad_norm": 0.08260001242160797, - "learning_rate": 6.502361129002273e-05, - "loss": 0.008031262457370758, - "step": 5935 - }, - { - "epoch": 1.0127877237851663, - "grad_norm": 0.08805666118860245, - "learning_rate": 6.500620270801478e-05, - "loss": 0.006408621370792389, - "step": 5940 - }, - { - "epoch": 1.0136402387041774, - "grad_norm": 0.0704861581325531, - "learning_rate": 6.498878128519642e-05, - "loss": 0.006208440661430359, - "step": 5945 - }, - { - "epoch": 1.0144927536231885, - "grad_norm": 0.07539117336273193, - "learning_rate": 6.497134702970055e-05, - "loss": 0.005263582617044449, - "step": 5950 - }, - { - "epoch": 1.0153452685421995, - "grad_norm": 0.022507963702082634, - "learning_rate": 6.495389994966606e-05, - "loss": 0.005692056566476822, - "step": 5955 - }, - { - "epoch": 1.0161977834612106, - "grad_norm": 0.05641510710120201, - "learning_rate": 6.493644005323783e-05, - "loss": 0.007954449951648712, - "step": 5960 - }, - { - "epoch": 1.0170502983802217, - "grad_norm": 0.04853788763284683, - "learning_rate": 6.49189673485667e-05, - "loss": 0.006910678744316101, - "step": 5965 - }, - { - "epoch": 1.0179028132992327, - "grad_norm": 0.07868898659944534, - "learning_rate": 6.490148184380956e-05, - "loss": 0.007678037136793136, - "step": 5970 - }, - { - "epoch": 1.0187553282182438, - "grad_norm": 0.08481275290250778, - "learning_rate": 6.488398354712917e-05, - "loss": 0.0060794509947299956, - "step": 5975 - }, - { - "epoch": 1.0196078431372548, - "grad_norm": 0.05573422089219093, - "learning_rate": 6.486647246669435e-05, - "loss": 0.0050107244402170185, - "step": 5980 - }, - { - "epoch": 1.020460358056266, - "grad_norm": 0.10777781158685684, - "learning_rate": 6.484894861067983e-05, - "loss": 0.006611569225788117, - "step": 5985 - }, - { - "epoch": 1.0213128729752772, - "grad_norm": 0.041842151433229446, - "learning_rate": 6.483141198726635e-05, - "loss": 0.0060344856232404705, - "step": 5990 - }, - { - "epoch": 1.0221653878942882, - "grad_norm": 0.05765567347407341, - "learning_rate": 6.48138626046406e-05, - "loss": 0.005772604793310166, - "step": 5995 - }, - { - "epoch": 1.0230179028132993, - "grad_norm": 0.05987582355737686, - "learning_rate": 6.479630047099517e-05, - "loss": 0.006899695098400116, - "step": 6000 - }, - { - "epoch": 1.0238704177323104, - "grad_norm": 0.046085257083177567, - "learning_rate": 6.477872559452867e-05, - "loss": 0.006151453405618667, - "step": 6005 - }, - { - "epoch": 1.0247229326513214, - "grad_norm": 0.05994739755988121, - "learning_rate": 6.476113798344566e-05, - "loss": 0.007787984609603882, - "step": 6010 - }, - { - "epoch": 1.0255754475703325, - "grad_norm": 0.08866287767887115, - "learning_rate": 6.47435376459566e-05, - "loss": 0.007754974067211151, - "step": 6015 - }, - { - "epoch": 1.0264279624893435, - "grad_norm": 0.07492240518331528, - "learning_rate": 6.472592459027793e-05, - "loss": 0.005562775582075119, - "step": 6020 - }, - { - "epoch": 1.0272804774083546, - "grad_norm": 0.058771468698978424, - "learning_rate": 6.470829882463198e-05, - "loss": 0.008101420104503631, - "step": 6025 - }, - { - "epoch": 1.0281329923273657, - "grad_norm": 0.08099868148565292, - "learning_rate": 6.469066035724708e-05, - "loss": 0.007585109025239944, - "step": 6030 - }, - { - "epoch": 1.0289855072463767, - "grad_norm": 0.09368649870157242, - "learning_rate": 6.467300919635743e-05, - "loss": 0.007342393696308136, - "step": 6035 - }, - { - "epoch": 1.029838022165388, - "grad_norm": 0.07358572632074356, - "learning_rate": 6.465534535020317e-05, - "loss": 0.007179292291402817, - "step": 6040 - }, - { - "epoch": 1.030690537084399, - "grad_norm": 0.0542459636926651, - "learning_rate": 6.46376688270304e-05, - "loss": 0.007178785651922226, - "step": 6045 - }, - { - "epoch": 1.0315430520034101, - "grad_norm": 0.04534808546304703, - "learning_rate": 6.461997963509109e-05, - "loss": 0.005939013883471489, - "step": 6050 - }, - { - "epoch": 1.0323955669224212, - "grad_norm": 0.04498334974050522, - "learning_rate": 6.460227778264314e-05, - "loss": 0.007932021468877792, - "step": 6055 - }, - { - "epoch": 1.0332480818414322, - "grad_norm": 0.09503943473100662, - "learning_rate": 6.458456327795038e-05, - "loss": 0.006005316227674484, - "step": 6060 - }, - { - "epoch": 1.0341005967604433, - "grad_norm": 0.06634567677974701, - "learning_rate": 6.456683612928252e-05, - "loss": 0.00472346730530262, - "step": 6065 - }, - { - "epoch": 1.0349531116794544, - "grad_norm": 0.06090138852596283, - "learning_rate": 6.454909634491518e-05, - "loss": 0.0071956045925617215, - "step": 6070 - }, - { - "epoch": 1.0358056265984654, - "grad_norm": 0.09833965450525284, - "learning_rate": 6.453134393312988e-05, - "loss": 0.00738539919257164, - "step": 6075 - }, - { - "epoch": 1.0366581415174765, - "grad_norm": 0.07924133539199829, - "learning_rate": 6.451357890221406e-05, - "loss": 0.008464773744344711, - "step": 6080 - }, - { - "epoch": 1.0375106564364875, - "grad_norm": 0.04132373258471489, - "learning_rate": 6.4495801260461e-05, - "loss": 0.005705388635396958, - "step": 6085 - }, - { - "epoch": 1.0383631713554988, - "grad_norm": 0.08653424680233002, - "learning_rate": 6.44780110161699e-05, - "loss": 0.00777137503027916, - "step": 6090 - }, - { - "epoch": 1.0392156862745099, - "grad_norm": 0.08147025108337402, - "learning_rate": 6.446020817764583e-05, - "loss": 0.005003783106803894, - "step": 6095 - }, - { - "epoch": 1.040068201193521, - "grad_norm": 0.07091398537158966, - "learning_rate": 6.444239275319977e-05, - "loss": 0.005957254022359848, - "step": 6100 - }, - { - "epoch": 1.040920716112532, - "grad_norm": 0.06259306520223618, - "learning_rate": 6.442456475114855e-05, - "loss": 0.005096634104847908, - "step": 6105 - }, - { - "epoch": 1.041773231031543, - "grad_norm": 0.07044103741645813, - "learning_rate": 6.440672417981485e-05, - "loss": 0.00557241328060627, - "step": 6110 - }, - { - "epoch": 1.0426257459505541, - "grad_norm": 0.05029159039258957, - "learning_rate": 6.438887104752726e-05, - "loss": 0.0056043524295091626, - "step": 6115 - }, - { - "epoch": 1.0434782608695652, - "grad_norm": 0.04778699576854706, - "learning_rate": 6.437100536262022e-05, - "loss": 0.00855453684926033, - "step": 6120 - }, - { - "epoch": 1.0443307757885762, - "grad_norm": 0.07467184215784073, - "learning_rate": 6.435312713343401e-05, - "loss": 0.006690071523189544, - "step": 6125 - }, - { - "epoch": 1.0451832907075873, - "grad_norm": 0.07189153879880905, - "learning_rate": 6.433523636831481e-05, - "loss": 0.007009527087211609, - "step": 6130 - }, - { - "epoch": 1.0460358056265984, - "grad_norm": 0.08000020682811737, - "learning_rate": 6.431733307561459e-05, - "loss": 0.007411211729049683, - "step": 6135 - }, - { - "epoch": 1.0468883205456097, - "grad_norm": 0.06737730652093887, - "learning_rate": 6.429941726369124e-05, - "loss": 0.006843548268079758, - "step": 6140 - }, - { - "epoch": 1.0477408354646207, - "grad_norm": 0.09834714978933334, - "learning_rate": 6.428148894090841e-05, - "loss": 0.007167841494083405, - "step": 6145 - }, - { - "epoch": 1.0485933503836318, - "grad_norm": 0.06415695697069168, - "learning_rate": 6.426354811563567e-05, - "loss": 0.005131457373499871, - "step": 6150 - }, - { - "epoch": 1.0494458653026428, - "grad_norm": 0.07823871076107025, - "learning_rate": 6.424559479624839e-05, - "loss": 0.004797356575727463, - "step": 6155 - }, - { - "epoch": 1.050298380221654, - "grad_norm": 0.07165013998746872, - "learning_rate": 6.422762899112777e-05, - "loss": 0.006430945545434952, - "step": 6160 - }, - { - "epoch": 1.051150895140665, - "grad_norm": 0.10924427956342697, - "learning_rate": 6.420965070866086e-05, - "loss": 0.008151047676801682, - "step": 6165 - }, - { - "epoch": 1.052003410059676, - "grad_norm": 0.10381831228733063, - "learning_rate": 6.41916599572405e-05, - "loss": 0.009056917577981948, - "step": 6170 - }, - { - "epoch": 1.052855924978687, - "grad_norm": 0.05251248553395271, - "learning_rate": 6.417365674526539e-05, - "loss": 0.004240944981575012, - "step": 6175 - }, - { - "epoch": 1.0537084398976981, - "grad_norm": 0.0812104344367981, - "learning_rate": 6.415564108114001e-05, - "loss": 0.008805926889181137, - "step": 6180 - }, - { - "epoch": 1.0545609548167092, - "grad_norm": 0.05640942230820656, - "learning_rate": 6.413761297327469e-05, - "loss": 0.005727213248610497, - "step": 6185 - }, - { - "epoch": 1.0554134697357205, - "grad_norm": 0.10114334523677826, - "learning_rate": 6.411957243008552e-05, - "loss": 0.008660107105970382, - "step": 6190 - }, - { - "epoch": 1.0562659846547315, - "grad_norm": 0.06809760630130768, - "learning_rate": 6.410151945999447e-05, - "loss": 0.006786180287599563, - "step": 6195 - }, - { - "epoch": 1.0571184995737426, - "grad_norm": 0.08121974021196365, - "learning_rate": 6.408345407142924e-05, - "loss": 0.004730105027556419, - "step": 6200 - }, - { - "epoch": 1.0579710144927537, - "grad_norm": 0.0630379393696785, - "learning_rate": 6.406537627282336e-05, - "loss": 0.006532897800207138, - "step": 6205 - }, - { - "epoch": 1.0588235294117647, - "grad_norm": 0.09354323893785477, - "learning_rate": 6.404728607261612e-05, - "loss": 0.008165966719388962, - "step": 6210 - }, - { - "epoch": 1.0596760443307758, - "grad_norm": 0.0509798526763916, - "learning_rate": 6.402918347925267e-05, - "loss": 0.006781977415084839, - "step": 6215 - }, - { - "epoch": 1.0605285592497868, - "grad_norm": 0.09830603748559952, - "learning_rate": 6.401106850118389e-05, - "loss": 0.00675075501203537, - "step": 6220 - }, - { - "epoch": 1.061381074168798, - "grad_norm": 0.08417326211929321, - "learning_rate": 6.399294114686645e-05, - "loss": 0.005759935826063156, - "step": 6225 - }, - { - "epoch": 1.062233589087809, - "grad_norm": 0.04999511316418648, - "learning_rate": 6.39748014247628e-05, - "loss": 0.0059943776577711105, - "step": 6230 - }, - { - "epoch": 1.06308610400682, - "grad_norm": 0.0355304591357708, - "learning_rate": 6.395664934334116e-05, - "loss": 0.003978967294096946, - "step": 6235 - }, - { - "epoch": 1.0639386189258313, - "grad_norm": 0.09096778929233551, - "learning_rate": 6.393848491107554e-05, - "loss": 0.006428928673267364, - "step": 6240 - }, - { - "epoch": 1.0647911338448424, - "grad_norm": 0.09047707170248032, - "learning_rate": 6.392030813644569e-05, - "loss": 0.005584535002708435, - "step": 6245 - }, - { - "epoch": 1.0656436487638534, - "grad_norm": 0.07133036106824875, - "learning_rate": 6.390211902793714e-05, - "loss": 0.00610351674258709, - "step": 6250 - }, - { - "epoch": 1.0664961636828645, - "grad_norm": 0.1025620549917221, - "learning_rate": 6.388391759404117e-05, - "loss": 0.006316560506820679, - "step": 6255 - }, - { - "epoch": 1.0673486786018755, - "grad_norm": 0.0922650694847107, - "learning_rate": 6.386570384325482e-05, - "loss": 0.008717238903045654, - "step": 6260 - }, - { - "epoch": 1.0682011935208866, - "grad_norm": 0.094338558614254, - "learning_rate": 6.384747778408085e-05, - "loss": 0.0067199327051639555, - "step": 6265 - }, - { - "epoch": 1.0690537084398977, - "grad_norm": 0.07260075211524963, - "learning_rate": 6.382923942502782e-05, - "loss": 0.007249505072832107, - "step": 6270 - }, - { - "epoch": 1.0699062233589087, - "grad_norm": 0.06572386622428894, - "learning_rate": 6.381098877460999e-05, - "loss": 0.007879015803337098, - "step": 6275 - }, - { - "epoch": 1.0707587382779198, - "grad_norm": 0.11646077036857605, - "learning_rate": 6.379272584134737e-05, - "loss": 0.006477512419223785, - "step": 6280 - }, - { - "epoch": 1.0716112531969308, - "grad_norm": 0.14154180884361267, - "learning_rate": 6.37744506337657e-05, - "loss": 0.0069471016526222226, - "step": 6285 - }, - { - "epoch": 1.0724637681159421, - "grad_norm": 0.113606296479702, - "learning_rate": 6.375616316039647e-05, - "loss": 0.010210946947336198, - "step": 6290 - }, - { - "epoch": 1.0733162830349532, - "grad_norm": 0.07193166017532349, - "learning_rate": 6.373786342977687e-05, - "loss": 0.00820360854268074, - "step": 6295 - }, - { - "epoch": 1.0741687979539642, - "grad_norm": 0.06180251017212868, - "learning_rate": 6.371955145044983e-05, - "loss": 0.006048502773046494, - "step": 6300 - }, - { - "epoch": 1.0750213128729753, - "grad_norm": 0.06956778466701508, - "learning_rate": 6.370122723096398e-05, - "loss": 0.005345676839351654, - "step": 6305 - }, - { - "epoch": 1.0758738277919864, - "grad_norm": 0.09170625358819962, - "learning_rate": 6.368289077987368e-05, - "loss": 0.0068355493247509004, - "step": 6310 - }, - { - "epoch": 1.0767263427109974, - "grad_norm": 0.07023731619119644, - "learning_rate": 6.366454210573901e-05, - "loss": 0.004600600153207779, - "step": 6315 - }, - { - "epoch": 1.0775788576300085, - "grad_norm": 0.07429320365190506, - "learning_rate": 6.36461812171257e-05, - "loss": 0.006272794306278228, - "step": 6320 - }, - { - "epoch": 1.0784313725490196, - "grad_norm": 0.11356805264949799, - "learning_rate": 6.362780812260528e-05, - "loss": 0.0048342026770114895, - "step": 6325 - }, - { - "epoch": 1.0792838874680306, - "grad_norm": 0.11231013387441635, - "learning_rate": 6.360942283075489e-05, - "loss": 0.00653451681137085, - "step": 6330 - }, - { - "epoch": 1.0801364023870417, - "grad_norm": 0.09655431658029556, - "learning_rate": 6.359102535015739e-05, - "loss": 0.008280844241380692, - "step": 6335 - }, - { - "epoch": 1.080988917306053, - "grad_norm": 0.10172779113054276, - "learning_rate": 6.357261568940135e-05, - "loss": 0.007757744938135147, - "step": 6340 - }, - { - "epoch": 1.081841432225064, - "grad_norm": 0.06417235732078552, - "learning_rate": 6.3554193857081e-05, - "loss": 0.007309675216674805, - "step": 6345 - }, - { - "epoch": 1.082693947144075, - "grad_norm": 0.053178418427705765, - "learning_rate": 6.35357598617963e-05, - "loss": 0.007162143290042877, - "step": 6350 - }, - { - "epoch": 1.0835464620630861, - "grad_norm": 0.07408315688371658, - "learning_rate": 6.351731371215278e-05, - "loss": 0.008392173796892166, - "step": 6355 - }, - { - "epoch": 1.0843989769820972, - "grad_norm": 0.03643275052309036, - "learning_rate": 6.349885541676179e-05, - "loss": 0.00610513798892498, - "step": 6360 - }, - { - "epoch": 1.0852514919011083, - "grad_norm": 0.04701307415962219, - "learning_rate": 6.348038498424023e-05, - "loss": 0.00612705871462822, - "step": 6365 - }, - { - "epoch": 1.0861040068201193, - "grad_norm": 0.05050053820014, - "learning_rate": 6.346190242321075e-05, - "loss": 0.005640604719519615, - "step": 6370 - }, - { - "epoch": 1.0869565217391304, - "grad_norm": 0.05203640088438988, - "learning_rate": 6.344340774230159e-05, - "loss": 0.005340654775500298, - "step": 6375 - }, - { - "epoch": 1.0878090366581414, - "grad_norm": 0.07451866567134857, - "learning_rate": 6.342490095014669e-05, - "loss": 0.006459225714206695, - "step": 6380 - }, - { - "epoch": 1.0886615515771525, - "grad_norm": 0.09951499849557877, - "learning_rate": 6.340638205538566e-05, - "loss": 0.008529558777809143, - "step": 6385 - }, - { - "epoch": 1.0895140664961638, - "grad_norm": 0.06064416840672493, - "learning_rate": 6.33878510666637e-05, - "loss": 0.007885071635246276, - "step": 6390 - }, - { - "epoch": 1.0903665814151748, - "grad_norm": 0.09382321685552597, - "learning_rate": 6.33693079926317e-05, - "loss": 0.007992906123399734, - "step": 6395 - }, - { - "epoch": 1.091219096334186, - "grad_norm": 0.054066915065050125, - "learning_rate": 6.335075284194621e-05, - "loss": 0.007473263889551163, - "step": 6400 - }, - { - "epoch": 1.092071611253197, - "grad_norm": 0.06763065606355667, - "learning_rate": 6.333218562326937e-05, - "loss": 0.006374929845333099, - "step": 6405 - }, - { - "epoch": 1.092924126172208, - "grad_norm": 0.0656818076968193, - "learning_rate": 6.331360634526899e-05, - "loss": 0.006085469573736191, - "step": 6410 - }, - { - "epoch": 1.093776641091219, - "grad_norm": 0.060463279485702515, - "learning_rate": 6.329501501661848e-05, - "loss": 0.005605050176382065, - "step": 6415 - }, - { - "epoch": 1.0946291560102301, - "grad_norm": 0.05734890326857567, - "learning_rate": 6.32764116459969e-05, - "loss": 0.00563613623380661, - "step": 6420 - }, - { - "epoch": 1.0954816709292412, - "grad_norm": 0.0502542182803154, - "learning_rate": 6.32577962420889e-05, - "loss": 0.004675766825675965, - "step": 6425 - }, - { - "epoch": 1.0963341858482523, - "grad_norm": 0.06550677120685577, - "learning_rate": 6.32391688135848e-05, - "loss": 0.006265480071306229, - "step": 6430 - }, - { - "epoch": 1.0971867007672633, - "grad_norm": 0.1298699826002121, - "learning_rate": 6.322052936918048e-05, - "loss": 0.008352620899677277, - "step": 6435 - }, - { - "epoch": 1.0980392156862746, - "grad_norm": 0.08422241359949112, - "learning_rate": 6.320187791757748e-05, - "loss": 0.005868597701191902, - "step": 6440 - }, - { - "epoch": 1.0988917306052857, - "grad_norm": 0.07807652652263641, - "learning_rate": 6.318321446748291e-05, - "loss": 0.010353461652994157, - "step": 6445 - }, - { - "epoch": 1.0997442455242967, - "grad_norm": 0.0663999617099762, - "learning_rate": 6.316453902760946e-05, - "loss": 0.00667201578617096, - "step": 6450 - }, - { - "epoch": 1.1005967604433078, - "grad_norm": 0.06362646073102951, - "learning_rate": 6.314585160667547e-05, - "loss": 0.006539353728294372, - "step": 6455 - }, - { - "epoch": 1.1014492753623188, - "grad_norm": 0.04160219058394432, - "learning_rate": 6.312715221340485e-05, - "loss": 0.004082740843296051, - "step": 6460 - }, - { - "epoch": 1.10230179028133, - "grad_norm": 0.05449013039469719, - "learning_rate": 6.31084408565271e-05, - "loss": 0.007537595182657242, - "step": 6465 - }, - { - "epoch": 1.103154305200341, - "grad_norm": 0.06819169223308563, - "learning_rate": 6.308971754477729e-05, - "loss": 0.006866573542356491, - "step": 6470 - }, - { - "epoch": 1.104006820119352, - "grad_norm": 0.06622573733329773, - "learning_rate": 6.307098228689611e-05, - "loss": 0.009055091440677643, - "step": 6475 - }, - { - "epoch": 1.104859335038363, - "grad_norm": 0.05732693895697594, - "learning_rate": 6.305223509162978e-05, - "loss": 0.006077280640602112, - "step": 6480 - }, - { - "epoch": 1.1057118499573741, - "grad_norm": 0.06630431115627289, - "learning_rate": 6.303347596773012e-05, - "loss": 0.0064442440867424015, - "step": 6485 - }, - { - "epoch": 1.1065643648763854, - "grad_norm": 0.06782745569944382, - "learning_rate": 6.301470492395451e-05, - "loss": 0.005072608217597007, - "step": 6490 - }, - { - "epoch": 1.1074168797953965, - "grad_norm": 0.05796601250767708, - "learning_rate": 6.299592196906591e-05, - "loss": 0.0074319586157798765, - "step": 6495 - }, - { - "epoch": 1.1082693947144076, - "grad_norm": 0.04627149552106857, - "learning_rate": 6.297712711183282e-05, - "loss": 0.00512685589492321, - "step": 6500 - }, - { - "epoch": 1.1091219096334186, - "grad_norm": 0.08210720866918564, - "learning_rate": 6.295832036102929e-05, - "loss": 0.006917678564786911, - "step": 6505 - }, - { - "epoch": 1.1099744245524297, - "grad_norm": 0.08367052674293518, - "learning_rate": 6.293950172543496e-05, - "loss": 0.0054212499409914015, - "step": 6510 - }, - { - "epoch": 1.1108269394714407, - "grad_norm": 0.08192700892686844, - "learning_rate": 6.292067121383499e-05, - "loss": 0.00791442021727562, - "step": 6515 - }, - { - "epoch": 1.1116794543904518, - "grad_norm": 0.05766206234693527, - "learning_rate": 6.290182883502008e-05, - "loss": 0.006540960818529129, - "step": 6520 - }, - { - "epoch": 1.1125319693094629, - "grad_norm": 0.07752664387226105, - "learning_rate": 6.28829745977865e-05, - "loss": 0.009196925908327103, - "step": 6525 - }, - { - "epoch": 1.113384484228474, - "grad_norm": 0.07419038563966751, - "learning_rate": 6.2864108510936e-05, - "loss": 0.007524489611387253, - "step": 6530 - }, - { - "epoch": 1.1142369991474852, - "grad_norm": 0.04851066321134567, - "learning_rate": 6.284523058327593e-05, - "loss": 0.004060015082359314, - "step": 6535 - }, - { - "epoch": 1.1150895140664963, - "grad_norm": 0.0764140635728836, - "learning_rate": 6.282634082361911e-05, - "loss": 0.006797254830598831, - "step": 6540 - }, - { - "epoch": 1.1159420289855073, - "grad_norm": 0.06873292475938797, - "learning_rate": 6.280743924078392e-05, - "loss": 0.007637844234704971, - "step": 6545 - }, - { - "epoch": 1.1167945439045184, - "grad_norm": 0.047832686454057693, - "learning_rate": 6.278852584359425e-05, - "loss": 0.00542646199464798, - "step": 6550 - }, - { - "epoch": 1.1176470588235294, - "grad_norm": 0.10061443597078323, - "learning_rate": 6.27696006408795e-05, - "loss": 0.008591605722904206, - "step": 6555 - }, - { - "epoch": 1.1184995737425405, - "grad_norm": 0.09729041159152985, - "learning_rate": 6.27506636414746e-05, - "loss": 0.0064585842192173, - "step": 6560 - }, - { - "epoch": 1.1193520886615516, - "grad_norm": 0.04427873343229294, - "learning_rate": 6.273171485421992e-05, - "loss": 0.004846593365073204, - "step": 6565 - }, - { - "epoch": 1.1202046035805626, - "grad_norm": 0.07813888788223267, - "learning_rate": 6.271275428796146e-05, - "loss": 0.007345958054065705, - "step": 6570 - }, - { - "epoch": 1.1210571184995737, - "grad_norm": 0.12500733137130737, - "learning_rate": 6.269378195155058e-05, - "loss": 0.006376177072525024, - "step": 6575 - }, - { - "epoch": 1.1219096334185847, - "grad_norm": 0.09636004269123077, - "learning_rate": 6.267479785384422e-05, - "loss": 0.0069282323122024534, - "step": 6580 - }, - { - "epoch": 1.1227621483375958, - "grad_norm": 0.06236017122864723, - "learning_rate": 6.265580200370478e-05, - "loss": 0.0050656192004680635, - "step": 6585 - }, - { - "epoch": 1.123614663256607, - "grad_norm": 0.0596463568508625, - "learning_rate": 6.263679441000019e-05, - "loss": 0.006198804825544357, - "step": 6590 - }, - { - "epoch": 1.1244671781756181, - "grad_norm": 0.0846855491399765, - "learning_rate": 6.261777508160378e-05, - "loss": 0.0073812372982501985, - "step": 6595 - }, - { - "epoch": 1.1253196930946292, - "grad_norm": 0.05228402093052864, - "learning_rate": 6.259874402739442e-05, - "loss": 0.006196213513612747, - "step": 6600 - }, - { - "epoch": 1.1261722080136403, - "grad_norm": 0.08395595103502274, - "learning_rate": 6.257970125625647e-05, - "loss": 0.0060448311269283295, - "step": 6605 - }, - { - "epoch": 1.1270247229326513, - "grad_norm": 0.055274344980716705, - "learning_rate": 6.25606467770797e-05, - "loss": 0.006578336656093598, - "step": 6610 - }, - { - "epoch": 1.1278772378516624, - "grad_norm": 0.054609380662441254, - "learning_rate": 6.254158059875936e-05, - "loss": 0.008093905448913575, - "step": 6615 - }, - { - "epoch": 1.1287297527706734, - "grad_norm": 0.05168715491890907, - "learning_rate": 6.25225027301962e-05, - "loss": 0.006086795404553413, - "step": 6620 - }, - { - "epoch": 1.1295822676896845, - "grad_norm": 0.06260590255260468, - "learning_rate": 6.250341318029641e-05, - "loss": 0.007288631051778793, - "step": 6625 - }, - { - "epoch": 1.1304347826086956, - "grad_norm": 0.06585957854986191, - "learning_rate": 6.24843119579716e-05, - "loss": 0.005779954791069031, - "step": 6630 - }, - { - "epoch": 1.1312872975277068, - "grad_norm": 0.05828391760587692, - "learning_rate": 6.246519907213888e-05, - "loss": 0.006320308148860932, - "step": 6635 - }, - { - "epoch": 1.132139812446718, - "grad_norm": 0.08400154113769531, - "learning_rate": 6.244607453172078e-05, - "loss": 0.00452205128967762, - "step": 6640 - }, - { - "epoch": 1.132992327365729, - "grad_norm": 0.059920940548181534, - "learning_rate": 6.242693834564525e-05, - "loss": 0.00620727390050888, - "step": 6645 - }, - { - "epoch": 1.13384484228474, - "grad_norm": 0.1100456491112709, - "learning_rate": 6.240779052284571e-05, - "loss": 0.006768511235713958, - "step": 6650 - }, - { - "epoch": 1.134697357203751, - "grad_norm": 0.07722730189561844, - "learning_rate": 6.238863107226102e-05, - "loss": 0.008207496255636215, - "step": 6655 - }, - { - "epoch": 1.1355498721227621, - "grad_norm": 0.05468403548002243, - "learning_rate": 6.236946000283542e-05, - "loss": 0.005272969231009483, - "step": 6660 - }, - { - "epoch": 1.1364023870417732, - "grad_norm": 0.0685155913233757, - "learning_rate": 6.235027732351863e-05, - "loss": 0.008074409514665603, - "step": 6665 - }, - { - "epoch": 1.1372549019607843, - "grad_norm": 0.07667854428291321, - "learning_rate": 6.233108304326571e-05, - "loss": 0.00863628089427948, - "step": 6670 - }, - { - "epoch": 1.1381074168797953, - "grad_norm": 0.0727204978466034, - "learning_rate": 6.231187717103727e-05, - "loss": 0.004446333646774292, - "step": 6675 - }, - { - "epoch": 1.1389599317988064, - "grad_norm": 0.06465403735637665, - "learning_rate": 6.229265971579918e-05, - "loss": 0.007380707561969757, - "step": 6680 - }, - { - "epoch": 1.1398124467178175, - "grad_norm": 0.04102586954832077, - "learning_rate": 6.227343068652281e-05, - "loss": 0.006103607639670372, - "step": 6685 - }, - { - "epoch": 1.1406649616368287, - "grad_norm": 0.06988929212093353, - "learning_rate": 6.225419009218493e-05, - "loss": 0.007358456403017044, - "step": 6690 - }, - { - "epoch": 1.1415174765558398, - "grad_norm": 0.07802724838256836, - "learning_rate": 6.223493794176767e-05, - "loss": 0.007887010276317597, - "step": 6695 - }, - { - "epoch": 1.1423699914748509, - "grad_norm": 0.10777991265058517, - "learning_rate": 6.221567424425857e-05, - "loss": 0.007653985172510147, - "step": 6700 - }, - { - "epoch": 1.143222506393862, - "grad_norm": 0.0922352522611618, - "learning_rate": 6.219639900865058e-05, - "loss": 0.007459624856710434, - "step": 6705 - }, - { - "epoch": 1.144075021312873, - "grad_norm": 0.06321967393159866, - "learning_rate": 6.217711224394202e-05, - "loss": 0.00540911853313446, - "step": 6710 - }, - { - "epoch": 1.144927536231884, - "grad_norm": 0.09344825148582458, - "learning_rate": 6.215781395913656e-05, - "loss": 0.0053936421871185304, - "step": 6715 - }, - { - "epoch": 1.145780051150895, - "grad_norm": 0.03457584232091904, - "learning_rate": 6.213850416324333e-05, - "loss": 0.006388355046510696, - "step": 6720 - }, - { - "epoch": 1.1466325660699062, - "grad_norm": 0.06931985914707184, - "learning_rate": 6.211918286527676e-05, - "loss": 0.005831810832023621, - "step": 6725 - }, - { - "epoch": 1.1474850809889172, - "grad_norm": 0.05588890612125397, - "learning_rate": 6.209985007425668e-05, - "loss": 0.0041655078530311584, - "step": 6730 - }, - { - "epoch": 1.1483375959079285, - "grad_norm": 0.07582694292068481, - "learning_rate": 6.208050579920826e-05, - "loss": 0.006521198153495789, - "step": 6735 - }, - { - "epoch": 1.1491901108269396, - "grad_norm": 0.07055433094501495, - "learning_rate": 6.206115004916209e-05, - "loss": 0.0066129244863986966, - "step": 6740 - }, - { - "epoch": 1.1500426257459506, - "grad_norm": 0.07039172202348709, - "learning_rate": 6.204178283315405e-05, - "loss": 0.005633739382028579, - "step": 6745 - }, - { - "epoch": 1.1508951406649617, - "grad_norm": 0.07833350449800491, - "learning_rate": 6.202240416022541e-05, - "loss": 0.006761975586414337, - "step": 6750 - }, - { - "epoch": 1.1517476555839727, - "grad_norm": 0.05553733557462692, - "learning_rate": 6.200301403942278e-05, - "loss": 0.006545065343379975, - "step": 6755 - }, - { - "epoch": 1.1526001705029838, - "grad_norm": 0.07306832075119019, - "learning_rate": 6.198361247979809e-05, - "loss": 0.008323725312948227, - "step": 6760 - }, - { - "epoch": 1.1534526854219949, - "grad_norm": 0.04787914454936981, - "learning_rate": 6.196419949040867e-05, - "loss": 0.004425797611474991, - "step": 6765 - }, - { - "epoch": 1.154305200341006, - "grad_norm": 0.08021930605173111, - "learning_rate": 6.194477508031712e-05, - "loss": 0.005103312805294991, - "step": 6770 - }, - { - "epoch": 1.155157715260017, - "grad_norm": 0.0821428894996643, - "learning_rate": 6.192533925859144e-05, - "loss": 0.009274877607822418, - "step": 6775 - }, - { - "epoch": 1.156010230179028, - "grad_norm": 0.09880609810352325, - "learning_rate": 6.190589203430486e-05, - "loss": 0.007863005250692367, - "step": 6780 - }, - { - "epoch": 1.156862745098039, - "grad_norm": 0.08075276762247086, - "learning_rate": 6.188643341653604e-05, - "loss": 0.004675677418708802, - "step": 6785 - }, - { - "epoch": 1.1577152600170504, - "grad_norm": 0.0633573830127716, - "learning_rate": 6.186696341436889e-05, - "loss": 0.007359890639781952, - "step": 6790 - }, - { - "epoch": 1.1585677749360614, - "grad_norm": 0.03828895092010498, - "learning_rate": 6.184748203689265e-05, - "loss": 0.004494070634245872, - "step": 6795 - }, - { - "epoch": 1.1594202898550725, - "grad_norm": 0.07907325774431229, - "learning_rate": 6.18279892932019e-05, - "loss": 0.006256269663572312, - "step": 6800 - }, - { - "epoch": 1.1602728047740836, - "grad_norm": 0.055188342928886414, - "learning_rate": 6.180848519239647e-05, - "loss": 0.009548474848270417, - "step": 6805 - }, - { - "epoch": 1.1611253196930946, - "grad_norm": 0.05850991606712341, - "learning_rate": 6.178896974358154e-05, - "loss": 0.0056076571345329285, - "step": 6810 - }, - { - "epoch": 1.1619778346121057, - "grad_norm": 0.0626932755112648, - "learning_rate": 6.176944295586757e-05, - "loss": 0.005667714029550552, - "step": 6815 - }, - { - "epoch": 1.1628303495311167, - "grad_norm": 0.06506946682929993, - "learning_rate": 6.174990483837031e-05, - "loss": 0.006154880672693252, - "step": 6820 - }, - { - "epoch": 1.1636828644501278, - "grad_norm": 0.0535859651863575, - "learning_rate": 6.17303554002108e-05, - "loss": 0.0042555928230285645, - "step": 6825 - }, - { - "epoch": 1.1645353793691389, - "grad_norm": 0.05577898398041725, - "learning_rate": 6.171079465051538e-05, - "loss": 0.006060104072093964, - "step": 6830 - }, - { - "epoch": 1.1653878942881502, - "grad_norm": 0.05574663355946541, - "learning_rate": 6.169122259841566e-05, - "loss": 0.008667515218257904, - "step": 6835 - }, - { - "epoch": 1.1662404092071612, - "grad_norm": 0.09157130867242813, - "learning_rate": 6.16716392530485e-05, - "loss": 0.007259176671504974, - "step": 6840 - }, - { - "epoch": 1.1670929241261723, - "grad_norm": 0.06404415518045425, - "learning_rate": 6.165204462355608e-05, - "loss": 0.007140593230724334, - "step": 6845 - }, - { - "epoch": 1.1679454390451833, - "grad_norm": 0.0713329091668129, - "learning_rate": 6.163243871908581e-05, - "loss": 0.006118748337030411, - "step": 6850 - }, - { - "epoch": 1.1687979539641944, - "grad_norm": 0.04038231074810028, - "learning_rate": 6.16128215487904e-05, - "loss": 0.006028918176889419, - "step": 6855 - }, - { - "epoch": 1.1696504688832055, - "grad_norm": 0.07793593406677246, - "learning_rate": 6.159319312182777e-05, - "loss": 0.006851163506507873, - "step": 6860 - }, - { - "epoch": 1.1705029838022165, - "grad_norm": 0.07543511688709259, - "learning_rate": 6.157355344736114e-05, - "loss": 0.006878272444009781, - "step": 6865 - }, - { - "epoch": 1.1713554987212276, - "grad_norm": 0.06332696974277496, - "learning_rate": 6.155390253455897e-05, - "loss": 0.006324999034404755, - "step": 6870 - }, - { - "epoch": 1.1722080136402386, - "grad_norm": 0.06920734792947769, - "learning_rate": 6.153424039259495e-05, - "loss": 0.00536310225725174, - "step": 6875 - }, - { - "epoch": 1.1730605285592497, - "grad_norm": 0.09313163161277771, - "learning_rate": 6.151456703064802e-05, - "loss": 0.00795424059033394, - "step": 6880 - }, - { - "epoch": 1.1739130434782608, - "grad_norm": 0.08588451147079468, - "learning_rate": 6.149488245790234e-05, - "loss": 0.006889034807682037, - "step": 6885 - }, - { - "epoch": 1.174765558397272, - "grad_norm": 0.09814538061618805, - "learning_rate": 6.147518668354737e-05, - "loss": 0.007332245260477066, - "step": 6890 - }, - { - "epoch": 1.175618073316283, - "grad_norm": 0.05141104385256767, - "learning_rate": 6.145547971677772e-05, - "loss": 0.00333656407892704, - "step": 6895 - }, - { - "epoch": 1.1764705882352942, - "grad_norm": 0.05575519800186157, - "learning_rate": 6.143576156679327e-05, - "loss": 0.005542768910527229, - "step": 6900 - }, - { - "epoch": 1.1773231031543052, - "grad_norm": 0.04917008429765701, - "learning_rate": 6.14160322427991e-05, - "loss": 0.007007633149623871, - "step": 6905 - }, - { - "epoch": 1.1781756180733163, - "grad_norm": 0.06385336071252823, - "learning_rate": 6.139629175400552e-05, - "loss": 0.007495941221714019, - "step": 6910 - }, - { - "epoch": 1.1790281329923273, - "grad_norm": 0.08664151281118393, - "learning_rate": 6.137654010962805e-05, - "loss": 0.0075534448027610775, - "step": 6915 - }, - { - "epoch": 1.1798806479113384, - "grad_norm": 0.08881189674139023, - "learning_rate": 6.13567773188874e-05, - "loss": 0.0059935558587312695, - "step": 6920 - }, - { - "epoch": 1.1807331628303495, - "grad_norm": 0.07297934591770172, - "learning_rate": 6.133700339100952e-05, - "loss": 0.006142234057188034, - "step": 6925 - }, - { - "epoch": 1.1815856777493605, - "grad_norm": 0.053351663053035736, - "learning_rate": 6.131721833522552e-05, - "loss": 0.006038139387965202, - "step": 6930 - }, - { - "epoch": 1.1824381926683718, - "grad_norm": 0.12923622131347656, - "learning_rate": 6.129742216077172e-05, - "loss": 0.008645947277545928, - "step": 6935 - }, - { - "epoch": 1.1832907075873829, - "grad_norm": 0.095455601811409, - "learning_rate": 6.127761487688964e-05, - "loss": 0.004740688577294349, - "step": 6940 - }, - { - "epoch": 1.184143222506394, - "grad_norm": 0.11498606204986572, - "learning_rate": 6.125779649282599e-05, - "loss": 0.00805070549249649, - "step": 6945 - }, - { - "epoch": 1.184995737425405, - "grad_norm": 0.07489286363124847, - "learning_rate": 6.123796701783264e-05, - "loss": 0.0060746859759092334, - "step": 6950 - }, - { - "epoch": 1.185848252344416, - "grad_norm": 0.07027438282966614, - "learning_rate": 6.121812646116663e-05, - "loss": 0.006816025823354721, - "step": 6955 - }, - { - "epoch": 1.186700767263427, - "grad_norm": 0.08542973548173904, - "learning_rate": 6.119827483209024e-05, - "loss": 0.005315831303596497, - "step": 6960 - }, - { - "epoch": 1.1875532821824382, - "grad_norm": 0.08409032970666885, - "learning_rate": 6.117841213987082e-05, - "loss": 0.0061601437628269196, - "step": 6965 - }, - { - "epoch": 1.1884057971014492, - "grad_norm": 0.10387974232435226, - "learning_rate": 6.115853839378096e-05, - "loss": 0.0071022816002368925, - "step": 6970 - }, - { - "epoch": 1.1892583120204603, - "grad_norm": 0.056441329419612885, - "learning_rate": 6.113865360309838e-05, - "loss": 0.004539218544960022, - "step": 6975 - }, - { - "epoch": 1.1901108269394713, - "grad_norm": 0.10132234543561935, - "learning_rate": 6.111875777710598e-05, - "loss": 0.0060778014361858364, - "step": 6980 - }, - { - "epoch": 1.1909633418584824, - "grad_norm": 0.07129226624965668, - "learning_rate": 6.109885092509177e-05, - "loss": 0.007794113457202911, - "step": 6985 - }, - { - "epoch": 1.1918158567774937, - "grad_norm": 0.09267892688512802, - "learning_rate": 6.107893305634893e-05, - "loss": 0.006867295503616333, - "step": 6990 - }, - { - "epoch": 1.1926683716965047, - "grad_norm": 0.0739888921380043, - "learning_rate": 6.10590041801758e-05, - "loss": 0.006263263523578644, - "step": 6995 - }, - { - "epoch": 1.1935208866155158, - "grad_norm": 0.10201311856508255, - "learning_rate": 6.103906430587583e-05, - "loss": 0.006245525926351548, - "step": 7000 - }, - { - "epoch": 1.1943734015345269, - "grad_norm": 0.08561360090970993, - "learning_rate": 6.1019113442757636e-05, - "loss": 0.005739351361989975, - "step": 7005 - }, - { - "epoch": 1.195225916453538, - "grad_norm": 0.06410367786884308, - "learning_rate": 6.099915160013491e-05, - "loss": 0.00598936527967453, - "step": 7010 - }, - { - "epoch": 1.196078431372549, - "grad_norm": 0.11765716224908829, - "learning_rate": 6.0979178787326544e-05, - "loss": 0.010477253794670105, - "step": 7015 - }, - { - "epoch": 1.19693094629156, - "grad_norm": 0.06659694015979767, - "learning_rate": 6.095919501365648e-05, - "loss": 0.0072447523474693295, - "step": 7020 - }, - { - "epoch": 1.197783461210571, - "grad_norm": 0.05651358142495155, - "learning_rate": 6.093920028845381e-05, - "loss": 0.004644358158111572, - "step": 7025 - }, - { - "epoch": 1.1986359761295824, - "grad_norm": 0.07198809087276459, - "learning_rate": 6.0919194621052764e-05, - "loss": 0.00517328754067421, - "step": 7030 - }, - { - "epoch": 1.1994884910485935, - "grad_norm": 0.06188420578837395, - "learning_rate": 6.0899178020792614e-05, - "loss": 0.005182894691824913, - "step": 7035 - }, - { - "epoch": 1.2003410059676045, - "grad_norm": 0.07731341570615768, - "learning_rate": 6.087915049701783e-05, - "loss": 0.006863371282815933, - "step": 7040 - }, - { - "epoch": 1.2011935208866156, - "grad_norm": 0.07693833857774734, - "learning_rate": 6.0859112059077866e-05, - "loss": 0.008634812384843826, - "step": 7045 - }, - { - "epoch": 1.2020460358056266, - "grad_norm": 0.08118413388729095, - "learning_rate": 6.083906271632736e-05, - "loss": 0.008003174513578414, - "step": 7050 - }, - { - "epoch": 1.2028985507246377, - "grad_norm": 0.07794218510389328, - "learning_rate": 6.0819002478126016e-05, - "loss": 0.005899757146835327, - "step": 7055 - }, - { - "epoch": 1.2037510656436488, - "grad_norm": 0.08107218146324158, - "learning_rate": 6.079893135383861e-05, - "loss": 0.007581057399511338, - "step": 7060 - }, - { - "epoch": 1.2046035805626598, - "grad_norm": 0.06910198926925659, - "learning_rate": 6.077884935283502e-05, - "loss": 0.00794234573841095, - "step": 7065 - }, - { - "epoch": 1.2054560954816709, - "grad_norm": 0.08426421135663986, - "learning_rate": 6.0758756484490186e-05, - "loss": 0.0057635679841041565, - "step": 7070 - }, - { - "epoch": 1.206308610400682, - "grad_norm": 0.08670961856842041, - "learning_rate": 6.073865275818413e-05, - "loss": 0.006369538605213165, - "step": 7075 - }, - { - "epoch": 1.207161125319693, - "grad_norm": 0.04245399683713913, - "learning_rate": 6.071853818330193e-05, - "loss": 0.0067548036575317385, - "step": 7080 - }, - { - "epoch": 1.208013640238704, - "grad_norm": 0.10600235313177109, - "learning_rate": 6.069841276923376e-05, - "loss": 0.006923867762088776, - "step": 7085 - }, - { - "epoch": 1.2088661551577153, - "grad_norm": 0.07093790173530579, - "learning_rate": 6.0678276525374814e-05, - "loss": 0.005083417519927025, - "step": 7090 - }, - { - "epoch": 1.2097186700767264, - "grad_norm": 0.04997126758098602, - "learning_rate": 6.065812946112537e-05, - "loss": 0.006207586824893951, - "step": 7095 - }, - { - "epoch": 1.2105711849957375, - "grad_norm": 0.04425262287259102, - "learning_rate": 6.063797158589074e-05, - "loss": 0.0046977601945400235, - "step": 7100 - }, - { - "epoch": 1.2114236999147485, - "grad_norm": 0.07300136238336563, - "learning_rate": 6.0617802909081295e-05, - "loss": 0.005589437484741211, - "step": 7105 - }, - { - "epoch": 1.2122762148337596, - "grad_norm": 0.0878889262676239, - "learning_rate": 6.0597623440112445e-05, - "loss": 0.005844222381711006, - "step": 7110 - }, - { - "epoch": 1.2131287297527706, - "grad_norm": 0.09079992771148682, - "learning_rate": 6.0577433188404635e-05, - "loss": 0.007329034805297852, - "step": 7115 - }, - { - "epoch": 1.2139812446717817, - "grad_norm": 0.07165387272834778, - "learning_rate": 6.055723216338336e-05, - "loss": 0.006642927974462509, - "step": 7120 - }, - { - "epoch": 1.2148337595907928, - "grad_norm": 0.07113955169916153, - "learning_rate": 6.053702037447911e-05, - "loss": 0.006992670893669129, - "step": 7125 - }, - { - "epoch": 1.215686274509804, - "grad_norm": 0.08737215399742126, - "learning_rate": 6.0516797831127425e-05, - "loss": 0.006702055037021637, - "step": 7130 - }, - { - "epoch": 1.216538789428815, - "grad_norm": 0.07276564091444016, - "learning_rate": 6.049656454276887e-05, - "loss": 0.005692619457840919, - "step": 7135 - }, - { - "epoch": 1.2173913043478262, - "grad_norm": 0.09203831106424332, - "learning_rate": 6.0476320518849006e-05, - "loss": 0.006464710086584091, - "step": 7140 - }, - { - "epoch": 1.2182438192668372, - "grad_norm": 0.07749375700950623, - "learning_rate": 6.045606576881843e-05, - "loss": 0.008598372340202332, - "step": 7145 - }, - { - "epoch": 1.2190963341858483, - "grad_norm": 0.04338040575385094, - "learning_rate": 6.043580030213272e-05, - "loss": 0.006892016530036927, - "step": 7150 - }, - { - "epoch": 1.2199488491048593, - "grad_norm": 0.06691034138202667, - "learning_rate": 6.0415524128252474e-05, - "loss": 0.007622670382261276, - "step": 7155 - }, - { - "epoch": 1.2208013640238704, - "grad_norm": 0.07532396167516708, - "learning_rate": 6.039523725664329e-05, - "loss": 0.00698106437921524, - "step": 7160 - }, - { - "epoch": 1.2216538789428815, - "grad_norm": 0.0408058688044548, - "learning_rate": 6.037493969677575e-05, - "loss": 0.005919446796178817, - "step": 7165 - }, - { - "epoch": 1.2225063938618925, - "grad_norm": 0.07306578010320663, - "learning_rate": 6.0354631458125425e-05, - "loss": 0.008953345566987991, - "step": 7170 - }, - { - "epoch": 1.2233589087809036, - "grad_norm": 0.08269601315259933, - "learning_rate": 6.033431255017289e-05, - "loss": 0.007224951684474945, - "step": 7175 - }, - { - "epoch": 1.2242114236999146, - "grad_norm": 0.045140884816646576, - "learning_rate": 6.0313982982403676e-05, - "loss": 0.006175454705953598, - "step": 7180 - }, - { - "epoch": 1.2250639386189257, - "grad_norm": 0.0721440315246582, - "learning_rate": 6.0293642764308304e-05, - "loss": 0.007267911732196808, - "step": 7185 - }, - { - "epoch": 1.225916453537937, - "grad_norm": 0.081813283264637, - "learning_rate": 6.027329190538227e-05, - "loss": 0.006872846186161042, - "step": 7190 - }, - { - "epoch": 1.226768968456948, - "grad_norm": 0.05662613734602928, - "learning_rate": 6.025293041512602e-05, - "loss": 0.004837202653288841, - "step": 7195 - }, - { - "epoch": 1.227621483375959, - "grad_norm": 0.10023492574691772, - "learning_rate": 6.023255830304498e-05, - "loss": 0.0060194481164217, - "step": 7200 - }, - { - "epoch": 1.2284739982949702, - "grad_norm": 0.06398235261440277, - "learning_rate": 6.021217557864954e-05, - "loss": 0.007653398066759109, - "step": 7205 - }, - { - "epoch": 1.2293265132139812, - "grad_norm": 0.09494475275278091, - "learning_rate": 6.019178225145503e-05, - "loss": 0.007547302544116974, - "step": 7210 - }, - { - "epoch": 1.2301790281329923, - "grad_norm": 0.05356467142701149, - "learning_rate": 6.017137833098171e-05, - "loss": 0.007133310288190841, - "step": 7215 - }, - { - "epoch": 1.2310315430520034, - "grad_norm": 0.09225092083215714, - "learning_rate": 6.0150963826754836e-05, - "loss": 0.006320309638977051, - "step": 7220 - }, - { - "epoch": 1.2318840579710144, - "grad_norm": 0.07072161883115768, - "learning_rate": 6.013053874830458e-05, - "loss": 0.007313568145036697, - "step": 7225 - }, - { - "epoch": 1.2327365728900257, - "grad_norm": 0.07206818461418152, - "learning_rate": 6.0110103105166026e-05, - "loss": 0.0054031949490308765, - "step": 7230 - }, - { - "epoch": 1.2335890878090368, - "grad_norm": 0.08611681312322617, - "learning_rate": 6.008965690687922e-05, - "loss": 0.00670153945684433, - "step": 7235 - }, - { - "epoch": 1.2344416027280478, - "grad_norm": 0.07864221930503845, - "learning_rate": 6.0069200162989154e-05, - "loss": 0.0069690033793449405, - "step": 7240 - }, - { - "epoch": 1.2352941176470589, - "grad_norm": 0.06847227364778519, - "learning_rate": 6.0048732883045665e-05, - "loss": 0.006755173206329346, - "step": 7245 - }, - { - "epoch": 1.23614663256607, - "grad_norm": 0.06264699995517731, - "learning_rate": 6.0028255076603606e-05, - "loss": 0.00519348569214344, - "step": 7250 - }, - { - "epoch": 1.236999147485081, - "grad_norm": 0.04114431515336037, - "learning_rate": 6.0007766753222665e-05, - "loss": 0.006071234866976738, - "step": 7255 - }, - { - "epoch": 1.237851662404092, - "grad_norm": 0.03660140559077263, - "learning_rate": 5.998726792246751e-05, - "loss": 0.007517064362764359, - "step": 7260 - }, - { - "epoch": 1.2387041773231031, - "grad_norm": 0.10343052446842194, - "learning_rate": 5.9966758593907636e-05, - "loss": 0.0070131182670593265, - "step": 7265 - }, - { - "epoch": 1.2395566922421142, - "grad_norm": 0.08169959485530853, - "learning_rate": 5.994623877711751e-05, - "loss": 0.006279528886079788, - "step": 7270 - }, - { - "epoch": 1.2404092071611252, - "grad_norm": 0.06312677264213562, - "learning_rate": 5.992570848167645e-05, - "loss": 0.0041657909750938416, - "step": 7275 - }, - { - "epoch": 1.2412617220801363, - "grad_norm": 0.08725713193416595, - "learning_rate": 5.990516771716869e-05, - "loss": 0.007908149063587189, - "step": 7280 - }, - { - "epoch": 1.2421142369991476, - "grad_norm": 0.05857875198125839, - "learning_rate": 5.988461649318333e-05, - "loss": 0.005137740075588227, - "step": 7285 - }, - { - "epoch": 1.2429667519181586, - "grad_norm": 0.04836168512701988, - "learning_rate": 5.986405481931438e-05, - "loss": 0.005033157765865326, - "step": 7290 - }, - { - "epoch": 1.2438192668371697, - "grad_norm": 0.09514568001031876, - "learning_rate": 5.98434827051607e-05, - "loss": 0.007460397481918335, - "step": 7295 - }, - { - "epoch": 1.2446717817561808, - "grad_norm": 0.049415748566389084, - "learning_rate": 5.982290016032604e-05, - "loss": 0.0049881644546985624, - "step": 7300 - }, - { - "epoch": 1.2455242966751918, - "grad_norm": 0.1069302037358284, - "learning_rate": 5.980230719441903e-05, - "loss": 0.005356961116194725, - "step": 7305 - }, - { - "epoch": 1.2463768115942029, - "grad_norm": 0.1031380444765091, - "learning_rate": 5.9781703817053136e-05, - "loss": 0.0067513369023799895, - "step": 7310 - }, - { - "epoch": 1.247229326513214, - "grad_norm": 0.05909884348511696, - "learning_rate": 5.976109003784671e-05, - "loss": 0.005357486009597778, - "step": 7315 - }, - { - "epoch": 1.248081841432225, - "grad_norm": 0.09306607395410538, - "learning_rate": 5.974046586642295e-05, - "loss": 0.005747456848621368, - "step": 7320 - }, - { - "epoch": 1.248934356351236, - "grad_norm": 0.0688752606511116, - "learning_rate": 5.971983131240988e-05, - "loss": 0.0073902375996112825, - "step": 7325 - }, - { - "epoch": 1.2497868712702473, - "grad_norm": 0.06592141836881638, - "learning_rate": 5.969918638544044e-05, - "loss": 0.008268805593252182, - "step": 7330 - }, - { - "epoch": 1.2497868712702473, - "eval_loss": 0.037298671901226044, - "eval_runtime": 3.6917, - "eval_samples_per_second": 68.261, - "eval_steps_per_second": 1.084, - "step": 7330 - }, - { - "eval_cer_subset": 0.01283241324278991, - "eval_cer_subset_edit_distance": 788, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 7330 - }, - { - "epoch": 1.2506393861892584, - "grad_norm": 0.06094380095601082, - "learning_rate": 5.9678531095152326e-05, - "loss": 0.005528298765420913, - "step": 7335 - }, - { - "epoch": 1.2514919011082695, - "grad_norm": 0.1417030543088913, - "learning_rate": 5.965786545118815e-05, - "loss": 0.00984017476439476, - "step": 7340 - }, - { - "epoch": 1.2523444160272805, - "grad_norm": 0.08209668844938278, - "learning_rate": 5.963718946319529e-05, - "loss": 0.007516486942768097, - "step": 7345 - }, - { - "epoch": 1.2531969309462916, - "grad_norm": 0.06825494766235352, - "learning_rate": 5.9616503140826006e-05, - "loss": 0.005924524366855621, - "step": 7350 - }, - { - "epoch": 1.2540494458653026, - "grad_norm": 0.11229037493467331, - "learning_rate": 5.959580649373736e-05, - "loss": 0.006495627760887146, - "step": 7355 - }, - { - "epoch": 1.2549019607843137, - "grad_norm": 0.13235078752040863, - "learning_rate": 5.957509953159123e-05, - "loss": 0.00942063182592392, - "step": 7360 - }, - { - "epoch": 1.2557544757033248, - "grad_norm": 0.04514055699110031, - "learning_rate": 5.955438226405432e-05, - "loss": 0.006601292639970779, - "step": 7365 - }, - { - "epoch": 1.2566069906223358, - "grad_norm": 0.08192043751478195, - "learning_rate": 5.9533654700798126e-05, - "loss": 0.007403627783060074, - "step": 7370 - }, - { - "epoch": 1.257459505541347, - "grad_norm": 0.07101254910230637, - "learning_rate": 5.951291685149898e-05, - "loss": 0.006301522254943848, - "step": 7375 - }, - { - "epoch": 1.258312020460358, - "grad_norm": 0.05598035827279091, - "learning_rate": 5.949216872583799e-05, - "loss": 0.006812388449907303, - "step": 7380 - }, - { - "epoch": 1.259164535379369, - "grad_norm": 0.06444506347179413, - "learning_rate": 5.9471410333501085e-05, - "loss": 0.005891536176204681, - "step": 7385 - }, - { - "epoch": 1.2600170502983803, - "grad_norm": 0.04921717569231987, - "learning_rate": 5.945064168417895e-05, - "loss": 0.004649973660707474, - "step": 7390 - }, - { - "epoch": 1.2608695652173914, - "grad_norm": 0.09095602482557297, - "learning_rate": 5.94298627875671e-05, - "loss": 0.007515725493431091, - "step": 7395 - }, - { - "epoch": 1.2617220801364024, - "grad_norm": 0.09932803362607956, - "learning_rate": 5.9409073653365816e-05, - "loss": 0.006223166733980179, - "step": 7400 - }, - { - "epoch": 1.2625745950554135, - "grad_norm": 0.08616010844707489, - "learning_rate": 5.938827429128014e-05, - "loss": 0.006999516487121582, - "step": 7405 - }, - { - "epoch": 1.2634271099744245, - "grad_norm": 0.11979297548532486, - "learning_rate": 5.936746471101993e-05, - "loss": 0.00812242105603218, - "step": 7410 - }, - { - "epoch": 1.2642796248934356, - "grad_norm": 0.12872007489204407, - "learning_rate": 5.934664492229976e-05, - "loss": 0.006246988475322723, - "step": 7415 - }, - { - "epoch": 1.2651321398124467, - "grad_norm": 0.0831044539809227, - "learning_rate": 5.932581493483903e-05, - "loss": 0.00590248554944992, - "step": 7420 - }, - { - "epoch": 1.265984654731458, - "grad_norm": 0.09913221001625061, - "learning_rate": 5.9304974758361857e-05, - "loss": 0.007224322855472564, - "step": 7425 - }, - { - "epoch": 1.266837169650469, - "grad_norm": 0.08654595911502838, - "learning_rate": 5.928412440259713e-05, - "loss": 0.007056090980768204, - "step": 7430 - }, - { - "epoch": 1.26768968456948, - "grad_norm": 0.07882801443338394, - "learning_rate": 5.926326387727849e-05, - "loss": 0.00572751946747303, - "step": 7435 - }, - { - "epoch": 1.2685421994884911, - "grad_norm": 0.12886428833007812, - "learning_rate": 5.924239319214432e-05, - "loss": 0.0106881283223629, - "step": 7440 - }, - { - "epoch": 1.2693947144075022, - "grad_norm": 0.05597686767578125, - "learning_rate": 5.922151235693775e-05, - "loss": 0.005041084438562393, - "step": 7445 - }, - { - "epoch": 1.2702472293265132, - "grad_norm": 0.10719682276248932, - "learning_rate": 5.920062138140665e-05, - "loss": 0.007724158465862274, - "step": 7450 - }, - { - "epoch": 1.2710997442455243, - "grad_norm": 0.045485325157642365, - "learning_rate": 5.917972027530363e-05, - "loss": 0.003246675431728363, - "step": 7455 - }, - { - "epoch": 1.2719522591645354, - "grad_norm": 0.09602563083171844, - "learning_rate": 5.9158809048386017e-05, - "loss": 0.006592199206352234, - "step": 7460 - }, - { - "epoch": 1.2728047740835464, - "grad_norm": 0.0555407889187336, - "learning_rate": 5.913788771041586e-05, - "loss": 0.00537751168012619, - "step": 7465 - }, - { - "epoch": 1.2736572890025575, - "grad_norm": 0.15820109844207764, - "learning_rate": 5.911695627115994e-05, - "loss": 0.005968114733695984, - "step": 7470 - }, - { - "epoch": 1.2745098039215685, - "grad_norm": 0.05781199410557747, - "learning_rate": 5.9096014740389754e-05, - "loss": 0.00887204110622406, - "step": 7475 - }, - { - "epoch": 1.2753623188405796, - "grad_norm": 0.07927337288856506, - "learning_rate": 5.90750631278815e-05, - "loss": 0.006439142674207687, - "step": 7480 - }, - { - "epoch": 1.2762148337595907, - "grad_norm": 0.03843824937939644, - "learning_rate": 5.905410144341609e-05, - "loss": 0.007792883366346359, - "step": 7485 - }, - { - "epoch": 1.277067348678602, - "grad_norm": 0.0692640095949173, - "learning_rate": 5.903312969677914e-05, - "loss": 0.006274447590112686, - "step": 7490 - }, - { - "epoch": 1.277919863597613, - "grad_norm": 0.07501527667045593, - "learning_rate": 5.901214789776094e-05, - "loss": 0.007496471703052521, - "step": 7495 - }, - { - "epoch": 1.278772378516624, - "grad_norm": 0.10271260142326355, - "learning_rate": 5.8991156056156514e-05, - "loss": 0.008766942471265794, - "step": 7500 - }, - { - "epoch": 1.2796248934356351, - "grad_norm": 0.03995242714881897, - "learning_rate": 5.897015418176555e-05, - "loss": 0.0055749226361513134, - "step": 7505 - }, - { - "epoch": 1.2804774083546462, - "grad_norm": 0.09215585142374039, - "learning_rate": 5.8949142284392406e-05, - "loss": 0.005763960257172585, - "step": 7510 - }, - { - "epoch": 1.2813299232736572, - "grad_norm": 0.07763402909040451, - "learning_rate": 5.892812037384615e-05, - "loss": 0.006439389288425445, - "step": 7515 - }, - { - "epoch": 1.2821824381926683, - "grad_norm": 0.04945438355207443, - "learning_rate": 5.890708845994049e-05, - "loss": 0.006960665434598922, - "step": 7520 - }, - { - "epoch": 1.2830349531116796, - "grad_norm": 0.05348283797502518, - "learning_rate": 5.888604655249384e-05, - "loss": 0.0061422914266586305, - "step": 7525 - }, - { - "epoch": 1.2838874680306906, - "grad_norm": 0.10389877110719681, - "learning_rate": 5.886499466132926e-05, - "loss": 0.009247081726789475, - "step": 7530 - }, - { - "epoch": 1.2847399829497017, - "grad_norm": 0.07753872126340866, - "learning_rate": 5.884393279627448e-05, - "loss": 0.004902977123856544, - "step": 7535 - }, - { - "epoch": 1.2855924978687128, - "grad_norm": 0.10553103685379028, - "learning_rate": 5.8822860967161856e-05, - "loss": 0.004547145590186119, - "step": 7540 - }, - { - "epoch": 1.2864450127877238, - "grad_norm": 0.08235067129135132, - "learning_rate": 5.880177918382844e-05, - "loss": 0.005282455682754516, - "step": 7545 - }, - { - "epoch": 1.287297527706735, - "grad_norm": 0.08135014772415161, - "learning_rate": 5.878068745611591e-05, - "loss": 0.006127358600497246, - "step": 7550 - }, - { - "epoch": 1.288150042625746, - "grad_norm": 0.04027952626347542, - "learning_rate": 5.875958579387056e-05, - "loss": 0.008251778036355972, - "step": 7555 - }, - { - "epoch": 1.289002557544757, - "grad_norm": 0.1060953438282013, - "learning_rate": 5.8738474206943385e-05, - "loss": 0.008290941268205643, - "step": 7560 - }, - { - "epoch": 1.289855072463768, - "grad_norm": 0.06716421991586685, - "learning_rate": 5.871735270518995e-05, - "loss": 0.004932524263858795, - "step": 7565 - }, - { - "epoch": 1.2907075873827791, - "grad_norm": 0.07644582539796829, - "learning_rate": 5.869622129847048e-05, - "loss": 0.006172410026192665, - "step": 7570 - }, - { - "epoch": 1.2915601023017902, - "grad_norm": 0.06018557399511337, - "learning_rate": 5.867507999664983e-05, - "loss": 0.005532362312078476, - "step": 7575 - }, - { - "epoch": 1.2924126172208013, - "grad_norm": 0.06454342603683472, - "learning_rate": 5.865392880959745e-05, - "loss": 0.005053167790174484, - "step": 7580 - }, - { - "epoch": 1.2932651321398123, - "grad_norm": 0.07618142664432526, - "learning_rate": 5.863276774718742e-05, - "loss": 0.005658206716179848, - "step": 7585 - }, - { - "epoch": 1.2941176470588236, - "grad_norm": 0.05649973824620247, - "learning_rate": 5.8611596819298434e-05, - "loss": 0.007477214187383651, - "step": 7590 - }, - { - "epoch": 1.2949701619778347, - "grad_norm": 0.09222351759672165, - "learning_rate": 5.859041603581377e-05, - "loss": 0.006974493712186813, - "step": 7595 - }, - { - "epoch": 1.2958226768968457, - "grad_norm": 0.07462326437234879, - "learning_rate": 5.856922540662134e-05, - "loss": 0.008175718039274216, - "step": 7600 - }, - { - "epoch": 1.2966751918158568, - "grad_norm": 0.10593193024396896, - "learning_rate": 5.854802494161364e-05, - "loss": 0.006635700166225433, - "step": 7605 - }, - { - "epoch": 1.2975277067348678, - "grad_norm": 0.08673358708620071, - "learning_rate": 5.8526814650687724e-05, - "loss": 0.007347754389047623, - "step": 7610 - }, - { - "epoch": 1.298380221653879, - "grad_norm": 0.10450063645839691, - "learning_rate": 5.850559454374528e-05, - "loss": 0.008085139095783234, - "step": 7615 - }, - { - "epoch": 1.29923273657289, - "grad_norm": 0.04219435900449753, - "learning_rate": 5.848436463069257e-05, - "loss": 0.006296204030513763, - "step": 7620 - }, - { - "epoch": 1.3000852514919012, - "grad_norm": 0.08187524974346161, - "learning_rate": 5.84631249214404e-05, - "loss": 0.007680010050535202, - "step": 7625 - }, - { - "epoch": 1.3009377664109123, - "grad_norm": 0.21044164896011353, - "learning_rate": 5.844187542590418e-05, - "loss": 0.008709554374217988, - "step": 7630 - }, - { - "epoch": 1.3017902813299234, - "grad_norm": 0.09822215139865875, - "learning_rate": 5.842061615400389e-05, - "loss": 0.007412384450435639, - "step": 7635 - }, - { - "epoch": 1.3026427962489344, - "grad_norm": 0.05957398563623428, - "learning_rate": 5.8399347115664053e-05, - "loss": 0.0062717020511627196, - "step": 7640 - }, - { - "epoch": 1.3034953111679455, - "grad_norm": 0.07013436406850815, - "learning_rate": 5.837806832081378e-05, - "loss": 0.005471421033143997, - "step": 7645 - }, - { - "epoch": 1.3043478260869565, - "grad_norm": 0.09616916626691818, - "learning_rate": 5.835677977938671e-05, - "loss": 0.008985907584428788, - "step": 7650 - }, - { - "epoch": 1.3052003410059676, - "grad_norm": 0.07946161180734634, - "learning_rate": 5.833548150132105e-05, - "loss": 0.00563003197312355, - "step": 7655 - }, - { - "epoch": 1.3060528559249787, - "grad_norm": 0.0630686804652214, - "learning_rate": 5.831417349655953e-05, - "loss": 0.007591667026281357, - "step": 7660 - }, - { - "epoch": 1.3069053708439897, - "grad_norm": 0.08530164510011673, - "learning_rate": 5.829285577504944e-05, - "loss": 0.006751708686351776, - "step": 7665 - }, - { - "epoch": 1.3077578857630008, - "grad_norm": 0.045148320496082306, - "learning_rate": 5.8271528346742616e-05, - "loss": 0.0052963607013225555, - "step": 7670 - }, - { - "epoch": 1.3086104006820118, - "grad_norm": 0.07147885859012604, - "learning_rate": 5.82501912215954e-05, - "loss": 0.005282463133335113, - "step": 7675 - }, - { - "epoch": 1.309462915601023, - "grad_norm": 0.0933302789926529, - "learning_rate": 5.8228844409568654e-05, - "loss": 0.0073209434747695925, - "step": 7680 - }, - { - "epoch": 1.310315430520034, - "grad_norm": 0.07449645549058914, - "learning_rate": 5.820748792062781e-05, - "loss": 0.007801111787557602, - "step": 7685 - }, - { - "epoch": 1.3111679454390452, - "grad_norm": 0.04569214582443237, - "learning_rate": 5.8186121764742774e-05, - "loss": 0.006659354269504547, - "step": 7690 - }, - { - "epoch": 1.3120204603580563, - "grad_norm": 0.07046396285295486, - "learning_rate": 5.8164745951887995e-05, - "loss": 0.006448440253734589, - "step": 7695 - }, - { - "epoch": 1.3128729752770674, - "grad_norm": 0.09704319387674332, - "learning_rate": 5.814336049204239e-05, - "loss": 0.008210816234350205, - "step": 7700 - }, - { - "epoch": 1.3137254901960784, - "grad_norm": 0.06477776169776917, - "learning_rate": 5.81219653951894e-05, - "loss": 0.005369330942630768, - "step": 7705 - }, - { - "epoch": 1.3145780051150895, - "grad_norm": 0.11657397449016571, - "learning_rate": 5.810056067131698e-05, - "loss": 0.010190412402153015, - "step": 7710 - }, - { - "epoch": 1.3154305200341005, - "grad_norm": 0.06578268110752106, - "learning_rate": 5.8079146330417575e-05, - "loss": 0.006289052963256836, - "step": 7715 - }, - { - "epoch": 1.3162830349531116, - "grad_norm": 0.06296945363283157, - "learning_rate": 5.80577223824881e-05, - "loss": 0.008120459318161011, - "step": 7720 - }, - { - "epoch": 1.317135549872123, - "grad_norm": 0.08275634050369263, - "learning_rate": 5.803628883752996e-05, - "loss": 0.006926379352807999, - "step": 7725 - }, - { - "epoch": 1.317988064791134, - "grad_norm": 0.0693436712026596, - "learning_rate": 5.8014845705549086e-05, - "loss": 0.006521113961935043, - "step": 7730 - }, - { - "epoch": 1.318840579710145, - "grad_norm": 0.05845775827765465, - "learning_rate": 5.799339299655579e-05, - "loss": 0.00762510895729065, - "step": 7735 - }, - { - "epoch": 1.319693094629156, - "grad_norm": 0.08802217245101929, - "learning_rate": 5.7971930720564947e-05, - "loss": 0.008071760833263397, - "step": 7740 - }, - { - "epoch": 1.3205456095481671, - "grad_norm": 0.08866037428379059, - "learning_rate": 5.795045888759585e-05, - "loss": 0.006111105903983116, - "step": 7745 - }, - { - "epoch": 1.3213981244671782, - "grad_norm": 0.0844360888004303, - "learning_rate": 5.792897750767225e-05, - "loss": 0.005196729302406311, - "step": 7750 - }, - { - "epoch": 1.3222506393861893, - "grad_norm": 0.06763950735330582, - "learning_rate": 5.79074865908224e-05, - "loss": 0.006462454050779343, - "step": 7755 - }, - { - "epoch": 1.3231031543052003, - "grad_norm": 0.06333937495946884, - "learning_rate": 5.7885986147078946e-05, - "loss": 0.0068017512559890745, - "step": 7760 - }, - { - "epoch": 1.3239556692242114, - "grad_norm": 0.05730217695236206, - "learning_rate": 5.786447618647904e-05, - "loss": 0.0065845087170600895, - "step": 7765 - }, - { - "epoch": 1.3248081841432224, - "grad_norm": 0.06838720291852951, - "learning_rate": 5.784295671906422e-05, - "loss": 0.0059626404196023945, - "step": 7770 - }, - { - "epoch": 1.3256606990622335, - "grad_norm": 0.06693503260612488, - "learning_rate": 5.782142775488051e-05, - "loss": 0.008056168258190156, - "step": 7775 - }, - { - "epoch": 1.3265132139812446, - "grad_norm": 0.07886708527803421, - "learning_rate": 5.7799889303978324e-05, - "loss": 0.006670922040939331, - "step": 7780 - }, - { - "epoch": 1.3273657289002558, - "grad_norm": 0.06932322680950165, - "learning_rate": 5.777834137641255e-05, - "loss": 0.006734507530927658, - "step": 7785 - }, - { - "epoch": 1.328218243819267, - "grad_norm": 0.08057818561792374, - "learning_rate": 5.775678398224247e-05, - "loss": 0.005952415242791176, - "step": 7790 - }, - { - "epoch": 1.329070758738278, - "grad_norm": 0.06614059209823608, - "learning_rate": 5.7735217131531785e-05, - "loss": 0.007471600174903869, - "step": 7795 - }, - { - "epoch": 1.329923273657289, - "grad_norm": 0.06335467845201492, - "learning_rate": 5.771364083434862e-05, - "loss": 0.007279399782419205, - "step": 7800 - }, - { - "epoch": 1.3307757885763, - "grad_norm": 0.11745526641607285, - "learning_rate": 5.769205510076552e-05, - "loss": 0.006242561340332031, - "step": 7805 - }, - { - "epoch": 1.3316283034953111, - "grad_norm": 0.0590963289141655, - "learning_rate": 5.7670459940859414e-05, - "loss": 0.006263129413127899, - "step": 7810 - }, - { - "epoch": 1.3324808184143222, - "grad_norm": 0.05416800454258919, - "learning_rate": 5.764885536471164e-05, - "loss": 0.00531160868704319, - "step": 7815 - }, - { - "epoch": 1.3333333333333333, - "grad_norm": 0.05527244135737419, - "learning_rate": 5.7627241382407933e-05, - "loss": 0.005747637152671814, - "step": 7820 - }, - { - "epoch": 1.3341858482523445, - "grad_norm": 0.057753629982471466, - "learning_rate": 5.760561800403844e-05, - "loss": 0.004979781061410904, - "step": 7825 - }, - { - "epoch": 1.3350383631713556, - "grad_norm": 0.10882547497749329, - "learning_rate": 5.758398523969763e-05, - "loss": 0.00589316263794899, - "step": 7830 - }, - { - "epoch": 1.3358908780903667, - "grad_norm": 0.08053787797689438, - "learning_rate": 5.756234309948443e-05, - "loss": 0.004465704411268234, - "step": 7835 - }, - { - "epoch": 1.3367433930093777, - "grad_norm": 0.09168808907270432, - "learning_rate": 5.75406915935021e-05, - "loss": 0.004735191911458969, - "step": 7840 - }, - { - "epoch": 1.3375959079283888, - "grad_norm": 0.0956537052989006, - "learning_rate": 5.751903073185829e-05, - "loss": 0.005519610643386841, - "step": 7845 - }, - { - "epoch": 1.3384484228473998, - "grad_norm": 0.05775619298219681, - "learning_rate": 5.749736052466501e-05, - "loss": 0.005525605380535125, - "step": 7850 - }, - { - "epoch": 1.339300937766411, - "grad_norm": 0.08594895154237747, - "learning_rate": 5.7475680982038616e-05, - "loss": 0.005280618742108345, - "step": 7855 - }, - { - "epoch": 1.340153452685422, - "grad_norm": 0.10326153039932251, - "learning_rate": 5.745399211409987e-05, - "loss": 0.005818159133195877, - "step": 7860 - }, - { - "epoch": 1.341005967604433, - "grad_norm": 0.053448133170604706, - "learning_rate": 5.743229393097384e-05, - "loss": 0.008255011588335037, - "step": 7865 - }, - { - "epoch": 1.341858482523444, - "grad_norm": 0.05307561904191971, - "learning_rate": 5.741058644278995e-05, - "loss": 0.006851959228515625, - "step": 7870 - }, - { - "epoch": 1.3427109974424551, - "grad_norm": 0.050789013504981995, - "learning_rate": 5.738886965968199e-05, - "loss": 0.005396667867898941, - "step": 7875 - }, - { - "epoch": 1.3435635123614662, - "grad_norm": 0.06762190908193588, - "learning_rate": 5.736714359178808e-05, - "loss": 0.005661940947175026, - "step": 7880 - }, - { - "epoch": 1.3444160272804775, - "grad_norm": 0.06955094635486603, - "learning_rate": 5.734540824925066e-05, - "loss": 0.0065834902226924895, - "step": 7885 - }, - { - "epoch": 1.3452685421994885, - "grad_norm": 0.09844111651182175, - "learning_rate": 5.7323663642216525e-05, - "loss": 0.006687184423208236, - "step": 7890 - }, - { - "epoch": 1.3461210571184996, - "grad_norm": 0.05249316990375519, - "learning_rate": 5.7301909780836766e-05, - "loss": 0.00670531764626503, - "step": 7895 - }, - { - "epoch": 1.3469735720375107, - "grad_norm": 0.06578750163316727, - "learning_rate": 5.7280146675266815e-05, - "loss": 0.0063153237104415895, - "step": 7900 - }, - { - "epoch": 1.3478260869565217, - "grad_norm": 0.10460260510444641, - "learning_rate": 5.725837433566643e-05, - "loss": 0.008820119500160217, - "step": 7905 - }, - { - "epoch": 1.3486786018755328, - "grad_norm": 0.06620552390813828, - "learning_rate": 5.7236592772199624e-05, - "loss": 0.006502580642700195, - "step": 7910 - }, - { - "epoch": 1.3495311167945439, - "grad_norm": 0.1033373698592186, - "learning_rate": 5.72148019950348e-05, - "loss": 0.008503454178571701, - "step": 7915 - }, - { - "epoch": 1.350383631713555, - "grad_norm": 0.05790281295776367, - "learning_rate": 5.719300201434458e-05, - "loss": 0.006304294615983963, - "step": 7920 - }, - { - "epoch": 1.3512361466325662, - "grad_norm": 0.06094033271074295, - "learning_rate": 5.717119284030595e-05, - "loss": 0.006775079667568207, - "step": 7925 - }, - { - "epoch": 1.3520886615515773, - "grad_norm": 0.08011666685342789, - "learning_rate": 5.714937448310015e-05, - "loss": 0.0064566083252429966, - "step": 7930 - }, - { - "epoch": 1.3529411764705883, - "grad_norm": 0.06395548582077026, - "learning_rate": 5.7127546952912686e-05, - "loss": 0.009279583394527436, - "step": 7935 - }, - { - "epoch": 1.3537936913895994, - "grad_norm": 0.06697574257850647, - "learning_rate": 5.710571025993342e-05, - "loss": 0.005718713253736496, - "step": 7940 - }, - { - "epoch": 1.3546462063086104, - "grad_norm": 0.08821829408407211, - "learning_rate": 5.7083864414356414e-05, - "loss": 0.008157726377248764, - "step": 7945 - }, - { - "epoch": 1.3554987212276215, - "grad_norm": 0.07097669690847397, - "learning_rate": 5.706200942638006e-05, - "loss": 0.004782359302043915, - "step": 7950 - }, - { - "epoch": 1.3563512361466326, - "grad_norm": 0.05015713721513748, - "learning_rate": 5.7040145306206963e-05, - "loss": 0.004204710572957992, - "step": 7955 - }, - { - "epoch": 1.3572037510656436, - "grad_norm": 0.054049719125032425, - "learning_rate": 5.701827206404406e-05, - "loss": 0.00606432780623436, - "step": 7960 - }, - { - "epoch": 1.3580562659846547, - "grad_norm": 0.0878557488322258, - "learning_rate": 5.6996389710102474e-05, - "loss": 0.008037066459655762, - "step": 7965 - }, - { - "epoch": 1.3589087809036657, - "grad_norm": 0.10873926430940628, - "learning_rate": 5.697449825459762e-05, - "loss": 0.007864821702241898, - "step": 7970 - }, - { - "epoch": 1.3597612958226768, - "grad_norm": 0.05823246389627457, - "learning_rate": 5.695259770774919e-05, - "loss": 0.00715988278388977, - "step": 7975 - }, - { - "epoch": 1.3606138107416879, - "grad_norm": 0.06690117716789246, - "learning_rate": 5.693068807978106e-05, - "loss": 0.006888707727193832, - "step": 7980 - }, - { - "epoch": 1.3614663256606991, - "grad_norm": 0.07290884852409363, - "learning_rate": 5.6908769380921363e-05, - "loss": 0.005684115365147591, - "step": 7985 - }, - { - "epoch": 1.3623188405797102, - "grad_norm": 0.07930465042591095, - "learning_rate": 5.6886841621402504e-05, - "loss": 0.0077220767736434935, - "step": 7990 - }, - { - "epoch": 1.3631713554987213, - "grad_norm": 0.08893048763275146, - "learning_rate": 5.686490481146107e-05, - "loss": 0.007824088633060455, - "step": 7995 - }, - { - "epoch": 1.3640238704177323, - "grad_norm": 0.09335844218730927, - "learning_rate": 5.6842958961337905e-05, - "loss": 0.006522499769926071, - "step": 8000 - }, - { - "epoch": 1.3648763853367434, - "grad_norm": 0.07194571942090988, - "learning_rate": 5.682100408127806e-05, - "loss": 0.008011893928050995, - "step": 8005 - }, - { - "epoch": 1.3657289002557544, - "grad_norm": 0.053429413586854935, - "learning_rate": 5.6799040181530794e-05, - "loss": 0.006260050833225251, - "step": 8010 - }, - { - "epoch": 1.3665814151747655, - "grad_norm": 0.11974478513002396, - "learning_rate": 5.677706727234959e-05, - "loss": 0.006692723929882049, - "step": 8015 - }, - { - "epoch": 1.3674339300937766, - "grad_norm": 0.07810027152299881, - "learning_rate": 5.6755085363992155e-05, - "loss": 0.007429388910531997, - "step": 8020 - }, - { - "epoch": 1.3682864450127878, - "grad_norm": 0.10204190760850906, - "learning_rate": 5.673309446672034e-05, - "loss": 0.005550343170762062, - "step": 8025 - }, - { - "epoch": 1.369138959931799, - "grad_norm": 0.07640541344881058, - "learning_rate": 5.671109459080026e-05, - "loss": 0.006840181350708008, - "step": 8030 - }, - { - "epoch": 1.36999147485081, - "grad_norm": 0.06644181162118912, - "learning_rate": 5.668908574650216e-05, - "loss": 0.005395495146512985, - "step": 8035 - }, - { - "epoch": 1.370843989769821, - "grad_norm": 0.09630967676639557, - "learning_rate": 5.6667067944100526e-05, - "loss": 0.005423872545361519, - "step": 8040 - }, - { - "epoch": 1.371696504688832, - "grad_norm": 0.07114128023386002, - "learning_rate": 5.664504119387398e-05, - "loss": 0.007013414800167084, - "step": 8045 - }, - { - "epoch": 1.3725490196078431, - "grad_norm": 0.07324981689453125, - "learning_rate": 5.662300550610535e-05, - "loss": 0.008274464309215546, - "step": 8050 - }, - { - "epoch": 1.3734015345268542, - "grad_norm": 0.06012870743870735, - "learning_rate": 5.660096089108163e-05, - "loss": 0.00520169697701931, - "step": 8055 - }, - { - "epoch": 1.3742540494458653, - "grad_norm": 0.07458557933568954, - "learning_rate": 5.657890735909397e-05, - "loss": 0.006112886965274811, - "step": 8060 - }, - { - "epoch": 1.3751065643648763, - "grad_norm": 0.0470297709107399, - "learning_rate": 5.655684492043771e-05, - "loss": 0.004435106366872788, - "step": 8065 - }, - { - "epoch": 1.3759590792838874, - "grad_norm": 0.05244847387075424, - "learning_rate": 5.653477358541231e-05, - "loss": 0.006484140455722809, - "step": 8070 - }, - { - "epoch": 1.3768115942028984, - "grad_norm": 0.10809201747179031, - "learning_rate": 5.651269336432142e-05, - "loss": 0.006385499238967895, - "step": 8075 - }, - { - "epoch": 1.3776641091219095, - "grad_norm": 0.11761374026536942, - "learning_rate": 5.649060426747281e-05, - "loss": 0.0056259695440530775, - "step": 8080 - }, - { - "epoch": 1.3785166240409208, - "grad_norm": 0.06250949203968048, - "learning_rate": 5.646850630517842e-05, - "loss": 0.005127568915486336, - "step": 8085 - }, - { - "epoch": 1.3793691389599319, - "grad_norm": 0.07686682790517807, - "learning_rate": 5.6446399487754307e-05, - "loss": 0.006484859436750412, - "step": 8090 - }, - { - "epoch": 1.380221653878943, - "grad_norm": 0.10453952848911285, - "learning_rate": 5.6424283825520656e-05, - "loss": 0.007125881314277649, - "step": 8095 - }, - { - "epoch": 1.381074168797954, - "grad_norm": 0.08170976489782333, - "learning_rate": 5.640215932880181e-05, - "loss": 0.007152590900659561, - "step": 8100 - }, - { - "epoch": 1.381926683716965, - "grad_norm": 0.08639637380838394, - "learning_rate": 5.638002600792621e-05, - "loss": 0.006862475723028183, - "step": 8105 - }, - { - "epoch": 1.382779198635976, - "grad_norm": 0.061349738389253616, - "learning_rate": 5.635788387322642e-05, - "loss": 0.006520121544599533, - "step": 8110 - }, - { - "epoch": 1.3836317135549872, - "grad_norm": 0.09568873792886734, - "learning_rate": 5.633573293503915e-05, - "loss": 0.00690893828868866, - "step": 8115 - }, - { - "epoch": 1.3844842284739982, - "grad_norm": 0.05280910059809685, - "learning_rate": 5.631357320370518e-05, - "loss": 0.0068241022527217865, - "step": 8120 - }, - { - "epoch": 1.3853367433930095, - "grad_norm": 0.08307540416717529, - "learning_rate": 5.6291404689569406e-05, - "loss": 0.009796305000782013, - "step": 8125 - }, - { - "epoch": 1.3861892583120206, - "grad_norm": 0.06511564552783966, - "learning_rate": 5.6269227402980824e-05, - "loss": 0.00675605982542038, - "step": 8130 - }, - { - "epoch": 1.3870417732310316, - "grad_norm": 0.09521665424108505, - "learning_rate": 5.624704135429255e-05, - "loss": 0.00661565363407135, - "step": 8135 - }, - { - "epoch": 1.3878942881500427, - "grad_norm": 0.06467590481042862, - "learning_rate": 5.622484655386175e-05, - "loss": 0.007056808471679688, - "step": 8140 - }, - { - "epoch": 1.3887468030690537, - "grad_norm": 0.04240449517965317, - "learning_rate": 5.62026430120497e-05, - "loss": 0.005277678743004799, - "step": 8145 - }, - { - "epoch": 1.3895993179880648, - "grad_norm": 0.08462672680616379, - "learning_rate": 5.618043073922176e-05, - "loss": 0.005951377004384995, - "step": 8150 - }, - { - "epoch": 1.3904518329070759, - "grad_norm": 0.08304573595523834, - "learning_rate": 5.615820974574735e-05, - "loss": 0.006729351729154587, - "step": 8155 - }, - { - "epoch": 1.391304347826087, - "grad_norm": 0.04584382846951485, - "learning_rate": 5.6135980041999964e-05, - "loss": 0.00490913912653923, - "step": 8160 - }, - { - "epoch": 1.392156862745098, - "grad_norm": 0.06771710515022278, - "learning_rate": 5.6113741638357175e-05, - "loss": 0.007046511024236679, - "step": 8165 - }, - { - "epoch": 1.393009377664109, - "grad_norm": 0.06334209442138672, - "learning_rate": 5.609149454520062e-05, - "loss": 0.006314977258443833, - "step": 8170 - }, - { - "epoch": 1.39386189258312, - "grad_norm": 0.06783269345760345, - "learning_rate": 5.606923877291595e-05, - "loss": 0.006176649779081345, - "step": 8175 - }, - { - "epoch": 1.3947144075021312, - "grad_norm": 0.10245220363140106, - "learning_rate": 5.604697433189293e-05, - "loss": 0.006309907138347626, - "step": 8180 - }, - { - "epoch": 1.3955669224211424, - "grad_norm": 0.07151709496974945, - "learning_rate": 5.6024701232525325e-05, - "loss": 0.005038458108901978, - "step": 8185 - }, - { - "epoch": 1.3964194373401535, - "grad_norm": 0.08811933547258377, - "learning_rate": 5.600241948521099e-05, - "loss": 0.006065644696354866, - "step": 8190 - }, - { - "epoch": 1.3972719522591646, - "grad_norm": 0.07598903775215149, - "learning_rate": 5.5980129100351736e-05, - "loss": 0.006201237812638283, - "step": 8195 - }, - { - "epoch": 1.3981244671781756, - "grad_norm": 0.058092061430215836, - "learning_rate": 5.5957830088353475e-05, - "loss": 0.006383272260427475, - "step": 8200 - }, - { - "epoch": 1.3989769820971867, - "grad_norm": 0.18196560442447662, - "learning_rate": 5.593552245962616e-05, - "loss": 0.004768157005310058, - "step": 8205 - }, - { - "epoch": 1.3998294970161977, - "grad_norm": 0.09071574360132217, - "learning_rate": 5.591320622458369e-05, - "loss": 0.007671289891004562, - "step": 8210 - }, - { - "epoch": 1.4006820119352088, - "grad_norm": 0.09470858424901962, - "learning_rate": 5.589088139364405e-05, - "loss": 0.006691985577344894, - "step": 8215 - }, - { - "epoch": 1.40153452685422, - "grad_norm": 0.05345157906413078, - "learning_rate": 5.58685479772292e-05, - "loss": 0.005832263827323913, - "step": 8220 - }, - { - "epoch": 1.4023870417732311, - "grad_norm": 0.08154480904340744, - "learning_rate": 5.584620598576514e-05, - "loss": 0.00907905399799347, - "step": 8225 - }, - { - "epoch": 1.4032395566922422, - "grad_norm": 0.06621691584587097, - "learning_rate": 5.582385542968185e-05, - "loss": 0.005938088893890381, - "step": 8230 - }, - { - "epoch": 1.4040920716112533, - "grad_norm": 0.0557720884680748, - "learning_rate": 5.580149631941329e-05, - "loss": 0.005079039558768273, - "step": 8235 - }, - { - "epoch": 1.4049445865302643, - "grad_norm": 0.07839027792215347, - "learning_rate": 5.577912866539746e-05, - "loss": 0.006307472288608551, - "step": 8240 - }, - { - "epoch": 1.4057971014492754, - "grad_norm": 0.05926419049501419, - "learning_rate": 5.575675247807632e-05, - "loss": 0.0072102643549442295, - "step": 8245 - }, - { - "epoch": 1.4066496163682864, - "grad_norm": 0.0570182129740715, - "learning_rate": 5.5734367767895814e-05, - "loss": 0.0066485337913036345, - "step": 8250 - }, - { - "epoch": 1.4075021312872975, - "grad_norm": 0.0930657833814621, - "learning_rate": 5.571197454530588e-05, - "loss": 0.005854785442352295, - "step": 8255 - }, - { - "epoch": 1.4083546462063086, - "grad_norm": 0.06018427759408951, - "learning_rate": 5.568957282076041e-05, - "loss": 0.0049718767404556274, - "step": 8260 - }, - { - "epoch": 1.4092071611253196, - "grad_norm": 0.0889105498790741, - "learning_rate": 5.566716260471726e-05, - "loss": 0.005993577092885971, - "step": 8265 - }, - { - "epoch": 1.4100596760443307, - "grad_norm": 0.04429823160171509, - "learning_rate": 5.5644743907638294e-05, - "loss": 0.005357314646244049, - "step": 8270 - }, - { - "epoch": 1.4109121909633418, - "grad_norm": 0.054813142865896225, - "learning_rate": 5.5622316739989296e-05, - "loss": 0.005666692927479744, - "step": 8275 - }, - { - "epoch": 1.4117647058823528, - "grad_norm": 0.06909197568893433, - "learning_rate": 5.559988111224001e-05, - "loss": 0.005058525875210762, - "step": 8280 - }, - { - "epoch": 1.412617220801364, - "grad_norm": 0.10060004889965057, - "learning_rate": 5.557743703486413e-05, - "loss": 0.0070966087281703946, - "step": 8285 - }, - { - "epoch": 1.4134697357203752, - "grad_norm": 0.052008096128702164, - "learning_rate": 5.555498451833929e-05, - "loss": 0.006671085208654404, - "step": 8290 - }, - { - "epoch": 1.4143222506393862, - "grad_norm": 0.06272758543491364, - "learning_rate": 5.5532523573147094e-05, - "loss": 0.006071484088897705, - "step": 8295 - }, - { - "epoch": 1.4151747655583973, - "grad_norm": 0.08005380630493164, - "learning_rate": 5.551005420977304e-05, - "loss": 0.006429645419120789, - "step": 8300 - }, - { - "epoch": 1.4160272804774083, - "grad_norm": 0.08246695250272751, - "learning_rate": 5.548757643870659e-05, - "loss": 0.004599097743630409, - "step": 8305 - }, - { - "epoch": 1.4168797953964194, - "grad_norm": 0.1190599724650383, - "learning_rate": 5.54650902704411e-05, - "loss": 0.00652359127998352, - "step": 8310 - }, - { - "epoch": 1.4177323103154305, - "grad_norm": 0.042393747717142105, - "learning_rate": 5.5442595715473867e-05, - "loss": 0.004551848769187928, - "step": 8315 - }, - { - "epoch": 1.4185848252344417, - "grad_norm": 0.0809137374162674, - "learning_rate": 5.54200927843061e-05, - "loss": 0.0062880381941795346, - "step": 8320 - }, - { - "epoch": 1.4194373401534528, - "grad_norm": 0.09030820429325104, - "learning_rate": 5.5397581487442905e-05, - "loss": 0.007365265488624572, - "step": 8325 - }, - { - "epoch": 1.4202898550724639, - "grad_norm": 0.060766976326704025, - "learning_rate": 5.537506183539333e-05, - "loss": 0.0047208376228809355, - "step": 8330 - }, - { - "epoch": 1.421142369991475, - "grad_norm": 0.0763862356543541, - "learning_rate": 5.535253383867027e-05, - "loss": 0.006549081206321717, - "step": 8335 - }, - { - "epoch": 1.421994884910486, - "grad_norm": 0.13493886590003967, - "learning_rate": 5.532999750779056e-05, - "loss": 0.0075773999094963075, - "step": 8340 - }, - { - "epoch": 1.422847399829497, - "grad_norm": 0.07888541370630264, - "learning_rate": 5.53074528532749e-05, - "loss": 0.007893601059913635, - "step": 8345 - }, - { - "epoch": 1.423699914748508, - "grad_norm": 0.04488302394747734, - "learning_rate": 5.528489988564789e-05, - "loss": 0.006052879989147187, - "step": 8350 - }, - { - "epoch": 1.4245524296675192, - "grad_norm": 0.09534436464309692, - "learning_rate": 5.5262338615438e-05, - "loss": 0.006183170899748802, - "step": 8355 - }, - { - "epoch": 1.4254049445865302, - "grad_norm": 0.0796227753162384, - "learning_rate": 5.523976905317758e-05, - "loss": 0.006368820369243622, - "step": 8360 - }, - { - "epoch": 1.4262574595055413, - "grad_norm": 0.039230771362781525, - "learning_rate": 5.521719120940287e-05, - "loss": 0.005282421037554741, - "step": 8365 - }, - { - "epoch": 1.4271099744245523, - "grad_norm": 0.12020807713270187, - "learning_rate": 5.5194605094653935e-05, - "loss": 0.00718868374824524, - "step": 8370 - }, - { - "epoch": 1.4279624893435634, - "grad_norm": 0.07434894144535065, - "learning_rate": 5.5172010719474746e-05, - "loss": 0.007591472566127777, - "step": 8375 - }, - { - "epoch": 1.4288150042625745, - "grad_norm": 0.0722346156835556, - "learning_rate": 5.514940809441309e-05, - "loss": 0.005854631587862968, - "step": 8380 - }, - { - "epoch": 1.4296675191815857, - "grad_norm": 0.07834078371524811, - "learning_rate": 5.5126797230020634e-05, - "loss": 0.007415357977151871, - "step": 8385 - }, - { - "epoch": 1.4305200341005968, - "grad_norm": 0.08718696236610413, - "learning_rate": 5.5104178136852884e-05, - "loss": 0.007089633494615555, - "step": 8390 - }, - { - "epoch": 1.4313725490196079, - "grad_norm": 0.08823035657405853, - "learning_rate": 5.508155082546918e-05, - "loss": 0.007153714448213578, - "step": 8395 - }, - { - "epoch": 1.432225063938619, - "grad_norm": 0.07249119132757187, - "learning_rate": 5.505891530643269e-05, - "loss": 0.007651906460523605, - "step": 8400 - }, - { - "epoch": 1.43307757885763, - "grad_norm": 0.06284502893686295, - "learning_rate": 5.503627159031046e-05, - "loss": 0.007741397619247437, - "step": 8405 - }, - { - "epoch": 1.433930093776641, - "grad_norm": 0.06557357311248779, - "learning_rate": 5.501361968767331e-05, - "loss": 0.007656902819871902, - "step": 8410 - }, - { - "epoch": 1.434782608695652, - "grad_norm": 0.0775318294763565, - "learning_rate": 5.49909596090959e-05, - "loss": 0.006714560091495514, - "step": 8415 - }, - { - "epoch": 1.4356351236146634, - "grad_norm": 0.05347822234034538, - "learning_rate": 5.496829136515672e-05, - "loss": 0.0048537302762269975, - "step": 8420 - }, - { - "epoch": 1.4364876385336744, - "grad_norm": 0.07040467113256454, - "learning_rate": 5.4945614966438046e-05, - "loss": 0.005411979556083679, - "step": 8425 - }, - { - "epoch": 1.4373401534526855, - "grad_norm": 0.09473410993814468, - "learning_rate": 5.492293042352598e-05, - "loss": 0.008005911856889725, - "step": 8430 - }, - { - "epoch": 1.4381926683716966, - "grad_norm": 0.037446580827236176, - "learning_rate": 5.4900237747010426e-05, - "loss": 0.006237779557704925, - "step": 8435 - }, - { - "epoch": 1.4390451832907076, - "grad_norm": 0.11029476672410965, - "learning_rate": 5.4877536947485074e-05, - "loss": 0.008190502226352692, - "step": 8440 - }, - { - "epoch": 1.4398976982097187, - "grad_norm": 0.0514204315841198, - "learning_rate": 5.4854828035547424e-05, - "loss": 0.006500741839408875, - "step": 8445 - }, - { - "epoch": 1.4407502131287298, - "grad_norm": 0.08411483466625214, - "learning_rate": 5.483211102179873e-05, - "loss": 0.0053235463798046116, - "step": 8450 - }, - { - "epoch": 1.4416027280477408, - "grad_norm": 0.09279052913188934, - "learning_rate": 5.480938591684407e-05, - "loss": 0.006842000037431717, - "step": 8455 - }, - { - "epoch": 1.4424552429667519, - "grad_norm": 0.09881046414375305, - "learning_rate": 5.478665273129228e-05, - "loss": 0.007064050436019898, - "step": 8460 - }, - { - "epoch": 1.443307757885763, - "grad_norm": 0.09018172323703766, - "learning_rate": 5.476391147575595e-05, - "loss": 0.005222787708044052, - "step": 8465 - }, - { - "epoch": 1.444160272804774, - "grad_norm": 0.11489493399858475, - "learning_rate": 5.4741162160851455e-05, - "loss": 0.004823528230190277, - "step": 8470 - }, - { - "epoch": 1.445012787723785, - "grad_norm": 0.11010619252920151, - "learning_rate": 5.4718404797198955e-05, - "loss": 0.007554465532302856, - "step": 8475 - }, - { - "epoch": 1.4458653026427961, - "grad_norm": 0.10469060391187668, - "learning_rate": 5.469563939542233e-05, - "loss": 0.005817038565874099, - "step": 8480 - }, - { - "epoch": 1.4467178175618074, - "grad_norm": 0.06776002794504166, - "learning_rate": 5.467286596614922e-05, - "loss": 0.00899386927485466, - "step": 8485 - }, - { - "epoch": 1.4475703324808185, - "grad_norm": 0.08414942771196365, - "learning_rate": 5.4650084520011026e-05, - "loss": 0.00611347034573555, - "step": 8490 - }, - { - "epoch": 1.4484228473998295, - "grad_norm": 0.09625279158353806, - "learning_rate": 5.462729506764289e-05, - "loss": 0.005645812302827835, - "step": 8495 - }, - { - "epoch": 1.4492753623188406, - "grad_norm": 0.10020645707845688, - "learning_rate": 5.4604497619683674e-05, - "loss": 0.0058198563754558565, - "step": 8500 - }, - { - "epoch": 1.4501278772378516, - "grad_norm": 0.08466780185699463, - "learning_rate": 5.4581692186776e-05, - "loss": 0.005098164081573486, - "step": 8505 - }, - { - "epoch": 1.4509803921568627, - "grad_norm": 0.058955296874046326, - "learning_rate": 5.4558878779566194e-05, - "loss": 0.004072735831141472, - "step": 8510 - }, - { - "epoch": 1.4518329070758738, - "grad_norm": 0.14849397540092468, - "learning_rate": 5.4536057408704304e-05, - "loss": 0.011097650229930877, - "step": 8515 - }, - { - "epoch": 1.452685421994885, - "grad_norm": 0.08641809970140457, - "learning_rate": 5.451322808484413e-05, - "loss": 0.006210784614086151, - "step": 8520 - }, - { - "epoch": 1.453537936913896, - "grad_norm": 0.07506752014160156, - "learning_rate": 5.4490390818643136e-05, - "loss": 0.006071462482213974, - "step": 8525 - }, - { - "epoch": 1.4543904518329072, - "grad_norm": 0.10383405536413193, - "learning_rate": 5.4467545620762545e-05, - "loss": 0.008749781548976899, - "step": 8530 - }, - { - "epoch": 1.4552429667519182, - "grad_norm": 0.08180487155914307, - "learning_rate": 5.444469250186721e-05, - "loss": 0.00613279715180397, - "step": 8535 - }, - { - "epoch": 1.4560954816709293, - "grad_norm": 0.07797367125749588, - "learning_rate": 5.442183147262577e-05, - "loss": 0.005885690450668335, - "step": 8540 - }, - { - "epoch": 1.4569479965899403, - "grad_norm": 0.0780278891324997, - "learning_rate": 5.439896254371049e-05, - "loss": 0.007259850949048996, - "step": 8545 - }, - { - "epoch": 1.4578005115089514, - "grad_norm": 0.10005395114421844, - "learning_rate": 5.437608572579737e-05, - "loss": 0.0061523888260126116, - "step": 8550 - }, - { - "epoch": 1.4586530264279625, - "grad_norm": 0.10141763836145401, - "learning_rate": 5.435320102956604e-05, - "loss": 0.006501191109418869, - "step": 8555 - }, - { - "epoch": 1.4595055413469735, - "grad_norm": 0.0938732773065567, - "learning_rate": 5.4330308465699865e-05, - "loss": 0.008337517827749252, - "step": 8560 - }, - { - "epoch": 1.4603580562659846, - "grad_norm": 0.1085699051618576, - "learning_rate": 5.430740804488582e-05, - "loss": 0.005681714415550232, - "step": 8565 - }, - { - "epoch": 1.4612105711849956, - "grad_norm": 0.07967904955148697, - "learning_rate": 5.428449977781463e-05, - "loss": 0.006299185007810593, - "step": 8570 - }, - { - "epoch": 1.4620630861040067, - "grad_norm": 0.090158611536026, - "learning_rate": 5.426158367518061e-05, - "loss": 0.007821831852197647, - "step": 8575 - }, - { - "epoch": 1.4629156010230178, - "grad_norm": 0.12222256511449814, - "learning_rate": 5.4238659747681736e-05, - "loss": 0.0065193742513656614, - "step": 8580 - }, - { - "epoch": 1.463768115942029, - "grad_norm": 0.07724417746067047, - "learning_rate": 5.421572800601971e-05, - "loss": 0.00850745365023613, - "step": 8585 - }, - { - "epoch": 1.46462063086104, - "grad_norm": 0.07322543114423752, - "learning_rate": 5.4192788460899786e-05, - "loss": 0.006478501856327057, - "step": 8590 - }, - { - "epoch": 1.4654731457800512, - "grad_norm": 0.07086360454559326, - "learning_rate": 5.416984112303095e-05, - "loss": 0.007459370046854019, - "step": 8595 - }, - { - "epoch": 1.4663256606990622, - "grad_norm": 0.08460366725921631, - "learning_rate": 5.414688600312575e-05, - "loss": 0.006461035460233688, - "step": 8600 - }, - { - "epoch": 1.4671781756180733, - "grad_norm": 0.06856394559144974, - "learning_rate": 5.412392311190041e-05, - "loss": 0.007420676201581955, - "step": 8605 - }, - { - "epoch": 1.4680306905370843, - "grad_norm": 0.06801126897335052, - "learning_rate": 5.4100952460074766e-05, - "loss": 0.006456401199102402, - "step": 8610 - }, - { - "epoch": 1.4688832054560954, - "grad_norm": 0.06273184716701508, - "learning_rate": 5.4077974058372295e-05, - "loss": 0.00508052185177803, - "step": 8615 - }, - { - "epoch": 1.4697357203751067, - "grad_norm": 0.07751575112342834, - "learning_rate": 5.405498791752007e-05, - "loss": 0.006596812605857849, - "step": 8620 - }, - { - "epoch": 1.4705882352941178, - "grad_norm": 0.10850238054990768, - "learning_rate": 5.4031994048248776e-05, - "loss": 0.006385332345962525, - "step": 8625 - }, - { - "epoch": 1.4714407502131288, - "grad_norm": 0.07195930927991867, - "learning_rate": 5.4008992461292736e-05, - "loss": 0.007354143261909485, - "step": 8630 - }, - { - "epoch": 1.4722932651321399, - "grad_norm": 0.061606891453266144, - "learning_rate": 5.3985983167389846e-05, - "loss": 0.007285259664058685, - "step": 8635 - }, - { - "epoch": 1.473145780051151, - "grad_norm": 0.059549275785684586, - "learning_rate": 5.3962966177281616e-05, - "loss": 0.005211231112480163, - "step": 8640 - }, - { - "epoch": 1.473998294970162, - "grad_norm": 0.04548822343349457, - "learning_rate": 5.3939941501713146e-05, - "loss": 0.00805831179022789, - "step": 8645 - }, - { - "epoch": 1.474850809889173, - "grad_norm": 0.046682652086019516, - "learning_rate": 5.3916909151433096e-05, - "loss": 0.005787956342101097, - "step": 8650 - }, - { - "epoch": 1.4757033248081841, - "grad_norm": 0.06405246257781982, - "learning_rate": 5.3893869137193755e-05, - "loss": 0.005377359688282013, - "step": 8655 - }, - { - "epoch": 1.4765558397271952, - "grad_norm": 0.09410709887742996, - "learning_rate": 5.3870821469750964e-05, - "loss": 0.006961540877819061, - "step": 8660 - }, - { - "epoch": 1.4774083546462062, - "grad_norm": 0.0637243241071701, - "learning_rate": 5.384776615986414e-05, - "loss": 0.0060172989964485165, - "step": 8665 - }, - { - "epoch": 1.4782608695652173, - "grad_norm": 0.07082457840442657, - "learning_rate": 5.382470321829627e-05, - "loss": 0.005960140377283096, - "step": 8670 - }, - { - "epoch": 1.4791133844842284, - "grad_norm": 0.06502280384302139, - "learning_rate": 5.380163265581391e-05, - "loss": 0.005596417188644409, - "step": 8675 - }, - { - "epoch": 1.4799658994032396, - "grad_norm": 0.07504235208034515, - "learning_rate": 5.3778554483187134e-05, - "loss": 0.007427094876766205, - "step": 8680 - }, - { - "epoch": 1.4808184143222507, - "grad_norm": 0.08005198836326599, - "learning_rate": 5.375546871118964e-05, - "loss": 0.006888572126626968, - "step": 8685 - }, - { - "epoch": 1.4816709292412618, - "grad_norm": 0.1083201915025711, - "learning_rate": 5.373237535059861e-05, - "loss": 0.007253114879131317, - "step": 8690 - }, - { - "epoch": 1.4825234441602728, - "grad_norm": 0.060000013560056686, - "learning_rate": 5.37092744121948e-05, - "loss": 0.005570416525006294, - "step": 8695 - }, - { - "epoch": 1.4833759590792839, - "grad_norm": 0.04832584038376808, - "learning_rate": 5.3686165906762504e-05, - "loss": 0.005356843769550324, - "step": 8700 - }, - { - "epoch": 1.484228473998295, - "grad_norm": 0.061586812138557434, - "learning_rate": 5.3663049845089534e-05, - "loss": 0.005226074159145356, - "step": 8705 - }, - { - "epoch": 1.485080988917306, - "grad_norm": 0.08486256003379822, - "learning_rate": 5.363992623796724e-05, - "loss": 0.007083939760923386, - "step": 8710 - }, - { - "epoch": 1.485933503836317, - "grad_norm": 0.09085836261510849, - "learning_rate": 5.361679509619048e-05, - "loss": 0.005988218262791634, - "step": 8715 - }, - { - "epoch": 1.4867860187553283, - "grad_norm": 0.06301745027303696, - "learning_rate": 5.359365643055765e-05, - "loss": 0.00595020055770874, - "step": 8720 - }, - { - "epoch": 1.4876385336743394, - "grad_norm": 0.07939866930246353, - "learning_rate": 5.3570510251870646e-05, - "loss": 0.006101110950112343, - "step": 8725 - }, - { - "epoch": 1.4884910485933505, - "grad_norm": 0.10560661554336548, - "learning_rate": 5.354735657093487e-05, - "loss": 0.006781180202960968, - "step": 8730 - }, - { - "epoch": 1.4893435635123615, - "grad_norm": 0.10549639165401459, - "learning_rate": 5.352419539855925e-05, - "loss": 0.006455187499523163, - "step": 8735 - }, - { - "epoch": 1.4901960784313726, - "grad_norm": 0.06474289298057556, - "learning_rate": 5.3501026745556157e-05, - "loss": 0.0078111283481121065, - "step": 8740 - }, - { - "epoch": 1.4910485933503836, - "grad_norm": 0.11109986901283264, - "learning_rate": 5.3477850622741525e-05, - "loss": 0.00798504576086998, - "step": 8745 - }, - { - "epoch": 1.4919011082693947, - "grad_norm": 0.0787222608923912, - "learning_rate": 5.3454667040934715e-05, - "loss": 0.007222773879766465, - "step": 8750 - }, - { - "epoch": 1.4927536231884058, - "grad_norm": 0.06622221320867538, - "learning_rate": 5.3431476010958613e-05, - "loss": 0.0064462460577487946, - "step": 8755 - }, - { - "epoch": 1.4936061381074168, - "grad_norm": 0.07526405900716782, - "learning_rate": 5.340827754363955e-05, - "loss": 0.005344667285680771, - "step": 8760 - }, - { - "epoch": 1.4944586530264279, - "grad_norm": 0.08911366015672684, - "learning_rate": 5.338507164980734e-05, - "loss": 0.006722994893789291, - "step": 8765 - }, - { - "epoch": 1.495311167945439, - "grad_norm": 0.03749583289027214, - "learning_rate": 5.336185834029527e-05, - "loss": 0.006120331957936287, - "step": 8770 - }, - { - "epoch": 1.49616368286445, - "grad_norm": 0.08167645335197449, - "learning_rate": 5.333863762594008e-05, - "loss": 0.007496768981218338, - "step": 8775 - }, - { - "epoch": 1.4970161977834613, - "grad_norm": 0.09045904129743576, - "learning_rate": 5.3315409517581996e-05, - "loss": 0.007222528755664826, - "step": 8780 - }, - { - "epoch": 1.4978687127024723, - "grad_norm": 0.06064090132713318, - "learning_rate": 5.329217402606464e-05, - "loss": 0.0044986031949520115, - "step": 8785 - }, - { - "epoch": 1.4987212276214834, - "grad_norm": 0.07282263785600662, - "learning_rate": 5.3268931162235126e-05, - "loss": 0.005251912400126457, - "step": 8790 - }, - { - "epoch": 1.4995737425404945, - "grad_norm": 0.0674249604344368, - "learning_rate": 5.324568093694401e-05, - "loss": 0.006289477646350861, - "step": 8795 - }, - { - "epoch": 1.4997442455242966, - "eval_loss": 0.03760311380028725, - "eval_runtime": 3.668, - "eval_samples_per_second": 68.702, - "eval_steps_per_second": 1.091, - "step": 8796 - }, - { - "eval_cer_subset": 0.014184050678261437, - "eval_cer_subset_edit_distance": 871, - "eval_cer_subset_groups": 250, - "eval_cer_subset_items": 250, - "eval_cer_subset_ref_chars": 61407, - "step": 8796 - } - ], - "logging_steps": 5, - "max_steps": 23460, - "num_input_tokens_seen": 0, - "num_train_epochs": 4, - "save_steps": 2932, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 2.965379130359931e+18, - "train_batch_size": 32, - "trial_name": null, - "trial_params": null -} diff --git a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/training_args.bin b/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/training_args.bin deleted file mode 100644 index f049a0b30d4abb921310cf007655d399147294f8..0000000000000000000000000000000000000000 --- a/meta-llama__meta-llama-3.1-8b/sft/checkpoints/checkpoint-8796/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6adec376d54028ef57ef3dc856a5cba12bab9c0d580369637fa983b6072064f7 -size 5841