Text Generation
Transformers
Safetensors
English
Korean
gemma4_text
terminal
sft
vllm
tb2-lite
conversational
Instructions to use LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData") messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained("LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData") model = AutoModelForCausalLM.from_pretrained("LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData") messages = [ {"role": "user", "content": "Who are you?"}, ] inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData
- SGLang
How to use LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData with Docker Model Runner:
docker model run hf.co/LLM-OS-Models/gemma-4-31B-it-Terminal-SFT-1Epoch-HF-FSDP-2BData
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1467, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0006816632583503749, | |
| "grad_norm": 4383.8388671875, | |
| "learning_rate": 0.0, | |
| "loss": 6.527473449707031, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0013633265167007499, | |
| "grad_norm": 1823.0880126953125, | |
| "learning_rate": 2.247191011235955e-07, | |
| "loss": 7.508894443511963, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.002044989775051125, | |
| "grad_norm": 40114.21875, | |
| "learning_rate": 4.49438202247191e-07, | |
| "loss": 7.247808456420898, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0027266530334014998, | |
| "grad_norm": 575.8884887695312, | |
| "learning_rate": 6.741573033707865e-07, | |
| "loss": 6.052981376647949, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0034083162917518746, | |
| "grad_norm": 76116.6796875, | |
| "learning_rate": 8.98876404494382e-07, | |
| "loss": 5.867002964019775, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.00408997955010225, | |
| "grad_norm": 4967.98291015625, | |
| "learning_rate": 1.1235955056179777e-06, | |
| "loss": 6.827726364135742, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.004771642808452625, | |
| "grad_norm": 22014.96484375, | |
| "learning_rate": 1.348314606741573e-06, | |
| "loss": 7.764345169067383, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0054533060668029995, | |
| "grad_norm": 2535.458251953125, | |
| "learning_rate": 1.5730337078651686e-06, | |
| "loss": 6.417271137237549, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.006134969325153374, | |
| "grad_norm": 4581.03662109375, | |
| "learning_rate": 1.797752808988764e-06, | |
| "loss": 7.255514144897461, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.006816632583503749, | |
| "grad_norm": 4421.4501953125, | |
| "learning_rate": 2.02247191011236e-06, | |
| "loss": 4.907093048095703, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.007498295841854124, | |
| "grad_norm": 9041.5, | |
| "learning_rate": 2.2471910112359554e-06, | |
| "loss": 4.617020606994629, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0081799591002045, | |
| "grad_norm": 2413.45556640625, | |
| "learning_rate": 2.4719101123595505e-06, | |
| "loss": 5.7079010009765625, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.008861622358554875, | |
| "grad_norm": 1334.906982421875, | |
| "learning_rate": 2.696629213483146e-06, | |
| "loss": 3.1134910583496094, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.00954328561690525, | |
| "grad_norm": 2017.1741943359375, | |
| "learning_rate": 2.9213483146067416e-06, | |
| "loss": 4.945159912109375, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.010224948875255624, | |
| "grad_norm": 1304.0582275390625, | |
| "learning_rate": 3.146067415730337e-06, | |
| "loss": 4.624004364013672, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.010906612133605999, | |
| "grad_norm": 3951.13134765625, | |
| "learning_rate": 3.3707865168539327e-06, | |
| "loss": 3.8251986503601074, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.011588275391956374, | |
| "grad_norm": 906.8524780273438, | |
| "learning_rate": 3.595505617977528e-06, | |
| "loss": 3.313896656036377, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.012269938650306749, | |
| "grad_norm": 1372.37353515625, | |
| "learning_rate": 3.820224719101124e-06, | |
| "loss": 3.498051404953003, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.012951601908657124, | |
| "grad_norm": 494.13616943359375, | |
| "learning_rate": 4.04494382022472e-06, | |
| "loss": 3.268960952758789, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.013633265167007498, | |
| "grad_norm": 282.08331298828125, | |
| "learning_rate": 4.269662921348315e-06, | |
| "loss": 2.357489585876465, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.014314928425357873, | |
| "grad_norm": 3040.841064453125, | |
| "learning_rate": 4.494382022471911e-06, | |
| "loss": 3.6005187034606934, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.014996591683708248, | |
| "grad_norm": 1250.9658203125, | |
| "learning_rate": 4.719101123595506e-06, | |
| "loss": 3.457188129425049, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.015678254942058625, | |
| "grad_norm": 5198.23974609375, | |
| "learning_rate": 4.943820224719101e-06, | |
| "loss": 3.725776433944702, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.016359918200409, | |
| "grad_norm": 454.5555114746094, | |
| "learning_rate": 5.168539325842698e-06, | |
| "loss": 2.6974992752075195, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.017041581458759374, | |
| "grad_norm": 1461.07763671875, | |
| "learning_rate": 5.393258426966292e-06, | |
| "loss": 2.363149642944336, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.01772324471710975, | |
| "grad_norm": 223.5943145751953, | |
| "learning_rate": 5.617977528089889e-06, | |
| "loss": 1.918904185295105, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.018404907975460124, | |
| "grad_norm": 819.2132568359375, | |
| "learning_rate": 5.842696629213483e-06, | |
| "loss": 2.747713565826416, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0190865712338105, | |
| "grad_norm": 329.3885803222656, | |
| "learning_rate": 6.06741573033708e-06, | |
| "loss": 2.5517754554748535, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.019768234492160874, | |
| "grad_norm": 578.25927734375, | |
| "learning_rate": 6.292134831460674e-06, | |
| "loss": 3.0694191455841064, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.02044989775051125, | |
| "grad_norm": 521.046142578125, | |
| "learning_rate": 6.51685393258427e-06, | |
| "loss": 2.9562296867370605, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.021131561008861623, | |
| "grad_norm": 479.71722412109375, | |
| "learning_rate": 6.741573033707865e-06, | |
| "loss": 3.020545721054077, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.021813224267211998, | |
| "grad_norm": 734.2029418945312, | |
| "learning_rate": 6.966292134831461e-06, | |
| "loss": 3.466350555419922, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.022494887525562373, | |
| "grad_norm": 254.68246459960938, | |
| "learning_rate": 7.191011235955056e-06, | |
| "loss": 3.2190747261047363, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.023176550783912748, | |
| "grad_norm": 130.5963592529297, | |
| "learning_rate": 7.415730337078652e-06, | |
| "loss": 2.0983405113220215, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.023858214042263123, | |
| "grad_norm": 73.80171966552734, | |
| "learning_rate": 7.640449438202247e-06, | |
| "loss": 1.889816164970398, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.024539877300613498, | |
| "grad_norm": 56.68690490722656, | |
| "learning_rate": 7.865168539325843e-06, | |
| "loss": 3.1251683235168457, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.025221540558963872, | |
| "grad_norm": 97.23187255859375, | |
| "learning_rate": 8.08988764044944e-06, | |
| "loss": 3.2829296588897705, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.025903203817314247, | |
| "grad_norm": 135.62997436523438, | |
| "learning_rate": 8.314606741573035e-06, | |
| "loss": 2.9910526275634766, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.026584867075664622, | |
| "grad_norm": 77.11801147460938, | |
| "learning_rate": 8.53932584269663e-06, | |
| "loss": 3.5667967796325684, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.027266530334014997, | |
| "grad_norm": 141.92796325683594, | |
| "learning_rate": 8.764044943820226e-06, | |
| "loss": 1.9355766773223877, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02794819359236537, | |
| "grad_norm": 49.39057159423828, | |
| "learning_rate": 8.988764044943822e-06, | |
| "loss": 1.2001816034317017, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.028629856850715747, | |
| "grad_norm": 78.03803253173828, | |
| "learning_rate": 9.213483146067417e-06, | |
| "loss": 2.352545976638794, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.02931152010906612, | |
| "grad_norm": 26.599884033203125, | |
| "learning_rate": 9.438202247191012e-06, | |
| "loss": 2.0077102184295654, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.029993183367416496, | |
| "grad_norm": 49.84416961669922, | |
| "learning_rate": 9.662921348314608e-06, | |
| "loss": 2.672440528869629, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.03067484662576687, | |
| "grad_norm": 35.74471664428711, | |
| "learning_rate": 9.887640449438202e-06, | |
| "loss": 1.4557160139083862, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03135650988411725, | |
| "grad_norm": 29.79218864440918, | |
| "learning_rate": 1.01123595505618e-05, | |
| "loss": 2.77484393119812, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.032038173142467624, | |
| "grad_norm": 242.94088745117188, | |
| "learning_rate": 1.0337078651685396e-05, | |
| "loss": 1.2804923057556152, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.032719836400818, | |
| "grad_norm": 33.16373825073242, | |
| "learning_rate": 1.0561797752808988e-05, | |
| "loss": 1.6744909286499023, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.033401499659168374, | |
| "grad_norm": 47.615360260009766, | |
| "learning_rate": 1.0786516853932584e-05, | |
| "loss": 2.003718376159668, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.03408316291751875, | |
| "grad_norm": 37.541908264160156, | |
| "learning_rate": 1.101123595505618e-05, | |
| "loss": 2.387068271636963, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.034764826175869123, | |
| "grad_norm": 29.1586971282959, | |
| "learning_rate": 1.1235955056179778e-05, | |
| "loss": 2.4086151123046875, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.0354464894342195, | |
| "grad_norm": 16.95186996459961, | |
| "learning_rate": 1.146067415730337e-05, | |
| "loss": 1.283890962600708, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.03612815269256987, | |
| "grad_norm": 16.21533203125, | |
| "learning_rate": 1.1685393258426966e-05, | |
| "loss": 1.3941848278045654, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.03680981595092025, | |
| "grad_norm": 15.245625495910645, | |
| "learning_rate": 1.1910112359550562e-05, | |
| "loss": 1.799220323562622, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.03749147920927062, | |
| "grad_norm": 1592.2939453125, | |
| "learning_rate": 1.213483146067416e-05, | |
| "loss": 2.0396103858947754, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.038173142467621, | |
| "grad_norm": 19.32965087890625, | |
| "learning_rate": 1.2359550561797752e-05, | |
| "loss": 1.3870739936828613, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.03885480572597137, | |
| "grad_norm": 55.4034423828125, | |
| "learning_rate": 1.2584269662921348e-05, | |
| "loss": 2.1296072006225586, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.03953646898432175, | |
| "grad_norm": 82.31266784667969, | |
| "learning_rate": 1.2808988764044944e-05, | |
| "loss": 0.8768513798713684, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.04021813224267212, | |
| "grad_norm": 9.533697128295898, | |
| "learning_rate": 1.303370786516854e-05, | |
| "loss": 2.0535597801208496, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.0408997955010225, | |
| "grad_norm": 15.587124824523926, | |
| "learning_rate": 1.3258426966292135e-05, | |
| "loss": 1.9851542711257935, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04158145875937287, | |
| "grad_norm": 30.805431365966797, | |
| "learning_rate": 1.348314606741573e-05, | |
| "loss": 1.4596028327941895, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.04226312201772325, | |
| "grad_norm": 9.697044372558594, | |
| "learning_rate": 1.3707865168539327e-05, | |
| "loss": 1.6669340133666992, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.04294478527607362, | |
| "grad_norm": 27.938608169555664, | |
| "learning_rate": 1.3932584269662923e-05, | |
| "loss": 0.9620833396911621, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.043626448534423996, | |
| "grad_norm": 7.904890060424805, | |
| "learning_rate": 1.4157303370786517e-05, | |
| "loss": 1.2511191368103027, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.04430811179277437, | |
| "grad_norm": 6.319112777709961, | |
| "learning_rate": 1.4382022471910113e-05, | |
| "loss": 0.6878035068511963, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.044989775051124746, | |
| "grad_norm": 22.30373191833496, | |
| "learning_rate": 1.4606741573033709e-05, | |
| "loss": 2.279068946838379, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.04567143830947512, | |
| "grad_norm": 12.7396879196167, | |
| "learning_rate": 1.4831460674157305e-05, | |
| "loss": 1.6731181144714355, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.046353101567825496, | |
| "grad_norm": 20.310932159423828, | |
| "learning_rate": 1.5056179775280899e-05, | |
| "loss": 0.8521436452865601, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.04703476482617587, | |
| "grad_norm": 13.107906341552734, | |
| "learning_rate": 1.5280898876404495e-05, | |
| "loss": 1.511030912399292, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.047716428084526245, | |
| "grad_norm": 63.82345962524414, | |
| "learning_rate": 1.5505617977528093e-05, | |
| "loss": 1.378786563873291, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04839809134287662, | |
| "grad_norm": 19.864704132080078, | |
| "learning_rate": 1.5730337078651687e-05, | |
| "loss": 1.5483001470565796, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.049079754601226995, | |
| "grad_norm": 16.860614776611328, | |
| "learning_rate": 1.595505617977528e-05, | |
| "loss": 1.5748002529144287, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.04976141785957737, | |
| "grad_norm": 35.4315185546875, | |
| "learning_rate": 1.617977528089888e-05, | |
| "loss": 1.4997893571853638, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.050443081117927745, | |
| "grad_norm": 39.20589065551758, | |
| "learning_rate": 1.6404494382022473e-05, | |
| "loss": 2.2233829498291016, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.05112474437627812, | |
| "grad_norm": 1053.0841064453125, | |
| "learning_rate": 1.662921348314607e-05, | |
| "loss": 2.754089117050171, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.051806407634628494, | |
| "grad_norm": 13.371782302856445, | |
| "learning_rate": 1.6853932584269665e-05, | |
| "loss": 0.9811263680458069, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.05248807089297887, | |
| "grad_norm": 10.675305366516113, | |
| "learning_rate": 1.707865168539326e-05, | |
| "loss": 1.7070579528808594, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.053169734151329244, | |
| "grad_norm": 14.619195938110352, | |
| "learning_rate": 1.7303370786516857e-05, | |
| "loss": 1.7202574014663696, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.05385139740967962, | |
| "grad_norm": 3042.486083984375, | |
| "learning_rate": 1.752808988764045e-05, | |
| "loss": 1.9486947059631348, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.054533060668029994, | |
| "grad_norm": 55.10081481933594, | |
| "learning_rate": 1.7752808988764045e-05, | |
| "loss": 1.7752485275268555, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05521472392638037, | |
| "grad_norm": 19.896421432495117, | |
| "learning_rate": 1.7977528089887643e-05, | |
| "loss": 2.0431885719299316, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.05589638718473074, | |
| "grad_norm": 20.129390716552734, | |
| "learning_rate": 1.8202247191011237e-05, | |
| "loss": 1.767225980758667, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.05657805044308112, | |
| "grad_norm": 68.15953826904297, | |
| "learning_rate": 1.8426966292134835e-05, | |
| "loss": 1.4359931945800781, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.05725971370143149, | |
| "grad_norm": 33.0981330871582, | |
| "learning_rate": 1.8651685393258426e-05, | |
| "loss": 0.9191545248031616, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.05794137695978187, | |
| "grad_norm": 17.07698631286621, | |
| "learning_rate": 1.8876404494382024e-05, | |
| "loss": 1.7767210006713867, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.05862304021813224, | |
| "grad_norm": 27.136503219604492, | |
| "learning_rate": 1.910112359550562e-05, | |
| "loss": 1.3165686130523682, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.05930470347648262, | |
| "grad_norm": 234.57733154296875, | |
| "learning_rate": 1.9325842696629215e-05, | |
| "loss": 1.5632712841033936, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.05998636673483299, | |
| "grad_norm": 164.1422882080078, | |
| "learning_rate": 1.955056179775281e-05, | |
| "loss": 1.1874092817306519, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.06066802999318337, | |
| "grad_norm": 34.60917663574219, | |
| "learning_rate": 1.9775280898876404e-05, | |
| "loss": 0.8331537842750549, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.06134969325153374, | |
| "grad_norm": 10.945159912109375, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2834336757659912, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.06203135650988412, | |
| "grad_norm": 15.15926742553711, | |
| "learning_rate": 1.9999993903154863e-05, | |
| "loss": 1.7447407245635986, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.0627130197682345, | |
| "grad_norm": 144.3293914794922, | |
| "learning_rate": 1.9999975612626872e-05, | |
| "loss": 1.3708195686340332, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.06339468302658487, | |
| "grad_norm": 25.28166961669922, | |
| "learning_rate": 1.9999945128438338e-05, | |
| "loss": 2.8497724533081055, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.06407634628493525, | |
| "grad_norm": 19.966693878173828, | |
| "learning_rate": 1.9999902450626434e-05, | |
| "loss": 1.6539599895477295, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.06475800954328562, | |
| "grad_norm": 15.393426895141602, | |
| "learning_rate": 1.9999847579243196e-05, | |
| "loss": 2.0296273231506348, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.065439672801636, | |
| "grad_norm": 11.24446964263916, | |
| "learning_rate": 1.9999780514355533e-05, | |
| "loss": 1.3424549102783203, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.06612133605998637, | |
| "grad_norm": 27.612884521484375, | |
| "learning_rate": 1.9999701256045223e-05, | |
| "loss": 1.194828987121582, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.06680299931833675, | |
| "grad_norm": 12.825202941894531, | |
| "learning_rate": 1.999960980440891e-05, | |
| "loss": 0.8488439321517944, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.06748466257668712, | |
| "grad_norm": 1464.5302734375, | |
| "learning_rate": 1.9999506159558107e-05, | |
| "loss": 0.9869670867919922, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.0681663258350375, | |
| "grad_norm": 111.20191955566406, | |
| "learning_rate": 1.9999390321619196e-05, | |
| "loss": 1.3429677486419678, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06884798909338787, | |
| "grad_norm": 16.313430786132812, | |
| "learning_rate": 1.9999262290733427e-05, | |
| "loss": 1.9419188499450684, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.06952965235173825, | |
| "grad_norm": 9.3142728805542, | |
| "learning_rate": 1.9999122067056915e-05, | |
| "loss": 1.5300267934799194, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.07021131561008861, | |
| "grad_norm": 22.08498191833496, | |
| "learning_rate": 1.9998969650760646e-05, | |
| "loss": 1.9083926677703857, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.070892978868439, | |
| "grad_norm": 15.76110553741455, | |
| "learning_rate": 1.9998805042030472e-05, | |
| "loss": 1.6124863624572754, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.07157464212678936, | |
| "grad_norm": 30.93997573852539, | |
| "learning_rate": 1.9998628241067113e-05, | |
| "loss": 0.9653267860412598, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.07225630538513975, | |
| "grad_norm": 12.660534858703613, | |
| "learning_rate": 1.999843924808615e-05, | |
| "loss": 1.7511203289031982, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.07293796864349011, | |
| "grad_norm": 18.625757217407227, | |
| "learning_rate": 1.999823806331804e-05, | |
| "loss": 1.297489881515503, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.0736196319018405, | |
| "grad_norm": 56.3899040222168, | |
| "learning_rate": 1.9998024687008098e-05, | |
| "loss": 1.449361801147461, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.07430129516019086, | |
| "grad_norm": 162.703857421875, | |
| "learning_rate": 1.9997799119416508e-05, | |
| "loss": 1.7976202964782715, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.07498295841854125, | |
| "grad_norm": 9.930502891540527, | |
| "learning_rate": 1.9997561360818322e-05, | |
| "loss": 1.5131696462631226, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.07566462167689161, | |
| "grad_norm": 31.915830612182617, | |
| "learning_rate": 1.999731141150346e-05, | |
| "loss": 1.3789496421813965, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.076346284935242, | |
| "grad_norm": 11.755589485168457, | |
| "learning_rate": 1.9997049271776693e-05, | |
| "loss": 1.747565746307373, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.07702794819359236, | |
| "grad_norm": 31.262149810791016, | |
| "learning_rate": 1.9996774941957673e-05, | |
| "loss": 1.4239745140075684, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.07770961145194274, | |
| "grad_norm": 63.65169143676758, | |
| "learning_rate": 1.999648842238091e-05, | |
| "loss": 1.815131425857544, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.07839127471029311, | |
| "grad_norm": 18.11605453491211, | |
| "learning_rate": 1.999618971339577e-05, | |
| "loss": 1.7555679082870483, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.0790729379686435, | |
| "grad_norm": 160.328369140625, | |
| "learning_rate": 1.9995878815366498e-05, | |
| "loss": 1.8470728397369385, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.07975460122699386, | |
| "grad_norm": 619.9039916992188, | |
| "learning_rate": 1.999555572867218e-05, | |
| "loss": 1.7569350004196167, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.08043626448534424, | |
| "grad_norm": 19.946969985961914, | |
| "learning_rate": 1.9995220453706797e-05, | |
| "loss": 1.3383402824401855, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.08111792774369461, | |
| "grad_norm": 20.062156677246094, | |
| "learning_rate": 1.9994872990879163e-05, | |
| "loss": 1.3586251735687256, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.081799591002045, | |
| "grad_norm": 19.174461364746094, | |
| "learning_rate": 1.9994513340612957e-05, | |
| "loss": 1.8789781332015991, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.08248125426039536, | |
| "grad_norm": 14.19649887084961, | |
| "learning_rate": 1.9994141503346735e-05, | |
| "loss": 1.6669260263442993, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.08316291751874574, | |
| "grad_norm": 182.03321838378906, | |
| "learning_rate": 1.99937574795339e-05, | |
| "loss": 2.1706442832946777, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.08384458077709611, | |
| "grad_norm": 25.119983673095703, | |
| "learning_rate": 1.999336126964272e-05, | |
| "loss": 1.6401561498641968, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.0845262440354465, | |
| "grad_norm": 11.802164077758789, | |
| "learning_rate": 1.9992952874156323e-05, | |
| "loss": 2.053577184677124, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.08520790729379686, | |
| "grad_norm": 53.97670364379883, | |
| "learning_rate": 1.9992532293572688e-05, | |
| "loss": 0.6542696952819824, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.08588957055214724, | |
| "grad_norm": 23.286149978637695, | |
| "learning_rate": 1.9992099528404664e-05, | |
| "loss": 1.7017326354980469, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.08657123381049761, | |
| "grad_norm": 17.239919662475586, | |
| "learning_rate": 1.9991654579179948e-05, | |
| "loss": 1.2679834365844727, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.08725289706884799, | |
| "grad_norm": 16.885732650756836, | |
| "learning_rate": 1.9991197446441096e-05, | |
| "loss": 1.4786620140075684, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.08793456032719836, | |
| "grad_norm": 23.140071868896484, | |
| "learning_rate": 1.9990728130745524e-05, | |
| "loss": 1.5406410694122314, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.08861622358554874, | |
| "grad_norm": 14.451329231262207, | |
| "learning_rate": 1.9990246632665503e-05, | |
| "loss": 1.145501732826233, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.08929788684389911, | |
| "grad_norm": 10.32580852508545, | |
| "learning_rate": 1.998975295278815e-05, | |
| "loss": 1.1438711881637573, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.08997955010224949, | |
| "grad_norm": 10.931632041931152, | |
| "learning_rate": 1.9989247091715454e-05, | |
| "loss": 2.1025326251983643, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.09066121336059986, | |
| "grad_norm": 6.50631856918335, | |
| "learning_rate": 1.998872905006423e-05, | |
| "loss": 1.4954837560653687, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.09134287661895024, | |
| "grad_norm": 4.921420574188232, | |
| "learning_rate": 1.9988198828466182e-05, | |
| "loss": 1.5103764533996582, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.09202453987730061, | |
| "grad_norm": 5.854800224304199, | |
| "learning_rate": 1.998765642756783e-05, | |
| "loss": 1.2965787649154663, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.09270620313565099, | |
| "grad_norm": 9.764752388000488, | |
| "learning_rate": 1.9987101848030566e-05, | |
| "loss": 1.9255067110061646, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.09338786639400136, | |
| "grad_norm": 7.5960798263549805, | |
| "learning_rate": 1.998653509053063e-05, | |
| "loss": 1.791310429573059, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.09406952965235174, | |
| "grad_norm": 6.565906524658203, | |
| "learning_rate": 1.9985956155759104e-05, | |
| "loss": 1.3865487575531006, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.09475119291070211, | |
| "grad_norm": 14.060343742370605, | |
| "learning_rate": 1.9985365044421926e-05, | |
| "loss": 2.089996814727783, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.09543285616905249, | |
| "grad_norm": 7.5861687660217285, | |
| "learning_rate": 1.9984761757239878e-05, | |
| "loss": 0.8783048391342163, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.09611451942740286, | |
| "grad_norm": 7.125285625457764, | |
| "learning_rate": 1.9984146294948585e-05, | |
| "loss": 1.721929669380188, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.09679618268575324, | |
| "grad_norm": 7.363461971282959, | |
| "learning_rate": 1.998351865829853e-05, | |
| "loss": 0.9673935770988464, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.09747784594410361, | |
| "grad_norm": 4.273559093475342, | |
| "learning_rate": 1.9982878848055036e-05, | |
| "loss": 1.318180799484253, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.09815950920245399, | |
| "grad_norm": 6.188460350036621, | |
| "learning_rate": 1.9982226864998256e-05, | |
| "loss": 1.5355404615402222, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.09884117246080436, | |
| "grad_norm": 3.5846171379089355, | |
| "learning_rate": 1.998156270992321e-05, | |
| "loss": 1.4460152387619019, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.09952283571915474, | |
| "grad_norm": 4.710892200469971, | |
| "learning_rate": 1.998088638363974e-05, | |
| "loss": 1.4085346460342407, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.10020449897750511, | |
| "grad_norm": 11.747404098510742, | |
| "learning_rate": 1.998019788697254e-05, | |
| "loss": 1.7807292938232422, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.10088616223585549, | |
| "grad_norm": 3.7539148330688477, | |
| "learning_rate": 1.9979497220761142e-05, | |
| "loss": 1.1747300624847412, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.10156782549420586, | |
| "grad_norm": 6.504991054534912, | |
| "learning_rate": 1.997878438585992e-05, | |
| "loss": 1.5703426599502563, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.10224948875255624, | |
| "grad_norm": 5.009849548339844, | |
| "learning_rate": 1.9978059383138073e-05, | |
| "loss": 0.9697821736335754, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1029311520109066, | |
| "grad_norm": 3.9481139183044434, | |
| "learning_rate": 1.9977322213479655e-05, | |
| "loss": 1.7657496929168701, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.10361281526925699, | |
| "grad_norm": 5.673348903656006, | |
| "learning_rate": 1.9976572877783548e-05, | |
| "loss": 0.8409373164176941, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.10429447852760736, | |
| "grad_norm": 3.3935084342956543, | |
| "learning_rate": 1.9975811376963464e-05, | |
| "loss": 1.313270926475525, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.10497614178595774, | |
| "grad_norm": 4.8457465171813965, | |
| "learning_rate": 1.997503771194796e-05, | |
| "loss": 1.44279146194458, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.1056578050443081, | |
| "grad_norm": 5.6174116134643555, | |
| "learning_rate": 1.997425188368041e-05, | |
| "loss": 1.989031434059143, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.10633946830265849, | |
| "grad_norm": 13.403592109680176, | |
| "learning_rate": 1.9973453893119033e-05, | |
| "loss": 0.7220730185508728, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.10702113156100886, | |
| "grad_norm": 4.496342658996582, | |
| "learning_rate": 1.9972643741236882e-05, | |
| "loss": 1.0831682682037354, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.10770279481935924, | |
| "grad_norm": 3.356137752532959, | |
| "learning_rate": 1.9971821429021817e-05, | |
| "loss": 1.2503924369812012, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.1083844580777096, | |
| "grad_norm": 3.4993855953216553, | |
| "learning_rate": 1.997098695747655e-05, | |
| "loss": 0.8627510070800781, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.10906612133605999, | |
| "grad_norm": 11.131978988647461, | |
| "learning_rate": 1.9970140327618612e-05, | |
| "loss": 2.386359214782715, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.10974778459441036, | |
| "grad_norm": 4.573759078979492, | |
| "learning_rate": 1.9969281540480346e-05, | |
| "loss": 1.5444923639297485, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.11042944785276074, | |
| "grad_norm": 7.6408538818359375, | |
| "learning_rate": 1.9968410597108935e-05, | |
| "loss": 1.550155520439148, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 9.762516975402832, | |
| "learning_rate": 1.9967527498566387e-05, | |
| "loss": 1.9585456848144531, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.11179277436946149, | |
| "grad_norm": 9.570932388305664, | |
| "learning_rate": 1.9966632245929515e-05, | |
| "loss": 1.7024056911468506, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.11247443762781185, | |
| "grad_norm": 556.8116455078125, | |
| "learning_rate": 1.9965724840289972e-05, | |
| "loss": 0.9666114449501038, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.11315610088616224, | |
| "grad_norm": 11.168703079223633, | |
| "learning_rate": 1.996480528275421e-05, | |
| "loss": 1.1300976276397705, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.1138377641445126, | |
| "grad_norm": 17.5162410736084, | |
| "learning_rate": 1.996387357444352e-05, | |
| "loss": 1.6300952434539795, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.11451942740286299, | |
| "grad_norm": 9.438365936279297, | |
| "learning_rate": 1.9962929716493987e-05, | |
| "loss": 1.9170794486999512, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.11520109066121335, | |
| "grad_norm": 4.525749683380127, | |
| "learning_rate": 1.9961973710056535e-05, | |
| "loss": 0.7288682460784912, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.11588275391956374, | |
| "grad_norm": 9.106972694396973, | |
| "learning_rate": 1.9961005556296875e-05, | |
| "loss": 1.1762197017669678, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1165644171779141, | |
| "grad_norm": 5.086156845092773, | |
| "learning_rate": 1.9960025256395556e-05, | |
| "loss": 0.9640843868255615, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.11724608043626449, | |
| "grad_norm": 5.4781012535095215, | |
| "learning_rate": 1.9959032811547912e-05, | |
| "loss": 1.398245096206665, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.11792774369461487, | |
| "grad_norm": 8.261008262634277, | |
| "learning_rate": 1.9958028222964114e-05, | |
| "loss": 1.9427658319473267, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.11860940695296524, | |
| "grad_norm": 10.69827651977539, | |
| "learning_rate": 1.9957011491869118e-05, | |
| "loss": 2.783135414123535, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.11929107021131562, | |
| "grad_norm": 14.744508743286133, | |
| "learning_rate": 1.9955982619502693e-05, | |
| "loss": 1.108130931854248, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.11997273346966598, | |
| "grad_norm": 12.835195541381836, | |
| "learning_rate": 1.995494160711942e-05, | |
| "loss": 1.1446030139923096, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.12065439672801637, | |
| "grad_norm": 19.859210968017578, | |
| "learning_rate": 1.9953888455988674e-05, | |
| "loss": 1.5773301124572754, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.12133605998636673, | |
| "grad_norm": 8.095100402832031, | |
| "learning_rate": 1.995282316739463e-05, | |
| "loss": 1.3076493740081787, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.12201772324471712, | |
| "grad_norm": 53.70732498168945, | |
| "learning_rate": 1.995174574263628e-05, | |
| "loss": 1.727512240409851, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.12269938650306748, | |
| "grad_norm": 8.65125846862793, | |
| "learning_rate": 1.9950656183027392e-05, | |
| "loss": 1.158308506011963, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.12338104976141787, | |
| "grad_norm": 10.54161262512207, | |
| "learning_rate": 1.9949554489896542e-05, | |
| "loss": 2.202761173248291, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.12406271301976823, | |
| "grad_norm": 10.052018165588379, | |
| "learning_rate": 1.994844066458711e-05, | |
| "loss": 2.5571885108947754, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.12474437627811862, | |
| "grad_norm": 18.71660804748535, | |
| "learning_rate": 1.9947314708457245e-05, | |
| "loss": 0.9831675887107849, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.125426039536469, | |
| "grad_norm": 25.179962158203125, | |
| "learning_rate": 1.9946176622879915e-05, | |
| "loss": 1.7804163694381714, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.12610770279481937, | |
| "grad_norm": 10.924018859863281, | |
| "learning_rate": 1.994502640924286e-05, | |
| "loss": 1.2503122091293335, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.12678936605316973, | |
| "grad_norm": 16.85798454284668, | |
| "learning_rate": 1.994386406894862e-05, | |
| "loss": 1.3904640674591064, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.1274710293115201, | |
| "grad_norm": 12.353588104248047, | |
| "learning_rate": 1.9942689603414513e-05, | |
| "loss": 1.937730312347412, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.1281526925698705, | |
| "grad_norm": 17.090003967285156, | |
| "learning_rate": 1.9941503014072646e-05, | |
| "loss": 2.1551363468170166, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.12883435582822086, | |
| "grad_norm": 6.396425724029541, | |
| "learning_rate": 1.9940304302369912e-05, | |
| "loss": 1.046122670173645, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.12951601908657123, | |
| "grad_norm": 5.918776988983154, | |
| "learning_rate": 1.993909346976798e-05, | |
| "loss": 0.5687689781188965, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1301976823449216, | |
| "grad_norm": 5.968915939331055, | |
| "learning_rate": 1.9937870517743304e-05, | |
| "loss": 0.9140273332595825, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.130879345603272, | |
| "grad_norm": 23.541154861450195, | |
| "learning_rate": 1.9936635447787112e-05, | |
| "loss": 0.6421869397163391, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.13156100886162236, | |
| "grad_norm": 20.733173370361328, | |
| "learning_rate": 1.9935388261405412e-05, | |
| "loss": 1.4283093214035034, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.13224267211997273, | |
| "grad_norm": 5.4685821533203125, | |
| "learning_rate": 1.993412896011898e-05, | |
| "loss": 0.9551104307174683, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.1329243353783231, | |
| "grad_norm": 6.2597737312316895, | |
| "learning_rate": 1.993285754546338e-05, | |
| "loss": 1.2597966194152832, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.1336059986366735, | |
| "grad_norm": 470.6517028808594, | |
| "learning_rate": 1.9931574018988916e-05, | |
| "loss": 1.691474437713623, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.13428766189502386, | |
| "grad_norm": 5.0026469230651855, | |
| "learning_rate": 1.99302783822607e-05, | |
| "loss": 0.9203601479530334, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.13496932515337423, | |
| "grad_norm": 3.4127256870269775, | |
| "learning_rate": 1.9928970636858584e-05, | |
| "loss": 0.9550940990447998, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.1356509884117246, | |
| "grad_norm": 5.123717784881592, | |
| "learning_rate": 1.992765078437719e-05, | |
| "loss": 2.055361747741699, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.136332651670075, | |
| "grad_norm": 3.371704339981079, | |
| "learning_rate": 1.9926318826425905e-05, | |
| "loss": 1.0328929424285889, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.13701431492842536, | |
| "grad_norm": 3.506957769393921, | |
| "learning_rate": 1.992497476462888e-05, | |
| "loss": 0.9359508156776428, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.13769597818677573, | |
| "grad_norm": 2.7216708660125732, | |
| "learning_rate": 1.9923618600625025e-05, | |
| "loss": 1.400390386581421, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.1383776414451261, | |
| "grad_norm": 4.871631145477295, | |
| "learning_rate": 1.9922250336068e-05, | |
| "loss": 1.239612102508545, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.1390593047034765, | |
| "grad_norm": 2.473804473876953, | |
| "learning_rate": 1.9920869972626225e-05, | |
| "loss": 1.3447606563568115, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.13974096796182686, | |
| "grad_norm": 6.894538879394531, | |
| "learning_rate": 1.9919477511982873e-05, | |
| "loss": 1.5631332397460938, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.14042263122017723, | |
| "grad_norm": 3.913583517074585, | |
| "learning_rate": 1.991807295583587e-05, | |
| "loss": 1.618501901626587, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.1411042944785276, | |
| "grad_norm": 3.0639336109161377, | |
| "learning_rate": 1.991665630589788e-05, | |
| "loss": 1.318312644958496, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.141785957736878, | |
| "grad_norm": 5.246321678161621, | |
| "learning_rate": 1.9915227563896327e-05, | |
| "loss": 0.876002848148346, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.14246762099522836, | |
| "grad_norm": 3.295959949493408, | |
| "learning_rate": 1.9913786731573382e-05, | |
| "loss": 0.4557987451553345, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.14314928425357873, | |
| "grad_norm": 3.8275537490844727, | |
| "learning_rate": 1.991233381068594e-05, | |
| "loss": 0.9583624601364136, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1438309475119291, | |
| "grad_norm": 4.7201128005981445, | |
| "learning_rate": 1.991086880300565e-05, | |
| "loss": 1.0539599657058716, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.1445126107702795, | |
| "grad_norm": 4.5735039710998535, | |
| "learning_rate": 1.9909391710318907e-05, | |
| "loss": 0.9567979574203491, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.14519427402862986, | |
| "grad_norm": 2.669004440307617, | |
| "learning_rate": 1.990790253442682e-05, | |
| "loss": 0.7672552466392517, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.14587593728698023, | |
| "grad_norm": 9.868934631347656, | |
| "learning_rate": 1.990640127714525e-05, | |
| "loss": 2.299375057220459, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.1465576005453306, | |
| "grad_norm": 4.517803192138672, | |
| "learning_rate": 1.990488794030478e-05, | |
| "loss": 0.5457000732421875, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.147239263803681, | |
| "grad_norm": 2.341135263442993, | |
| "learning_rate": 1.990336252575073e-05, | |
| "loss": 0.6983858346939087, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.14792092706203136, | |
| "grad_norm": 2.1451797485351562, | |
| "learning_rate": 1.990182503534314e-05, | |
| "loss": 0.9473968148231506, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.14860259032038173, | |
| "grad_norm": 2.1667206287384033, | |
| "learning_rate": 1.9900275470956778e-05, | |
| "loss": 0.6302728056907654, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.1492842535787321, | |
| "grad_norm": 6.366608619689941, | |
| "learning_rate": 1.9898713834481137e-05, | |
| "loss": 1.6893022060394287, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.1499659168370825, | |
| "grad_norm": 2.48405122756958, | |
| "learning_rate": 1.9897140127820432e-05, | |
| "loss": 0.5234397053718567, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.15064758009543286, | |
| "grad_norm": 4.939483642578125, | |
| "learning_rate": 1.9895554352893584e-05, | |
| "loss": 1.7856894731521606, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.15132924335378323, | |
| "grad_norm": 12.190360069274902, | |
| "learning_rate": 1.9893956511634242e-05, | |
| "loss": 1.9232455492019653, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.1520109066121336, | |
| "grad_norm": 8.300911903381348, | |
| "learning_rate": 1.9892346605990764e-05, | |
| "loss": 0.9950571656227112, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.152692569870484, | |
| "grad_norm": 6.4139814376831055, | |
| "learning_rate": 1.989072463792622e-05, | |
| "loss": 1.2645041942596436, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.15337423312883436, | |
| "grad_norm": 6.562028408050537, | |
| "learning_rate": 1.9889090609418384e-05, | |
| "loss": 2.0295963287353516, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.15405589638718473, | |
| "grad_norm": 5.604927062988281, | |
| "learning_rate": 1.9887444522459743e-05, | |
| "loss": 1.2279257774353027, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.1547375596455351, | |
| "grad_norm": 5.523647785186768, | |
| "learning_rate": 1.9885786379057487e-05, | |
| "loss": 2.1371541023254395, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.1554192229038855, | |
| "grad_norm": 5.242480754852295, | |
| "learning_rate": 1.9884116181233496e-05, | |
| "loss": 1.2968311309814453, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.15610088616223586, | |
| "grad_norm": 5.943783283233643, | |
| "learning_rate": 1.9882433931024367e-05, | |
| "loss": 0.5952497124671936, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.15678254942058623, | |
| "grad_norm": 210.10638427734375, | |
| "learning_rate": 1.9880739630481376e-05, | |
| "loss": 1.4420039653778076, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1574642126789366, | |
| "grad_norm": 5.810100078582764, | |
| "learning_rate": 1.9879033281670508e-05, | |
| "loss": 1.5601913928985596, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.158145875937287, | |
| "grad_norm": 5.746815204620361, | |
| "learning_rate": 1.9877314886672424e-05, | |
| "loss": 1.6530776023864746, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.15882753919563736, | |
| "grad_norm": 4.114201545715332, | |
| "learning_rate": 1.987558444758249e-05, | |
| "loss": 1.1385934352874756, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.15950920245398773, | |
| "grad_norm": 4.840907096862793, | |
| "learning_rate": 1.9873841966510744e-05, | |
| "loss": 1.0146422386169434, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.1601908657123381, | |
| "grad_norm": 3.263094425201416, | |
| "learning_rate": 1.9872087445581912e-05, | |
| "loss": 1.216705560684204, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.1608725289706885, | |
| "grad_norm": 5.41212797164917, | |
| "learning_rate": 1.987032088693541e-05, | |
| "loss": 1.4565320014953613, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.16155419222903886, | |
| "grad_norm": 5.620246410369873, | |
| "learning_rate": 1.9868542292725316e-05, | |
| "loss": 1.5943286418914795, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.16223585548738922, | |
| "grad_norm": 7.258380889892578, | |
| "learning_rate": 1.9866751665120398e-05, | |
| "loss": 0.7804931402206421, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.1629175187457396, | |
| "grad_norm": 20.362430572509766, | |
| "learning_rate": 1.9864949006304094e-05, | |
| "loss": 1.0291253328323364, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.16359918200409, | |
| "grad_norm": 4.7177300453186035, | |
| "learning_rate": 1.9863134318474504e-05, | |
| "loss": 1.457777738571167, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.16428084526244036, | |
| "grad_norm": 3.546522378921509, | |
| "learning_rate": 1.986130760384441e-05, | |
| "loss": 1.5622825622558594, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.16496250852079072, | |
| "grad_norm": 3.295022964477539, | |
| "learning_rate": 1.9859468864641242e-05, | |
| "loss": 0.9937570095062256, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.1656441717791411, | |
| "grad_norm": 4.15577507019043, | |
| "learning_rate": 1.985761810310711e-05, | |
| "loss": 1.1751668453216553, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.1663258350374915, | |
| "grad_norm": 7.615987777709961, | |
| "learning_rate": 1.985575532149877e-05, | |
| "loss": 1.6934175491333008, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.16700749829584186, | |
| "grad_norm": 4.522923469543457, | |
| "learning_rate": 1.985388052208764e-05, | |
| "loss": 1.5461843013763428, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.16768916155419222, | |
| "grad_norm": 4.890939712524414, | |
| "learning_rate": 1.9851993707159794e-05, | |
| "loss": 0.9180086255073547, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.1683708248125426, | |
| "grad_norm": 7.1219096183776855, | |
| "learning_rate": 1.985009487901596e-05, | |
| "loss": 1.4076170921325684, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.169052488070893, | |
| "grad_norm": 6.1358184814453125, | |
| "learning_rate": 1.9848184039971502e-05, | |
| "loss": 1.320831060409546, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.16973415132924335, | |
| "grad_norm": 5.749042510986328, | |
| "learning_rate": 1.9846261192356442e-05, | |
| "loss": 1.53205406665802, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.17041581458759372, | |
| "grad_norm": 2.8806917667388916, | |
| "learning_rate": 1.9844326338515444e-05, | |
| "loss": 1.121159553527832, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1710974778459441, | |
| "grad_norm": 3.486462354660034, | |
| "learning_rate": 1.9842379480807804e-05, | |
| "loss": 0.8663906455039978, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.17177914110429449, | |
| "grad_norm": 2.8455584049224854, | |
| "learning_rate": 1.984042062160746e-05, | |
| "loss": 1.0783741474151611, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.17246080436264485, | |
| "grad_norm": 8.036139488220215, | |
| "learning_rate": 1.983844976330299e-05, | |
| "loss": 1.7741141319274902, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.17314246762099522, | |
| "grad_norm": 6.524624824523926, | |
| "learning_rate": 1.983646690829759e-05, | |
| "loss": 1.7694973945617676, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.1738241308793456, | |
| "grad_norm": 5.912774562835693, | |
| "learning_rate": 1.9834472059009097e-05, | |
| "loss": 1.4783644676208496, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.17450579413769599, | |
| "grad_norm": 3.9578967094421387, | |
| "learning_rate": 1.9832465217869964e-05, | |
| "loss": 1.1265515089035034, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.17518745739604635, | |
| "grad_norm": 9.286506652832031, | |
| "learning_rate": 1.9830446387327277e-05, | |
| "loss": 2.3992319107055664, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.17586912065439672, | |
| "grad_norm": 42.55002975463867, | |
| "learning_rate": 1.9828415569842732e-05, | |
| "loss": 2.409393787384033, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.17655078391274712, | |
| "grad_norm": 33.78275680541992, | |
| "learning_rate": 1.9826372767892644e-05, | |
| "loss": 0.7688829898834229, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.17723244717109748, | |
| "grad_norm": 8.896828651428223, | |
| "learning_rate": 1.982431798396794e-05, | |
| "loss": 1.742114782333374, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.17791411042944785, | |
| "grad_norm": 14.53412914276123, | |
| "learning_rate": 1.982225122057417e-05, | |
| "loss": 1.395640254020691, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.17859577368779822, | |
| "grad_norm": 8.733621597290039, | |
| "learning_rate": 1.9820172480231476e-05, | |
| "loss": 1.3212776184082031, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.17927743694614862, | |
| "grad_norm": 6.948071479797363, | |
| "learning_rate": 1.9818081765474602e-05, | |
| "loss": 1.8596258163452148, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.17995910020449898, | |
| "grad_norm": 7.661222457885742, | |
| "learning_rate": 1.981597907885291e-05, | |
| "loss": 1.5962014198303223, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.18064076346284935, | |
| "grad_norm": 5.942171096801758, | |
| "learning_rate": 1.9813864422930345e-05, | |
| "loss": 1.7828670740127563, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.18132242672119972, | |
| "grad_norm": 8.718239784240723, | |
| "learning_rate": 1.981173780028546e-05, | |
| "loss": 1.8014458417892456, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.18200408997955012, | |
| "grad_norm": 10.833904266357422, | |
| "learning_rate": 1.980959921351139e-05, | |
| "loss": 1.397186517715454, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.18268575323790048, | |
| "grad_norm": 3.20481014251709, | |
| "learning_rate": 1.980744866521586e-05, | |
| "loss": 0.9240924715995789, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.18336741649625085, | |
| "grad_norm": 3.304488182067871, | |
| "learning_rate": 1.980528615802118e-05, | |
| "loss": 1.1787257194519043, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.18404907975460122, | |
| "grad_norm": 3.4648053646087646, | |
| "learning_rate": 1.9803111694564246e-05, | |
| "loss": 0.8863788843154907, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.18473074301295161, | |
| "grad_norm": 2.342740297317505, | |
| "learning_rate": 1.9800925277496532e-05, | |
| "loss": 0.9875154495239258, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.18541240627130198, | |
| "grad_norm": 3.5584747791290283, | |
| "learning_rate": 1.979872690948409e-05, | |
| "loss": 0.8974324464797974, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.18609406952965235, | |
| "grad_norm": 4.4199628829956055, | |
| "learning_rate": 1.9796516593207537e-05, | |
| "loss": 1.2789227962493896, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.18677573278800272, | |
| "grad_norm": 7.764851093292236, | |
| "learning_rate": 1.979429433136207e-05, | |
| "loss": 2.7128586769104004, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.18745739604635311, | |
| "grad_norm": 7.048688888549805, | |
| "learning_rate": 1.9792060126657437e-05, | |
| "loss": 1.1671762466430664, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.18813905930470348, | |
| "grad_norm": 19.91363525390625, | |
| "learning_rate": 1.9789813981817963e-05, | |
| "loss": 1.2773725986480713, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.18882072256305385, | |
| "grad_norm": 13.8008394241333, | |
| "learning_rate": 1.9787555899582533e-05, | |
| "loss": 1.5615161657333374, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.18950238582140422, | |
| "grad_norm": 6.971741199493408, | |
| "learning_rate": 1.978528588270458e-05, | |
| "loss": 1.249703049659729, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.1901840490797546, | |
| "grad_norm": 8.02096939086914, | |
| "learning_rate": 1.9783003933952082e-05, | |
| "loss": 1.4458056688308716, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.19086571233810498, | |
| "grad_norm": 4.437285423278809, | |
| "learning_rate": 1.9780710056107587e-05, | |
| "loss": 1.5829441547393799, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.19154737559645535, | |
| "grad_norm": 7.333526611328125, | |
| "learning_rate": 1.9778404251968176e-05, | |
| "loss": 1.612710952758789, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.19222903885480572, | |
| "grad_norm": 5.8669843673706055, | |
| "learning_rate": 1.977608652434548e-05, | |
| "loss": 1.884479284286499, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.1929107021131561, | |
| "grad_norm": 17.100505828857422, | |
| "learning_rate": 1.9773756876065655e-05, | |
| "loss": 2.108002185821533, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.19359236537150648, | |
| "grad_norm": 5.0063090324401855, | |
| "learning_rate": 1.9771415309969406e-05, | |
| "loss": 1.2777209281921387, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.19427402862985685, | |
| "grad_norm": 5.124573707580566, | |
| "learning_rate": 1.976906182891197e-05, | |
| "loss": 1.4534913301467896, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.19495569188820722, | |
| "grad_norm": 5.953328609466553, | |
| "learning_rate": 1.97666964357631e-05, | |
| "loss": 1.2062658071517944, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.1956373551465576, | |
| "grad_norm": 5.880934715270996, | |
| "learning_rate": 1.9764319133407095e-05, | |
| "loss": 1.77345871925354, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.19631901840490798, | |
| "grad_norm": 3.9601995944976807, | |
| "learning_rate": 1.9761929924742756e-05, | |
| "loss": 1.3594872951507568, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.19700068166325835, | |
| "grad_norm": 4.685924530029297, | |
| "learning_rate": 1.975952881268341e-05, | |
| "loss": 1.5763859748840332, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.19768234492160872, | |
| "grad_norm": 3.826180934906006, | |
| "learning_rate": 1.97571158001569e-05, | |
| "loss": 1.8830716609954834, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1983640081799591, | |
| "grad_norm": 3.6553056240081787, | |
| "learning_rate": 1.975469089010558e-05, | |
| "loss": 1.1618760824203491, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.19904567143830948, | |
| "grad_norm": 5.389840126037598, | |
| "learning_rate": 1.975225408548631e-05, | |
| "loss": 1.0185208320617676, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.19972733469665985, | |
| "grad_norm": 6.093509674072266, | |
| "learning_rate": 1.9749805389270453e-05, | |
| "loss": 2.012967109680176, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.20040899795501022, | |
| "grad_norm": 3.8174962997436523, | |
| "learning_rate": 1.9747344804443873e-05, | |
| "loss": 1.5239055156707764, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.2010906612133606, | |
| "grad_norm": 2.4020743370056152, | |
| "learning_rate": 1.9744872334006936e-05, | |
| "loss": 1.4020321369171143, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.20177232447171098, | |
| "grad_norm": 2.9826319217681885, | |
| "learning_rate": 1.9742387980974484e-05, | |
| "loss": 1.3844187259674072, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.20245398773006135, | |
| "grad_norm": 6.365255355834961, | |
| "learning_rate": 1.973989174837587e-05, | |
| "loss": 1.321211338043213, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.20313565098841171, | |
| "grad_norm": 100.61157989501953, | |
| "learning_rate": 1.9737383639254924e-05, | |
| "loss": 1.522669792175293, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.2038173142467621, | |
| "grad_norm": 7.6911940574646, | |
| "learning_rate": 1.9734863656669948e-05, | |
| "loss": 1.4330592155456543, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.20449897750511248, | |
| "grad_norm": 4.5384721755981445, | |
| "learning_rate": 1.973233180369374e-05, | |
| "loss": 1.3101820945739746, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.20518064076346285, | |
| "grad_norm": 4.599635601043701, | |
| "learning_rate": 1.9729788083413558e-05, | |
| "loss": 1.6237077713012695, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.2058623040218132, | |
| "grad_norm": 4.026865005493164, | |
| "learning_rate": 1.9727232498931135e-05, | |
| "loss": 1.1968040466308594, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.2065439672801636, | |
| "grad_norm": 3.2824039459228516, | |
| "learning_rate": 1.972466505336267e-05, | |
| "loss": 1.1076245307922363, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.20722563053851398, | |
| "grad_norm": 5.328464031219482, | |
| "learning_rate": 1.9722085749838835e-05, | |
| "loss": 1.4950063228607178, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.20790729379686435, | |
| "grad_norm": 5.068223476409912, | |
| "learning_rate": 1.9719494591504747e-05, | |
| "loss": 1.2632877826690674, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.2085889570552147, | |
| "grad_norm": 8.749210357666016, | |
| "learning_rate": 1.9716891581519983e-05, | |
| "loss": 2.029329776763916, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.2092706203135651, | |
| "grad_norm": 5.680600643157959, | |
| "learning_rate": 1.9714276723058576e-05, | |
| "loss": 2.031003713607788, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.20995228357191548, | |
| "grad_norm": 8.02801513671875, | |
| "learning_rate": 1.9711650019309e-05, | |
| "loss": 2.549004077911377, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.21063394683026584, | |
| "grad_norm": 15.788527488708496, | |
| "learning_rate": 1.970901147347418e-05, | |
| "loss": 1.2446638345718384, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.2113156100886162, | |
| "grad_norm": 7.427382469177246, | |
| "learning_rate": 1.9706361088771474e-05, | |
| "loss": 1.8798837661743164, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.2119972733469666, | |
| "grad_norm": 9.844267845153809, | |
| "learning_rate": 1.9703698868432676e-05, | |
| "loss": 1.9102556705474854, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.21267893660531698, | |
| "grad_norm": 3.674903631210327, | |
| "learning_rate": 1.9701024815704023e-05, | |
| "loss": 2.105062484741211, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.21336059986366734, | |
| "grad_norm": 7.803150177001953, | |
| "learning_rate": 1.9698338933846172e-05, | |
| "loss": 1.1954119205474854, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.2140422631220177, | |
| "grad_norm": 9.175468444824219, | |
| "learning_rate": 1.9695641226134196e-05, | |
| "loss": 1.0092734098434448, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.2147239263803681, | |
| "grad_norm": 5.410757541656494, | |
| "learning_rate": 1.96929316958576e-05, | |
| "loss": 1.2513163089752197, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.21540558963871848, | |
| "grad_norm": 1.7223652601242065, | |
| "learning_rate": 1.9690210346320304e-05, | |
| "loss": 0.8200163841247559, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.21608725289706884, | |
| "grad_norm": 4.928730487823486, | |
| "learning_rate": 1.9687477180840634e-05, | |
| "loss": 1.9416441917419434, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.2167689161554192, | |
| "grad_norm": 3.1371078491210938, | |
| "learning_rate": 1.9684732202751328e-05, | |
| "loss": 1.1810110807418823, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.2174505794137696, | |
| "grad_norm": 4.844557762145996, | |
| "learning_rate": 1.968197541539953e-05, | |
| "loss": 0.7698321342468262, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.21813224267211997, | |
| "grad_norm": 5.738413333892822, | |
| "learning_rate": 1.9679206822146776e-05, | |
| "loss": 1.2975870370864868, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.21881390593047034, | |
| "grad_norm": 4.841405868530273, | |
| "learning_rate": 1.967642642636901e-05, | |
| "loss": 1.0371558666229248, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.2194955691888207, | |
| "grad_norm": 2.5466487407684326, | |
| "learning_rate": 1.9673634231456554e-05, | |
| "loss": 0.5535352230072021, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.2201772324471711, | |
| "grad_norm": 6.791589736938477, | |
| "learning_rate": 1.9670830240814127e-05, | |
| "loss": 2.2420239448547363, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.22085889570552147, | |
| "grad_norm": 4.811392307281494, | |
| "learning_rate": 1.9668014457860828e-05, | |
| "loss": 0.7682366371154785, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.22154055896387184, | |
| "grad_norm": 5.438621520996094, | |
| "learning_rate": 1.9665186886030135e-05, | |
| "loss": 1.7728581428527832, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 3.9589333534240723, | |
| "learning_rate": 1.96623475287699e-05, | |
| "loss": 1.0751643180847168, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.2229038854805726, | |
| "grad_norm": 3.6448564529418945, | |
| "learning_rate": 1.965949638954235e-05, | |
| "loss": 1.2647238969802856, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.22358554873892297, | |
| "grad_norm": 4.940537929534912, | |
| "learning_rate": 1.9656633471824075e-05, | |
| "loss": 1.1155469417572021, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.22426721199727334, | |
| "grad_norm": 7.523629188537598, | |
| "learning_rate": 1.9653758779106028e-05, | |
| "loss": 1.4137378931045532, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.2249488752556237, | |
| "grad_norm": 7.168045997619629, | |
| "learning_rate": 1.9650872314893523e-05, | |
| "loss": 0.5647242069244385, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.2256305385139741, | |
| "grad_norm": 2.690207004547119, | |
| "learning_rate": 1.964797408270622e-05, | |
| "loss": 1.0476772785186768, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.22631220177232447, | |
| "grad_norm": 2.781921148300171, | |
| "learning_rate": 1.9645064086078135e-05, | |
| "loss": 0.7483969926834106, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.22699386503067484, | |
| "grad_norm": 2.627107858657837, | |
| "learning_rate": 1.964214232855763e-05, | |
| "loss": 0.7267999649047852, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.2276755282890252, | |
| "grad_norm": 6.753015995025635, | |
| "learning_rate": 1.9639208813707407e-05, | |
| "loss": 1.645385503768921, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.2283571915473756, | |
| "grad_norm": 1.9686328172683716, | |
| "learning_rate": 1.9636263545104498e-05, | |
| "loss": 0.6474498510360718, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.22903885480572597, | |
| "grad_norm": 5.547754287719727, | |
| "learning_rate": 1.9633306526340273e-05, | |
| "loss": 1.437086582183838, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.22972051806407634, | |
| "grad_norm": 3.0991756916046143, | |
| "learning_rate": 1.9630337761020436e-05, | |
| "loss": 1.0682692527770996, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.2304021813224267, | |
| "grad_norm": 7.258819103240967, | |
| "learning_rate": 1.9627357252765e-05, | |
| "loss": 2.262204647064209, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.2310838445807771, | |
| "grad_norm": 5.0217390060424805, | |
| "learning_rate": 1.9624365005208303e-05, | |
| "loss": 1.6506143808364868, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.23176550783912747, | |
| "grad_norm": 11.728917121887207, | |
| "learning_rate": 1.962136102199901e-05, | |
| "loss": 2.1931800842285156, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.23244717109747784, | |
| "grad_norm": 9.160211563110352, | |
| "learning_rate": 1.961834530680007e-05, | |
| "loss": 1.0702080726623535, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.2331288343558282, | |
| "grad_norm": 9.413346290588379, | |
| "learning_rate": 1.9615317863288765e-05, | |
| "loss": 0.8787815570831299, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.2338104976141786, | |
| "grad_norm": 13.654266357421875, | |
| "learning_rate": 1.9612278695156662e-05, | |
| "loss": 1.8223183155059814, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.23449216087252897, | |
| "grad_norm": 347.9715881347656, | |
| "learning_rate": 1.9609227806109627e-05, | |
| "loss": 1.0823979377746582, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.23517382413087934, | |
| "grad_norm": 6.461592674255371, | |
| "learning_rate": 1.9606165199867822e-05, | |
| "loss": 2.422264337539673, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.23585548738922973, | |
| "grad_norm": 5.662838459014893, | |
| "learning_rate": 1.960309088016569e-05, | |
| "loss": 1.9186232089996338, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.2365371506475801, | |
| "grad_norm": 6.0235395431518555, | |
| "learning_rate": 1.9600004850751967e-05, | |
| "loss": 1.1358025074005127, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.23721881390593047, | |
| "grad_norm": 3.422726631164551, | |
| "learning_rate": 1.9596907115389656e-05, | |
| "loss": 1.333078384399414, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.23790047716428084, | |
| "grad_norm": 5.582715034484863, | |
| "learning_rate": 1.9593797677856043e-05, | |
| "loss": 1.3061596155166626, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.23858214042263123, | |
| "grad_norm": 3.6086502075195312, | |
| "learning_rate": 1.959067654194268e-05, | |
| "loss": 1.1203460693359375, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2392638036809816, | |
| "grad_norm": 2.288213014602661, | |
| "learning_rate": 1.9587543711455383e-05, | |
| "loss": 0.8664935827255249, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.23994546693933197, | |
| "grad_norm": 3.412855625152588, | |
| "learning_rate": 1.958439919021423e-05, | |
| "loss": 1.3929232358932495, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.24062713019768234, | |
| "grad_norm": 2.167471170425415, | |
| "learning_rate": 1.9581242982053546e-05, | |
| "loss": 0.7552188038825989, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.24130879345603273, | |
| "grad_norm": 4.317893028259277, | |
| "learning_rate": 1.957807509082192e-05, | |
| "loss": 1.6116098165512085, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.2419904567143831, | |
| "grad_norm": 5.659145832061768, | |
| "learning_rate": 1.9574895520382183e-05, | |
| "loss": 1.7634755373001099, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.24267211997273347, | |
| "grad_norm": 3.4220974445343018, | |
| "learning_rate": 1.9571704274611397e-05, | |
| "loss": 1.3048694133758545, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.24335378323108384, | |
| "grad_norm": 3.2889888286590576, | |
| "learning_rate": 1.956850135740087e-05, | |
| "loss": 1.5248088836669922, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.24403544648943423, | |
| "grad_norm": 3.1788370609283447, | |
| "learning_rate": 1.9565286772656145e-05, | |
| "loss": 1.4584882259368896, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.2447171097477846, | |
| "grad_norm": 4.251647472381592, | |
| "learning_rate": 1.9562060524296983e-05, | |
| "loss": 1.024428367614746, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.24539877300613497, | |
| "grad_norm": 4.799529552459717, | |
| "learning_rate": 1.955882261625737e-05, | |
| "loss": 1.2644996643066406, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.24608043626448534, | |
| "grad_norm": 4.792227745056152, | |
| "learning_rate": 1.9555573052485518e-05, | |
| "loss": 2.040235996246338, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.24676209952283573, | |
| "grad_norm": 4.666950702667236, | |
| "learning_rate": 1.9552311836943832e-05, | |
| "loss": 1.5763626098632812, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.2474437627811861, | |
| "grad_norm": 4.873711585998535, | |
| "learning_rate": 1.9549038973608952e-05, | |
| "loss": 1.2719221115112305, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.24812542603953647, | |
| "grad_norm": 5.893383502960205, | |
| "learning_rate": 1.9545754466471696e-05, | |
| "loss": 2.0642623901367188, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.24880708929788684, | |
| "grad_norm": 6.61902379989624, | |
| "learning_rate": 1.9542458319537094e-05, | |
| "loss": 1.2182157039642334, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.24948875255623723, | |
| "grad_norm": 5.884739875793457, | |
| "learning_rate": 1.9539150536824363e-05, | |
| "loss": 1.106882929801941, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.2501704158145876, | |
| "grad_norm": 5.0897650718688965, | |
| "learning_rate": 1.953583112236691e-05, | |
| "loss": 0.5587277412414551, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.250852079072938, | |
| "grad_norm": 2.1747047901153564, | |
| "learning_rate": 1.9532500080212333e-05, | |
| "loss": 1.0599546432495117, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.25153374233128833, | |
| "grad_norm": 1.9114444255828857, | |
| "learning_rate": 1.9529157414422398e-05, | |
| "loss": 1.4041662216186523, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.25221540558963873, | |
| "grad_norm": 4.506577014923096, | |
| "learning_rate": 1.9525803129073046e-05, | |
| "loss": 1.44583261013031, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.25289706884798907, | |
| "grad_norm": 5.992087364196777, | |
| "learning_rate": 1.9522437228254386e-05, | |
| "loss": 1.9638934135437012, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.25357873210633947, | |
| "grad_norm": 2.0379884243011475, | |
| "learning_rate": 1.9519059716070702e-05, | |
| "loss": 0.9091148972511292, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.25426039536468986, | |
| "grad_norm": 5.675978660583496, | |
| "learning_rate": 1.951567059664042e-05, | |
| "loss": 1.7077932357788086, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.2549420586230402, | |
| "grad_norm": 8.352693557739258, | |
| "learning_rate": 1.9512269874096132e-05, | |
| "loss": 2.6432814598083496, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.2556237218813906, | |
| "grad_norm": 16.751062393188477, | |
| "learning_rate": 1.9508857552584574e-05, | |
| "loss": 1.5420377254486084, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.256305385139741, | |
| "grad_norm": 7.535068511962891, | |
| "learning_rate": 1.9505433636266618e-05, | |
| "loss": 1.5092883110046387, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.25698704839809133, | |
| "grad_norm": 5.2896246910095215, | |
| "learning_rate": 1.9501998129317288e-05, | |
| "loss": 1.8087358474731445, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.25766871165644173, | |
| "grad_norm": 3.864203453063965, | |
| "learning_rate": 1.9498551035925736e-05, | |
| "loss": 1.727104663848877, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.25835037491479207, | |
| "grad_norm": 4.895890712738037, | |
| "learning_rate": 1.9495092360295236e-05, | |
| "loss": 0.7697995901107788, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.25903203817314246, | |
| "grad_norm": 4.769713401794434, | |
| "learning_rate": 1.9491622106643195e-05, | |
| "loss": 1.466042399406433, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.25971370143149286, | |
| "grad_norm": 6.704110145568848, | |
| "learning_rate": 1.9488140279201128e-05, | |
| "loss": 1.9534811973571777, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.2603953646898432, | |
| "grad_norm": 3.5679831504821777, | |
| "learning_rate": 1.948464688221467e-05, | |
| "loss": 1.6868244409561157, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.2610770279481936, | |
| "grad_norm": 4.395662307739258, | |
| "learning_rate": 1.948114191994356e-05, | |
| "loss": 2.0778064727783203, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.261758691206544, | |
| "grad_norm": 3.936203718185425, | |
| "learning_rate": 1.9477625396661643e-05, | |
| "loss": 1.0802814960479736, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.26244035446489433, | |
| "grad_norm": 3.488983631134033, | |
| "learning_rate": 1.9474097316656856e-05, | |
| "loss": 1.6045266389846802, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.2631220177232447, | |
| "grad_norm": 4.06668758392334, | |
| "learning_rate": 1.947055768423123e-05, | |
| "loss": 1.3836003541946411, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.26380368098159507, | |
| "grad_norm": 3.609584331512451, | |
| "learning_rate": 1.9467006503700886e-05, | |
| "loss": 1.6604485511779785, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.26448534423994546, | |
| "grad_norm": 4.5511064529418945, | |
| "learning_rate": 1.946344377939602e-05, | |
| "loss": 1.2709903717041016, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.26516700749829586, | |
| "grad_norm": 3.8128294944763184, | |
| "learning_rate": 1.945986951566091e-05, | |
| "loss": 1.0375221967697144, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.2658486707566462, | |
| "grad_norm": 3.75148606300354, | |
| "learning_rate": 1.9456283716853906e-05, | |
| "loss": 0.6834596991539001, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2665303340149966, | |
| "grad_norm": 6.924191474914551, | |
| "learning_rate": 1.9452686387347414e-05, | |
| "loss": 1.6890287399291992, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.267211997273347, | |
| "grad_norm": 3.1461596488952637, | |
| "learning_rate": 1.9449077531527906e-05, | |
| "loss": 1.2176769971847534, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.26789366053169733, | |
| "grad_norm": 1.6956125497817993, | |
| "learning_rate": 1.9445457153795912e-05, | |
| "loss": 0.3354303240776062, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.2685753237900477, | |
| "grad_norm": 16.950252532958984, | |
| "learning_rate": 1.944182525856601e-05, | |
| "loss": 1.2652069330215454, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.26925698704839807, | |
| "grad_norm": 4.389862060546875, | |
| "learning_rate": 1.9438181850266815e-05, | |
| "loss": 1.260068416595459, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.26993865030674846, | |
| "grad_norm": 4.1714582443237305, | |
| "learning_rate": 1.9434526933340993e-05, | |
| "loss": 1.3269736766815186, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.27062031356509886, | |
| "grad_norm": 7.820770740509033, | |
| "learning_rate": 1.9430860512245233e-05, | |
| "loss": 2.191876173019409, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.2713019768234492, | |
| "grad_norm": 8.434717178344727, | |
| "learning_rate": 1.9427182591450252e-05, | |
| "loss": 1.7254667282104492, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.2719836400817996, | |
| "grad_norm": 10.429789543151855, | |
| "learning_rate": 1.9423493175440797e-05, | |
| "loss": 2.233715534210205, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.27266530334015, | |
| "grad_norm": 6.914125442504883, | |
| "learning_rate": 1.941979226871563e-05, | |
| "loss": 1.3225195407867432, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.27334696659850033, | |
| "grad_norm": 5.250466823577881, | |
| "learning_rate": 1.9416079875787518e-05, | |
| "loss": 1.5940214395523071, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.2740286298568507, | |
| "grad_norm": 3.1187331676483154, | |
| "learning_rate": 1.9412356001183234e-05, | |
| "loss": 1.6403329372406006, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.27471029311520107, | |
| "grad_norm": 3.826421022415161, | |
| "learning_rate": 1.9408620649443563e-05, | |
| "loss": 1.121032953262329, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.27539195637355146, | |
| "grad_norm": 4.634712219238281, | |
| "learning_rate": 1.940487382512328e-05, | |
| "loss": 2.140951156616211, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.27607361963190186, | |
| "grad_norm": 4.077300071716309, | |
| "learning_rate": 1.9401115532791134e-05, | |
| "loss": 1.3883341550827026, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2767552828902522, | |
| "grad_norm": 5.884093761444092, | |
| "learning_rate": 1.9397345777029877e-05, | |
| "loss": 1.732025384902954, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.2774369461486026, | |
| "grad_norm": 8.901309967041016, | |
| "learning_rate": 1.9393564562436235e-05, | |
| "loss": 2.0747663974761963, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.278118609406953, | |
| "grad_norm": 5.239158630371094, | |
| "learning_rate": 1.93897718936209e-05, | |
| "loss": 2.3918912410736084, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.27880027266530333, | |
| "grad_norm": 6.223039150238037, | |
| "learning_rate": 1.9385967775208538e-05, | |
| "loss": 0.9834439754486084, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.2794819359236537, | |
| "grad_norm": 70.54039001464844, | |
| "learning_rate": 1.938215221183777e-05, | |
| "loss": 0.9655174016952515, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.28016359918200406, | |
| "grad_norm": 8.442602157592773, | |
| "learning_rate": 1.9378325208161178e-05, | |
| "loss": 2.2607247829437256, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.28084526244035446, | |
| "grad_norm": 10.892454147338867, | |
| "learning_rate": 1.937448676884529e-05, | |
| "loss": 2.0963754653930664, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.28152692569870486, | |
| "grad_norm": 4.259152412414551, | |
| "learning_rate": 1.9370636898570585e-05, | |
| "loss": 0.9232777953147888, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.2822085889570552, | |
| "grad_norm": 4.880434513092041, | |
| "learning_rate": 1.9366775602031466e-05, | |
| "loss": 1.79977285861969, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.2828902522154056, | |
| "grad_norm": 3.0103135108947754, | |
| "learning_rate": 1.936290288393629e-05, | |
| "loss": 0.7951688766479492, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.283571915473756, | |
| "grad_norm": 7.5906171798706055, | |
| "learning_rate": 1.935901874900732e-05, | |
| "loss": 1.6573779582977295, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.2842535787321063, | |
| "grad_norm": 3.9085164070129395, | |
| "learning_rate": 1.9355123201980756e-05, | |
| "loss": 1.1650043725967407, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.2849352419904567, | |
| "grad_norm": 10.126424789428711, | |
| "learning_rate": 1.93512162476067e-05, | |
| "loss": 1.5728178024291992, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.28561690524880706, | |
| "grad_norm": 3.047975540161133, | |
| "learning_rate": 1.934729789064918e-05, | |
| "loss": 1.0197397470474243, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.28629856850715746, | |
| "grad_norm": 3.7309322357177734, | |
| "learning_rate": 1.9343368135886112e-05, | |
| "loss": 1.2601120471954346, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.28698023176550785, | |
| "grad_norm": 8.821162223815918, | |
| "learning_rate": 1.9339426988109325e-05, | |
| "loss": 0.5328366160392761, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.2876618950238582, | |
| "grad_norm": 4.5019097328186035, | |
| "learning_rate": 1.9335474452124524e-05, | |
| "loss": 0.8345069885253906, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.2883435582822086, | |
| "grad_norm": 19.59219741821289, | |
| "learning_rate": 1.9331510532751313e-05, | |
| "loss": 1.2992053031921387, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.289025221540559, | |
| "grad_norm": 5.920818328857422, | |
| "learning_rate": 1.9327535234823174e-05, | |
| "loss": 1.5236142873764038, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.2897068847989093, | |
| "grad_norm": 2.4441089630126953, | |
| "learning_rate": 1.932354856318746e-05, | |
| "loss": 0.8194361925125122, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.2903885480572597, | |
| "grad_norm": 3.7855710983276367, | |
| "learning_rate": 1.9319550522705394e-05, | |
| "loss": 1.374048113822937, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.29107021131561006, | |
| "grad_norm": 3.0214314460754395, | |
| "learning_rate": 1.9315541118252068e-05, | |
| "loss": 0.8251756429672241, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.29175187457396046, | |
| "grad_norm": 7.234747886657715, | |
| "learning_rate": 1.9311520354716417e-05, | |
| "loss": 1.16514253616333, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.29243353783231085, | |
| "grad_norm": 1.991133451461792, | |
| "learning_rate": 1.9307488237001247e-05, | |
| "loss": 0.714950442314148, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.2931152010906612, | |
| "grad_norm": 22.254261016845703, | |
| "learning_rate": 1.9303444770023184e-05, | |
| "loss": 0.9669209122657776, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.2937968643490116, | |
| "grad_norm": 3.0376803874969482, | |
| "learning_rate": 1.9299389958712717e-05, | |
| "loss": 1.1408151388168335, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.294478527607362, | |
| "grad_norm": 4.4752726554870605, | |
| "learning_rate": 1.9295323808014152e-05, | |
| "loss": 1.4061379432678223, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.2951601908657123, | |
| "grad_norm": 2.8586220741271973, | |
| "learning_rate": 1.9291246322885627e-05, | |
| "loss": 1.4070537090301514, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.2958418541240627, | |
| "grad_norm": 2.5731873512268066, | |
| "learning_rate": 1.9287157508299104e-05, | |
| "loss": 1.1559405326843262, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.2965235173824131, | |
| "grad_norm": 3.9198739528656006, | |
| "learning_rate": 1.9283057369240358e-05, | |
| "loss": 1.6907612085342407, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.29720518064076346, | |
| "grad_norm": 2.753835916519165, | |
| "learning_rate": 1.9278945910708967e-05, | |
| "loss": 1.4664416313171387, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.29788684389911385, | |
| "grad_norm": 3.459925889968872, | |
| "learning_rate": 1.927482313771832e-05, | |
| "loss": 1.1719900369644165, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.2985685071574642, | |
| "grad_norm": 3.5883936882019043, | |
| "learning_rate": 1.9270689055295596e-05, | |
| "loss": 1.4042953252792358, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.2992501704158146, | |
| "grad_norm": 4.524599075317383, | |
| "learning_rate": 1.926654366848177e-05, | |
| "loss": 1.5059282779693604, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.299931833674165, | |
| "grad_norm": 3.603348970413208, | |
| "learning_rate": 1.9262386982331596e-05, | |
| "loss": 0.7287573218345642, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.3006134969325153, | |
| "grad_norm": 3.01680850982666, | |
| "learning_rate": 1.9258219001913607e-05, | |
| "loss": 0.9661978483200073, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.3012951601908657, | |
| "grad_norm": 4.1275482177734375, | |
| "learning_rate": 1.9254039732310113e-05, | |
| "loss": 1.4540948867797852, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.3019768234492161, | |
| "grad_norm": 11.864845275878906, | |
| "learning_rate": 1.9249849178617182e-05, | |
| "loss": 2.273348331451416, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.30265848670756645, | |
| "grad_norm": 3.5200693607330322, | |
| "learning_rate": 1.9245647345944647e-05, | |
| "loss": 1.7083361148834229, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.30334014996591685, | |
| "grad_norm": 2.3711342811584473, | |
| "learning_rate": 1.9241434239416093e-05, | |
| "loss": 1.1357401609420776, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.3040218132242672, | |
| "grad_norm": 130.5746612548828, | |
| "learning_rate": 1.9237209864168855e-05, | |
| "loss": 1.9227306842803955, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.3047034764826176, | |
| "grad_norm": 3.825364589691162, | |
| "learning_rate": 1.9232974225354e-05, | |
| "loss": 0.8690076470375061, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.305385139740968, | |
| "grad_norm": 2.0452988147735596, | |
| "learning_rate": 1.9228727328136337e-05, | |
| "loss": 1.1484711170196533, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.3060668029993183, | |
| "grad_norm": 15.158158302307129, | |
| "learning_rate": 1.92244691776944e-05, | |
| "loss": 1.3377666473388672, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.3067484662576687, | |
| "grad_norm": 2.2981607913970947, | |
| "learning_rate": 1.922019977922045e-05, | |
| "loss": 1.059500813484192, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3074301295160191, | |
| "grad_norm": 6.6916327476501465, | |
| "learning_rate": 1.9215919137920452e-05, | |
| "loss": 0.831468939781189, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.30811179277436945, | |
| "grad_norm": 1.9115036725997925, | |
| "learning_rate": 1.921162725901409e-05, | |
| "loss": 0.36133643984794617, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.30879345603271985, | |
| "grad_norm": 4.945026874542236, | |
| "learning_rate": 1.920732414773475e-05, | |
| "loss": 1.5682481527328491, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.3094751192910702, | |
| "grad_norm": 3.5810322761535645, | |
| "learning_rate": 1.9203009809329515e-05, | |
| "loss": 1.7324963808059692, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.3101567825494206, | |
| "grad_norm": 2.092665433883667, | |
| "learning_rate": 1.919868424905915e-05, | |
| "loss": 0.765301525592804, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.310838445807771, | |
| "grad_norm": 4.602624893188477, | |
| "learning_rate": 1.9194347472198112e-05, | |
| "loss": 0.37820541858673096, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.3115201090661213, | |
| "grad_norm": 3.6788721084594727, | |
| "learning_rate": 1.9189999484034533e-05, | |
| "loss": 0.5887208580970764, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.3122017723244717, | |
| "grad_norm": 190.16064453125, | |
| "learning_rate": 1.9185640289870213e-05, | |
| "loss": 0.9291536211967468, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.3128834355828221, | |
| "grad_norm": 7.2953410148620605, | |
| "learning_rate": 1.9181269895020624e-05, | |
| "loss": 1.3983128070831299, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.31356509884117245, | |
| "grad_norm": 2.5724921226501465, | |
| "learning_rate": 1.9176888304814882e-05, | |
| "loss": 0.994464635848999, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.31424676209952285, | |
| "grad_norm": 5.118542194366455, | |
| "learning_rate": 1.9172495524595764e-05, | |
| "loss": 1.9143290519714355, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.3149284253578732, | |
| "grad_norm": 29.36760139465332, | |
| "learning_rate": 1.9168091559719696e-05, | |
| "loss": 2.25297212600708, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.3156100886162236, | |
| "grad_norm": 3.8203303813934326, | |
| "learning_rate": 1.9163676415556734e-05, | |
| "loss": 0.7553459405899048, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.316291751874574, | |
| "grad_norm": 7.625334739685059, | |
| "learning_rate": 1.9159250097490563e-05, | |
| "loss": 2.2948050498962402, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.3169734151329243, | |
| "grad_norm": 10.45826244354248, | |
| "learning_rate": 1.9154812610918503e-05, | |
| "loss": 1.3943508863449097, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.3176550783912747, | |
| "grad_norm": 6.181829929351807, | |
| "learning_rate": 1.9150363961251485e-05, | |
| "loss": 0.7830055952072144, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.3183367416496251, | |
| "grad_norm": 5.109697341918945, | |
| "learning_rate": 1.914590415391406e-05, | |
| "loss": 1.253631591796875, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.31901840490797545, | |
| "grad_norm": 8.534515380859375, | |
| "learning_rate": 1.9141433194344374e-05, | |
| "loss": 1.3872311115264893, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.31970006816632585, | |
| "grad_norm": 4.851257801055908, | |
| "learning_rate": 1.9136951087994176e-05, | |
| "loss": 2.131148338317871, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.3203817314246762, | |
| "grad_norm": 8.646773338317871, | |
| "learning_rate": 1.913245784032881e-05, | |
| "loss": 1.5957164764404297, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.3210633946830266, | |
| "grad_norm": 4.015015125274658, | |
| "learning_rate": 1.9127953456827205e-05, | |
| "loss": 0.8922734260559082, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.321745057941377, | |
| "grad_norm": 5.832241535186768, | |
| "learning_rate": 1.912343794298186e-05, | |
| "loss": 1.2595834732055664, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.3224267211997273, | |
| "grad_norm": 2.19978404045105, | |
| "learning_rate": 1.911891130429886e-05, | |
| "loss": 0.8392937183380127, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.3231083844580777, | |
| "grad_norm": 5.221693515777588, | |
| "learning_rate": 1.9114373546297844e-05, | |
| "loss": 1.1632442474365234, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.3237900477164281, | |
| "grad_norm": 5.705185890197754, | |
| "learning_rate": 1.9109824674512014e-05, | |
| "loss": 1.3697636127471924, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.32447171097477845, | |
| "grad_norm": 5.748249530792236, | |
| "learning_rate": 1.9105264694488124e-05, | |
| "loss": 1.4544509649276733, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.32515337423312884, | |
| "grad_norm": 4.253024101257324, | |
| "learning_rate": 1.9100693611786472e-05, | |
| "loss": 0.7538120746612549, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.3258350374914792, | |
| "grad_norm": 2.40335750579834, | |
| "learning_rate": 1.9096111431980896e-05, | |
| "loss": 0.9789775013923645, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.3265167007498296, | |
| "grad_norm": 15.633095741271973, | |
| "learning_rate": 1.9091518160658763e-05, | |
| "loss": 1.4342238903045654, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.32719836400818, | |
| "grad_norm": 2.747086763381958, | |
| "learning_rate": 1.9086913803420966e-05, | |
| "loss": 1.5781259536743164, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3278800272665303, | |
| "grad_norm": 3.0719778537750244, | |
| "learning_rate": 1.9082298365881916e-05, | |
| "loss": 1.3554413318634033, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.3285616905248807, | |
| "grad_norm": 2.799806594848633, | |
| "learning_rate": 1.907767185366953e-05, | |
| "loss": 1.06112539768219, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.3292433537832311, | |
| "grad_norm": 1.8001681566238403, | |
| "learning_rate": 1.9073034272425245e-05, | |
| "loss": 0.9077748656272888, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.32992501704158145, | |
| "grad_norm": 2.3758649826049805, | |
| "learning_rate": 1.9068385627803972e-05, | |
| "loss": 1.0518077611923218, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.33060668029993184, | |
| "grad_norm": 4.855216026306152, | |
| "learning_rate": 1.906372592547413e-05, | |
| "loss": 0.8362563252449036, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.3312883435582822, | |
| "grad_norm": 3.3203141689300537, | |
| "learning_rate": 1.905905517111761e-05, | |
| "loss": 1.283669114112854, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.3319700068166326, | |
| "grad_norm": 9.288386344909668, | |
| "learning_rate": 1.90543733704298e-05, | |
| "loss": 0.9806363582611084, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.332651670074983, | |
| "grad_norm": 2.7394044399261475, | |
| "learning_rate": 1.9049680529119524e-05, | |
| "loss": 1.4370222091674805, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 3.870551586151123, | |
| "learning_rate": 1.9044976652909102e-05, | |
| "loss": 2.071592330932617, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.3340149965916837, | |
| "grad_norm": 2.8228816986083984, | |
| "learning_rate": 1.9040261747534282e-05, | |
| "loss": 0.5981768369674683, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3346966598500341, | |
| "grad_norm": 4.471041679382324, | |
| "learning_rate": 1.9035535818744286e-05, | |
| "loss": 1.2177919149398804, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.33537832310838445, | |
| "grad_norm": 9.052423477172852, | |
| "learning_rate": 1.9030798872301758e-05, | |
| "loss": 1.2425520420074463, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.33605998636673484, | |
| "grad_norm": 3.4728076457977295, | |
| "learning_rate": 1.9026050913982788e-05, | |
| "loss": 0.8533923625946045, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.3367416496250852, | |
| "grad_norm": 1.923197865486145, | |
| "learning_rate": 1.9021291949576883e-05, | |
| "loss": 0.8370528221130371, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.3374233128834356, | |
| "grad_norm": 3.5292904376983643, | |
| "learning_rate": 1.9016521984886984e-05, | |
| "loss": 1.0731617212295532, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.338104976141786, | |
| "grad_norm": 2.2630650997161865, | |
| "learning_rate": 1.901174102572943e-05, | |
| "loss": 0.8468523621559143, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.3387866394001363, | |
| "grad_norm": 4.356873989105225, | |
| "learning_rate": 1.9006949077933984e-05, | |
| "loss": 1.2603099346160889, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.3394683026584867, | |
| "grad_norm": 2.700840473175049, | |
| "learning_rate": 1.900214614734379e-05, | |
| "loss": 1.1067051887512207, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.3401499659168371, | |
| "grad_norm": 5.208966255187988, | |
| "learning_rate": 1.8997332239815403e-05, | |
| "loss": 1.839177131652832, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.34083162917518744, | |
| "grad_norm": 5.6692633628845215, | |
| "learning_rate": 1.8992507361218743e-05, | |
| "loss": 1.3088819980621338, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.34151329243353784, | |
| "grad_norm": 8.791088104248047, | |
| "learning_rate": 1.8987671517437122e-05, | |
| "loss": 2.0345616340637207, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.3421949556918882, | |
| "grad_norm": 6.057740211486816, | |
| "learning_rate": 1.8982824714367214e-05, | |
| "loss": 0.721781313419342, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.3428766189502386, | |
| "grad_norm": 4.495492935180664, | |
| "learning_rate": 1.8977966957919068e-05, | |
| "loss": 0.5859732627868652, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.34355828220858897, | |
| "grad_norm": 3.266132116317749, | |
| "learning_rate": 1.8973098254016074e-05, | |
| "loss": 1.680146336555481, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.3442399454669393, | |
| "grad_norm": 3.1697919368743896, | |
| "learning_rate": 1.8968218608594987e-05, | |
| "loss": 1.515268087387085, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.3449216087252897, | |
| "grad_norm": 3.3663229942321777, | |
| "learning_rate": 1.8963328027605886e-05, | |
| "loss": 0.5287567377090454, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.3456032719836401, | |
| "grad_norm": 2.6414244174957275, | |
| "learning_rate": 1.8958426517012203e-05, | |
| "loss": 0.8111573457717896, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.34628493524199044, | |
| "grad_norm": 6.3523454666137695, | |
| "learning_rate": 1.8953514082790683e-05, | |
| "loss": 1.9260203838348389, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.34696659850034084, | |
| "grad_norm": 3.327556610107422, | |
| "learning_rate": 1.8948590730931394e-05, | |
| "loss": 0.9914911985397339, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.3476482617586912, | |
| "grad_norm": 2.532057523727417, | |
| "learning_rate": 1.8943656467437726e-05, | |
| "loss": 0.9729138612747192, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3483299250170416, | |
| "grad_norm": 4.6469268798828125, | |
| "learning_rate": 1.893871129832636e-05, | |
| "loss": 2.3366539478302, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.34901158827539197, | |
| "grad_norm": 2.643529176712036, | |
| "learning_rate": 1.893375522962729e-05, | |
| "loss": 0.7275136709213257, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.3496932515337423, | |
| "grad_norm": 3.031022787094116, | |
| "learning_rate": 1.8928788267383783e-05, | |
| "loss": 1.2368642091751099, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.3503749147920927, | |
| "grad_norm": 2.6746222972869873, | |
| "learning_rate": 1.8923810417652404e-05, | |
| "loss": 1.6857903003692627, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.3510565780504431, | |
| "grad_norm": 2.2445945739746094, | |
| "learning_rate": 1.8918821686502992e-05, | |
| "loss": 1.0779474973678589, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.35173824130879344, | |
| "grad_norm": 3.943788528442383, | |
| "learning_rate": 1.8913822080018645e-05, | |
| "loss": 1.655326008796692, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.35241990456714384, | |
| "grad_norm": 4.718496322631836, | |
| "learning_rate": 1.8908811604295728e-05, | |
| "loss": 0.8111345767974854, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.35310156782549423, | |
| "grad_norm": 2.1908249855041504, | |
| "learning_rate": 1.8903790265443865e-05, | |
| "loss": 0.7612648010253906, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.3537832310838446, | |
| "grad_norm": 3.267055034637451, | |
| "learning_rate": 1.8898758069585923e-05, | |
| "loss": 0.9781118631362915, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.35446489434219497, | |
| "grad_norm": 5.063033103942871, | |
| "learning_rate": 1.8893715022858e-05, | |
| "loss": 1.366690754890442, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.3551465576005453, | |
| "grad_norm": 5.890979290008545, | |
| "learning_rate": 1.888866113140943e-05, | |
| "loss": 2.1514992713928223, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.3558282208588957, | |
| "grad_norm": 4.908336639404297, | |
| "learning_rate": 1.8883596401402777e-05, | |
| "loss": 1.4852519035339355, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.3565098841172461, | |
| "grad_norm": 2.1742019653320312, | |
| "learning_rate": 1.8878520839013812e-05, | |
| "loss": 1.223859429359436, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.35719154737559644, | |
| "grad_norm": 6.2009124755859375, | |
| "learning_rate": 1.8873434450431522e-05, | |
| "loss": 1.1366631984710693, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.35787321063394684, | |
| "grad_norm": 4.069712162017822, | |
| "learning_rate": 1.886833724185809e-05, | |
| "loss": 0.6879901885986328, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.35855487389229723, | |
| "grad_norm": 7.754619121551514, | |
| "learning_rate": 1.8863229219508892e-05, | |
| "loss": 2.0778799057006836, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.3592365371506476, | |
| "grad_norm": 5.237489223480225, | |
| "learning_rate": 1.8858110389612495e-05, | |
| "loss": 1.3526272773742676, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.35991820040899797, | |
| "grad_norm": 2.5144271850585938, | |
| "learning_rate": 1.885298075841064e-05, | |
| "loss": 1.1102807521820068, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.3605998636673483, | |
| "grad_norm": 3.9299678802490234, | |
| "learning_rate": 1.8847840332158243e-05, | |
| "loss": 2.0058670043945312, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.3612815269256987, | |
| "grad_norm": 3.4149467945098877, | |
| "learning_rate": 1.8842689117123377e-05, | |
| "loss": 0.6895046830177307, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3619631901840491, | |
| "grad_norm": 4.579489707946777, | |
| "learning_rate": 1.8837527119587277e-05, | |
| "loss": 1.8255746364593506, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.36264485344239944, | |
| "grad_norm": 2.0138349533081055, | |
| "learning_rate": 1.883235434584432e-05, | |
| "loss": 0.8882697224617004, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.36332651670074984, | |
| "grad_norm": 3.6011276245117188, | |
| "learning_rate": 1.8827170802202027e-05, | |
| "loss": 1.4054481983184814, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.36400817995910023, | |
| "grad_norm": 129.46046447753906, | |
| "learning_rate": 1.8821976494981055e-05, | |
| "loss": 1.6898300647735596, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.36468984321745057, | |
| "grad_norm": 2.155585527420044, | |
| "learning_rate": 1.8816771430515178e-05, | |
| "loss": 0.7753575444221497, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.36537150647580097, | |
| "grad_norm": 4.523682117462158, | |
| "learning_rate": 1.8811555615151286e-05, | |
| "loss": 1.6820876598358154, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.3660531697341513, | |
| "grad_norm": 4.429269790649414, | |
| "learning_rate": 1.880632905524939e-05, | |
| "loss": 0.6383548378944397, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.3667348329925017, | |
| "grad_norm": 3.7983267307281494, | |
| "learning_rate": 1.8801091757182593e-05, | |
| "loss": 1.0961453914642334, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.3674164962508521, | |
| "grad_norm": 3.1742172241210938, | |
| "learning_rate": 1.879584372733709e-05, | |
| "loss": 1.4234809875488281, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.36809815950920244, | |
| "grad_norm": 2.4020187854766846, | |
| "learning_rate": 1.8790584972112174e-05, | |
| "loss": 1.1758843660354614, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.36877982276755283, | |
| "grad_norm": 2.9260194301605225, | |
| "learning_rate": 1.87853154979202e-05, | |
| "loss": 1.0781900882720947, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.36946148602590323, | |
| "grad_norm": 2.0199899673461914, | |
| "learning_rate": 1.8780035311186605e-05, | |
| "loss": 0.7699449062347412, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.37014314928425357, | |
| "grad_norm": 9.097823143005371, | |
| "learning_rate": 1.8774744418349886e-05, | |
| "loss": 1.7699912786483765, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.37082481254260397, | |
| "grad_norm": 3.701662302017212, | |
| "learning_rate": 1.8769442825861594e-05, | |
| "loss": 1.1131584644317627, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.3715064758009543, | |
| "grad_norm": 3.3982136249542236, | |
| "learning_rate": 1.876413054018633e-05, | |
| "loss": 1.6480752229690552, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.3721881390593047, | |
| "grad_norm": 2.128912925720215, | |
| "learning_rate": 1.875880756780172e-05, | |
| "loss": 1.0749809741973877, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.3728698023176551, | |
| "grad_norm": 2.3384532928466797, | |
| "learning_rate": 1.8753473915198437e-05, | |
| "loss": 1.4570391178131104, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.37355146557600544, | |
| "grad_norm": 2.2841551303863525, | |
| "learning_rate": 1.874812958888018e-05, | |
| "loss": 1.0104687213897705, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.37423312883435583, | |
| "grad_norm": 1.5464543104171753, | |
| "learning_rate": 1.874277459536364e-05, | |
| "loss": 1.2241737842559814, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.37491479209270623, | |
| "grad_norm": 5.0516815185546875, | |
| "learning_rate": 1.873740894117854e-05, | |
| "loss": 1.6643073558807373, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.37559645535105657, | |
| "grad_norm": 2.9442920684814453, | |
| "learning_rate": 1.8732032632867592e-05, | |
| "loss": 1.5176081657409668, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.37627811860940696, | |
| "grad_norm": 2.480600118637085, | |
| "learning_rate": 1.8726645676986503e-05, | |
| "loss": 1.3346457481384277, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.3769597818677573, | |
| "grad_norm": 2.3903183937072754, | |
| "learning_rate": 1.872124808010395e-05, | |
| "loss": 1.3713115453720093, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.3776414451261077, | |
| "grad_norm": 4.027533531188965, | |
| "learning_rate": 1.8715839848801604e-05, | |
| "loss": 0.8249767422676086, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.3783231083844581, | |
| "grad_norm": 2.3533899784088135, | |
| "learning_rate": 1.8710420989674093e-05, | |
| "loss": 1.4343526363372803, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.37900477164280844, | |
| "grad_norm": 2.4576408863067627, | |
| "learning_rate": 1.8704991509329002e-05, | |
| "loss": 1.4026904106140137, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.37968643490115883, | |
| "grad_norm": 3.576802968978882, | |
| "learning_rate": 1.8699551414386877e-05, | |
| "loss": 1.5657179355621338, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.3803680981595092, | |
| "grad_norm": 2.6988046169281006, | |
| "learning_rate": 1.8694100711481195e-05, | |
| "loss": 0.6028250455856323, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.38104976141785957, | |
| "grad_norm": 3.8934648036956787, | |
| "learning_rate": 1.868863940725838e-05, | |
| "loss": 1.2468817234039307, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.38173142467620996, | |
| "grad_norm": 2.7071168422698975, | |
| "learning_rate": 1.8683167508377775e-05, | |
| "loss": 0.9047738313674927, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.3824130879345603, | |
| "grad_norm": 3.0257182121276855, | |
| "learning_rate": 1.8677685021511643e-05, | |
| "loss": 1.2457003593444824, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.3830947511929107, | |
| "grad_norm": 4.956380367279053, | |
| "learning_rate": 1.8672191953345156e-05, | |
| "loss": 1.811165452003479, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.3837764144512611, | |
| "grad_norm": 6.11633825302124, | |
| "learning_rate": 1.86666883105764e-05, | |
| "loss": 1.6507594585418701, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.38445807770961143, | |
| "grad_norm": 4.5495991706848145, | |
| "learning_rate": 1.866117409991634e-05, | |
| "loss": 1.5494463443756104, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.38513974096796183, | |
| "grad_norm": 4.0347795486450195, | |
| "learning_rate": 1.8655649328088836e-05, | |
| "loss": 1.0738871097564697, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.3858214042263122, | |
| "grad_norm": 3.920830249786377, | |
| "learning_rate": 1.865011400183062e-05, | |
| "loss": 0.739644467830658, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.38650306748466257, | |
| "grad_norm": 4.003133296966553, | |
| "learning_rate": 1.8644568127891303e-05, | |
| "loss": 1.6497926712036133, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.38718473074301296, | |
| "grad_norm": 3.9445290565490723, | |
| "learning_rate": 1.8639011713033347e-05, | |
| "loss": 0.5134506225585938, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.3878663940013633, | |
| "grad_norm": 2.661447048187256, | |
| "learning_rate": 1.8633444764032074e-05, | |
| "loss": 1.0034466981887817, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.3885480572597137, | |
| "grad_norm": 2.3295094966888428, | |
| "learning_rate": 1.862786728767565e-05, | |
| "loss": 1.208392858505249, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3892297205180641, | |
| "grad_norm": 2.4365479946136475, | |
| "learning_rate": 1.8622279290765078e-05, | |
| "loss": 0.5979467630386353, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.38991138377641443, | |
| "grad_norm": 5.058823108673096, | |
| "learning_rate": 1.8616680780114183e-05, | |
| "loss": 1.4158920049667358, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.39059304703476483, | |
| "grad_norm": 2.1014676094055176, | |
| "learning_rate": 1.8611071762549623e-05, | |
| "loss": 1.025521993637085, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.3912747102931152, | |
| "grad_norm": 2.405318260192871, | |
| "learning_rate": 1.860545224491085e-05, | |
| "loss": 0.9537474513053894, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.39195637355146556, | |
| "grad_norm": 4.968394756317139, | |
| "learning_rate": 1.8599822234050143e-05, | |
| "loss": 1.0375311374664307, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.39263803680981596, | |
| "grad_norm": 2.6036109924316406, | |
| "learning_rate": 1.859418173683255e-05, | |
| "loss": 1.193580150604248, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.3933197000681663, | |
| "grad_norm": 3.131683826446533, | |
| "learning_rate": 1.858853076013593e-05, | |
| "loss": 1.137315034866333, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.3940013633265167, | |
| "grad_norm": 4.054980278015137, | |
| "learning_rate": 1.8582869310850903e-05, | |
| "loss": 0.7231044173240662, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.3946830265848671, | |
| "grad_norm": 2.896749496459961, | |
| "learning_rate": 1.8577197395880866e-05, | |
| "loss": 0.8693374395370483, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.39536468984321743, | |
| "grad_norm": 3.2366976737976074, | |
| "learning_rate": 1.8571515022141974e-05, | |
| "loss": 0.869022786617279, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3960463531015678, | |
| "grad_norm": 1.8894977569580078, | |
| "learning_rate": 1.856582219656314e-05, | |
| "loss": 0.7851641178131104, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.3967280163599182, | |
| "grad_norm": 1.6904579401016235, | |
| "learning_rate": 1.856011892608602e-05, | |
| "loss": 0.6581047773361206, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.39740967961826856, | |
| "grad_norm": 3.3967642784118652, | |
| "learning_rate": 1.8554405217665004e-05, | |
| "loss": 0.8824714422225952, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.39809134287661896, | |
| "grad_norm": 3.3108932971954346, | |
| "learning_rate": 1.854868107826721e-05, | |
| "loss": 1.6939976215362549, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.3987730061349693, | |
| "grad_norm": 5.848761558532715, | |
| "learning_rate": 1.8542946514872478e-05, | |
| "loss": 2.178783416748047, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.3994546693933197, | |
| "grad_norm": 3.158979654312134, | |
| "learning_rate": 1.8537201534473353e-05, | |
| "loss": 1.70957612991333, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.4001363326516701, | |
| "grad_norm": 8.649386405944824, | |
| "learning_rate": 1.8531446144075093e-05, | |
| "loss": 0.9513790607452393, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.40081799591002043, | |
| "grad_norm": 7.115112781524658, | |
| "learning_rate": 1.852568035069564e-05, | |
| "loss": 1.7480049133300781, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.4014996591683708, | |
| "grad_norm": 8.075335502624512, | |
| "learning_rate": 1.8519904161365624e-05, | |
| "loss": 1.7717616558074951, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.4021813224267212, | |
| "grad_norm": 8.434859275817871, | |
| "learning_rate": 1.851411758312835e-05, | |
| "loss": 1.2135674953460693, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.40286298568507156, | |
| "grad_norm": 1.8380115032196045, | |
| "learning_rate": 1.8508320623039792e-05, | |
| "loss": 1.348672866821289, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.40354464894342196, | |
| "grad_norm": 2.730818033218384, | |
| "learning_rate": 1.8502513288168584e-05, | |
| "loss": 1.261131763458252, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.4042263122017723, | |
| "grad_norm": 16.677165985107422, | |
| "learning_rate": 1.8496695585596013e-05, | |
| "loss": 1.3604838848114014, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.4049079754601227, | |
| "grad_norm": 7.098438262939453, | |
| "learning_rate": 1.8490867522416e-05, | |
| "loss": 1.7277930974960327, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.4055896387184731, | |
| "grad_norm": 3.663721799850464, | |
| "learning_rate": 1.8485029105735112e-05, | |
| "loss": 1.1387404203414917, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.40627130197682343, | |
| "grad_norm": 3.6738693714141846, | |
| "learning_rate": 1.8479180342672525e-05, | |
| "loss": 0.8586311340332031, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.4069529652351738, | |
| "grad_norm": 4.40339469909668, | |
| "learning_rate": 1.8473321240360048e-05, | |
| "loss": 1.3938498497009277, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.4076346284935242, | |
| "grad_norm": 3.4013774394989014, | |
| "learning_rate": 1.846745180594208e-05, | |
| "loss": 1.429935097694397, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.40831629175187456, | |
| "grad_norm": 2.294057607650757, | |
| "learning_rate": 1.8461572046575638e-05, | |
| "loss": 1.4736216068267822, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.40899795501022496, | |
| "grad_norm": 5.40059232711792, | |
| "learning_rate": 1.8455681969430307e-05, | |
| "loss": 1.7445282936096191, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4096796182685753, | |
| "grad_norm": 8.884811401367188, | |
| "learning_rate": 1.8449781581688274e-05, | |
| "loss": 3.040208578109741, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.4103612815269257, | |
| "grad_norm": 5.935024261474609, | |
| "learning_rate": 1.8443870890544287e-05, | |
| "loss": 0.8014417886734009, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.4110429447852761, | |
| "grad_norm": 3.7073605060577393, | |
| "learning_rate": 1.8437949903205657e-05, | |
| "loss": 0.7055726647377014, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.4117246080436264, | |
| "grad_norm": 3.1601369380950928, | |
| "learning_rate": 1.843201862689225e-05, | |
| "loss": 0.5688856840133667, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.4124062713019768, | |
| "grad_norm": 3.3272056579589844, | |
| "learning_rate": 1.8426077068836487e-05, | |
| "loss": 1.3306844234466553, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.4130879345603272, | |
| "grad_norm": 2.028167247772217, | |
| "learning_rate": 1.842012523628332e-05, | |
| "loss": 0.9651132225990295, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.41376959781867756, | |
| "grad_norm": 3.8314738273620605, | |
| "learning_rate": 1.8414163136490224e-05, | |
| "loss": 1.5358803272247314, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.41445126107702795, | |
| "grad_norm": 6.4982733726501465, | |
| "learning_rate": 1.84081907767272e-05, | |
| "loss": 2.0918755531311035, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.41513292433537835, | |
| "grad_norm": 1.8055447340011597, | |
| "learning_rate": 1.840220816427676e-05, | |
| "loss": 0.6877099275588989, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.4158145875937287, | |
| "grad_norm": 2.6051347255706787, | |
| "learning_rate": 1.839621530643392e-05, | |
| "loss": 1.0851391553878784, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.4164962508520791, | |
| "grad_norm": 4.730994701385498, | |
| "learning_rate": 1.839021221050618e-05, | |
| "loss": 1.643237829208374, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.4171779141104294, | |
| "grad_norm": 2.420578956604004, | |
| "learning_rate": 1.838419888381353e-05, | |
| "loss": 0.8266706466674805, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.4178595773687798, | |
| "grad_norm": 2.6848230361938477, | |
| "learning_rate": 1.8378175333688438e-05, | |
| "loss": 0.33754855394363403, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.4185412406271302, | |
| "grad_norm": 4.421104431152344, | |
| "learning_rate": 1.837214156747583e-05, | |
| "loss": 1.3408927917480469, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.41922290388548056, | |
| "grad_norm": 5.390198707580566, | |
| "learning_rate": 1.8366097592533095e-05, | |
| "loss": 0.5011366009712219, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.41990456714383095, | |
| "grad_norm": 3.1726467609405518, | |
| "learning_rate": 1.8360043416230067e-05, | |
| "loss": 1.6905428171157837, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.42058623040218135, | |
| "grad_norm": 2.5729990005493164, | |
| "learning_rate": 1.8353979045949023e-05, | |
| "loss": 1.076096534729004, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.4212678936605317, | |
| "grad_norm": 3.2037148475646973, | |
| "learning_rate": 1.834790448908467e-05, | |
| "loss": 0.5505931377410889, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.4219495569188821, | |
| "grad_norm": 3.306154251098633, | |
| "learning_rate": 1.8341819753044135e-05, | |
| "loss": 0.9625562429428101, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.4226312201772324, | |
| "grad_norm": 2.3715250492095947, | |
| "learning_rate": 1.8335724845246948e-05, | |
| "loss": 0.53058260679245, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.4233128834355828, | |
| "grad_norm": 4.0207839012146, | |
| "learning_rate": 1.8329619773125064e-05, | |
| "loss": 1.3297679424285889, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.4239945466939332, | |
| "grad_norm": 6.694503307342529, | |
| "learning_rate": 1.832350454412281e-05, | |
| "loss": 2.504713296890259, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.42467620995228356, | |
| "grad_norm": 3.3091583251953125, | |
| "learning_rate": 1.8317379165696908e-05, | |
| "loss": 1.2117489576339722, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.42535787321063395, | |
| "grad_norm": 2.701916217803955, | |
| "learning_rate": 1.8311243645316458e-05, | |
| "loss": 0.6958295106887817, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.42603953646898435, | |
| "grad_norm": 1.5155378580093384, | |
| "learning_rate": 1.830509799046292e-05, | |
| "loss": 0.8749496936798096, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.4267211997273347, | |
| "grad_norm": 1.740338683128357, | |
| "learning_rate": 1.829894220863012e-05, | |
| "loss": 0.7681260108947754, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.4274028629856851, | |
| "grad_norm": 3.138975143432617, | |
| "learning_rate": 1.8292776307324217e-05, | |
| "loss": 1.421783685684204, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.4280845262440354, | |
| "grad_norm": 2.65615177154541, | |
| "learning_rate": 1.8286600294063732e-05, | |
| "loss": 0.5015825629234314, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.4287661895023858, | |
| "grad_norm": 3.291783094406128, | |
| "learning_rate": 1.82804141763795e-05, | |
| "loss": 1.7397900819778442, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.4294478527607362, | |
| "grad_norm": 6.939148426055908, | |
| "learning_rate": 1.8274217961814682e-05, | |
| "loss": 1.7783665657043457, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.43012951601908656, | |
| "grad_norm": 2.345168352127075, | |
| "learning_rate": 1.8268011657924746e-05, | |
| "loss": 0.9429031610488892, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.43081117927743695, | |
| "grad_norm": 2.665942907333374, | |
| "learning_rate": 1.8261795272277472e-05, | |
| "loss": 0.8730664849281311, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.43149284253578735, | |
| "grad_norm": 2.2719271183013916, | |
| "learning_rate": 1.8255568812452923e-05, | |
| "loss": 1.1947182416915894, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.4321745057941377, | |
| "grad_norm": 3.0505599975585938, | |
| "learning_rate": 1.8249332286043456e-05, | |
| "loss": 1.4211301803588867, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.4328561690524881, | |
| "grad_norm": 1.8321754932403564, | |
| "learning_rate": 1.8243085700653698e-05, | |
| "loss": 1.4799766540527344, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.4335378323108384, | |
| "grad_norm": 1.7265902757644653, | |
| "learning_rate": 1.8236829063900535e-05, | |
| "loss": 1.4023617506027222, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.4342194955691888, | |
| "grad_norm": 2.2862470149993896, | |
| "learning_rate": 1.8230562383413127e-05, | |
| "loss": 1.5263274908065796, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.4349011588275392, | |
| "grad_norm": 2.731217384338379, | |
| "learning_rate": 1.822428566683286e-05, | |
| "loss": 1.7954682111740112, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.43558282208588955, | |
| "grad_norm": 2.999441385269165, | |
| "learning_rate": 1.8217998921813375e-05, | |
| "loss": 0.7107315063476562, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.43626448534423995, | |
| "grad_norm": 2.7480056285858154, | |
| "learning_rate": 1.821170215602053e-05, | |
| "loss": 1.6439075469970703, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.43694614860259035, | |
| "grad_norm": 5.756744384765625, | |
| "learning_rate": 1.8205395377132407e-05, | |
| "loss": 0.6778331995010376, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.4376278118609407, | |
| "grad_norm": 2.9490387439727783, | |
| "learning_rate": 1.81990785928393e-05, | |
| "loss": 1.2749234437942505, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.4383094751192911, | |
| "grad_norm": 2.444660186767578, | |
| "learning_rate": 1.8192751810843697e-05, | |
| "loss": 1.1050373315811157, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.4389911383776414, | |
| "grad_norm": 2.1506237983703613, | |
| "learning_rate": 1.8186415038860276e-05, | |
| "loss": 0.3917778432369232, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.4396728016359918, | |
| "grad_norm": 2.1962451934814453, | |
| "learning_rate": 1.818006828461591e-05, | |
| "loss": 0.9240995645523071, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.4403544648943422, | |
| "grad_norm": 4.333210468292236, | |
| "learning_rate": 1.8173711555849626e-05, | |
| "loss": 1.3042209148406982, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.44103612815269255, | |
| "grad_norm": 3.983182191848755, | |
| "learning_rate": 1.8167344860312627e-05, | |
| "loss": 1.383401870727539, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.44171779141104295, | |
| "grad_norm": 1.7078006267547607, | |
| "learning_rate": 1.8160968205768264e-05, | |
| "loss": 0.7653818130493164, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.44239945466939334, | |
| "grad_norm": 1.8697181940078735, | |
| "learning_rate": 1.815458159999203e-05, | |
| "loss": 0.8399191498756409, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.4430811179277437, | |
| "grad_norm": 3.080517053604126, | |
| "learning_rate": 1.8148185050771554e-05, | |
| "loss": 1.2301526069641113, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4437627811860941, | |
| "grad_norm": 4.8597588539123535, | |
| "learning_rate": 1.8141778565906594e-05, | |
| "loss": 1.6627204418182373, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 4.034822940826416, | |
| "learning_rate": 1.8135362153209014e-05, | |
| "loss": 1.291428565979004, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.4451261077027948, | |
| "grad_norm": 4.153934955596924, | |
| "learning_rate": 1.8128935820502792e-05, | |
| "loss": 1.1491031646728516, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.4458077709611452, | |
| "grad_norm": 3.6002509593963623, | |
| "learning_rate": 1.8122499575624e-05, | |
| "loss": 1.6600425243377686, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.44648943421949555, | |
| "grad_norm": 1.543675184249878, | |
| "learning_rate": 1.8116053426420793e-05, | |
| "loss": 0.8339579105377197, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.44717109747784595, | |
| "grad_norm": 2.568967819213867, | |
| "learning_rate": 1.8109597380753404e-05, | |
| "loss": 0.8489872813224792, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.44785276073619634, | |
| "grad_norm": 3.282658815383911, | |
| "learning_rate": 1.8103131446494144e-05, | |
| "loss": 1.2508018016815186, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.4485344239945467, | |
| "grad_norm": 3.4995203018188477, | |
| "learning_rate": 1.8096655631527365e-05, | |
| "loss": 1.5129706859588623, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.4492160872528971, | |
| "grad_norm": 2.7602193355560303, | |
| "learning_rate": 1.8090169943749477e-05, | |
| "loss": 1.2756472826004028, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.4498977505112474, | |
| "grad_norm": 11.795307159423828, | |
| "learning_rate": 1.8083674391068925e-05, | |
| "loss": 1.1611485481262207, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.4505794137695978, | |
| "grad_norm": 1.9726192951202393, | |
| "learning_rate": 1.807716898140619e-05, | |
| "loss": 0.83235764503479, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.4512610770279482, | |
| "grad_norm": 2.960864543914795, | |
| "learning_rate": 1.807065372269376e-05, | |
| "loss": 0.7254927158355713, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.45194274028629855, | |
| "grad_norm": 2.131723165512085, | |
| "learning_rate": 1.8064128622876146e-05, | |
| "loss": 0.8774275183677673, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.45262440354464895, | |
| "grad_norm": 3.856238603591919, | |
| "learning_rate": 1.805759368990985e-05, | |
| "loss": 0.6111989617347717, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.45330606680299934, | |
| "grad_norm": 2.0575990676879883, | |
| "learning_rate": 1.8051048931763366e-05, | |
| "loss": 1.1646723747253418, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.4539877300613497, | |
| "grad_norm": 2.3782901763916016, | |
| "learning_rate": 1.804449435641717e-05, | |
| "loss": 1.2033634185791016, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.4546693933197001, | |
| "grad_norm": 3.0704290866851807, | |
| "learning_rate": 1.803792997186371e-05, | |
| "loss": 1.3781287670135498, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.4553510565780504, | |
| "grad_norm": 3.466710090637207, | |
| "learning_rate": 1.803135578610739e-05, | |
| "loss": 0.7485005259513855, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.4560327198364008, | |
| "grad_norm": 2.91351580619812, | |
| "learning_rate": 1.802477180716457e-05, | |
| "loss": 1.451478362083435, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.4567143830947512, | |
| "grad_norm": 2.3317689895629883, | |
| "learning_rate": 1.8018178043063554e-05, | |
| "loss": 1.2862257957458496, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.45739604635310155, | |
| "grad_norm": 2.3442463874816895, | |
| "learning_rate": 1.801157450184457e-05, | |
| "loss": 1.1389061212539673, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.45807770961145194, | |
| "grad_norm": 2.2962794303894043, | |
| "learning_rate": 1.8004961191559765e-05, | |
| "loss": 0.920241117477417, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.45875937286980234, | |
| "grad_norm": 8.112113952636719, | |
| "learning_rate": 1.7998338120273218e-05, | |
| "loss": 0.3879634439945221, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.4594410361281527, | |
| "grad_norm": 2.143040657043457, | |
| "learning_rate": 1.7991705296060888e-05, | |
| "loss": 1.3585686683654785, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.4601226993865031, | |
| "grad_norm": 6.662731647491455, | |
| "learning_rate": 1.798506272701064e-05, | |
| "loss": 1.844017505645752, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.4608043626448534, | |
| "grad_norm": 2.191220760345459, | |
| "learning_rate": 1.797841042122221e-05, | |
| "loss": 1.460452675819397, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.4614860259032038, | |
| "grad_norm": 3.355347156524658, | |
| "learning_rate": 1.797174838680722e-05, | |
| "loss": 1.8264518976211548, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.4621676891615542, | |
| "grad_norm": 1.9022427797317505, | |
| "learning_rate": 1.7965076631889146e-05, | |
| "loss": 1.0955109596252441, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.46284935241990455, | |
| "grad_norm": 2.4177889823913574, | |
| "learning_rate": 1.7958395164603323e-05, | |
| "loss": 1.0090279579162598, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.46353101567825494, | |
| "grad_norm": 3.373490571975708, | |
| "learning_rate": 1.795170399309692e-05, | |
| "loss": 1.6577601432800293, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.46421267893660534, | |
| "grad_norm": 2.119124174118042, | |
| "learning_rate": 1.794500312552895e-05, | |
| "loss": 1.2227559089660645, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.4648943421949557, | |
| "grad_norm": 4.674558639526367, | |
| "learning_rate": 1.7938292570070238e-05, | |
| "loss": 1.5070548057556152, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.4655760054533061, | |
| "grad_norm": 4.290884494781494, | |
| "learning_rate": 1.7931572334903427e-05, | |
| "loss": 0.7182972431182861, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.4662576687116564, | |
| "grad_norm": 2.9920401573181152, | |
| "learning_rate": 1.792484242822297e-05, | |
| "loss": 1.6099597215652466, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.4669393319700068, | |
| "grad_norm": 2.643561840057373, | |
| "learning_rate": 1.7918102858235103e-05, | |
| "loss": 0.6941257119178772, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.4676209952283572, | |
| "grad_norm": 5.84933614730835, | |
| "learning_rate": 1.7911353633157844e-05, | |
| "loss": 0.5451070070266724, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.46830265848670755, | |
| "grad_norm": 2.0181429386138916, | |
| "learning_rate": 1.7904594761221e-05, | |
| "loss": 1.117553949356079, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.46898432174505794, | |
| "grad_norm": 2.363997459411621, | |
| "learning_rate": 1.789782625066612e-05, | |
| "loss": 1.1420124769210815, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.46966598500340834, | |
| "grad_norm": 8.58547592163086, | |
| "learning_rate": 1.7891048109746522e-05, | |
| "loss": 1.1073362827301025, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.4703476482617587, | |
| "grad_norm": 6.624891757965088, | |
| "learning_rate": 1.7884260346727257e-05, | |
| "loss": 1.8669713735580444, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.4710293115201091, | |
| "grad_norm": 4.729033470153809, | |
| "learning_rate": 1.7877462969885114e-05, | |
| "loss": 1.9904009103775024, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.47171097477845947, | |
| "grad_norm": 1.7405580282211304, | |
| "learning_rate": 1.7870655987508613e-05, | |
| "loss": 0.7781103849411011, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.4723926380368098, | |
| "grad_norm": 3.082596778869629, | |
| "learning_rate": 1.7863839407897962e-05, | |
| "loss": 1.954369306564331, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.4730743012951602, | |
| "grad_norm": 2.463020086288452, | |
| "learning_rate": 1.7857013239365098e-05, | |
| "loss": 0.8323026299476624, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.47375596455351054, | |
| "grad_norm": 1.7679860591888428, | |
| "learning_rate": 1.7850177490233635e-05, | |
| "loss": 0.9151184558868408, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.47443762781186094, | |
| "grad_norm": 3.689037799835205, | |
| "learning_rate": 1.784333216883887e-05, | |
| "loss": 1.7770330905914307, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.47511929107021134, | |
| "grad_norm": 3.824721097946167, | |
| "learning_rate": 1.7836477283527787e-05, | |
| "loss": 0.8589752912521362, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.4758009543285617, | |
| "grad_norm": 3.058713674545288, | |
| "learning_rate": 1.782961284265901e-05, | |
| "loss": 1.1384007930755615, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.47648261758691207, | |
| "grad_norm": 2.086995840072632, | |
| "learning_rate": 1.7822738854602835e-05, | |
| "loss": 0.9110372066497803, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.47716428084526247, | |
| "grad_norm": 2.521082639694214, | |
| "learning_rate": 1.7815855327741185e-05, | |
| "loss": 1.4321343898773193, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4778459441036128, | |
| "grad_norm": 3.35274338722229, | |
| "learning_rate": 1.780896227046762e-05, | |
| "loss": 1.0381226539611816, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.4785276073619632, | |
| "grad_norm": 2.294340133666992, | |
| "learning_rate": 1.7802059691187316e-05, | |
| "loss": 1.3280988931655884, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.47920927062031354, | |
| "grad_norm": 4.459254741668701, | |
| "learning_rate": 1.7795147598317067e-05, | |
| "loss": 1.4365578889846802, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.47989093387866394, | |
| "grad_norm": 2.573671579360962, | |
| "learning_rate": 1.7788226000285272e-05, | |
| "loss": 0.9661579132080078, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.48057259713701433, | |
| "grad_norm": 3.37911319732666, | |
| "learning_rate": 1.7781294905531908e-05, | |
| "loss": 1.8097542524337769, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.4812542603953647, | |
| "grad_norm": 2.7699577808380127, | |
| "learning_rate": 1.7774354322508535e-05, | |
| "loss": 1.1084448099136353, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.48193592365371507, | |
| "grad_norm": 4.020786762237549, | |
| "learning_rate": 1.776740425967829e-05, | |
| "loss": 0.7846464514732361, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.48261758691206547, | |
| "grad_norm": 1.7012214660644531, | |
| "learning_rate": 1.7760444725515856e-05, | |
| "loss": 1.0971169471740723, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.4832992501704158, | |
| "grad_norm": 2.0524704456329346, | |
| "learning_rate": 1.775347572850748e-05, | |
| "loss": 0.6552506685256958, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.4839809134287662, | |
| "grad_norm": 2.792614698410034, | |
| "learning_rate": 1.774649727715094e-05, | |
| "loss": 0.76331627368927, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.48466257668711654, | |
| "grad_norm": 2.1224751472473145, | |
| "learning_rate": 1.7739509379955548e-05, | |
| "loss": 1.1910077333450317, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.48534423994546694, | |
| "grad_norm": 6.945074558258057, | |
| "learning_rate": 1.7732512045442125e-05, | |
| "loss": 1.9449994564056396, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.48602590320381733, | |
| "grad_norm": 7.90489387512207, | |
| "learning_rate": 1.7725505282142997e-05, | |
| "loss": 2.0441508293151855, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.4867075664621677, | |
| "grad_norm": 3.1438591480255127, | |
| "learning_rate": 1.771848909860201e-05, | |
| "loss": 1.3289283514022827, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.48738922972051807, | |
| "grad_norm": 4.14089822769165, | |
| "learning_rate": 1.7711463503374466e-05, | |
| "loss": 1.4352498054504395, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.48807089297886846, | |
| "grad_norm": 5.588857650756836, | |
| "learning_rate": 1.7704428505027165e-05, | |
| "loss": 1.6422855854034424, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.4887525562372188, | |
| "grad_norm": 5.233851432800293, | |
| "learning_rate": 1.7697384112138367e-05, | |
| "loss": 1.6709481477737427, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.4894342194955692, | |
| "grad_norm": 6.2413649559021, | |
| "learning_rate": 1.769033033329778e-05, | |
| "loss": 1.1305233240127563, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.49011588275391954, | |
| "grad_norm": 5.878918647766113, | |
| "learning_rate": 1.7683267177106573e-05, | |
| "loss": 0.5765758156776428, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.49079754601226994, | |
| "grad_norm": 3.9863340854644775, | |
| "learning_rate": 1.7676194652177333e-05, | |
| "loss": 1.8041818141937256, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.49147920927062033, | |
| "grad_norm": 2.9146947860717773, | |
| "learning_rate": 1.7669112767134084e-05, | |
| "loss": 1.0354045629501343, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.49216087252897067, | |
| "grad_norm": 4.547295570373535, | |
| "learning_rate": 1.766202153061225e-05, | |
| "loss": 1.101534128189087, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.49284253578732107, | |
| "grad_norm": 5.544919490814209, | |
| "learning_rate": 1.7654920951258668e-05, | |
| "loss": 0.9572349786758423, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.49352419904567146, | |
| "grad_norm": 3.9510440826416016, | |
| "learning_rate": 1.7647811037731565e-05, | |
| "loss": 0.8672992587089539, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.4942058623040218, | |
| "grad_norm": 4.034142017364502, | |
| "learning_rate": 1.764069179870055e-05, | |
| "loss": 0.8420158624649048, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.4948875255623722, | |
| "grad_norm": 4.926695346832275, | |
| "learning_rate": 1.76335632428466e-05, | |
| "loss": 1.3785490989685059, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.49556918882072254, | |
| "grad_norm": 6.231317043304443, | |
| "learning_rate": 1.762642537886206e-05, | |
| "loss": 1.6120049953460693, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.49625085207907293, | |
| "grad_norm": 2.347989320755005, | |
| "learning_rate": 1.7619278215450615e-05, | |
| "loss": 0.36018455028533936, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.49693251533742333, | |
| "grad_norm": 2.300482988357544, | |
| "learning_rate": 1.76121217613273e-05, | |
| "loss": 0.7264208793640137, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.49761417859577367, | |
| "grad_norm": 3.5808284282684326, | |
| "learning_rate": 1.760495602521847e-05, | |
| "loss": 1.2240182161331177, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.49829584185412407, | |
| "grad_norm": 2.3418526649475098, | |
| "learning_rate": 1.7597781015861797e-05, | |
| "loss": 0.4540298581123352, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.49897750511247446, | |
| "grad_norm": 5.1293864250183105, | |
| "learning_rate": 1.7590596742006276e-05, | |
| "loss": 0.9630584716796875, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.4996591683708248, | |
| "grad_norm": 4.265676975250244, | |
| "learning_rate": 1.7583403212412183e-05, | |
| "loss": 2.012369394302368, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.5003408316291752, | |
| "grad_norm": 14.98397445678711, | |
| "learning_rate": 1.7576200435851082e-05, | |
| "loss": 1.671754002571106, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.5010224948875256, | |
| "grad_norm": 6.400186538696289, | |
| "learning_rate": 1.756898842110582e-05, | |
| "loss": 2.1458077430725098, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.501704158145876, | |
| "grad_norm": 5.821322917938232, | |
| "learning_rate": 1.75617671769705e-05, | |
| "loss": 0.9948315620422363, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.5023858214042263, | |
| "grad_norm": 4.912761211395264, | |
| "learning_rate": 1.7554536712250488e-05, | |
| "loss": 0.9431995153427124, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.5030674846625767, | |
| "grad_norm": 8.251131057739258, | |
| "learning_rate": 1.7547297035762387e-05, | |
| "loss": 1.3028041124343872, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.5037491479209271, | |
| "grad_norm": 4.36862325668335, | |
| "learning_rate": 1.7540048156334035e-05, | |
| "loss": 0.7506901621818542, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.5044308111792775, | |
| "grad_norm": 3.1437900066375732, | |
| "learning_rate": 1.753279008280449e-05, | |
| "loss": 1.8270992040634155, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5051124744376279, | |
| "grad_norm": 1.6939406394958496, | |
| "learning_rate": 1.7525522824024023e-05, | |
| "loss": 0.8369800448417664, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.5057941376959781, | |
| "grad_norm": 5.759995460510254, | |
| "learning_rate": 1.75182463888541e-05, | |
| "loss": 1.7431058883666992, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.5064758009543285, | |
| "grad_norm": 3.341987133026123, | |
| "learning_rate": 1.751096078616739e-05, | |
| "loss": 1.688689947128296, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.5071574642126789, | |
| "grad_norm": 2.5472590923309326, | |
| "learning_rate": 1.7503666024847722e-05, | |
| "loss": 0.9975855350494385, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.5078391274710293, | |
| "grad_norm": 3.2179603576660156, | |
| "learning_rate": 1.74963621137901e-05, | |
| "loss": 0.6496344804763794, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.5085207907293797, | |
| "grad_norm": 5.027393817901611, | |
| "learning_rate": 1.7489049061900702e-05, | |
| "loss": 1.0913028717041016, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.50920245398773, | |
| "grad_norm": 3.2983977794647217, | |
| "learning_rate": 1.7481726878096824e-05, | |
| "loss": 0.8632484674453735, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.5098841172460804, | |
| "grad_norm": 2.075984239578247, | |
| "learning_rate": 1.7474395571306914e-05, | |
| "loss": 0.8893939256668091, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.5105657805044308, | |
| "grad_norm": 3.399078130722046, | |
| "learning_rate": 1.746705515047054e-05, | |
| "loss": 0.7946146130561829, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.5112474437627812, | |
| "grad_norm": 3.5610105991363525, | |
| "learning_rate": 1.7459705624538383e-05, | |
| "loss": 1.0715913772583008, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5119291070211316, | |
| "grad_norm": 2.419004440307617, | |
| "learning_rate": 1.745234700247223e-05, | |
| "loss": 1.079657793045044, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.512610770279482, | |
| "grad_norm": 4.799737930297852, | |
| "learning_rate": 1.7444979293244953e-05, | |
| "loss": 1.1214956045150757, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.5132924335378323, | |
| "grad_norm": 2.3297369480133057, | |
| "learning_rate": 1.7437602505840513e-05, | |
| "loss": 1.031355857849121, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.5139740967961827, | |
| "grad_norm": 1.1967624425888062, | |
| "learning_rate": 1.7430216649253934e-05, | |
| "loss": 0.6558365821838379, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.5146557600545331, | |
| "grad_norm": 2.59171462059021, | |
| "learning_rate": 1.7422821732491297e-05, | |
| "loss": 0.8954111337661743, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.5153374233128835, | |
| "grad_norm": 3.414041519165039, | |
| "learning_rate": 1.741541776456974e-05, | |
| "loss": 1.0040534734725952, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.5160190865712339, | |
| "grad_norm": 3.0361270904541016, | |
| "learning_rate": 1.7408004754517428e-05, | |
| "loss": 0.7500423789024353, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.5167007498295841, | |
| "grad_norm": 3.6555752754211426, | |
| "learning_rate": 1.7400582711373558e-05, | |
| "loss": 0.7112274169921875, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.5173824130879345, | |
| "grad_norm": 5.004818916320801, | |
| "learning_rate": 1.739315164418834e-05, | |
| "loss": 1.391385555267334, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.5180640763462849, | |
| "grad_norm": 3.7263758182525635, | |
| "learning_rate": 1.7385711562022988e-05, | |
| "loss": 1.5679736137390137, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.5187457396046353, | |
| "grad_norm": 4.171016693115234, | |
| "learning_rate": 1.7378262473949705e-05, | |
| "loss": 1.539245843887329, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.5194274028629857, | |
| "grad_norm": 2.8865065574645996, | |
| "learning_rate": 1.737080438905168e-05, | |
| "loss": 0.8579041957855225, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.520109066121336, | |
| "grad_norm": 2.8619775772094727, | |
| "learning_rate": 1.736333731642307e-05, | |
| "loss": 1.6193718910217285, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.5207907293796864, | |
| "grad_norm": 3.0032424926757812, | |
| "learning_rate": 1.735586126516899e-05, | |
| "loss": 1.8853137493133545, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.5214723926380368, | |
| "grad_norm": 3.6113500595092773, | |
| "learning_rate": 1.734837624440551e-05, | |
| "loss": 1.374289870262146, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.5221540558963872, | |
| "grad_norm": 4.458436489105225, | |
| "learning_rate": 1.734088226325963e-05, | |
| "loss": 0.8771038055419922, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.5228357191547376, | |
| "grad_norm": 3.736870050430298, | |
| "learning_rate": 1.733337933086928e-05, | |
| "loss": 1.342976450920105, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.523517382413088, | |
| "grad_norm": 3.629173755645752, | |
| "learning_rate": 1.7325867456383303e-05, | |
| "loss": 1.500548243522644, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.5241990456714383, | |
| "grad_norm": 2.3277621269226074, | |
| "learning_rate": 1.7318346648961444e-05, | |
| "loss": 0.8789968490600586, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.5248807089297887, | |
| "grad_norm": 3.080993175506592, | |
| "learning_rate": 1.731081691777434e-05, | |
| "loss": 1.8359971046447754, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.5255623721881391, | |
| "grad_norm": 2.8816323280334473, | |
| "learning_rate": 1.7303278272003524e-05, | |
| "loss": 1.0419747829437256, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.5262440354464895, | |
| "grad_norm": 2.5966172218322754, | |
| "learning_rate": 1.7295730720841372e-05, | |
| "loss": 0.6071522235870361, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.5269256987048399, | |
| "grad_norm": 2.166282892227173, | |
| "learning_rate": 1.7288174273491144e-05, | |
| "loss": 1.092464566230774, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.5276073619631901, | |
| "grad_norm": 2.050143241882324, | |
| "learning_rate": 1.7280608939166937e-05, | |
| "loss": 0.3647843599319458, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.5282890252215405, | |
| "grad_norm": 4.108394622802734, | |
| "learning_rate": 1.7273034727093677e-05, | |
| "loss": 1.2300772666931152, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.5289706884798909, | |
| "grad_norm": 3.4278016090393066, | |
| "learning_rate": 1.726545164650714e-05, | |
| "loss": 1.1582565307617188, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.5296523517382413, | |
| "grad_norm": 3.606982707977295, | |
| "learning_rate": 1.725785970665388e-05, | |
| "loss": 1.1168954372406006, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.5303340149965917, | |
| "grad_norm": 1.5078668594360352, | |
| "learning_rate": 1.7250258916791286e-05, | |
| "loss": 0.8669070601463318, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.5310156782549421, | |
| "grad_norm": 2.591376543045044, | |
| "learning_rate": 1.7242649286187524e-05, | |
| "loss": 0.9776431322097778, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.5316973415132924, | |
| "grad_norm": 3.464742422103882, | |
| "learning_rate": 1.7235030824121542e-05, | |
| "loss": 1.2853455543518066, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.5323790047716428, | |
| "grad_norm": 5.493305683135986, | |
| "learning_rate": 1.722740353988305e-05, | |
| "loss": 1.8691112995147705, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.5330606680299932, | |
| "grad_norm": 2.410437822341919, | |
| "learning_rate": 1.721976744277253e-05, | |
| "loss": 0.814149796962738, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.5337423312883436, | |
| "grad_norm": 16.686614990234375, | |
| "learning_rate": 1.7212122542101202e-05, | |
| "loss": 0.375972181558609, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.534423994546694, | |
| "grad_norm": 2.9362881183624268, | |
| "learning_rate": 1.7204468847191017e-05, | |
| "loss": 0.7596120834350586, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.5351056578050443, | |
| "grad_norm": 9.278156280517578, | |
| "learning_rate": 1.7196806367374656e-05, | |
| "loss": 1.4426381587982178, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.5357873210633947, | |
| "grad_norm": 1.8680217266082764, | |
| "learning_rate": 1.718913511199551e-05, | |
| "loss": 0.5805763006210327, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.5364689843217451, | |
| "grad_norm": 5.596136569976807, | |
| "learning_rate": 1.7181455090407667e-05, | |
| "loss": 1.9274470806121826, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.5371506475800955, | |
| "grad_norm": 1.6634202003479004, | |
| "learning_rate": 1.717376631197591e-05, | |
| "loss": 0.9097037315368652, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.5378323108384458, | |
| "grad_norm": 2.053905963897705, | |
| "learning_rate": 1.7166068786075697e-05, | |
| "loss": 0.7156496047973633, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.5385139740967961, | |
| "grad_norm": 1.8190159797668457, | |
| "learning_rate": 1.7158362522093153e-05, | |
| "loss": 0.9084742665290833, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.5391956373551465, | |
| "grad_norm": 6.989439010620117, | |
| "learning_rate": 1.715064752942506e-05, | |
| "loss": 1.7869974374771118, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.5398773006134969, | |
| "grad_norm": 2.9668173789978027, | |
| "learning_rate": 1.714292381747883e-05, | |
| "loss": 1.452651023864746, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.5405589638718473, | |
| "grad_norm": 2.09556245803833, | |
| "learning_rate": 1.7135191395672532e-05, | |
| "loss": 1.1105618476867676, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.5412406271301977, | |
| "grad_norm": 2.4879820346832275, | |
| "learning_rate": 1.7127450273434837e-05, | |
| "loss": 1.349273681640625, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.5419222903885481, | |
| "grad_norm": 2.475329875946045, | |
| "learning_rate": 1.7119700460205026e-05, | |
| "loss": 1.1380001306533813, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.5426039536468984, | |
| "grad_norm": 2.9453556537628174, | |
| "learning_rate": 1.7111941965432985e-05, | |
| "loss": 1.2032630443572998, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.5432856169052488, | |
| "grad_norm": 1.9774304628372192, | |
| "learning_rate": 1.710417479857918e-05, | |
| "loss": 1.3968552350997925, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.5439672801635992, | |
| "grad_norm": 1.4213351011276245, | |
| "learning_rate": 1.709639896911466e-05, | |
| "loss": 1.2200727462768555, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.5446489434219496, | |
| "grad_norm": 2.5062761306762695, | |
| "learning_rate": 1.708861448652102e-05, | |
| "loss": 1.232042908668518, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.5453306066803, | |
| "grad_norm": 2.286937713623047, | |
| "learning_rate": 1.7080821360290426e-05, | |
| "loss": 1.0151350498199463, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5460122699386503, | |
| "grad_norm": 3.6527047157287598, | |
| "learning_rate": 1.707301959992557e-05, | |
| "loss": 0.5274887084960938, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.5466939331970007, | |
| "grad_norm": 5.329606056213379, | |
| "learning_rate": 1.7065209214939677e-05, | |
| "loss": 1.9559152126312256, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.547375596455351, | |
| "grad_norm": 3.702806234359741, | |
| "learning_rate": 1.7057390214856493e-05, | |
| "loss": 1.6446794271469116, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.5480572597137015, | |
| "grad_norm": 3.3207600116729736, | |
| "learning_rate": 1.704956260921026e-05, | |
| "loss": 0.7604854106903076, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.5487389229720518, | |
| "grad_norm": 3.5969300270080566, | |
| "learning_rate": 1.7041726407545716e-05, | |
| "loss": 2.1249866485595703, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.5494205862304021, | |
| "grad_norm": 3.1857168674468994, | |
| "learning_rate": 1.703388161941809e-05, | |
| "loss": 1.656814455986023, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.5501022494887525, | |
| "grad_norm": 6.424698829650879, | |
| "learning_rate": 1.7026028254393067e-05, | |
| "loss": 0.5891388058662415, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.5507839127471029, | |
| "grad_norm": 2.86966872215271, | |
| "learning_rate": 1.7018166322046798e-05, | |
| "loss": 1.0807417631149292, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.5514655760054533, | |
| "grad_norm": 2.9843268394470215, | |
| "learning_rate": 1.7010295831965886e-05, | |
| "loss": 1.485582947731018, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.5521472392638037, | |
| "grad_norm": 2.941580295562744, | |
| "learning_rate": 1.7002416793747354e-05, | |
| "loss": 0.7899935841560364, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.5528289025221541, | |
| "grad_norm": 3.1687259674072266, | |
| "learning_rate": 1.6994529216998664e-05, | |
| "loss": 0.9913202524185181, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.5535105657805044, | |
| "grad_norm": 3.191603183746338, | |
| "learning_rate": 1.698663311133768e-05, | |
| "loss": 1.5229895114898682, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.5541922290388548, | |
| "grad_norm": 3.0717062950134277, | |
| "learning_rate": 1.697872848639267e-05, | |
| "loss": 1.01241135597229, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.5548738922972052, | |
| "grad_norm": 3.8809890747070312, | |
| "learning_rate": 1.6970815351802285e-05, | |
| "loss": 1.1171845197677612, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 2.6531922817230225, | |
| "learning_rate": 1.696289371721556e-05, | |
| "loss": 1.0600652694702148, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.556237218813906, | |
| "grad_norm": 2.629523754119873, | |
| "learning_rate": 1.695496359229189e-05, | |
| "loss": 1.5618984699249268, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.5569188820722563, | |
| "grad_norm": 3.4782345294952393, | |
| "learning_rate": 1.694702498670102e-05, | |
| "loss": 1.4636913537979126, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.5576005453306067, | |
| "grad_norm": 5.301943778991699, | |
| "learning_rate": 1.693907791012305e-05, | |
| "loss": 2.062746524810791, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.558282208588957, | |
| "grad_norm": 8.220975875854492, | |
| "learning_rate": 1.6931122372248386e-05, | |
| "loss": 1.9080711603164673, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.5589638718473074, | |
| "grad_norm": 9.341310501098633, | |
| "learning_rate": 1.692315838277778e-05, | |
| "loss": 2.5527243614196777, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5596455351056578, | |
| "grad_norm": 3.4971513748168945, | |
| "learning_rate": 1.6915185951422256e-05, | |
| "loss": 0.818741500377655, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.5603271983640081, | |
| "grad_norm": 2.5906221866607666, | |
| "learning_rate": 1.690720508790316e-05, | |
| "loss": 1.187768578529358, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.5610088616223585, | |
| "grad_norm": 2.4157657623291016, | |
| "learning_rate": 1.6899215801952112e-05, | |
| "loss": 0.821315348148346, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.5616905248807089, | |
| "grad_norm": 3.0064284801483154, | |
| "learning_rate": 1.6891218103310994e-05, | |
| "loss": 1.7353657484054565, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.5623721881390593, | |
| "grad_norm": 2.2080047130584717, | |
| "learning_rate": 1.6883212001731956e-05, | |
| "loss": 0.8247939348220825, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.5630538513974097, | |
| "grad_norm": 3.125454902648926, | |
| "learning_rate": 1.6875197506977387e-05, | |
| "loss": 0.867475152015686, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.5637355146557601, | |
| "grad_norm": 3.8463499546051025, | |
| "learning_rate": 1.686717462881992e-05, | |
| "loss": 1.0625464916229248, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.5644171779141104, | |
| "grad_norm": 5.8361406326293945, | |
| "learning_rate": 1.685914337704239e-05, | |
| "loss": 2.3758678436279297, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.5650988411724608, | |
| "grad_norm": 2.165736436843872, | |
| "learning_rate": 1.6851103761437876e-05, | |
| "loss": 1.1737010478973389, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.5657805044308112, | |
| "grad_norm": 2.5541231632232666, | |
| "learning_rate": 1.6843055791809623e-05, | |
| "loss": 1.3654487133026123, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.5664621676891616, | |
| "grad_norm": 1.6458773612976074, | |
| "learning_rate": 1.6834999477971078e-05, | |
| "loss": 1.1628589630126953, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.567143830947512, | |
| "grad_norm": 2.6931536197662354, | |
| "learning_rate": 1.6826934829745868e-05, | |
| "loss": 1.0243465900421143, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.5678254942058623, | |
| "grad_norm": 2.4119794368743896, | |
| "learning_rate": 1.6818861856967762e-05, | |
| "loss": 1.3056764602661133, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.5685071574642127, | |
| "grad_norm": 2.928074836730957, | |
| "learning_rate": 1.68107805694807e-05, | |
| "loss": 1.3191547393798828, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.569188820722563, | |
| "grad_norm": 1.8255895376205444, | |
| "learning_rate": 1.680269097713876e-05, | |
| "loss": 1.5903112888336182, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.5698704839809134, | |
| "grad_norm": 2.3143746852874756, | |
| "learning_rate": 1.6794593089806134e-05, | |
| "loss": 1.0325926542282104, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.5705521472392638, | |
| "grad_norm": 3.990488052368164, | |
| "learning_rate": 1.678648691735713e-05, | |
| "loss": 1.4269142150878906, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.5712338104976141, | |
| "grad_norm": 3.2297160625457764, | |
| "learning_rate": 1.6778372469676173e-05, | |
| "loss": 1.53303861618042, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.5719154737559645, | |
| "grad_norm": 2.850156545639038, | |
| "learning_rate": 1.6770249756657762e-05, | |
| "loss": 0.9840932488441467, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.5725971370143149, | |
| "grad_norm": 1.9680904150009155, | |
| "learning_rate": 1.6762118788206488e-05, | |
| "loss": 1.3542200326919556, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5732788002726653, | |
| "grad_norm": 2.278607130050659, | |
| "learning_rate": 1.6753979574236996e-05, | |
| "loss": 0.4934600591659546, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.5739604635310157, | |
| "grad_norm": 2.5241875648498535, | |
| "learning_rate": 1.6745832124673996e-05, | |
| "loss": 0.564077615737915, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.5746421267893661, | |
| "grad_norm": 1.576717495918274, | |
| "learning_rate": 1.6737676449452235e-05, | |
| "loss": 1.2056704759597778, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.5753237900477164, | |
| "grad_norm": 3.5793991088867188, | |
| "learning_rate": 1.672951255851649e-05, | |
| "loss": 1.4175639152526855, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.5760054533060668, | |
| "grad_norm": 3.2483086585998535, | |
| "learning_rate": 1.6721340461821555e-05, | |
| "loss": 1.3645559549331665, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.5766871165644172, | |
| "grad_norm": 1.2694355249404907, | |
| "learning_rate": 1.671316016933223e-05, | |
| "loss": 1.0463674068450928, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.5773687798227676, | |
| "grad_norm": 1.5353333950042725, | |
| "learning_rate": 1.6704971691023316e-05, | |
| "loss": 0.9055666923522949, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.578050443081118, | |
| "grad_norm": 7.366179943084717, | |
| "learning_rate": 1.6696775036879588e-05, | |
| "loss": 2.724958896636963, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.5787321063394683, | |
| "grad_norm": 3.2687127590179443, | |
| "learning_rate": 1.6688570216895793e-05, | |
| "loss": 1.4015331268310547, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.5794137695978187, | |
| "grad_norm": 4.6578898429870605, | |
| "learning_rate": 1.6680357241076632e-05, | |
| "loss": 0.6592810153961182, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.580095432856169, | |
| "grad_norm": 2.0691535472869873, | |
| "learning_rate": 1.667213611943675e-05, | |
| "loss": 0.654139518737793, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.5807770961145194, | |
| "grad_norm": 2.046271324157715, | |
| "learning_rate": 1.6663906862000736e-05, | |
| "loss": 1.0838494300842285, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.5814587593728698, | |
| "grad_norm": 2.3908135890960693, | |
| "learning_rate": 1.6655669478803086e-05, | |
| "loss": 0.9503333568572998, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.5821404226312201, | |
| "grad_norm": 2.4451968669891357, | |
| "learning_rate": 1.6647423979888214e-05, | |
| "loss": 1.4373940229415894, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.5828220858895705, | |
| "grad_norm": 1.9011889696121216, | |
| "learning_rate": 1.6639170375310422e-05, | |
| "loss": 1.1368272304534912, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.5835037491479209, | |
| "grad_norm": 5.264576435089111, | |
| "learning_rate": 1.6630908675133905e-05, | |
| "loss": 2.110563278198242, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.5841854124062713, | |
| "grad_norm": 2.0514638423919678, | |
| "learning_rate": 1.6622638889432716e-05, | |
| "loss": 1.768547773361206, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.5848670756646217, | |
| "grad_norm": 2.4283359050750732, | |
| "learning_rate": 1.6614361028290783e-05, | |
| "loss": 1.0054471492767334, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.5855487389229721, | |
| "grad_norm": 2.2833070755004883, | |
| "learning_rate": 1.660607510180187e-05, | |
| "loss": 1.6151342391967773, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.5862304021813224, | |
| "grad_norm": 2.7579729557037354, | |
| "learning_rate": 1.6597781120069584e-05, | |
| "loss": 1.5430269241333008, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5869120654396728, | |
| "grad_norm": 2.7292988300323486, | |
| "learning_rate": 1.658947909320734e-05, | |
| "loss": 0.5025774240493774, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.5875937286980232, | |
| "grad_norm": 2.3136188983917236, | |
| "learning_rate": 1.658116903133838e-05, | |
| "loss": 1.8564517498016357, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.5882753919563736, | |
| "grad_norm": 2.4586784839630127, | |
| "learning_rate": 1.6572850944595735e-05, | |
| "loss": 1.1788643598556519, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.588957055214724, | |
| "grad_norm": 6.834049701690674, | |
| "learning_rate": 1.6564524843122223e-05, | |
| "loss": 2.2534217834472656, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.5896387184730743, | |
| "grad_norm": 2.284600257873535, | |
| "learning_rate": 1.655619073707043e-05, | |
| "loss": 0.9232459664344788, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.5903203817314246, | |
| "grad_norm": 5.458041667938232, | |
| "learning_rate": 1.6547848636602708e-05, | |
| "loss": 1.7762432098388672, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.591002044989775, | |
| "grad_norm": 4.977056980133057, | |
| "learning_rate": 1.653949855189116e-05, | |
| "loss": 1.9290387630462646, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.5916837082481254, | |
| "grad_norm": 2.7046382427215576, | |
| "learning_rate": 1.653114049311762e-05, | |
| "loss": 1.8991743326187134, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.5923653715064758, | |
| "grad_norm": 3.796870708465576, | |
| "learning_rate": 1.6522774470473642e-05, | |
| "loss": 1.9370957612991333, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.5930470347648262, | |
| "grad_norm": 2.8540728092193604, | |
| "learning_rate": 1.6514400494160498e-05, | |
| "loss": 1.2413911819458008, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.5937286980231765, | |
| "grad_norm": 2.58767032623291, | |
| "learning_rate": 1.6506018574389152e-05, | |
| "loss": 1.1953917741775513, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.5944103612815269, | |
| "grad_norm": 4.319695472717285, | |
| "learning_rate": 1.6497628721380257e-05, | |
| "loss": 1.3608784675598145, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.5950920245398773, | |
| "grad_norm": 3.1396563053131104, | |
| "learning_rate": 1.6489230945364148e-05, | |
| "loss": 1.0947556495666504, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.5957736877982277, | |
| "grad_norm": 1.7502825260162354, | |
| "learning_rate": 1.648082525658081e-05, | |
| "loss": 0.942647397518158, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.5964553510565781, | |
| "grad_norm": 3.145920991897583, | |
| "learning_rate": 1.6472411665279872e-05, | |
| "loss": 0.8317646980285645, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.5971370143149284, | |
| "grad_norm": 2.4428975582122803, | |
| "learning_rate": 1.646399018172061e-05, | |
| "loss": 1.2358999252319336, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.5978186775732788, | |
| "grad_norm": 2.7724978923797607, | |
| "learning_rate": 1.6455560816171928e-05, | |
| "loss": 0.3633544445037842, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.5985003408316292, | |
| "grad_norm": 3.3238282203674316, | |
| "learning_rate": 1.644712357891232e-05, | |
| "loss": 1.467452049255371, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.5991820040899796, | |
| "grad_norm": 1.9840031862258911, | |
| "learning_rate": 1.643867848022991e-05, | |
| "loss": 1.1709258556365967, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.59986366734833, | |
| "grad_norm": 3.40712308883667, | |
| "learning_rate": 1.643022553042237e-05, | |
| "loss": 1.1537139415740967, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6005453306066802, | |
| "grad_norm": 6.150967597961426, | |
| "learning_rate": 1.6421764739796974e-05, | |
| "loss": 1.8220754861831665, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.6012269938650306, | |
| "grad_norm": 3.064323663711548, | |
| "learning_rate": 1.6413296118670553e-05, | |
| "loss": 0.8146998882293701, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.601908657123381, | |
| "grad_norm": 3.7720255851745605, | |
| "learning_rate": 1.6404819677369474e-05, | |
| "loss": 1.929551124572754, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.6025903203817314, | |
| "grad_norm": 4.224287033081055, | |
| "learning_rate": 1.639633542622965e-05, | |
| "loss": 0.8990840911865234, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.6032719836400818, | |
| "grad_norm": 7.684881687164307, | |
| "learning_rate": 1.6387843375596513e-05, | |
| "loss": 1.4321951866149902, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.6039536468984322, | |
| "grad_norm": 3.8415868282318115, | |
| "learning_rate": 1.6379343535825004e-05, | |
| "loss": 0.9163957834243774, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.6046353101567825, | |
| "grad_norm": 3.149662971496582, | |
| "learning_rate": 1.6370835917279573e-05, | |
| "loss": 1.1009467840194702, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.6053169734151329, | |
| "grad_norm": 4.081305503845215, | |
| "learning_rate": 1.636232053033414e-05, | |
| "loss": 1.9384831190109253, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.6059986366734833, | |
| "grad_norm": 1.7843787670135498, | |
| "learning_rate": 1.63537973853721e-05, | |
| "loss": 0.908522367477417, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.6066802999318337, | |
| "grad_norm": 2.2492616176605225, | |
| "learning_rate": 1.634526649278632e-05, | |
| "loss": 1.1304497718811035, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.6073619631901841, | |
| "grad_norm": 2.759291648864746, | |
| "learning_rate": 1.6336727862979108e-05, | |
| "loss": 0.9259381294250488, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.6080436264485344, | |
| "grad_norm": 3.2604150772094727, | |
| "learning_rate": 1.6328181506362193e-05, | |
| "loss": 1.2496137619018555, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.6087252897068848, | |
| "grad_norm": 2.1876111030578613, | |
| "learning_rate": 1.631962743335675e-05, | |
| "loss": 1.1682977676391602, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.6094069529652352, | |
| "grad_norm": 2.5494682788848877, | |
| "learning_rate": 1.631106565439334e-05, | |
| "loss": 1.04998779296875, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.6100886162235856, | |
| "grad_norm": 1.378842830657959, | |
| "learning_rate": 1.630249617991194e-05, | |
| "loss": 0.8368078470230103, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.610770279481936, | |
| "grad_norm": 3.7163796424865723, | |
| "learning_rate": 1.6293919020361895e-05, | |
| "loss": 1.5429718494415283, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.6114519427402862, | |
| "grad_norm": 7.239688873291016, | |
| "learning_rate": 1.6285334186201933e-05, | |
| "loss": 2.765354871749878, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.6121336059986366, | |
| "grad_norm": 1.3090721368789673, | |
| "learning_rate": 1.6276741687900134e-05, | |
| "loss": 0.7607836127281189, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.612815269256987, | |
| "grad_norm": 3.923982858657837, | |
| "learning_rate": 1.626814153593392e-05, | |
| "loss": 1.0957961082458496, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.6134969325153374, | |
| "grad_norm": 3.3863680362701416, | |
| "learning_rate": 1.6259533740790055e-05, | |
| "loss": 1.135899305343628, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6141785957736878, | |
| "grad_norm": 2.371218681335449, | |
| "learning_rate": 1.6250918312964613e-05, | |
| "loss": 1.238204002380371, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.6148602590320382, | |
| "grad_norm": 1.8255348205566406, | |
| "learning_rate": 1.6242295262962983e-05, | |
| "loss": 0.9338752031326294, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.6155419222903885, | |
| "grad_norm": 4.244454383850098, | |
| "learning_rate": 1.6233664601299848e-05, | |
| "loss": 1.579453468322754, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.6162235855487389, | |
| "grad_norm": 1.443424940109253, | |
| "learning_rate": 1.6225026338499166e-05, | |
| "loss": 0.8247011303901672, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.6169052488070893, | |
| "grad_norm": 1.967061996459961, | |
| "learning_rate": 1.6216380485094164e-05, | |
| "loss": 0.9666604995727539, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.6175869120654397, | |
| "grad_norm": 3.75724720954895, | |
| "learning_rate": 1.6207727051627334e-05, | |
| "loss": 1.3404090404510498, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.6182685753237901, | |
| "grad_norm": 2.1765472888946533, | |
| "learning_rate": 1.61990660486504e-05, | |
| "loss": 0.6874090433120728, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.6189502385821404, | |
| "grad_norm": 1.8902146816253662, | |
| "learning_rate": 1.6190397486724324e-05, | |
| "loss": 1.1136112213134766, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.6196319018404908, | |
| "grad_norm": 1.558221459388733, | |
| "learning_rate": 1.6181721376419282e-05, | |
| "loss": 1.1098493337631226, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.6203135650988412, | |
| "grad_norm": 5.35651159286499, | |
| "learning_rate": 1.617303772831465e-05, | |
| "loss": 2.0255041122436523, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.6209952283571916, | |
| "grad_norm": 5.652133464813232, | |
| "learning_rate": 1.6164346552999e-05, | |
| "loss": 2.2431159019470215, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.621676891615542, | |
| "grad_norm": 2.664736747741699, | |
| "learning_rate": 1.615564786107009e-05, | |
| "loss": 1.6007494926452637, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.6223585548738922, | |
| "grad_norm": 3.1986606121063232, | |
| "learning_rate": 1.614694166313483e-05, | |
| "loss": 0.9353867769241333, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.6230402181322426, | |
| "grad_norm": 3.8483238220214844, | |
| "learning_rate": 1.6138227969809283e-05, | |
| "loss": 1.6212551593780518, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.623721881390593, | |
| "grad_norm": 6.442831039428711, | |
| "learning_rate": 1.6129506791718665e-05, | |
| "loss": 1.4637436866760254, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.6244035446489434, | |
| "grad_norm": 3.796579122543335, | |
| "learning_rate": 1.6120778139497307e-05, | |
| "loss": 1.3633496761322021, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.6250852079072938, | |
| "grad_norm": 4.911407470703125, | |
| "learning_rate": 1.611204202378866e-05, | |
| "loss": 1.731675386428833, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.6257668711656442, | |
| "grad_norm": 3.0223350524902344, | |
| "learning_rate": 1.6103298455245267e-05, | |
| "loss": 2.0269124507904053, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.6264485344239945, | |
| "grad_norm": 1.8000354766845703, | |
| "learning_rate": 1.6094547444528767e-05, | |
| "loss": 1.2296888828277588, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.6271301976823449, | |
| "grad_norm": 2.296272039413452, | |
| "learning_rate": 1.6085789002309873e-05, | |
| "loss": 0.943554699420929, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.6278118609406953, | |
| "grad_norm": 4.166633605957031, | |
| "learning_rate": 1.607702313926836e-05, | |
| "loss": 1.6951007843017578, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.6284935241990457, | |
| "grad_norm": 2.0115439891815186, | |
| "learning_rate": 1.6068249866093046e-05, | |
| "loss": 1.4381415843963623, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.6291751874573961, | |
| "grad_norm": 2.222892999649048, | |
| "learning_rate": 1.605946919348179e-05, | |
| "loss": 0.7805427312850952, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.6298568507157464, | |
| "grad_norm": 7.529016494750977, | |
| "learning_rate": 1.6050681132141473e-05, | |
| "loss": 2.7365059852600098, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.6305385139740968, | |
| "grad_norm": 2.209188461303711, | |
| "learning_rate": 1.6041885692787985e-05, | |
| "loss": 0.549006998538971, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.6312201772324472, | |
| "grad_norm": 1.747800588607788, | |
| "learning_rate": 1.603308288614621e-05, | |
| "loss": 1.1981534957885742, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.6319018404907976, | |
| "grad_norm": 1.9363545179367065, | |
| "learning_rate": 1.602427272295002e-05, | |
| "loss": 1.0019506216049194, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.632583503749148, | |
| "grad_norm": 2.520519256591797, | |
| "learning_rate": 1.6015455213942253e-05, | |
| "loss": 1.3125648498535156, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.6332651670074982, | |
| "grad_norm": 1.7315162420272827, | |
| "learning_rate": 1.600663036987471e-05, | |
| "loss": 1.2615352869033813, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.6339468302658486, | |
| "grad_norm": 2.635585308074951, | |
| "learning_rate": 1.599779820150813e-05, | |
| "loss": 1.0167396068572998, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.634628493524199, | |
| "grad_norm": 2.0008440017700195, | |
| "learning_rate": 1.5988958719612182e-05, | |
| "loss": 1.4046450853347778, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.6353101567825494, | |
| "grad_norm": 1.9721522331237793, | |
| "learning_rate": 1.5980111934965467e-05, | |
| "loss": 1.0870532989501953, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.6359918200408998, | |
| "grad_norm": 2.249335289001465, | |
| "learning_rate": 1.5971257858355467e-05, | |
| "loss": 1.325969934463501, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.6366734832992502, | |
| "grad_norm": 5.9346747398376465, | |
| "learning_rate": 1.596239650057858e-05, | |
| "loss": 2.0995099544525146, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.6373551465576005, | |
| "grad_norm": 4.217530250549316, | |
| "learning_rate": 1.5953527872440063e-05, | |
| "loss": 1.4458832740783691, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.6380368098159509, | |
| "grad_norm": 4.074642658233643, | |
| "learning_rate": 1.5944651984754053e-05, | |
| "loss": 0.7346776723861694, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.6387184730743013, | |
| "grad_norm": 3.239382743835449, | |
| "learning_rate": 1.5935768848343527e-05, | |
| "loss": 1.4215084314346313, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.6394001363326517, | |
| "grad_norm": 2.162938117980957, | |
| "learning_rate": 1.5926878474040313e-05, | |
| "loss": 1.3204901218414307, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.6400817995910021, | |
| "grad_norm": 3.4916062355041504, | |
| "learning_rate": 1.591798087268505e-05, | |
| "loss": 0.5219067335128784, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.6407634628493524, | |
| "grad_norm": 3.3285751342773438, | |
| "learning_rate": 1.5909076055127202e-05, | |
| "loss": 1.9166496992111206, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.6414451261077028, | |
| "grad_norm": 3.0225143432617188, | |
| "learning_rate": 1.590016403222503e-05, | |
| "loss": 1.4402289390563965, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.6421267893660532, | |
| "grad_norm": 3.20241117477417, | |
| "learning_rate": 1.5891244814845575e-05, | |
| "loss": 1.6794233322143555, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.6428084526244036, | |
| "grad_norm": 2.3573503494262695, | |
| "learning_rate": 1.5882318413864653e-05, | |
| "loss": 1.4372035264968872, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.643490115882754, | |
| "grad_norm": 4.339122772216797, | |
| "learning_rate": 1.5873384840166846e-05, | |
| "loss": 2.0290374755859375, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.6441717791411042, | |
| "grad_norm": 2.5518579483032227, | |
| "learning_rate": 1.5864444104645473e-05, | |
| "loss": 1.3544771671295166, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.6448534423994546, | |
| "grad_norm": 3.1364567279815674, | |
| "learning_rate": 1.5855496218202592e-05, | |
| "loss": 1.642664909362793, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.645535105657805, | |
| "grad_norm": 4.1128249168396, | |
| "learning_rate": 1.5846541191748978e-05, | |
| "loss": 1.026753306388855, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.6462167689161554, | |
| "grad_norm": 2.6246652603149414, | |
| "learning_rate": 1.5837579036204114e-05, | |
| "loss": 1.317074179649353, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.6468984321745058, | |
| "grad_norm": 2.8232638835906982, | |
| "learning_rate": 1.582860976249617e-05, | |
| "loss": 0.8829560279846191, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.6475800954328562, | |
| "grad_norm": 2.4629805088043213, | |
| "learning_rate": 1.581963338156201e-05, | |
| "loss": 1.2965718507766724, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6482617586912065, | |
| "grad_norm": 1.6031711101531982, | |
| "learning_rate": 1.5810649904347145e-05, | |
| "loss": 0.9839382171630859, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.6489434219495569, | |
| "grad_norm": 2.805589199066162, | |
| "learning_rate": 1.5801659341805752e-05, | |
| "loss": 1.3879642486572266, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.6496250852079073, | |
| "grad_norm": 3.0134496688842773, | |
| "learning_rate": 1.5792661704900648e-05, | |
| "loss": 1.470901608467102, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.6503067484662577, | |
| "grad_norm": 5.161888122558594, | |
| "learning_rate": 1.5783657004603273e-05, | |
| "loss": 2.198390483856201, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.6509884117246081, | |
| "grad_norm": 2.703739881515503, | |
| "learning_rate": 1.5774645251893673e-05, | |
| "loss": 1.5957465171813965, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.6516700749829584, | |
| "grad_norm": 2.825924873352051, | |
| "learning_rate": 1.5765626457760506e-05, | |
| "loss": 0.8621901273727417, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.6523517382413088, | |
| "grad_norm": 2.693979501724243, | |
| "learning_rate": 1.575660063320101e-05, | |
| "loss": 1.402087688446045, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.6530334014996592, | |
| "grad_norm": 2.7669472694396973, | |
| "learning_rate": 1.574756778922099e-05, | |
| "loss": 1.3180046081542969, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.6537150647580096, | |
| "grad_norm": 2.372765302658081, | |
| "learning_rate": 1.5738527936834824e-05, | |
| "loss": 0.9966622591018677, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.65439672801636, | |
| "grad_norm": 1.8964282274246216, | |
| "learning_rate": 1.5729481087065423e-05, | |
| "loss": 1.2379252910614014, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.6550783912747103, | |
| "grad_norm": 4.117354869842529, | |
| "learning_rate": 1.5720427250944237e-05, | |
| "loss": 2.0293562412261963, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.6557600545330606, | |
| "grad_norm": 2.0985937118530273, | |
| "learning_rate": 1.5711366439511234e-05, | |
| "loss": 0.9524621963500977, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.656441717791411, | |
| "grad_norm": 1.853117823600769, | |
| "learning_rate": 1.5702298663814884e-05, | |
| "loss": 0.8067352771759033, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.6571233810497614, | |
| "grad_norm": 2.6025946140289307, | |
| "learning_rate": 1.569322393491216e-05, | |
| "loss": 1.2105220556259155, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.6578050443081118, | |
| "grad_norm": 1.4278172254562378, | |
| "learning_rate": 1.5684142263868493e-05, | |
| "loss": 1.3550410270690918, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.6584867075664622, | |
| "grad_norm": 1.4979023933410645, | |
| "learning_rate": 1.5675053661757802e-05, | |
| "loss": 0.5304239988327026, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.6591683708248125, | |
| "grad_norm": 1.8251065015792847, | |
| "learning_rate": 1.566595813966244e-05, | |
| "loss": 0.6945926547050476, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.6598500340831629, | |
| "grad_norm": 1.7620820999145508, | |
| "learning_rate": 1.5656855708673208e-05, | |
| "loss": 1.5049657821655273, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.6605316973415133, | |
| "grad_norm": 1.326943039894104, | |
| "learning_rate": 1.564774637988933e-05, | |
| "loss": 0.8265421986579895, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.6612133605998637, | |
| "grad_norm": 1.3603960275650024, | |
| "learning_rate": 1.5638630164418435e-05, | |
| "loss": 0.9551488757133484, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.6618950238582141, | |
| "grad_norm": 3.848780632019043, | |
| "learning_rate": 1.5629507073376556e-05, | |
| "loss": 1.197197675704956, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.6625766871165644, | |
| "grad_norm": 3.2928988933563232, | |
| "learning_rate": 1.562037711788811e-05, | |
| "loss": 1.1889151334762573, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.6632583503749148, | |
| "grad_norm": 2.450935125350952, | |
| "learning_rate": 1.5611240309085877e-05, | |
| "loss": 1.1293702125549316, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.6639400136332652, | |
| "grad_norm": 1.8610042333602905, | |
| "learning_rate": 1.5602096658111003e-05, | |
| "loss": 0.689786970615387, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.6646216768916156, | |
| "grad_norm": 2.4367642402648926, | |
| "learning_rate": 1.5592946176112973e-05, | |
| "loss": 0.8688675761222839, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.665303340149966, | |
| "grad_norm": 2.765475034713745, | |
| "learning_rate": 1.55837888742496e-05, | |
| "loss": 1.4046356678009033, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.6659850034083163, | |
| "grad_norm": 2.2495126724243164, | |
| "learning_rate": 1.5574624763687006e-05, | |
| "loss": 1.0725502967834473, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.21120548248291, | |
| "learning_rate": 1.556545385559964e-05, | |
| "loss": 0.50251704454422, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.667348329925017, | |
| "grad_norm": 2.6886188983917236, | |
| "learning_rate": 1.5556276161170214e-05, | |
| "loss": 1.5717726945877075, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.6680299931833674, | |
| "grad_norm": 2.472787380218506, | |
| "learning_rate": 1.554709169158972e-05, | |
| "loss": 0.9982832670211792, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.6687116564417178, | |
| "grad_norm": 2.3250319957733154, | |
| "learning_rate": 1.5537900458057426e-05, | |
| "loss": 1.3237147331237793, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.6693933197000682, | |
| "grad_norm": 3.5646936893463135, | |
| "learning_rate": 1.5528702471780832e-05, | |
| "loss": 1.4620048999786377, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.6700749829584185, | |
| "grad_norm": 3.5376639366149902, | |
| "learning_rate": 1.5519497743975676e-05, | |
| "loss": 1.7363507747650146, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.6707566462167689, | |
| "grad_norm": 2.8820950984954834, | |
| "learning_rate": 1.551028628586592e-05, | |
| "loss": 0.7051398754119873, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.6714383094751193, | |
| "grad_norm": 4.920443058013916, | |
| "learning_rate": 1.550106810868373e-05, | |
| "loss": 2.031773567199707, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.6721199727334697, | |
| "grad_norm": 5.214235305786133, | |
| "learning_rate": 1.549184322366947e-05, | |
| "loss": 1.7547272443771362, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.6728016359918201, | |
| "grad_norm": 3.160337209701538, | |
| "learning_rate": 1.5482611642071672e-05, | |
| "loss": 0.8350422978401184, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.6734832992501704, | |
| "grad_norm": 2.562685966491699, | |
| "learning_rate": 1.5473373375147046e-05, | |
| "loss": 1.5145583152770996, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.6741649625085208, | |
| "grad_norm": 2.3527350425720215, | |
| "learning_rate": 1.546412843416045e-05, | |
| "loss": 1.2808892726898193, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.6748466257668712, | |
| "grad_norm": 2.424290180206299, | |
| "learning_rate": 1.5454876830384868e-05, | |
| "loss": 1.0751357078552246, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.6755282890252216, | |
| "grad_norm": 3.4699552059173584, | |
| "learning_rate": 1.544561857510143e-05, | |
| "loss": 1.7717998027801514, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.676209952283572, | |
| "grad_norm": 5.047571182250977, | |
| "learning_rate": 1.5436353679599363e-05, | |
| "loss": 1.4212061166763306, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.6768916155419223, | |
| "grad_norm": 1.3153938055038452, | |
| "learning_rate": 1.5427082155175993e-05, | |
| "loss": 0.9446808695793152, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.6775732788002726, | |
| "grad_norm": 3.4039230346679688, | |
| "learning_rate": 1.541780401313673e-05, | |
| "loss": 0.8643260598182678, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.678254942058623, | |
| "grad_norm": 1.9677404165267944, | |
| "learning_rate": 1.540851926479505e-05, | |
| "loss": 1.230716586112976, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.6789366053169734, | |
| "grad_norm": 3.332017183303833, | |
| "learning_rate": 1.5399227921472493e-05, | |
| "loss": 1.6875895261764526, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.6796182685753238, | |
| "grad_norm": 2.3876984119415283, | |
| "learning_rate": 1.5389929994498635e-05, | |
| "loss": 0.35820311307907104, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.6802999318336742, | |
| "grad_norm": 3.2951459884643555, | |
| "learning_rate": 1.5380625495211072e-05, | |
| "loss": 0.7848578095436096, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.6809815950920245, | |
| "grad_norm": 2.0296571254730225, | |
| "learning_rate": 1.537131443495543e-05, | |
| "loss": 1.5027271509170532, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.6816632583503749, | |
| "grad_norm": 3.0071208477020264, | |
| "learning_rate": 1.536199682508533e-05, | |
| "loss": 1.7935607433319092, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6823449216087253, | |
| "grad_norm": 3.048903226852417, | |
| "learning_rate": 1.5352672676962365e-05, | |
| "loss": 1.4052588939666748, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.6830265848670757, | |
| "grad_norm": 2.0453131198883057, | |
| "learning_rate": 1.5343342001956125e-05, | |
| "loss": 0.7496938705444336, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.6837082481254261, | |
| "grad_norm": 2.2888898849487305, | |
| "learning_rate": 1.533400481144414e-05, | |
| "loss": 0.9012516736984253, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.6843899113837764, | |
| "grad_norm": 4.049041271209717, | |
| "learning_rate": 1.5324661116811887e-05, | |
| "loss": 1.8084564208984375, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.6850715746421268, | |
| "grad_norm": 3.5373117923736572, | |
| "learning_rate": 1.531531092945279e-05, | |
| "loss": 1.5952699184417725, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.6857532379004772, | |
| "grad_norm": 1.6792633533477783, | |
| "learning_rate": 1.5305954260768166e-05, | |
| "loss": 0.2857983112335205, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.6864349011588275, | |
| "grad_norm": 1.9056538343429565, | |
| "learning_rate": 1.5296591122167254e-05, | |
| "loss": 0.3411814272403717, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.6871165644171779, | |
| "grad_norm": 1.7760215997695923, | |
| "learning_rate": 1.5287221525067168e-05, | |
| "loss": 0.6943484544754028, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.6877982276755283, | |
| "grad_norm": 2.4310076236724854, | |
| "learning_rate": 1.5277845480892914e-05, | |
| "loss": 1.2629508972167969, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.6884798909338786, | |
| "grad_norm": 1.9507763385772705, | |
| "learning_rate": 1.526846300107734e-05, | |
| "loss": 1.1614940166473389, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.689161554192229, | |
| "grad_norm": 4.232208728790283, | |
| "learning_rate": 1.5259074097061166e-05, | |
| "loss": 1.4028643369674683, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.6898432174505794, | |
| "grad_norm": 3.713007688522339, | |
| "learning_rate": 1.5249678780292913e-05, | |
| "loss": 1.0402387380599976, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.6905248807089298, | |
| "grad_norm": 2.0313596725463867, | |
| "learning_rate": 1.5240277062228952e-05, | |
| "loss": 0.4616890251636505, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.6912065439672802, | |
| "grad_norm": 2.8289709091186523, | |
| "learning_rate": 1.523086895433344e-05, | |
| "loss": 1.3428205251693726, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.6918882072256305, | |
| "grad_norm": 4.949586868286133, | |
| "learning_rate": 1.5221454468078336e-05, | |
| "loss": 1.4365060329437256, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.6925698704839809, | |
| "grad_norm": 9.691402435302734, | |
| "learning_rate": 1.5212033614943371e-05, | |
| "loss": 3.060600519180298, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.6932515337423313, | |
| "grad_norm": 2.583277463912964, | |
| "learning_rate": 1.5202606406416043e-05, | |
| "loss": 1.2374069690704346, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.6939331970006817, | |
| "grad_norm": 3.875859022140503, | |
| "learning_rate": 1.5193172853991596e-05, | |
| "loss": 1.4892609119415283, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.6946148602590321, | |
| "grad_norm": 4.193378448486328, | |
| "learning_rate": 1.518373296917301e-05, | |
| "loss": 1.2775728702545166, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.6952965235173824, | |
| "grad_norm": 4.090426921844482, | |
| "learning_rate": 1.5174286763470995e-05, | |
| "loss": 0.4450388550758362, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.6959781867757328, | |
| "grad_norm": 4.702569007873535, | |
| "learning_rate": 1.5164834248403959e-05, | |
| "loss": 0.5944312214851379, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.6966598500340832, | |
| "grad_norm": 2.688467264175415, | |
| "learning_rate": 1.5155375435498001e-05, | |
| "loss": 1.0980937480926514, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.6973415132924335, | |
| "grad_norm": 1.5542073249816895, | |
| "learning_rate": 1.5145910336286912e-05, | |
| "loss": 0.9820501208305359, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.6980231765507839, | |
| "grad_norm": 2.9898831844329834, | |
| "learning_rate": 1.5136438962312134e-05, | |
| "loss": 1.3177458047866821, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.6987048398091343, | |
| "grad_norm": 3.9392518997192383, | |
| "learning_rate": 1.5126961325122773e-05, | |
| "loss": 1.6820330619812012, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.6993865030674846, | |
| "grad_norm": 1.8033751249313354, | |
| "learning_rate": 1.511747743627556e-05, | |
| "loss": 1.1170791387557983, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.700068166325835, | |
| "grad_norm": 1.4688934087753296, | |
| "learning_rate": 1.5107987307334864e-05, | |
| "loss": 0.9317739605903625, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.7007498295841854, | |
| "grad_norm": 2.8467535972595215, | |
| "learning_rate": 1.5098490949872648e-05, | |
| "loss": 1.3663240671157837, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.7014314928425358, | |
| "grad_norm": 3.6989221572875977, | |
| "learning_rate": 1.5088988375468473e-05, | |
| "loss": 1.3530757427215576, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.7021131561008862, | |
| "grad_norm": 3.1766574382781982, | |
| "learning_rate": 1.5079479595709493e-05, | |
| "loss": 1.1704943180084229, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.7027948193592365, | |
| "grad_norm": 1.4907842874526978, | |
| "learning_rate": 1.5069964622190409e-05, | |
| "loss": 0.32295915484428406, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.7034764826175869, | |
| "grad_norm": 2.6720528602600098, | |
| "learning_rate": 1.5060443466513497e-05, | |
| "loss": 1.995790958404541, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.7041581458759373, | |
| "grad_norm": 2.4431049823760986, | |
| "learning_rate": 1.5050916140288552e-05, | |
| "loss": 1.0614051818847656, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.7048398091342877, | |
| "grad_norm": 2.8213205337524414, | |
| "learning_rate": 1.5041382655132899e-05, | |
| "loss": 1.234609842300415, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.7055214723926381, | |
| "grad_norm": 1.7441692352294922, | |
| "learning_rate": 1.5031843022671377e-05, | |
| "loss": 1.0156347751617432, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.7062031356509885, | |
| "grad_norm": 1.8287917375564575, | |
| "learning_rate": 1.5022297254536321e-05, | |
| "loss": 1.3329904079437256, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.7068847989093388, | |
| "grad_norm": 1.5410305261611938, | |
| "learning_rate": 1.5012745362367543e-05, | |
| "loss": 1.0349948406219482, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.7075664621676891, | |
| "grad_norm": 1.9430551528930664, | |
| "learning_rate": 1.5003187357812323e-05, | |
| "loss": 1.4377586841583252, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.7082481254260395, | |
| "grad_norm": 1.7912523746490479, | |
| "learning_rate": 1.4993623252525398e-05, | |
| "loss": 1.3896902799606323, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.7089297886843899, | |
| "grad_norm": 2.173234701156616, | |
| "learning_rate": 1.4984053058168936e-05, | |
| "loss": 1.353006362915039, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.7096114519427403, | |
| "grad_norm": 3.65002179145813, | |
| "learning_rate": 1.4974476786412542e-05, | |
| "loss": 1.4411238431930542, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.7102931152010906, | |
| "grad_norm": 2.9713757038116455, | |
| "learning_rate": 1.4964894448933227e-05, | |
| "loss": 0.813322901725769, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.710974778459441, | |
| "grad_norm": 1.54813814163208, | |
| "learning_rate": 1.4955306057415388e-05, | |
| "loss": 1.206241488456726, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.7116564417177914, | |
| "grad_norm": 2.3598363399505615, | |
| "learning_rate": 1.4945711623550822e-05, | |
| "loss": 1.1738507747650146, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.7123381049761418, | |
| "grad_norm": 2.5693681240081787, | |
| "learning_rate": 1.4936111159038677e-05, | |
| "loss": 1.855168342590332, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.7130197682344922, | |
| "grad_norm": 1.6126163005828857, | |
| "learning_rate": 1.4926504675585467e-05, | |
| "loss": 1.1066110134124756, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.7137014314928425, | |
| "grad_norm": 1.2783271074295044, | |
| "learning_rate": 1.4916892184905037e-05, | |
| "loss": 0.6282731294631958, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.7143830947511929, | |
| "grad_norm": 2.2483839988708496, | |
| "learning_rate": 1.4907273698718562e-05, | |
| "loss": 0.9469138383865356, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.7150647580095433, | |
| "grad_norm": 4.296797752380371, | |
| "learning_rate": 1.4897649228754527e-05, | |
| "loss": 1.7291085720062256, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.7157464212678937, | |
| "grad_norm": 3.502342462539673, | |
| "learning_rate": 1.4888018786748713e-05, | |
| "loss": 1.0134327411651611, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.7164280845262441, | |
| "grad_norm": 3.2990827560424805, | |
| "learning_rate": 1.487838238444418e-05, | |
| "loss": 1.590086579322815, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.7171097477845945, | |
| "grad_norm": 3.785966157913208, | |
| "learning_rate": 1.4868740033591258e-05, | |
| "loss": 1.6262550354003906, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.7177914110429447, | |
| "grad_norm": 6.545089244842529, | |
| "learning_rate": 1.485909174594753e-05, | |
| "loss": 2.984455108642578, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.7184730743012951, | |
| "grad_norm": 2.6544864177703857, | |
| "learning_rate": 1.484943753327783e-05, | |
| "loss": 1.136319637298584, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.7191547375596455, | |
| "grad_norm": 2.4620413780212402, | |
| "learning_rate": 1.4839777407354194e-05, | |
| "loss": 0.72819983959198, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.7198364008179959, | |
| "grad_norm": 7.4918951988220215, | |
| "learning_rate": 1.4830111379955886e-05, | |
| "loss": 1.7762341499328613, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.7205180640763463, | |
| "grad_norm": 6.07712459564209, | |
| "learning_rate": 1.4820439462869353e-05, | |
| "loss": 1.6564725637435913, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.7211997273346966, | |
| "grad_norm": 2.416376829147339, | |
| "learning_rate": 1.481076166788824e-05, | |
| "loss": 1.1260161399841309, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.721881390593047, | |
| "grad_norm": 3.4239766597747803, | |
| "learning_rate": 1.480107800681335e-05, | |
| "loss": 1.4130642414093018, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.7225630538513974, | |
| "grad_norm": 2.0911810398101807, | |
| "learning_rate": 1.4791388491452637e-05, | |
| "loss": 1.4372596740722656, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.7232447171097478, | |
| "grad_norm": 2.543943405151367, | |
| "learning_rate": 1.4781693133621191e-05, | |
| "loss": 0.859691858291626, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.7239263803680982, | |
| "grad_norm": 2.474747657775879, | |
| "learning_rate": 1.4771991945141237e-05, | |
| "loss": 1.1199817657470703, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.7246080436264485, | |
| "grad_norm": 7.446532249450684, | |
| "learning_rate": 1.4762284937842103e-05, | |
| "loss": 2.407529354095459, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.7252897068847989, | |
| "grad_norm": 3.7536842823028564, | |
| "learning_rate": 1.4752572123560216e-05, | |
| "loss": 1.3106913566589355, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.7259713701431493, | |
| "grad_norm": 3.0671236515045166, | |
| "learning_rate": 1.4742853514139076e-05, | |
| "loss": 0.7006158232688904, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.7266530334014997, | |
| "grad_norm": 5.503408432006836, | |
| "learning_rate": 1.4733129121429253e-05, | |
| "loss": 2.3005611896514893, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.7273346966598501, | |
| "grad_norm": 4.134151458740234, | |
| "learning_rate": 1.4723398957288373e-05, | |
| "loss": 1.7469112873077393, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.7280163599182005, | |
| "grad_norm": 3.927537441253662, | |
| "learning_rate": 1.4713663033581099e-05, | |
| "loss": 1.7568607330322266, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.7286980231765507, | |
| "grad_norm": 6.670135021209717, | |
| "learning_rate": 1.470392136217911e-05, | |
| "loss": 1.0784207582473755, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.7293796864349011, | |
| "grad_norm": 4.407510280609131, | |
| "learning_rate": 1.4694173954961105e-05, | |
| "loss": 1.3891305923461914, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.7300613496932515, | |
| "grad_norm": 3.5949463844299316, | |
| "learning_rate": 1.4684420823812763e-05, | |
| "loss": 0.9051351547241211, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.7307430129516019, | |
| "grad_norm": 3.821967363357544, | |
| "learning_rate": 1.4674661980626754e-05, | |
| "loss": 0.6995428800582886, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.7314246762099523, | |
| "grad_norm": 2.605086326599121, | |
| "learning_rate": 1.466489743730271e-05, | |
| "loss": 1.1066350936889648, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.7321063394683026, | |
| "grad_norm": 2.183931350708008, | |
| "learning_rate": 1.4655127205747208e-05, | |
| "loss": 1.5164170265197754, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.732788002726653, | |
| "grad_norm": 1.955289602279663, | |
| "learning_rate": 1.4645351297873774e-05, | |
| "loss": 1.0034122467041016, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.7334696659850034, | |
| "grad_norm": 1.7779383659362793, | |
| "learning_rate": 1.463556972560284e-05, | |
| "loss": 1.0842978954315186, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.7341513292433538, | |
| "grad_norm": 2.3080132007598877, | |
| "learning_rate": 1.4625782500861756e-05, | |
| "loss": 0.9262508749961853, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.7348329925017042, | |
| "grad_norm": 2.3780229091644287, | |
| "learning_rate": 1.4615989635584757e-05, | |
| "loss": 0.780438244342804, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.7355146557600545, | |
| "grad_norm": 1.959047794342041, | |
| "learning_rate": 1.4606191141712964e-05, | |
| "loss": 1.0655291080474854, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.7361963190184049, | |
| "grad_norm": 2.633192300796509, | |
| "learning_rate": 1.4596387031194354e-05, | |
| "loss": 0.8464280366897583, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.7368779822767553, | |
| "grad_norm": 1.6572291851043701, | |
| "learning_rate": 1.458657731598376e-05, | |
| "loss": 0.3969135880470276, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.7375596455351057, | |
| "grad_norm": 2.454660177230835, | |
| "learning_rate": 1.4576762008042837e-05, | |
| "loss": 1.2453688383102417, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.7382413087934561, | |
| "grad_norm": 4.7392144203186035, | |
| "learning_rate": 1.4566941119340074e-05, | |
| "loss": 1.502977967262268, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.7389229720518065, | |
| "grad_norm": 2.6455323696136475, | |
| "learning_rate": 1.4557114661850755e-05, | |
| "loss": 1.0623095035552979, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.7396046353101567, | |
| "grad_norm": 4.488423824310303, | |
| "learning_rate": 1.4547282647556964e-05, | |
| "loss": 1.8041034936904907, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.7402862985685071, | |
| "grad_norm": 1.9584484100341797, | |
| "learning_rate": 1.4537445088447547e-05, | |
| "loss": 0.7578965425491333, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.7409679618268575, | |
| "grad_norm": 2.6489927768707275, | |
| "learning_rate": 1.4527601996518122e-05, | |
| "loss": 0.7286869287490845, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.7416496250852079, | |
| "grad_norm": 1.6441211700439453, | |
| "learning_rate": 1.4517753383771052e-05, | |
| "loss": 0.9616504311561584, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.7423312883435583, | |
| "grad_norm": 2.347137212753296, | |
| "learning_rate": 1.4507899262215426e-05, | |
| "loss": 1.5748083591461182, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.7430129516019086, | |
| "grad_norm": 2.179248809814453, | |
| "learning_rate": 1.449803964386706e-05, | |
| "loss": 1.1422452926635742, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.743694614860259, | |
| "grad_norm": 2.3553104400634766, | |
| "learning_rate": 1.4488174540748463e-05, | |
| "loss": 1.0269725322723389, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.7443762781186094, | |
| "grad_norm": 0.9572274684906006, | |
| "learning_rate": 1.4478303964888842e-05, | |
| "loss": 0.5475146770477295, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.7450579413769598, | |
| "grad_norm": 2.5568504333496094, | |
| "learning_rate": 1.4468427928324065e-05, | |
| "loss": 1.681059718132019, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.7457396046353102, | |
| "grad_norm": 1.5831090211868286, | |
| "learning_rate": 1.4458546443096663e-05, | |
| "loss": 1.3471312522888184, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.7464212678936605, | |
| "grad_norm": 1.6717140674591064, | |
| "learning_rate": 1.4448659521255823e-05, | |
| "loss": 1.0710022449493408, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.7471029311520109, | |
| "grad_norm": 2.684556007385254, | |
| "learning_rate": 1.4438767174857346e-05, | |
| "loss": 1.708756685256958, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.7477845944103613, | |
| "grad_norm": 2.5712358951568604, | |
| "learning_rate": 1.442886941596365e-05, | |
| "loss": 1.1187701225280762, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.7484662576687117, | |
| "grad_norm": 1.3731135129928589, | |
| "learning_rate": 1.4418966256643762e-05, | |
| "loss": 1.2401134967803955, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.7491479209270621, | |
| "grad_norm": 2.568267345428467, | |
| "learning_rate": 1.4409057708973282e-05, | |
| "loss": 0.7313793897628784, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.7498295841854125, | |
| "grad_norm": 2.1688249111175537, | |
| "learning_rate": 1.4399143785034388e-05, | |
| "loss": 1.3187694549560547, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7505112474437627, | |
| "grad_norm": 2.1575472354888916, | |
| "learning_rate": 1.4389224496915814e-05, | |
| "loss": 0.8957356214523315, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.7511929107021131, | |
| "grad_norm": 2.9634883403778076, | |
| "learning_rate": 1.4379299856712827e-05, | |
| "loss": 0.5563441514968872, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.7518745739604635, | |
| "grad_norm": 3.4954793453216553, | |
| "learning_rate": 1.4369369876527234e-05, | |
| "loss": 1.5315556526184082, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.7525562372188139, | |
| "grad_norm": 1.7266521453857422, | |
| "learning_rate": 1.4359434568467341e-05, | |
| "loss": 0.7705234289169312, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.7532379004771643, | |
| "grad_norm": 4.004641056060791, | |
| "learning_rate": 1.4349493944647953e-05, | |
| "loss": 1.7995654344558716, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.7539195637355146, | |
| "grad_norm": 1.724542498588562, | |
| "learning_rate": 1.4339548017190356e-05, | |
| "loss": 0.3790285587310791, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.754601226993865, | |
| "grad_norm": 2.3936798572540283, | |
| "learning_rate": 1.4329596798222318e-05, | |
| "loss": 0.6810147762298584, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.7552828902522154, | |
| "grad_norm": 5.291030406951904, | |
| "learning_rate": 1.4319640299878038e-05, | |
| "loss": 2.0598695278167725, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.7559645535105658, | |
| "grad_norm": 3.289830446243286, | |
| "learning_rate": 1.4309678534298164e-05, | |
| "loss": 1.0737022161483765, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.7566462167689162, | |
| "grad_norm": 9.536825180053711, | |
| "learning_rate": 1.4299711513629759e-05, | |
| "loss": 1.3822963237762451, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.7573278800272665, | |
| "grad_norm": 2.259124755859375, | |
| "learning_rate": 1.428973925002631e-05, | |
| "loss": 1.4830787181854248, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.7580095432856169, | |
| "grad_norm": 1.6364065408706665, | |
| "learning_rate": 1.4279761755647679e-05, | |
| "loss": 1.0743396282196045, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.7586912065439673, | |
| "grad_norm": 1.8119486570358276, | |
| "learning_rate": 1.4269779042660112e-05, | |
| "loss": 0.804845929145813, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.7593728698023177, | |
| "grad_norm": 1.8661983013153076, | |
| "learning_rate": 1.4259791123236227e-05, | |
| "loss": 0.7559719085693359, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.7600545330606681, | |
| "grad_norm": 1.2363063097000122, | |
| "learning_rate": 1.4249798009554979e-05, | |
| "loss": 0.8930497169494629, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.7607361963190185, | |
| "grad_norm": 1.8818364143371582, | |
| "learning_rate": 1.4239799713801662e-05, | |
| "loss": 1.1456657648086548, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.7614178595773687, | |
| "grad_norm": 1.87226140499115, | |
| "learning_rate": 1.4229796248167888e-05, | |
| "loss": 0.5097870230674744, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.7620995228357191, | |
| "grad_norm": 3.1565282344818115, | |
| "learning_rate": 1.421978762485157e-05, | |
| "loss": 1.0278431177139282, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.7627811860940695, | |
| "grad_norm": 5.5133957862854, | |
| "learning_rate": 1.4209773856056925e-05, | |
| "loss": 2.1597955226898193, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.7634628493524199, | |
| "grad_norm": 3.499448537826538, | |
| "learning_rate": 1.419975495399442e-05, | |
| "loss": 1.8728885650634766, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.7641445126107703, | |
| "grad_norm": 1.7962192296981812, | |
| "learning_rate": 1.4189730930880799e-05, | |
| "loss": 1.4974309206008911, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.7648261758691206, | |
| "grad_norm": 2.1655869483947754, | |
| "learning_rate": 1.417970179893904e-05, | |
| "loss": 1.0126588344573975, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.765507839127471, | |
| "grad_norm": 3.2068166732788086, | |
| "learning_rate": 1.4169667570398367e-05, | |
| "loss": 1.8077328205108643, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.7661895023858214, | |
| "grad_norm": 2.407762289047241, | |
| "learning_rate": 1.4159628257494195e-05, | |
| "loss": 0.6827515363693237, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.7668711656441718, | |
| "grad_norm": 3.49932599067688, | |
| "learning_rate": 1.4149583872468165e-05, | |
| "loss": 1.3755052089691162, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.7675528289025222, | |
| "grad_norm": 1.8937991857528687, | |
| "learning_rate": 1.4139534427568073e-05, | |
| "loss": 1.2675697803497314, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.7682344921608726, | |
| "grad_norm": 1.550775170326233, | |
| "learning_rate": 1.4129479935047914e-05, | |
| "loss": 0.6430014371871948, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.7689161554192229, | |
| "grad_norm": 2.033308744430542, | |
| "learning_rate": 1.4119420407167817e-05, | |
| "loss": 0.9698971509933472, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.7695978186775733, | |
| "grad_norm": 1.069423794746399, | |
| "learning_rate": 1.4109355856194062e-05, | |
| "loss": 0.7354646325111389, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.7702794819359237, | |
| "grad_norm": 2.814561128616333, | |
| "learning_rate": 1.4099286294399051e-05, | |
| "loss": 1.539209008216858, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.7709611451942741, | |
| "grad_norm": 3.4009978771209717, | |
| "learning_rate": 1.4089211734061294e-05, | |
| "loss": 0.6647955179214478, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.7716428084526245, | |
| "grad_norm": 1.6567187309265137, | |
| "learning_rate": 1.4079132187465403e-05, | |
| "loss": 0.864957332611084, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.7723244717109747, | |
| "grad_norm": 2.7520089149475098, | |
| "learning_rate": 1.4069047666902056e-05, | |
| "loss": 0.6806797385215759, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.7730061349693251, | |
| "grad_norm": 6.400692939758301, | |
| "learning_rate": 1.405895818466801e-05, | |
| "loss": 2.2545909881591797, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.7736877982276755, | |
| "grad_norm": 2.637324333190918, | |
| "learning_rate": 1.404886375306607e-05, | |
| "loss": 1.1330559253692627, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.7743694614860259, | |
| "grad_norm": 4.983392238616943, | |
| "learning_rate": 1.403876438440507e-05, | |
| "loss": 2.1453890800476074, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.7750511247443763, | |
| "grad_norm": 24.753963470458984, | |
| "learning_rate": 1.4028660090999866e-05, | |
| "loss": 0.6390861868858337, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.7757327880027266, | |
| "grad_norm": 2.1686322689056396, | |
| "learning_rate": 1.4018550885171322e-05, | |
| "loss": 1.2475088834762573, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.776414451261077, | |
| "grad_norm": 1.1742987632751465, | |
| "learning_rate": 1.4008436779246288e-05, | |
| "loss": 0.4145359694957733, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.7770961145194274, | |
| "grad_norm": 3.770195722579956, | |
| "learning_rate": 1.3998317785557597e-05, | |
| "loss": 1.0192723274230957, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.7777777777777778, | |
| "grad_norm": 3.6359922885894775, | |
| "learning_rate": 1.3988193916444036e-05, | |
| "loss": 1.6351332664489746, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.7784594410361282, | |
| "grad_norm": 1.383095383644104, | |
| "learning_rate": 1.3978065184250334e-05, | |
| "loss": 0.6680575609207153, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.7791411042944786, | |
| "grad_norm": 3.0580966472625732, | |
| "learning_rate": 1.396793160132715e-05, | |
| "loss": 2.0260071754455566, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.7798227675528289, | |
| "grad_norm": 1.9012495279312134, | |
| "learning_rate": 1.3957793180031067e-05, | |
| "loss": 0.7958223819732666, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.7805044308111793, | |
| "grad_norm": 3.0579073429107666, | |
| "learning_rate": 1.3947649932724563e-05, | |
| "loss": 1.2729113101959229, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.7811860940695297, | |
| "grad_norm": 3.44350266456604, | |
| "learning_rate": 1.3937501871775995e-05, | |
| "loss": 1.5779197216033936, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.78186775732788, | |
| "grad_norm": 1.746261715888977, | |
| "learning_rate": 1.3927349009559597e-05, | |
| "loss": 0.3372318744659424, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.7825494205862304, | |
| "grad_norm": 5.006249904632568, | |
| "learning_rate": 1.3917191358455453e-05, | |
| "loss": 2.2594876289367676, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.7832310838445807, | |
| "grad_norm": 5.940740585327148, | |
| "learning_rate": 1.3907028930849489e-05, | |
| "loss": 0.8546584844589233, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.7839127471029311, | |
| "grad_norm": 3.4024150371551514, | |
| "learning_rate": 1.3896861739133456e-05, | |
| "loss": 1.9201159477233887, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.7845944103612815, | |
| "grad_norm": 1.3676657676696777, | |
| "learning_rate": 1.388668979570491e-05, | |
| "loss": 0.8639696836471558, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.7852760736196319, | |
| "grad_norm": 1.9111442565917969, | |
| "learning_rate": 1.3876513112967208e-05, | |
| "loss": 0.7849295139312744, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.7859577368779823, | |
| "grad_norm": 1.646479606628418, | |
| "learning_rate": 1.3866331703329477e-05, | |
| "loss": 0.9975169897079468, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.7866394001363326, | |
| "grad_norm": 1.8222453594207764, | |
| "learning_rate": 1.3856145579206612e-05, | |
| "loss": 1.2962346076965332, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.787321063394683, | |
| "grad_norm": 3.7591559886932373, | |
| "learning_rate": 1.384595475301926e-05, | |
| "loss": 1.5346903800964355, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.7880027266530334, | |
| "grad_norm": 1.4336442947387695, | |
| "learning_rate": 1.38357592371938e-05, | |
| "loss": 1.160192847251892, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.7886843899113838, | |
| "grad_norm": 2.9270148277282715, | |
| "learning_rate": 1.3825559044162327e-05, | |
| "loss": 1.0325207710266113, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.7893660531697342, | |
| "grad_norm": 3.7184078693389893, | |
| "learning_rate": 1.381535418636264e-05, | |
| "loss": 0.982779860496521, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.7900477164280846, | |
| "grad_norm": 1.879891276359558, | |
| "learning_rate": 1.3805144676238225e-05, | |
| "loss": 2.0673606395721436, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.7907293796864349, | |
| "grad_norm": 1.9370105266571045, | |
| "learning_rate": 1.3794930526238246e-05, | |
| "loss": 0.573197603225708, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.7914110429447853, | |
| "grad_norm": 2.1516077518463135, | |
| "learning_rate": 1.3784711748817519e-05, | |
| "loss": 0.9432398080825806, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.7920927062031357, | |
| "grad_norm": 2.1923482418060303, | |
| "learning_rate": 1.3774488356436505e-05, | |
| "loss": 1.4591634273529053, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.792774369461486, | |
| "grad_norm": 3.7260212898254395, | |
| "learning_rate": 1.376426036156129e-05, | |
| "loss": 1.241162896156311, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.7934560327198364, | |
| "grad_norm": 2.8687682151794434, | |
| "learning_rate": 1.3754027776663579e-05, | |
| "loss": 1.5285322666168213, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.7941376959781867, | |
| "grad_norm": 1.8364909887313843, | |
| "learning_rate": 1.3743790614220664e-05, | |
| "loss": 0.9934275150299072, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.7948193592365371, | |
| "grad_norm": 2.3077330589294434, | |
| "learning_rate": 1.3733548886715427e-05, | |
| "loss": 1.2558547258377075, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.7955010224948875, | |
| "grad_norm": 1.8851512670516968, | |
| "learning_rate": 1.3723302606636311e-05, | |
| "loss": 0.9629714488983154, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.7961826857532379, | |
| "grad_norm": 2.393500804901123, | |
| "learning_rate": 1.3713051786477319e-05, | |
| "loss": 1.1742806434631348, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.7968643490115883, | |
| "grad_norm": 3.2605621814727783, | |
| "learning_rate": 1.3702796438737974e-05, | |
| "loss": 2.027634382247925, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.7975460122699386, | |
| "grad_norm": 2.644625425338745, | |
| "learning_rate": 1.3692536575923334e-05, | |
| "loss": 1.0252785682678223, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.798227675528289, | |
| "grad_norm": 3.954820394515991, | |
| "learning_rate": 1.3682272210543959e-05, | |
| "loss": 2.3715176582336426, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.7989093387866394, | |
| "grad_norm": 2.5120863914489746, | |
| "learning_rate": 1.3672003355115897e-05, | |
| "loss": 2.089763641357422, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.7995910020449898, | |
| "grad_norm": 2.659560441970825, | |
| "learning_rate": 1.3661730022160673e-05, | |
| "loss": 0.6313329339027405, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.8002726653033402, | |
| "grad_norm": 3.3629090785980225, | |
| "learning_rate": 1.365145222420527e-05, | |
| "loss": 0.580939531326294, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.8009543285616906, | |
| "grad_norm": 2.3204710483551025, | |
| "learning_rate": 1.3641169973782117e-05, | |
| "loss": 1.5531153678894043, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.8016359918200409, | |
| "grad_norm": 3.2493536472320557, | |
| "learning_rate": 1.3630883283429071e-05, | |
| "loss": 1.7264301776885986, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.8023176550783913, | |
| "grad_norm": 2.0848870277404785, | |
| "learning_rate": 1.3620592165689405e-05, | |
| "loss": 1.3860576152801514, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.8029993183367417, | |
| "grad_norm": 3.532919406890869, | |
| "learning_rate": 1.3610296633111788e-05, | |
| "loss": 0.6948189735412598, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.803680981595092, | |
| "grad_norm": 1.9613711833953857, | |
| "learning_rate": 1.3599996698250274e-05, | |
| "loss": 1.3784890174865723, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.8043626448534424, | |
| "grad_norm": 2.8494956493377686, | |
| "learning_rate": 1.3589692373664288e-05, | |
| "loss": 0.40545928478240967, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.8050443081117927, | |
| "grad_norm": 1.2043472528457642, | |
| "learning_rate": 1.3579383671918598e-05, | |
| "loss": 0.9535353183746338, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.8057259713701431, | |
| "grad_norm": 2.0384488105773926, | |
| "learning_rate": 1.3569070605583319e-05, | |
| "loss": 1.616403341293335, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.8064076346284935, | |
| "grad_norm": 2.768143892288208, | |
| "learning_rate": 1.3558753187233881e-05, | |
| "loss": 2.2363321781158447, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.8070892978868439, | |
| "grad_norm": 2.2129340171813965, | |
| "learning_rate": 1.3548431429451032e-05, | |
| "loss": 1.5187221765518188, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.8077709611451943, | |
| "grad_norm": 2.341991424560547, | |
| "learning_rate": 1.3538105344820798e-05, | |
| "loss": 1.0091967582702637, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.8084526244035446, | |
| "grad_norm": 2.6940460205078125, | |
| "learning_rate": 1.352777494593449e-05, | |
| "loss": 1.6440155506134033, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.809134287661895, | |
| "grad_norm": 2.276599168777466, | |
| "learning_rate": 1.3517440245388672e-05, | |
| "loss": 1.2129032611846924, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.8098159509202454, | |
| "grad_norm": 3.9329209327697754, | |
| "learning_rate": 1.350710125578516e-05, | |
| "loss": 1.818249225616455, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.8104976141785958, | |
| "grad_norm": 2.928197145462036, | |
| "learning_rate": 1.3496757989730997e-05, | |
| "loss": 1.140410304069519, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.8111792774369462, | |
| "grad_norm": 2.0259079933166504, | |
| "learning_rate": 1.3486410459838448e-05, | |
| "loss": 0.7515283823013306, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.8118609406952966, | |
| "grad_norm": 5.171846389770508, | |
| "learning_rate": 1.347605867872496e-05, | |
| "loss": 1.5359857082366943, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.8125426039536469, | |
| "grad_norm": 2.310758590698242, | |
| "learning_rate": 1.346570265901318e-05, | |
| "loss": 1.3270797729492188, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.8132242672119973, | |
| "grad_norm": 2.1669437885284424, | |
| "learning_rate": 1.3455342413330916e-05, | |
| "loss": 1.3661367893218994, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.8139059304703476, | |
| "grad_norm": 2.030578136444092, | |
| "learning_rate": 1.3444977954311133e-05, | |
| "loss": 1.123007893562317, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.814587593728698, | |
| "grad_norm": 3.647148847579956, | |
| "learning_rate": 1.343460929459193e-05, | |
| "loss": 1.5400652885437012, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.8152692569870484, | |
| "grad_norm": 1.4944745302200317, | |
| "learning_rate": 1.3424236446816528e-05, | |
| "loss": 1.2711783647537231, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.8159509202453987, | |
| "grad_norm": 4.806166172027588, | |
| "learning_rate": 1.3413859423633259e-05, | |
| "loss": 1.698742389678955, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.8166325835037491, | |
| "grad_norm": 1.8429958820343018, | |
| "learning_rate": 1.3403478237695542e-05, | |
| "loss": 1.910172939300537, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.8173142467620995, | |
| "grad_norm": 3.9592504501342773, | |
| "learning_rate": 1.3393092901661873e-05, | |
| "loss": 1.5437757968902588, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.8179959100204499, | |
| "grad_norm": 3.513111114501953, | |
| "learning_rate": 1.3382703428195812e-05, | |
| "loss": 1.042831540107727, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8186775732788003, | |
| "grad_norm": 4.69813871383667, | |
| "learning_rate": 1.3372309829965957e-05, | |
| "loss": 2.1290884017944336, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.8193592365371506, | |
| "grad_norm": 4.101344585418701, | |
| "learning_rate": 1.3361912119645943e-05, | |
| "loss": 0.5426227450370789, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.820040899795501, | |
| "grad_norm": 3.597198724746704, | |
| "learning_rate": 1.3351510309914415e-05, | |
| "loss": 0.7113486528396606, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.8207225630538514, | |
| "grad_norm": 2.5351171493530273, | |
| "learning_rate": 1.3341104413455014e-05, | |
| "loss": 0.8288606405258179, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.8214042263122018, | |
| "grad_norm": 2.3007466793060303, | |
| "learning_rate": 1.3330694442956376e-05, | |
| "loss": 1.4681382179260254, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.8220858895705522, | |
| "grad_norm": 2.452038049697876, | |
| "learning_rate": 1.3320280411112092e-05, | |
| "loss": 1.3009843826293945, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.8227675528289026, | |
| "grad_norm": 2.4166622161865234, | |
| "learning_rate": 1.3309862330620709e-05, | |
| "loss": 0.875541090965271, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.8234492160872529, | |
| "grad_norm": 3.604154348373413, | |
| "learning_rate": 1.3299440214185707e-05, | |
| "loss": 1.0196988582611084, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.8241308793456033, | |
| "grad_norm": 2.63765025138855, | |
| "learning_rate": 1.3289014074515505e-05, | |
| "loss": 0.8730734586715698, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.8248125426039536, | |
| "grad_norm": 2.459676504135132, | |
| "learning_rate": 1.3278583924323405e-05, | |
| "loss": 2.113161087036133, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.825494205862304, | |
| "grad_norm": 2.1859536170959473, | |
| "learning_rate": 1.326814977632761e-05, | |
| "loss": 1.2790409326553345, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.8261758691206544, | |
| "grad_norm": 2.1957504749298096, | |
| "learning_rate": 1.3257711643251201e-05, | |
| "loss": 0.5538440346717834, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.8268575323790047, | |
| "grad_norm": 1.5440486669540405, | |
| "learning_rate": 1.3247269537822109e-05, | |
| "loss": 0.7184191942214966, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.8275391956373551, | |
| "grad_norm": 1.557157278060913, | |
| "learning_rate": 1.3236823472773116e-05, | |
| "loss": 0.47062918543815613, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.8282208588957055, | |
| "grad_norm": 5.947640419006348, | |
| "learning_rate": 1.3226373460841835e-05, | |
| "loss": 1.872814655303955, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.8289025221540559, | |
| "grad_norm": 1.9622546434402466, | |
| "learning_rate": 1.3215919514770676e-05, | |
| "loss": 1.1466459035873413, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.8295841854124063, | |
| "grad_norm": 1.643409013748169, | |
| "learning_rate": 1.3205461647306872e-05, | |
| "loss": 1.1066665649414062, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.8302658486707567, | |
| "grad_norm": 1.5371270179748535, | |
| "learning_rate": 1.3194999871202408e-05, | |
| "loss": 0.630884051322937, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.830947511929107, | |
| "grad_norm": 1.544770359992981, | |
| "learning_rate": 1.3184534199214059e-05, | |
| "loss": 0.7396583557128906, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.8316291751874574, | |
| "grad_norm": 3.373950481414795, | |
| "learning_rate": 1.3174064644103334e-05, | |
| "loss": 1.3077354431152344, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.8323108384458078, | |
| "grad_norm": 1.815501093864441, | |
| "learning_rate": 1.3163591218636494e-05, | |
| "loss": 1.2874424457550049, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.8329925017041582, | |
| "grad_norm": 5.293972015380859, | |
| "learning_rate": 1.31531139355845e-05, | |
| "loss": 1.6489955186843872, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.8336741649625086, | |
| "grad_norm": 1.9330779314041138, | |
| "learning_rate": 1.3142632807723035e-05, | |
| "loss": 1.4095408916473389, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.8343558282208589, | |
| "grad_norm": 2.491067886352539, | |
| "learning_rate": 1.3132147847832453e-05, | |
| "loss": 1.4048972129821777, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.8350374914792092, | |
| "grad_norm": 2.374852180480957, | |
| "learning_rate": 1.3121659068697797e-05, | |
| "loss": 1.1800034046173096, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.8357191547375596, | |
| "grad_norm": 3.167478561401367, | |
| "learning_rate": 1.3111166483108753e-05, | |
| "loss": 1.1449928283691406, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.83640081799591, | |
| "grad_norm": 1.7197049856185913, | |
| "learning_rate": 1.310067010385966e-05, | |
| "loss": 1.2595001459121704, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.8370824812542604, | |
| "grad_norm": 2.305821657180786, | |
| "learning_rate": 1.3090169943749475e-05, | |
| "loss": 0.5387115478515625, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.8377641445126107, | |
| "grad_norm": 1.9236418008804321, | |
| "learning_rate": 1.307966601558177e-05, | |
| "loss": 1.238250970840454, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.8384458077709611, | |
| "grad_norm": 1.9397454261779785, | |
| "learning_rate": 1.306915833216471e-05, | |
| "loss": 1.2780492305755615, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.8391274710293115, | |
| "grad_norm": 5.5808820724487305, | |
| "learning_rate": 1.3058646906311032e-05, | |
| "loss": 0.4606386423110962, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.8398091342876619, | |
| "grad_norm": 1.8887132406234741, | |
| "learning_rate": 1.304813175083805e-05, | |
| "loss": 0.5161309838294983, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.8404907975460123, | |
| "grad_norm": 2.4710726737976074, | |
| "learning_rate": 1.3037612878567623e-05, | |
| "loss": 0.7300988435745239, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.8411724608043627, | |
| "grad_norm": 3.93088960647583, | |
| "learning_rate": 1.3027090302326127e-05, | |
| "loss": 1.6448276042938232, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.841854124062713, | |
| "grad_norm": 5.784260272979736, | |
| "learning_rate": 1.3016564034944473e-05, | |
| "loss": 2.1232104301452637, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.8425357873210634, | |
| "grad_norm": 9.557524681091309, | |
| "learning_rate": 1.3006034089258059e-05, | |
| "loss": 0.8751978874206543, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.8432174505794138, | |
| "grad_norm": 3.1583986282348633, | |
| "learning_rate": 1.2995500478106781e-05, | |
| "loss": 1.4895182847976685, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.8438991138377642, | |
| "grad_norm": 1.9370651245117188, | |
| "learning_rate": 1.2984963214335e-05, | |
| "loss": 1.2552303075790405, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.8445807770961146, | |
| "grad_norm": 2.8927485942840576, | |
| "learning_rate": 1.2974422310791524e-05, | |
| "loss": 0.8721221089363098, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.8452624403544649, | |
| "grad_norm": 3.3905813694000244, | |
| "learning_rate": 1.29638777803296e-05, | |
| "loss": 2.2706828117370605, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.8459441036128152, | |
| "grad_norm": 2.136692762374878, | |
| "learning_rate": 1.2953329635806914e-05, | |
| "loss": 0.9477517604827881, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.8466257668711656, | |
| "grad_norm": 6.7342963218688965, | |
| "learning_rate": 1.2942777890085538e-05, | |
| "loss": 2.1006269454956055, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.847307430129516, | |
| "grad_norm": 6.7248616218566895, | |
| "learning_rate": 1.2932222556031946e-05, | |
| "loss": 2.246072292327881, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.8479890933878664, | |
| "grad_norm": 5.852639675140381, | |
| "learning_rate": 1.2921663646516985e-05, | |
| "loss": 1.0963563919067383, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.8486707566462167, | |
| "grad_norm": 3.222761631011963, | |
| "learning_rate": 1.2911101174415861e-05, | |
| "loss": 1.497108817100525, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.8493524199045671, | |
| "grad_norm": 2.8850924968719482, | |
| "learning_rate": 1.290053515260813e-05, | |
| "loss": 1.030866026878357, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.8500340831629175, | |
| "grad_norm": 2.141866445541382, | |
| "learning_rate": 1.288996559397767e-05, | |
| "loss": 1.2230726480484009, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.8507157464212679, | |
| "grad_norm": 2.458935499191284, | |
| "learning_rate": 1.2879392511412668e-05, | |
| "loss": 1.5903170108795166, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.8513974096796183, | |
| "grad_norm": 2.7172353267669678, | |
| "learning_rate": 1.2868815917805619e-05, | |
| "loss": 0.705411970615387, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.8520790729379687, | |
| "grad_norm": 1.890684723854065, | |
| "learning_rate": 1.2858235826053294e-05, | |
| "loss": 0.7464626431465149, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.852760736196319, | |
| "grad_norm": 2.983992576599121, | |
| "learning_rate": 1.2847652249056726e-05, | |
| "loss": 1.1335132122039795, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.8534423994546694, | |
| "grad_norm": 2.6148171424865723, | |
| "learning_rate": 1.2837065199721204e-05, | |
| "loss": 0.6404839754104614, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.8541240627130198, | |
| "grad_norm": 2.2352583408355713, | |
| "learning_rate": 1.2826474690956243e-05, | |
| "loss": 0.8552806377410889, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.8548057259713702, | |
| "grad_norm": 1.6480870246887207, | |
| "learning_rate": 1.2815880735675588e-05, | |
| "loss": 0.601885199546814, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.8554873892297206, | |
| "grad_norm": 11.749967575073242, | |
| "learning_rate": 1.2805283346797179e-05, | |
| "loss": 1.6991385221481323, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.8561690524880708, | |
| "grad_norm": 2.079409122467041, | |
| "learning_rate": 1.279468253724314e-05, | |
| "loss": 1.1878442764282227, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.8568507157464212, | |
| "grad_norm": 4.077375888824463, | |
| "learning_rate": 1.2784078319939769e-05, | |
| "loss": 1.33461594581604, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.8575323790047716, | |
| "grad_norm": 3.0802202224731445, | |
| "learning_rate": 1.2773470707817524e-05, | |
| "loss": 1.5822620391845703, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.858214042263122, | |
| "grad_norm": 3.6835556030273438, | |
| "learning_rate": 1.2762859713810998e-05, | |
| "loss": 1.514790415763855, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.8588957055214724, | |
| "grad_norm": 2.215324878692627, | |
| "learning_rate": 1.2752245350858905e-05, | |
| "loss": 1.1391081809997559, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.8595773687798227, | |
| "grad_norm": 4.364893436431885, | |
| "learning_rate": 1.2741627631904077e-05, | |
| "loss": 1.7329888343811035, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.8602590320381731, | |
| "grad_norm": 3.4366931915283203, | |
| "learning_rate": 1.2731006569893427e-05, | |
| "loss": 1.4349662065505981, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.8609406952965235, | |
| "grad_norm": 4.452749729156494, | |
| "learning_rate": 1.272038217777795e-05, | |
| "loss": 1.5987038612365723, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.8616223585548739, | |
| "grad_norm": 232.9865264892578, | |
| "learning_rate": 1.27097544685127e-05, | |
| "loss": 0.9670251607894897, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.8623040218132243, | |
| "grad_norm": 3.7537894248962402, | |
| "learning_rate": 1.2699123455056777e-05, | |
| "loss": 1.1016144752502441, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.8629856850715747, | |
| "grad_norm": 2.8255839347839355, | |
| "learning_rate": 1.268848915037331e-05, | |
| "loss": 0.9010818600654602, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.863667348329925, | |
| "grad_norm": 2.359464406967163, | |
| "learning_rate": 1.2677851567429442e-05, | |
| "loss": 0.7024168968200684, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.8643490115882754, | |
| "grad_norm": 2.3273167610168457, | |
| "learning_rate": 1.2667210719196308e-05, | |
| "loss": 1.6643811464309692, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.8650306748466258, | |
| "grad_norm": 2.9757978916168213, | |
| "learning_rate": 1.2656566618649031e-05, | |
| "loss": 1.4444563388824463, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.8657123381049762, | |
| "grad_norm": 1.7979950904846191, | |
| "learning_rate": 1.26459192787667e-05, | |
| "loss": 0.7493287920951843, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.8663940013633266, | |
| "grad_norm": 2.6119706630706787, | |
| "learning_rate": 1.263526871253235e-05, | |
| "loss": 1.1911826133728027, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.8670756646216768, | |
| "grad_norm": 1.5572715997695923, | |
| "learning_rate": 1.2624614932932953e-05, | |
| "loss": 1.0380706787109375, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.8677573278800272, | |
| "grad_norm": 2.26962947845459, | |
| "learning_rate": 1.261395795295939e-05, | |
| "loss": 1.4516929388046265, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.8684389911383776, | |
| "grad_norm": 2.810452938079834, | |
| "learning_rate": 1.260329778560646e-05, | |
| "loss": 0.6930328607559204, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.869120654396728, | |
| "grad_norm": 2.030446767807007, | |
| "learning_rate": 1.2592634443872842e-05, | |
| "loss": 0.3848379850387573, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.8698023176550784, | |
| "grad_norm": 2.2821154594421387, | |
| "learning_rate": 1.2581967940761079e-05, | |
| "loss": 0.6541157364845276, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.8704839809134287, | |
| "grad_norm": 3.862823247909546, | |
| "learning_rate": 1.257129828927758e-05, | |
| "loss": 1.4444286823272705, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.8711656441717791, | |
| "grad_norm": 1.122416615486145, | |
| "learning_rate": 1.2560625502432581e-05, | |
| "loss": 0.7840420007705688, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.8718473074301295, | |
| "grad_norm": 2.7760159969329834, | |
| "learning_rate": 1.2549949593240156e-05, | |
| "loss": 1.672440528869629, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.8725289706884799, | |
| "grad_norm": 1.3836902379989624, | |
| "learning_rate": 1.2539270574718172e-05, | |
| "loss": 0.9528952836990356, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.8732106339468303, | |
| "grad_norm": 2.4517650604248047, | |
| "learning_rate": 1.2528588459888291e-05, | |
| "loss": 1.0446099042892456, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.8738922972051807, | |
| "grad_norm": 1.6478908061981201, | |
| "learning_rate": 1.2517903261775963e-05, | |
| "loss": 1.1493809223175049, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.874573960463531, | |
| "grad_norm": 1.7826266288757324, | |
| "learning_rate": 1.2507214993410382e-05, | |
| "loss": 0.873786449432373, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.8752556237218814, | |
| "grad_norm": 1.3265115022659302, | |
| "learning_rate": 1.2496523667824487e-05, | |
| "loss": 0.9068435430526733, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.8759372869802318, | |
| "grad_norm": 4.726826190948486, | |
| "learning_rate": 1.2485829298054952e-05, | |
| "loss": 2.038616418838501, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.8766189502385822, | |
| "grad_norm": 2.2327160835266113, | |
| "learning_rate": 1.2475131897142165e-05, | |
| "loss": 1.1699340343475342, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.8773006134969326, | |
| "grad_norm": 2.6090691089630127, | |
| "learning_rate": 1.2464431478130204e-05, | |
| "loss": 1.4517043828964233, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.8779822767552828, | |
| "grad_norm": 3.6594088077545166, | |
| "learning_rate": 1.2453728054066825e-05, | |
| "loss": 1.3603757619857788, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.8786639400136332, | |
| "grad_norm": 1.8763833045959473, | |
| "learning_rate": 1.244302163800345e-05, | |
| "loss": 1.105027198791504, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.8793456032719836, | |
| "grad_norm": 3.21225643157959, | |
| "learning_rate": 1.2432312242995158e-05, | |
| "loss": 1.1615551710128784, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.880027266530334, | |
| "grad_norm": 1.9490127563476562, | |
| "learning_rate": 1.2421599882100647e-05, | |
| "loss": 1.3426179885864258, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.8807089297886844, | |
| "grad_norm": 2.9683022499084473, | |
| "learning_rate": 1.2410884568382245e-05, | |
| "loss": 0.8526750802993774, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.8813905930470347, | |
| "grad_norm": 1.6657367944717407, | |
| "learning_rate": 1.2400166314905868e-05, | |
| "loss": 0.974453330039978, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.8820722563053851, | |
| "grad_norm": 2.1943328380584717, | |
| "learning_rate": 1.2389445134741022e-05, | |
| "loss": 1.0904216766357422, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.8827539195637355, | |
| "grad_norm": 3.002743721008301, | |
| "learning_rate": 1.2378721040960788e-05, | |
| "loss": 1.1856180429458618, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.8834355828220859, | |
| "grad_norm": 3.0109915733337402, | |
| "learning_rate": 1.2367994046641787e-05, | |
| "loss": 1.7809927463531494, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.8841172460804363, | |
| "grad_norm": 1.7773187160491943, | |
| "learning_rate": 1.2357264164864186e-05, | |
| "loss": 0.9765096306800842, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 0.8847989093387867, | |
| "grad_norm": 1.5350987911224365, | |
| "learning_rate": 1.2346531408711675e-05, | |
| "loss": 1.1852858066558838, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.885480572597137, | |
| "grad_norm": 2.7784996032714844, | |
| "learning_rate": 1.233579579127144e-05, | |
| "loss": 1.5897890329360962, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.8861622358554874, | |
| "grad_norm": 5.824115753173828, | |
| "learning_rate": 1.232505732563416e-05, | |
| "loss": 1.3956981897354126, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.8868438991138378, | |
| "grad_norm": 1.4179086685180664, | |
| "learning_rate": 1.2314316024893987e-05, | |
| "loss": 0.5982742309570312, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.8875255623721882, | |
| "grad_norm": 5.166876792907715, | |
| "learning_rate": 1.2303571902148532e-05, | |
| "loss": 1.2101175785064697, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.8882072256305386, | |
| "grad_norm": 2.7313640117645264, | |
| "learning_rate": 1.2292824970498847e-05, | |
| "loss": 1.0363984107971191, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 2.3922321796417236, | |
| "learning_rate": 1.2282075243049408e-05, | |
| "loss": 1.292094349861145, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.8895705521472392, | |
| "grad_norm": 2.692531108856201, | |
| "learning_rate": 1.2271322732908091e-05, | |
| "loss": 0.5840368866920471, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.8902522154055896, | |
| "grad_norm": 1.4718867540359497, | |
| "learning_rate": 1.2260567453186185e-05, | |
| "loss": 0.5893124341964722, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.89093387866394, | |
| "grad_norm": 4.281642436981201, | |
| "learning_rate": 1.2249809416998339e-05, | |
| "loss": 1.3083157539367676, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.8916155419222904, | |
| "grad_norm": 1.3782727718353271, | |
| "learning_rate": 1.2239048637462572e-05, | |
| "loss": 0.3187441825866699, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.8922972051806408, | |
| "grad_norm": 6.625371932983398, | |
| "learning_rate": 1.2228285127700244e-05, | |
| "loss": 2.061591386795044, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.8929788684389911, | |
| "grad_norm": 1.15508234500885, | |
| "learning_rate": 1.2217518900836045e-05, | |
| "loss": 0.7010345458984375, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.8936605316973415, | |
| "grad_norm": 2.2813875675201416, | |
| "learning_rate": 1.2206749969997979e-05, | |
| "loss": 1.2704284191131592, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.8943421949556919, | |
| "grad_norm": 1.3211407661437988, | |
| "learning_rate": 1.2195978348317347e-05, | |
| "loss": 0.8025050163269043, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.8950238582140423, | |
| "grad_norm": 3.8039255142211914, | |
| "learning_rate": 1.2185204048928729e-05, | |
| "loss": 1.2957152128219604, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.8957055214723927, | |
| "grad_norm": 2.144146203994751, | |
| "learning_rate": 1.2174427084969973e-05, | |
| "loss": 1.082507610321045, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.896387184730743, | |
| "grad_norm": 2.7374770641326904, | |
| "learning_rate": 1.2163647469582181e-05, | |
| "loss": 1.42399001121521, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.8970688479890934, | |
| "grad_norm": 6.868131637573242, | |
| "learning_rate": 1.2152865215909673e-05, | |
| "loss": 1.5974717140197754, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.8977505112474438, | |
| "grad_norm": 2.1185736656188965, | |
| "learning_rate": 1.2142080337099998e-05, | |
| "loss": 1.3208156824111938, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.8984321745057942, | |
| "grad_norm": 4.58150053024292, | |
| "learning_rate": 1.2131292846303901e-05, | |
| "loss": 1.752542495727539, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.8991138377641446, | |
| "grad_norm": 3.0584685802459717, | |
| "learning_rate": 1.2120502756675324e-05, | |
| "loss": 0.8576693534851074, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.8997955010224948, | |
| "grad_norm": 2.083662748336792, | |
| "learning_rate": 1.210971008137136e-05, | |
| "loss": 0.9496646523475647, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.9004771642808452, | |
| "grad_norm": 2.260331869125366, | |
| "learning_rate": 1.2098914833552262e-05, | |
| "loss": 1.085833191871643, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 0.9011588275391956, | |
| "grad_norm": 2.5296528339385986, | |
| "learning_rate": 1.2088117026381422e-05, | |
| "loss": 1.541821002960205, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.901840490797546, | |
| "grad_norm": 1.7845332622528076, | |
| "learning_rate": 1.2077316673025354e-05, | |
| "loss": 0.7951586246490479, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.9025221540558964, | |
| "grad_norm": 1.9708597660064697, | |
| "learning_rate": 1.2066513786653675e-05, | |
| "loss": 1.301053762435913, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.9032038173142468, | |
| "grad_norm": 2.5523624420166016, | |
| "learning_rate": 1.2055708380439089e-05, | |
| "loss": 1.4844532012939453, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.9038854805725971, | |
| "grad_norm": 1.4186663627624512, | |
| "learning_rate": 1.204490046755737e-05, | |
| "loss": 0.7520268559455872, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.9045671438309475, | |
| "grad_norm": 3.578901767730713, | |
| "learning_rate": 1.2034090061187358e-05, | |
| "loss": 1.8416643142700195, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.9052488070892979, | |
| "grad_norm": 1.7172307968139648, | |
| "learning_rate": 1.2023277174510923e-05, | |
| "loss": 0.981949508190155, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.9059304703476483, | |
| "grad_norm": 4.107685565948486, | |
| "learning_rate": 1.2012461820712966e-05, | |
| "loss": 2.1946287155151367, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.9066121336059987, | |
| "grad_norm": 2.601182460784912, | |
| "learning_rate": 1.2001644012981392e-05, | |
| "loss": 1.226723313331604, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.907293796864349, | |
| "grad_norm": 1.4821761846542358, | |
| "learning_rate": 1.1990823764507108e-05, | |
| "loss": 1.1278297901153564, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.9079754601226994, | |
| "grad_norm": 3.0466654300689697, | |
| "learning_rate": 1.1980001088483986e-05, | |
| "loss": 1.0089236497879028, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.9086571233810498, | |
| "grad_norm": 1.8594928979873657, | |
| "learning_rate": 1.1969175998108857e-05, | |
| "loss": 1.0941195487976074, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 0.9093387866394002, | |
| "grad_norm": 6.154777526855469, | |
| "learning_rate": 1.1958348506581503e-05, | |
| "loss": 2.400118112564087, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.9100204498977505, | |
| "grad_norm": 3.1990363597869873, | |
| "learning_rate": 1.1947518627104637e-05, | |
| "loss": 0.6849943995475769, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.9107021131561008, | |
| "grad_norm": 2.930387496948242, | |
| "learning_rate": 1.1936686372883877e-05, | |
| "loss": 0.5548266768455505, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.9113837764144512, | |
| "grad_norm": 2.311358690261841, | |
| "learning_rate": 1.1925851757127735e-05, | |
| "loss": 0.995979905128479, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.9120654396728016, | |
| "grad_norm": 3.1525368690490723, | |
| "learning_rate": 1.1915014793047606e-05, | |
| "loss": 0.44815969467163086, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.912747102931152, | |
| "grad_norm": 3.2352945804595947, | |
| "learning_rate": 1.190417549385775e-05, | |
| "loss": 1.2947889566421509, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 0.9134287661895024, | |
| "grad_norm": 1.5861382484436035, | |
| "learning_rate": 1.1893333872775275e-05, | |
| "loss": 0.9021660089492798, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.9141104294478528, | |
| "grad_norm": 4.334277629852295, | |
| "learning_rate": 1.1882489943020115e-05, | |
| "loss": 1.8276174068450928, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 0.9147920927062031, | |
| "grad_norm": 2.043137788772583, | |
| "learning_rate": 1.187164371781502e-05, | |
| "loss": 0.3331798017024994, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.9154737559645535, | |
| "grad_norm": 3.3330094814300537, | |
| "learning_rate": 1.1860795210385547e-05, | |
| "loss": 2.193763256072998, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 0.9161554192229039, | |
| "grad_norm": 2.059584379196167, | |
| "learning_rate": 1.1849944433960026e-05, | |
| "loss": 0.5906944870948792, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.9168370824812543, | |
| "grad_norm": 2.9722602367401123, | |
| "learning_rate": 1.1839091401769559e-05, | |
| "loss": 1.6175780296325684, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.9175187457396047, | |
| "grad_norm": 3.0332117080688477, | |
| "learning_rate": 1.1828236127047991e-05, | |
| "loss": 1.4893518686294556, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.918200408997955, | |
| "grad_norm": 1.3801240921020508, | |
| "learning_rate": 1.1817378623031921e-05, | |
| "loss": 1.227494716644287, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 0.9188820722563054, | |
| "grad_norm": 1.5017725229263306, | |
| "learning_rate": 1.1806518902960643e-05, | |
| "loss": 0.8263968229293823, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.9195637355146558, | |
| "grad_norm": 2.4677810668945312, | |
| "learning_rate": 1.1795656980076164e-05, | |
| "loss": 1.325760006904602, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 0.9202453987730062, | |
| "grad_norm": 6.49123477935791, | |
| "learning_rate": 1.1784792867623179e-05, | |
| "loss": 1.9121781587600708, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.9209270620313565, | |
| "grad_norm": 1.6628427505493164, | |
| "learning_rate": 1.1773926578849049e-05, | |
| "loss": 0.689493715763092, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.9216087252897068, | |
| "grad_norm": 3.224045753479004, | |
| "learning_rate": 1.1763058127003793e-05, | |
| "loss": 1.6399683952331543, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.9222903885480572, | |
| "grad_norm": 2.3120028972625732, | |
| "learning_rate": 1.1752187525340061e-05, | |
| "loss": 0.34457504749298096, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 0.9229720518064076, | |
| "grad_norm": 2.854567289352417, | |
| "learning_rate": 1.1741314787113129e-05, | |
| "loss": 1.567234992980957, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.923653715064758, | |
| "grad_norm": 2.151095151901245, | |
| "learning_rate": 1.1730439925580876e-05, | |
| "loss": 1.320873737335205, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.9243353783231084, | |
| "grad_norm": 1.4363987445831299, | |
| "learning_rate": 1.1719562954003774e-05, | |
| "loss": 0.8847675919532776, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.9250170415814588, | |
| "grad_norm": 3.134401321411133, | |
| "learning_rate": 1.1708683885644865e-05, | |
| "loss": 1.750314474105835, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 0.9256987048398091, | |
| "grad_norm": 1.9255026578903198, | |
| "learning_rate": 1.1697802733769745e-05, | |
| "loss": 1.4583141803741455, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.9263803680981595, | |
| "grad_norm": 3.1198015213012695, | |
| "learning_rate": 1.1686919511646557e-05, | |
| "loss": 1.9378410577774048, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 0.9270620313565099, | |
| "grad_norm": 2.286750555038452, | |
| "learning_rate": 1.1676034232545963e-05, | |
| "loss": 0.9694126844406128, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.9277436946148603, | |
| "grad_norm": 1.7516858577728271, | |
| "learning_rate": 1.1665146909741134e-05, | |
| "loss": 1.0411911010742188, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 0.9284253578732107, | |
| "grad_norm": 3.267704963684082, | |
| "learning_rate": 1.1654257556507735e-05, | |
| "loss": 1.0651662349700928, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.929107021131561, | |
| "grad_norm": 1.5885710716247559, | |
| "learning_rate": 1.1643366186123913e-05, | |
| "loss": 0.6454929113388062, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 0.9297886843899114, | |
| "grad_norm": 3.1804027557373047, | |
| "learning_rate": 1.163247281187026e-05, | |
| "loss": 1.0113682746887207, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.9304703476482618, | |
| "grad_norm": 2.0777599811553955, | |
| "learning_rate": 1.1621577447029816e-05, | |
| "loss": 0.6646759510040283, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.9311520109066121, | |
| "grad_norm": 4.906405448913574, | |
| "learning_rate": 1.1610680104888057e-05, | |
| "loss": 2.013087272644043, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.9318336741649625, | |
| "grad_norm": 1.0533791780471802, | |
| "learning_rate": 1.1599780798732868e-05, | |
| "loss": 0.5444119572639465, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 0.9325153374233128, | |
| "grad_norm": 3.627973794937134, | |
| "learning_rate": 1.158887954185452e-05, | |
| "loss": 1.0098434686660767, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.9331970006816632, | |
| "grad_norm": 2.2816169261932373, | |
| "learning_rate": 1.157797634754567e-05, | |
| "loss": 0.5894101858139038, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 0.9338786639400136, | |
| "grad_norm": 1.3498238325119019, | |
| "learning_rate": 1.1567071229101332e-05, | |
| "loss": 0.8812987804412842, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.934560327198364, | |
| "grad_norm": 2.0164008140563965, | |
| "learning_rate": 1.1556164199818871e-05, | |
| "loss": 1.0886428356170654, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 0.9352419904567144, | |
| "grad_norm": 5.006718635559082, | |
| "learning_rate": 1.1545255272997983e-05, | |
| "loss": 2.039119005203247, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.9359236537150648, | |
| "grad_norm": 1.3555481433868408, | |
| "learning_rate": 1.153434446194068e-05, | |
| "loss": 1.6098264455795288, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 0.9366053169734151, | |
| "grad_norm": 3.5221757888793945, | |
| "learning_rate": 1.1523431779951255e-05, | |
| "loss": 1.293176531791687, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.9372869802317655, | |
| "grad_norm": 2.4867475032806396, | |
| "learning_rate": 1.1512517240336304e-05, | |
| "loss": 1.3837792873382568, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.9379686434901159, | |
| "grad_norm": 5.508114337921143, | |
| "learning_rate": 1.1501600856404676e-05, | |
| "loss": 2.0230002403259277, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.9386503067484663, | |
| "grad_norm": 2.9103028774261475, | |
| "learning_rate": 1.149068264146747e-05, | |
| "loss": 1.313584327697754, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 0.9393319700068167, | |
| "grad_norm": 4.0926594734191895, | |
| "learning_rate": 1.1479762608838018e-05, | |
| "loss": 1.785624623298645, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.940013633265167, | |
| "grad_norm": 2.0462605953216553, | |
| "learning_rate": 1.1468840771831874e-05, | |
| "loss": 0.5250790119171143, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.9406952965235174, | |
| "grad_norm": 2.402515172958374, | |
| "learning_rate": 1.1457917143766786e-05, | |
| "loss": 0.5881788730621338, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.9413769597818678, | |
| "grad_norm": 1.6995738744735718, | |
| "learning_rate": 1.1446991737962688e-05, | |
| "loss": 0.9168304800987244, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 0.9420586230402181, | |
| "grad_norm": 1.801107406616211, | |
| "learning_rate": 1.1436064567741679e-05, | |
| "loss": 0.9895972013473511, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.9427402862985685, | |
| "grad_norm": 4.026597023010254, | |
| "learning_rate": 1.1425135646428011e-05, | |
| "loss": 2.1194210052490234, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 0.9434219495569189, | |
| "grad_norm": 3.322819232940674, | |
| "learning_rate": 1.141420498734808e-05, | |
| "loss": 0.801817774772644, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.9441036128152692, | |
| "grad_norm": 1.8702592849731445, | |
| "learning_rate": 1.1403272603830384e-05, | |
| "loss": 1.154260277748108, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.9447852760736196, | |
| "grad_norm": 4.2028489112854, | |
| "learning_rate": 1.139233850920554e-05, | |
| "loss": 2.3136725425720215, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.94546693933197, | |
| "grad_norm": 2.436260223388672, | |
| "learning_rate": 1.1381402716806237e-05, | |
| "loss": 0.9741934537887573, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 0.9461486025903204, | |
| "grad_norm": 1.9411702156066895, | |
| "learning_rate": 1.137046523996725e-05, | |
| "loss": 1.2022333145141602, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.9468302658486708, | |
| "grad_norm": 2.452498435974121, | |
| "learning_rate": 1.1359526092025395e-05, | |
| "loss": 0.7073385715484619, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 0.9475119291070211, | |
| "grad_norm": 1.9482067823410034, | |
| "learning_rate": 1.1348585286319529e-05, | |
| "loss": 0.8009145259857178, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.9481935923653715, | |
| "grad_norm": 1.6864196062088013, | |
| "learning_rate": 1.1337642836190532e-05, | |
| "loss": 0.9256442785263062, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 0.9488752556237219, | |
| "grad_norm": 1.9843751192092896, | |
| "learning_rate": 1.1326698754981292e-05, | |
| "loss": 0.8110345602035522, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.9495569188820723, | |
| "grad_norm": 2.8555917739868164, | |
| "learning_rate": 1.131575305603668e-05, | |
| "loss": 0.9550151824951172, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 0.9502385821404227, | |
| "grad_norm": 1.2877635955810547, | |
| "learning_rate": 1.130480575270354e-05, | |
| "loss": 0.7633803486824036, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.950920245398773, | |
| "grad_norm": 1.3084089756011963, | |
| "learning_rate": 1.1293856858330678e-05, | |
| "loss": 1.1857316493988037, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.9516019086571234, | |
| "grad_norm": 2.01818585395813, | |
| "learning_rate": 1.1282906386268842e-05, | |
| "loss": 1.207491159439087, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.9522835719154737, | |
| "grad_norm": 2.232659101486206, | |
| "learning_rate": 1.1271954349870686e-05, | |
| "loss": 1.1294128894805908, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 0.9529652351738241, | |
| "grad_norm": 2.3538355827331543, | |
| "learning_rate": 1.1261000762490793e-05, | |
| "loss": 1.0520989894866943, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.9536468984321745, | |
| "grad_norm": 2.2052597999572754, | |
| "learning_rate": 1.1250045637485624e-05, | |
| "loss": 0.3434506356716156, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 0.9543285616905249, | |
| "grad_norm": 2.2298030853271484, | |
| "learning_rate": 1.1239088988213522e-05, | |
| "loss": 1.4217681884765625, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9550102249488752, | |
| "grad_norm": 3.030216932296753, | |
| "learning_rate": 1.1228130828034685e-05, | |
| "loss": 0.8571851253509521, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 0.9556918882072256, | |
| "grad_norm": 1.500774621963501, | |
| "learning_rate": 1.1217171170311157e-05, | |
| "loss": 1.0443930625915527, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.956373551465576, | |
| "grad_norm": 2.5589287281036377, | |
| "learning_rate": 1.1206210028406797e-05, | |
| "loss": 1.8444973230361938, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 0.9570552147239264, | |
| "grad_norm": 1.1014353036880493, | |
| "learning_rate": 1.1195247415687286e-05, | |
| "loss": 0.6612838506698608, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.9577368779822768, | |
| "grad_norm": 1.461776614189148, | |
| "learning_rate": 1.11842833455201e-05, | |
| "loss": 0.9575937986373901, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.9584185412406271, | |
| "grad_norm": 2.6493754386901855, | |
| "learning_rate": 1.1173317831274479e-05, | |
| "loss": 1.0812537670135498, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.9591002044989775, | |
| "grad_norm": 2.1644461154937744, | |
| "learning_rate": 1.1162350886321435e-05, | |
| "loss": 0.9594922065734863, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 0.9597818677573279, | |
| "grad_norm": 3.7024781703948975, | |
| "learning_rate": 1.115138252403372e-05, | |
| "loss": 1.1270642280578613, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.9604635310156783, | |
| "grad_norm": 2.0624656677246094, | |
| "learning_rate": 1.1140412757785818e-05, | |
| "loss": 0.8593518733978271, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 0.9611451942740287, | |
| "grad_norm": 1.5966134071350098, | |
| "learning_rate": 1.1129441600953916e-05, | |
| "loss": 1.2847955226898193, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.961826857532379, | |
| "grad_norm": 1.5607731342315674, | |
| "learning_rate": 1.1118469066915907e-05, | |
| "loss": 0.973434329032898, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 0.9625085207907293, | |
| "grad_norm": 2.171595811843872, | |
| "learning_rate": 1.1107495169051364e-05, | |
| "loss": 1.2536604404449463, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.9631901840490797, | |
| "grad_norm": 2.4465882778167725, | |
| "learning_rate": 1.1096519920741509e-05, | |
| "loss": 1.2964740991592407, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 0.9638718473074301, | |
| "grad_norm": 1.1267685890197754, | |
| "learning_rate": 1.1085543335369224e-05, | |
| "loss": 0.756505012512207, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.9645535105657805, | |
| "grad_norm": 4.980567455291748, | |
| "learning_rate": 1.1074565426319014e-05, | |
| "loss": 1.916570782661438, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.9652351738241309, | |
| "grad_norm": 2.2414605617523193, | |
| "learning_rate": 1.1063586206977009e-05, | |
| "loss": 1.068878173828125, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.9659168370824812, | |
| "grad_norm": 1.3340094089508057, | |
| "learning_rate": 1.1052605690730922e-05, | |
| "loss": 0.5894954204559326, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 0.9665985003408316, | |
| "grad_norm": 1.699578881263733, | |
| "learning_rate": 1.1041623890970061e-05, | |
| "loss": 0.9399139881134033, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.967280163599182, | |
| "grad_norm": 1.8976777791976929, | |
| "learning_rate": 1.1030640821085284e-05, | |
| "loss": 1.1349092721939087, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 0.9679618268575324, | |
| "grad_norm": 1.6042039394378662, | |
| "learning_rate": 1.101965649446901e-05, | |
| "loss": 1.9341599941253662, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.9686434901158828, | |
| "grad_norm": 1.7874032258987427, | |
| "learning_rate": 1.1008670924515191e-05, | |
| "loss": 1.09175443649292, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 0.9693251533742331, | |
| "grad_norm": 2.457379102706909, | |
| "learning_rate": 1.0997684124619286e-05, | |
| "loss": 1.2264299392700195, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.9700068166325835, | |
| "grad_norm": 2.286973476409912, | |
| "learning_rate": 1.0986696108178259e-05, | |
| "loss": 1.0105416774749756, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 0.9706884798909339, | |
| "grad_norm": 1.4236822128295898, | |
| "learning_rate": 1.0975706888590556e-05, | |
| "loss": 0.9824390411376953, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.9713701431492843, | |
| "grad_norm": 2.462789297103882, | |
| "learning_rate": 1.0964716479256094e-05, | |
| "loss": 1.3617198467254639, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.9720518064076347, | |
| "grad_norm": 2.2924046516418457, | |
| "learning_rate": 1.0953724893576236e-05, | |
| "loss": 1.625471830368042, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.972733469665985, | |
| "grad_norm": 2.718590021133423, | |
| "learning_rate": 1.0942732144953782e-05, | |
| "loss": 1.3157684803009033, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 0.9734151329243353, | |
| "grad_norm": 2.6177055835723877, | |
| "learning_rate": 1.0931738246792947e-05, | |
| "loss": 1.2948482036590576, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.9740967961826857, | |
| "grad_norm": 2.5654430389404297, | |
| "learning_rate": 1.0920743212499355e-05, | |
| "loss": 1.319617509841919, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 0.9747784594410361, | |
| "grad_norm": 1.715665578842163, | |
| "learning_rate": 1.0909747055480004e-05, | |
| "loss": 0.8499696254730225, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.9754601226993865, | |
| "grad_norm": 5.237400531768799, | |
| "learning_rate": 1.089874978914327e-05, | |
| "loss": 1.848403811454773, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 0.9761417859577369, | |
| "grad_norm": 3.2037734985351562, | |
| "learning_rate": 1.0887751426898878e-05, | |
| "loss": 1.5934137105941772, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.9768234492160872, | |
| "grad_norm": 5.400890350341797, | |
| "learning_rate": 1.0876751982157892e-05, | |
| "loss": 2.2045936584472656, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 0.9775051124744376, | |
| "grad_norm": 2.7312662601470947, | |
| "learning_rate": 1.0865751468332695e-05, | |
| "loss": 1.0877323150634766, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.978186775732788, | |
| "grad_norm": 3.9003102779388428, | |
| "learning_rate": 1.0854749898836974e-05, | |
| "loss": 1.2857639789581299, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.9788684389911384, | |
| "grad_norm": 2.6747279167175293, | |
| "learning_rate": 1.0843747287085693e-05, | |
| "loss": 0.6983846426010132, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.9795501022494888, | |
| "grad_norm": 2.099173069000244, | |
| "learning_rate": 1.0832743646495105e-05, | |
| "loss": 0.3862391412258148, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 0.9802317655078391, | |
| "grad_norm": 3.4450464248657227, | |
| "learning_rate": 1.0821738990482709e-05, | |
| "loss": 2.163217782974243, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.9809134287661895, | |
| "grad_norm": 2.7090086936950684, | |
| "learning_rate": 1.0810733332467235e-05, | |
| "loss": 1.9412810802459717, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 0.9815950920245399, | |
| "grad_norm": 2.2056849002838135, | |
| "learning_rate": 1.0799726685868648e-05, | |
| "loss": 0.9925412535667419, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.9822767552828903, | |
| "grad_norm": 1.637809157371521, | |
| "learning_rate": 1.0788719064108108e-05, | |
| "loss": 0.6028140783309937, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 0.9829584185412407, | |
| "grad_norm": 22.958332061767578, | |
| "learning_rate": 1.077771048060797e-05, | |
| "loss": 1.2065813541412354, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.983640081799591, | |
| "grad_norm": 4.294493198394775, | |
| "learning_rate": 1.076670094879176e-05, | |
| "loss": 1.4867355823516846, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 0.9843217450579413, | |
| "grad_norm": 6.976797103881836, | |
| "learning_rate": 1.0755690482084154e-05, | |
| "loss": 2.184006929397583, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.9850034083162917, | |
| "grad_norm": 1.6113888025283813, | |
| "learning_rate": 1.0744679093910987e-05, | |
| "loss": 1.124434232711792, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.9856850715746421, | |
| "grad_norm": 1.7234139442443848, | |
| "learning_rate": 1.0733666797699191e-05, | |
| "loss": 1.2631725072860718, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.9863667348329925, | |
| "grad_norm": 2.597224712371826, | |
| "learning_rate": 1.0722653606876828e-05, | |
| "loss": 1.4020732641220093, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 0.9870483980913429, | |
| "grad_norm": 1.8286712169647217, | |
| "learning_rate": 1.0711639534873035e-05, | |
| "loss": 0.7092963457107544, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.9877300613496932, | |
| "grad_norm": 2.565237283706665, | |
| "learning_rate": 1.0700624595118037e-05, | |
| "loss": 1.0420867204666138, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 0.9884117246080436, | |
| "grad_norm": 2.8494246006011963, | |
| "learning_rate": 1.0689608801043107e-05, | |
| "loss": 1.3831346035003662, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.989093387866394, | |
| "grad_norm": 2.3130369186401367, | |
| "learning_rate": 1.0678592166080565e-05, | |
| "loss": 1.2310330867767334, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 0.9897750511247444, | |
| "grad_norm": 1.8310436010360718, | |
| "learning_rate": 1.066757470366375e-05, | |
| "loss": 1.2459018230438232, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.9904567143830948, | |
| "grad_norm": 2.529782772064209, | |
| "learning_rate": 1.0656556427227019e-05, | |
| "loss": 1.8146663904190063, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 0.9911383776414451, | |
| "grad_norm": 1.7871593236923218, | |
| "learning_rate": 1.0645537350205714e-05, | |
| "loss": 1.0763839483261108, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.9918200408997955, | |
| "grad_norm": 1.7131450176239014, | |
| "learning_rate": 1.063451748603616e-05, | |
| "loss": 1.2468634843826294, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.9925017041581459, | |
| "grad_norm": 2.5768184661865234, | |
| "learning_rate": 1.0623496848155635e-05, | |
| "loss": 1.4966133832931519, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.9931833674164963, | |
| "grad_norm": 11.853314399719238, | |
| "learning_rate": 1.0612475450002363e-05, | |
| "loss": 1.127371907234192, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 0.9938650306748467, | |
| "grad_norm": 2.7855846881866455, | |
| "learning_rate": 1.0601453305015497e-05, | |
| "loss": 1.5127134323120117, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.994546693933197, | |
| "grad_norm": 2.1716721057891846, | |
| "learning_rate": 1.0590430426635098e-05, | |
| "loss": 0.47972676157951355, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 0.9952283571915473, | |
| "grad_norm": 2.356139898300171, | |
| "learning_rate": 1.0579406828302124e-05, | |
| "loss": 1.1699801683425903, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.9959100204498977, | |
| "grad_norm": 2.288517713546753, | |
| "learning_rate": 1.0568382523458412e-05, | |
| "loss": 0.9673724174499512, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 0.9965916837082481, | |
| "grad_norm": 2.9393985271453857, | |
| "learning_rate": 1.0557357525546651e-05, | |
| "loss": 1.2749524116516113, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.9972733469665985, | |
| "grad_norm": 3.3071682453155518, | |
| "learning_rate": 1.054633184801039e-05, | |
| "loss": 1.4630143642425537, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 0.9979550102249489, | |
| "grad_norm": 3.298875331878662, | |
| "learning_rate": 1.0535305504293988e-05, | |
| "loss": 2.061471700668335, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.9986366734832992, | |
| "grad_norm": 2.4312033653259277, | |
| "learning_rate": 1.0524278507842637e-05, | |
| "loss": 1.497416377067566, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.9993183367416496, | |
| "grad_norm": 1.8498071432113647, | |
| "learning_rate": 1.0513250872102312e-05, | |
| "loss": 0.9513588547706604, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 5.430646896362305, | |
| "learning_rate": 1.0502222610519772e-05, | |
| "loss": 1.1737076044082642, | |
| "step": 1467 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 2934, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.504286382316585e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |