Text Generation
Transformers
Safetensors
English
Korean
lfm2_moe
terminal
sft
vllm
tb2-lite
conversational
Instructions to use LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData")
model = AutoModelForCausalLM.from_pretrained("LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker
docker model run hf.co/LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData
- SGLang
How to use LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
- Docker Model Runner
How to use LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData with Docker Model Runner:
docker model run hf.co/LLM-OS-Models/LFM2-24B-A2B-Terminal-SFT-2Epoch-HF-FSDP-2BData
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 734, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0013623978201634877, | |
| "grad_norm": 27.647886276245117, | |
| "learning_rate": 0.0, | |
| "loss": 2.7866311073303223, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0027247956403269754, | |
| "grad_norm": 28.430374145507812, | |
| "learning_rate": 4.444444444444445e-07, | |
| "loss": 2.9265763759613037, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004087193460490463, | |
| "grad_norm": 24.750080108642578, | |
| "learning_rate": 8.88888888888889e-07, | |
| "loss": 2.650007724761963, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.005449591280653951, | |
| "grad_norm": 26.000843048095703, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 3.0528907775878906, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.006811989100817439, | |
| "grad_norm": 27.27891731262207, | |
| "learning_rate": 1.777777777777778e-06, | |
| "loss": 2.7619106769561768, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.008174386920980926, | |
| "grad_norm": 21.593921661376953, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 2.5617551803588867, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.009536784741144414, | |
| "grad_norm": 19.521177291870117, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 2.3857626914978027, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.010899182561307902, | |
| "grad_norm": 13.534676551818848, | |
| "learning_rate": 3.1111111111111116e-06, | |
| "loss": 2.1002044677734375, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01226158038147139, | |
| "grad_norm": 9.906437873840332, | |
| "learning_rate": 3.555555555555556e-06, | |
| "loss": 1.8867732286453247, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.013623978201634877, | |
| "grad_norm": 10.24316692352295, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.7133018970489502, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.014986376021798364, | |
| "grad_norm": 5.109798908233643, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 1.3970978260040283, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.01634877384196185, | |
| "grad_norm": 4.742298126220703, | |
| "learning_rate": 4.888888888888889e-06, | |
| "loss": 1.3010644912719727, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.017711171662125342, | |
| "grad_norm": 8.546895027160645, | |
| "learning_rate": 5.333333333333334e-06, | |
| "loss": 1.1571484804153442, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.01907356948228883, | |
| "grad_norm": 2.846215009689331, | |
| "learning_rate": 5.777777777777778e-06, | |
| "loss": 1.193354845046997, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.020435967302452316, | |
| "grad_norm": 2.637321710586548, | |
| "learning_rate": 6.222222222222223e-06, | |
| "loss": 1.077557921409607, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.021798365122615803, | |
| "grad_norm": 1.948442816734314, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.8891923427581787, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.02316076294277929, | |
| "grad_norm": 2.195993423461914, | |
| "learning_rate": 7.111111111111112e-06, | |
| "loss": 0.917742908000946, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.02452316076294278, | |
| "grad_norm": 1.420324683189392, | |
| "learning_rate": 7.555555555555556e-06, | |
| "loss": 1.0057953596115112, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.025885558583106268, | |
| "grad_norm": 1.1720658540725708, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.7948991060256958, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.027247956403269755, | |
| "grad_norm": 2.2465505599975586, | |
| "learning_rate": 8.444444444444446e-06, | |
| "loss": 0.9116527438163757, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02861035422343324, | |
| "grad_norm": 1.0430631637573242, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 0.8433143496513367, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.02997275204359673, | |
| "grad_norm": 0.9517339468002319, | |
| "learning_rate": 9.333333333333334e-06, | |
| "loss": 0.7266635894775391, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.031335149863760216, | |
| "grad_norm": 0.9054233431816101, | |
| "learning_rate": 9.777777777777779e-06, | |
| "loss": 0.6896510124206543, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0326975476839237, | |
| "grad_norm": 0.8445485830307007, | |
| "learning_rate": 1.0222222222222223e-05, | |
| "loss": 0.7166739106178284, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0340599455040872, | |
| "grad_norm": 0.799867570400238, | |
| "learning_rate": 1.0666666666666667e-05, | |
| "loss": 0.70599365234375, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.035422343324250684, | |
| "grad_norm": 0.8114046454429626, | |
| "learning_rate": 1.1111111111111113e-05, | |
| "loss": 0.68497633934021, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03678474114441417, | |
| "grad_norm": 0.717369556427002, | |
| "learning_rate": 1.1555555555555556e-05, | |
| "loss": 0.4579915404319763, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.03814713896457766, | |
| "grad_norm": 0.8005459904670715, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.6453820466995239, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.039509536784741145, | |
| "grad_norm": 0.7655712366104126, | |
| "learning_rate": 1.2444444444444446e-05, | |
| "loss": 0.5622988939285278, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.04087193460490463, | |
| "grad_norm": 0.6727678775787354, | |
| "learning_rate": 1.288888888888889e-05, | |
| "loss": 0.5062695741653442, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04223433242506812, | |
| "grad_norm": 0.7242804169654846, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.5595001578330994, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.043596730245231606, | |
| "grad_norm": 0.7660211324691772, | |
| "learning_rate": 1.377777777777778e-05, | |
| "loss": 0.5437361598014832, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04495912806539509, | |
| "grad_norm": 0.7480101585388184, | |
| "learning_rate": 1.4222222222222224e-05, | |
| "loss": 0.5963411927223206, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.04632152588555858, | |
| "grad_norm": 0.7058649659156799, | |
| "learning_rate": 1.4666666666666666e-05, | |
| "loss": 0.5767841339111328, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.047683923705722074, | |
| "grad_norm": 0.6593936681747437, | |
| "learning_rate": 1.5111111111111112e-05, | |
| "loss": 0.4501464366912842, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04904632152588556, | |
| "grad_norm": 0.6758292317390442, | |
| "learning_rate": 1.555555555555556e-05, | |
| "loss": 0.5993712544441223, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05040871934604905, | |
| "grad_norm": 0.7313498258590698, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.5267407894134521, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.051771117166212535, | |
| "grad_norm": 0.6532080173492432, | |
| "learning_rate": 1.6444444444444444e-05, | |
| "loss": 0.5639113783836365, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.05313351498637602, | |
| "grad_norm": 0.6379766464233398, | |
| "learning_rate": 1.688888888888889e-05, | |
| "loss": 0.5104179382324219, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.05449591280653951, | |
| "grad_norm": 0.6598156690597534, | |
| "learning_rate": 1.7333333333333336e-05, | |
| "loss": 0.5445395112037659, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.055858310626702996, | |
| "grad_norm": 0.6215161085128784, | |
| "learning_rate": 1.7777777777777777e-05, | |
| "loss": 0.5025588870048523, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.05722070844686648, | |
| "grad_norm": 0.5954686403274536, | |
| "learning_rate": 1.8222222222222224e-05, | |
| "loss": 0.46108683943748474, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.05858310626702997, | |
| "grad_norm": 0.6308771967887878, | |
| "learning_rate": 1.866666666666667e-05, | |
| "loss": 0.5122817158699036, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.05994550408719346, | |
| "grad_norm": 0.6523401141166687, | |
| "learning_rate": 1.9111111111111113e-05, | |
| "loss": 0.5358462333679199, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.06130790190735695, | |
| "grad_norm": 0.6872179508209229, | |
| "learning_rate": 1.9555555555555557e-05, | |
| "loss": 0.5521703958511353, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.06267029972752043, | |
| "grad_norm": 0.5889444947242737, | |
| "learning_rate": 2e-05, | |
| "loss": 0.43677768111228943, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.06403269754768393, | |
| "grad_norm": 0.6145616769790649, | |
| "learning_rate": 1.9999975629761854e-05, | |
| "loss": 0.581091046333313, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.0653950953678474, | |
| "grad_norm": 0.570745587348938, | |
| "learning_rate": 1.9999902519166192e-05, | |
| "loss": 0.5111463069915771, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0667574931880109, | |
| "grad_norm": 0.6257374286651611, | |
| "learning_rate": 1.9999780668569363e-05, | |
| "loss": 0.5142855644226074, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.0681198910081744, | |
| "grad_norm": 0.5916558504104614, | |
| "learning_rate": 1.9999610078565272e-05, | |
| "loss": 0.4986756443977356, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06948228882833787, | |
| "grad_norm": 0.517594575881958, | |
| "learning_rate": 1.999939074998538e-05, | |
| "loss": 0.40387624502182007, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.07084468664850137, | |
| "grad_norm": 0.5758000612258911, | |
| "learning_rate": 1.9999122683898708e-05, | |
| "loss": 0.4982905387878418, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.07220708446866485, | |
| "grad_norm": 0.5720963478088379, | |
| "learning_rate": 1.9998805881611816e-05, | |
| "loss": 0.4793824553489685, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.07356948228882834, | |
| "grad_norm": 0.5784019231796265, | |
| "learning_rate": 1.9998440344668827e-05, | |
| "loss": 0.5332019329071045, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.07493188010899182, | |
| "grad_norm": 0.5833513736724854, | |
| "learning_rate": 1.999802607485137e-05, | |
| "loss": 0.5038864016532898, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.07629427792915532, | |
| "grad_norm": 0.5767697095870972, | |
| "learning_rate": 1.999756307417863e-05, | |
| "loss": 0.5325872898101807, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.0776566757493188, | |
| "grad_norm": 0.5682862401008606, | |
| "learning_rate": 1.9997051344907284e-05, | |
| "loss": 0.5185012817382812, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.07901907356948229, | |
| "grad_norm": 0.614080011844635, | |
| "learning_rate": 1.9996490889531528e-05, | |
| "loss": 0.49285584688186646, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.08038147138964577, | |
| "grad_norm": 0.5887868404388428, | |
| "learning_rate": 1.999588171078305e-05, | |
| "loss": 0.5096205472946167, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.08174386920980926, | |
| "grad_norm": 0.556518018245697, | |
| "learning_rate": 1.9995223811631016e-05, | |
| "loss": 0.43357372283935547, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.08310626702997276, | |
| "grad_norm": 0.6359555125236511, | |
| "learning_rate": 1.9994517195282053e-05, | |
| "loss": 0.5556465983390808, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.08446866485013624, | |
| "grad_norm": 0.5386205911636353, | |
| "learning_rate": 1.999376186518025e-05, | |
| "loss": 0.46860817074775696, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.08583106267029973, | |
| "grad_norm": 0.6243954300880432, | |
| "learning_rate": 1.9992957825007115e-05, | |
| "loss": 0.5260002613067627, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.08719346049046321, | |
| "grad_norm": 0.5575344562530518, | |
| "learning_rate": 1.9992105078681587e-05, | |
| "loss": 0.48442351818084717, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.0885558583106267, | |
| "grad_norm": 0.5960455536842346, | |
| "learning_rate": 1.999120363035998e-05, | |
| "loss": 0.529167890548706, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.08991825613079019, | |
| "grad_norm": 0.49278023838996887, | |
| "learning_rate": 1.9990253484436004e-05, | |
| "loss": 0.4226565361022949, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.09128065395095368, | |
| "grad_norm": 0.64710932970047, | |
| "learning_rate": 1.9989254645540715e-05, | |
| "loss": 0.5989794731140137, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.09264305177111716, | |
| "grad_norm": 0.5093353390693665, | |
| "learning_rate": 1.9988207118542504e-05, | |
| "loss": 0.4339316487312317, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.09400544959128065, | |
| "grad_norm": 0.5457233786582947, | |
| "learning_rate": 1.998711090854706e-05, | |
| "loss": 0.46932798624038696, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.09536784741144415, | |
| "grad_norm": 0.6200721859931946, | |
| "learning_rate": 1.998596602089737e-05, | |
| "loss": 0.5474086999893188, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.09673024523160763, | |
| "grad_norm": 0.4950924217700958, | |
| "learning_rate": 1.9984772461173663e-05, | |
| "loss": 0.40740966796875, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.09809264305177112, | |
| "grad_norm": 0.5403825640678406, | |
| "learning_rate": 1.998353023519341e-05, | |
| "loss": 0.4256601929664612, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.0994550408719346, | |
| "grad_norm": 0.5570508241653442, | |
| "learning_rate": 1.9982239349011286e-05, | |
| "loss": 0.5229888558387756, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.1008174386920981, | |
| "grad_norm": 0.5410299897193909, | |
| "learning_rate": 1.9980899808919122e-05, | |
| "loss": 0.4738315939903259, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.10217983651226158, | |
| "grad_norm": 0.5304137468338013, | |
| "learning_rate": 1.9979511621445902e-05, | |
| "loss": 0.453370064496994, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.10354223433242507, | |
| "grad_norm": 0.5148070454597473, | |
| "learning_rate": 1.9978074793357726e-05, | |
| "loss": 0.4637362062931061, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.10490463215258855, | |
| "grad_norm": 0.4985616207122803, | |
| "learning_rate": 1.9976589331657754e-05, | |
| "loss": 0.41210031509399414, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.10626702997275204, | |
| "grad_norm": 0.5819671154022217, | |
| "learning_rate": 1.99750552435862e-05, | |
| "loss": 0.49417591094970703, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.10762942779291552, | |
| "grad_norm": 0.5260801315307617, | |
| "learning_rate": 1.997347253662028e-05, | |
| "loss": 0.46970927715301514, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.10899182561307902, | |
| "grad_norm": 0.7598191499710083, | |
| "learning_rate": 1.9971841218474184e-05, | |
| "loss": 0.5050591230392456, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11035422343324251, | |
| "grad_norm": 0.5178552865982056, | |
| "learning_rate": 1.997016129709904e-05, | |
| "loss": 0.48321259021759033, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.11171662125340599, | |
| "grad_norm": 0.588383674621582, | |
| "learning_rate": 1.9968432780682855e-05, | |
| "loss": 0.5230928659439087, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.11307901907356949, | |
| "grad_norm": 0.48950737714767456, | |
| "learning_rate": 1.9966655677650512e-05, | |
| "loss": 0.41843950748443604, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.11444141689373297, | |
| "grad_norm": 0.5205618739128113, | |
| "learning_rate": 1.9964829996663684e-05, | |
| "loss": 0.4415491819381714, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.11580381471389646, | |
| "grad_norm": 0.5614944100379944, | |
| "learning_rate": 1.9962955746620832e-05, | |
| "loss": 0.5067013502120972, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.11716621253405994, | |
| "grad_norm": 0.5298795104026794, | |
| "learning_rate": 1.9961032936657143e-05, | |
| "loss": 0.43599075078964233, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.11852861035422343, | |
| "grad_norm": 0.5152458548545837, | |
| "learning_rate": 1.9959061576144482e-05, | |
| "loss": 0.4749916195869446, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.11989100817438691, | |
| "grad_norm": 0.4422807991504669, | |
| "learning_rate": 1.9957041674691356e-05, | |
| "loss": 0.3546351194381714, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.12125340599455041, | |
| "grad_norm": 0.5386228561401367, | |
| "learning_rate": 1.995497324214285e-05, | |
| "loss": 0.488656610250473, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.1226158038147139, | |
| "grad_norm": 0.530255913734436, | |
| "learning_rate": 1.995285628858062e-05, | |
| "loss": 0.4679569602012634, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.12397820163487738, | |
| "grad_norm": 0.5118647217750549, | |
| "learning_rate": 1.995069082432279e-05, | |
| "loss": 0.4375801682472229, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.12534059945504086, | |
| "grad_norm": 0.5084353685379028, | |
| "learning_rate": 1.994847685992393e-05, | |
| "loss": 0.46044063568115234, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.12670299727520437, | |
| "grad_norm": 0.49600714445114136, | |
| "learning_rate": 1.9946214406175016e-05, | |
| "loss": 0.4706187844276428, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.12806539509536785, | |
| "grad_norm": 0.5475752949714661, | |
| "learning_rate": 1.9943903474103354e-05, | |
| "loss": 0.47216880321502686, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.12942779291553133, | |
| "grad_norm": 0.4829142093658447, | |
| "learning_rate": 1.994154407497254e-05, | |
| "loss": 0.43380069732666016, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.1307901907356948, | |
| "grad_norm": 0.4949623942375183, | |
| "learning_rate": 1.993913622028239e-05, | |
| "loss": 0.45172300934791565, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.13215258855585832, | |
| "grad_norm": 0.5360006093978882, | |
| "learning_rate": 1.9936679921768905e-05, | |
| "loss": 0.43014198541641235, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.1335149863760218, | |
| "grad_norm": 0.4718469977378845, | |
| "learning_rate": 1.9934175191404202e-05, | |
| "loss": 0.42612555623054504, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.13487738419618528, | |
| "grad_norm": 0.5138616561889648, | |
| "learning_rate": 1.9931622041396456e-05, | |
| "loss": 0.4884234666824341, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.1362397820163488, | |
| "grad_norm": 0.4901588559150696, | |
| "learning_rate": 1.9929020484189843e-05, | |
| "loss": 0.4561987519264221, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13760217983651227, | |
| "grad_norm": 0.46556147933006287, | |
| "learning_rate": 1.992637053246448e-05, | |
| "loss": 0.3546220660209656, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.13896457765667575, | |
| "grad_norm": 0.5201694965362549, | |
| "learning_rate": 1.992367219913635e-05, | |
| "loss": 0.4550625681877136, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.14032697547683923, | |
| "grad_norm": 0.5415034294128418, | |
| "learning_rate": 1.9920925497357265e-05, | |
| "loss": 0.43804478645324707, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.14168937329700274, | |
| "grad_norm": 0.475267231464386, | |
| "learning_rate": 1.9918130440514775e-05, | |
| "loss": 0.37618038058280945, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.14305177111716622, | |
| "grad_norm": 0.47954168915748596, | |
| "learning_rate": 1.9915287042232117e-05, | |
| "loss": 0.40945565700531006, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1444141689373297, | |
| "grad_norm": 0.4963965117931366, | |
| "learning_rate": 1.9912395316368163e-05, | |
| "loss": 0.39603012800216675, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.14577656675749318, | |
| "grad_norm": 0.5018640756607056, | |
| "learning_rate": 1.990945527701731e-05, | |
| "loss": 0.42917919158935547, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.14713896457765668, | |
| "grad_norm": 0.49071332812309265, | |
| "learning_rate": 1.9906466938509456e-05, | |
| "loss": 0.41273534297943115, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.14850136239782016, | |
| "grad_norm": 0.4668489992618561, | |
| "learning_rate": 1.9903430315409908e-05, | |
| "loss": 0.37738558650016785, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.14986376021798364, | |
| "grad_norm": 0.5912647843360901, | |
| "learning_rate": 1.9900345422519302e-05, | |
| "loss": 0.537000298500061, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.15122615803814715, | |
| "grad_norm": 0.501377522945404, | |
| "learning_rate": 1.9897212274873558e-05, | |
| "loss": 0.45420363545417786, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.15258855585831063, | |
| "grad_norm": 0.47566258907318115, | |
| "learning_rate": 1.989403088774379e-05, | |
| "loss": 0.4192931652069092, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1539509536784741, | |
| "grad_norm": 0.545165479183197, | |
| "learning_rate": 1.9890801276636226e-05, | |
| "loss": 0.5133235454559326, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.1553133514986376, | |
| "grad_norm": 0.4873102307319641, | |
| "learning_rate": 1.9887523457292145e-05, | |
| "loss": 0.45365816354751587, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.1566757493188011, | |
| "grad_norm": 0.45992809534072876, | |
| "learning_rate": 1.9884197445687795e-05, | |
| "loss": 0.4125695824623108, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.15803814713896458, | |
| "grad_norm": 0.489008367061615, | |
| "learning_rate": 1.9880823258034317e-05, | |
| "loss": 0.45539143681526184, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.15940054495912806, | |
| "grad_norm": 0.45899710059165955, | |
| "learning_rate": 1.987740091077766e-05, | |
| "loss": 0.3859185576438904, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.16076294277929154, | |
| "grad_norm": 0.48963943123817444, | |
| "learning_rate": 1.9873930420598508e-05, | |
| "loss": 0.4098602831363678, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.16212534059945505, | |
| "grad_norm": 0.5609935522079468, | |
| "learning_rate": 1.9870411804412196e-05, | |
| "loss": 0.541596531867981, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.16348773841961853, | |
| "grad_norm": 0.451092392206192, | |
| "learning_rate": 1.9866845079368628e-05, | |
| "loss": 0.369413822889328, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.164850136239782, | |
| "grad_norm": 0.5160583257675171, | |
| "learning_rate": 1.9863230262852188e-05, | |
| "loss": 0.48153048753738403, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.16621253405994552, | |
| "grad_norm": 0.5104784369468689, | |
| "learning_rate": 1.9859567372481666e-05, | |
| "loss": 0.463962197303772, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.167574931880109, | |
| "grad_norm": 0.4819332957267761, | |
| "learning_rate": 1.9855856426110163e-05, | |
| "loss": 0.4457293152809143, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.16893732970027248, | |
| "grad_norm": 0.545768678188324, | |
| "learning_rate": 1.9852097441825017e-05, | |
| "loss": 0.4510464072227478, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.17029972752043596, | |
| "grad_norm": 0.46205708384513855, | |
| "learning_rate": 1.9848290437947683e-05, | |
| "loss": 0.4033975303173065, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.17166212534059946, | |
| "grad_norm": 0.4812323749065399, | |
| "learning_rate": 1.9844435433033687e-05, | |
| "loss": 0.42786744236946106, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.17302452316076294, | |
| "grad_norm": 0.5063855051994324, | |
| "learning_rate": 1.9840532445872504e-05, | |
| "loss": 0.439453125, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.17438692098092642, | |
| "grad_norm": 0.42103609442710876, | |
| "learning_rate": 1.983658149548748e-05, | |
| "loss": 0.372048556804657, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.17574931880108993, | |
| "grad_norm": 0.4785323739051819, | |
| "learning_rate": 1.9832582601135737e-05, | |
| "loss": 0.4088958501815796, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.1771117166212534, | |
| "grad_norm": 0.4796772301197052, | |
| "learning_rate": 1.9828535782308074e-05, | |
| "loss": 0.4392489492893219, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1784741144414169, | |
| "grad_norm": 0.5020930767059326, | |
| "learning_rate": 1.9824441058728882e-05, | |
| "loss": 0.4506283402442932, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.17983651226158037, | |
| "grad_norm": 0.44487935304641724, | |
| "learning_rate": 1.9820298450356036e-05, | |
| "loss": 0.37223148345947266, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.18119891008174388, | |
| "grad_norm": 0.48866400122642517, | |
| "learning_rate": 1.9816107977380805e-05, | |
| "loss": 0.4460309147834778, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.18256130790190736, | |
| "grad_norm": 0.4672461450099945, | |
| "learning_rate": 1.9811869660227757e-05, | |
| "loss": 0.4269269108772278, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.18392370572207084, | |
| "grad_norm": 0.49336063861846924, | |
| "learning_rate": 1.980758351955465e-05, | |
| "loss": 0.4544104039669037, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.18528610354223432, | |
| "grad_norm": 0.45952633023262024, | |
| "learning_rate": 1.9803249576252338e-05, | |
| "loss": 0.38991525769233704, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.18664850136239783, | |
| "grad_norm": 0.48544129729270935, | |
| "learning_rate": 1.979886785144467e-05, | |
| "loss": 0.42076367139816284, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.1880108991825613, | |
| "grad_norm": 0.38183560967445374, | |
| "learning_rate": 1.9794438366488377e-05, | |
| "loss": 0.2765650153160095, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.1893732970027248, | |
| "grad_norm": 0.45844128727912903, | |
| "learning_rate": 1.9789961142972983e-05, | |
| "loss": 0.3620038628578186, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.1907356948228883, | |
| "grad_norm": 0.4732954502105713, | |
| "learning_rate": 1.9785436202720687e-05, | |
| "loss": 0.40507155656814575, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.19209809264305178, | |
| "grad_norm": 0.4901241362094879, | |
| "learning_rate": 1.978086356778626e-05, | |
| "loss": 0.4239882826805115, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.19346049046321526, | |
| "grad_norm": 0.4907649755477905, | |
| "learning_rate": 1.9776243260456953e-05, | |
| "loss": 0.4591466188430786, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.19482288828337874, | |
| "grad_norm": 0.4430483877658844, | |
| "learning_rate": 1.977157530325235e-05, | |
| "loss": 0.4003378450870514, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.19618528610354224, | |
| "grad_norm": 0.4529455006122589, | |
| "learning_rate": 1.976685971892431e-05, | |
| "loss": 0.4060153663158417, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.19754768392370572, | |
| "grad_norm": 0.500708281993866, | |
| "learning_rate": 1.9762096530456803e-05, | |
| "loss": 0.4507729411125183, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.1989100817438692, | |
| "grad_norm": 0.4838204085826874, | |
| "learning_rate": 1.9757285761065846e-05, | |
| "loss": 0.4445388913154602, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.20027247956403268, | |
| "grad_norm": 0.47918063402175903, | |
| "learning_rate": 1.9752427434199356e-05, | |
| "loss": 0.4199233651161194, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.2016348773841962, | |
| "grad_norm": 0.4424859881401062, | |
| "learning_rate": 1.9747521573537048e-05, | |
| "loss": 0.3845542073249817, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.20299727520435967, | |
| "grad_norm": 0.5027382373809814, | |
| "learning_rate": 1.974256820299032e-05, | |
| "loss": 0.4608474373817444, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.20435967302452315, | |
| "grad_norm": 0.4017482399940491, | |
| "learning_rate": 1.9737567346702137e-05, | |
| "loss": 0.35635966062545776, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.20572207084468666, | |
| "grad_norm": 0.4682810604572296, | |
| "learning_rate": 1.973251902904691e-05, | |
| "loss": 0.3920828104019165, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.20708446866485014, | |
| "grad_norm": 0.4764021337032318, | |
| "learning_rate": 1.9727423274630385e-05, | |
| "loss": 0.43593406677246094, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.20844686648501362, | |
| "grad_norm": 0.5115736126899719, | |
| "learning_rate": 1.97222801082895e-05, | |
| "loss": 0.4738002419471741, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.2098092643051771, | |
| "grad_norm": 0.4896221458911896, | |
| "learning_rate": 1.9717089555092306e-05, | |
| "loss": 0.4509856104850769, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.2111716621253406, | |
| "grad_norm": 0.4766773581504822, | |
| "learning_rate": 1.97118516403378e-05, | |
| "loss": 0.44380685687065125, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.2125340599455041, | |
| "grad_norm": 0.5250641107559204, | |
| "learning_rate": 1.9706566389555825e-05, | |
| "loss": 0.5936962962150574, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.21389645776566757, | |
| "grad_norm": 0.4729316830635071, | |
| "learning_rate": 1.970123382850695e-05, | |
| "loss": 0.4068388342857361, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.21525885558583105, | |
| "grad_norm": 0.4348066449165344, | |
| "learning_rate": 1.969585398318233e-05, | |
| "loss": 0.3906315267086029, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.21662125340599456, | |
| "grad_norm": 0.4758957624435425, | |
| "learning_rate": 1.969042687980359e-05, | |
| "loss": 0.4242008924484253, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.21798365122615804, | |
| "grad_norm": 0.49115386605262756, | |
| "learning_rate": 1.9684952544822685e-05, | |
| "loss": 0.44710224866867065, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.21934604904632152, | |
| "grad_norm": 0.506231963634491, | |
| "learning_rate": 1.9679431004921788e-05, | |
| "loss": 0.49800950288772583, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.22070844686648503, | |
| "grad_norm": 0.4652118682861328, | |
| "learning_rate": 1.9673862287013144e-05, | |
| "loss": 0.397954523563385, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.2220708446866485, | |
| "grad_norm": 0.4163675010204315, | |
| "learning_rate": 1.9668246418238955e-05, | |
| "loss": 0.33537113666534424, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.22343324250681199, | |
| "grad_norm": 0.46636438369750977, | |
| "learning_rate": 1.9662583425971227e-05, | |
| "loss": 0.4491173028945923, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.22479564032697547, | |
| "grad_norm": 0.471574068069458, | |
| "learning_rate": 1.9656873337811658e-05, | |
| "loss": 0.4455568790435791, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.22615803814713897, | |
| "grad_norm": 0.42547208070755005, | |
| "learning_rate": 1.9651116181591493e-05, | |
| "loss": 0.33267468214035034, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.22752043596730245, | |
| "grad_norm": 0.46092942357063293, | |
| "learning_rate": 1.9645311985371374e-05, | |
| "loss": 0.38794922828674316, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.22888283378746593, | |
| "grad_norm": 0.4635142683982849, | |
| "learning_rate": 1.9639460777441243e-05, | |
| "loss": 0.41988471150398254, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.23024523160762944, | |
| "grad_norm": 0.5089847445487976, | |
| "learning_rate": 1.9633562586320157e-05, | |
| "loss": 0.4452226758003235, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.23160762942779292, | |
| "grad_norm": 0.42120999097824097, | |
| "learning_rate": 1.962761744075618e-05, | |
| "loss": 0.34265103936195374, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2329700272479564, | |
| "grad_norm": 0.37199750542640686, | |
| "learning_rate": 1.9621625369726246e-05, | |
| "loss": 0.2743265926837921, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.23433242506811988, | |
| "grad_norm": 0.45613083243370056, | |
| "learning_rate": 1.961558640243598e-05, | |
| "loss": 0.3892587423324585, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.2356948228882834, | |
| "grad_norm": 0.5023411512374878, | |
| "learning_rate": 1.9609500568319605e-05, | |
| "loss": 0.4512037932872772, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.23705722070844687, | |
| "grad_norm": 0.4689593017101288, | |
| "learning_rate": 1.960336789703977e-05, | |
| "loss": 0.4525565207004547, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.23841961852861035, | |
| "grad_norm": 0.4769952595233917, | |
| "learning_rate": 1.9597188418487395e-05, | |
| "loss": 0.4317702651023865, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.23978201634877383, | |
| "grad_norm": 0.44649723172187805, | |
| "learning_rate": 1.959096216278156e-05, | |
| "loss": 0.4062075912952423, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.24114441416893734, | |
| "grad_norm": 0.4167250096797943, | |
| "learning_rate": 1.958468916026933e-05, | |
| "loss": 0.3360878825187683, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.24250681198910082, | |
| "grad_norm": 0.501832127571106, | |
| "learning_rate": 1.957836944152562e-05, | |
| "loss": 0.4890025854110718, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.2438692098092643, | |
| "grad_norm": 0.4422992169857025, | |
| "learning_rate": 1.957200303735304e-05, | |
| "loss": 0.3915623128414154, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.2452316076294278, | |
| "grad_norm": 0.43513938784599304, | |
| "learning_rate": 1.9565589978781747e-05, | |
| "loss": 0.3817967474460602, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.24659400544959129, | |
| "grad_norm": 0.5025864839553833, | |
| "learning_rate": 1.955913029706929e-05, | |
| "loss": 0.47003644704818726, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.24795640326975477, | |
| "grad_norm": 0.481099009513855, | |
| "learning_rate": 1.9552624023700472e-05, | |
| "loss": 0.4433462917804718, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.24931880108991825, | |
| "grad_norm": 0.5063067078590393, | |
| "learning_rate": 1.9546071190387175e-05, | |
| "loss": 0.41345930099487305, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.2506811989100817, | |
| "grad_norm": 0.46571362018585205, | |
| "learning_rate": 1.953947182906822e-05, | |
| "loss": 0.4099092483520508, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.25204359673024523, | |
| "grad_norm": 0.43692854046821594, | |
| "learning_rate": 1.953282597190921e-05, | |
| "loss": 0.34167295694351196, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.25340599455040874, | |
| "grad_norm": 0.46433547139167786, | |
| "learning_rate": 1.9526133651302372e-05, | |
| "loss": 0.4195370674133301, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2547683923705722, | |
| "grad_norm": 0.4526609778404236, | |
| "learning_rate": 1.951939489986639e-05, | |
| "loss": 0.4328376352787018, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.2561307901907357, | |
| "grad_norm": 0.4832986891269684, | |
| "learning_rate": 1.951260975044626e-05, | |
| "loss": 0.42811205983161926, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2574931880108992, | |
| "grad_norm": 0.46561217308044434, | |
| "learning_rate": 1.950577823611313e-05, | |
| "loss": 0.4136439561843872, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.25885558583106266, | |
| "grad_norm": 0.7555133700370789, | |
| "learning_rate": 1.9498900390164118e-05, | |
| "loss": 0.44394075870513916, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.26021798365122617, | |
| "grad_norm": 0.49045872688293457, | |
| "learning_rate": 1.949197624612218e-05, | |
| "loss": 0.47819724678993225, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.2615803814713896, | |
| "grad_norm": 0.4706166386604309, | |
| "learning_rate": 1.9485005837735918e-05, | |
| "loss": 0.4488511383533478, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.26294277929155313, | |
| "grad_norm": 0.4559481143951416, | |
| "learning_rate": 1.947798919897944e-05, | |
| "loss": 0.35875898599624634, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.26430517711171664, | |
| "grad_norm": 0.459917277097702, | |
| "learning_rate": 1.947092636405217e-05, | |
| "loss": 0.43740493059158325, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.2656675749318801, | |
| "grad_norm": 0.48496246337890625, | |
| "learning_rate": 1.946381736737871e-05, | |
| "loss": 0.4613405466079712, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.2670299727520436, | |
| "grad_norm": 0.5405692458152771, | |
| "learning_rate": 1.9456662243608643e-05, | |
| "loss": 0.5320178270339966, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2683923705722071, | |
| "grad_norm": 0.4415741264820099, | |
| "learning_rate": 1.9449461027616382e-05, | |
| "loss": 0.376873642206192, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.26975476839237056, | |
| "grad_norm": 0.4813419282436371, | |
| "learning_rate": 1.9442213754501002e-05, | |
| "loss": 0.4509425759315491, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.27111716621253407, | |
| "grad_norm": 0.511968731880188, | |
| "learning_rate": 1.9434920459586054e-05, | |
| "loss": 0.5076147317886353, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.2724795640326976, | |
| "grad_norm": 0.4836997091770172, | |
| "learning_rate": 1.9427581178419408e-05, | |
| "loss": 0.44785064458847046, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.273841961852861, | |
| "grad_norm": 0.4130904972553253, | |
| "learning_rate": 1.9420195946773063e-05, | |
| "loss": 0.36012643575668335, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.27520435967302453, | |
| "grad_norm": 0.4587012827396393, | |
| "learning_rate": 1.9412764800643e-05, | |
| "loss": 0.45600613951683044, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.276566757493188, | |
| "grad_norm": 0.4676440954208374, | |
| "learning_rate": 1.940528777624897e-05, | |
| "loss": 0.4534785747528076, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.2779291553133515, | |
| "grad_norm": 0.5145980715751648, | |
| "learning_rate": 1.939776491003435e-05, | |
| "loss": 0.48491308093070984, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.279291553133515, | |
| "grad_norm": 0.41034233570098877, | |
| "learning_rate": 1.9390196238665944e-05, | |
| "loss": 0.34817013144493103, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.28065395095367845, | |
| "grad_norm": 0.5155647397041321, | |
| "learning_rate": 1.9382581799033824e-05, | |
| "loss": 0.5011834502220154, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.28201634877384196, | |
| "grad_norm": 0.4596708416938782, | |
| "learning_rate": 1.9374921628251127e-05, | |
| "loss": 0.39814120531082153, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.28337874659400547, | |
| "grad_norm": 0.4686714708805084, | |
| "learning_rate": 1.936721576365389e-05, | |
| "loss": 0.48343995213508606, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2847411444141689, | |
| "grad_norm": 0.47685396671295166, | |
| "learning_rate": 1.935946424280087e-05, | |
| "loss": 0.3693901002407074, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.28610354223433243, | |
| "grad_norm": 0.46070703864097595, | |
| "learning_rate": 1.935166710347334e-05, | |
| "loss": 0.40199288725852966, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.28746594005449594, | |
| "grad_norm": 0.42680197954177856, | |
| "learning_rate": 1.9343824383674936e-05, | |
| "loss": 0.39812421798706055, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.2888283378746594, | |
| "grad_norm": 0.44704172015190125, | |
| "learning_rate": 1.9335936121631442e-05, | |
| "loss": 0.36939626932144165, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2901907356948229, | |
| "grad_norm": 0.39125990867614746, | |
| "learning_rate": 1.9328002355790624e-05, | |
| "loss": 0.332170695066452, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.29155313351498635, | |
| "grad_norm": 0.4364356994628906, | |
| "learning_rate": 1.9320023124822035e-05, | |
| "loss": 0.3795939087867737, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.29291553133514986, | |
| "grad_norm": 0.427299827337265, | |
| "learning_rate": 1.931199846761683e-05, | |
| "loss": 0.3539160192012787, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.29427792915531337, | |
| "grad_norm": 0.3824451267719269, | |
| "learning_rate": 1.9303928423287568e-05, | |
| "loss": 0.33199343085289, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.2956403269754768, | |
| "grad_norm": 0.4601697027683258, | |
| "learning_rate": 1.929581303116803e-05, | |
| "loss": 0.4479762017726898, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.2970027247956403, | |
| "grad_norm": 0.4551186263561249, | |
| "learning_rate": 1.9287652330813024e-05, | |
| "loss": 0.4017976224422455, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.29836512261580383, | |
| "grad_norm": 0.4389873445034027, | |
| "learning_rate": 1.9279446361998188e-05, | |
| "loss": 0.3540688157081604, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.2997275204359673, | |
| "grad_norm": 0.45103585720062256, | |
| "learning_rate": 1.927119516471981e-05, | |
| "loss": 0.4199802875518799, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3010899182561308, | |
| "grad_norm": 0.4221908450126648, | |
| "learning_rate": 1.9262898779194613e-05, | |
| "loss": 0.36206182837486267, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.3024523160762943, | |
| "grad_norm": 0.46009913086891174, | |
| "learning_rate": 1.9254557245859583e-05, | |
| "loss": 0.4081338047981262, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.30381471389645776, | |
| "grad_norm": 0.46696937084198, | |
| "learning_rate": 1.924617060537175e-05, | |
| "loss": 0.36600178480148315, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.30517711171662126, | |
| "grad_norm": 0.44915148615837097, | |
| "learning_rate": 1.9237738898607992e-05, | |
| "loss": 0.3773924708366394, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.3065395095367847, | |
| "grad_norm": 0.40148746967315674, | |
| "learning_rate": 1.9229262166664854e-05, | |
| "loss": 0.29502028226852417, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.3079019073569482, | |
| "grad_norm": 0.3726682960987091, | |
| "learning_rate": 1.9220740450858328e-05, | |
| "loss": 0.3030723035335541, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.30926430517711173, | |
| "grad_norm": 0.5065223574638367, | |
| "learning_rate": 1.921217379272367e-05, | |
| "loss": 0.4941141605377197, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.3106267029972752, | |
| "grad_norm": 0.4267047941684723, | |
| "learning_rate": 1.9203562234015172e-05, | |
| "loss": 0.35858261585235596, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.3119891008174387, | |
| "grad_norm": 0.38982680439949036, | |
| "learning_rate": 1.9194905816705988e-05, | |
| "loss": 0.3197462856769562, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.3133514986376022, | |
| "grad_norm": 0.44453105330467224, | |
| "learning_rate": 1.91862045829879e-05, | |
| "loss": 0.40640050172805786, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.31471389645776565, | |
| "grad_norm": 0.492948442697525, | |
| "learning_rate": 1.9177458575271143e-05, | |
| "loss": 0.4595552682876587, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.31607629427792916, | |
| "grad_norm": 0.4012284278869629, | |
| "learning_rate": 1.916866783618417e-05, | |
| "loss": 0.3264189660549164, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.31743869209809267, | |
| "grad_norm": 0.4652048647403717, | |
| "learning_rate": 1.9159832408573467e-05, | |
| "loss": 0.4094054698944092, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.3188010899182561, | |
| "grad_norm": 0.46388110518455505, | |
| "learning_rate": 1.9150952335503325e-05, | |
| "loss": 0.4324240982532501, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.3201634877384196, | |
| "grad_norm": 0.44346991181373596, | |
| "learning_rate": 1.9142027660255645e-05, | |
| "loss": 0.369159996509552, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.3215258855585831, | |
| "grad_norm": 0.42788419127464294, | |
| "learning_rate": 1.9133058426329717e-05, | |
| "loss": 0.37542596459388733, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.3228882833787466, | |
| "grad_norm": 0.41329479217529297, | |
| "learning_rate": 1.912404467744202e-05, | |
| "loss": 0.36349016427993774, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.3242506811989101, | |
| "grad_norm": 0.47673869132995605, | |
| "learning_rate": 1.911498645752599e-05, | |
| "loss": 0.4231317937374115, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.32561307901907355, | |
| "grad_norm": 0.45236167311668396, | |
| "learning_rate": 1.9105883810731822e-05, | |
| "loss": 0.41860949993133545, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.32697547683923706, | |
| "grad_norm": 0.47766655683517456, | |
| "learning_rate": 1.9096736781426252e-05, | |
| "loss": 0.45107653737068176, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.32833787465940056, | |
| "grad_norm": 0.4172971546649933, | |
| "learning_rate": 1.9087545414192338e-05, | |
| "loss": 0.36643242835998535, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.329700272479564, | |
| "grad_norm": 0.4231413006782532, | |
| "learning_rate": 1.907830975382924e-05, | |
| "loss": 0.3520575165748596, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.3310626702997275, | |
| "grad_norm": 0.4126909375190735, | |
| "learning_rate": 1.9069029845352006e-05, | |
| "loss": 0.36405885219573975, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.33242506811989103, | |
| "grad_norm": 0.42022719979286194, | |
| "learning_rate": 1.9059705733991352e-05, | |
| "loss": 0.3583207130432129, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.3337874659400545, | |
| "grad_norm": 0.4531959593296051, | |
| "learning_rate": 1.9050337465193443e-05, | |
| "loss": 0.38180652260780334, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.335149863760218, | |
| "grad_norm": 0.42306748032569885, | |
| "learning_rate": 1.9040925084619663e-05, | |
| "loss": 0.3619072437286377, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.33651226158038144, | |
| "grad_norm": 0.46138980984687805, | |
| "learning_rate": 1.9031468638146408e-05, | |
| "loss": 0.38578206300735474, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.33787465940054495, | |
| "grad_norm": 0.43929317593574524, | |
| "learning_rate": 1.9021968171864843e-05, | |
| "loss": 0.4235476851463318, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.33923705722070846, | |
| "grad_norm": 0.401759535074234, | |
| "learning_rate": 1.90124237320807e-05, | |
| "loss": 0.34792521595954895, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.3405994550408719, | |
| "grad_norm": 0.478180468082428, | |
| "learning_rate": 1.900283536531403e-05, | |
| "loss": 0.4711982011795044, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3419618528610354, | |
| "grad_norm": 0.43466177582740784, | |
| "learning_rate": 1.8993203118298988e-05, | |
| "loss": 0.36447733640670776, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.34332425068119893, | |
| "grad_norm": 0.44407346844673157, | |
| "learning_rate": 1.8983527037983606e-05, | |
| "loss": 0.38788866996765137, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.3446866485013624, | |
| "grad_norm": 0.4327251613140106, | |
| "learning_rate": 1.8973807171529556e-05, | |
| "loss": 0.38622599840164185, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.3460490463215259, | |
| "grad_norm": 0.44564682245254517, | |
| "learning_rate": 1.8964043566311942e-05, | |
| "loss": 0.3841802477836609, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3474114441416894, | |
| "grad_norm": 0.4468725621700287, | |
| "learning_rate": 1.8954236269919026e-05, | |
| "loss": 0.3644585609436035, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.34877384196185285, | |
| "grad_norm": 0.4858334958553314, | |
| "learning_rate": 1.8944385330152047e-05, | |
| "loss": 0.4247015118598938, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.35013623978201636, | |
| "grad_norm": 0.4670877456665039, | |
| "learning_rate": 1.893449079502495e-05, | |
| "loss": 0.41649121046066284, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.35149863760217986, | |
| "grad_norm": 0.42309293150901794, | |
| "learning_rate": 1.892455271276418e-05, | |
| "loss": 0.3775060772895813, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.3528610354223433, | |
| "grad_norm": 0.42874789237976074, | |
| "learning_rate": 1.8914571131808407e-05, | |
| "loss": 0.40018612146377563, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.3542234332425068, | |
| "grad_norm": 0.45877915620803833, | |
| "learning_rate": 1.8904546100808346e-05, | |
| "loss": 0.4454330801963806, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3555858310626703, | |
| "grad_norm": 0.4848128855228424, | |
| "learning_rate": 1.889447766862647e-05, | |
| "loss": 0.4608079791069031, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.3569482288828338, | |
| "grad_norm": 0.38830137252807617, | |
| "learning_rate": 1.8884365884336796e-05, | |
| "loss": 0.34983474016189575, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.3583106267029973, | |
| "grad_norm": 0.45347243547439575, | |
| "learning_rate": 1.8874210797224646e-05, | |
| "loss": 0.37533241510391235, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.35967302452316074, | |
| "grad_norm": 0.4399562478065491, | |
| "learning_rate": 1.8864012456786397e-05, | |
| "loss": 0.3630625903606415, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.36103542234332425, | |
| "grad_norm": 0.4255392849445343, | |
| "learning_rate": 1.8853770912729243e-05, | |
| "loss": 0.37027978897094727, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.36239782016348776, | |
| "grad_norm": 0.423592746257782, | |
| "learning_rate": 1.884348621497096e-05, | |
| "loss": 0.37999391555786133, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.3637602179836512, | |
| "grad_norm": 0.4627998173236847, | |
| "learning_rate": 1.8833158413639656e-05, | |
| "loss": 0.43481695652008057, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.3651226158038147, | |
| "grad_norm": 0.4129141867160797, | |
| "learning_rate": 1.8822787559073522e-05, | |
| "loss": 0.35913515090942383, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.36648501362397823, | |
| "grad_norm": 0.45795056223869324, | |
| "learning_rate": 1.8812373701820603e-05, | |
| "loss": 0.3965306878089905, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.3678474114441417, | |
| "grad_norm": 0.40726739168167114, | |
| "learning_rate": 1.8801916892638533e-05, | |
| "loss": 0.3417432904243469, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3692098092643052, | |
| "grad_norm": 0.4479921758174896, | |
| "learning_rate": 1.8791417182494296e-05, | |
| "loss": 0.4090738594532013, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.37057220708446864, | |
| "grad_norm": 0.380698561668396, | |
| "learning_rate": 1.878087462256398e-05, | |
| "loss": 0.31110888719558716, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.37193460490463215, | |
| "grad_norm": 0.4521123468875885, | |
| "learning_rate": 1.8770289264232526e-05, | |
| "loss": 0.414950966835022, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.37329700272479566, | |
| "grad_norm": 0.4697805643081665, | |
| "learning_rate": 1.875966115909347e-05, | |
| "loss": 0.42033064365386963, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.3746594005449591, | |
| "grad_norm": 0.4483690857887268, | |
| "learning_rate": 1.8748990358948713e-05, | |
| "loss": 0.3845226466655731, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.3760217983651226, | |
| "grad_norm": 0.4191807210445404, | |
| "learning_rate": 1.8738276915808232e-05, | |
| "loss": 0.3214520514011383, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.3773841961852861, | |
| "grad_norm": 0.42148375511169434, | |
| "learning_rate": 1.8727520881889865e-05, | |
| "loss": 0.3698553442955017, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.3787465940054496, | |
| "grad_norm": 0.42890867590904236, | |
| "learning_rate": 1.8716722309619033e-05, | |
| "loss": 0.35935938358306885, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3801089918256131, | |
| "grad_norm": 0.4575975835323334, | |
| "learning_rate": 1.870588125162849e-05, | |
| "loss": 0.3697071075439453, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.3814713896457766, | |
| "grad_norm": 0.46115341782569885, | |
| "learning_rate": 1.8694997760758073e-05, | |
| "loss": 0.3968576192855835, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.38283378746594005, | |
| "grad_norm": 0.494211882352829, | |
| "learning_rate": 1.8684071890054425e-05, | |
| "loss": 0.44661998748779297, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.38419618528610355, | |
| "grad_norm": 0.4422100782394409, | |
| "learning_rate": 1.8673103692770772e-05, | |
| "loss": 0.39138174057006836, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.385558583106267, | |
| "grad_norm": 0.39777207374572754, | |
| "learning_rate": 1.8662093222366623e-05, | |
| "loss": 0.34542185068130493, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.3869209809264305, | |
| "grad_norm": 0.38417258858680725, | |
| "learning_rate": 1.8651040532507538e-05, | |
| "loss": 0.3356142044067383, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.388283378746594, | |
| "grad_norm": 0.46102169156074524, | |
| "learning_rate": 1.863994567706485e-05, | |
| "loss": 0.430128276348114, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.3896457765667575, | |
| "grad_norm": 0.4118143618106842, | |
| "learning_rate": 1.8628808710115417e-05, | |
| "loss": 0.34772276878356934, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.391008174386921, | |
| "grad_norm": 0.39578622579574585, | |
| "learning_rate": 1.861762968594135e-05, | |
| "loss": 0.32895392179489136, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.3923705722070845, | |
| "grad_norm": 0.3824600875377655, | |
| "learning_rate": 1.8606408659029736e-05, | |
| "loss": 0.32151734828948975, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.39373297002724794, | |
| "grad_norm": 0.4184086322784424, | |
| "learning_rate": 1.8595145684072398e-05, | |
| "loss": 0.3639921247959137, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.39509536784741145, | |
| "grad_norm": 0.45187273621559143, | |
| "learning_rate": 1.8583840815965614e-05, | |
| "loss": 0.38925087451934814, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.39645776566757496, | |
| "grad_norm": 0.39254751801490784, | |
| "learning_rate": 1.8572494109809852e-05, | |
| "loss": 0.3342083692550659, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.3978201634877384, | |
| "grad_norm": 0.43775174021720886, | |
| "learning_rate": 1.856110562090949e-05, | |
| "loss": 0.41103285551071167, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.3991825613079019, | |
| "grad_norm": 0.4114571213722229, | |
| "learning_rate": 1.8549675404772574e-05, | |
| "loss": 0.36463573575019836, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.40054495912806537, | |
| "grad_norm": 0.4514394998550415, | |
| "learning_rate": 1.853820351711052e-05, | |
| "loss": 0.40414565801620483, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.4019073569482289, | |
| "grad_norm": 0.4267936944961548, | |
| "learning_rate": 1.852669001383785e-05, | |
| "loss": 0.39556679129600525, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.4032697547683924, | |
| "grad_norm": 0.3571447432041168, | |
| "learning_rate": 1.8515134951071932e-05, | |
| "loss": 0.27661603689193726, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.40463215258855584, | |
| "grad_norm": 0.4328432083129883, | |
| "learning_rate": 1.8503538385132692e-05, | |
| "loss": 0.3701832890510559, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.40599455040871935, | |
| "grad_norm": 0.3956933915615082, | |
| "learning_rate": 1.849190037254234e-05, | |
| "loss": 0.31413373351097107, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.40735694822888285, | |
| "grad_norm": 0.4569341540336609, | |
| "learning_rate": 1.8480220970025114e-05, | |
| "loss": 0.46876251697540283, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.4087193460490463, | |
| "grad_norm": 0.4352339208126068, | |
| "learning_rate": 1.8468500234506965e-05, | |
| "loss": 0.34557855129241943, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4100817438692098, | |
| "grad_norm": 0.47778064012527466, | |
| "learning_rate": 1.8456738223115325e-05, | |
| "loss": 0.4523351490497589, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.4114441416893733, | |
| "grad_norm": 0.3924112021923065, | |
| "learning_rate": 1.8444934993178796e-05, | |
| "loss": 0.32918280363082886, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.4128065395095368, | |
| "grad_norm": 0.4721715748310089, | |
| "learning_rate": 1.843309060222688e-05, | |
| "loss": 0.4108656346797943, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.4141689373297003, | |
| "grad_norm": 0.4324953556060791, | |
| "learning_rate": 1.8421205107989707e-05, | |
| "loss": 0.39233145117759705, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.41553133514986373, | |
| "grad_norm": 0.4237790107727051, | |
| "learning_rate": 1.8409278568397742e-05, | |
| "loss": 0.33894914388656616, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.41689373297002724, | |
| "grad_norm": 0.3769596815109253, | |
| "learning_rate": 1.83973110415815e-05, | |
| "loss": 0.28818315267562866, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.41825613079019075, | |
| "grad_norm": 0.39678072929382324, | |
| "learning_rate": 1.8385302585871284e-05, | |
| "loss": 0.3248705565929413, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.4196185286103542, | |
| "grad_norm": 0.4702602028846741, | |
| "learning_rate": 1.8373253259796877e-05, | |
| "loss": 0.44057464599609375, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.4209809264305177, | |
| "grad_norm": 0.43663206696510315, | |
| "learning_rate": 1.8361163122087265e-05, | |
| "loss": 0.41709601879119873, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.4223433242506812, | |
| "grad_norm": 0.41904065012931824, | |
| "learning_rate": 1.8349032231670363e-05, | |
| "loss": 0.3891496956348419, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.42370572207084467, | |
| "grad_norm": 0.42954081296920776, | |
| "learning_rate": 1.8336860647672702e-05, | |
| "loss": 0.38407522439956665, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.4250681198910082, | |
| "grad_norm": 0.40387260913848877, | |
| "learning_rate": 1.8324648429419164e-05, | |
| "loss": 0.35146600008010864, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4264305177111717, | |
| "grad_norm": 0.3923007845878601, | |
| "learning_rate": 1.831239563643268e-05, | |
| "loss": 0.3610236942768097, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.42779291553133514, | |
| "grad_norm": 0.8759288191795349, | |
| "learning_rate": 1.8300102328433952e-05, | |
| "loss": 0.4138031601905823, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.42915531335149865, | |
| "grad_norm": 0.4077308475971222, | |
| "learning_rate": 1.8287768565341143e-05, | |
| "loss": 0.3436448574066162, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.4305177111716621, | |
| "grad_norm": 0.4676551818847656, | |
| "learning_rate": 1.82753944072696e-05, | |
| "loss": 0.4458121061325073, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.4318801089918256, | |
| "grad_norm": 0.3823956847190857, | |
| "learning_rate": 1.826297991453157e-05, | |
| "loss": 0.32054227590560913, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.4332425068119891, | |
| "grad_norm": 0.42467930912971497, | |
| "learning_rate": 1.8250525147635873e-05, | |
| "loss": 0.3760542869567871, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.43460490463215257, | |
| "grad_norm": 0.4084574580192566, | |
| "learning_rate": 1.8238030167287638e-05, | |
| "loss": 0.36126622557640076, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.4359673024523161, | |
| "grad_norm": 0.3886258602142334, | |
| "learning_rate": 1.8225495034387996e-05, | |
| "loss": 0.324174702167511, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4373297002724796, | |
| "grad_norm": 0.3744279146194458, | |
| "learning_rate": 1.8212919810033777e-05, | |
| "loss": 0.3609943985939026, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.43869209809264303, | |
| "grad_norm": 0.39002716541290283, | |
| "learning_rate": 1.820030455551723e-05, | |
| "loss": 0.33594828844070435, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.44005449591280654, | |
| "grad_norm": 0.4567135274410248, | |
| "learning_rate": 1.8187649332325702e-05, | |
| "loss": 0.4041770100593567, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.44141689373297005, | |
| "grad_norm": 0.40135565400123596, | |
| "learning_rate": 1.8174954202141352e-05, | |
| "loss": 0.3575393855571747, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.4427792915531335, | |
| "grad_norm": 0.45827990770339966, | |
| "learning_rate": 1.8162219226840857e-05, | |
| "loss": 0.414590448141098, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.444141689373297, | |
| "grad_norm": 0.49530652165412903, | |
| "learning_rate": 1.814944446849508e-05, | |
| "loss": 0.45791754126548767, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.44550408719346046, | |
| "grad_norm": 0.4364672601222992, | |
| "learning_rate": 1.8136629989368815e-05, | |
| "loss": 0.3820730447769165, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.44686648501362397, | |
| "grad_norm": 0.46344640851020813, | |
| "learning_rate": 1.8123775851920438e-05, | |
| "loss": 0.42946088314056396, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.4482288828337875, | |
| "grad_norm": 0.3821968138217926, | |
| "learning_rate": 1.8110882118801633e-05, | |
| "loss": 0.3248857855796814, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.44959128065395093, | |
| "grad_norm": 0.39740368723869324, | |
| "learning_rate": 1.8097948852857054e-05, | |
| "loss": 0.34824830293655396, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.45095367847411444, | |
| "grad_norm": 0.4666673541069031, | |
| "learning_rate": 1.8084976117124072e-05, | |
| "loss": 0.39515918493270874, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.45231607629427795, | |
| "grad_norm": 0.36016690731048584, | |
| "learning_rate": 1.807196397483241e-05, | |
| "loss": 0.2601340711116791, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4536784741144414, | |
| "grad_norm": 0.3608089089393616, | |
| "learning_rate": 1.8058912489403867e-05, | |
| "loss": 0.2999919056892395, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.4550408719346049, | |
| "grad_norm": 0.42653337121009827, | |
| "learning_rate": 1.804582172445201e-05, | |
| "loss": 0.39884787797927856, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.4564032697547684, | |
| "grad_norm": 0.42624911665916443, | |
| "learning_rate": 1.8032691743781853e-05, | |
| "loss": 0.39197784662246704, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.45776566757493187, | |
| "grad_norm": 0.513020396232605, | |
| "learning_rate": 1.8019522611389543e-05, | |
| "loss": 0.4664883315563202, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4591280653950954, | |
| "grad_norm": 0.3827018141746521, | |
| "learning_rate": 1.8006314391462056e-05, | |
| "loss": 0.3538336157798767, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.4604904632152589, | |
| "grad_norm": 0.3974727988243103, | |
| "learning_rate": 1.799306714837689e-05, | |
| "loss": 0.3404923379421234, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.46185286103542234, | |
| "grad_norm": 0.390813410282135, | |
| "learning_rate": 1.7979780946701737e-05, | |
| "loss": 0.3352108895778656, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.46321525885558584, | |
| "grad_norm": 0.395134299993515, | |
| "learning_rate": 1.7966455851194178e-05, | |
| "loss": 0.35127317905426025, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4645776566757493, | |
| "grad_norm": 0.3822191059589386, | |
| "learning_rate": 1.795309192680136e-05, | |
| "loss": 0.3215380311012268, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.4659400544959128, | |
| "grad_norm": 0.38805821537971497, | |
| "learning_rate": 1.7939689238659692e-05, | |
| "loss": 0.3339554965496063, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.4673024523160763, | |
| "grad_norm": 0.4180435538291931, | |
| "learning_rate": 1.792624785209451e-05, | |
| "loss": 0.37479549646377563, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.46866485013623976, | |
| "grad_norm": 0.3718315362930298, | |
| "learning_rate": 1.7912767832619776e-05, | |
| "loss": 0.2911319136619568, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.47002724795640327, | |
| "grad_norm": 0.43831902742385864, | |
| "learning_rate": 1.789924924593774e-05, | |
| "loss": 0.388246089220047, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.4713896457765668, | |
| "grad_norm": 0.40909937024116516, | |
| "learning_rate": 1.7885692157938646e-05, | |
| "loss": 0.3607439398765564, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.47275204359673023, | |
| "grad_norm": 0.4436415433883667, | |
| "learning_rate": 1.787209663470038e-05, | |
| "loss": 0.4487878382205963, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.47411444141689374, | |
| "grad_norm": 0.4295803904533386, | |
| "learning_rate": 1.7858462742488175e-05, | |
| "loss": 0.4205361008644104, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.47547683923705725, | |
| "grad_norm": 0.3416333794593811, | |
| "learning_rate": 1.7844790547754264e-05, | |
| "loss": 0.2564454674720764, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.4768392370572207, | |
| "grad_norm": 0.3937414884567261, | |
| "learning_rate": 1.7831080117137584e-05, | |
| "loss": 0.3362025022506714, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4782016348773842, | |
| "grad_norm": 0.44493257999420166, | |
| "learning_rate": 1.781733151746342e-05, | |
| "loss": 0.41151055693626404, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.47956403269754766, | |
| "grad_norm": 0.4470524489879608, | |
| "learning_rate": 1.7803544815743107e-05, | |
| "loss": 0.4086991548538208, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.48092643051771117, | |
| "grad_norm": 0.4161277115345001, | |
| "learning_rate": 1.7789720079173682e-05, | |
| "loss": 0.3814135193824768, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.4822888283378747, | |
| "grad_norm": 0.4259527027606964, | |
| "learning_rate": 1.777585737513757e-05, | |
| "loss": 0.39335864782333374, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.48365122615803813, | |
| "grad_norm": 0.44917598366737366, | |
| "learning_rate": 1.7761956771202255e-05, | |
| "loss": 0.41355639696121216, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.48501362397820164, | |
| "grad_norm": 0.45215553045272827, | |
| "learning_rate": 1.7748018335119935e-05, | |
| "loss": 0.42670729756355286, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.48637602179836514, | |
| "grad_norm": 0.43753278255462646, | |
| "learning_rate": 1.7734042134827216e-05, | |
| "loss": 0.39761465787887573, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.4877384196185286, | |
| "grad_norm": 0.42144203186035156, | |
| "learning_rate": 1.772002823844476e-05, | |
| "loss": 0.3738403916358948, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.4891008174386921, | |
| "grad_norm": 0.4108579456806183, | |
| "learning_rate": 1.7705976714276976e-05, | |
| "loss": 0.3864634037017822, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.4904632152588556, | |
| "grad_norm": 0.44009944796562195, | |
| "learning_rate": 1.7691887630811653e-05, | |
| "loss": 0.387514591217041, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.49182561307901906, | |
| "grad_norm": 0.42644309997558594, | |
| "learning_rate": 1.7677761056719652e-05, | |
| "loss": 0.38349243998527527, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.49318801089918257, | |
| "grad_norm": 0.4462713301181793, | |
| "learning_rate": 1.7663597060854577e-05, | |
| "loss": 0.43910130858421326, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.494550408719346, | |
| "grad_norm": 0.4547002911567688, | |
| "learning_rate": 1.764939571225241e-05, | |
| "loss": 0.4280800223350525, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.49591280653950953, | |
| "grad_norm": 0.3978780210018158, | |
| "learning_rate": 1.763515708013121e-05, | |
| "loss": 0.3507936894893646, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.49727520435967304, | |
| "grad_norm": 0.3963427245616913, | |
| "learning_rate": 1.762088123389074e-05, | |
| "loss": 0.37120676040649414, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.4986376021798365, | |
| "grad_norm": 0.4243077337741852, | |
| "learning_rate": 1.760656824311216e-05, | |
| "loss": 0.36520522832870483, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.44183287024497986, | |
| "learning_rate": 1.7592218177557662e-05, | |
| "loss": 0.42573392391204834, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.5013623978201635, | |
| "grad_norm": 0.4238261282444, | |
| "learning_rate": 1.7577831107170157e-05, | |
| "loss": 0.3345707952976227, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.502724795640327, | |
| "grad_norm": 0.4145262539386749, | |
| "learning_rate": 1.7563407102072902e-05, | |
| "loss": 0.3537534475326538, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.5040871934604905, | |
| "grad_norm": 0.4781807065010071, | |
| "learning_rate": 1.7548946232569196e-05, | |
| "loss": 0.4613100588321686, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5054495912806539, | |
| "grad_norm": 0.383511483669281, | |
| "learning_rate": 1.7534448569141997e-05, | |
| "loss": 0.33608487248420715, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.5068119891008175, | |
| "grad_norm": 0.4402075409889221, | |
| "learning_rate": 1.751991418245361e-05, | |
| "loss": 0.4129033088684082, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.5081743869209809, | |
| "grad_norm": 0.5239101052284241, | |
| "learning_rate": 1.7505343143345328e-05, | |
| "loss": 0.45621195435523987, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.5095367847411444, | |
| "grad_norm": 0.4115491807460785, | |
| "learning_rate": 1.749073552283709e-05, | |
| "loss": 0.338983952999115, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.510899182561308, | |
| "grad_norm": 0.4104604423046112, | |
| "learning_rate": 1.7476091392127132e-05, | |
| "loss": 0.34245091676712036, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.5122615803814714, | |
| "grad_norm": 0.43850037455558777, | |
| "learning_rate": 1.746141082259165e-05, | |
| "loss": 0.40123671293258667, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5136239782016349, | |
| "grad_norm": 0.41533970832824707, | |
| "learning_rate": 1.7446693885784435e-05, | |
| "loss": 0.34971946477890015, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.5149863760217984, | |
| "grad_norm": 0.36809873580932617, | |
| "learning_rate": 1.7431940653436538e-05, | |
| "loss": 0.3055441379547119, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.5163487738419619, | |
| "grad_norm": 0.4054659605026245, | |
| "learning_rate": 1.7417151197455915e-05, | |
| "loss": 0.35166579484939575, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.5177111716621253, | |
| "grad_norm": 0.435969740152359, | |
| "learning_rate": 1.740232558992708e-05, | |
| "loss": 0.3930160403251648, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5190735694822888, | |
| "grad_norm": 0.4018082916736603, | |
| "learning_rate": 1.738746390311075e-05, | |
| "loss": 0.3543049693107605, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.5204359673024523, | |
| "grad_norm": 0.4188288450241089, | |
| "learning_rate": 1.7372566209443496e-05, | |
| "loss": 0.37953218817710876, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.5217983651226158, | |
| "grad_norm": 0.4601037800312042, | |
| "learning_rate": 1.735763258153739e-05, | |
| "loss": 0.4313342571258545, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.5231607629427792, | |
| "grad_norm": 0.43152326345443726, | |
| "learning_rate": 1.7342663092179636e-05, | |
| "loss": 0.41218316555023193, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5245231607629428, | |
| "grad_norm": 0.4087159037590027, | |
| "learning_rate": 1.7327657814332247e-05, | |
| "loss": 0.3378143906593323, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.5258855585831063, | |
| "grad_norm": 0.39226964116096497, | |
| "learning_rate": 1.7312616821131657e-05, | |
| "loss": 0.3294611871242523, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.5272479564032697, | |
| "grad_norm": 0.41971486806869507, | |
| "learning_rate": 1.729754018588838e-05, | |
| "loss": 0.37797778844833374, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.5286103542234333, | |
| "grad_norm": 0.4497551918029785, | |
| "learning_rate": 1.728242798208666e-05, | |
| "loss": 0.38471484184265137, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5299727520435967, | |
| "grad_norm": 0.41503041982650757, | |
| "learning_rate": 1.7267280283384104e-05, | |
| "loss": 0.38631588220596313, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.5313351498637602, | |
| "grad_norm": 0.4341152310371399, | |
| "learning_rate": 1.7252097163611304e-05, | |
| "loss": 0.43717920780181885, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5326975476839237, | |
| "grad_norm": 0.41421955823898315, | |
| "learning_rate": 1.723687869677152e-05, | |
| "loss": 0.3743841052055359, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.5340599455040872, | |
| "grad_norm": 0.39457762241363525, | |
| "learning_rate": 1.7221624957040274e-05, | |
| "loss": 0.3561673164367676, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5354223433242506, | |
| "grad_norm": 0.38131183385849, | |
| "learning_rate": 1.7206336018765026e-05, | |
| "loss": 0.3137727975845337, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.5367847411444142, | |
| "grad_norm": 0.3871758282184601, | |
| "learning_rate": 1.7191011956464788e-05, | |
| "loss": 0.35666629672050476, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5381471389645777, | |
| "grad_norm": 0.43537119030952454, | |
| "learning_rate": 1.717565284482977e-05, | |
| "loss": 0.42949140071868896, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.5395095367847411, | |
| "grad_norm": 0.4189457893371582, | |
| "learning_rate": 1.7160258758721015e-05, | |
| "loss": 0.396271288394928, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.5408719346049047, | |
| "grad_norm": 0.41827261447906494, | |
| "learning_rate": 1.714482977317003e-05, | |
| "loss": 0.4051450490951538, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.5422343324250681, | |
| "grad_norm": 0.38875138759613037, | |
| "learning_rate": 1.7129365963378428e-05, | |
| "loss": 0.3301708698272705, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.5435967302452316, | |
| "grad_norm": 0.42139869928359985, | |
| "learning_rate": 1.711386740471755e-05, | |
| "loss": 0.3770272433757782, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.5449591280653951, | |
| "grad_norm": 0.4148419499397278, | |
| "learning_rate": 1.7098334172728112e-05, | |
| "loss": 0.37180018424987793, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5463215258855586, | |
| "grad_norm": 0.42452266812324524, | |
| "learning_rate": 1.7082766343119822e-05, | |
| "loss": 0.37390637397766113, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.547683923705722, | |
| "grad_norm": 0.4145396053791046, | |
| "learning_rate": 1.706716399177103e-05, | |
| "loss": 0.3574928939342499, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.5490463215258855, | |
| "grad_norm": 0.404379665851593, | |
| "learning_rate": 1.7051527194728343e-05, | |
| "loss": 0.3393360674381256, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.5504087193460491, | |
| "grad_norm": 0.4394095242023468, | |
| "learning_rate": 1.703585602820624e-05, | |
| "loss": 0.38446563482284546, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5517711171662125, | |
| "grad_norm": 0.4012243449687958, | |
| "learning_rate": 1.7020150568586743e-05, | |
| "loss": 0.34150344133377075, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.553133514986376, | |
| "grad_norm": 0.43355002999305725, | |
| "learning_rate": 1.7004410892419012e-05, | |
| "loss": 0.3841056227684021, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5544959128065395, | |
| "grad_norm": 0.4600158631801605, | |
| "learning_rate": 1.698863707641897e-05, | |
| "loss": 0.39545172452926636, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.555858310626703, | |
| "grad_norm": 0.473522424697876, | |
| "learning_rate": 1.6972829197468958e-05, | |
| "loss": 0.4410251975059509, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5572207084468664, | |
| "grad_norm": 0.44125762581825256, | |
| "learning_rate": 1.695698733261732e-05, | |
| "loss": 0.3858538269996643, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.55858310626703, | |
| "grad_norm": 0.5052700042724609, | |
| "learning_rate": 1.694111155907807e-05, | |
| "loss": 0.505725622177124, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5599455040871935, | |
| "grad_norm": 0.39854127168655396, | |
| "learning_rate": 1.6925201954230474e-05, | |
| "loss": 0.3284291625022888, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.5613079019073569, | |
| "grad_norm": 0.4676287770271301, | |
| "learning_rate": 1.690925859561871e-05, | |
| "loss": 0.42946767807006836, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5626702997275205, | |
| "grad_norm": 0.4244855046272278, | |
| "learning_rate": 1.689328156095147e-05, | |
| "loss": 0.3870871663093567, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.5640326975476839, | |
| "grad_norm": 0.41849035024642944, | |
| "learning_rate": 1.6877270928101573e-05, | |
| "loss": 0.37404653429985046, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5653950953678474, | |
| "grad_norm": 0.4176686406135559, | |
| "learning_rate": 1.6861226775105618e-05, | |
| "loss": 0.38222536444664, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.5667574931880109, | |
| "grad_norm": 0.42130082845687866, | |
| "learning_rate": 1.684514918016356e-05, | |
| "loss": 0.3801380395889282, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5681198910081744, | |
| "grad_norm": 0.4545654058456421, | |
| "learning_rate": 1.6829038221638366e-05, | |
| "loss": 0.42598506808280945, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.5694822888283378, | |
| "grad_norm": 0.4204128682613373, | |
| "learning_rate": 1.681289397805562e-05, | |
| "loss": 0.3880673944950104, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5708446866485014, | |
| "grad_norm": 0.40949374437332153, | |
| "learning_rate": 1.6796716528103127e-05, | |
| "loss": 0.3792712092399597, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.5722070844686649, | |
| "grad_norm": 0.42425790429115295, | |
| "learning_rate": 1.6780505950630552e-05, | |
| "loss": 0.40029221773147583, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5735694822888283, | |
| "grad_norm": 0.449004203081131, | |
| "learning_rate": 1.6764262324649024e-05, | |
| "loss": 0.4227592647075653, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.5749318801089919, | |
| "grad_norm": 0.3931463956832886, | |
| "learning_rate": 1.674798572933075e-05, | |
| "loss": 0.3561609387397766, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.5762942779291553, | |
| "grad_norm": 0.4123283624649048, | |
| "learning_rate": 1.6731676244008622e-05, | |
| "loss": 0.3775140643119812, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.5776566757493188, | |
| "grad_norm": 0.413714736700058, | |
| "learning_rate": 1.6715333948175857e-05, | |
| "loss": 0.3620632290840149, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5790190735694822, | |
| "grad_norm": 0.36583277583122253, | |
| "learning_rate": 1.6698958921485577e-05, | |
| "loss": 0.30589473247528076, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.5803814713896458, | |
| "grad_norm": 0.4184879958629608, | |
| "learning_rate": 1.668255124375045e-05, | |
| "loss": 0.37757375836372375, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.5817438692098093, | |
| "grad_norm": 0.4325942397117615, | |
| "learning_rate": 1.6666110994942274e-05, | |
| "loss": 0.3947750926017761, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.5831062670299727, | |
| "grad_norm": 0.41197946667671204, | |
| "learning_rate": 1.6649638255191604e-05, | |
| "loss": 0.36544039845466614, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.5844686648501363, | |
| "grad_norm": 0.4328736662864685, | |
| "learning_rate": 1.663313310478736e-05, | |
| "loss": 0.3924716114997864, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.5858310626702997, | |
| "grad_norm": 0.4509877562522888, | |
| "learning_rate": 1.661659562417643e-05, | |
| "loss": 0.4020155370235443, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5871934604904632, | |
| "grad_norm": 0.4538882076740265, | |
| "learning_rate": 1.660002589396328e-05, | |
| "loss": 0.42874789237976074, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.5885558583106267, | |
| "grad_norm": 0.4030280113220215, | |
| "learning_rate": 1.6583423994909573e-05, | |
| "loss": 0.36683404445648193, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5899182561307902, | |
| "grad_norm": 0.4076124131679535, | |
| "learning_rate": 1.6566790007933746e-05, | |
| "loss": 0.3436656594276428, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.5912806539509536, | |
| "grad_norm": 0.454622358083725, | |
| "learning_rate": 1.6550124014110646e-05, | |
| "loss": 0.4208211302757263, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5926430517711172, | |
| "grad_norm": 0.422625869512558, | |
| "learning_rate": 1.6533426094671125e-05, | |
| "loss": 0.3721390664577484, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.5940054495912807, | |
| "grad_norm": 0.35872289538383484, | |
| "learning_rate": 1.651669633100163e-05, | |
| "loss": 0.29117974638938904, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5953678474114441, | |
| "grad_norm": 0.36502501368522644, | |
| "learning_rate": 1.6499934804643838e-05, | |
| "loss": 0.2901703119277954, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.5967302452316077, | |
| "grad_norm": 0.412866473197937, | |
| "learning_rate": 1.6483141597294214e-05, | |
| "loss": 0.31686800718307495, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5980926430517711, | |
| "grad_norm": 0.37948358058929443, | |
| "learning_rate": 1.646631679080366e-05, | |
| "loss": 0.3297507166862488, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.5994550408719346, | |
| "grad_norm": 0.4186059832572937, | |
| "learning_rate": 1.6449460467177078e-05, | |
| "loss": 0.37851136922836304, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6008174386920981, | |
| "grad_norm": 0.3846619427204132, | |
| "learning_rate": 1.6432572708572997e-05, | |
| "loss": 0.3128829002380371, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.6021798365122616, | |
| "grad_norm": 0.4310852289199829, | |
| "learning_rate": 1.641565359730315e-05, | |
| "loss": 0.39028769731521606, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.603542234332425, | |
| "grad_norm": 0.4645153880119324, | |
| "learning_rate": 1.6398703215832097e-05, | |
| "loss": 0.428106427192688, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.6049046321525886, | |
| "grad_norm": 0.4289141595363617, | |
| "learning_rate": 1.6381721646776805e-05, | |
| "loss": 0.3838496208190918, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6062670299727521, | |
| "grad_norm": 0.381273478269577, | |
| "learning_rate": 1.6364708972906246e-05, | |
| "loss": 0.3133726119995117, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.6076294277929155, | |
| "grad_norm": 0.4178448021411896, | |
| "learning_rate": 1.6347665277141005e-05, | |
| "loss": 0.37862086296081543, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.6089918256130791, | |
| "grad_norm": 0.42201322317123413, | |
| "learning_rate": 1.6330590642552867e-05, | |
| "loss": 0.39746665954589844, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.6103542234332425, | |
| "grad_norm": 0.43660175800323486, | |
| "learning_rate": 1.6313485152364417e-05, | |
| "loss": 0.3727680444717407, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.611716621253406, | |
| "grad_norm": 0.4348539710044861, | |
| "learning_rate": 1.6296348889948627e-05, | |
| "loss": 0.386578232049942, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.6130790190735694, | |
| "grad_norm": 0.4908278286457062, | |
| "learning_rate": 1.627918193882845e-05, | |
| "loss": 0.45541319251060486, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.614441416893733, | |
| "grad_norm": 0.46335548162460327, | |
| "learning_rate": 1.6261984382676432e-05, | |
| "loss": 0.4110366702079773, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.6158038147138964, | |
| "grad_norm": 0.4220646917819977, | |
| "learning_rate": 1.624475630531428e-05, | |
| "loss": 0.35985416173934937, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.6171662125340599, | |
| "grad_norm": 0.4713914096355438, | |
| "learning_rate": 1.6227497790712458e-05, | |
| "loss": 0.4303567111492157, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.6185286103542235, | |
| "grad_norm": 0.4228816628456116, | |
| "learning_rate": 1.621020892298979e-05, | |
| "loss": 0.35945630073547363, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.6198910081743869, | |
| "grad_norm": 0.3780403435230255, | |
| "learning_rate": 1.6192889786413048e-05, | |
| "loss": 0.32220372557640076, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.6212534059945504, | |
| "grad_norm": 0.4096536338329315, | |
| "learning_rate": 1.617554046539652e-05, | |
| "loss": 0.3749344050884247, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.6226158038147139, | |
| "grad_norm": 0.4119769334793091, | |
| "learning_rate": 1.6158161044501624e-05, | |
| "loss": 0.38287466764450073, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.6239782016348774, | |
| "grad_norm": 0.39846253395080566, | |
| "learning_rate": 1.6140751608436487e-05, | |
| "loss": 0.3293435275554657, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.6253405994550408, | |
| "grad_norm": 0.4839153289794922, | |
| "learning_rate": 1.6123312242055533e-05, | |
| "loss": 0.49237170815467834, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.6267029972752044, | |
| "grad_norm": 0.345336377620697, | |
| "learning_rate": 1.6105843030359055e-05, | |
| "loss": 0.2600591778755188, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6280653950953679, | |
| "grad_norm": 0.36076706647872925, | |
| "learning_rate": 1.6088344058492836e-05, | |
| "loss": 0.3101092576980591, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.6294277929155313, | |
| "grad_norm": 0.4160480499267578, | |
| "learning_rate": 1.6070815411747686e-05, | |
| "loss": 0.38243746757507324, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.6307901907356949, | |
| "grad_norm": 0.35517603158950806, | |
| "learning_rate": 1.6053257175559074e-05, | |
| "loss": 0.28862473368644714, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.6321525885558583, | |
| "grad_norm": 0.3878868520259857, | |
| "learning_rate": 1.6035669435506674e-05, | |
| "loss": 0.3289714455604553, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.6335149863760218, | |
| "grad_norm": 0.42289090156555176, | |
| "learning_rate": 1.6018052277313966e-05, | |
| "loss": 0.3916146755218506, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.6348773841961853, | |
| "grad_norm": 0.40691617131233215, | |
| "learning_rate": 1.600040578684782e-05, | |
| "loss": 0.38257652521133423, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.6362397820163488, | |
| "grad_norm": 0.3280545771121979, | |
| "learning_rate": 1.598273005011808e-05, | |
| "loss": 0.26138222217559814, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.6376021798365122, | |
| "grad_norm": 0.4141409695148468, | |
| "learning_rate": 1.5965025153277112e-05, | |
| "loss": 0.35480794310569763, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.6389645776566758, | |
| "grad_norm": 0.4675044119358063, | |
| "learning_rate": 1.5947291182619444e-05, | |
| "loss": 0.4690595865249634, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.6403269754768393, | |
| "grad_norm": 0.3967062830924988, | |
| "learning_rate": 1.5929528224581283e-05, | |
| "loss": 0.34801554679870605, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6416893732970027, | |
| "grad_norm": 0.43718570470809937, | |
| "learning_rate": 1.5911736365740133e-05, | |
| "loss": 0.3724061846733093, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.6430517711171662, | |
| "grad_norm": 0.45367127656936646, | |
| "learning_rate": 1.5893915692814365e-05, | |
| "loss": 0.4154523015022278, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.6444141689373297, | |
| "grad_norm": 0.41580531001091003, | |
| "learning_rate": 1.5876066292662784e-05, | |
| "loss": 0.3685305714607239, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.6457765667574932, | |
| "grad_norm": 0.3875036835670471, | |
| "learning_rate": 1.585818825228422e-05, | |
| "loss": 0.33631476759910583, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.6471389645776566, | |
| "grad_norm": 0.3880947232246399, | |
| "learning_rate": 1.5840281658817093e-05, | |
| "loss": 0.3135243058204651, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.6485013623978202, | |
| "grad_norm": 0.4160878658294678, | |
| "learning_rate": 1.582234659953899e-05, | |
| "loss": 0.3561175763607025, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.6498637602179836, | |
| "grad_norm": 0.427616149187088, | |
| "learning_rate": 1.5804383161866245e-05, | |
| "loss": 0.39607974886894226, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.6512261580381471, | |
| "grad_norm": 0.4082714319229126, | |
| "learning_rate": 1.5786391433353508e-05, | |
| "loss": 0.35210445523262024, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.6525885558583107, | |
| "grad_norm": 0.41783925890922546, | |
| "learning_rate": 1.5768371501693326e-05, | |
| "loss": 0.376731276512146, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.6539509536784741, | |
| "grad_norm": 0.37993308901786804, | |
| "learning_rate": 1.5750323454715696e-05, | |
| "loss": 0.322902649641037, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6553133514986376, | |
| "grad_norm": 0.4466570317745209, | |
| "learning_rate": 1.5732247380387664e-05, | |
| "loss": 0.39785605669021606, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.6566757493188011, | |
| "grad_norm": 0.40761619806289673, | |
| "learning_rate": 1.5714143366812876e-05, | |
| "loss": 0.3618497848510742, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.6580381471389646, | |
| "grad_norm": 0.3778402507305145, | |
| "learning_rate": 1.5696011502231158e-05, | |
| "loss": 0.339969664812088, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.659400544959128, | |
| "grad_norm": 0.4464191496372223, | |
| "learning_rate": 1.5677851875018076e-05, | |
| "loss": 0.43336886167526245, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.6607629427792916, | |
| "grad_norm": 0.3769300878047943, | |
| "learning_rate": 1.565966457368453e-05, | |
| "loss": 0.31399497389793396, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.662125340599455, | |
| "grad_norm": 0.39209413528442383, | |
| "learning_rate": 1.564144968687628e-05, | |
| "loss": 0.34925514459609985, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.6634877384196185, | |
| "grad_norm": 0.44385579228401184, | |
| "learning_rate": 1.5623207303373553e-05, | |
| "loss": 0.4195161461830139, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.6648501362397821, | |
| "grad_norm": 0.44335752725601196, | |
| "learning_rate": 1.5604937512090602e-05, | |
| "loss": 0.394603431224823, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.6662125340599455, | |
| "grad_norm": 0.41506636142730713, | |
| "learning_rate": 1.5586640402075258e-05, | |
| "loss": 0.39139121770858765, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.667574931880109, | |
| "grad_norm": 0.4141143262386322, | |
| "learning_rate": 1.5568316062508502e-05, | |
| "loss": 0.3790861666202545, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6689373297002725, | |
| "grad_norm": 0.4253380298614502, | |
| "learning_rate": 1.5549964582704044e-05, | |
| "loss": 0.38978779315948486, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.670299727520436, | |
| "grad_norm": 0.38304126262664795, | |
| "learning_rate": 1.5531586052107868e-05, | |
| "loss": 0.3342415690422058, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.6716621253405994, | |
| "grad_norm": 0.4576060175895691, | |
| "learning_rate": 1.5513180560297808e-05, | |
| "loss": 0.42615556716918945, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.6730245231607629, | |
| "grad_norm": 0.4253360331058502, | |
| "learning_rate": 1.5494748196983106e-05, | |
| "loss": 0.4119224548339844, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6743869209809265, | |
| "grad_norm": 0.4528999626636505, | |
| "learning_rate": 1.547628905200398e-05, | |
| "loss": 0.49085426330566406, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.6757493188010899, | |
| "grad_norm": 0.4398576617240906, | |
| "learning_rate": 1.5457803215331182e-05, | |
| "loss": 0.40197885036468506, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.6771117166212534, | |
| "grad_norm": 0.4078048765659332, | |
| "learning_rate": 1.5439290777065558e-05, | |
| "loss": 0.35455724596977234, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.6784741144414169, | |
| "grad_norm": 0.41575735807418823, | |
| "learning_rate": 1.542075182743762e-05, | |
| "loss": 0.3819142282009125, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.6798365122615804, | |
| "grad_norm": 0.3810281753540039, | |
| "learning_rate": 1.5402186456807086e-05, | |
| "loss": 0.330873966217041, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.6811989100817438, | |
| "grad_norm": 0.3949718773365021, | |
| "learning_rate": 1.5383594755662453e-05, | |
| "loss": 0.34446001052856445, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6825613079019074, | |
| "grad_norm": 0.3613286316394806, | |
| "learning_rate": 1.5364976814620568e-05, | |
| "loss": 0.3226144313812256, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.6839237057220708, | |
| "grad_norm": 0.3470703661441803, | |
| "learning_rate": 1.5346332724426155e-05, | |
| "loss": 0.29430314898490906, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.6852861035422343, | |
| "grad_norm": 0.3816620111465454, | |
| "learning_rate": 1.5327662575951404e-05, | |
| "loss": 0.3283197283744812, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.6866485013623979, | |
| "grad_norm": 0.391053169965744, | |
| "learning_rate": 1.5308966460195503e-05, | |
| "loss": 0.34893274307250977, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6880108991825613, | |
| "grad_norm": 0.4378596544265747, | |
| "learning_rate": 1.5290244468284206e-05, | |
| "loss": 0.40730422735214233, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.6893732970027248, | |
| "grad_norm": 0.4042869210243225, | |
| "learning_rate": 1.5271496691469404e-05, | |
| "loss": 0.3062353730201721, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.6907356948228883, | |
| "grad_norm": 0.42129504680633545, | |
| "learning_rate": 1.525272322112865e-05, | |
| "loss": 0.39751139283180237, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.6920980926430518, | |
| "grad_norm": 0.42566022276878357, | |
| "learning_rate": 1.5233924148764727e-05, | |
| "loss": 0.3925180435180664, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6934604904632152, | |
| "grad_norm": 0.3715537488460541, | |
| "learning_rate": 1.5215099566005217e-05, | |
| "loss": 0.31566864252090454, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.6948228882833788, | |
| "grad_norm": 0.45559632778167725, | |
| "learning_rate": 1.519624956460203e-05, | |
| "loss": 0.4103913903236389, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6961852861035422, | |
| "grad_norm": 0.34860438108444214, | |
| "learning_rate": 1.517737423643097e-05, | |
| "loss": 0.2865496277809143, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.6975476839237057, | |
| "grad_norm": 0.44315922260284424, | |
| "learning_rate": 1.5158473673491285e-05, | |
| "loss": 0.4178670048713684, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.6989100817438693, | |
| "grad_norm": 0.4062572419643402, | |
| "learning_rate": 1.5139547967905221e-05, | |
| "loss": 0.34824541211128235, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.7002724795640327, | |
| "grad_norm": 0.42972540855407715, | |
| "learning_rate": 1.5120597211917564e-05, | |
| "loss": 0.40658995509147644, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7016348773841962, | |
| "grad_norm": 0.4179152846336365, | |
| "learning_rate": 1.510162149789521e-05, | |
| "loss": 0.38474875688552856, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.7029972752043597, | |
| "grad_norm": 0.41898468136787415, | |
| "learning_rate": 1.5082620918326685e-05, | |
| "loss": 0.3936446011066437, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.7043596730245232, | |
| "grad_norm": 0.4191248416900635, | |
| "learning_rate": 1.5063595565821721e-05, | |
| "loss": 0.3933945298194885, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.7057220708446866, | |
| "grad_norm": 0.45913466811180115, | |
| "learning_rate": 1.5044545533110793e-05, | |
| "loss": 0.38266128301620483, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.7070844686648501, | |
| "grad_norm": 0.4441032409667969, | |
| "learning_rate": 1.5025470913044666e-05, | |
| "loss": 0.4108320474624634, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.7084468664850136, | |
| "grad_norm": 0.42410925030708313, | |
| "learning_rate": 1.5006371798593948e-05, | |
| "loss": 0.4113953709602356, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7098092643051771, | |
| "grad_norm": 0.4308149218559265, | |
| "learning_rate": 1.4987248282848637e-05, | |
| "loss": 0.3980264961719513, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.7111716621253406, | |
| "grad_norm": 0.44944408535957336, | |
| "learning_rate": 1.4968100459017652e-05, | |
| "loss": 0.36318397521972656, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.7125340599455041, | |
| "grad_norm": 0.39412856101989746, | |
| "learning_rate": 1.4948928420428403e-05, | |
| "loss": 0.3426111042499542, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.7138964577656676, | |
| "grad_norm": 0.35763245820999146, | |
| "learning_rate": 1.4929732260526318e-05, | |
| "loss": 0.28396356105804443, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.715258855585831, | |
| "grad_norm": 0.4681552052497864, | |
| "learning_rate": 1.4910512072874395e-05, | |
| "loss": 0.4816880226135254, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.7166212534059946, | |
| "grad_norm": 0.38465479016304016, | |
| "learning_rate": 1.489126795115274e-05, | |
| "loss": 0.31631630659103394, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.717983651226158, | |
| "grad_norm": 0.700157880783081, | |
| "learning_rate": 1.4871999989158123e-05, | |
| "loss": 0.3877072334289551, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.7193460490463215, | |
| "grad_norm": 0.36922982335090637, | |
| "learning_rate": 1.4852708280803512e-05, | |
| "loss": 0.33346259593963623, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.720708446866485, | |
| "grad_norm": 0.4037865698337555, | |
| "learning_rate": 1.4833392920117607e-05, | |
| "loss": 0.3595266342163086, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.7220708446866485, | |
| "grad_norm": 0.38142573833465576, | |
| "learning_rate": 1.4814054001244395e-05, | |
| "loss": 0.31460440158843994, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.723433242506812, | |
| "grad_norm": 0.3785574436187744, | |
| "learning_rate": 1.4794691618442691e-05, | |
| "loss": 0.30783623456954956, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.7247956403269755, | |
| "grad_norm": 0.44186413288116455, | |
| "learning_rate": 1.477530586608567e-05, | |
| "loss": 0.41240638494491577, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.726158038147139, | |
| "grad_norm": 0.3977665603160858, | |
| "learning_rate": 1.4755896838660412e-05, | |
| "loss": 0.32479894161224365, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.7275204359673024, | |
| "grad_norm": 0.40463075041770935, | |
| "learning_rate": 1.4736464630767442e-05, | |
| "loss": 0.3699343502521515, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.728882833787466, | |
| "grad_norm": 0.41050946712493896, | |
| "learning_rate": 1.4717009337120268e-05, | |
| "loss": 0.3829270005226135, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.7302452316076294, | |
| "grad_norm": 0.4132955074310303, | |
| "learning_rate": 1.4697531052544914e-05, | |
| "loss": 0.34740668535232544, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.7316076294277929, | |
| "grad_norm": 0.43629321455955505, | |
| "learning_rate": 1.4678029871979469e-05, | |
| "loss": 0.3730025589466095, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.7329700272479565, | |
| "grad_norm": 0.44061553478240967, | |
| "learning_rate": 1.4658505890473615e-05, | |
| "loss": 0.40129199624061584, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.7343324250681199, | |
| "grad_norm": 0.4008716642856598, | |
| "learning_rate": 1.463895920318817e-05, | |
| "loss": 0.3489900827407837, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.7356948228882834, | |
| "grad_norm": 0.3535612225532532, | |
| "learning_rate": 1.4619389905394616e-05, | |
| "loss": 0.2988109290599823, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7370572207084468, | |
| "grad_norm": 0.4381372630596161, | |
| "learning_rate": 1.4599798092474646e-05, | |
| "loss": 0.3608015775680542, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.7384196185286104, | |
| "grad_norm": 0.3945034444332123, | |
| "learning_rate": 1.4580183859919686e-05, | |
| "loss": 0.32994401454925537, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.7397820163487738, | |
| "grad_norm": 0.41232776641845703, | |
| "learning_rate": 1.4560547303330441e-05, | |
| "loss": 0.38900381326675415, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.7411444141689373, | |
| "grad_norm": 0.38951003551483154, | |
| "learning_rate": 1.4540888518416423e-05, | |
| "loss": 0.3017880320549011, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.7425068119891008, | |
| "grad_norm": 0.3949369490146637, | |
| "learning_rate": 1.4521207600995487e-05, | |
| "loss": 0.32867276668548584, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.7438692098092643, | |
| "grad_norm": 0.3731197416782379, | |
| "learning_rate": 1.4501504646993358e-05, | |
| "loss": 0.32244962453842163, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.7452316076294278, | |
| "grad_norm": 0.42670756578445435, | |
| "learning_rate": 1.4481779752443177e-05, | |
| "loss": 0.34767279028892517, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.7465940054495913, | |
| "grad_norm": 0.4462198317050934, | |
| "learning_rate": 1.446203301348502e-05, | |
| "loss": 0.40442246198654175, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.7479564032697548, | |
| "grad_norm": 0.43297913670539856, | |
| "learning_rate": 1.4442264526365425e-05, | |
| "loss": 0.39974820613861084, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.7493188010899182, | |
| "grad_norm": 0.38846778869628906, | |
| "learning_rate": 1.4422474387436951e-05, | |
| "loss": 0.3305945098400116, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.7506811989100818, | |
| "grad_norm": 0.3659208118915558, | |
| "learning_rate": 1.4402662693157672e-05, | |
| "loss": 0.28628021478652954, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.7520435967302452, | |
| "grad_norm": 0.4232732653617859, | |
| "learning_rate": 1.4382829540090728e-05, | |
| "loss": 0.39836883544921875, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.7534059945504087, | |
| "grad_norm": 0.3981814682483673, | |
| "learning_rate": 1.4362975024903854e-05, | |
| "loss": 0.3883022964000702, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.7547683923705722, | |
| "grad_norm": 0.37059178948402405, | |
| "learning_rate": 1.43430992443689e-05, | |
| "loss": 0.31552067399024963, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.7561307901907357, | |
| "grad_norm": 0.5106791257858276, | |
| "learning_rate": 1.4323202295361375e-05, | |
| "loss": 0.3364841043949127, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.7574931880108992, | |
| "grad_norm": 0.3709793984889984, | |
| "learning_rate": 1.4303284274859947e-05, | |
| "loss": 0.3160533010959625, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.7588555858310627, | |
| "grad_norm": 0.38688817620277405, | |
| "learning_rate": 1.4283345279946e-05, | |
| "loss": 0.3377053439617157, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.7602179836512262, | |
| "grad_norm": 0.3955825865268707, | |
| "learning_rate": 1.4263385407803147e-05, | |
| "loss": 0.367174357175827, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.7615803814713896, | |
| "grad_norm": 0.387660413980484, | |
| "learning_rate": 1.424340475571675e-05, | |
| "loss": 0.355742484331131, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.7629427792915532, | |
| "grad_norm": 0.4117288589477539, | |
| "learning_rate": 1.4223403421073465e-05, | |
| "loss": 0.3632027506828308, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7643051771117166, | |
| "grad_norm": 0.3913569748401642, | |
| "learning_rate": 1.4203381501360746e-05, | |
| "loss": 0.34550565481185913, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.7656675749318801, | |
| "grad_norm": 0.3794730305671692, | |
| "learning_rate": 1.4183339094166386e-05, | |
| "loss": 0.32455602288246155, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.7670299727520435, | |
| "grad_norm": 0.4123244881629944, | |
| "learning_rate": 1.416327629717803e-05, | |
| "loss": 0.33050400018692017, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.7683923705722071, | |
| "grad_norm": 0.5458968877792358, | |
| "learning_rate": 1.4143193208182705e-05, | |
| "loss": 0.33752286434173584, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.7697547683923706, | |
| "grad_norm": 0.3934307396411896, | |
| "learning_rate": 1.4123089925066347e-05, | |
| "loss": 0.31893715262413025, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.771117166212534, | |
| "grad_norm": 0.4280702471733093, | |
| "learning_rate": 1.4102966545813312e-05, | |
| "loss": 0.38816317915916443, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.7724795640326976, | |
| "grad_norm": 0.38977575302124023, | |
| "learning_rate": 1.4082823168505912e-05, | |
| "loss": 0.3048614263534546, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.773841961852861, | |
| "grad_norm": 0.4607933461666107, | |
| "learning_rate": 1.4062659891323927e-05, | |
| "loss": 0.3881381154060364, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.7752043596730245, | |
| "grad_norm": 0.4132702350616455, | |
| "learning_rate": 1.4042476812544128e-05, | |
| "loss": 0.367891788482666, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.776566757493188, | |
| "grad_norm": 0.36173874139785767, | |
| "learning_rate": 1.4022274030539802e-05, | |
| "loss": 0.2838096618652344, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.7779291553133515, | |
| "grad_norm": 0.3847193121910095, | |
| "learning_rate": 1.4002051643780275e-05, | |
| "loss": 0.31407347321510315, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.779291553133515, | |
| "grad_norm": 0.3895076811313629, | |
| "learning_rate": 1.398180975083042e-05, | |
| "loss": 0.3542616367340088, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7806539509536785, | |
| "grad_norm": 0.4060671329498291, | |
| "learning_rate": 1.3961548450350184e-05, | |
| "loss": 0.3690754175186157, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.782016348773842, | |
| "grad_norm": 0.3732289671897888, | |
| "learning_rate": 1.3941267841094118e-05, | |
| "loss": 0.3208252191543579, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.7833787465940054, | |
| "grad_norm": 0.4188457429409027, | |
| "learning_rate": 1.3920968021910872e-05, | |
| "loss": 0.3940437436103821, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.784741144414169, | |
| "grad_norm": 0.3824058175086975, | |
| "learning_rate": 1.3900649091742734e-05, | |
| "loss": 0.3468947410583496, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7861035422343324, | |
| "grad_norm": 0.36254164576530457, | |
| "learning_rate": 1.3880311149625141e-05, | |
| "loss": 0.3099260926246643, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.7874659400544959, | |
| "grad_norm": 0.34511882066726685, | |
| "learning_rate": 1.3859954294686185e-05, | |
| "loss": 0.2614838778972626, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.7888283378746594, | |
| "grad_norm": 0.42470481991767883, | |
| "learning_rate": 1.3839578626146143e-05, | |
| "loss": 0.36970824003219604, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.7901907356948229, | |
| "grad_norm": 0.43448764085769653, | |
| "learning_rate": 1.3819184243317008e-05, | |
| "loss": 0.38234907388687134, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7915531335149864, | |
| "grad_norm": 0.41992104053497314, | |
| "learning_rate": 1.3798771245601961e-05, | |
| "loss": 0.3760548233985901, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.7929155313351499, | |
| "grad_norm": 0.3761197328567505, | |
| "learning_rate": 1.3778339732494933e-05, | |
| "loss": 0.31548407673835754, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.7942779291553134, | |
| "grad_norm": 0.40058112144470215, | |
| "learning_rate": 1.3757889803580085e-05, | |
| "loss": 0.3305339515209198, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.7956403269754768, | |
| "grad_norm": 0.4397111237049103, | |
| "learning_rate": 1.373742155853135e-05, | |
| "loss": 0.39684200286865234, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7970027247956403, | |
| "grad_norm": 0.3730669319629669, | |
| "learning_rate": 1.3716935097111926e-05, | |
| "loss": 0.30914586782455444, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.7983651226158038, | |
| "grad_norm": 0.3845134377479553, | |
| "learning_rate": 1.3696430519173802e-05, | |
| "loss": 0.3142814040184021, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.7997275204359673, | |
| "grad_norm": 0.4052923619747162, | |
| "learning_rate": 1.367590792465727e-05, | |
| "loss": 0.33971768617630005, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.8010899182561307, | |
| "grad_norm": 0.35842764377593994, | |
| "learning_rate": 1.3655367413590433e-05, | |
| "loss": 0.3152307868003845, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.8024523160762943, | |
| "grad_norm": 0.41765162348747253, | |
| "learning_rate": 1.3634809086088715e-05, | |
| "loss": 0.36689841747283936, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.8038147138964578, | |
| "grad_norm": 0.3411005735397339, | |
| "learning_rate": 1.361423304235439e-05, | |
| "loss": 0.2985839545726776, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.8051771117166212, | |
| "grad_norm": 0.38796186447143555, | |
| "learning_rate": 1.359363938267607e-05, | |
| "loss": 0.31117764115333557, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.8065395095367848, | |
| "grad_norm": 0.4477858543395996, | |
| "learning_rate": 1.3573028207428239e-05, | |
| "loss": 0.3759269714355469, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.8079019073569482, | |
| "grad_norm": 0.3874566853046417, | |
| "learning_rate": 1.3552399617070742e-05, | |
| "loss": 0.35220903158187866, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.8092643051771117, | |
| "grad_norm": 0.3966315686702728, | |
| "learning_rate": 1.3531753712148312e-05, | |
| "loss": 0.327017605304718, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.8106267029972752, | |
| "grad_norm": 0.38722142577171326, | |
| "learning_rate": 1.3511090593290073e-05, | |
| "loss": 0.3531439006328583, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.8119891008174387, | |
| "grad_norm": 0.35546261072158813, | |
| "learning_rate": 1.3490410361209051e-05, | |
| "loss": 0.29371026158332825, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.8133514986376021, | |
| "grad_norm": 0.4084646701812744, | |
| "learning_rate": 1.3469713116701683e-05, | |
| "loss": 0.3657349944114685, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.8147138964577657, | |
| "grad_norm": 0.3828872740268707, | |
| "learning_rate": 1.3448998960647324e-05, | |
| "loss": 0.27304738759994507, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.8160762942779292, | |
| "grad_norm": 0.40250927209854126, | |
| "learning_rate": 1.3428267994007756e-05, | |
| "loss": 0.36223679780960083, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.8174386920980926, | |
| "grad_norm": 0.38634181022644043, | |
| "learning_rate": 1.3407520317826697e-05, | |
| "loss": 0.32051095366477966, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8188010899182562, | |
| "grad_norm": 0.3875936269760132, | |
| "learning_rate": 1.3386756033229314e-05, | |
| "loss": 0.31921273469924927, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.8201634877384196, | |
| "grad_norm": 0.4128807783126831, | |
| "learning_rate": 1.3365975241421712e-05, | |
| "loss": 0.35329893231391907, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.8215258855585831, | |
| "grad_norm": 0.3846687376499176, | |
| "learning_rate": 1.3345178043690463e-05, | |
| "loss": 0.3306158185005188, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.8228882833787466, | |
| "grad_norm": 0.37342211604118347, | |
| "learning_rate": 1.3324364541402102e-05, | |
| "loss": 0.29243338108062744, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.8242506811989101, | |
| "grad_norm": 0.4314493238925934, | |
| "learning_rate": 1.3303534836002629e-05, | |
| "loss": 0.3962687849998474, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.8256130790190735, | |
| "grad_norm": 0.41795799136161804, | |
| "learning_rate": 1.328268902901702e-05, | |
| "loss": 0.3593180775642395, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.8269754768392371, | |
| "grad_norm": 0.41061070561408997, | |
| "learning_rate": 1.326182722204873e-05, | |
| "loss": 0.3891078233718872, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.8283378746594006, | |
| "grad_norm": 0.3990163803100586, | |
| "learning_rate": 1.32409495167792e-05, | |
| "loss": 0.3603130578994751, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.829700272479564, | |
| "grad_norm": 0.37110787630081177, | |
| "learning_rate": 1.3220056014967359e-05, | |
| "loss": 0.3168402910232544, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.8310626702997275, | |
| "grad_norm": 0.42547863721847534, | |
| "learning_rate": 1.3199146818449134e-05, | |
| "loss": 0.4026668667793274, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.832425068119891, | |
| "grad_norm": 0.47038719058036804, | |
| "learning_rate": 1.317822202913694e-05, | |
| "loss": 0.44461554288864136, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.8337874659400545, | |
| "grad_norm": 0.38207799196243286, | |
| "learning_rate": 1.3157281749019199e-05, | |
| "loss": 0.3126547336578369, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.8351498637602179, | |
| "grad_norm": 0.3733403980731964, | |
| "learning_rate": 1.3136326080159836e-05, | |
| "loss": 0.31984227895736694, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.8365122615803815, | |
| "grad_norm": 0.37421712279319763, | |
| "learning_rate": 1.3115355124697775e-05, | |
| "loss": 0.3349495232105255, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.837874659400545, | |
| "grad_norm": 0.4028952121734619, | |
| "learning_rate": 1.3094368984846453e-05, | |
| "loss": 0.3623151183128357, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.8392370572207084, | |
| "grad_norm": 0.3345983028411865, | |
| "learning_rate": 1.3073367762893316e-05, | |
| "loss": 0.25584784150123596, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.840599455040872, | |
| "grad_norm": 0.4131789803504944, | |
| "learning_rate": 1.3052351561199321e-05, | |
| "loss": 0.36740627884864807, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.8419618528610354, | |
| "grad_norm": 0.38671812415122986, | |
| "learning_rate": 1.3031320482198433e-05, | |
| "loss": 0.32881784439086914, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.8433242506811989, | |
| "grad_norm": 0.43707364797592163, | |
| "learning_rate": 1.3010274628397137e-05, | |
| "loss": 0.4240247905254364, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.8446866485013624, | |
| "grad_norm": 0.37011024355888367, | |
| "learning_rate": 1.298921410237392e-05, | |
| "loss": 0.3215616047382355, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8460490463215259, | |
| "grad_norm": 0.40730512142181396, | |
| "learning_rate": 1.2968139006778797e-05, | |
| "loss": 0.3768579661846161, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.8474114441416893, | |
| "grad_norm": 0.43396347761154175, | |
| "learning_rate": 1.2947049444332782e-05, | |
| "loss": 0.41485506296157837, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.8487738419618529, | |
| "grad_norm": 0.43405383825302124, | |
| "learning_rate": 1.292594551782741e-05, | |
| "loss": 0.40304839611053467, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.8501362397820164, | |
| "grad_norm": 0.44254007935523987, | |
| "learning_rate": 1.2904827330124223e-05, | |
| "loss": 0.39402496814727783, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.8514986376021798, | |
| "grad_norm": 0.36872732639312744, | |
| "learning_rate": 1.2883694984154273e-05, | |
| "loss": 0.3053838312625885, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.8528610354223434, | |
| "grad_norm": 0.4020664095878601, | |
| "learning_rate": 1.2862548582917622e-05, | |
| "loss": 0.34385716915130615, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.8542234332425068, | |
| "grad_norm": 0.37539398670196533, | |
| "learning_rate": 1.2841388229482834e-05, | |
| "loss": 0.32522130012512207, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.8555858310626703, | |
| "grad_norm": 0.4901396632194519, | |
| "learning_rate": 1.2820214026986481e-05, | |
| "loss": 0.4514284133911133, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.8569482288828338, | |
| "grad_norm": 0.42575520277023315, | |
| "learning_rate": 1.2799026078632638e-05, | |
| "loss": 0.3785257935523987, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.8583106267029973, | |
| "grad_norm": 0.4292484223842621, | |
| "learning_rate": 1.2777824487692373e-05, | |
| "loss": 0.37280866503715515, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.8596730245231607, | |
| "grad_norm": 0.38012853264808655, | |
| "learning_rate": 1.2756609357503248e-05, | |
| "loss": 0.3435444235801697, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.8610354223433242, | |
| "grad_norm": 0.37119728326797485, | |
| "learning_rate": 1.2735380791468814e-05, | |
| "loss": 0.315696656703949, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.8623978201634878, | |
| "grad_norm": 0.4296729564666748, | |
| "learning_rate": 1.271413889305812e-05, | |
| "loss": 0.3677264451980591, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.8637602179836512, | |
| "grad_norm": 0.43633759021759033, | |
| "learning_rate": 1.2692883765805188e-05, | |
| "loss": 0.42795848846435547, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.8651226158038147, | |
| "grad_norm": 0.35888829827308655, | |
| "learning_rate": 1.2671615513308524e-05, | |
| "loss": 0.307987779378891, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.8664850136239782, | |
| "grad_norm": 0.39719316363334656, | |
| "learning_rate": 1.2650334239230598e-05, | |
| "loss": 0.34023380279541016, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.8678474114441417, | |
| "grad_norm": 0.39609214663505554, | |
| "learning_rate": 1.2629040047297356e-05, | |
| "loss": 0.3457816243171692, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.8692098092643051, | |
| "grad_norm": 0.3761802911758423, | |
| "learning_rate": 1.2607733041297703e-05, | |
| "loss": 0.3295412063598633, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.8705722070844687, | |
| "grad_norm": 0.4121301472187042, | |
| "learning_rate": 1.2586413325083e-05, | |
| "loss": 0.3802829086780548, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.8719346049046321, | |
| "grad_norm": 0.42629581689834595, | |
| "learning_rate": 1.2565081002566563e-05, | |
| "loss": 0.41711223125457764, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.8732970027247956, | |
| "grad_norm": 0.4152551591396332, | |
| "learning_rate": 1.2543736177723147e-05, | |
| "loss": 0.3565431833267212, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.8746594005449592, | |
| "grad_norm": 0.3676934838294983, | |
| "learning_rate": 1.2522378954588443e-05, | |
| "loss": 0.3011196553707123, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.8760217983651226, | |
| "grad_norm": 0.3511006534099579, | |
| "learning_rate": 1.2501009437258576e-05, | |
| "loss": 0.27889248728752136, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.8773841961852861, | |
| "grad_norm": 0.33939114212989807, | |
| "learning_rate": 1.2479627729889587e-05, | |
| "loss": 0.2643985152244568, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.8787465940054496, | |
| "grad_norm": 0.46138131618499756, | |
| "learning_rate": 1.245823393669694e-05, | |
| "loss": 0.431702196598053, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.8801089918256131, | |
| "grad_norm": 0.39123064279556274, | |
| "learning_rate": 1.2436828161955004e-05, | |
| "loss": 0.35548466444015503, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.8814713896457765, | |
| "grad_norm": 0.37139931321144104, | |
| "learning_rate": 1.2415410509996537e-05, | |
| "loss": 0.3073146939277649, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.8828337874659401, | |
| "grad_norm": 0.39180418848991394, | |
| "learning_rate": 1.2393981085212204e-05, | |
| "loss": 0.3435180187225342, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.8841961852861036, | |
| "grad_norm": 0.3841586410999298, | |
| "learning_rate": 1.2372539992050037e-05, | |
| "loss": 0.3478638529777527, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.885558583106267, | |
| "grad_norm": 0.4314171373844147, | |
| "learning_rate": 1.2351087335014945e-05, | |
| "loss": 0.34792008996009827, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.8869209809264306, | |
| "grad_norm": 0.36530905961990356, | |
| "learning_rate": 1.2329623218668197e-05, | |
| "loss": 0.31735897064208984, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.888283378746594, | |
| "grad_norm": 0.4034612476825714, | |
| "learning_rate": 1.2308147747626926e-05, | |
| "loss": 0.3590067923069, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.8896457765667575, | |
| "grad_norm": 0.3697197437286377, | |
| "learning_rate": 1.2286661026563597e-05, | |
| "loss": 0.30383849143981934, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.8910081743869209, | |
| "grad_norm": 0.36092400550842285, | |
| "learning_rate": 1.2265163160205514e-05, | |
| "loss": 0.2998080551624298, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.8923705722070845, | |
| "grad_norm": 0.41580548882484436, | |
| "learning_rate": 1.2243654253334299e-05, | |
| "loss": 0.39810556173324585, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.8937329700272479, | |
| "grad_norm": 0.43990781903266907, | |
| "learning_rate": 1.2222134410785386e-05, | |
| "loss": 0.37231457233428955, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8950953678474114, | |
| "grad_norm": 0.3491821587085724, | |
| "learning_rate": 1.2200603737447515e-05, | |
| "loss": 0.27115774154663086, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.896457765667575, | |
| "grad_norm": 0.41555097699165344, | |
| "learning_rate": 1.2179062338262217e-05, | |
| "loss": 0.3704499900341034, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.8978201634877384, | |
| "grad_norm": 0.41340363025665283, | |
| "learning_rate": 1.2157510318223296e-05, | |
| "loss": 0.3653028905391693, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.8991825613079019, | |
| "grad_norm": 0.42316463589668274, | |
| "learning_rate": 1.2135947782376322e-05, | |
| "loss": 0.37947019934654236, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9005449591280654, | |
| "grad_norm": 0.4614158570766449, | |
| "learning_rate": 1.2114374835818122e-05, | |
| "loss": 0.4423688054084778, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.9019073569482289, | |
| "grad_norm": 0.3793640434741974, | |
| "learning_rate": 1.2092791583696266e-05, | |
| "loss": 0.3140786290168762, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.9032697547683923, | |
| "grad_norm": 0.41017946600914, | |
| "learning_rate": 1.207119813120855e-05, | |
| "loss": 0.3691558241844177, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.9046321525885559, | |
| "grad_norm": 0.42569923400878906, | |
| "learning_rate": 1.2049594583602495e-05, | |
| "loss": 0.39558589458465576, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.9059945504087193, | |
| "grad_norm": 0.38926219940185547, | |
| "learning_rate": 1.2027981046174817e-05, | |
| "loss": 0.36622583866119385, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.9073569482288828, | |
| "grad_norm": 0.3998047709465027, | |
| "learning_rate": 1.2006357624270927e-05, | |
| "loss": 0.30081361532211304, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.9087193460490464, | |
| "grad_norm": 0.3933253884315491, | |
| "learning_rate": 1.198472442328442e-05, | |
| "loss": 0.3435978293418884, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.9100817438692098, | |
| "grad_norm": 0.3495563864707947, | |
| "learning_rate": 1.1963081548656539e-05, | |
| "loss": 0.29989689588546753, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.9114441416893733, | |
| "grad_norm": 0.4060697853565216, | |
| "learning_rate": 1.1941429105875686e-05, | |
| "loss": 0.3763880431652069, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.9128065395095368, | |
| "grad_norm": 0.33757802844047546, | |
| "learning_rate": 1.1919767200476904e-05, | |
| "loss": 0.2711639106273651, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.9141689373297003, | |
| "grad_norm": 0.3644583523273468, | |
| "learning_rate": 1.1898095938041352e-05, | |
| "loss": 0.2867870330810547, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.9155313351498637, | |
| "grad_norm": 0.4242314100265503, | |
| "learning_rate": 1.187641542419579e-05, | |
| "loss": 0.3521907925605774, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.9168937329700273, | |
| "grad_norm": 0.4531702995300293, | |
| "learning_rate": 1.1854725764612078e-05, | |
| "loss": 0.40591543912887573, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.9182561307901907, | |
| "grad_norm": 0.4164139926433563, | |
| "learning_rate": 1.183302706500665e-05, | |
| "loss": 0.3550049662590027, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.9196185286103542, | |
| "grad_norm": 0.37005481123924255, | |
| "learning_rate": 1.181131943114e-05, | |
| "loss": 0.30720236897468567, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.9209809264305178, | |
| "grad_norm": 0.3972548246383667, | |
| "learning_rate": 1.1789602968816172e-05, | |
| "loss": 0.318911075592041, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.9223433242506812, | |
| "grad_norm": 0.37663185596466064, | |
| "learning_rate": 1.1767877783882235e-05, | |
| "loss": 0.31429654359817505, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.9237057220708447, | |
| "grad_norm": 0.373142808675766, | |
| "learning_rate": 1.1746143982227778e-05, | |
| "loss": 0.30172014236450195, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.9250681198910081, | |
| "grad_norm": 0.42605262994766235, | |
| "learning_rate": 1.1724401669784385e-05, | |
| "loss": 0.37753045558929443, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.9264305177111717, | |
| "grad_norm": 0.41822153329849243, | |
| "learning_rate": 1.1702650952525116e-05, | |
| "loss": 0.39182257652282715, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9277929155313351, | |
| "grad_norm": 0.39568570256233215, | |
| "learning_rate": 1.168089193646401e-05, | |
| "loss": 0.33675575256347656, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.9291553133514986, | |
| "grad_norm": 0.3839928209781647, | |
| "learning_rate": 1.1659124727655546e-05, | |
| "loss": 0.34991830587387085, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.9305177111716622, | |
| "grad_norm": 0.4019002914428711, | |
| "learning_rate": 1.1637349432194137e-05, | |
| "loss": 0.3185323476791382, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.9318801089918256, | |
| "grad_norm": 0.3919433057308197, | |
| "learning_rate": 1.1615566156213609e-05, | |
| "loss": 0.31005561351776123, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.9332425068119891, | |
| "grad_norm": 0.3654928207397461, | |
| "learning_rate": 1.1593775005886687e-05, | |
| "loss": 0.3170800507068634, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.9346049046321526, | |
| "grad_norm": 0.4315038025379181, | |
| "learning_rate": 1.1571976087424478e-05, | |
| "loss": 0.4050094485282898, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.9359673024523161, | |
| "grad_norm": 0.3850279450416565, | |
| "learning_rate": 1.1550169507075939e-05, | |
| "loss": 0.3527379631996155, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.9373297002724795, | |
| "grad_norm": 0.39894241094589233, | |
| "learning_rate": 1.1528355371127396e-05, | |
| "loss": 0.35128986835479736, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.9386920980926431, | |
| "grad_norm": 0.39317700266838074, | |
| "learning_rate": 1.1506533785901977e-05, | |
| "loss": 0.32606303691864014, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.9400544959128065, | |
| "grad_norm": 0.33535271883010864, | |
| "learning_rate": 1.148470485775913e-05, | |
| "loss": 0.26627829670906067, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.94141689373297, | |
| "grad_norm": 0.38109129667282104, | |
| "learning_rate": 1.146286869309409e-05, | |
| "loss": 0.3295374810695648, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.9427792915531336, | |
| "grad_norm": 0.3924390375614166, | |
| "learning_rate": 1.1441025398337365e-05, | |
| "loss": 0.3435341417789459, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.944141689373297, | |
| "grad_norm": 0.42672890424728394, | |
| "learning_rate": 1.141917507995421e-05, | |
| "loss": 0.36071181297302246, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.9455040871934605, | |
| "grad_norm": 0.417208194732666, | |
| "learning_rate": 1.1397317844444125e-05, | |
| "loss": 0.38000190258026123, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.946866485013624, | |
| "grad_norm": 0.38172468543052673, | |
| "learning_rate": 1.137545379834031e-05, | |
| "loss": 0.30421239137649536, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.9482288828337875, | |
| "grad_norm": 0.32868707180023193, | |
| "learning_rate": 1.1353583048209171e-05, | |
| "loss": 0.2640570104122162, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.9495912806539509, | |
| "grad_norm": 0.39148321747779846, | |
| "learning_rate": 1.1331705700649786e-05, | |
| "loss": 0.32436051964759827, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.9509536784741145, | |
| "grad_norm": 0.4759628176689148, | |
| "learning_rate": 1.1309821862293385e-05, | |
| "loss": 0.41697192192077637, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.952316076294278, | |
| "grad_norm": 0.38073548674583435, | |
| "learning_rate": 1.128793163980284e-05, | |
| "loss": 0.323738157749176, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.9536784741144414, | |
| "grad_norm": 0.3676002025604248, | |
| "learning_rate": 1.1266035139872142e-05, | |
| "loss": 0.288091778755188, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9550408719346049, | |
| "grad_norm": 0.3463435173034668, | |
| "learning_rate": 1.1244132469225872e-05, | |
| "loss": 0.29015398025512695, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.9564032697547684, | |
| "grad_norm": 0.3797283470630646, | |
| "learning_rate": 1.1222223734618689e-05, | |
| "loss": 0.34152185916900635, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.9577656675749319, | |
| "grad_norm": 0.3729090094566345, | |
| "learning_rate": 1.120030904283481e-05, | |
| "loss": 0.2951827943325043, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.9591280653950953, | |
| "grad_norm": 0.37600177526474, | |
| "learning_rate": 1.1178388500687482e-05, | |
| "loss": 0.3445882797241211, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.9604904632152589, | |
| "grad_norm": 0.3924142122268677, | |
| "learning_rate": 1.115646221501848e-05, | |
| "loss": 0.34941422939300537, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.9618528610354223, | |
| "grad_norm": 0.3414173424243927, | |
| "learning_rate": 1.1134530292697558e-05, | |
| "loss": 0.2858058214187622, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.9632152588555858, | |
| "grad_norm": 0.35240837931632996, | |
| "learning_rate": 1.1112592840621954e-05, | |
| "loss": 0.29714637994766235, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.9645776566757494, | |
| "grad_norm": 0.4237878620624542, | |
| "learning_rate": 1.1090649965715852e-05, | |
| "loss": 0.3578713536262512, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.9659400544959128, | |
| "grad_norm": 0.33575090765953064, | |
| "learning_rate": 1.1068701774929868e-05, | |
| "loss": 0.2601892352104187, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.9673024523160763, | |
| "grad_norm": 0.4071582555770874, | |
| "learning_rate": 1.1046748375240532e-05, | |
| "loss": 0.3512653410434723, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.9686648501362398, | |
| "grad_norm": 0.3828555941581726, | |
| "learning_rate": 1.1024789873649761e-05, | |
| "loss": 0.3424757719039917, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.9700272479564033, | |
| "grad_norm": 0.3985345661640167, | |
| "learning_rate": 1.1002826377184334e-05, | |
| "loss": 0.3405340313911438, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.9713896457765667, | |
| "grad_norm": 0.3681023120880127, | |
| "learning_rate": 1.0980857992895381e-05, | |
| "loss": 0.30252936482429504, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.9727520435967303, | |
| "grad_norm": 0.4133037328720093, | |
| "learning_rate": 1.0958884827857853e-05, | |
| "loss": 0.39566880464553833, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.9741144414168937, | |
| "grad_norm": 0.3889504671096802, | |
| "learning_rate": 1.0936906989170004e-05, | |
| "loss": 0.3164287805557251, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.9754768392370572, | |
| "grad_norm": 0.42561522126197815, | |
| "learning_rate": 1.0914924583952864e-05, | |
| "loss": 0.39496541023254395, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.9768392370572208, | |
| "grad_norm": 0.4179486632347107, | |
| "learning_rate": 1.0892937719349723e-05, | |
| "loss": 0.3516708016395569, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.9782016348773842, | |
| "grad_norm": 0.4315149188041687, | |
| "learning_rate": 1.087094650252561e-05, | |
| "loss": 0.40251898765563965, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.9795640326975477, | |
| "grad_norm": 0.4050588309764862, | |
| "learning_rate": 1.0848951040666762e-05, | |
| "loss": 0.3581554591655731, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.9809264305177112, | |
| "grad_norm": 0.4035855829715729, | |
| "learning_rate": 1.0826951440980105e-05, | |
| "loss": 0.3374115526676178, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9822888283378747, | |
| "grad_norm": 0.3701610565185547, | |
| "learning_rate": 1.0804947810692736e-05, | |
| "loss": 0.3004315495491028, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.9836512261580381, | |
| "grad_norm": 0.4019452631473541, | |
| "learning_rate": 1.07829402570514e-05, | |
| "loss": 0.3550060987472534, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.9850136239782016, | |
| "grad_norm": 0.4384351968765259, | |
| "learning_rate": 1.076092888732196e-05, | |
| "loss": 0.4013964831829071, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.9863760217983651, | |
| "grad_norm": 0.41561856865882874, | |
| "learning_rate": 1.073891380878888e-05, | |
| "loss": 0.3638450801372528, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.9877384196185286, | |
| "grad_norm": 0.4159381687641144, | |
| "learning_rate": 1.0716895128754704e-05, | |
| "loss": 0.3947365880012512, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.989100817438692, | |
| "grad_norm": 0.3710257112979889, | |
| "learning_rate": 1.069487295453952e-05, | |
| "loss": 0.32494619488716125, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.9904632152588556, | |
| "grad_norm": 0.3763940930366516, | |
| "learning_rate": 1.0672847393480466e-05, | |
| "loss": 0.32123690843582153, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.9918256130790191, | |
| "grad_norm": 0.4262928366661072, | |
| "learning_rate": 1.0650818552931162e-05, | |
| "loss": 0.344281405210495, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.9931880108991825, | |
| "grad_norm": 0.4106824994087219, | |
| "learning_rate": 1.0628786540261235e-05, | |
| "loss": 0.3853453993797302, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.9945504087193461, | |
| "grad_norm": 0.4608916640281677, | |
| "learning_rate": 1.0606751462855764e-05, | |
| "loss": 0.3871040940284729, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.9959128065395095, | |
| "grad_norm": 0.41636621952056885, | |
| "learning_rate": 1.0584713428114764e-05, | |
| "loss": 0.34777921438217163, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.997275204359673, | |
| "grad_norm": 0.41956406831741333, | |
| "learning_rate": 1.0562672543452666e-05, | |
| "loss": 0.38779330253601074, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.9986376021798365, | |
| "grad_norm": 0.35188162326812744, | |
| "learning_rate": 1.0540628916297791e-05, | |
| "loss": 0.2766571640968323, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.3943222165107727, | |
| "learning_rate": 1.0518582654091824e-05, | |
| "loss": 0.33342719078063965, | |
| "step": 734 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1468, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.300068371614925e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |