Text Generation
Transformers
Safetensors
gemma4_text
gemma-4
terminal-agent
full-finetuning
tb2-lite
gemma4-native-template
conversational
Instructions to use LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch")
model = AutoModelForCausalLM.from_pretrained("LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch
- SGLang
How to use LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch with Docker Model Runner:
docker model run hf.co/LLM-OS-Models/gemma-4-26B-A4B-Terminal-SFT-Native-Liquid-1Epoch
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 510, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00196078431372549, | |
| "grad_norm": 4080.0, | |
| "learning_rate": 0.0, | |
| "loss": 13.411018371582031, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00392156862745098, | |
| "grad_norm": 3520.0, | |
| "learning_rate": 3.2258064516129035e-07, | |
| "loss": 12.185730934143066, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0058823529411764705, | |
| "grad_norm": 1152.0, | |
| "learning_rate": 6.451612903225807e-07, | |
| "loss": 11.017197608947754, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00784313725490196, | |
| "grad_norm": 11072.0, | |
| "learning_rate": 9.67741935483871e-07, | |
| "loss": 15.801798820495605, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00980392156862745, | |
| "grad_norm": 2656.0, | |
| "learning_rate": 1.2903225806451614e-06, | |
| "loss": 12.750621795654297, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.011764705882352941, | |
| "grad_norm": 1752.0, | |
| "learning_rate": 1.6129032258064516e-06, | |
| "loss": 14.785816192626953, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.013725490196078431, | |
| "grad_norm": 912.0, | |
| "learning_rate": 1.935483870967742e-06, | |
| "loss": 11.984111785888672, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.01568627450980392, | |
| "grad_norm": 494.0, | |
| "learning_rate": 2.2580645161290324e-06, | |
| "loss": 11.356058120727539, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01764705882352941, | |
| "grad_norm": 8192.0, | |
| "learning_rate": 2.580645161290323e-06, | |
| "loss": 15.237844467163086, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0196078431372549, | |
| "grad_norm": 2128.0, | |
| "learning_rate": 2.903225806451613e-06, | |
| "loss": 13.795385360717773, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.021568627450980392, | |
| "grad_norm": 904.0, | |
| "learning_rate": 3.225806451612903e-06, | |
| "loss": 11.242429733276367, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.023529411764705882, | |
| "grad_norm": 1648.0, | |
| "learning_rate": 3.548387096774194e-06, | |
| "loss": 12.521835327148438, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.025490196078431372, | |
| "grad_norm": 544.0, | |
| "learning_rate": 3.870967741935484e-06, | |
| "loss": 11.381891250610352, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.027450980392156862, | |
| "grad_norm": 1632.0, | |
| "learning_rate": 4.193548387096774e-06, | |
| "loss": 11.05846881866455, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.029411764705882353, | |
| "grad_norm": 876.0, | |
| "learning_rate": 4.516129032258065e-06, | |
| "loss": 11.80467700958252, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03137254901960784, | |
| "grad_norm": 904.0, | |
| "learning_rate": 4.838709677419355e-06, | |
| "loss": 10.73906421661377, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03333333333333333, | |
| "grad_norm": 1048.0, | |
| "learning_rate": 5.161290322580646e-06, | |
| "loss": 10.119402885437012, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.03529411764705882, | |
| "grad_norm": 804.0, | |
| "learning_rate": 5.483870967741935e-06, | |
| "loss": 10.461430549621582, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.03725490196078431, | |
| "grad_norm": 540.0, | |
| "learning_rate": 5.806451612903226e-06, | |
| "loss": 9.726204872131348, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0392156862745098, | |
| "grad_norm": 30464.0, | |
| "learning_rate": 6.129032258064517e-06, | |
| "loss": 9.501750946044922, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.041176470588235294, | |
| "grad_norm": 1168.0, | |
| "learning_rate": 6.451612903225806e-06, | |
| "loss": 8.244911193847656, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.043137254901960784, | |
| "grad_norm": 552.0, | |
| "learning_rate": 6.774193548387097e-06, | |
| "loss": 7.955575942993164, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.045098039215686274, | |
| "grad_norm": 31872.0, | |
| "learning_rate": 7.096774193548388e-06, | |
| "loss": 8.407343864440918, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.047058823529411764, | |
| "grad_norm": 3456.0, | |
| "learning_rate": 7.4193548387096784e-06, | |
| "loss": 9.274879455566406, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.049019607843137254, | |
| "grad_norm": 2024.0, | |
| "learning_rate": 7.741935483870968e-06, | |
| "loss": 8.015785217285156, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.050980392156862744, | |
| "grad_norm": 384.0, | |
| "learning_rate": 8.064516129032258e-06, | |
| "loss": 7.839591979980469, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.052941176470588235, | |
| "grad_norm": 462.0, | |
| "learning_rate": 8.387096774193549e-06, | |
| "loss": 8.52480697631836, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.054901960784313725, | |
| "grad_norm": 740.0, | |
| "learning_rate": 8.70967741935484e-06, | |
| "loss": 8.2735595703125, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.056862745098039215, | |
| "grad_norm": 632.0, | |
| "learning_rate": 9.03225806451613e-06, | |
| "loss": 6.925156116485596, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.058823529411764705, | |
| "grad_norm": 1216.0, | |
| "learning_rate": 9.35483870967742e-06, | |
| "loss": 9.130925178527832, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.060784313725490195, | |
| "grad_norm": 724.0, | |
| "learning_rate": 9.67741935483871e-06, | |
| "loss": 7.672003746032715, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.06274509803921569, | |
| "grad_norm": 300.0, | |
| "learning_rate": 1e-05, | |
| "loss": 7.18646240234375, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.06470588235294118, | |
| "grad_norm": 99.0, | |
| "learning_rate": 9.999974774092107e-06, | |
| "loss": 7.103943824768066, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.06666666666666667, | |
| "grad_norm": 243.0, | |
| "learning_rate": 9.999899096622962e-06, | |
| "loss": 7.195862770080566, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.06862745098039216, | |
| "grad_norm": 149.0, | |
| "learning_rate": 9.999772968356182e-06, | |
| "loss": 6.914237976074219, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07058823529411765, | |
| "grad_norm": 135.0, | |
| "learning_rate": 9.999596390564446e-06, | |
| "loss": 6.820693016052246, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.07254901960784314, | |
| "grad_norm": 116.5, | |
| "learning_rate": 9.999369365029487e-06, | |
| "loss": 6.872282028198242, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.07450980392156863, | |
| "grad_norm": 172.0, | |
| "learning_rate": 9.999091894042077e-06, | |
| "loss": 6.997214317321777, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.07647058823529412, | |
| "grad_norm": 9088.0, | |
| "learning_rate": 9.998763980401997e-06, | |
| "loss": 7.570876121520996, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0784313725490196, | |
| "grad_norm": 188.0, | |
| "learning_rate": 9.998385627418015e-06, | |
| "loss": 6.341652870178223, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0803921568627451, | |
| "grad_norm": 81.5, | |
| "learning_rate": 9.997956838907853e-06, | |
| "loss": 6.633429050445557, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.08235294117647059, | |
| "grad_norm": 127.0, | |
| "learning_rate": 9.997477619198138e-06, | |
| "loss": 6.926297187805176, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.08431372549019608, | |
| "grad_norm": 136.0, | |
| "learning_rate": 9.996947973124372e-06, | |
| "loss": 6.302867889404297, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.08627450980392157, | |
| "grad_norm": 164.0, | |
| "learning_rate": 9.996367906030879e-06, | |
| "loss": 6.492257118225098, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.08823529411764706, | |
| "grad_norm": 68.5, | |
| "learning_rate": 9.995737423770746e-06, | |
| "loss": 6.334178447723389, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.09019607843137255, | |
| "grad_norm": 63.0, | |
| "learning_rate": 9.995056532705766e-06, | |
| "loss": 5.588380336761475, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.09215686274509804, | |
| "grad_norm": 34.75, | |
| "learning_rate": 9.994325239706377e-06, | |
| "loss": 6.14424467086792, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.09411764705882353, | |
| "grad_norm": 46.5, | |
| "learning_rate": 9.993543552151594e-06, | |
| "loss": 5.817169666290283, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.09607843137254903, | |
| "grad_norm": 35.25, | |
| "learning_rate": 9.992711477928925e-06, | |
| "loss": 5.768467426300049, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.09803921568627451, | |
| "grad_norm": 450.0, | |
| "learning_rate": 9.991829025434305e-06, | |
| "loss": 5.587636470794678, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 38.75, | |
| "learning_rate": 9.990896203571994e-06, | |
| "loss": 5.87672233581543, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.10196078431372549, | |
| "grad_norm": 51.25, | |
| "learning_rate": 9.98991302175451e-06, | |
| "loss": 5.926921844482422, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.10392156862745099, | |
| "grad_norm": 36.25, | |
| "learning_rate": 9.98887948990251e-06, | |
| "loss": 6.116643905639648, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.10588235294117647, | |
| "grad_norm": 19.875, | |
| "learning_rate": 9.987795618444707e-06, | |
| "loss": 5.681459903717041, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.10784313725490197, | |
| "grad_norm": 69.5, | |
| "learning_rate": 9.986661418317759e-06, | |
| "loss": 6.164295196533203, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.10980392156862745, | |
| "grad_norm": 163.0, | |
| "learning_rate": 9.985476900966156e-06, | |
| "loss": 5.477411270141602, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.11176470588235295, | |
| "grad_norm": 41.0, | |
| "learning_rate": 9.984242078342108e-06, | |
| "loss": 6.057120323181152, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.11372549019607843, | |
| "grad_norm": 30.0, | |
| "learning_rate": 9.982956962905423e-06, | |
| "loss": 5.983468532562256, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.11568627450980393, | |
| "grad_norm": 36.75, | |
| "learning_rate": 9.981621567623385e-06, | |
| "loss": 6.317110061645508, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 32.25, | |
| "learning_rate": 9.980235905970615e-06, | |
| "loss": 5.8935651779174805, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11960784313725491, | |
| "grad_norm": 33.25, | |
| "learning_rate": 9.978799991928945e-06, | |
| "loss": 5.925508499145508, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.12156862745098039, | |
| "grad_norm": 266.0, | |
| "learning_rate": 9.977313839987265e-06, | |
| "loss": 6.190548419952393, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.12352941176470589, | |
| "grad_norm": 33.25, | |
| "learning_rate": 9.975777465141391e-06, | |
| "loss": 6.119345188140869, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.12549019607843137, | |
| "grad_norm": 41.5, | |
| "learning_rate": 9.974190882893901e-06, | |
| "loss": 5.63485860824585, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.12745098039215685, | |
| "grad_norm": 33.25, | |
| "learning_rate": 9.972554109253988e-06, | |
| "loss": 5.473740577697754, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.12941176470588237, | |
| "grad_norm": 16.125, | |
| "learning_rate": 9.970867160737293e-06, | |
| "loss": 5.605908393859863, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.13137254901960785, | |
| "grad_norm": 35.25, | |
| "learning_rate": 9.969130054365737e-06, | |
| "loss": 5.744837760925293, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 26.25, | |
| "learning_rate": 9.967342807667355e-06, | |
| "loss": 5.603204727172852, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.13529411764705881, | |
| "grad_norm": 78.5, | |
| "learning_rate": 9.965505438676115e-06, | |
| "loss": 5.763700485229492, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.13725490196078433, | |
| "grad_norm": 34.0, | |
| "learning_rate": 9.963617965931738e-06, | |
| "loss": 5.855405807495117, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1392156862745098, | |
| "grad_norm": 61.0, | |
| "learning_rate": 9.961680408479508e-06, | |
| "loss": 5.495353698730469, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.1411764705882353, | |
| "grad_norm": 18.5, | |
| "learning_rate": 9.959692785870086e-06, | |
| "loss": 5.601709365844727, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.14313725490196078, | |
| "grad_norm": 33.5, | |
| "learning_rate": 9.957655118159304e-06, | |
| "loss": 5.185734272003174, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.1450980392156863, | |
| "grad_norm": 28.0, | |
| "learning_rate": 9.955567425907968e-06, | |
| "loss": 5.452523231506348, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.14705882352941177, | |
| "grad_norm": 48.5, | |
| "learning_rate": 9.953429730181653e-06, | |
| "loss": 5.12846040725708, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.14901960784313725, | |
| "grad_norm": 195.0, | |
| "learning_rate": 9.951242052550487e-06, | |
| "loss": 6.20569372177124, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.15098039215686274, | |
| "grad_norm": 27.75, | |
| "learning_rate": 9.949004415088928e-06, | |
| "loss": 5.708788871765137, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.15294117647058825, | |
| "grad_norm": 12.375, | |
| "learning_rate": 9.946716840375552e-06, | |
| "loss": 5.593677520751953, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.15490196078431373, | |
| "grad_norm": 37.0, | |
| "learning_rate": 9.944379351492818e-06, | |
| "loss": 5.621125221252441, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.1568627450980392, | |
| "grad_norm": 18.75, | |
| "learning_rate": 9.941991972026839e-06, | |
| "loss": 5.425329685211182, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1588235294117647, | |
| "grad_norm": 302.0, | |
| "learning_rate": 9.939554726067142e-06, | |
| "loss": 5.684831142425537, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.1607843137254902, | |
| "grad_norm": 756.0, | |
| "learning_rate": 9.937067638206418e-06, | |
| "loss": 5.517162322998047, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.1627450980392157, | |
| "grad_norm": 26.0, | |
| "learning_rate": 9.934530733540293e-06, | |
| "loss": 4.898902416229248, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.16470588235294117, | |
| "grad_norm": 18.375, | |
| "learning_rate": 9.931944037667056e-06, | |
| "loss": 5.5186262130737305, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 21.0, | |
| "learning_rate": 9.929307576687404e-06, | |
| "loss": 5.309273719787598, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.16862745098039217, | |
| "grad_norm": 18.75, | |
| "learning_rate": 9.926621377204188e-06, | |
| "loss": 5.826333999633789, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.17058823529411765, | |
| "grad_norm": 30.625, | |
| "learning_rate": 9.923885466322135e-06, | |
| "loss": 5.59458065032959, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.17254901960784313, | |
| "grad_norm": 47.5, | |
| "learning_rate": 9.921099871647582e-06, | |
| "loss": 5.588604927062988, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.17450980392156862, | |
| "grad_norm": 39.25, | |
| "learning_rate": 9.918264621288187e-06, | |
| "loss": 5.356253623962402, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.17647058823529413, | |
| "grad_norm": 224.0, | |
| "learning_rate": 9.91537974385266e-06, | |
| "loss": 5.486222267150879, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1784313725490196, | |
| "grad_norm": 23.125, | |
| "learning_rate": 9.912445268450459e-06, | |
| "loss": 5.685971736907959, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.1803921568627451, | |
| "grad_norm": 17.25, | |
| "learning_rate": 9.909461224691506e-06, | |
| "loss": 5.2897138595581055, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.18235294117647058, | |
| "grad_norm": 35.25, | |
| "learning_rate": 9.906427642685889e-06, | |
| "loss": 5.555470943450928, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.1843137254901961, | |
| "grad_norm": 32.0, | |
| "learning_rate": 9.90334455304355e-06, | |
| "loss": 5.584178447723389, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.18627450980392157, | |
| "grad_norm": 21.25, | |
| "learning_rate": 9.900211986873986e-06, | |
| "loss": 5.32234001159668, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.18823529411764706, | |
| "grad_norm": 1672.0, | |
| "learning_rate": 9.897029975785924e-06, | |
| "loss": 5.3855438232421875, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.19019607843137254, | |
| "grad_norm": 29.0, | |
| "learning_rate": 9.89379855188701e-06, | |
| "loss": 4.83759880065918, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.19215686274509805, | |
| "grad_norm": 50.5, | |
| "learning_rate": 9.89051774778349e-06, | |
| "loss": 5.624599456787109, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.19411764705882353, | |
| "grad_norm": 14.9375, | |
| "learning_rate": 9.887187596579865e-06, | |
| "loss": 5.616288185119629, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.19607843137254902, | |
| "grad_norm": 16.625, | |
| "learning_rate": 9.883808131878573e-06, | |
| "loss": 5.2882161140441895, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1980392156862745, | |
| "grad_norm": 31.125, | |
| "learning_rate": 9.880379387779637e-06, | |
| "loss": 5.646514892578125, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 37.75, | |
| "learning_rate": 9.87690139888033e-06, | |
| "loss": 5.044747352600098, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2019607843137255, | |
| "grad_norm": 21.0, | |
| "learning_rate": 9.873374200274826e-06, | |
| "loss": 5.134984970092773, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.20392156862745098, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 9.869797827553837e-06, | |
| "loss": 5.253122329711914, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.20588235294117646, | |
| "grad_norm": 32.25, | |
| "learning_rate": 9.866172316804265e-06, | |
| "loss": 5.45121955871582, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.20784313725490197, | |
| "grad_norm": 13.5625, | |
| "learning_rate": 9.862497704608829e-06, | |
| "loss": 5.211917400360107, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.20980392156862746, | |
| "grad_norm": 21.0, | |
| "learning_rate": 9.8587740280457e-06, | |
| "loss": 5.20741081237793, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.21176470588235294, | |
| "grad_norm": 20.375, | |
| "learning_rate": 9.855001324688128e-06, | |
| "loss": 5.545747756958008, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.21372549019607842, | |
| "grad_norm": 18.125, | |
| "learning_rate": 9.851179632604057e-06, | |
| "loss": 5.483153820037842, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.21568627450980393, | |
| "grad_norm": 16.375, | |
| "learning_rate": 9.847308990355752e-06, | |
| "loss": 5.224430561065674, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.21764705882352942, | |
| "grad_norm": 17.5, | |
| "learning_rate": 9.843389436999396e-06, | |
| "loss": 5.291140556335449, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2196078431372549, | |
| "grad_norm": 70.0, | |
| "learning_rate": 9.839421012084709e-06, | |
| "loss": 5.474943161010742, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.22156862745098038, | |
| "grad_norm": 38.5, | |
| "learning_rate": 9.835403755654535e-06, | |
| "loss": 5.760432243347168, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.2235294117647059, | |
| "grad_norm": 60.5, | |
| "learning_rate": 9.831337708244454e-06, | |
| "loss": 4.943289756774902, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.22549019607843138, | |
| "grad_norm": 11.875, | |
| "learning_rate": 9.827222910882358e-06, | |
| "loss": 5.659804344177246, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.22745098039215686, | |
| "grad_norm": 34.0, | |
| "learning_rate": 9.82305940508805e-06, | |
| "loss": 5.012216567993164, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.22941176470588234, | |
| "grad_norm": 17.5, | |
| "learning_rate": 9.818847232872815e-06, | |
| "loss": 5.458867073059082, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.23137254901960785, | |
| "grad_norm": 30.625, | |
| "learning_rate": 9.814586436738998e-06, | |
| "loss": 5.191954612731934, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.23333333333333334, | |
| "grad_norm": 117.0, | |
| "learning_rate": 9.81027705967958e-06, | |
| "loss": 4.847655296325684, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 92.0, | |
| "learning_rate": 9.805919145177741e-06, | |
| "loss": 5.273130416870117, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2372549019607843, | |
| "grad_norm": 38.75, | |
| "learning_rate": 9.801512737206422e-06, | |
| "loss": 5.468034744262695, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.23921568627450981, | |
| "grad_norm": 23.875, | |
| "learning_rate": 9.797057880227878e-06, | |
| "loss": 5.291849613189697, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2411764705882353, | |
| "grad_norm": 22.75, | |
| "learning_rate": 9.792554619193235e-06, | |
| "loss": 5.240516185760498, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.24313725490196078, | |
| "grad_norm": 21.875, | |
| "learning_rate": 9.78800299954203e-06, | |
| "loss": 5.448852062225342, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.24509803921568626, | |
| "grad_norm": 256.0, | |
| "learning_rate": 9.783403067201763e-06, | |
| "loss": 5.412049293518066, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.24705882352941178, | |
| "grad_norm": 226.0, | |
| "learning_rate": 9.778754868587414e-06, | |
| "loss": 5.50108528137207, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.24901960784313726, | |
| "grad_norm": 35.5, | |
| "learning_rate": 9.774058450601003e-06, | |
| "loss": 5.559615135192871, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.25098039215686274, | |
| "grad_norm": 31.125, | |
| "learning_rate": 9.76931386063109e-06, | |
| "loss": 5.127172470092773, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2529411764705882, | |
| "grad_norm": 225.0, | |
| "learning_rate": 9.76452114655231e-06, | |
| "loss": 5.387312889099121, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.2549019607843137, | |
| "grad_norm": 80.0, | |
| "learning_rate": 9.759680356724888e-06, | |
| "loss": 5.126557350158691, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2568627450980392, | |
| "grad_norm": 11.75, | |
| "learning_rate": 9.754791539994153e-06, | |
| "loss": 5.493594646453857, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.25882352941176473, | |
| "grad_norm": 15.0625, | |
| "learning_rate": 9.749854745690041e-06, | |
| "loss": 5.234238624572754, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2607843137254902, | |
| "grad_norm": 55.5, | |
| "learning_rate": 9.744870023626598e-06, | |
| "loss": 5.279170513153076, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.2627450980392157, | |
| "grad_norm": 25.875, | |
| "learning_rate": 9.739837424101484e-06, | |
| "loss": 4.998948097229004, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2647058823529412, | |
| "grad_norm": 18.125, | |
| "learning_rate": 9.73475699789545e-06, | |
| "loss": 5.123308181762695, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 16.125, | |
| "learning_rate": 9.729628796271844e-06, | |
| "loss": 5.026663780212402, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.26862745098039215, | |
| "grad_norm": 45.0, | |
| "learning_rate": 9.724452870976084e-06, | |
| "loss": 4.588525295257568, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.27058823529411763, | |
| "grad_norm": 46.5, | |
| "learning_rate": 9.719229274235134e-06, | |
| "loss": 5.217161178588867, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2725490196078431, | |
| "grad_norm": 10.8125, | |
| "learning_rate": 9.713958058756985e-06, | |
| "loss": 4.979425430297852, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.27450980392156865, | |
| "grad_norm": 41.5, | |
| "learning_rate": 9.708639277730112e-06, | |
| "loss": 5.812428951263428, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.27647058823529413, | |
| "grad_norm": 18.5, | |
| "learning_rate": 9.703272984822947e-06, | |
| "loss": 4.830598831176758, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.2784313725490196, | |
| "grad_norm": 143.0, | |
| "learning_rate": 9.697859234183336e-06, | |
| "loss": 5.446103096008301, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.2803921568627451, | |
| "grad_norm": 29.75, | |
| "learning_rate": 9.692398080437991e-06, | |
| "loss": 5.361805438995361, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.2823529411764706, | |
| "grad_norm": 16.625, | |
| "learning_rate": 9.68688957869193e-06, | |
| "loss": 5.320940971374512, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.28431372549019607, | |
| "grad_norm": 15.9375, | |
| "learning_rate": 9.681333784527945e-06, | |
| "loss": 4.9812116622924805, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.28627450980392155, | |
| "grad_norm": 43.0, | |
| "learning_rate": 9.67573075400601e-06, | |
| "loss": 5.004095077514648, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.28823529411764703, | |
| "grad_norm": 20.625, | |
| "learning_rate": 9.670080543662742e-06, | |
| "loss": 5.257203102111816, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.2901960784313726, | |
| "grad_norm": 12.8125, | |
| "learning_rate": 9.66438321051081e-06, | |
| "loss": 5.016636848449707, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.29215686274509806, | |
| "grad_norm": 17.75, | |
| "learning_rate": 9.658638812038379e-06, | |
| "loss": 5.388177394866943, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.29411764705882354, | |
| "grad_norm": 19.0, | |
| "learning_rate": 9.652847406208514e-06, | |
| "loss": 4.996750831604004, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.296078431372549, | |
| "grad_norm": 17.625, | |
| "learning_rate": 9.647009051458604e-06, | |
| "loss": 5.164402961730957, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.2980392156862745, | |
| "grad_norm": 14.9375, | |
| "learning_rate": 9.641123806699769e-06, | |
| "loss": 5.236954689025879, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 12.1875, | |
| "learning_rate": 9.635191731316262e-06, | |
| "loss": 5.390079498291016, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.30196078431372547, | |
| "grad_norm": 33.0, | |
| "learning_rate": 9.629212885164882e-06, | |
| "loss": 5.225905418395996, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.30392156862745096, | |
| "grad_norm": 456.0, | |
| "learning_rate": 9.623187328574357e-06, | |
| "loss": 5.147684097290039, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3058823529411765, | |
| "grad_norm": 19.0, | |
| "learning_rate": 9.617115122344742e-06, | |
| "loss": 5.367049217224121, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.307843137254902, | |
| "grad_norm": 16.75, | |
| "learning_rate": 9.6109963277468e-06, | |
| "loss": 4.848927974700928, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.30980392156862746, | |
| "grad_norm": 26.0, | |
| "learning_rate": 9.604831006521393e-06, | |
| "loss": 5.3677873611450195, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.31176470588235294, | |
| "grad_norm": 30.875, | |
| "learning_rate": 9.598619220878852e-06, | |
| "loss": 5.039382457733154, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3137254901960784, | |
| "grad_norm": 23.375, | |
| "learning_rate": 9.592361033498349e-06, | |
| "loss": 5.168381690979004, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3156862745098039, | |
| "grad_norm": 24.5, | |
| "learning_rate": 9.586056507527266e-06, | |
| "loss": 5.181293964385986, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.3176470588235294, | |
| "grad_norm": 21.625, | |
| "learning_rate": 9.57970570658056e-06, | |
| "loss": 4.873326301574707, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3196078431372549, | |
| "grad_norm": 56.75, | |
| "learning_rate": 9.57330869474012e-06, | |
| "loss": 5.396710395812988, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.3215686274509804, | |
| "grad_norm": 540.0, | |
| "learning_rate": 9.566865536554119e-06, | |
| "loss": 5.066160202026367, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.3235294117647059, | |
| "grad_norm": 125.0, | |
| "learning_rate": 9.560376297036362e-06, | |
| "loss": 5.154975891113281, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3254901960784314, | |
| "grad_norm": 24.875, | |
| "learning_rate": 9.553841041665632e-06, | |
| "loss": 5.022787094116211, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.32745098039215687, | |
| "grad_norm": 13.125, | |
| "learning_rate": 9.54725983638503e-06, | |
| "loss": 5.266829490661621, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.32941176470588235, | |
| "grad_norm": 22.375, | |
| "learning_rate": 9.540632747601309e-06, | |
| "loss": 5.651679515838623, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.33137254901960783, | |
| "grad_norm": 23.25, | |
| "learning_rate": 9.533959842184195e-06, | |
| "loss": 5.472508907318115, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 68.5, | |
| "learning_rate": 9.527241187465735e-06, | |
| "loss": 5.464788913726807, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3352941176470588, | |
| "grad_norm": 121.5, | |
| "learning_rate": 9.520476851239588e-06, | |
| "loss": 5.100685119628906, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.33725490196078434, | |
| "grad_norm": 25.5, | |
| "learning_rate": 9.513666901760368e-06, | |
| "loss": 5.16568660736084, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3392156862745098, | |
| "grad_norm": 50.5, | |
| "learning_rate": 9.506811407742938e-06, | |
| "loss": 5.174825668334961, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.3411764705882353, | |
| "grad_norm": 14.625, | |
| "learning_rate": 9.49991043836172e-06, | |
| "loss": 5.095907211303711, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3431372549019608, | |
| "grad_norm": 636.0, | |
| "learning_rate": 9.49296406325e-06, | |
| "loss": 5.351215362548828, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.34509803921568627, | |
| "grad_norm": 15.625, | |
| "learning_rate": 9.485972352499231e-06, | |
| "loss": 4.977153301239014, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.34705882352941175, | |
| "grad_norm": 20.125, | |
| "learning_rate": 9.478935376658308e-06, | |
| "loss": 4.573214530944824, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.34901960784313724, | |
| "grad_norm": 33.0, | |
| "learning_rate": 9.471853206732875e-06, | |
| "loss": 5.356305122375488, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3509803921568627, | |
| "grad_norm": 9.25, | |
| "learning_rate": 9.4647259141846e-06, | |
| "loss": 5.243356704711914, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 20.375, | |
| "learning_rate": 9.457553570930451e-06, | |
| "loss": 5.4737443923950195, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.35490196078431374, | |
| "grad_norm": 15.625, | |
| "learning_rate": 9.450336249341976e-06, | |
| "loss": 4.992424488067627, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.3568627450980392, | |
| "grad_norm": 236.0, | |
| "learning_rate": 9.443074022244573e-06, | |
| "loss": 4.870045185089111, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.3588235294117647, | |
| "grad_norm": 11.4375, | |
| "learning_rate": 9.435766962916749e-06, | |
| "loss": 5.3330488204956055, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.3607843137254902, | |
| "grad_norm": 18.875, | |
| "learning_rate": 9.428415145089385e-06, | |
| "loss": 5.4815239906311035, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.3627450980392157, | |
| "grad_norm": 57.75, | |
| "learning_rate": 9.421018642944996e-06, | |
| "loss": 4.641464710235596, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.36470588235294116, | |
| "grad_norm": 50.25, | |
| "learning_rate": 9.413577531116973e-06, | |
| "loss": 5.204540729522705, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.36666666666666664, | |
| "grad_norm": 19.375, | |
| "learning_rate": 9.406091884688837e-06, | |
| "loss": 5.2501301765441895, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.3686274509803922, | |
| "grad_norm": 12.8125, | |
| "learning_rate": 9.398561779193477e-06, | |
| "loss": 5.34678840637207, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.37058823529411766, | |
| "grad_norm": 31.75, | |
| "learning_rate": 9.390987290612396e-06, | |
| "loss": 5.497261047363281, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.37254901960784315, | |
| "grad_norm": 27.875, | |
| "learning_rate": 9.38336849537493e-06, | |
| "loss": 5.6493706703186035, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.37450980392156863, | |
| "grad_norm": 109.5, | |
| "learning_rate": 9.375705470357493e-06, | |
| "loss": 5.106384754180908, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.3764705882352941, | |
| "grad_norm": 36.5, | |
| "learning_rate": 9.367998292882789e-06, | |
| "loss": 4.899719715118408, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.3784313725490196, | |
| "grad_norm": 11.375, | |
| "learning_rate": 9.36024704071904e-06, | |
| "loss": 4.735408782958984, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.3803921568627451, | |
| "grad_norm": 10.25, | |
| "learning_rate": 9.35245179207919e-06, | |
| "loss": 4.760075569152832, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.38235294117647056, | |
| "grad_norm": 17.5, | |
| "learning_rate": 9.344612625620134e-06, | |
| "loss": 4.938511848449707, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.3843137254901961, | |
| "grad_norm": 20.0, | |
| "learning_rate": 9.336729620441906e-06, | |
| "loss": 5.251160621643066, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.3862745098039216, | |
| "grad_norm": 28.375, | |
| "learning_rate": 9.328802856086891e-06, | |
| "loss": 5.085601806640625, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.38823529411764707, | |
| "grad_norm": 15.0, | |
| "learning_rate": 9.32083241253902e-06, | |
| "loss": 5.098453998565674, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.39019607843137255, | |
| "grad_norm": 46.25, | |
| "learning_rate": 9.312818370222962e-06, | |
| "loss": 5.29483699798584, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.39215686274509803, | |
| "grad_norm": 11.5, | |
| "learning_rate": 9.304760810003318e-06, | |
| "loss": 5.213921070098877, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3941176470588235, | |
| "grad_norm": 67.0, | |
| "learning_rate": 9.296659813183794e-06, | |
| "loss": 5.252015113830566, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.396078431372549, | |
| "grad_norm": 13.6875, | |
| "learning_rate": 9.28851546150639e-06, | |
| "loss": 5.031217575073242, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.3980392156862745, | |
| "grad_norm": 31.625, | |
| "learning_rate": 9.280327837150572e-06, | |
| "loss": 5.183858871459961, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 49.75, | |
| "learning_rate": 9.272097022732444e-06, | |
| "loss": 5.053634166717529, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4019607843137255, | |
| "grad_norm": 18.375, | |
| "learning_rate": 9.263823101303911e-06, | |
| "loss": 4.873892784118652, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.403921568627451, | |
| "grad_norm": 7.125, | |
| "learning_rate": 9.255506156351846e-06, | |
| "loss": 5.063465118408203, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.40588235294117647, | |
| "grad_norm": 197.0, | |
| "learning_rate": 9.247146271797244e-06, | |
| "loss": 5.553455352783203, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.40784313725490196, | |
| "grad_norm": 22.75, | |
| "learning_rate": 9.238743531994378e-06, | |
| "loss": 4.573488712310791, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.40980392156862744, | |
| "grad_norm": 68.0, | |
| "learning_rate": 9.23029802172994e-06, | |
| "loss": 4.863184452056885, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.4117647058823529, | |
| "grad_norm": 9.625, | |
| "learning_rate": 9.221809826222198e-06, | |
| "loss": 5.0148773193359375, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4137254901960784, | |
| "grad_norm": 21.0, | |
| "learning_rate": 9.213279031120129e-06, | |
| "loss": 5.232052326202393, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.41568627450980394, | |
| "grad_norm": 13.0625, | |
| "learning_rate": 9.20470572250255e-06, | |
| "loss": 5.250632286071777, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4176470588235294, | |
| "grad_norm": 24.625, | |
| "learning_rate": 9.196089986877262e-06, | |
| "loss": 5.105681419372559, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.4196078431372549, | |
| "grad_norm": 37.25, | |
| "learning_rate": 9.18743191118016e-06, | |
| "loss": 5.038761138916016, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4215686274509804, | |
| "grad_norm": 12.5625, | |
| "learning_rate": 9.17873158277438e-06, | |
| "loss": 4.875316619873047, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4235294117647059, | |
| "grad_norm": 11.4375, | |
| "learning_rate": 9.16998908944939e-06, | |
| "loss": 5.176741123199463, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.42549019607843136, | |
| "grad_norm": 12.25, | |
| "learning_rate": 9.161204519420126e-06, | |
| "loss": 5.155636787414551, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.42745098039215684, | |
| "grad_norm": 19.125, | |
| "learning_rate": 9.152377961326085e-06, | |
| "loss": 4.634543418884277, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4294117647058823, | |
| "grad_norm": 29.0, | |
| "learning_rate": 9.14350950423045e-06, | |
| "loss": 5.09789514541626, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.43137254901960786, | |
| "grad_norm": 31.875, | |
| "learning_rate": 9.134599237619167e-06, | |
| "loss": 5.202082633972168, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.43333333333333335, | |
| "grad_norm": 22.625, | |
| "learning_rate": 9.125647251400068e-06, | |
| "loss": 4.814334869384766, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.43529411764705883, | |
| "grad_norm": 13.875, | |
| "learning_rate": 9.11665363590194e-06, | |
| "loss": 5.112233638763428, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.4372549019607843, | |
| "grad_norm": 43.25, | |
| "learning_rate": 9.107618481873632e-06, | |
| "loss": 4.683226585388184, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.4392156862745098, | |
| "grad_norm": 18.75, | |
| "learning_rate": 9.098541880483129e-06, | |
| "loss": 5.400690078735352, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4411764705882353, | |
| "grad_norm": 31.5, | |
| "learning_rate": 9.089423923316636e-06, | |
| "loss": 5.162740230560303, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.44313725490196076, | |
| "grad_norm": 9.5625, | |
| "learning_rate": 9.08026470237765e-06, | |
| "loss": 4.947070121765137, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.44509803921568625, | |
| "grad_norm": 26.0, | |
| "learning_rate": 9.07106431008604e-06, | |
| "loss": 5.304220199584961, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.4470588235294118, | |
| "grad_norm": 18.5, | |
| "learning_rate": 9.0618228392771e-06, | |
| "loss": 5.03424072265625, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.44901960784313727, | |
| "grad_norm": 46.0, | |
| "learning_rate": 9.052540383200634e-06, | |
| "loss": 5.069074630737305, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.45098039215686275, | |
| "grad_norm": 15.0, | |
| "learning_rate": 9.043217035519986e-06, | |
| "loss": 5.057849407196045, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.45294117647058824, | |
| "grad_norm": 13.5625, | |
| "learning_rate": 9.033852890311127e-06, | |
| "loss": 5.0433783531188965, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.4549019607843137, | |
| "grad_norm": 15.625, | |
| "learning_rate": 9.02444804206168e-06, | |
| "loss": 5.389444351196289, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.4568627450980392, | |
| "grad_norm": 31.125, | |
| "learning_rate": 9.01500258566998e-06, | |
| "loss": 4.969291687011719, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.4588235294117647, | |
| "grad_norm": 12.4375, | |
| "learning_rate": 9.005516616444112e-06, | |
| "loss": 5.279530048370361, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.46078431372549017, | |
| "grad_norm": 36.75, | |
| "learning_rate": 8.99599023010095e-06, | |
| "loss": 5.162971019744873, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.4627450980392157, | |
| "grad_norm": 22.25, | |
| "learning_rate": 8.986423522765191e-06, | |
| "loss": 5.149713039398193, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.4647058823529412, | |
| "grad_norm": 236.0, | |
| "learning_rate": 8.976816590968388e-06, | |
| "loss": 5.080571174621582, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.4666666666666667, | |
| "grad_norm": 13.75, | |
| "learning_rate": 8.967169531647971e-06, | |
| "loss": 5.257791519165039, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.46862745098039216, | |
| "grad_norm": 12.75, | |
| "learning_rate": 8.957482442146271e-06, | |
| "loss": 5.317870616912842, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 29.0, | |
| "learning_rate": 8.947755420209541e-06, | |
| "loss": 5.190963268280029, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.4725490196078431, | |
| "grad_norm": 42.5, | |
| "learning_rate": 8.937988563986963e-06, | |
| "loss": 4.852507591247559, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.4745098039215686, | |
| "grad_norm": 11.5, | |
| "learning_rate": 8.928181972029664e-06, | |
| "loss": 5.3673176765441895, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.4764705882352941, | |
| "grad_norm": 44.25, | |
| "learning_rate": 8.918335743289717e-06, | |
| "loss": 5.486108779907227, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.47843137254901963, | |
| "grad_norm": 16.25, | |
| "learning_rate": 8.90844997711915e-06, | |
| "loss": 5.265598297119141, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.4803921568627451, | |
| "grad_norm": 72.5, | |
| "learning_rate": 8.898524773268926e-06, | |
| "loss": 5.243829727172852, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.4823529411764706, | |
| "grad_norm": 12.375, | |
| "learning_rate": 8.888560231887963e-06, | |
| "loss": 4.801326751708984, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.4843137254901961, | |
| "grad_norm": 19.25, | |
| "learning_rate": 8.8785564535221e-06, | |
| "loss": 5.160724639892578, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.48627450980392156, | |
| "grad_norm": 34.5, | |
| "learning_rate": 8.868513539113093e-06, | |
| "loss": 5.16162109375, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.48823529411764705, | |
| "grad_norm": 209.0, | |
| "learning_rate": 8.858431589997597e-06, | |
| "loss": 5.235032558441162, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.49019607843137253, | |
| "grad_norm": 134.0, | |
| "learning_rate": 8.848310707906138e-06, | |
| "loss": 4.463550090789795, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.492156862745098, | |
| "grad_norm": 26.75, | |
| "learning_rate": 8.838150994962094e-06, | |
| "loss": 5.130112648010254, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.49411764705882355, | |
| "grad_norm": 73.0, | |
| "learning_rate": 8.827952553680656e-06, | |
| "loss": 5.642120838165283, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.49607843137254903, | |
| "grad_norm": 206.0, | |
| "learning_rate": 8.817715486967803e-06, | |
| "loss": 5.265994548797607, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.4980392156862745, | |
| "grad_norm": 19.0, | |
| "learning_rate": 8.807439898119252e-06, | |
| "loss": 5.119565010070801, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 19.25, | |
| "learning_rate": 8.797125890819429e-06, | |
| "loss": 5.100986957550049, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5019607843137255, | |
| "grad_norm": 28.875, | |
| "learning_rate": 8.786773569140414e-06, | |
| "loss": 4.755583763122559, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.503921568627451, | |
| "grad_norm": 12.875, | |
| "learning_rate": 8.776383037540888e-06, | |
| "loss": 4.882185935974121, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5058823529411764, | |
| "grad_norm": 17.25, | |
| "learning_rate": 8.765954400865093e-06, | |
| "loss": 5.192405700683594, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5078431372549019, | |
| "grad_norm": 15.5625, | |
| "learning_rate": 8.755487764341756e-06, | |
| "loss": 4.974501132965088, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.5098039215686274, | |
| "grad_norm": 22.5, | |
| "learning_rate": 8.744983233583044e-06, | |
| "loss": 4.73140811920166, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5117647058823529, | |
| "grad_norm": 22.25, | |
| "learning_rate": 8.734440914583486e-06, | |
| "loss": 5.232514381408691, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.5137254901960784, | |
| "grad_norm": 44.5, | |
| "learning_rate": 8.72386091371891e-06, | |
| "loss": 5.060776233673096, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.515686274509804, | |
| "grad_norm": 55.75, | |
| "learning_rate": 8.713243337745366e-06, | |
| "loss": 4.823212146759033, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.5176470588235295, | |
| "grad_norm": 14.3125, | |
| "learning_rate": 8.70258829379805e-06, | |
| "loss": 5.4208269119262695, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5196078431372549, | |
| "grad_norm": 11.5625, | |
| "learning_rate": 8.691895889390228e-06, | |
| "loss": 4.989123821258545, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5215686274509804, | |
| "grad_norm": 205.0, | |
| "learning_rate": 8.681166232412142e-06, | |
| "loss": 5.007363319396973, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5235294117647059, | |
| "grad_norm": 14.75, | |
| "learning_rate": 8.670399431129926e-06, | |
| "loss": 5.106622695922852, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.5254901960784314, | |
| "grad_norm": 33.5, | |
| "learning_rate": 8.659595594184516e-06, | |
| "loss": 5.067702293395996, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5274509803921569, | |
| "grad_norm": 22.375, | |
| "learning_rate": 8.648754830590552e-06, | |
| "loss": 4.492011070251465, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.5294117647058824, | |
| "grad_norm": 14.75, | |
| "learning_rate": 8.637877249735274e-06, | |
| "loss": 4.7508039474487305, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5313725490196078, | |
| "grad_norm": 126.5, | |
| "learning_rate": 8.626962961377423e-06, | |
| "loss": 5.045147895812988, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 23.25, | |
| "learning_rate": 8.616012075646134e-06, | |
| "loss": 5.0517730712890625, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5352941176470588, | |
| "grad_norm": 20.875, | |
| "learning_rate": 8.605024703039817e-06, | |
| "loss": 5.3973517417907715, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.5372549019607843, | |
| "grad_norm": 11.5625, | |
| "learning_rate": 8.594000954425056e-06, | |
| "loss": 5.324957847595215, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5392156862745098, | |
| "grad_norm": 13.0, | |
| "learning_rate": 8.582940941035476e-06, | |
| "loss": 5.2578349113464355, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5411764705882353, | |
| "grad_norm": 107.5, | |
| "learning_rate": 8.571844774470627e-06, | |
| "loss": 5.171541213989258, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5431372549019607, | |
| "grad_norm": 9.125, | |
| "learning_rate": 8.560712566694863e-06, | |
| "loss": 5.251072406768799, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.5450980392156862, | |
| "grad_norm": 10.6875, | |
| "learning_rate": 8.549544430036198e-06, | |
| "loss": 5.052114009857178, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5470588235294118, | |
| "grad_norm": 17.5, | |
| "learning_rate": 8.538340477185191e-06, | |
| "loss": 4.93861198425293, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.5490196078431373, | |
| "grad_norm": 11.625, | |
| "learning_rate": 8.527100821193797e-06, | |
| "loss": 4.7137298583984375, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5509803921568628, | |
| "grad_norm": 33.25, | |
| "learning_rate": 8.51582557547422e-06, | |
| "loss": 5.149138927459717, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.5529411764705883, | |
| "grad_norm": 24.625, | |
| "learning_rate": 8.504514853797789e-06, | |
| "loss": 4.847334384918213, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5549019607843138, | |
| "grad_norm": 75.0, | |
| "learning_rate": 8.493168770293793e-06, | |
| "loss": 4.963643550872803, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.5568627450980392, | |
| "grad_norm": 35.5, | |
| "learning_rate": 8.481787439448332e-06, | |
| "loss": 4.975803375244141, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.5588235294117647, | |
| "grad_norm": 27.875, | |
| "learning_rate": 8.470370976103171e-06, | |
| "loss": 4.802888870239258, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.5607843137254902, | |
| "grad_norm": 162.0, | |
| "learning_rate": 8.458919495454567e-06, | |
| "loss": 5.223158359527588, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.5627450980392157, | |
| "grad_norm": 14.75, | |
| "learning_rate": 8.447433113052124e-06, | |
| "loss": 4.974187850952148, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.5647058823529412, | |
| "grad_norm": 110.0, | |
| "learning_rate": 8.435911944797605e-06, | |
| "loss": 4.781928062438965, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.5666666666666667, | |
| "grad_norm": 24.125, | |
| "learning_rate": 8.42435610694379e-06, | |
| "loss": 5.388498306274414, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.5686274509803921, | |
| "grad_norm": 62.5, | |
| "learning_rate": 8.412765716093273e-06, | |
| "loss": 5.016127109527588, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5705882352941176, | |
| "grad_norm": 87.5, | |
| "learning_rate": 8.401140889197305e-06, | |
| "loss": 4.991474628448486, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.5725490196078431, | |
| "grad_norm": 19.125, | |
| "learning_rate": 8.38948174355462e-06, | |
| "loss": 5.0762858390808105, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.5745098039215686, | |
| "grad_norm": 14.1875, | |
| "learning_rate": 8.377788396810223e-06, | |
| "loss": 4.603714942932129, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.5764705882352941, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 8.366060966954235e-06, | |
| "loss": 5.238812446594238, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.5784313725490197, | |
| "grad_norm": 32.75, | |
| "learning_rate": 8.354299572320679e-06, | |
| "loss": 5.05300235748291, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.5803921568627451, | |
| "grad_norm": 15.5625, | |
| "learning_rate": 8.342504331586298e-06, | |
| "loss": 5.6517252922058105, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.5823529411764706, | |
| "grad_norm": 11.1875, | |
| "learning_rate": 8.330675363769356e-06, | |
| "loss": 5.379548072814941, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.5843137254901961, | |
| "grad_norm": 19.875, | |
| "learning_rate": 8.318812788228434e-06, | |
| "loss": 4.605257987976074, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.5862745098039216, | |
| "grad_norm": 17.875, | |
| "learning_rate": 8.306916724661225e-06, | |
| "loss": 5.0481977462768555, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 13.5625, | |
| "learning_rate": 8.294987293103334e-06, | |
| "loss": 5.095377445220947, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5901960784313726, | |
| "grad_norm": 15.3125, | |
| "learning_rate": 8.283024613927055e-06, | |
| "loss": 5.265956401824951, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.592156862745098, | |
| "grad_norm": 15.3125, | |
| "learning_rate": 8.271028807840164e-06, | |
| "loss": 5.021054267883301, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.5941176470588235, | |
| "grad_norm": 14.0625, | |
| "learning_rate": 8.258999995884706e-06, | |
| "loss": 4.8666839599609375, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.596078431372549, | |
| "grad_norm": 43.75, | |
| "learning_rate": 8.246938299435759e-06, | |
| "loss": 4.84593391418457, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.5980392156862745, | |
| "grad_norm": 19.125, | |
| "learning_rate": 8.234843840200218e-06, | |
| "loss": 5.261534214019775, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 38.25, | |
| "learning_rate": 8.222716740215573e-06, | |
| "loss": 5.06063175201416, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6019607843137255, | |
| "grad_norm": 8.3125, | |
| "learning_rate": 8.210557121848664e-06, | |
| "loss": 5.031374454498291, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6039215686274509, | |
| "grad_norm": 30.0, | |
| "learning_rate": 8.198365107794457e-06, | |
| "loss": 4.909897804260254, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6058823529411764, | |
| "grad_norm": 9.125, | |
| "learning_rate": 8.186140821074801e-06, | |
| "loss": 5.3152594566345215, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.6078431372549019, | |
| "grad_norm": 16.5, | |
| "learning_rate": 8.173884385037193e-06, | |
| "loss": 5.177217483520508, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6098039215686275, | |
| "grad_norm": 14.75, | |
| "learning_rate": 8.161595923353516e-06, | |
| "loss": 5.079937934875488, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.611764705882353, | |
| "grad_norm": 17.625, | |
| "learning_rate": 8.149275560018816e-06, | |
| "loss": 5.179655075073242, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6137254901960785, | |
| "grad_norm": 28.25, | |
| "learning_rate": 8.136923419350032e-06, | |
| "loss": 5.067418575286865, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.615686274509804, | |
| "grad_norm": 16.0, | |
| "learning_rate": 8.12453962598475e-06, | |
| "loss": 5.350031852722168, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6176470588235294, | |
| "grad_norm": 36.25, | |
| "learning_rate": 8.112124304879938e-06, | |
| "loss": 5.431763172149658, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6196078431372549, | |
| "grad_norm": 10.375, | |
| "learning_rate": 8.0996775813107e-06, | |
| "loss": 5.225980281829834, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6215686274509804, | |
| "grad_norm": 9.625, | |
| "learning_rate": 8.087199580868997e-06, | |
| "loss": 5.203134536743164, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.6235294117647059, | |
| "grad_norm": 19.75, | |
| "learning_rate": 8.07469042946238e-06, | |
| "loss": 5.057469844818115, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6254901960784314, | |
| "grad_norm": 26.75, | |
| "learning_rate": 8.062150253312735e-06, | |
| "loss": 5.305059909820557, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.6274509803921569, | |
| "grad_norm": 10.1875, | |
| "learning_rate": 8.04957917895499e-06, | |
| "loss": 5.183746814727783, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6294117647058823, | |
| "grad_norm": 23.0, | |
| "learning_rate": 8.03697733323585e-06, | |
| "loss": 5.076390266418457, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.6313725490196078, | |
| "grad_norm": 152.0, | |
| "learning_rate": 8.024344843312517e-06, | |
| "loss": 5.084694862365723, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6333333333333333, | |
| "grad_norm": 30.375, | |
| "learning_rate": 8.011681836651401e-06, | |
| "loss": 4.967686176300049, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.6352941176470588, | |
| "grad_norm": 24.25, | |
| "learning_rate": 7.99898844102684e-06, | |
| "loss": 4.887096405029297, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.6372549019607843, | |
| "grad_norm": 46.5, | |
| "learning_rate": 7.986264784519801e-06, | |
| "loss": 5.092063903808594, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.6392156862745098, | |
| "grad_norm": 23.875, | |
| "learning_rate": 7.973510995516603e-06, | |
| "loss": 5.136166572570801, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6411764705882353, | |
| "grad_norm": 11.375, | |
| "learning_rate": 7.960727202707605e-06, | |
| "loss": 5.2515153884887695, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.6431372549019608, | |
| "grad_norm": 244.0, | |
| "learning_rate": 7.947913535085925e-06, | |
| "loss": 5.353679656982422, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6450980392156863, | |
| "grad_norm": 16.25, | |
| "learning_rate": 7.935070121946116e-06, | |
| "loss": 5.194516181945801, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.6470588235294118, | |
| "grad_norm": 37.25, | |
| "learning_rate": 7.922197092882882e-06, | |
| "loss": 4.8353095054626465, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6490196078431373, | |
| "grad_norm": 7.5, | |
| "learning_rate": 7.909294577789765e-06, | |
| "loss": 4.934078216552734, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.6509803921568628, | |
| "grad_norm": 22.375, | |
| "learning_rate": 7.896362706857825e-06, | |
| "loss": 5.069338798522949, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.6529411764705882, | |
| "grad_norm": 16.625, | |
| "learning_rate": 7.883401610574338e-06, | |
| "loss": 5.3965229988098145, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.6549019607843137, | |
| "grad_norm": 11.5625, | |
| "learning_rate": 7.870411419721468e-06, | |
| "loss": 5.186374664306641, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.6568627450980392, | |
| "grad_norm": 55.0, | |
| "learning_rate": 7.857392265374963e-06, | |
| "loss": 5.19561767578125, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.6588235294117647, | |
| "grad_norm": 52.25, | |
| "learning_rate": 7.844344278902815e-06, | |
| "loss": 5.131958484649658, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.6607843137254902, | |
| "grad_norm": 34.5, | |
| "learning_rate": 7.83126759196395e-06, | |
| "loss": 4.746824264526367, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.6627450980392157, | |
| "grad_norm": 21.25, | |
| "learning_rate": 7.818162336506885e-06, | |
| "loss": 5.224120140075684, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.6647058823529411, | |
| "grad_norm": 12.25, | |
| "learning_rate": 7.805028644768407e-06, | |
| "loss": 4.973474025726318, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 21.875, | |
| "learning_rate": 7.791866649272236e-06, | |
| "loss": 5.3684821128845215, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6686274509803921, | |
| "grad_norm": 25.0, | |
| "learning_rate": 7.778676482827686e-06, | |
| "loss": 5.071963310241699, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.6705882352941176, | |
| "grad_norm": 12.9375, | |
| "learning_rate": 7.765458278528327e-06, | |
| "loss": 5.314568519592285, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.6725490196078432, | |
| "grad_norm": 14.3125, | |
| "learning_rate": 7.752212169750642e-06, | |
| "loss": 4.6215596199035645, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.6745098039215687, | |
| "grad_norm": 36.0, | |
| "learning_rate": 7.738938290152675e-06, | |
| "loss": 5.359746932983398, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.6764705882352942, | |
| "grad_norm": 22.875, | |
| "learning_rate": 7.725636773672694e-06, | |
| "loss": 5.124155044555664, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.6784313725490196, | |
| "grad_norm": 12.0625, | |
| "learning_rate": 7.712307754527832e-06, | |
| "loss": 5.157146453857422, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.6803921568627451, | |
| "grad_norm": 7.59375, | |
| "learning_rate": 7.69895136721273e-06, | |
| "loss": 5.049259662628174, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.6823529411764706, | |
| "grad_norm": 7.9375, | |
| "learning_rate": 7.685567746498191e-06, | |
| "loss": 5.118771553039551, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.6843137254901961, | |
| "grad_norm": 7.90625, | |
| "learning_rate": 7.672157027429803e-06, | |
| "loss": 5.174470901489258, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.6862745098039216, | |
| "grad_norm": 23.875, | |
| "learning_rate": 7.658719345326595e-06, | |
| "loss": 5.073647975921631, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6882352941176471, | |
| "grad_norm": 116.0, | |
| "learning_rate": 7.645254835779657e-06, | |
| "loss": 5.21632719039917, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.6901960784313725, | |
| "grad_norm": 12.4375, | |
| "learning_rate": 7.631763634650783e-06, | |
| "loss": 4.588676452636719, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.692156862745098, | |
| "grad_norm": 25.0, | |
| "learning_rate": 7.618245878071091e-06, | |
| "loss": 5.39186954498291, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.6941176470588235, | |
| "grad_norm": 26.625, | |
| "learning_rate": 7.604701702439652e-06, | |
| "loss": 5.2945146560668945, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.696078431372549, | |
| "grad_norm": 13.8125, | |
| "learning_rate": 7.591131244422118e-06, | |
| "loss": 4.693891525268555, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6980392156862745, | |
| "grad_norm": 12.25, | |
| "learning_rate": 7.57753464094934e-06, | |
| "loss": 5.024456024169922, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 13.8125, | |
| "learning_rate": 7.563912029215983e-06, | |
| "loss": 5.163435935974121, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.7019607843137254, | |
| "grad_norm": 452.0, | |
| "learning_rate": 7.550263546679148e-06, | |
| "loss": 5.493864059448242, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.703921568627451, | |
| "grad_norm": 21.375, | |
| "learning_rate": 7.536589331056976e-06, | |
| "loss": 5.569558620452881, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 960.0, | |
| "learning_rate": 7.522889520327275e-06, | |
| "loss": 5.333744049072266, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.707843137254902, | |
| "grad_norm": 12.0, | |
| "learning_rate": 7.509164252726107e-06, | |
| "loss": 5.133704662322998, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7098039215686275, | |
| "grad_norm": 11.375, | |
| "learning_rate": 7.495413666746406e-06, | |
| "loss": 5.274477005004883, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.711764705882353, | |
| "grad_norm": 17.625, | |
| "learning_rate": 7.481637901136578e-06, | |
| "loss": 5.171307563781738, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.7137254901960784, | |
| "grad_norm": 80.0, | |
| "learning_rate": 7.467837094899104e-06, | |
| "loss": 4.533388137817383, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7156862745098039, | |
| "grad_norm": 10.125, | |
| "learning_rate": 7.454011387289127e-06, | |
| "loss": 5.268569469451904, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7176470588235294, | |
| "grad_norm": 10.1875, | |
| "learning_rate": 7.440160917813059e-06, | |
| "loss": 5.046947479248047, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7196078431372549, | |
| "grad_norm": 16.5, | |
| "learning_rate": 7.426285826227171e-06, | |
| "loss": 4.640476703643799, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.7215686274509804, | |
| "grad_norm": 15.9375, | |
| "learning_rate": 7.412386252536168e-06, | |
| "loss": 4.895157337188721, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7235294117647059, | |
| "grad_norm": 12.875, | |
| "learning_rate": 7.398462336991802e-06, | |
| "loss": 5.1850433349609375, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.7254901960784313, | |
| "grad_norm": 42.5, | |
| "learning_rate": 7.384514220091437e-06, | |
| "loss": 5.30681848526001, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7274509803921568, | |
| "grad_norm": 16.75, | |
| "learning_rate": 7.370542042576635e-06, | |
| "loss": 5.194714546203613, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.7294117647058823, | |
| "grad_norm": 53.0, | |
| "learning_rate": 7.356545945431744e-06, | |
| "loss": 5.145817756652832, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7313725490196078, | |
| "grad_norm": 20.5, | |
| "learning_rate": 7.342526069882465e-06, | |
| "loss": 5.071390151977539, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.7333333333333333, | |
| "grad_norm": 24.875, | |
| "learning_rate": 7.328482557394435e-06, | |
| "loss": 5.190312385559082, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.7352941176470589, | |
| "grad_norm": 14.5625, | |
| "learning_rate": 7.314415549671795e-06, | |
| "loss": 5.221264362335205, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.7372549019607844, | |
| "grad_norm": 12.3125, | |
| "learning_rate": 7.300325188655762e-06, | |
| "loss": 4.9833574295043945, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.7392156862745098, | |
| "grad_norm": 378.0, | |
| "learning_rate": 7.286211616523193e-06, | |
| "loss": 4.600691795349121, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.7411764705882353, | |
| "grad_norm": 16.75, | |
| "learning_rate": 7.27207497568516e-06, | |
| "loss": 4.92762565612793, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.7431372549019608, | |
| "grad_norm": 26.25, | |
| "learning_rate": 7.257915408785499e-06, | |
| "loss": 4.881844997406006, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.7450980392156863, | |
| "grad_norm": 23.75, | |
| "learning_rate": 7.243733058699386e-06, | |
| "loss": 4.775541305541992, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7470588235294118, | |
| "grad_norm": 13.8125, | |
| "learning_rate": 7.229528068531881e-06, | |
| "loss": 5.095828056335449, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.7490196078431373, | |
| "grad_norm": 17.625, | |
| "learning_rate": 7.215300581616496e-06, | |
| "loss": 5.315362930297852, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.7509803921568627, | |
| "grad_norm": 22.875, | |
| "learning_rate": 7.201050741513735e-06, | |
| "loss": 5.607841491699219, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.7529411764705882, | |
| "grad_norm": 13.75, | |
| "learning_rate": 7.186778692009669e-06, | |
| "loss": 5.133593559265137, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.7549019607843137, | |
| "grad_norm": 17.25, | |
| "learning_rate": 7.172484577114452e-06, | |
| "loss": 4.697779655456543, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.7568627450980392, | |
| "grad_norm": 9.625, | |
| "learning_rate": 7.1581685410609e-06, | |
| "loss": 5.201179027557373, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.7588235294117647, | |
| "grad_norm": 11.75, | |
| "learning_rate": 7.1438307283030106e-06, | |
| "loss": 5.069579124450684, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.7607843137254902, | |
| "grad_norm": 162.0, | |
| "learning_rate": 7.129471283514525e-06, | |
| "loss": 5.083244323730469, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.7627450980392156, | |
| "grad_norm": 100.0, | |
| "learning_rate": 7.115090351587455e-06, | |
| "loss": 5.057339668273926, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.7647058823529411, | |
| "grad_norm": 33.75, | |
| "learning_rate": 7.100688077630628e-06, | |
| "loss": 4.903251647949219, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7666666666666667, | |
| "grad_norm": 9.1875, | |
| "learning_rate": 7.086264606968215e-06, | |
| "loss": 5.1166181564331055, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.7686274509803922, | |
| "grad_norm": 14.1875, | |
| "learning_rate": 7.071820085138275e-06, | |
| "loss": 5.415125846862793, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.7705882352941177, | |
| "grad_norm": 27.125, | |
| "learning_rate": 7.05735465789128e-06, | |
| "loss": 5.206658363342285, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.7725490196078432, | |
| "grad_norm": 136.0, | |
| "learning_rate": 7.042868471188642e-06, | |
| "loss": 5.055720806121826, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.7745098039215687, | |
| "grad_norm": 30.25, | |
| "learning_rate": 7.028361671201245e-06, | |
| "loss": 5.035671234130859, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.7764705882352941, | |
| "grad_norm": 33.75, | |
| "learning_rate": 7.013834404307972e-06, | |
| "loss": 5.380437850952148, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.7784313725490196, | |
| "grad_norm": 113.5, | |
| "learning_rate": 6.9992868170942205e-06, | |
| "loss": 4.5458831787109375, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.7803921568627451, | |
| "grad_norm": 26.375, | |
| "learning_rate": 6.9847190563504284e-06, | |
| "loss": 4.586980819702148, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.7823529411764706, | |
| "grad_norm": 32.75, | |
| "learning_rate": 6.970131269070591e-06, | |
| "loss": 5.0616559982299805, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 143.0, | |
| "learning_rate": 6.95552360245078e-06, | |
| "loss": 5.486038684844971, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7862745098039216, | |
| "grad_norm": 13.4375, | |
| "learning_rate": 6.940896203887659e-06, | |
| "loss": 5.011553764343262, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.788235294117647, | |
| "grad_norm": 11.9375, | |
| "learning_rate": 6.926249220976988e-06, | |
| "loss": 5.143858909606934, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.7901960784313725, | |
| "grad_norm": 36.25, | |
| "learning_rate": 6.911582801512146e-06, | |
| "loss": 5.41754674911499, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.792156862745098, | |
| "grad_norm": 12.0, | |
| "learning_rate": 6.8968970934826296e-06, | |
| "loss": 4.824446678161621, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.7941176470588235, | |
| "grad_norm": 24.0, | |
| "learning_rate": 6.88219224507257e-06, | |
| "loss": 5.1240129470825195, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.796078431372549, | |
| "grad_norm": 11.1875, | |
| "learning_rate": 6.867468404659222e-06, | |
| "loss": 5.1134138107299805, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.7980392156862746, | |
| "grad_norm": 30.5, | |
| "learning_rate": 6.852725720811487e-06, | |
| "loss": 4.89101505279541, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 51.25, | |
| "learning_rate": 6.837964342288399e-06, | |
| "loss": 5.113155364990234, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8019607843137255, | |
| "grad_norm": 91.5, | |
| "learning_rate": 6.823184418037625e-06, | |
| "loss": 4.534147262573242, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.803921568627451, | |
| "grad_norm": 55.75, | |
| "learning_rate": 6.808386097193969e-06, | |
| "loss": 4.7851409912109375, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8058823529411765, | |
| "grad_norm": 25.875, | |
| "learning_rate": 6.793569529077864e-06, | |
| "loss": 4.942509174346924, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.807843137254902, | |
| "grad_norm": 14.4375, | |
| "learning_rate": 6.778734863193862e-06, | |
| "loss": 5.097578525543213, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8098039215686275, | |
| "grad_norm": 24.625, | |
| "learning_rate": 6.76388224922913e-06, | |
| "loss": 5.055602073669434, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.8117647058823529, | |
| "grad_norm": 49.5, | |
| "learning_rate": 6.7490118370519356e-06, | |
| "loss": 5.3905158042907715, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8137254901960784, | |
| "grad_norm": 14.5, | |
| "learning_rate": 6.7341237767101375e-06, | |
| "loss": 5.291065216064453, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.8156862745098039, | |
| "grad_norm": 36.5, | |
| "learning_rate": 6.7192182184296725e-06, | |
| "loss": 5.055734634399414, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8176470588235294, | |
| "grad_norm": 43.25, | |
| "learning_rate": 6.704295312613037e-06, | |
| "loss": 4.7537078857421875, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.8196078431372549, | |
| "grad_norm": 11.0, | |
| "learning_rate": 6.689355209837769e-06, | |
| "loss": 5.249905586242676, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8215686274509804, | |
| "grad_norm": 14.9375, | |
| "learning_rate": 6.674398060854931e-06, | |
| "loss": 4.9035539627075195, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 19.0, | |
| "learning_rate": 6.65942401658759e-06, | |
| "loss": 4.805628776550293, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8254901960784313, | |
| "grad_norm": 16.75, | |
| "learning_rate": 6.644433228129288e-06, | |
| "loss": 5.162158489227295, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.8274509803921568, | |
| "grad_norm": 83.0, | |
| "learning_rate": 6.6294258467425256e-06, | |
| "loss": 5.218243598937988, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.8294117647058824, | |
| "grad_norm": 19.625, | |
| "learning_rate": 6.614402023857231e-06, | |
| "loss": 4.90092658996582, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.8313725490196079, | |
| "grad_norm": 202.0, | |
| "learning_rate": 6.599361911069235e-06, | |
| "loss": 5.307265281677246, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 6.584305660138734e-06, | |
| "loss": 4.833058834075928, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.8352941176470589, | |
| "grad_norm": 12.875, | |
| "learning_rate": 6.569233422988771e-06, | |
| "loss": 4.753157615661621, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.8372549019607843, | |
| "grad_norm": 17.25, | |
| "learning_rate": 6.554145351703689e-06, | |
| "loss": 5.133755207061768, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.8392156862745098, | |
| "grad_norm": 15.5, | |
| "learning_rate": 6.539041598527612e-06, | |
| "loss": 5.210625171661377, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.8411764705882353, | |
| "grad_norm": 18.5, | |
| "learning_rate": 6.523922315862887e-06, | |
| "loss": 4.968243598937988, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.8431372549019608, | |
| "grad_norm": 35.25, | |
| "learning_rate": 6.508787656268573e-06, | |
| "loss": 4.71547269821167, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8450980392156863, | |
| "grad_norm": 77.0, | |
| "learning_rate": 6.4936377724588794e-06, | |
| "loss": 4.9928693771362305, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.8470588235294118, | |
| "grad_norm": 27.5, | |
| "learning_rate": 6.478472817301635e-06, | |
| "loss": 5.335099220275879, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.8490196078431372, | |
| "grad_norm": 90.0, | |
| "learning_rate": 6.463292943816747e-06, | |
| "loss": 4.708322525024414, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.8509803921568627, | |
| "grad_norm": 25.5, | |
| "learning_rate": 6.448098305174648e-06, | |
| "loss": 4.99176549911499, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.8529411764705882, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 6.4328890546947645e-06, | |
| "loss": 5.023103713989258, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.8549019607843137, | |
| "grad_norm": 13.875, | |
| "learning_rate": 6.417665345843952e-06, | |
| "loss": 4.433863639831543, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.8568627450980392, | |
| "grad_norm": 22.0, | |
| "learning_rate": 6.402427332234965e-06, | |
| "loss": 5.005480766296387, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.8588235294117647, | |
| "grad_norm": 33.25, | |
| "learning_rate": 6.387175167624894e-06, | |
| "loss": 5.027538299560547, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.8607843137254902, | |
| "grad_norm": 35.0, | |
| "learning_rate": 6.371909005913618e-06, | |
| "loss": 4.9888691902160645, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.8627450980392157, | |
| "grad_norm": 10.8125, | |
| "learning_rate": 6.3566290011422515e-06, | |
| "loss": 5.083306789398193, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8647058823529412, | |
| "grad_norm": 11.5625, | |
| "learning_rate": 6.341335307491596e-06, | |
| "loss": 4.916092395782471, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.8666666666666667, | |
| "grad_norm": 16.875, | |
| "learning_rate": 6.32602807928057e-06, | |
| "loss": 5.217679977416992, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.8686274509803922, | |
| "grad_norm": 8.6875, | |
| "learning_rate": 6.310707470964668e-06, | |
| "loss": 4.544569969177246, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.8705882352941177, | |
| "grad_norm": 10.375, | |
| "learning_rate": 6.29537363713439e-06, | |
| "loss": 4.775785446166992, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.8725490196078431, | |
| "grad_norm": 37.75, | |
| "learning_rate": 6.280026732513689e-06, | |
| "loss": 4.826279163360596, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.8745098039215686, | |
| "grad_norm": 27.75, | |
| "learning_rate": 6.264666911958404e-06, | |
| "loss": 5.18260383605957, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.8764705882352941, | |
| "grad_norm": 38.25, | |
| "learning_rate": 6.249294330454705e-06, | |
| "loss": 4.590150833129883, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.8784313725490196, | |
| "grad_norm": 32.5, | |
| "learning_rate": 6.233909143117521e-06, | |
| "loss": 5.024850368499756, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.8803921568627451, | |
| "grad_norm": 34.5, | |
| "learning_rate": 6.21851150518898e-06, | |
| "loss": 5.589545726776123, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": 31.0, | |
| "learning_rate": 6.203101572036839e-06, | |
| "loss": 5.289287090301514, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.884313725490196, | |
| "grad_norm": 37.5, | |
| "learning_rate": 6.18767949915292e-06, | |
| "loss": 5.225309371948242, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.8862745098039215, | |
| "grad_norm": 8.6875, | |
| "learning_rate": 6.172245442151541e-06, | |
| "loss": 4.946108341217041, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.888235294117647, | |
| "grad_norm": 9.25, | |
| "learning_rate": 6.156799556767941e-06, | |
| "loss": 5.044528007507324, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.8901960784313725, | |
| "grad_norm": 14.25, | |
| "learning_rate": 6.141341998856711e-06, | |
| "loss": 4.899598121643066, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.8921568627450981, | |
| "grad_norm": 98.0, | |
| "learning_rate": 6.125872924390226e-06, | |
| "loss": 5.111475467681885, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8941176470588236, | |
| "grad_norm": 11.0, | |
| "learning_rate": 6.110392489457067e-06, | |
| "loss": 5.1644086837768555, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.8960784313725491, | |
| "grad_norm": 14.4375, | |
| "learning_rate": 6.094900850260439e-06, | |
| "loss": 4.752385139465332, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.8980392156862745, | |
| "grad_norm": 21.25, | |
| "learning_rate": 6.079398163116611e-06, | |
| "loss": 4.762664794921875, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 22.75, | |
| "learning_rate": 6.063884584453326e-06, | |
| "loss": 5.30159854888916, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.9019607843137255, | |
| "grad_norm": 16.5, | |
| "learning_rate": 6.048360270808226e-06, | |
| "loss": 4.787961006164551, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.903921568627451, | |
| "grad_norm": 17.375, | |
| "learning_rate": 6.032825378827273e-06, | |
| "loss": 4.75579309463501, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.9058823529411765, | |
| "grad_norm": 11.5, | |
| "learning_rate": 6.0172800652631706e-06, | |
| "loss": 5.047743797302246, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.907843137254902, | |
| "grad_norm": 11.8125, | |
| "learning_rate": 6.001724486973774e-06, | |
| "loss": 5.246438980102539, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.9098039215686274, | |
| "grad_norm": 16.5, | |
| "learning_rate": 5.986158800920523e-06, | |
| "loss": 5.05087947845459, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9117647058823529, | |
| "grad_norm": 77.0, | |
| "learning_rate": 5.970583164166838e-06, | |
| "loss": 5.218969821929932, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.9137254901960784, | |
| "grad_norm": 11.75, | |
| "learning_rate": 5.954997733876552e-06, | |
| "loss": 4.697071075439453, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9156862745098039, | |
| "grad_norm": 20.75, | |
| "learning_rate": 5.939402667312316e-06, | |
| "loss": 5.159415245056152, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.9176470588235294, | |
| "grad_norm": 51.5, | |
| "learning_rate": 5.923798121834016e-06, | |
| "loss": 5.336456775665283, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.9196078431372549, | |
| "grad_norm": 14.9375, | |
| "learning_rate": 5.908184254897183e-06, | |
| "loss": 5.008672714233398, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.9215686274509803, | |
| "grad_norm": 37.75, | |
| "learning_rate": 5.892561224051403e-06, | |
| "loss": 5.013587951660156, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9235294117647059, | |
| "grad_norm": 15.1875, | |
| "learning_rate": 5.876929186938734e-06, | |
| "loss": 5.09112548828125, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.9254901960784314, | |
| "grad_norm": 8.625, | |
| "learning_rate": 5.861288301292103e-06, | |
| "loss": 5.109498977661133, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.9274509803921569, | |
| "grad_norm": 20.5, | |
| "learning_rate": 5.845638724933729e-06, | |
| "loss": 4.940072059631348, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.9294117647058824, | |
| "grad_norm": 19.75, | |
| "learning_rate": 5.82998061577352e-06, | |
| "loss": 4.561947345733643, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.9313725490196079, | |
| "grad_norm": 12.0, | |
| "learning_rate": 5.814314131807486e-06, | |
| "loss": 5.177187919616699, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.9333333333333333, | |
| "grad_norm": 16.25, | |
| "learning_rate": 5.798639431116135e-06, | |
| "loss": 5.03114652633667, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.9352941176470588, | |
| "grad_norm": 12.375, | |
| "learning_rate": 5.782956671862895e-06, | |
| "loss": 5.067164897918701, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.9372549019607843, | |
| "grad_norm": 15.0, | |
| "learning_rate": 5.767266012292496e-06, | |
| "loss": 5.1333818435668945, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.9392156862745098, | |
| "grad_norm": 31.875, | |
| "learning_rate": 5.751567610729398e-06, | |
| "loss": 5.109825611114502, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 27.625, | |
| "learning_rate": 5.735861625576167e-06, | |
| "loss": 5.42066764831543, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9431372549019608, | |
| "grad_norm": 10.0, | |
| "learning_rate": 5.720148215311902e-06, | |
| "loss": 5.352512359619141, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.9450980392156862, | |
| "grad_norm": 10.1875, | |
| "learning_rate": 5.7044275384906164e-06, | |
| "loss": 5.076541423797607, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.9470588235294117, | |
| "grad_norm": 14.8125, | |
| "learning_rate": 5.688699753739649e-06, | |
| "loss": 5.070345401763916, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.9490196078431372, | |
| "grad_norm": 13.125, | |
| "learning_rate": 5.672965019758061e-06, | |
| "loss": 4.844189643859863, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.9509803921568627, | |
| "grad_norm": 47.25, | |
| "learning_rate": 5.657223495315031e-06, | |
| "loss": 5.295019626617432, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.9529411764705882, | |
| "grad_norm": 15.1875, | |
| "learning_rate": 5.641475339248257e-06, | |
| "loss": 4.830320358276367, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.9549019607843138, | |
| "grad_norm": 44.25, | |
| "learning_rate": 5.625720710462352e-06, | |
| "loss": 5.128537178039551, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.9568627450980393, | |
| "grad_norm": 22.625, | |
| "learning_rate": 5.609959767927247e-06, | |
| "loss": 5.368704795837402, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.9588235294117647, | |
| "grad_norm": 16.75, | |
| "learning_rate": 5.594192670676568e-06, | |
| "loss": 5.183511734008789, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.9607843137254902, | |
| "grad_norm": 79.0, | |
| "learning_rate": 5.578419577806058e-06, | |
| "loss": 5.282448768615723, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9627450980392157, | |
| "grad_norm": 9.375, | |
| "learning_rate": 5.562640648471951e-06, | |
| "loss": 4.948698043823242, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.9647058823529412, | |
| "grad_norm": 11.3125, | |
| "learning_rate": 5.546856041889374e-06, | |
| "loss": 5.277318954467773, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.9666666666666667, | |
| "grad_norm": 29.625, | |
| "learning_rate": 5.531065917330737e-06, | |
| "loss": 4.637456893920898, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.9686274509803922, | |
| "grad_norm": 10.8125, | |
| "learning_rate": 5.515270434124136e-06, | |
| "loss": 4.952828884124756, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.9705882352941176, | |
| "grad_norm": 20.875, | |
| "learning_rate": 5.499469751651728e-06, | |
| "loss": 4.610632419586182, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.9725490196078431, | |
| "grad_norm": 23.5, | |
| "learning_rate": 5.483664029348141e-06, | |
| "loss": 5.143571376800537, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.9745098039215686, | |
| "grad_norm": 58.75, | |
| "learning_rate": 5.467853426698852e-06, | |
| "loss": 5.044045925140381, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.9764705882352941, | |
| "grad_norm": 28.5, | |
| "learning_rate": 5.452038103238582e-06, | |
| "loss": 5.476343154907227, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.9784313725490196, | |
| "grad_norm": 25.5, | |
| "learning_rate": 5.43621821854969e-06, | |
| "loss": 5.234396457672119, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.9803921568627451, | |
| "grad_norm": 21.375, | |
| "learning_rate": 5.420393932260557e-06, | |
| "loss": 5.006505489349365, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9823529411764705, | |
| "grad_norm": 15.0, | |
| "learning_rate": 5.404565404043977e-06, | |
| "loss": 5.230044364929199, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.984313725490196, | |
| "grad_norm": 44.75, | |
| "learning_rate": 5.388732793615551e-06, | |
| "loss": 5.160193920135498, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.9862745098039216, | |
| "grad_norm": 9.125, | |
| "learning_rate": 5.372896260732065e-06, | |
| "loss": 4.933669090270996, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.9882352941176471, | |
| "grad_norm": 12.875, | |
| "learning_rate": 5.357055965189888e-06, | |
| "loss": 5.125208854675293, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.9901960784313726, | |
| "grad_norm": 19.125, | |
| "learning_rate": 5.341212066823356e-06, | |
| "loss": 4.810538291931152, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.9921568627450981, | |
| "grad_norm": 15.6875, | |
| "learning_rate": 5.325364725503155e-06, | |
| "loss": 5.523466110229492, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.9941176470588236, | |
| "grad_norm": 19.75, | |
| "learning_rate": 5.3095141011347155e-06, | |
| "loss": 4.905384063720703, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.996078431372549, | |
| "grad_norm": 11.0625, | |
| "learning_rate": 5.2936603536565915e-06, | |
| "loss": 4.761824607849121, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.9980392156862745, | |
| "grad_norm": 16.375, | |
| "learning_rate": 5.277803643038855e-06, | |
| "loss": 5.2767839431762695, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 11.375, | |
| "learning_rate": 5.261944129281474e-06, | |
| "loss": 4.927273750305176, | |
| "step": 510 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1020, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.3914914053153096e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |