| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.017274289810760155, |
| "eval_steps": 1000, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.7274289810760155e-05, |
| "grad_norm": 0.318359375, |
| "learning_rate": 0.01, |
| "loss": 1.4153, |
| "loss/crossentropy": 2.180091619491577, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.2821359634399414, |
| "step": 1 |
| }, |
| { |
| "epoch": 3.454857962152031e-05, |
| "grad_norm": 0.3515625, |
| "learning_rate": 0.01, |
| "loss": 1.4401, |
| "loss/crossentropy": 2.49104380607605, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2602585107088089, |
| "step": 2 |
| }, |
| { |
| "epoch": 5.182286943228046e-05, |
| "grad_norm": 0.30859375, |
| "learning_rate": 0.01, |
| "loss": 1.4352, |
| "loss/crossentropy": 2.453463077545166, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.2847007066011429, |
| "step": 3 |
| }, |
| { |
| "epoch": 6.909715924304062e-05, |
| "grad_norm": 0.306640625, |
| "learning_rate": 0.01, |
| "loss": 1.3983, |
| "loss/crossentropy": 2.52145779132843, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.2535911202430725, |
| "step": 4 |
| }, |
| { |
| "epoch": 8.637144905380078e-05, |
| "grad_norm": 0.35546875, |
| "learning_rate": 0.01, |
| "loss": 1.4077, |
| "loss/crossentropy": 2.364890694618225, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.24292171746492386, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00010364573886456093, |
| "grad_norm": 0.310546875, |
| "learning_rate": 0.01, |
| "loss": 1.3824, |
| "loss/crossentropy": 2.3052154779434204, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.24541093409061432, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00012092002867532108, |
| "grad_norm": 0.29296875, |
| "learning_rate": 0.01, |
| "loss": 1.4026, |
| "loss/crossentropy": 2.381744861602783, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2507929801940918, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.00013819431848608124, |
| "grad_norm": 0.322265625, |
| "learning_rate": 0.01, |
| "loss": 1.4452, |
| "loss/crossentropy": 2.613944888114929, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.27175769209861755, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0001554686082968414, |
| "grad_norm": 0.31640625, |
| "learning_rate": 0.01, |
| "loss": 1.4301, |
| "loss/crossentropy": 2.4438647031784058, |
| "loss/fcd": 1.2890625, |
| "loss/logits": 0.31327594816684723, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00017274289810760156, |
| "grad_norm": 0.322265625, |
| "learning_rate": 0.01, |
| "loss": 1.4258, |
| "loss/crossentropy": 2.689444422721863, |
| "loss/fcd": 1.20703125, |
| "loss/logits": 0.2705621272325516, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0001900171879183617, |
| "grad_norm": 0.283203125, |
| "learning_rate": 0.01, |
| "loss": 1.38, |
| "loss/crossentropy": 2.6325626373291016, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.26059799641370773, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00020729147772912185, |
| "grad_norm": 0.294921875, |
| "learning_rate": 0.01, |
| "loss": 1.3964, |
| "loss/crossentropy": 2.2171366214752197, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.25415121763944626, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.00022456576753988202, |
| "grad_norm": 0.314453125, |
| "learning_rate": 0.01, |
| "loss": 1.4028, |
| "loss/crossentropy": 2.239351272583008, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.2298966646194458, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00024184005735064217, |
| "grad_norm": 0.31640625, |
| "learning_rate": 0.01, |
| "loss": 1.4218, |
| "loss/crossentropy": 2.712681293487549, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.24666083604097366, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00025911434716140234, |
| "grad_norm": 0.3515625, |
| "learning_rate": 0.01, |
| "loss": 1.4074, |
| "loss/crossentropy": 2.6137157678604126, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.2758009433746338, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0002763886369721625, |
| "grad_norm": 0.376953125, |
| "grad_norm_var": 0.0006428877512613932, |
| "learning_rate": 0.01, |
| "loss": 1.4429, |
| "loss/crossentropy": 2.266461730003357, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.26471851766109467, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.00029366292678292263, |
| "grad_norm": 0.353515625, |
| "grad_norm_var": 0.0007058302561442057, |
| "learning_rate": 0.01, |
| "loss": 1.433, |
| "loss/crossentropy": 2.63763689994812, |
| "loss/fcd": 1.21875, |
| "loss/logits": 0.28894874453544617, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0003109372165936828, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0006610711415608723, |
| "learning_rate": 0.01, |
| "loss": 1.4003, |
| "loss/crossentropy": 2.5304828882217407, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.26741379499435425, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0003282115064044429, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0006503899892171224, |
| "learning_rate": 0.01, |
| "loss": 1.4179, |
| "loss/crossentropy": 2.36896288394928, |
| "loss/fcd": 1.19921875, |
| "loss/logits": 0.2745219022035599, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0003454857962152031, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0006408055623372395, |
| "learning_rate": 0.01, |
| "loss": 1.4132, |
| "loss/crossentropy": 2.471444010734558, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.24692216515541077, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00036276008602596327, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0005624771118164062, |
| "learning_rate": 0.01, |
| "loss": 1.3532, |
| "loss/crossentropy": 2.4798572063446045, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.24522659927606583, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0003800343758367234, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0005975723266601563, |
| "learning_rate": 0.01, |
| "loss": 1.4057, |
| "loss/crossentropy": 2.3649370670318604, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.26143455505371094, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.00039730866564748356, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0005533854166666667, |
| "learning_rate": 0.01, |
| "loss": 1.4282, |
| "loss/crossentropy": 2.7900454998016357, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.256390705704689, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0004145829554582437, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.000638580322265625, |
| "learning_rate": 0.01, |
| "loss": 1.422, |
| "loss/crossentropy": 2.3018282651901245, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.26084744930267334, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0004318572452690039, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0006613254547119141, |
| "learning_rate": 0.01, |
| "loss": 1.4043, |
| "loss/crossentropy": 2.404328942298889, |
| "loss/fcd": 1.0390625, |
| "loss/logits": 0.24188002943992615, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.00044913153507976405, |
| "grad_norm": 2.265625, |
| "grad_norm_var": 0.23812503814697267, |
| "learning_rate": 0.01, |
| "loss": 1.3559, |
| "loss/crossentropy": 2.5355838537216187, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.24743662029504776, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0004664058248905242, |
| "grad_norm": 0.36328125, |
| "grad_norm_var": 0.23687055905659993, |
| "learning_rate": 0.01, |
| "loss": 1.4526, |
| "loss/crossentropy": 2.329304337501526, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.2358776032924652, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.00048368011470128434, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.23719480832417805, |
| "learning_rate": 0.01, |
| "loss": 1.3243, |
| "loss/crossentropy": 2.1602375507354736, |
| "loss/fcd": 1.02734375, |
| "loss/logits": 0.21287230402231216, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0005009544045120445, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.2374394734700521, |
| "learning_rate": 0.01, |
| "loss": 1.4111, |
| "loss/crossentropy": 2.4278478622436523, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2437409982085228, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0005182286943228047, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.23818588256835938, |
| "learning_rate": 0.01, |
| "loss": 1.4091, |
| "loss/crossentropy": 2.5047000646591187, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.27113544940948486, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0005355029841335648, |
| "grad_norm": 0.373046875, |
| "grad_norm_var": 0.23796435991923015, |
| "learning_rate": 0.01, |
| "loss": 1.4498, |
| "loss/crossentropy": 2.3999940156936646, |
| "loss/fcd": 1.25, |
| "loss/logits": 0.27853211760520935, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.000552777273944325, |
| "grad_norm": 0.3671875, |
| "grad_norm_var": 0.23805281321207683, |
| "learning_rate": 0.01, |
| "loss": 1.4805, |
| "loss/crossentropy": 2.511382222175598, |
| "loss/fcd": 1.3203125, |
| "loss/logits": 0.409069299697876, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0005700515637550852, |
| "grad_norm": 0.6875, |
| "grad_norm_var": 0.24118663469950358, |
| "learning_rate": 0.01, |
| "loss": 1.3432, |
| "loss/crossentropy": 2.5396409034729004, |
| "loss/fcd": 1.2421875, |
| "loss/logits": 0.25656259059906006, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0005873258535658453, |
| "grad_norm": 0.357421875, |
| "grad_norm_var": 0.24034620920817057, |
| "learning_rate": 0.01, |
| "loss": 1.4207, |
| "loss/crossentropy": 2.3687368631362915, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.23432840406894684, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0006046001433766055, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.2409596602121989, |
| "learning_rate": 0.01, |
| "loss": 1.4195, |
| "loss/crossentropy": 2.428983449935913, |
| "loss/fcd": 1.21484375, |
| "loss/logits": 0.2627260833978653, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0006218744331873656, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.2408828576405843, |
| "learning_rate": 0.01, |
| "loss": 1.372, |
| "loss/crossentropy": 2.827309250831604, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2433805763721466, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0006391487229981258, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.24178783098856607, |
| "learning_rate": 0.01, |
| "loss": 1.388, |
| "loss/crossentropy": 2.4543423652648926, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2694346010684967, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0006564230128088858, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.24162036577860516, |
| "learning_rate": 0.01, |
| "loss": 1.4109, |
| "loss/crossentropy": 2.5903791189193726, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.246421679854393, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.000673697302619646, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.24139873186747232, |
| "learning_rate": 0.01, |
| "loss": 1.4232, |
| "loss/crossentropy": 2.248749613761902, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.23829498887062073, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0006909715924304062, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.24068241119384765, |
| "learning_rate": 0.01, |
| "loss": 1.4025, |
| "loss/crossentropy": 2.52192759513855, |
| "loss/fcd": 1.21875, |
| "loss/logits": 0.3120736628770828, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0007082458822411663, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.24076868693033854, |
| "learning_rate": 0.01, |
| "loss": 1.3388, |
| "loss/crossentropy": 2.4299440383911133, |
| "loss/fcd": 1.05078125, |
| "loss/logits": 0.21974454075098038, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0007255201720519265, |
| "grad_norm": 0.361328125, |
| "grad_norm_var": 0.00956584612528483, |
| "learning_rate": 0.01, |
| "loss": 1.3783, |
| "loss/crossentropy": 2.3354129791259766, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.22372399270534515, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0007427944618626866, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.009831984837849935, |
| "learning_rate": 0.01, |
| "loss": 1.3578, |
| "loss/crossentropy": 2.3422107696533203, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.22979970276355743, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0007600687516734468, |
| "grad_norm": 0.337890625, |
| "grad_norm_var": 0.009589751561482748, |
| "learning_rate": 0.01, |
| "loss": 1.4869, |
| "loss/crossentropy": 2.4120657444000244, |
| "loss/fcd": 1.22265625, |
| "loss/logits": 0.27795399725437164, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.000777343041484207, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.009527333577473958, |
| "learning_rate": 0.01, |
| "loss": 1.3861, |
| "loss/crossentropy": 2.66101336479187, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.25736863911151886, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0007946173312949671, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.009370152155558269, |
| "learning_rate": 0.01, |
| "loss": 1.4078, |
| "loss/crossentropy": 2.5887415409088135, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.285249263048172, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0008118916211057273, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.009616454442342123, |
| "learning_rate": 0.01, |
| "loss": 1.3142, |
| "loss/crossentropy": 2.5115991830825806, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.23692379146814346, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0008291659109164874, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.00964506467183431, |
| "learning_rate": 0.01, |
| "loss": 1.4093, |
| "loss/crossentropy": 2.5383851528167725, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2725464850664139, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0008464402007272476, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0007040500640869141, |
| "learning_rate": 0.01, |
| "loss": 1.3871, |
| "loss/crossentropy": 2.3415656089782715, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.23871353268623352, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.0008637144905380078, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.000538492202758789, |
| "learning_rate": 0.01, |
| "loss": 1.422, |
| "loss/crossentropy": 2.241709351539612, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2642487585544586, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0008809887803487679, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.0006014347076416015, |
| "learning_rate": 0.01, |
| "loss": 1.4018, |
| "loss/crossentropy": 2.18844211101532, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.25836754590272903, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.0008982630701595281, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0006024678548177083, |
| "learning_rate": 0.01, |
| "loss": 1.3702, |
| "loss/crossentropy": 2.4040807485580444, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.27216051518917084, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0009155373599702882, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0005683739980061849, |
| "learning_rate": 0.01, |
| "loss": 1.375, |
| "loss/crossentropy": 2.3604718446731567, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2563931792974472, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.0009328116497810484, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0006024678548177083, |
| "learning_rate": 0.01, |
| "loss": 1.3398, |
| "loss/crossentropy": 2.3702304363250732, |
| "loss/fcd": 1.044921875, |
| "loss/logits": 0.23356395214796066, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0009500859395918086, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.000598001480102539, |
| "learning_rate": 0.01, |
| "loss": 1.4359, |
| "loss/crossentropy": 2.532386064529419, |
| "loss/fcd": 1.19921875, |
| "loss/logits": 0.29735907912254333, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0009673602294025687, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0005999088287353515, |
| "learning_rate": 0.01, |
| "loss": 1.3103, |
| "loss/crossentropy": 2.4240375757217407, |
| "loss/fcd": 1.04296875, |
| "loss/logits": 0.2354799136519432, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0009846345192133289, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0005986372629801433, |
| "learning_rate": 0.01, |
| "loss": 1.4436, |
| "loss/crossentropy": 2.6270374059677124, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.2780339866876602, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.001001908809024089, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0003819147745768229, |
| "learning_rate": 0.01, |
| "loss": 1.4263, |
| "loss/crossentropy": 2.6478673219680786, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.26073622703552246, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.001019183098834849, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.00032817522684733074, |
| "learning_rate": 0.01, |
| "loss": 1.3944, |
| "loss/crossentropy": 2.596788763999939, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2364196628332138, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0010364573886456094, |
| "grad_norm": 0.388671875, |
| "grad_norm_var": 0.0007116794586181641, |
| "learning_rate": 0.01, |
| "loss": 1.4703, |
| "loss/crossentropy": 2.516297459602356, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2600822076201439, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0010537316784563695, |
| "grad_norm": 0.353515625, |
| "grad_norm_var": 0.0008394718170166016, |
| "learning_rate": 0.01, |
| "loss": 1.4355, |
| "loss/crossentropy": 2.3750414848327637, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.257433146238327, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0010710059682671295, |
| "grad_norm": 0.341796875, |
| "grad_norm_var": 0.0008870283762613932, |
| "learning_rate": 0.01, |
| "loss": 1.4704, |
| "loss/crossentropy": 2.6349244117736816, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.27842070162296295, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.0010882802580778899, |
| "grad_norm": 0.345703125, |
| "grad_norm_var": 0.000816965103149414, |
| "learning_rate": 0.01, |
| "loss": 1.4253, |
| "loss/crossentropy": 2.4561866521835327, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2601305991411209, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.00110555454788865, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0008069197336832682, |
| "learning_rate": 0.01, |
| "loss": 1.3542, |
| "loss/crossentropy": 2.4422744512557983, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.25526949763298035, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.00112282883769941, |
| "grad_norm": 0.41796875, |
| "grad_norm_var": 0.0014043013254801432, |
| "learning_rate": 0.01, |
| "loss": 1.4401, |
| "loss/crossentropy": 2.164702892303467, |
| "loss/fcd": 1.22265625, |
| "loss/logits": 0.20365531742572784, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.0011401031275101703, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0014294942220052084, |
| "learning_rate": 0.01, |
| "loss": 1.3525, |
| "loss/crossentropy": 2.7132447957992554, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2643866539001465, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0011573774173209304, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0012522220611572265, |
| "learning_rate": 0.01, |
| "loss": 1.3225, |
| "loss/crossentropy": 2.4213372468948364, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.2328537479043007, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.0011746517071316905, |
| "grad_norm": 0.353515625, |
| "grad_norm_var": 0.00119627316792806, |
| "learning_rate": 0.01, |
| "loss": 1.3973, |
| "loss/crossentropy": 2.436795651912689, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.2546040713787079, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0011919259969424506, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0012935479482014975, |
| "learning_rate": 0.01, |
| "loss": 1.3866, |
| "loss/crossentropy": 2.274712562561035, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.26513542234897614, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.001209200286753211, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0013611952463785807, |
| "learning_rate": 0.01, |
| "loss": 1.3986, |
| "loss/crossentropy": 2.4798693656921387, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2789834886789322, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.001226474576563971, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0014126936594645182, |
| "learning_rate": 0.01, |
| "loss": 1.3394, |
| "loss/crossentropy": 2.496403932571411, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.23832575976848602, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.001243748866374731, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0013817946116129558, |
| "learning_rate": 0.01, |
| "loss": 1.3945, |
| "loss/crossentropy": 2.330789804458618, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.23751115798950195, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0012610231561854914, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0014734745025634765, |
| "learning_rate": 0.01, |
| "loss": 1.3859, |
| "loss/crossentropy": 2.5367313623428345, |
| "loss/fcd": 1.19921875, |
| "loss/logits": 0.2804088890552521, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.0012782974459962515, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.001559305191040039, |
| "learning_rate": 0.01, |
| "loss": 1.3887, |
| "loss/crossentropy": 2.3117035627365112, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.2553889825940132, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0012955717358070116, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.001582193374633789, |
| "learning_rate": 0.01, |
| "loss": 1.4083, |
| "loss/crossentropy": 2.5574092864990234, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.24754850566387177, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0013128460256177717, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0013358910878499349, |
| "learning_rate": 0.01, |
| "loss": 1.39, |
| "loss/crossentropy": 2.5164517164230347, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.23118755221366882, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.001330120315428532, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0012410481770833333, |
| "learning_rate": 0.01, |
| "loss": 1.4129, |
| "loss/crossentropy": 2.4725937843322754, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.2354634776711464, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.001347394605239292, |
| "grad_norm": 0.52734375, |
| "grad_norm_var": 0.0040692488352457685, |
| "learning_rate": 0.01, |
| "loss": 1.5435, |
| "loss/crossentropy": 2.067330479621887, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.2535740062594414, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0013646688950500522, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0040776570638020836, |
| "learning_rate": 0.01, |
| "loss": 1.3808, |
| "loss/crossentropy": 2.363155961036682, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2392946034669876, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0013819431848608125, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.004054371515909831, |
| "learning_rate": 0.01, |
| "loss": 1.4014, |
| "loss/crossentropy": 2.561974883079529, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2719137519598007, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0013992174746715726, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.0034375349680582684, |
| "learning_rate": 0.01, |
| "loss": 1.3718, |
| "loss/crossentropy": 2.5669400691986084, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.27283619344234467, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0014164917644823327, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.003415362040201823, |
| "learning_rate": 0.01, |
| "loss": 1.423, |
| "loss/crossentropy": 2.3874313831329346, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.25072336941957474, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.001433766054293093, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.003453509012858073, |
| "learning_rate": 0.01, |
| "loss": 1.4176, |
| "loss/crossentropy": 2.711247205734253, |
| "loss/fcd": 1.23046875, |
| "loss/logits": 0.28591448068618774, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.001451040344103853, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0034200032552083332, |
| "learning_rate": 0.01, |
| "loss": 1.3905, |
| "loss/crossentropy": 2.549779772758484, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2730839252471924, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0014683146339146132, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0033526102701822917, |
| "learning_rate": 0.01, |
| "loss": 1.3706, |
| "loss/crossentropy": 2.255567193031311, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.2550910860300064, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0014855889237253732, |
| "grad_norm": 0.6015625, |
| "grad_norm_var": 0.008341471354166666, |
| "learning_rate": 0.01, |
| "loss": 1.629, |
| "loss/crossentropy": 2.245366394519806, |
| "loss/fcd": 1.58203125, |
| "loss/logits": 0.3177703619003296, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.0015028632135361336, |
| "grad_norm": 0.361328125, |
| "grad_norm_var": 0.0082763671875, |
| "learning_rate": 0.01, |
| "loss": 1.3925, |
| "loss/crossentropy": 2.5329853296279907, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2691914439201355, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0015201375033468936, |
| "grad_norm": 0.337890625, |
| "grad_norm_var": 0.008169158299763998, |
| "learning_rate": 0.01, |
| "loss": 1.3783, |
| "loss/crossentropy": 2.573711633682251, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.24663084745407104, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.0015374117931576537, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.008059438069661458, |
| "learning_rate": 0.01, |
| "loss": 1.3466, |
| "loss/crossentropy": 2.4545916318893433, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.22179614007472992, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.001554686082968414, |
| "grad_norm": 0.353515625, |
| "grad_norm_var": 0.007821893692016602, |
| "learning_rate": 0.01, |
| "loss": 1.4058, |
| "loss/crossentropy": 2.0489944219589233, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.25446537882089615, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0015719603727791741, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.007696262995402018, |
| "learning_rate": 0.01, |
| "loss": 1.4186, |
| "loss/crossentropy": 2.6278460025787354, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.2563782036304474, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0015892346625899342, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.0074314753214518225, |
| "learning_rate": 0.01, |
| "loss": 1.4634, |
| "loss/crossentropy": 2.3578550815582275, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.26509464532136917, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0016065089524006945, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.007539876302083333, |
| "learning_rate": 0.01, |
| "loss": 1.3685, |
| "loss/crossentropy": 2.53238308429718, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.24864411354064941, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.0016237832422114546, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.005428679784138997, |
| "learning_rate": 0.01, |
| "loss": 1.4136, |
| "loss/crossentropy": 2.3873801231384277, |
| "loss/fcd": 1.2265625, |
| "loss/logits": 0.2842061370611191, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.0016410575320222147, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.005470768610636393, |
| "learning_rate": 0.01, |
| "loss": 1.4, |
| "loss/crossentropy": 2.576484441757202, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.2684750333428383, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.0016583318218329748, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0054779052734375, |
| "learning_rate": 0.01, |
| "loss": 1.4256, |
| "loss/crossentropy": 2.5171070098876953, |
| "loss/fcd": 1.24609375, |
| "loss/logits": 0.2969086170196533, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0016756061116437351, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.005534728368123372, |
| "learning_rate": 0.01, |
| "loss": 1.3949, |
| "loss/crossentropy": 2.6096785068511963, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2719826400279999, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.0016928804014544952, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0056027571360270185, |
| "learning_rate": 0.01, |
| "loss": 1.3758, |
| "loss/crossentropy": 2.366774320602417, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.23891064524650574, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.0017101546912652553, |
| "grad_norm": 0.34765625, |
| "grad_norm_var": 0.005489714940388997, |
| "learning_rate": 0.01, |
| "loss": 1.436, |
| "loss/crossentropy": 2.356974244117737, |
| "loss/fcd": 1.3046875, |
| "loss/logits": 0.2715897411108017, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.0017274289810760156, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.005489714940388997, |
| "learning_rate": 0.01, |
| "loss": 1.3544, |
| "loss/crossentropy": 2.5830947160720825, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.28681397438049316, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0017447032708867757, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0055816650390625, |
| "learning_rate": 0.01, |
| "loss": 1.3767, |
| "loss/crossentropy": 2.538628339767456, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.2549655809998512, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.0017619775606975358, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0005793094635009766, |
| "learning_rate": 0.01, |
| "loss": 1.3127, |
| "loss/crossentropy": 2.153649151325226, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.23056582361459732, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.0017792518505082959, |
| "grad_norm": 0.337890625, |
| "grad_norm_var": 0.0004759311676025391, |
| "learning_rate": 0.01, |
| "loss": 1.4807, |
| "loss/crossentropy": 2.7840667963027954, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.26921743154525757, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0017965261403190562, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0004470189412434896, |
| "learning_rate": 0.01, |
| "loss": 1.4075, |
| "loss/crossentropy": 2.375385046005249, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2573629766702652, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.0018138004301298163, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.0004608154296875, |
| "learning_rate": 0.01, |
| "loss": 1.3262, |
| "loss/crossentropy": 2.7132558822631836, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2457902729511261, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.0018310747199405764, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.00039315223693847656, |
| "learning_rate": 0.01, |
| "loss": 1.292, |
| "loss/crossentropy": 2.017941474914551, |
| "loss/fcd": 0.986328125, |
| "loss/logits": 0.20789727568626404, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.0018483490097513367, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.00035691261291503906, |
| "learning_rate": 0.01, |
| "loss": 1.4188, |
| "loss/crossentropy": 2.457041621208191, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.23911338299512863, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.0018656232995620968, |
| "grad_norm": 0.267578125, |
| "grad_norm_var": 0.00039513905843098957, |
| "learning_rate": 0.01, |
| "loss": 1.3624, |
| "loss/crossentropy": 2.264693021774292, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.23969107121229172, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.0018828975893728569, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.0004439671834309896, |
| "learning_rate": 0.01, |
| "loss": 1.3602, |
| "loss/crossentropy": 2.5558459758758545, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.24982617795467377, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0019001718791836172, |
| "grad_norm": 0.44921875, |
| "grad_norm_var": 0.0017612298329671224, |
| "learning_rate": 0.01, |
| "loss": 1.4482, |
| "loss/crossentropy": 2.623742938041687, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2605845034122467, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0019174461689943773, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0017667134602864583, |
| "learning_rate": 0.01, |
| "loss": 1.4127, |
| "loss/crossentropy": 2.7532334327697754, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.26577115058898926, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.0019347204588051373, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0017864068349202475, |
| "learning_rate": 0.01, |
| "loss": 1.3525, |
| "loss/crossentropy": 2.4502193927764893, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.2550206333398819, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0019519947486158974, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.0018309911092122396, |
| "learning_rate": 0.01, |
| "loss": 1.4422, |
| "loss/crossentropy": 2.0644272565841675, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.25845974683761597, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0019692690384266577, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0018169244130452475, |
| "learning_rate": 0.01, |
| "loss": 1.3762, |
| "loss/crossentropy": 2.6453906297683716, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.28696541488170624, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.001986543328237418, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0017402489980061849, |
| "learning_rate": 0.01, |
| "loss": 1.3974, |
| "loss/crossentropy": 2.229590892791748, |
| "loss/fcd": 1.04296875, |
| "loss/logits": 0.22459837794303894, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.002003817618048178, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0017174879709879558, |
| "learning_rate": 0.01, |
| "loss": 1.3518, |
| "loss/crossentropy": 2.5267633199691772, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.24026738852262497, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.0020210919078589382, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0017108758290608724, |
| "learning_rate": 0.01, |
| "loss": 1.4729, |
| "loss/crossentropy": 2.3015085458755493, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.23641249537467957, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.002038366197669698, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0017054080963134766, |
| "learning_rate": 0.01, |
| "loss": 1.4479, |
| "loss/crossentropy": 2.0869252681732178, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2337687686085701, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.0020556404874804584, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0016692479451497395, |
| "learning_rate": 0.01, |
| "loss": 1.3789, |
| "loss/crossentropy": 2.620050311088562, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2408916875720024, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0020729147772912187, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0016824722290039063, |
| "learning_rate": 0.01, |
| "loss": 1.3728, |
| "loss/crossentropy": 2.406272053718567, |
| "loss/fcd": 1.28515625, |
| "loss/logits": 0.27460669726133347, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0020901890671019786, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0016681512196858725, |
| "learning_rate": 0.01, |
| "loss": 1.3607, |
| "loss/crossentropy": 2.1980100870132446, |
| "loss/fcd": 1.013671875, |
| "loss/logits": 0.23184800148010254, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.002107463356912739, |
| "grad_norm": 0.365234375, |
| "grad_norm_var": 0.0018063863118489584, |
| "learning_rate": 0.01, |
| "loss": 1.4133, |
| "loss/crossentropy": 2.672022223472595, |
| "loss/fcd": 1.24609375, |
| "loss/logits": 0.2712271511554718, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.002124737646723499, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0018046061197916667, |
| "learning_rate": 0.01, |
| "loss": 1.4161, |
| "loss/crossentropy": 2.161317527294159, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2415143996477127, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.002142011936534259, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0016402562459309896, |
| "learning_rate": 0.01, |
| "loss": 1.3432, |
| "loss/crossentropy": 2.4041404724121094, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2565518468618393, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.0021592862263450194, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0015553792317708334, |
| "learning_rate": 0.01, |
| "loss": 1.3222, |
| "loss/crossentropy": 2.289466381072998, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.2588811218738556, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0021765605161557797, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0003751118977864583, |
| "learning_rate": 0.01, |
| "loss": 1.4145, |
| "loss/crossentropy": 2.0946825742721558, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.22345608472824097, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0021938348059665396, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0003452936808268229, |
| "learning_rate": 0.01, |
| "loss": 1.3904, |
| "loss/crossentropy": 2.4527688026428223, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.23762068152427673, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.0022111090957773, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0003202915191650391, |
| "learning_rate": 0.01, |
| "loss": 1.4117, |
| "loss/crossentropy": 2.6558061838150024, |
| "loss/fcd": 1.26953125, |
| "loss/logits": 0.3351695239543915, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.00222838338558806, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.00027667681376139324, |
| "learning_rate": 0.01, |
| "loss": 1.3841, |
| "loss/crossentropy": 2.3390719890594482, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.23404338955879211, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.00224565767539882, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.00028254191080729165, |
| "learning_rate": 0.01, |
| "loss": 1.3402, |
| "loss/crossentropy": 2.5888524055480957, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.2385600358247757, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0022629319652095804, |
| "grad_norm": 0.365234375, |
| "grad_norm_var": 0.00045291582743326825, |
| "learning_rate": 0.01, |
| "loss": 1.4423, |
| "loss/crossentropy": 2.1622209548950195, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.25934895873069763, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.0022802062550203407, |
| "grad_norm": 0.349609375, |
| "grad_norm_var": 0.0004840691884358724, |
| "learning_rate": 0.01, |
| "loss": 1.5001, |
| "loss/crossentropy": 2.5385576486587524, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.26659196615219116, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0022974805448311006, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0005355676015218099, |
| "learning_rate": 0.01, |
| "loss": 1.3481, |
| "loss/crossentropy": 2.348211407661438, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2560664862394333, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.002314754834641861, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0005294164021809896, |
| "learning_rate": 0.01, |
| "loss": 1.3607, |
| "loss/crossentropy": 2.117067277431488, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.22807861864566803, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.0023320291244526207, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0005870660146077474, |
| "learning_rate": 0.01, |
| "loss": 1.387, |
| "loss/crossentropy": 2.5187747478485107, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.27947917580604553, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.002349303414263381, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.0005658467610677084, |
| "learning_rate": 0.01, |
| "loss": 1.3995, |
| "loss/crossentropy": 2.4953707456588745, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.24946419894695282, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.0023665777040741414, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0005611260732014974, |
| "learning_rate": 0.01, |
| "loss": 1.4027, |
| "loss/crossentropy": 2.3007187843322754, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.2944917380809784, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.0023838519938849012, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0004042943318684896, |
| "learning_rate": 0.01, |
| "loss": 1.3784, |
| "loss/crossentropy": 2.406763792037964, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.24067886918783188, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0024011262836956615, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0004521052042643229, |
| "learning_rate": 0.01, |
| "loss": 1.394, |
| "loss/crossentropy": 2.3716171979904175, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2490846812725067, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.002418400573506422, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0004530429840087891, |
| "learning_rate": 0.01, |
| "loss": 1.3992, |
| "loss/crossentropy": 2.298838496208191, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2580900937318802, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0024356748633171817, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.00048014322916666664, |
| "learning_rate": 0.01, |
| "loss": 1.3887, |
| "loss/crossentropy": 2.1861318349838257, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.25625482201576233, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.002452949153127942, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0005390803019205729, |
| "learning_rate": 0.01, |
| "loss": 1.4149, |
| "loss/crossentropy": 2.5295623540878296, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.24908355623483658, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.0024702234429387023, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0005419413248697917, |
| "learning_rate": 0.01, |
| "loss": 1.4095, |
| "loss/crossentropy": 2.4763203859329224, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.25878605246543884, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.002487497732749462, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0005533854166666667, |
| "learning_rate": 0.01, |
| "loss": 1.4244, |
| "loss/crossentropy": 2.520187020301819, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.24524306505918503, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0025047720225602225, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0005658308664957683, |
| "learning_rate": 0.01, |
| "loss": 1.4039, |
| "loss/crossentropy": 2.517001748085022, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.23872993886470795, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.002522046312370983, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0005833784739176433, |
| "learning_rate": 0.01, |
| "loss": 1.3182, |
| "loss/crossentropy": 2.4004757404327393, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.24094465374946594, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.0025393206021817427, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0003750960032145182, |
| "learning_rate": 0.01, |
| "loss": 1.3334, |
| "loss/crossentropy": 2.1713826656341553, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.22458232194185257, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.002556594891992503, |
| "grad_norm": 0.34375, |
| "grad_norm_var": 0.00034052530924479166, |
| "learning_rate": 0.01, |
| "loss": 1.3361, |
| "loss/crossentropy": 2.438323974609375, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.24637237191200256, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.0025738691818032633, |
| "grad_norm": 0.380859375, |
| "grad_norm_var": 0.0007058302561442057, |
| "learning_rate": 0.01, |
| "loss": 1.4953, |
| "loss/crossentropy": 2.450320243835449, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.24158670753240585, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.002591143471614023, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0007044474283854166, |
| "learning_rate": 0.01, |
| "loss": 1.4629, |
| "loss/crossentropy": 2.294734477996826, |
| "loss/fcd": 1.2421875, |
| "loss/logits": 0.2762032076716423, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0026084177614247835, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.0007077376047770182, |
| "learning_rate": 0.01, |
| "loss": 1.4165, |
| "loss/crossentropy": 2.468201994895935, |
| "loss/fcd": 1.21875, |
| "loss/logits": 0.2507496029138565, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0026256920512355434, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0006917158762613933, |
| "learning_rate": 0.01, |
| "loss": 1.395, |
| "loss/crossentropy": 2.353287696838379, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.2722310647368431, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.0026429663410463037, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0007008234659830729, |
| "learning_rate": 0.01, |
| "loss": 1.3506, |
| "loss/crossentropy": 2.2797771692276, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2620129883289337, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.002660240630857064, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0007210890452067057, |
| "learning_rate": 0.01, |
| "loss": 1.3943, |
| "loss/crossentropy": 2.6261144876480103, |
| "loss/fcd": 1.21484375, |
| "loss/logits": 0.3041190207004547, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.002677514920667824, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0007389704386393229, |
| "learning_rate": 0.01, |
| "loss": 1.4487, |
| "loss/crossentropy": 2.327589750289917, |
| "loss/fcd": 1.2890625, |
| "loss/logits": 0.333427369594574, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.002694789210478584, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0007445653279622396, |
| "learning_rate": 0.01, |
| "loss": 1.3842, |
| "loss/crossentropy": 2.4801390171051025, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.23910623788833618, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0027120635002893445, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0007395267486572266, |
| "learning_rate": 0.01, |
| "loss": 1.3487, |
| "loss/crossentropy": 2.577694535255432, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.2568306028842926, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.0027293377901001043, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0006922403971354167, |
| "learning_rate": 0.01, |
| "loss": 1.3505, |
| "loss/crossentropy": 2.415543556213379, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2512781471014023, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.0027466120799108647, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0006875991821289062, |
| "learning_rate": 0.01, |
| "loss": 1.4042, |
| "loss/crossentropy": 2.4328696727752686, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.2584942355751991, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.002763886369721625, |
| "grad_norm": 0.2578125, |
| "grad_norm_var": 0.0008356730143229167, |
| "learning_rate": 0.01, |
| "loss": 1.2883, |
| "loss/crossentropy": 2.344989776611328, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.23677106201648712, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.002781160659532385, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0008282979329427083, |
| "learning_rate": 0.01, |
| "loss": 1.3544, |
| "loss/crossentropy": 2.3909146785736084, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.26238836348056793, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.002798434949343145, |
| "grad_norm": 0.341796875, |
| "grad_norm_var": 0.000886980692545573, |
| "learning_rate": 0.01, |
| "loss": 1.4284, |
| "loss/crossentropy": 2.6815162897109985, |
| "loss/fcd": 1.23828125, |
| "loss/logits": 0.28333599865436554, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.0028157092391539055, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0008396784464518229, |
| "learning_rate": 0.01, |
| "loss": 1.3743, |
| "loss/crossentropy": 2.363664388656616, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.23216551542282104, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0028329835289646653, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0007912794748942058, |
| "learning_rate": 0.01, |
| "loss": 1.3503, |
| "loss/crossentropy": 2.6360952854156494, |
| "loss/fcd": 1.19140625, |
| "loss/logits": 0.25444111227989197, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0028502578187754256, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0004067579905192057, |
| "learning_rate": 0.01, |
| "loss": 1.3827, |
| "loss/crossentropy": 2.255971908569336, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.2420385479927063, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.002867532108586186, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.00044854482014973957, |
| "learning_rate": 0.01, |
| "loss": 1.3572, |
| "loss/crossentropy": 2.5781320333480835, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2430611252784729, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.002884806398396946, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0003909905751546224, |
| "learning_rate": 0.01, |
| "loss": 1.394, |
| "loss/crossentropy": 2.698032259941101, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.248212069272995, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.002902080688207706, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0003845055898030599, |
| "learning_rate": 0.01, |
| "loss": 1.4097, |
| "loss/crossentropy": 2.372989535331726, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.24837365001440048, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.002919354978018466, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0003870646158854167, |
| "learning_rate": 0.01, |
| "loss": 1.3624, |
| "loss/crossentropy": 2.555245876312256, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2645147144794464, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.0029366292678292263, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.00038089752197265623, |
| "learning_rate": 0.01, |
| "loss": 1.3458, |
| "loss/crossentropy": 2.2800326347351074, |
| "loss/fcd": 1.0390625, |
| "loss/logits": 0.22108863294124603, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0029539035576399866, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.000353240966796875, |
| "learning_rate": 0.01, |
| "loss": 1.3788, |
| "loss/crossentropy": 2.638196110725403, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.2918136268854141, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.0029711778474507465, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.00034152666727701824, |
| "learning_rate": 0.01, |
| "loss": 1.3664, |
| "loss/crossentropy": 2.6176986694335938, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.26864323019981384, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.002988452137261507, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0003345330556233724, |
| "learning_rate": 0.01, |
| "loss": 1.4184, |
| "loss/crossentropy": 2.62368905544281, |
| "loss/fcd": 1.25390625, |
| "loss/logits": 0.28509171307086945, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.003005726427072267, |
| "grad_norm": 0.671875, |
| "grad_norm_var": 0.00890649159749349, |
| "learning_rate": 0.01, |
| "loss": 1.4685, |
| "loss/crossentropy": 2.309454083442688, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.26153236627578735, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.003023000716883027, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.00887309710184733, |
| "learning_rate": 0.01, |
| "loss": 1.4154, |
| "loss/crossentropy": 2.320811152458191, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.24308273196220398, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0030402750066937873, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.008579444885253907, |
| "learning_rate": 0.01, |
| "loss": 1.3805, |
| "loss/crossentropy": 2.579828977584839, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2542525976896286, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0030575492965045476, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.008579444885253907, |
| "learning_rate": 0.01, |
| "loss": 1.3868, |
| "loss/crossentropy": 2.5000842809677124, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.2917867451906204, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.0030748235863153075, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.008653004964192709, |
| "learning_rate": 0.01, |
| "loss": 1.3679, |
| "loss/crossentropy": 2.5240609645843506, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2740897983312607, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.0030920978761260678, |
| "grad_norm": 0.75, |
| "grad_norm_var": 0.019812758763631186, |
| "learning_rate": 0.01, |
| "loss": 1.423, |
| "loss/crossentropy": 2.383319854736328, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.2834385186433792, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.003109372165936828, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.01962865193684896, |
| "learning_rate": 0.01, |
| "loss": 1.3861, |
| "loss/crossentropy": 2.3524543046951294, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.24870187044143677, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.003126646455747588, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.01944268544514974, |
| "learning_rate": 0.01, |
| "loss": 1.4644, |
| "loss/crossentropy": 2.768381118774414, |
| "loss/fcd": 1.26171875, |
| "loss/logits": 0.3117068111896515, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.0031439207455583483, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.019518470764160155, |
| "learning_rate": 0.01, |
| "loss": 1.4071, |
| "loss/crossentropy": 2.5678982734680176, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.25002971291542053, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0031611950353691086, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.019382969538370768, |
| "learning_rate": 0.01, |
| "loss": 1.4044, |
| "loss/crossentropy": 2.6401069164276123, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.2738536596298218, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.0031784693251798684, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.019187148412068686, |
| "learning_rate": 0.01, |
| "loss": 1.4165, |
| "loss/crossentropy": 2.3614484071731567, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.28841613233089447, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0031957436149906288, |
| "grad_norm": 0.337890625, |
| "grad_norm_var": 0.01904290517171224, |
| "learning_rate": 0.01, |
| "loss": 1.4151, |
| "loss/crossentropy": 2.2044495344161987, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.25532982498407364, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.003213017904801389, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.019160970052083334, |
| "learning_rate": 0.01, |
| "loss": 1.3233, |
| "loss/crossentropy": 2.657314658164978, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2434261366724968, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.003230292194612149, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.019019174575805663, |
| "learning_rate": 0.01, |
| "loss": 1.457, |
| "loss/crossentropy": 2.509123682975769, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.27627624571323395, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.0032475664844229092, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.01898535092671712, |
| "learning_rate": 0.01, |
| "loss": 1.4612, |
| "loss/crossentropy": 2.4355961084365845, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.2809949368238449, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.003264840774233669, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.018945821126302085, |
| "learning_rate": 0.01, |
| "loss": 1.4111, |
| "loss/crossentropy": 2.657699465751648, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.26505863666534424, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.0032821150640444294, |
| "grad_norm": 0.56640625, |
| "grad_norm_var": 0.01528771718343099, |
| "learning_rate": 0.01, |
| "loss": 1.4753, |
| "loss/crossentropy": 2.4757652282714844, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.22812122106552124, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0032993893538551897, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.015449269612630209, |
| "learning_rate": 0.01, |
| "loss": 1.3867, |
| "loss/crossentropy": 2.4966439604759216, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.24755483120679855, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.0033166636436659496, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.015437173843383788, |
| "learning_rate": 0.01, |
| "loss": 1.4331, |
| "loss/crossentropy": 2.2156739234924316, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.24708709865808487, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.00333393793347671, |
| "grad_norm": 0.337890625, |
| "grad_norm_var": 0.015273523330688477, |
| "learning_rate": 0.01, |
| "loss": 1.4652, |
| "loss/crossentropy": 2.5916343927383423, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.26975981891155243, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.0033512122232874702, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.01524046262105306, |
| "learning_rate": 0.01, |
| "loss": 1.3916, |
| "loss/crossentropy": 2.4512441158294678, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2599586248397827, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.00336848651309823, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.004449717203776042, |
| "learning_rate": 0.01, |
| "loss": 1.2906, |
| "loss/crossentropy": 2.4583925008773804, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.22421551495790482, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.0033857608029089904, |
| "grad_norm": 0.345703125, |
| "grad_norm_var": 0.004455931981404622, |
| "learning_rate": 0.01, |
| "loss": 1.4645, |
| "loss/crossentropy": 3.102002263069153, |
| "loss/fcd": 1.26953125, |
| "loss/logits": 0.31158843636512756, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.0034030350927197507, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.00462950070699056, |
| "learning_rate": 0.01, |
| "loss": 1.3805, |
| "loss/crossentropy": 2.537242293357849, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.24022500216960907, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.0034203093825305106, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.004623905817667643, |
| "learning_rate": 0.01, |
| "loss": 1.4295, |
| "loss/crossentropy": 1.8695432543754578, |
| "loss/fcd": 1.24609375, |
| "loss/logits": 0.2338111400604248, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.003437583672341271, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.004644711812337239, |
| "learning_rate": 0.01, |
| "loss": 1.4342, |
| "loss/crossentropy": 2.5979591608047485, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.2477928102016449, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.003454857962152031, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.004698117574055989, |
| "learning_rate": 0.01, |
| "loss": 1.3588, |
| "loss/crossentropy": 2.6363730430603027, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.26602891087532043, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.003472132251962791, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.004715919494628906, |
| "learning_rate": 0.01, |
| "loss": 1.3919, |
| "loss/crossentropy": 2.6225093603134155, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.25346362590789795, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.0034894065417735514, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.004638528823852539, |
| "learning_rate": 0.01, |
| "loss": 1.3639, |
| "loss/crossentropy": 2.6900315284729004, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.25750475376844406, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.0035066808315843117, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0046525160471598305, |
| "learning_rate": 0.01, |
| "loss": 1.4454, |
| "loss/crossentropy": 2.4896918535232544, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.24820879101753235, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.0035239551213950716, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.004671732584635417, |
| "learning_rate": 0.01, |
| "loss": 1.3744, |
| "loss/crossentropy": 2.4207727909088135, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.267853319644928, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.003541229411205832, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.004704777399698893, |
| "learning_rate": 0.01, |
| "loss": 1.3827, |
| "loss/crossentropy": 2.6077362298965454, |
| "loss/fcd": 1.19140625, |
| "loss/logits": 0.2449246495962143, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.0035585037010165918, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.00035959879557291666, |
| "learning_rate": 0.01, |
| "loss": 1.36, |
| "loss/crossentropy": 2.2625831365585327, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.25722844898700714, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.003575777990827352, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.0003903547922770182, |
| "learning_rate": 0.01, |
| "loss": 1.4604, |
| "loss/crossentropy": 2.6293487548828125, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.2616356760263443, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.0035930522806381124, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.00040022532145182293, |
| "learning_rate": 0.01, |
| "loss": 1.3362, |
| "loss/crossentropy": 2.4450851678848267, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.23832125961780548, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.0036103265704488722, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0003178278605143229, |
| "learning_rate": 0.01, |
| "loss": 1.3815, |
| "loss/crossentropy": 2.265815496444702, |
| "loss/fcd": 1.25, |
| "loss/logits": 0.2856537625193596, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.0036276008602596325, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0003153483072916667, |
| "learning_rate": 0.01, |
| "loss": 1.3779, |
| "loss/crossentropy": 2.4830867052078247, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.27156491577625275, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.003644875150070393, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.00030007362365722654, |
| "learning_rate": 0.01, |
| "loss": 1.3838, |
| "loss/crossentropy": 2.4645248651504517, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2536320984363556, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.0036621494398811527, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.000191497802734375, |
| "learning_rate": 0.01, |
| "loss": 1.3463, |
| "loss/crossentropy": 2.4574155807495117, |
| "loss/fcd": 1.04296875, |
| "loss/logits": 0.22712672501802444, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.003679423729691913, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.00014468828837076823, |
| "learning_rate": 0.01, |
| "loss": 1.3817, |
| "loss/crossentropy": 2.51455819606781, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.24947896599769592, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.0036966980195026733, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.00013477007548014323, |
| "learning_rate": 0.01, |
| "loss": 1.4295, |
| "loss/crossentropy": 2.5708523988723755, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.3021456152200699, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.0037139723093134332, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0001357396443684896, |
| "learning_rate": 0.01, |
| "loss": 1.3823, |
| "loss/crossentropy": 2.696264386177063, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2742393985390663, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.0037312465991241935, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.00015913645426432292, |
| "learning_rate": 0.01, |
| "loss": 1.4102, |
| "loss/crossentropy": 2.310886025428772, |
| "loss/fcd": 1.22265625, |
| "loss/logits": 0.2918149083852768, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.003748520888934954, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.000156402587890625, |
| "learning_rate": 0.01, |
| "loss": 1.4137, |
| "loss/crossentropy": 2.2433084845542908, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.25447261333465576, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.0037657951787457137, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.00016528765360514323, |
| "learning_rate": 0.01, |
| "loss": 1.4519, |
| "loss/crossentropy": 2.4079222679138184, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2586686462163925, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.003783069468556474, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.00021602312723795574, |
| "learning_rate": 0.01, |
| "loss": 1.415, |
| "loss/crossentropy": 2.460106134414673, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.2525549978017807, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.0038003437583672343, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.00022377967834472657, |
| "learning_rate": 0.01, |
| "loss": 1.4134, |
| "loss/crossentropy": 2.225171685218811, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.22205037623643875, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.003817618048177994, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.0003029982248942057, |
| "learning_rate": 0.01, |
| "loss": 1.3745, |
| "loss/crossentropy": 2.3788317441940308, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.24135209619998932, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.0038348923379887545, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0003082116444905599, |
| "learning_rate": 0.01, |
| "loss": 1.4452, |
| "loss/crossentropy": 2.375778555870056, |
| "loss/fcd": 1.21484375, |
| "loss/logits": 0.25682032108306885, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.0038521666277995144, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0002720514933268229, |
| "learning_rate": 0.01, |
| "loss": 1.3877, |
| "loss/crossentropy": 2.4510881900787354, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2522790729999542, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.0038694409176102747, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.00027872721354166665, |
| "learning_rate": 0.01, |
| "loss": 1.3783, |
| "loss/crossentropy": 2.4119985103607178, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.22855417430400848, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.003886715207421035, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0002911726633707682, |
| "learning_rate": 0.01, |
| "loss": 1.4037, |
| "loss/crossentropy": 2.4024510383605957, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.2690604329109192, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.003903989497231795, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.00034427642822265625, |
| "learning_rate": 0.01, |
| "loss": 1.3468, |
| "loss/crossentropy": 2.550796151161194, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.24284164607524872, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.003921263787042556, |
| "grad_norm": 0.427734375, |
| "grad_norm_var": 0.0013123671213785806, |
| "learning_rate": 0.01, |
| "loss": 1.4574, |
| "loss/crossentropy": 2.9375933408737183, |
| "loss/fcd": 1.3046875, |
| "loss/logits": 0.2513057738542557, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.0039385380768533155, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0013051350911458333, |
| "learning_rate": 0.01, |
| "loss": 1.4281, |
| "loss/crossentropy": 2.524444341659546, |
| "loss/fcd": 1.1953125, |
| "loss/logits": 0.27370719611644745, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.003955812366664075, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.001366106669108073, |
| "learning_rate": 0.01, |
| "loss": 1.4307, |
| "loss/crossentropy": 2.486480951309204, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.26038021594285965, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.003973086656474836, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0013638655344645181, |
| "learning_rate": 0.01, |
| "loss": 1.3844, |
| "loss/crossentropy": 2.48094379901886, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.24342957884073257, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.003990360946285596, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0013834476470947266, |
| "learning_rate": 0.01, |
| "loss": 1.3685, |
| "loss/crossentropy": 2.241925358772278, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.24210943281650543, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.004007635236096356, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0013643741607666016, |
| "learning_rate": 0.01, |
| "loss": 1.448, |
| "loss/crossentropy": 2.648869752883911, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.24799171090126038, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.004024909525907116, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0013676802317301431, |
| "learning_rate": 0.01, |
| "loss": 1.4431, |
| "loss/crossentropy": 2.63001549243927, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.27701297402381897, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.0040421838157178765, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0013848463694254556, |
| "learning_rate": 0.01, |
| "loss": 1.3746, |
| "loss/crossentropy": 2.2247713804244995, |
| "loss/fcd": 1.03125, |
| "loss/logits": 0.24730068445205688, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.004059458105528636, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0013358910878499349, |
| "learning_rate": 0.01, |
| "loss": 1.4197, |
| "loss/crossentropy": 2.511416435241699, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.2583580017089844, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.004076732395339396, |
| "grad_norm": 0.369140625, |
| "grad_norm_var": 0.0015294392903645833, |
| "learning_rate": 0.01, |
| "loss": 1.4459, |
| "loss/crossentropy": 2.366840362548828, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.2747315466403961, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.004094006685150157, |
| "grad_norm": 0.34375, |
| "grad_norm_var": 0.0014388402303059896, |
| "learning_rate": 0.01, |
| "loss": 1.379, |
| "loss/crossentropy": 2.645435094833374, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.2583626061677933, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.004111280974960917, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.0014134089152018229, |
| "learning_rate": 0.01, |
| "loss": 1.3999, |
| "loss/crossentropy": 2.0519449710845947, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2533458322286606, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.004128555264771677, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0014001051584879556, |
| "learning_rate": 0.01, |
| "loss": 1.3654, |
| "loss/crossentropy": 2.236992359161377, |
| "loss/fcd": 1.02734375, |
| "loss/logits": 0.23388498276472092, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.0041458295545824374, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0014027277628580728, |
| "learning_rate": 0.01, |
| "loss": 1.3499, |
| "loss/crossentropy": 2.308284044265747, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.23218639194965363, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.004163103844393197, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0013278802235921225, |
| "learning_rate": 0.01, |
| "loss": 1.4553, |
| "loss/crossentropy": 2.360711455345154, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.24909411370754242, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.004180378134203957, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0011983235677083333, |
| "learning_rate": 0.01, |
| "loss": 1.4332, |
| "loss/crossentropy": 2.486197352409363, |
| "loss/fcd": 1.21875, |
| "loss/logits": 0.28059011697769165, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.004197652424014718, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0004508813222249349, |
| "learning_rate": 0.01, |
| "loss": 1.3993, |
| "loss/crossentropy": 2.461425542831421, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2716974467039108, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.004214926713825478, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0004518985748291016, |
| "learning_rate": 0.01, |
| "loss": 1.4236, |
| "loss/crossentropy": 2.344510316848755, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.2624819576740265, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.004232201003636238, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.0005180199940999348, |
| "learning_rate": 0.01, |
| "loss": 1.3542, |
| "loss/crossentropy": 2.6375720500946045, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2671656012535095, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.004249475293446998, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0005176385243733724, |
| "learning_rate": 0.01, |
| "loss": 1.3853, |
| "loss/crossentropy": 2.4105772972106934, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.2800147980451584, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.004266749583257758, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0004948298136393229, |
| "learning_rate": 0.01, |
| "loss": 1.3755, |
| "loss/crossentropy": 2.2956700325012207, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2564444988965988, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.004284023873068518, |
| "grad_norm": 0.50390625, |
| "grad_norm_var": 0.0026893456776936847, |
| "learning_rate": 0.01, |
| "loss": 1.3836, |
| "loss/crossentropy": 2.3848729133605957, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2581590488553047, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.004301298162879279, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.002683115005493164, |
| "learning_rate": 0.01, |
| "loss": 1.3791, |
| "loss/crossentropy": 2.6016765832901, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.26704905927181244, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.004318572452690039, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.002565956115722656, |
| "learning_rate": 0.01, |
| "loss": 1.4703, |
| "loss/crossentropy": 2.4796223640441895, |
| "loss/fcd": 1.28125, |
| "loss/logits": 0.30792760848999023, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.004335846742500799, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.002574777603149414, |
| "learning_rate": 0.01, |
| "loss": 1.3831, |
| "loss/crossentropy": 2.45810604095459, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.26673202961683273, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.004353121032311559, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0025400797526041667, |
| "learning_rate": 0.01, |
| "loss": 1.3379, |
| "loss/crossentropy": 2.37344229221344, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.2348434329032898, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.004370395322122319, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.002615213394165039, |
| "learning_rate": 0.01, |
| "loss": 1.4283, |
| "loss/crossentropy": 2.310893416404724, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.22272542119026184, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.004387669611933079, |
| "grad_norm": 0.455078125, |
| "grad_norm_var": 0.003699223200480143, |
| "learning_rate": 0.01, |
| "loss": 1.4319, |
| "loss/crossentropy": 2.287319302558899, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.23187098652124405, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.00440494390174384, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.003693072001139323, |
| "learning_rate": 0.01, |
| "loss": 1.3951, |
| "loss/crossentropy": 2.751601457595825, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.2715594172477722, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.0044222181915546, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0037031650543212892, |
| "learning_rate": 0.01, |
| "loss": 1.372, |
| "loss/crossentropy": 2.513296961784363, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.23859571665525436, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.00443949248136536, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.003735971450805664, |
| "learning_rate": 0.01, |
| "loss": 1.3787, |
| "loss/crossentropy": 2.501555562019348, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.2450244277715683, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.00445676677117612, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.00377195676167806, |
| "learning_rate": 0.01, |
| "loss": 1.3965, |
| "loss/crossentropy": 2.503899097442627, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.28062424063682556, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.00447404106098688, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.0037612279256184896, |
| "learning_rate": 0.01, |
| "loss": 1.3864, |
| "loss/crossentropy": 2.5635122060775757, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.25401656329631805, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.00449131535079764, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0037770430246988934, |
| "learning_rate": 0.01, |
| "loss": 1.3786, |
| "loss/crossentropy": 2.4950658082962036, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2641760855913162, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.004508589640608401, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.003665781021118164, |
| "learning_rate": 0.01, |
| "loss": 1.3656, |
| "loss/crossentropy": 2.4370001554489136, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.249709352850914, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.004525863930419161, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.003766632080078125, |
| "learning_rate": 0.01, |
| "loss": 1.3332, |
| "loss/crossentropy": 2.4650388956069946, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2645094692707062, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.004543138220229921, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0037601312001546224, |
| "learning_rate": 0.01, |
| "loss": 1.3832, |
| "loss/crossentropy": 2.677791714668274, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.28196755796670914, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.004560412510040681, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0015848795572916666, |
| "learning_rate": 0.01, |
| "loss": 1.4601, |
| "loss/crossentropy": 2.4847524166107178, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2580026537179947, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.004577686799851441, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.0015946547190348306, |
| "learning_rate": 0.01, |
| "loss": 1.4087, |
| "loss/crossentropy": 2.4944722652435303, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2483246624469757, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.004594961089662201, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.0016375223795572916, |
| "learning_rate": 0.01, |
| "loss": 1.3835, |
| "loss/crossentropy": 2.2753440141677856, |
| "loss/fcd": 1.046875, |
| "loss/logits": 0.24172206223011017, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.004612235379472961, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0016192118326822916, |
| "learning_rate": 0.01, |
| "loss": 1.3721, |
| "loss/crossentropy": 2.4424277544021606, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2600102424621582, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.004629509669283722, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0016474246978759766, |
| "learning_rate": 0.01, |
| "loss": 1.3636, |
| "loss/crossentropy": 2.5198450088500977, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.25200945883989334, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.004646783959094482, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0016239007314046224, |
| "learning_rate": 0.01, |
| "loss": 1.3874, |
| "loss/crossentropy": 2.4488155841827393, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2999647855758667, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.0046640582489052415, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.00022017161051432292, |
| "learning_rate": 0.01, |
| "loss": 1.3399, |
| "loss/crossentropy": 2.1886658668518066, |
| "loss/fcd": 1.03125, |
| "loss/logits": 0.241354301571846, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.004681332538716002, |
| "grad_norm": 0.359375, |
| "grad_norm_var": 0.00041605631510416665, |
| "learning_rate": 0.01, |
| "loss": 1.3419, |
| "loss/crossentropy": 2.382296085357666, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2474452257156372, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.004698606828526762, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0004093805948893229, |
| "learning_rate": 0.01, |
| "loss": 1.325, |
| "loss/crossentropy": 2.569235324859619, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.24149076640605927, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.004715881118337522, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0004258314768473307, |
| "learning_rate": 0.01, |
| "loss": 1.2981, |
| "loss/crossentropy": 2.5184491872787476, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.25748542696237564, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.004733155408148283, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0004210789998372396, |
| "learning_rate": 0.01, |
| "loss": 1.3787, |
| "loss/crossentropy": 2.2780392169952393, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.24548518657684326, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.004750429697959043, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0003958225250244141, |
| "learning_rate": 0.01, |
| "loss": 1.4055, |
| "loss/crossentropy": 2.392509341239929, |
| "loss/fcd": 1.21875, |
| "loss/logits": 0.25389473140239716, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.0047677039877698025, |
| "grad_norm": 0.55078125, |
| "grad_norm_var": 0.004181019465128581, |
| "learning_rate": 0.01, |
| "loss": 1.3982, |
| "loss/crossentropy": 2.6148691177368164, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.27452078461647034, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.004784978277580563, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.004148213068644205, |
| "learning_rate": 0.01, |
| "loss": 1.3782, |
| "loss/crossentropy": 2.4390900135040283, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2340994030237198, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.004802252567391323, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.004165760676066081, |
| "learning_rate": 0.01, |
| "loss": 1.3674, |
| "loss/crossentropy": 2.065169870853424, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.23831525444984436, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.004819526857202083, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.0041680494944254555, |
| "learning_rate": 0.01, |
| "loss": 1.3986, |
| "loss/crossentropy": 2.23395574092865, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.25276701152324677, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.004836801147012844, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.004177459081013997, |
| "learning_rate": 0.01, |
| "loss": 1.3866, |
| "loss/crossentropy": 2.5360673666000366, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.2552696242928505, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0048540754368236036, |
| "grad_norm": 0.59375, |
| "grad_norm_var": 0.008786503473917644, |
| "learning_rate": 0.01, |
| "loss": 1.3841, |
| "loss/crossentropy": 2.64610493183136, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2660531848669052, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.0048713497266343634, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.008615605036417643, |
| "learning_rate": 0.01, |
| "loss": 1.3781, |
| "loss/crossentropy": 2.206232786178589, |
| "loss/fcd": 1.04296875, |
| "loss/logits": 0.22382746636867523, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.004888624016445124, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.008825031916300456, |
| "learning_rate": 0.01, |
| "loss": 1.3924, |
| "loss/crossentropy": 2.491134285926819, |
| "loss/fcd": 1.1953125, |
| "loss/logits": 0.28758758306503296, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.004905898306255884, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.008684396743774414, |
| "learning_rate": 0.01, |
| "loss": 1.3821, |
| "loss/crossentropy": 2.418181896209717, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.24221232533454895, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.004923172596066644, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.00864103635152181, |
| "learning_rate": 0.01, |
| "loss": 1.385, |
| "loss/crossentropy": 2.532857298851013, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.25721532106399536, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.004940446885877405, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.008668883641560873, |
| "learning_rate": 0.01, |
| "loss": 1.3564, |
| "loss/crossentropy": 2.602588653564453, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2445499449968338, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.0049577211756881645, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.008800490697224935, |
| "learning_rate": 0.01, |
| "loss": 1.3491, |
| "loss/crossentropy": 2.5629632472991943, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2607369050383568, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.004974995465498924, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.008880043029785156, |
| "learning_rate": 0.01, |
| "loss": 1.3404, |
| "loss/crossentropy": 2.522684097290039, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.27616211771965027, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.004992269755309685, |
| "grad_norm": 0.26171875, |
| "grad_norm_var": 0.009095001220703124, |
| "learning_rate": 0.01, |
| "loss": 1.3618, |
| "loss/crossentropy": 2.6350889205932617, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.24171672016382217, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.005009544045120445, |
| "grad_norm": 0.337890625, |
| "grad_norm_var": 0.009074894587198894, |
| "learning_rate": 0.01, |
| "loss": 1.459, |
| "loss/crossentropy": 2.98556649684906, |
| "loss/fcd": 1.21875, |
| "loss/logits": 0.2643963396549225, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.005026818334931205, |
| "grad_norm": 0.251953125, |
| "grad_norm_var": 0.009484354654947917, |
| "learning_rate": 0.01, |
| "loss": 1.3693, |
| "loss/crossentropy": 2.230570673942566, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.24412426352500916, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.005044092624741966, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.006051127115885417, |
| "learning_rate": 0.01, |
| "loss": 1.3658, |
| "loss/crossentropy": 2.8022435903549194, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.2787918150424957, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.0050613669145527255, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.006091165542602539, |
| "learning_rate": 0.01, |
| "loss": 1.3367, |
| "loss/crossentropy": 2.4132487773895264, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.26599422097206116, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.005078641204363485, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.00606382687886556, |
| "learning_rate": 0.01, |
| "loss": 1.3944, |
| "loss/crossentropy": 2.2870916724205017, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.25007129460573196, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.005095915494174246, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0060225804646809895, |
| "learning_rate": 0.01, |
| "loss": 1.3933, |
| "loss/crossentropy": 2.60745906829834, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2817099541425705, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.005113189783985006, |
| "grad_norm": 0.34765625, |
| "grad_norm_var": 0.006082900365193685, |
| "learning_rate": 0.01, |
| "loss": 1.4644, |
| "loss/crossentropy": 2.1799449920654297, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.23855505883693695, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.005130464073795766, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0006179650624593099, |
| "learning_rate": 0.01, |
| "loss": 1.3902, |
| "loss/crossentropy": 2.299877882003784, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.24762696027755737, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.005147738363606527, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0006234327952067058, |
| "learning_rate": 0.01, |
| "loss": 1.3882, |
| "loss/crossentropy": 2.334827423095703, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.23748627305030823, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.0051650126534172865, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.0006581465403238932, |
| "learning_rate": 0.01, |
| "loss": 1.3226, |
| "loss/crossentropy": 2.4439618587493896, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.23564526438713074, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.005182286943228046, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0006502787272135417, |
| "learning_rate": 0.01, |
| "loss": 1.4317, |
| "loss/crossentropy": 2.4066379070281982, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.28721271455287933, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.005199561233038807, |
| "grad_norm": 0.337890625, |
| "grad_norm_var": 0.0007389704386393229, |
| "learning_rate": 0.01, |
| "loss": 1.392, |
| "loss/crossentropy": 2.6461589336395264, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2553107738494873, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.005216835522849567, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0007389704386393229, |
| "learning_rate": 0.01, |
| "loss": 1.3864, |
| "loss/crossentropy": 2.607328414916992, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.26615719497203827, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.005234109812660327, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0007313410441080729, |
| "learning_rate": 0.01, |
| "loss": 1.3974, |
| "loss/crossentropy": 2.5339640378952026, |
| "loss/fcd": 1.23046875, |
| "loss/logits": 0.29202982783317566, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.005251384102471087, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.000777292251586914, |
| "learning_rate": 0.01, |
| "loss": 1.3597, |
| "loss/crossentropy": 2.418789029121399, |
| "loss/fcd": 1.0078125, |
| "loss/logits": 0.22410588711500168, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.0052686583922818475, |
| "grad_norm": 0.26171875, |
| "grad_norm_var": 0.000777292251586914, |
| "learning_rate": 0.01, |
| "loss": 1.3612, |
| "loss/crossentropy": 2.333797812461853, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.2548183798789978, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.005285932682092607, |
| "grad_norm": 0.330078125, |
| "grad_norm_var": 0.0007448673248291015, |
| "learning_rate": 0.01, |
| "loss": 1.4106, |
| "loss/crossentropy": 2.444805860519409, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.2654833495616913, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.005303206971903367, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0005655765533447265, |
| "learning_rate": 0.01, |
| "loss": 1.4068, |
| "loss/crossentropy": 2.478832244873047, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.27099600434303284, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.005320481261714128, |
| "grad_norm": 0.34375, |
| "grad_norm_var": 0.0006519158681233724, |
| "learning_rate": 0.01, |
| "loss": 1.4297, |
| "loss/crossentropy": 2.276490032672882, |
| "loss/fcd": 1.2578125, |
| "loss/logits": 0.2906430959701538, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.005337755551524888, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0006444136301676433, |
| "learning_rate": 0.01, |
| "loss": 1.3362, |
| "loss/crossentropy": 2.1777199506759644, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2572901248931885, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.005355029841335648, |
| "grad_norm": 0.349609375, |
| "grad_norm_var": 0.0007352193196614583, |
| "learning_rate": 0.01, |
| "loss": 1.4705, |
| "loss/crossentropy": 2.4591206312179565, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2502119764685631, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.0053723041311464085, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0007771650950113932, |
| "learning_rate": 0.01, |
| "loss": 1.4149, |
| "loss/crossentropy": 2.377845048904419, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.25507183372974396, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.005389578420957168, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0006932417551676432, |
| "learning_rate": 0.01, |
| "loss": 1.3878, |
| "loss/crossentropy": 2.6086690425872803, |
| "loss/fcd": 1.25, |
| "loss/logits": 0.28851139545440674, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.005406852710767928, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0006875991821289062, |
| "learning_rate": 0.01, |
| "loss": 1.3607, |
| "loss/crossentropy": 2.089534819126129, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.22003582119941711, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.005424127000578689, |
| "grad_norm": 0.388671875, |
| "grad_norm_var": 0.0011123021443684895, |
| "learning_rate": 0.01, |
| "loss": 1.3856, |
| "loss/crossentropy": 2.0762287974357605, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.23012210428714752, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.005441401290389449, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0011039574940999348, |
| "learning_rate": 0.01, |
| "loss": 1.3841, |
| "loss/crossentropy": 2.5591676235198975, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.2246263027191162, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.005458675580200209, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0010869344075520833, |
| "learning_rate": 0.01, |
| "loss": 1.4073, |
| "loss/crossentropy": 2.412803888320923, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.24091031402349472, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.0054759498700109694, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.001073137919108073, |
| "learning_rate": 0.01, |
| "loss": 1.368, |
| "loss/crossentropy": 2.328226327896118, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2949056923389435, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.005493224159821729, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.001038042704264323, |
| "learning_rate": 0.01, |
| "loss": 1.3639, |
| "loss/crossentropy": 2.2848289012908936, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.25566980242729187, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.005510498449632489, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.0011049906412760417, |
| "learning_rate": 0.01, |
| "loss": 1.3843, |
| "loss/crossentropy": 2.3968076705932617, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2567252665758133, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.00552777273944325, |
| "grad_norm": 0.423828125, |
| "grad_norm_var": 0.001811663309733073, |
| "learning_rate": 0.01, |
| "loss": 1.3891, |
| "loss/crossentropy": 2.396988272666931, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.24911059439182281, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.00554504702925401, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.001689910888671875, |
| "learning_rate": 0.01, |
| "loss": 1.3517, |
| "loss/crossentropy": 2.4934462308883667, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2607601135969162, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.00556232131906477, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0017145156860351562, |
| "learning_rate": 0.01, |
| "loss": 1.4164, |
| "loss/crossentropy": 2.421591639518738, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.2476629763841629, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.00557959560887553, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0016919453938802084, |
| "learning_rate": 0.01, |
| "loss": 1.4522, |
| "loss/crossentropy": 2.5826879739761353, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.24336670339107513, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.00559686989868629, |
| "grad_norm": 0.36328125, |
| "grad_norm_var": 0.0017831802368164062, |
| "learning_rate": 0.01, |
| "loss": 1.435, |
| "loss/crossentropy": 2.6005271673202515, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.2697305530309677, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.00561414418849705, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.001741647720336914, |
| "learning_rate": 0.01, |
| "loss": 1.4172, |
| "loss/crossentropy": 2.514216661453247, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2561942785978317, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.005631418478307811, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0017611026763916016, |
| "learning_rate": 0.01, |
| "loss": 1.3803, |
| "loss/crossentropy": 2.6110743284225464, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.25072459131479263, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.005648692768118571, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.0018299738566080728, |
| "learning_rate": 0.01, |
| "loss": 1.3267, |
| "loss/crossentropy": 2.3151168823242188, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.22984758019447327, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.005665967057929331, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.001784515380859375, |
| "learning_rate": 0.01, |
| "loss": 1.4146, |
| "loss/crossentropy": 2.610999584197998, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.27360329031944275, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.005683241347740091, |
| "grad_norm": 0.330078125, |
| "grad_norm_var": 0.00178680419921875, |
| "learning_rate": 0.01, |
| "loss": 1.4228, |
| "loss/crossentropy": 2.3715471029281616, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.24973652511835098, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.005700515637550851, |
| "grad_norm": 0.359375, |
| "grad_norm_var": 0.0015657901763916015, |
| "learning_rate": 0.01, |
| "loss": 1.3711, |
| "loss/crossentropy": 2.3313710689544678, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.2390831932425499, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.005717789927361611, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0015309651692708333, |
| "learning_rate": 0.01, |
| "loss": 1.3683, |
| "loss/crossentropy": 2.405033826828003, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.26245684921741486, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.005735064217172372, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0015340010325113932, |
| "learning_rate": 0.01, |
| "loss": 1.3872, |
| "loss/crossentropy": 2.6667896509170532, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2503022700548172, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.005752338506983132, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0015253543853759766, |
| "learning_rate": 0.01, |
| "loss": 1.3296, |
| "loss/crossentropy": 2.6033343076705933, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.24763934314250946, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.005769612796793892, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.0016357421875, |
| "learning_rate": 0.01, |
| "loss": 1.3707, |
| "loss/crossentropy": 2.3747464418411255, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.24109259992837906, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.005786887086604652, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.001557159423828125, |
| "learning_rate": 0.01, |
| "loss": 1.3676, |
| "loss/crossentropy": 2.064777910709381, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.20032966136932373, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.005804161376415412, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0007188002268473308, |
| "learning_rate": 0.01, |
| "loss": 1.415, |
| "loss/crossentropy": 2.395054817199707, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.26608574390411377, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.005821435666226172, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.0007338047027587891, |
| "learning_rate": 0.01, |
| "loss": 1.3812, |
| "loss/crossentropy": 2.2238911390304565, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.2315894290804863, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.005838709956036932, |
| "grad_norm": 0.400390625, |
| "grad_norm_var": 0.0012453556060791015, |
| "learning_rate": 0.01, |
| "loss": 1.4817, |
| "loss/crossentropy": 2.6248074769973755, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.28028567135334015, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.005855984245847693, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0012906233469645182, |
| "learning_rate": 0.01, |
| "loss": 1.3788, |
| "loss/crossentropy": 2.125354528427124, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.26438966393470764, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.005873258535658453, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.0011260350545247396, |
| "learning_rate": 0.01, |
| "loss": 1.4301, |
| "loss/crossentropy": 2.301461696624756, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.254987433552742, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0058905328254692125, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.001136000951131185, |
| "learning_rate": 0.01, |
| "loss": 1.4306, |
| "loss/crossentropy": 2.369805097579956, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.25373272597789764, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.005907807115279973, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0010833104451497396, |
| "learning_rate": 0.01, |
| "loss": 1.4098, |
| "loss/crossentropy": 2.5944920778274536, |
| "loss/fcd": 1.23046875, |
| "loss/logits": 0.2799176275730133, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.005925081405090733, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0009953657786051433, |
| "learning_rate": 0.01, |
| "loss": 1.3992, |
| "loss/crossentropy": 2.13715797662735, |
| "loss/fcd": 1.04296875, |
| "loss/logits": 0.24987763166427612, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.005942355694901493, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0009989261627197266, |
| "learning_rate": 0.01, |
| "loss": 1.4174, |
| "loss/crossentropy": 2.4599469900131226, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2511429339647293, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.005959629984712254, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.0010085900624593098, |
| "learning_rate": 0.01, |
| "loss": 1.3608, |
| "loss/crossentropy": 2.431983709335327, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.2585323229432106, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.005976904274523014, |
| "grad_norm": 0.35546875, |
| "grad_norm_var": 0.0009857018788655598, |
| "learning_rate": 0.01, |
| "loss": 1.3743, |
| "loss/crossentropy": 2.3239270448684692, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.2441619336605072, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.0059941785643337735, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0009857018788655598, |
| "learning_rate": 0.01, |
| "loss": 1.3826, |
| "loss/crossentropy": 2.229923963546753, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.22727931290864944, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.006011452854144534, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0010732014973958333, |
| "learning_rate": 0.01, |
| "loss": 1.3712, |
| "loss/crossentropy": 2.6727981567382812, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.28281402587890625, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.006028727143955294, |
| "grad_norm": 0.392578125, |
| "grad_norm_var": 0.0014724095662434896, |
| "learning_rate": 0.01, |
| "loss": 1.4247, |
| "loss/crossentropy": 2.4443578720092773, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.2722969502210617, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.006046001433766054, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0013391971588134766, |
| "learning_rate": 0.01, |
| "loss": 1.3573, |
| "loss/crossentropy": 2.399729371070862, |
| "loss/fcd": 1.04296875, |
| "loss/logits": 0.22808712720870972, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.006063275723576815, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.001315927505493164, |
| "learning_rate": 0.01, |
| "loss": 1.3975, |
| "loss/crossentropy": 2.521644949913025, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.25423599034547806, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.006080550013387575, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0013570149739583334, |
| "learning_rate": 0.01, |
| "loss": 1.3756, |
| "loss/crossentropy": 2.263104200363159, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.26695793122053146, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.0060978243031983344, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0012316226959228516, |
| "learning_rate": 0.01, |
| "loss": 1.3735, |
| "loss/crossentropy": 2.6748716831207275, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.27432236075401306, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.006115098593009095, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0008518060048421223, |
| "learning_rate": 0.01, |
| "loss": 1.3334, |
| "loss/crossentropy": 2.3271913528442383, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.25318336486816406, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.006132372882819855, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0008040746053059896, |
| "learning_rate": 0.01, |
| "loss": 1.4881, |
| "loss/crossentropy": 2.6528772115707397, |
| "loss/fcd": 1.296875, |
| "loss/logits": 0.3017214983701706, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.006149647172630615, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0008056640625, |
| "learning_rate": 0.01, |
| "loss": 1.3815, |
| "loss/crossentropy": 2.4514299631118774, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.25581270456314087, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.006166921462441376, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.000800180435180664, |
| "learning_rate": 0.01, |
| "loss": 1.3652, |
| "loss/crossentropy": 2.307224750518799, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.24949809908866882, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.0061841957522521356, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.000836944580078125, |
| "learning_rate": 0.01, |
| "loss": 1.3842, |
| "loss/crossentropy": 2.120967745780945, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2532486915588379, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.006201470042062895, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0008429050445556641, |
| "learning_rate": 0.01, |
| "loss": 1.4114, |
| "loss/crossentropy": 2.4582676887512207, |
| "loss/fcd": 1.23828125, |
| "loss/logits": 0.278301477432251, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.006218744331873656, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.000936126708984375, |
| "learning_rate": 0.01, |
| "loss": 1.3794, |
| "loss/crossentropy": 2.636004090309143, |
| "loss/fcd": 1.21484375, |
| "loss/logits": 0.2849871665239334, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.006236018621684416, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0009364922841389974, |
| "learning_rate": 0.01, |
| "loss": 1.4538, |
| "loss/crossentropy": 2.55968701839447, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.29454614222049713, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.006253292911495176, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0007910251617431641, |
| "learning_rate": 0.01, |
| "loss": 1.3948, |
| "loss/crossentropy": 2.3076229095458984, |
| "loss/fcd": 1.015625, |
| "loss/logits": 0.23156127333641052, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.006270567201305937, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0008000055948893229, |
| "learning_rate": 0.01, |
| "loss": 1.3462, |
| "loss/crossentropy": 2.3910467624664307, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.24528269469738007, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.0062878414911166965, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0007506688435872396, |
| "learning_rate": 0.01, |
| "loss": 1.4077, |
| "loss/crossentropy": 2.3372639417648315, |
| "loss/fcd": 1.21875, |
| "loss/logits": 0.2640947550535202, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.006305115780927456, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0002445856730143229, |
| "learning_rate": 0.01, |
| "loss": 1.3759, |
| "loss/crossentropy": 2.454505205154419, |
| "loss/fcd": 1.05078125, |
| "loss/logits": 0.2401072233915329, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.006322390070738217, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0002534071604410807, |
| "learning_rate": 0.01, |
| "loss": 1.3749, |
| "loss/crossentropy": 2.3645259141921997, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.23153205960988998, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.006339664360548977, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.0002587477366129557, |
| "learning_rate": 0.01, |
| "loss": 1.3546, |
| "loss/crossentropy": 2.494025230407715, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.26557300239801407, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.006356938650359737, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.000279998779296875, |
| "learning_rate": 0.01, |
| "loss": 1.3496, |
| "loss/crossentropy": 2.3776293992996216, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2568487524986267, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.006374212940170498, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.00024310747782389322, |
| "learning_rate": 0.01, |
| "loss": 1.3734, |
| "loss/crossentropy": 2.591793417930603, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.27023325860500336, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.0063914872299812575, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0002357323964436849, |
| "learning_rate": 0.01, |
| "loss": 1.3944, |
| "loss/crossentropy": 2.415038585662842, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.3026815205812454, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.006408761519792017, |
| "grad_norm": 0.3359375, |
| "grad_norm_var": 0.0002699375152587891, |
| "learning_rate": 0.01, |
| "loss": 1.3485, |
| "loss/crossentropy": 2.4911344051361084, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.2093563750386238, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.006426035809602778, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.00027815500895182293, |
| "learning_rate": 0.01, |
| "loss": 1.34, |
| "loss/crossentropy": 2.470622181892395, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.2913671284914017, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.006443310099413538, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.00027815500895182293, |
| "learning_rate": 0.01, |
| "loss": 1.4077, |
| "loss/crossentropy": 2.6227082014083862, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.24900969862937927, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.006460584389224298, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0003157138824462891, |
| "learning_rate": 0.01, |
| "loss": 1.4209, |
| "loss/crossentropy": 3.0212732553482056, |
| "loss/fcd": 1.22265625, |
| "loss/logits": 0.270741730928421, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.006477858679035058, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.0003500461578369141, |
| "learning_rate": 0.01, |
| "loss": 1.465, |
| "loss/crossentropy": 2.8352737426757812, |
| "loss/fcd": 1.24609375, |
| "loss/logits": 0.31054478883743286, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.0064951329688458185, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0003049055735270182, |
| "learning_rate": 0.01, |
| "loss": 1.3951, |
| "loss/crossentropy": 2.450179100036621, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.24616704881191254, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.006512407258656578, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0003193537394205729, |
| "learning_rate": 0.01, |
| "loss": 1.3687, |
| "loss/crossentropy": 2.2392066717147827, |
| "loss/fcd": 1.025390625, |
| "loss/logits": 0.24169814586639404, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.006529681548467338, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0003195285797119141, |
| "learning_rate": 0.01, |
| "loss": 1.4315, |
| "loss/crossentropy": 2.6067546606063843, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.31542879343032837, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.006546955838278099, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.00031558672587076825, |
| "learning_rate": 0.01, |
| "loss": 1.3917, |
| "loss/crossentropy": 2.360989570617676, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.25205816328525543, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.006564230128088859, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0003201643625895182, |
| "learning_rate": 0.01, |
| "loss": 1.4293, |
| "loss/crossentropy": 2.71570360660553, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.2731679454445839, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.006581504417899619, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.00033359527587890626, |
| "learning_rate": 0.01, |
| "loss": 1.351, |
| "loss/crossentropy": 2.624392867088318, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2293551042675972, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.0065987787077103795, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.00032958984375, |
| "learning_rate": 0.01, |
| "loss": 1.3969, |
| "loss/crossentropy": 2.1760467290878296, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.23204915970563889, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.006616052997521139, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.00029544830322265626, |
| "learning_rate": 0.01, |
| "loss": 1.3163, |
| "loss/crossentropy": 2.089251697063446, |
| "loss/fcd": 1.041015625, |
| "loss/logits": 0.21481642127037048, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.006633327287331899, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.00025424957275390627, |
| "learning_rate": 0.01, |
| "loss": 1.413, |
| "loss/crossentropy": 2.1335262060165405, |
| "loss/fcd": 1.24609375, |
| "loss/logits": 0.29476068913936615, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.00665060157714266, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0002770582834879557, |
| "learning_rate": 0.01, |
| "loss": 1.3581, |
| "loss/crossentropy": 2.3327542543411255, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.2519141435623169, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.00666787586695342, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0002797285715738932, |
| "learning_rate": 0.01, |
| "loss": 1.3991, |
| "loss/crossentropy": 2.521241784095764, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2740743160247803, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.00668515015676418, |
| "grad_norm": 0.341796875, |
| "grad_norm_var": 0.00030543009440104165, |
| "learning_rate": 0.01, |
| "loss": 1.4046, |
| "loss/crossentropy": 2.5978543758392334, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.24079592525959015, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.0067024244465749405, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.00029575030008951824, |
| "learning_rate": 0.01, |
| "loss": 1.4169, |
| "loss/crossentropy": 2.246425747871399, |
| "loss/fcd": 1.19140625, |
| "loss/logits": 0.2572794705629349, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.0067196987363857, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0003040949503580729, |
| "learning_rate": 0.01, |
| "loss": 1.382, |
| "loss/crossentropy": 2.293286442756653, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.23658673465251923, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.00673697302619646, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.00032596588134765626, |
| "learning_rate": 0.01, |
| "loss": 1.4607, |
| "loss/crossentropy": 2.441470980644226, |
| "loss/fcd": 1.31640625, |
| "loss/logits": 0.28673678636550903, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.006754247316007221, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0003061771392822266, |
| "learning_rate": 0.01, |
| "loss": 1.3719, |
| "loss/crossentropy": 2.5365694761276245, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.2776503562927246, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.006771521605817981, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.00037663777669270834, |
| "learning_rate": 0.01, |
| "loss": 1.4373, |
| "loss/crossentropy": 2.517317056655884, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.27259568870067596, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.006788795895628741, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0003398736317952474, |
| "learning_rate": 0.01, |
| "loss": 1.382, |
| "loss/crossentropy": 2.38772451877594, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.24375227838754654, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.006806070185439501, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0003720601399739583, |
| "learning_rate": 0.01, |
| "loss": 1.3734, |
| "loss/crossentropy": 2.2084882259368896, |
| "loss/fcd": 1.076171875, |
| "loss/logits": 0.22468051314353943, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.006823344475250261, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.00041039784749348957, |
| "learning_rate": 0.01, |
| "loss": 1.4417, |
| "loss/crossentropy": 2.4394543170928955, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.25751829147338867, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.006840618765061021, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0004447778065999349, |
| "learning_rate": 0.01, |
| "loss": 1.3414, |
| "loss/crossentropy": 2.365694999694824, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.24539195746183395, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.006857893054871782, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.00042292277018229164, |
| "learning_rate": 0.01, |
| "loss": 1.396, |
| "loss/crossentropy": 2.4616193771362305, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.2692428231239319, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.006875167344682542, |
| "grad_norm": 0.330078125, |
| "grad_norm_var": 0.0004531224568684896, |
| "learning_rate": 0.01, |
| "loss": 1.403, |
| "loss/crossentropy": 2.2189152240753174, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.24257495999336243, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.006892441634493302, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0004249413808186849, |
| "learning_rate": 0.01, |
| "loss": 1.3559, |
| "loss/crossentropy": 2.37640380859375, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2584332674741745, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.006909715924304062, |
| "grad_norm": 0.34765625, |
| "grad_norm_var": 0.0005074659983317057, |
| "learning_rate": 0.01, |
| "loss": 1.4774, |
| "loss/crossentropy": 2.384715437889099, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2619960308074951, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.006926990214114822, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.00046054522196451825, |
| "learning_rate": 0.01, |
| "loss": 1.4446, |
| "loss/crossentropy": 2.1976479291915894, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.25502003729343414, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.006944264503925582, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0004532972971598307, |
| "learning_rate": 0.01, |
| "loss": 1.3809, |
| "loss/crossentropy": 2.278647780418396, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.2284827083349228, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.006961538793736343, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.0004994710286458333, |
| "learning_rate": 0.01, |
| "loss": 1.3505, |
| "loss/crossentropy": 2.4870028495788574, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.2371172457933426, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.006978813083547103, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0004836400349934896, |
| "learning_rate": 0.01, |
| "loss": 1.4059, |
| "loss/crossentropy": 2.65886914730072, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.2828421890735626, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.006996087373357863, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.0005295912424723308, |
| "learning_rate": 0.01, |
| "loss": 1.3245, |
| "loss/crossentropy": 2.1928412914276123, |
| "loss/fcd": 1.0234375, |
| "loss/logits": 0.22634898871183395, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.007013361663168623, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0004922072092692057, |
| "learning_rate": 0.01, |
| "loss": 1.4224, |
| "loss/crossentropy": 2.6360604763031006, |
| "loss/fcd": 1.24609375, |
| "loss/logits": 0.2727653980255127, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.007030635952979383, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.00048584938049316405, |
| "learning_rate": 0.01, |
| "loss": 1.3588, |
| "loss/crossentropy": 2.3004168272018433, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.239614799618721, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.007047910242790143, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0004352410634358724, |
| "learning_rate": 0.01, |
| "loss": 1.4105, |
| "loss/crossentropy": 2.3150475025177, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.2282358631491661, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.007065184532600904, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0004974365234375, |
| "learning_rate": 0.01, |
| "loss": 1.3576, |
| "loss/crossentropy": 2.645399570465088, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.2676645368337631, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.007082458822411664, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.00047855377197265626, |
| "learning_rate": 0.01, |
| "loss": 1.4103, |
| "loss/crossentropy": 2.1640161275863647, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2479998767375946, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.007099733112222424, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0004301548004150391, |
| "learning_rate": 0.01, |
| "loss": 1.4286, |
| "loss/crossentropy": 2.5662118196487427, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.2710702270269394, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.0071170074020331835, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.000391387939453125, |
| "learning_rate": 0.01, |
| "loss": 1.369, |
| "loss/crossentropy": 2.1513331532478333, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.22271250188350677, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.007134281691843944, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.00042565663655598957, |
| "learning_rate": 0.01, |
| "loss": 1.3294, |
| "loss/crossentropy": 2.2309274673461914, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.24107103794813156, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.007151555981654704, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0003997802734375, |
| "learning_rate": 0.01, |
| "loss": 1.3924, |
| "loss/crossentropy": 2.6093149185180664, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.24238202720880508, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.007168830271465464, |
| "grad_norm": 0.3828125, |
| "grad_norm_var": 0.0008020877838134765, |
| "learning_rate": 0.01, |
| "loss": 1.4011, |
| "loss/crossentropy": 2.6286522150039673, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.258474200963974, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.007186104561276225, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.0007280985514322917, |
| "learning_rate": 0.01, |
| "loss": 1.3625, |
| "loss/crossentropy": 2.686766266822815, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.24827048182487488, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.007203378851086985, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0007395426432291667, |
| "learning_rate": 0.01, |
| "loss": 1.3839, |
| "loss/crossentropy": 2.319527268409729, |
| "loss/fcd": 1.20703125, |
| "loss/logits": 0.2674332559108734, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.0072206531408977445, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0007565657297770183, |
| "learning_rate": 0.01, |
| "loss": 1.3619, |
| "loss/crossentropy": 2.3169610500335693, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.23959346115589142, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.007237927430708505, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0007449944814046223, |
| "learning_rate": 0.01, |
| "loss": 1.4737, |
| "loss/crossentropy": 2.5569876432418823, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2552832216024399, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.007255201720519265, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0007374445597330729, |
| "learning_rate": 0.01, |
| "loss": 1.4197, |
| "loss/crossentropy": 2.0687599182128906, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2598320543766022, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.007272476010330025, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0006955305735270183, |
| "learning_rate": 0.01, |
| "loss": 1.4605, |
| "loss/crossentropy": 2.419862389564514, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.2556862235069275, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.007289750300140786, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0006964206695556641, |
| "learning_rate": 0.01, |
| "loss": 1.4071, |
| "loss/crossentropy": 2.5204795598983765, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.2741318494081497, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.007307024589951546, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0006945292154947917, |
| "learning_rate": 0.01, |
| "loss": 1.4196, |
| "loss/crossentropy": 2.489278793334961, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.25576694309711456, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.0073242988797623055, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.0007067362467447917, |
| "learning_rate": 0.01, |
| "loss": 1.3185, |
| "loss/crossentropy": 2.3392102122306824, |
| "loss/fcd": 1.03125, |
| "loss/logits": 0.21298449486494064, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.007341573169573066, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0006886641184488932, |
| "learning_rate": 0.01, |
| "loss": 1.3571, |
| "loss/crossentropy": 2.2977930307388306, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.23583728075027466, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.007358847459383826, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0006886641184488932, |
| "learning_rate": 0.01, |
| "loss": 1.4277, |
| "loss/crossentropy": 2.6484419107437134, |
| "loss/fcd": 1.1953125, |
| "loss/logits": 0.27860742807388306, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.007376121749194586, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0007303873697916667, |
| "learning_rate": 0.01, |
| "loss": 1.3439, |
| "loss/crossentropy": 2.460866689682007, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.23756644129753113, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.007393396039005347, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.000762033462524414, |
| "learning_rate": 0.01, |
| "loss": 1.343, |
| "loss/crossentropy": 2.0784988403320312, |
| "loss/fcd": 1.044921875, |
| "loss/logits": 0.21802522987127304, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.0074106703288161066, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0007471720377604167, |
| "learning_rate": 0.01, |
| "loss": 1.3824, |
| "loss/crossentropy": 2.312214493751526, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.2373846471309662, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.0074279446186268664, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0007441043853759766, |
| "learning_rate": 0.01, |
| "loss": 1.4031, |
| "loss/crossentropy": 2.43253231048584, |
| "loss/fcd": 1.0390625, |
| "loss/logits": 0.24533094465732574, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.007445218908437627, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0002559502919514974, |
| "learning_rate": 0.01, |
| "loss": 1.3775, |
| "loss/crossentropy": 2.7691128253936768, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.22900952398777008, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.007462493198248387, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0002489566802978516, |
| "learning_rate": 0.01, |
| "loss": 1.3613, |
| "loss/crossentropy": 2.231864333152771, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.24191942811012268, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.007479767488059147, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.00026493072509765626, |
| "learning_rate": 0.01, |
| "loss": 1.4166, |
| "loss/crossentropy": 2.343968152999878, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.2661665081977844, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.007497041777869908, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.000299835205078125, |
| "learning_rate": 0.01, |
| "loss": 1.3807, |
| "loss/crossentropy": 2.6194422245025635, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.2392604500055313, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.0075143160676806675, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.00023585955301920573, |
| "learning_rate": 0.01, |
| "loss": 1.2902, |
| "loss/crossentropy": 2.46696138381958, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.26606328785419464, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.007531590357491427, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0002272923787434896, |
| "learning_rate": 0.01, |
| "loss": 1.3931, |
| "loss/crossentropy": 2.4375393390655518, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.27766771614551544, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.007548864647302188, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.0003665765126546224, |
| "learning_rate": 0.01, |
| "loss": 1.3732, |
| "loss/crossentropy": 2.3699560165405273, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.2573126032948494, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.007566138937112948, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.00037282307942708334, |
| "learning_rate": 0.01, |
| "loss": 1.4006, |
| "loss/crossentropy": 2.227339029312134, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2607281506061554, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.007583413226923708, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.0003864129384358724, |
| "learning_rate": 0.01, |
| "loss": 1.3632, |
| "loss/crossentropy": 2.46047842502594, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.24269723892211914, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.007600687516734469, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0003676732381184896, |
| "learning_rate": 0.01, |
| "loss": 1.3795, |
| "loss/crossentropy": 2.4994819164276123, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.25722265988588333, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.0076179618065452285, |
| "grad_norm": 0.265625, |
| "grad_norm_var": 0.0004109064737955729, |
| "learning_rate": 0.01, |
| "loss": 1.3064, |
| "loss/crossentropy": 2.5115902423858643, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.2371089681982994, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.007635236096355988, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0003619988759358724, |
| "learning_rate": 0.01, |
| "loss": 1.4358, |
| "loss/crossentropy": 2.380179762840271, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.2640485018491745, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.007652510386166749, |
| "grad_norm": 0.3359375, |
| "grad_norm_var": 0.000460052490234375, |
| "learning_rate": 0.01, |
| "loss": 1.4287, |
| "loss/crossentropy": 2.6699330806732178, |
| "loss/fcd": 1.23828125, |
| "loss/logits": 0.2810060381889343, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.007669784675977509, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0004988988240559896, |
| "learning_rate": 0.01, |
| "loss": 1.4246, |
| "loss/crossentropy": 2.5262972116470337, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.25480419397354126, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.007687058965788269, |
| "grad_norm": 0.34765625, |
| "grad_norm_var": 0.000649261474609375, |
| "learning_rate": 0.01, |
| "loss": 1.3859, |
| "loss/crossentropy": 2.3320013284683228, |
| "loss/fcd": 1.05078125, |
| "loss/logits": 0.2234661728143692, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.007704333255599029, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0006438573201497396, |
| "learning_rate": 0.01, |
| "loss": 1.3848, |
| "loss/crossentropy": 2.448530673980713, |
| "loss/fcd": 1.234375, |
| "loss/logits": 0.2647833973169327, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.0077216075454097895, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.000642840067545573, |
| "learning_rate": 0.01, |
| "loss": 1.4458, |
| "loss/crossentropy": 2.279269576072693, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2693684697151184, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.007738881835220549, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0006202538808186849, |
| "learning_rate": 0.01, |
| "loss": 1.3777, |
| "loss/crossentropy": 2.6742255687713623, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.2811601459980011, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.007756156125031309, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.0006031672159830729, |
| "learning_rate": 0.01, |
| "loss": 1.4104, |
| "loss/crossentropy": 2.4074745178222656, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.24794109165668488, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.00777343041484207, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0005887190500895183, |
| "learning_rate": 0.01, |
| "loss": 1.3185, |
| "loss/crossentropy": 2.35663104057312, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.22819262742996216, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.00779070470465283, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0005558649698893229, |
| "learning_rate": 0.01, |
| "loss": 1.3834, |
| "loss/crossentropy": 2.6186258792877197, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2587556540966034, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.00780797899446359, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0005566755930582683, |
| "learning_rate": 0.01, |
| "loss": 1.4106, |
| "loss/crossentropy": 2.6754432916641235, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2465488687157631, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.00782525328427435, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0005098978678385416, |
| "learning_rate": 0.01, |
| "loss": 1.3696, |
| "loss/crossentropy": 2.5379905700683594, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.2804763838648796, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.007842527574085111, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0005257765452067058, |
| "learning_rate": 0.01, |
| "loss": 1.3851, |
| "loss/crossentropy": 2.5852067470550537, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.23731224238872528, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.007859801863895871, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0004597345987955729, |
| "learning_rate": 0.01, |
| "loss": 1.4338, |
| "loss/crossentropy": 2.6572701930999756, |
| "loss/fcd": 1.234375, |
| "loss/logits": 0.28852197527885437, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.007877076153706631, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0004513899485270182, |
| "learning_rate": 0.01, |
| "loss": 1.3824, |
| "loss/crossentropy": 2.6901192665100098, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.24115828424692154, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.00789435044351739, |
| "grad_norm": 0.451171875, |
| "grad_norm_var": 0.00158538818359375, |
| "learning_rate": 0.01, |
| "loss": 1.5497, |
| "loss/crossentropy": 2.636592984199524, |
| "loss/fcd": 1.30859375, |
| "loss/logits": 0.36482033133506775, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.00791162473332815, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0015401045481363933, |
| "learning_rate": 0.01, |
| "loss": 1.3698, |
| "loss/crossentropy": 2.414226531982422, |
| "loss/fcd": 1.076171875, |
| "loss/logits": 0.2397611290216446, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.00792889902313891, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.001520522435506185, |
| "learning_rate": 0.01, |
| "loss": 1.4165, |
| "loss/crossentropy": 2.463810086250305, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.24305613338947296, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.007946173312949672, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0015141805013020833, |
| "learning_rate": 0.01, |
| "loss": 1.4418, |
| "loss/crossentropy": 2.451104521751404, |
| "loss/fcd": 1.296875, |
| "loss/logits": 0.30130288004875183, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.007963447602760432, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0014490763346354167, |
| "learning_rate": 0.01, |
| "loss": 1.3988, |
| "loss/crossentropy": 2.53925359249115, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.24273447692394257, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.007980721892571192, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0014133294423421224, |
| "learning_rate": 0.01, |
| "loss": 1.3928, |
| "loss/crossentropy": 2.5229551792144775, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.25667132437229156, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.007997996182381952, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0014353434244791666, |
| "learning_rate": 0.01, |
| "loss": 1.3347, |
| "loss/crossentropy": 2.341879367828369, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.23053725808858871, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.008015270472192712, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0014744440714518229, |
| "learning_rate": 0.01, |
| "loss": 1.3569, |
| "loss/crossentropy": 2.2920732498168945, |
| "loss/fcd": 1.03515625, |
| "loss/logits": 0.23280857503414154, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.008032544762003472, |
| "grad_norm": 0.349609375, |
| "grad_norm_var": 0.001541582743326823, |
| "learning_rate": 0.01, |
| "loss": 1.3894, |
| "loss/crossentropy": 2.515018939971924, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.24030621349811554, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.008049819051814231, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0015279134114583334, |
| "learning_rate": 0.01, |
| "loss": 1.4597, |
| "loss/crossentropy": 2.2328585386276245, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.25991010665893555, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.008067093341624993, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0015306949615478515, |
| "learning_rate": 0.01, |
| "loss": 1.4036, |
| "loss/crossentropy": 2.798638701438904, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.29376721382141113, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.008084367631435753, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0015871683756510417, |
| "learning_rate": 0.01, |
| "loss": 1.358, |
| "loss/crossentropy": 2.322153091430664, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.2475121170282364, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.008101641921246513, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0015871683756510417, |
| "learning_rate": 0.01, |
| "loss": 1.3756, |
| "loss/crossentropy": 2.2007282972335815, |
| "loss/fcd": 1.046875, |
| "loss/logits": 0.23374570161104202, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.008118916211057273, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.001594400405883789, |
| "learning_rate": 0.01, |
| "loss": 1.366, |
| "loss/crossentropy": 2.408711314201355, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.23746006190776825, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.008136190500868033, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0016522566477457682, |
| "learning_rate": 0.01, |
| "loss": 1.4157, |
| "loss/crossentropy": 2.328341841697693, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.2784807085990906, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.008153464790678792, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0016563256581624349, |
| "learning_rate": 0.01, |
| "loss": 1.3845, |
| "loss/crossentropy": 2.414987564086914, |
| "loss/fcd": 1.26171875, |
| "loss/logits": 0.32799775153398514, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.008170739080489554, |
| "grad_norm": 0.357421875, |
| "grad_norm_var": 0.0004951318105061848, |
| "learning_rate": 0.01, |
| "loss": 1.4935, |
| "loss/crossentropy": 2.597047209739685, |
| "loss/fcd": 1.34375, |
| "loss/logits": 0.3595212921500206, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.008188013370300314, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0004927953084309896, |
| "learning_rate": 0.01, |
| "loss": 1.4074, |
| "loss/crossentropy": 2.6870315074920654, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.2819272577762604, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.008205287660111074, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0004863580067952474, |
| "learning_rate": 0.01, |
| "loss": 1.4023, |
| "loss/crossentropy": 2.416118621826172, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.2792641520500183, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.008222561949921834, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0004933675130208334, |
| "learning_rate": 0.01, |
| "loss": 1.3668, |
| "loss/crossentropy": 2.4251519441604614, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.25571418553590775, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.008239836239732594, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.00052490234375, |
| "learning_rate": 0.01, |
| "loss": 1.396, |
| "loss/crossentropy": 2.2888123989105225, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2410544455051422, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.008257110529543353, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.000519561767578125, |
| "learning_rate": 0.01, |
| "loss": 1.3594, |
| "loss/crossentropy": 2.479097008705139, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.25502997636795044, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.008274384819354115, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.000513140360514323, |
| "learning_rate": 0.01, |
| "loss": 1.3785, |
| "loss/crossentropy": 2.4117250442504883, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.26754797995090485, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.008291659109164875, |
| "grad_norm": 0.34375, |
| "grad_norm_var": 0.0005388895670572917, |
| "learning_rate": 0.01, |
| "loss": 1.4354, |
| "loss/crossentropy": 2.577602744102478, |
| "loss/fcd": 1.24609375, |
| "loss/logits": 0.2731374129652977, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.008308933398975635, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.000490252176920573, |
| "learning_rate": 0.01, |
| "loss": 1.3588, |
| "loss/crossentropy": 2.3125388622283936, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2633324861526489, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.008326207688786395, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0004997094472249349, |
| "learning_rate": 0.01, |
| "loss": 1.3518, |
| "loss/crossentropy": 2.3964109420776367, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.24801631271839142, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.008343481978597155, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0005289077758789063, |
| "learning_rate": 0.01, |
| "loss": 1.3619, |
| "loss/crossentropy": 2.5348154306411743, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.273783415555954, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.008360756268407914, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0005053043365478516, |
| "learning_rate": 0.01, |
| "loss": 1.3716, |
| "loss/crossentropy": 2.525968909263611, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.25891977548599243, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.008378030558218676, |
| "grad_norm": 0.357421875, |
| "grad_norm_var": 0.000632333755493164, |
| "learning_rate": 0.01, |
| "loss": 1.4665, |
| "loss/crossentropy": 2.476569890975952, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.29254642128944397, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.008395304848029436, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.000678110122680664, |
| "learning_rate": 0.01, |
| "loss": 1.3305, |
| "loss/crossentropy": 2.4879168272018433, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.22623379528522491, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.008412579137840196, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0006357192993164063, |
| "learning_rate": 0.01, |
| "loss": 1.3945, |
| "loss/crossentropy": 2.4186280965805054, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.23819412291049957, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.008429853427650956, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0006739139556884765, |
| "learning_rate": 0.01, |
| "loss": 1.3611, |
| "loss/crossentropy": 2.2941300868988037, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.22146066278219223, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.008447127717461715, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0005716323852539062, |
| "learning_rate": 0.01, |
| "loss": 1.3797, |
| "loss/crossentropy": 2.368129849433899, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.2645361125469208, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.008464402007272475, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0005732059478759765, |
| "learning_rate": 0.01, |
| "loss": 1.3563, |
| "loss/crossentropy": 2.5257701873779297, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2530096620321274, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.008481676297083237, |
| "grad_norm": 0.263671875, |
| "grad_norm_var": 0.0006844679514567058, |
| "learning_rate": 0.01, |
| "loss": 1.3688, |
| "loss/crossentropy": 2.1511563062667847, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.240036740899086, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.008498950586893997, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0006647109985351562, |
| "learning_rate": 0.01, |
| "loss": 1.3563, |
| "loss/crossentropy": 2.370754837989807, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.2698900103569031, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.008516224876704757, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0006010532379150391, |
| "learning_rate": 0.01, |
| "loss": 1.4051, |
| "loss/crossentropy": 2.55213725566864, |
| "loss/fcd": 1.19140625, |
| "loss/logits": 0.26752666383981705, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.008533499166515517, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0006074110666910807, |
| "learning_rate": 0.01, |
| "loss": 1.3304, |
| "loss/crossentropy": 2.878965377807617, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.248264878988266, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.008550773456326276, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0006083170572916667, |
| "learning_rate": 0.01, |
| "loss": 1.3554, |
| "loss/crossentropy": 2.389639139175415, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.2504645884037018, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.008568047746137036, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.0005273818969726562, |
| "learning_rate": 0.01, |
| "loss": 1.353, |
| "loss/crossentropy": 2.261403799057007, |
| "loss/fcd": 1.03125, |
| "loss/logits": 0.22545771300792694, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.008585322035947798, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0005760033925374349, |
| "learning_rate": 0.01, |
| "loss": 1.4314, |
| "loss/crossentropy": 2.755717635154724, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.28124481439590454, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.008602596325758558, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0005721410115559895, |
| "learning_rate": 0.01, |
| "loss": 1.39, |
| "loss/crossentropy": 2.5332454442977905, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2577049881219864, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.008619870615569318, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.0006039937337239584, |
| "learning_rate": 0.01, |
| "loss": 1.3284, |
| "loss/crossentropy": 2.3752284049987793, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.24180973321199417, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.008637144905380078, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0006058851877848308, |
| "learning_rate": 0.01, |
| "loss": 1.3868, |
| "loss/crossentropy": 2.299641966819763, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2509627118706703, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.008654419195190837, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.00035869280497233074, |
| "learning_rate": 0.01, |
| "loss": 1.3756, |
| "loss/crossentropy": 2.2871060371398926, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.22674021124839783, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.008671693485001597, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.00034610430399576825, |
| "learning_rate": 0.01, |
| "loss": 1.3644, |
| "loss/crossentropy": 2.2024362087249756, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2369084656238556, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.008688967774812357, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.00030414263407389325, |
| "learning_rate": 0.01, |
| "loss": 1.343, |
| "loss/crossentropy": 2.5880898237228394, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.25857551395893097, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.008706242064623119, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.00030986467997233075, |
| "learning_rate": 0.01, |
| "loss": 1.4237, |
| "loss/crossentropy": 2.3485684394836426, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.266690656542778, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.008723516354433879, |
| "grad_norm": 0.353515625, |
| "grad_norm_var": 0.0005009969075520834, |
| "learning_rate": 0.01, |
| "loss": 1.3758, |
| "loss/crossentropy": 2.539777636528015, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.24045251309871674, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.008740790644244639, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.0005444844563802083, |
| "learning_rate": 0.01, |
| "loss": 1.3113, |
| "loss/crossentropy": 2.492120862007141, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2610347419977188, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.008758064934055398, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0004825433095296224, |
| "learning_rate": 0.01, |
| "loss": 1.4436, |
| "loss/crossentropy": 2.5324673652648926, |
| "loss/fcd": 1.33984375, |
| "loss/logits": 0.3312453627586365, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.008775339223866158, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0005187829335530599, |
| "learning_rate": 0.01, |
| "loss": 1.3478, |
| "loss/crossentropy": 2.612854242324829, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.261405885219574, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.008792613513676918, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0005492528279622395, |
| "learning_rate": 0.01, |
| "loss": 1.3917, |
| "loss/crossentropy": 2.4303773641586304, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.24008433520793915, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.00880988780348768, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0005370457967122396, |
| "learning_rate": 0.01, |
| "loss": 1.3929, |
| "loss/crossentropy": 2.676490068435669, |
| "loss/fcd": 1.23828125, |
| "loss/logits": 0.291456863284111, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.00882716209329844, |
| "grad_norm": 0.248046875, |
| "grad_norm_var": 0.0006914615631103515, |
| "learning_rate": 0.01, |
| "loss": 1.3138, |
| "loss/crossentropy": 2.1477047204971313, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.2524523437023163, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.0088444363831092, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0006460666656494141, |
| "learning_rate": 0.01, |
| "loss": 1.3208, |
| "loss/crossentropy": 2.3151156902313232, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.2605459988117218, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.00886171067291996, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0005753676096598308, |
| "learning_rate": 0.01, |
| "loss": 1.357, |
| "loss/crossentropy": 2.4916226863861084, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.25671282410621643, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.00887898496273072, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0006388346354166667, |
| "learning_rate": 0.01, |
| "loss": 1.4206, |
| "loss/crossentropy": 2.2333791255950928, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.28083400428295135, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.00889625925254148, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0006095727284749348, |
| "learning_rate": 0.01, |
| "loss": 1.3783, |
| "loss/crossentropy": 2.606614589691162, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.2666025906801224, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.00891353354235224, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0006219863891601563, |
| "learning_rate": 0.01, |
| "loss": 1.3641, |
| "loss/crossentropy": 2.5051095485687256, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.25101958215236664, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.008930807832163, |
| "grad_norm": 0.353515625, |
| "grad_norm_var": 0.000811767578125, |
| "learning_rate": 0.01, |
| "loss": 1.4186, |
| "loss/crossentropy": 2.3850373029708862, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.27909501641988754, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.00894808212197376, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0008066177368164062, |
| "learning_rate": 0.01, |
| "loss": 1.366, |
| "loss/crossentropy": 2.217817187309265, |
| "loss/fcd": 1.03125, |
| "loss/logits": 0.23760483413934708, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.00896535641178452, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0007822513580322266, |
| "learning_rate": 0.01, |
| "loss": 1.3482, |
| "loss/crossentropy": 2.537502408027649, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.27564045786857605, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.00898263070159528, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.0008463382720947266, |
| "learning_rate": 0.01, |
| "loss": 1.4875, |
| "loss/crossentropy": 2.628643035888672, |
| "loss/fcd": 1.31640625, |
| "loss/logits": 0.30241404473781586, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.00899990499140604, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0006756941477457682, |
| "learning_rate": 0.01, |
| "loss": 1.4536, |
| "loss/crossentropy": 2.2907025814056396, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.2538699805736542, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.009017179281216802, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0006413618723551432, |
| "learning_rate": 0.01, |
| "loss": 1.4079, |
| "loss/crossentropy": 2.5753923654556274, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.2975587248802185, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.009034453571027562, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0006230513254801433, |
| "learning_rate": 0.01, |
| "loss": 1.3724, |
| "loss/crossentropy": 2.327569842338562, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.2123243287205696, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.009051727860838321, |
| "grad_norm": 0.345703125, |
| "grad_norm_var": 0.0006653944651285808, |
| "learning_rate": 0.01, |
| "loss": 1.484, |
| "loss/crossentropy": 2.4529794454574585, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.2642442062497139, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.009069002150649081, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0006680647532145182, |
| "learning_rate": 0.01, |
| "loss": 1.3426, |
| "loss/crossentropy": 2.405073642730713, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.24681153148412704, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.009086276440459841, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0006830692291259766, |
| "learning_rate": 0.01, |
| "loss": 1.3501, |
| "loss/crossentropy": 2.5336978435516357, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.26675350964069366, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.009103550730270601, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.00044960975646972655, |
| "learning_rate": 0.01, |
| "loss": 1.4051, |
| "loss/crossentropy": 2.306818962097168, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.24449439346790314, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.009120825020081363, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.00044960975646972655, |
| "learning_rate": 0.01, |
| "loss": 1.3847, |
| "loss/crossentropy": 2.394535183906555, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.2896339148283005, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.009138099309892123, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.00044960975646972655, |
| "learning_rate": 0.01, |
| "loss": 1.383, |
| "loss/crossentropy": 2.502661347389221, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2570330798625946, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.009155373599702882, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0004755655924479167, |
| "learning_rate": 0.01, |
| "loss": 1.3914, |
| "loss/crossentropy": 2.5401047468185425, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.25133057683706284, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.009172647889513642, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.000457000732421875, |
| "learning_rate": 0.01, |
| "loss": 1.3288, |
| "loss/crossentropy": 2.357369303703308, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.25731976330280304, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.009189922179324402, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.0005107720692952474, |
| "learning_rate": 0.01, |
| "loss": 1.3301, |
| "loss/crossentropy": 2.361912250518799, |
| "loss/fcd": 1.03125, |
| "loss/logits": 0.23256495594978333, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.009207196469135162, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0003574212392171224, |
| "learning_rate": 0.01, |
| "loss": 1.4286, |
| "loss/crossentropy": 2.5182912349700928, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.24184302985668182, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.009224470758945922, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.00035564104715983075, |
| "learning_rate": 0.01, |
| "loss": 1.3729, |
| "loss/crossentropy": 2.3095160722732544, |
| "loss/fcd": 1.068359375, |
| "loss/logits": 0.22853360325098038, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.009241745048756684, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.00034936269124348957, |
| "learning_rate": 0.01, |
| "loss": 1.4586, |
| "loss/crossentropy": 2.4540841579437256, |
| "loss/fcd": 1.26953125, |
| "loss/logits": 0.3655036687850952, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.009259019338567443, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.0003513971964518229, |
| "learning_rate": 0.01, |
| "loss": 1.3534, |
| "loss/crossentropy": 2.350268244743347, |
| "loss/fcd": 1.02734375, |
| "loss/logits": 0.21084149181842804, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.009276293628378203, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0003573099772135417, |
| "learning_rate": 0.01, |
| "loss": 1.4337, |
| "loss/crossentropy": 2.1304550170898438, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.2608063519001007, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.009293567918188963, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.00033899943033854164, |
| "learning_rate": 0.01, |
| "loss": 1.3731, |
| "loss/crossentropy": 2.4391915798187256, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2429627627134323, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.009310842207999723, |
| "grad_norm": 0.2490234375, |
| "grad_norm_var": 0.0004955569903055827, |
| "learning_rate": 0.01, |
| "loss": 1.3286, |
| "loss/crossentropy": 2.3171310424804688, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.2482328712940216, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.009328116497810483, |
| "grad_norm": 0.34375, |
| "grad_norm_var": 0.0004833817481994629, |
| "learning_rate": 0.01, |
| "loss": 1.5811, |
| "loss/crossentropy": 2.376081347465515, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.25213149189949036, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.009345390787621245, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0005142807960510254, |
| "learning_rate": 0.01, |
| "loss": 1.3856, |
| "loss/crossentropy": 2.4632989168167114, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.2334313914179802, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.009362665077432004, |
| "grad_norm": 0.267578125, |
| "grad_norm_var": 0.0005667328834533692, |
| "learning_rate": 0.01, |
| "loss": 1.2882, |
| "loss/crossentropy": 2.177401542663574, |
| "loss/fcd": 1.0390625, |
| "loss/logits": 0.24528680741786957, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.009379939367242764, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0005423506100972493, |
| "learning_rate": 0.01, |
| "loss": 1.4063, |
| "loss/crossentropy": 2.4587985277175903, |
| "loss/fcd": 1.22265625, |
| "loss/logits": 0.2990281730890274, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.009397213657053524, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0005635221799214681, |
| "learning_rate": 0.01, |
| "loss": 1.3479, |
| "loss/crossentropy": 2.5811359882354736, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2688131481409073, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.009414487946864284, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0004939039548238119, |
| "learning_rate": 0.01, |
| "loss": 1.3695, |
| "loss/crossentropy": 2.51469349861145, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.26591262221336365, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.009431762236675044, |
| "grad_norm": 0.462890625, |
| "grad_norm_var": 0.002329091231028239, |
| "learning_rate": 0.01, |
| "loss": 1.3836, |
| "loss/crossentropy": 2.46504545211792, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.25862205028533936, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.009449036526485806, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.0023592273394266766, |
| "learning_rate": 0.01, |
| "loss": 1.3895, |
| "loss/crossentropy": 2.6220297813415527, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2548370361328125, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.009466310816296565, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0023488322893778484, |
| "learning_rate": 0.01, |
| "loss": 1.3737, |
| "loss/crossentropy": 2.591723322868347, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.25868477672338486, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.009483585106107325, |
| "grad_norm": 0.345703125, |
| "grad_norm_var": 0.0024718562761942547, |
| "learning_rate": 0.01, |
| "loss": 1.4328, |
| "loss/crossentropy": 2.568224310874939, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.278149738907814, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.009500859395918085, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0024854302406311034, |
| "learning_rate": 0.01, |
| "loss": 1.4231, |
| "loss/crossentropy": 2.5823177099227905, |
| "loss/fcd": 1.25, |
| "loss/logits": 0.2855361998081207, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.009518133685728845, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.0025197307268778482, |
| "learning_rate": 0.01, |
| "loss": 1.3549, |
| "loss/crossentropy": 2.8035439252853394, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.26180362701416016, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.009535407975539605, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0024581233660380046, |
| "learning_rate": 0.01, |
| "loss": 1.3912, |
| "loss/crossentropy": 2.695222020149231, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.26626719534397125, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.009552682265350367, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.002452115217844645, |
| "learning_rate": 0.01, |
| "loss": 1.3884, |
| "loss/crossentropy": 2.2692904472351074, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.26358961313962936, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.009569956555161126, |
| "grad_norm": 0.484375, |
| "grad_norm_var": 0.004515453179677328, |
| "learning_rate": 0.01, |
| "loss": 1.4362, |
| "loss/crossentropy": 2.587984561920166, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.28202252089977264, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.009587230844971886, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.004301055272420248, |
| "learning_rate": 0.01, |
| "loss": 1.3332, |
| "loss/crossentropy": 2.238184094429016, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.25094330310821533, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.009604505134782646, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.004252099990844726, |
| "learning_rate": 0.01, |
| "loss": 1.3777, |
| "loss/crossentropy": 2.2282315492630005, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.2441270500421524, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.009621779424593406, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.004194132486979167, |
| "learning_rate": 0.01, |
| "loss": 1.3897, |
| "loss/crossentropy": 2.354749321937561, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.26998236775398254, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.009639053714404166, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0040819803873697914, |
| "learning_rate": 0.01, |
| "loss": 1.4072, |
| "loss/crossentropy": 2.3754522800445557, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.27060529589653015, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.009656328004214927, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0040383497873942055, |
| "learning_rate": 0.01, |
| "loss": 1.3777, |
| "loss/crossentropy": 2.3385682106018066, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.24154536426067352, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.009673602294025687, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.00392297108968099, |
| "learning_rate": 0.01, |
| "loss": 1.4435, |
| "loss/crossentropy": 2.525418996810913, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.2767959535121918, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.009690876583836447, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.003881438573201497, |
| "learning_rate": 0.01, |
| "loss": 1.3849, |
| "loss/crossentropy": 2.291569232940674, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.28253524005413055, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.009708150873647207, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0024538675944010416, |
| "learning_rate": 0.01, |
| "loss": 1.3855, |
| "loss/crossentropy": 1.8735097646713257, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.186705082654953, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.009725425163457967, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.002462625503540039, |
| "learning_rate": 0.01, |
| "loss": 1.3507, |
| "loss/crossentropy": 2.2446945905685425, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.23518769443035126, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.009742699453268727, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.002405405044555664, |
| "learning_rate": 0.01, |
| "loss": 1.3812, |
| "loss/crossentropy": 1.9525874853134155, |
| "loss/fcd": 1.2421875, |
| "loss/logits": 0.19731061905622482, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.009759973743079488, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.00233610471089681, |
| "learning_rate": 0.01, |
| "loss": 1.3995, |
| "loss/crossentropy": 2.53279709815979, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.27113981544971466, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.009777248032890248, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0023889541625976562, |
| "learning_rate": 0.01, |
| "loss": 1.346, |
| "loss/crossentropy": 2.4163317680358887, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.26083898544311523, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.009794522322701008, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.00232086181640625, |
| "learning_rate": 0.01, |
| "loss": 1.3419, |
| "loss/crossentropy": 2.4386374950408936, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2661859691143036, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.009811796612511768, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0023355484008789062, |
| "learning_rate": 0.01, |
| "loss": 1.3659, |
| "loss/crossentropy": 2.509569525718689, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.263532429933548, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.009829070902322528, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.00232086181640625, |
| "learning_rate": 0.01, |
| "loss": 1.434, |
| "loss/crossentropy": 2.400490880012512, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.24774880707263947, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.009846345192133288, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0002483208974202474, |
| "learning_rate": 0.01, |
| "loss": 1.4385, |
| "loss/crossentropy": 2.390196442604065, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.2328876331448555, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.009863619481944048, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.00023013750712076823, |
| "learning_rate": 0.01, |
| "loss": 1.3399, |
| "loss/crossentropy": 2.399609327316284, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.2631242126226425, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.00988089377175481, |
| "grad_norm": 0.388671875, |
| "grad_norm_var": 0.0006914774576822917, |
| "learning_rate": 0.01, |
| "loss": 1.4129, |
| "loss/crossentropy": 2.5639859437942505, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.2520062252879143, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.00989816806156557, |
| "grad_norm": 0.34765625, |
| "grad_norm_var": 0.000757280985514323, |
| "learning_rate": 0.01, |
| "loss": 1.4226, |
| "loss/crossentropy": 2.4615684747695923, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2613854482769966, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.009915442351376329, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0007432142893473308, |
| "learning_rate": 0.01, |
| "loss": 1.4494, |
| "loss/crossentropy": 2.4410594701766968, |
| "loss/fcd": 1.1953125, |
| "loss/logits": 0.3067672997713089, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.009932716641187089, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0007410685221354167, |
| "learning_rate": 0.01, |
| "loss": 1.4228, |
| "loss/crossentropy": 2.6319605112075806, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.24571086466312408, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.009949990930997849, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0008020401000976562, |
| "learning_rate": 0.01, |
| "loss": 1.2951, |
| "loss/crossentropy": 2.368131637573242, |
| "loss/fcd": 1.03515625, |
| "loss/logits": 0.23180700838565826, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.009967265220808609, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0008711338043212891, |
| "learning_rate": 0.01, |
| "loss": 1.3585, |
| "loss/crossentropy": 2.197615623474121, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.2364010065793991, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.00998453951061937, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0008722782135009765, |
| "learning_rate": 0.01, |
| "loss": 1.3929, |
| "loss/crossentropy": 2.5560864210128784, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.25519636273384094, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.01000181380043013, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0008318424224853516, |
| "learning_rate": 0.01, |
| "loss": 1.3903, |
| "loss/crossentropy": 2.290327787399292, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.242530919611454, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.01001908809024089, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.0008769830067952474, |
| "learning_rate": 0.01, |
| "loss": 1.3204, |
| "loss/crossentropy": 2.558402419090271, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.24013052880764008, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.01003636238005165, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0008811791737874349, |
| "learning_rate": 0.01, |
| "loss": 1.3934, |
| "loss/crossentropy": 2.3049778938293457, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.24487978965044022, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.01005363666986241, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0008811791737874349, |
| "learning_rate": 0.01, |
| "loss": 1.3844, |
| "loss/crossentropy": 2.5796691179275513, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2458028495311737, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.01007091095967317, |
| "grad_norm": 0.330078125, |
| "grad_norm_var": 0.0009151299794514974, |
| "learning_rate": 0.01, |
| "loss": 1.4305, |
| "loss/crossentropy": 2.3386783599853516, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.24171485751867294, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.010088185249483931, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0009124120076497396, |
| "learning_rate": 0.01, |
| "loss": 1.386, |
| "loss/crossentropy": 2.3040322065353394, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.25387245416641235, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.010105459539294691, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0009125868479410807, |
| "learning_rate": 0.01, |
| "loss": 1.3622, |
| "loss/crossentropy": 3.012826681137085, |
| "loss/fcd": 1.21484375, |
| "loss/logits": 0.255868136882782, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.010122733829105451, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0009113947550455729, |
| "learning_rate": 0.01, |
| "loss": 1.4032, |
| "loss/crossentropy": 2.7537986040115356, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.25436051189899445, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.010140008118916211, |
| "grad_norm": 0.251953125, |
| "grad_norm_var": 0.001083230972290039, |
| "learning_rate": 0.01, |
| "loss": 1.3117, |
| "loss/crossentropy": 2.14433491230011, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.24180641025304794, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.01015728240872697, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.000574493408203125, |
| "learning_rate": 0.01, |
| "loss": 1.3691, |
| "loss/crossentropy": 2.101401686668396, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.19958080351352692, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.01017455669853773, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.00040378570556640623, |
| "learning_rate": 0.01, |
| "loss": 1.3827, |
| "loss/crossentropy": 2.436479330062866, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.23494569957256317, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.010191830988348492, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.00043892860412597656, |
| "learning_rate": 0.01, |
| "loss": 1.4279, |
| "loss/crossentropy": 2.6805481910705566, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2272372618317604, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.010209105278159252, |
| "grad_norm": 0.263671875, |
| "grad_norm_var": 0.00048065185546875, |
| "learning_rate": 0.01, |
| "loss": 1.322, |
| "loss/crossentropy": 2.7796462774276733, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.26299113035202026, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.010226379567970012, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.000478363037109375, |
| "learning_rate": 0.01, |
| "loss": 1.3396, |
| "loss/crossentropy": 2.4198944568634033, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2373996302485466, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.010243653857780772, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.00046634674072265625, |
| "learning_rate": 0.01, |
| "loss": 1.3618, |
| "loss/crossentropy": 2.5916903018951416, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.23520419746637344, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.010260928147591532, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0004646142323811849, |
| "learning_rate": 0.01, |
| "loss": 1.4032, |
| "loss/crossentropy": 2.2067846059799194, |
| "loss/fcd": 1.05078125, |
| "loss/logits": 0.2392275035381317, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.010278202437402292, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0004112084706624349, |
| "learning_rate": 0.01, |
| "loss": 1.3551, |
| "loss/crossentropy": 2.5146957635879517, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2572908252477646, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.010295476727213053, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0004157861073811849, |
| "learning_rate": 0.01, |
| "loss": 1.4091, |
| "loss/crossentropy": 2.7353230714797974, |
| "loss/fcd": 1.1953125, |
| "loss/logits": 0.2845850735902786, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.010312751017023813, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.00041147867838541664, |
| "learning_rate": 0.01, |
| "loss": 1.4371, |
| "loss/crossentropy": 2.290863871574402, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.25596096366643906, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.010330025306834573, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0003811995188395182, |
| "learning_rate": 0.01, |
| "loss": 1.3736, |
| "loss/crossentropy": 2.4351121187210083, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.2633766904473305, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.010347299596645333, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.000312042236328125, |
| "learning_rate": 0.01, |
| "loss": 1.3671, |
| "loss/crossentropy": 2.3196725845336914, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.23156649619340897, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.010364573886456093, |
| "grad_norm": 0.267578125, |
| "grad_norm_var": 0.00036290486653645836, |
| "learning_rate": 0.01, |
| "loss": 1.3352, |
| "loss/crossentropy": 2.0654172897338867, |
| "loss/fcd": 1.0390625, |
| "loss/logits": 0.23978617042303085, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.010381848176266853, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.00035233497619628905, |
| "learning_rate": 0.01, |
| "loss": 1.3716, |
| "loss/crossentropy": 2.0811039805412292, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2653958946466446, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.010399122466077614, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0003536065419514974, |
| "learning_rate": 0.01, |
| "loss": 1.3644, |
| "loss/crossentropy": 2.7797833681106567, |
| "loss/fcd": 1.2421875, |
| "loss/logits": 0.26879242062568665, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.010416396755888374, |
| "grad_norm": 0.263671875, |
| "grad_norm_var": 0.0002975304921468099, |
| "learning_rate": 0.01, |
| "loss": 1.3323, |
| "loss/crossentropy": 2.2734681367874146, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.21455278247594833, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.010433671045699134, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0002975304921468099, |
| "learning_rate": 0.01, |
| "loss": 1.3207, |
| "loss/crossentropy": 1.978046715259552, |
| "loss/fcd": 1.03515625, |
| "loss/logits": 0.23233170062303543, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.010450945335509894, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0003051122029622396, |
| "learning_rate": 0.01, |
| "loss": 1.4169, |
| "loss/crossentropy": 2.5054962635040283, |
| "loss/fcd": 1.2265625, |
| "loss/logits": 0.2957670986652374, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.010468219625320654, |
| "grad_norm": 0.359375, |
| "grad_norm_var": 0.0005370934804280598, |
| "learning_rate": 0.01, |
| "loss": 1.4294, |
| "loss/crossentropy": 2.5767931938171387, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.2684077024459839, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.010485493915131414, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.00048039754231770835, |
| "learning_rate": 0.01, |
| "loss": 1.37, |
| "loss/crossentropy": 2.3274868726730347, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.23180848360061646, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.010502768204942173, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0004983107248942057, |
| "learning_rate": 0.01, |
| "loss": 1.416, |
| "loss/crossentropy": 2.5422879457473755, |
| "loss/fcd": 1.0390625, |
| "loss/logits": 0.223361574113369, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.010520042494752935, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0005373636881510417, |
| "learning_rate": 0.01, |
| "loss": 1.3627, |
| "loss/crossentropy": 2.570125699043274, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.25247204303741455, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.010537316784563695, |
| "grad_norm": 0.265625, |
| "grad_norm_var": 0.0006189823150634765, |
| "learning_rate": 0.01, |
| "loss": 1.316, |
| "loss/crossentropy": 2.2968589067459106, |
| "loss/fcd": 1.015625, |
| "loss/logits": 0.1994389146566391, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.010554591074374455, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0006140232086181641, |
| "learning_rate": 0.01, |
| "loss": 1.4265, |
| "loss/crossentropy": 2.493618369102478, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.2581065893173218, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.010571865364185215, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.000604248046875, |
| "learning_rate": 0.01, |
| "loss": 1.4014, |
| "loss/crossentropy": 2.4227527379989624, |
| "loss/fcd": 1.19140625, |
| "loss/logits": 0.25313572585582733, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.010589139653995975, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0005971272786458333, |
| "learning_rate": 0.01, |
| "loss": 1.3718, |
| "loss/crossentropy": 2.3819390535354614, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.22010967135429382, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.010606413943806734, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.000598001480102539, |
| "learning_rate": 0.01, |
| "loss": 1.5079, |
| "loss/crossentropy": 2.190422534942627, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.24383512139320374, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.010623688233617496, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.000598001480102539, |
| "learning_rate": 0.01, |
| "loss": 1.3733, |
| "loss/crossentropy": 2.5865895748138428, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.24275009334087372, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.010640962523428256, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0005334854125976562, |
| "learning_rate": 0.01, |
| "loss": 1.3404, |
| "loss/crossentropy": 2.1975014209747314, |
| "loss/fcd": 1.046875, |
| "loss/logits": 0.2261335551738739, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.010658236813239016, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.0005843480428059896, |
| "learning_rate": 0.01, |
| "loss": 1.4037, |
| "loss/crossentropy": 2.7723870277404785, |
| "loss/fcd": 1.234375, |
| "loss/logits": 0.2835993468761444, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.010675511103049776, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0005884647369384765, |
| "learning_rate": 0.01, |
| "loss": 1.3625, |
| "loss/crossentropy": 2.599759817123413, |
| "loss/fcd": 1.1953125, |
| "loss/logits": 0.285232275724411, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.010692785392860536, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0004821618398030599, |
| "learning_rate": 0.01, |
| "loss": 1.3733, |
| "loss/crossentropy": 2.4128291606903076, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.26694832742214203, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.010710059682671295, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.00047651926676432293, |
| "learning_rate": 0.01, |
| "loss": 1.3343, |
| "loss/crossentropy": 2.5237722396850586, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.26433800160884857, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.010727333972482057, |
| "grad_norm": 0.337890625, |
| "grad_norm_var": 0.0005385716756184896, |
| "learning_rate": 0.01, |
| "loss": 1.4112, |
| "loss/crossentropy": 2.317731261253357, |
| "loss/fcd": 1.2265625, |
| "loss/logits": 0.28476743400096893, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.010744608262292817, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.0004234155019124349, |
| "learning_rate": 0.01, |
| "loss": 1.3603, |
| "loss/crossentropy": 2.3109618425369263, |
| "loss/fcd": 1.041015625, |
| "loss/logits": 0.2279675453901291, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.010761882552103577, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.00044498443603515627, |
| "learning_rate": 0.01, |
| "loss": 1.3089, |
| "loss/crossentropy": 2.3984739780426025, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.25493185222148895, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.010779156841914337, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0004232883453369141, |
| "learning_rate": 0.01, |
| "loss": 1.4079, |
| "loss/crossentropy": 2.1802881956100464, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.23454807698726654, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.010796431131725097, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.00041599273681640624, |
| "learning_rate": 0.01, |
| "loss": 1.3629, |
| "loss/crossentropy": 2.6050442457199097, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.2245146408677101, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.010813705421535856, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.0003667036692301432, |
| "learning_rate": 0.01, |
| "loss": 1.3622, |
| "loss/crossentropy": 2.4274967908859253, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2685912102460861, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.010830979711346618, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0003754774729410807, |
| "learning_rate": 0.01, |
| "loss": 1.4161, |
| "loss/crossentropy": 2.556549072265625, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2520214840769768, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.010848254001157378, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0003949483235677083, |
| "learning_rate": 0.01, |
| "loss": 1.3802, |
| "loss/crossentropy": 2.2824164628982544, |
| "loss/fcd": 1.046875, |
| "loss/logits": 0.22343048453330994, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.010865528290968138, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0004146416982014974, |
| "learning_rate": 0.01, |
| "loss": 1.3555, |
| "loss/crossentropy": 2.500080108642578, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.24835523962974548, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.010882802580778898, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.00041631062825520836, |
| "learning_rate": 0.01, |
| "loss": 1.4, |
| "loss/crossentropy": 2.4014720916748047, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.236750990152359, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.010900076870589658, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.00041286150614420575, |
| "learning_rate": 0.01, |
| "loss": 1.3707, |
| "loss/crossentropy": 2.3228918313980103, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.23406407982110977, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.010917351160400417, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0004018147786458333, |
| "learning_rate": 0.01, |
| "loss": 1.3885, |
| "loss/crossentropy": 2.50198233127594, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.258284330368042, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.010934625450211179, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0003524621327718099, |
| "learning_rate": 0.01, |
| "loss": 1.3978, |
| "loss/crossentropy": 2.637346863746643, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.28542736172676086, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.010951899740021939, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.00033969879150390624, |
| "learning_rate": 0.01, |
| "loss": 1.4403, |
| "loss/crossentropy": 2.4110260009765625, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.2651347145438194, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.010969174029832699, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.000360870361328125, |
| "learning_rate": 0.01, |
| "loss": 1.4184, |
| "loss/crossentropy": 2.7041887044906616, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.2508121207356453, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.010986448319643459, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.00037713050842285155, |
| "learning_rate": 0.01, |
| "loss": 1.401, |
| "loss/crossentropy": 2.4663859605789185, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2824552655220032, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.011003722609454218, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.0003279209136962891, |
| "learning_rate": 0.01, |
| "loss": 1.3728, |
| "loss/crossentropy": 2.5915483236312866, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.24787750095129013, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.011020996899264978, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.00029652913411458334, |
| "learning_rate": 0.01, |
| "loss": 1.3354, |
| "loss/crossentropy": 2.5775671005249023, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.26823610067367554, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.01103827118907574, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.00025151570638020835, |
| "learning_rate": 0.01, |
| "loss": 1.3522, |
| "loss/crossentropy": 2.3462886810302734, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.23827192932367325, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.0110555454788865, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.00025151570638020835, |
| "learning_rate": 0.01, |
| "loss": 1.4206, |
| "loss/crossentropy": 2.2796329855918884, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.24281439930200577, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.01107281976869726, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.00021107991536458334, |
| "learning_rate": 0.01, |
| "loss": 1.4093, |
| "loss/crossentropy": 2.2618002891540527, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.24219272285699844, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.01109009405850802, |
| "grad_norm": 0.515625, |
| "grad_norm_var": 0.0030247847239176433, |
| "learning_rate": 0.01, |
| "loss": 1.5002, |
| "loss/crossentropy": 2.628837466239929, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.27036982774734497, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.01110736834831878, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0030420303344726564, |
| "learning_rate": 0.01, |
| "loss": 1.3437, |
| "loss/crossentropy": 2.377197504043579, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.2347392812371254, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.01112464263812954, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0030603885650634767, |
| "learning_rate": 0.01, |
| "loss": 1.3465, |
| "loss/crossentropy": 2.241411805152893, |
| "loss/fcd": 1.04296875, |
| "loss/logits": 0.22135238349437714, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.0111419169279403, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.00304563840230306, |
| "learning_rate": 0.01, |
| "loss": 1.3781, |
| "loss/crossentropy": 2.132224917411804, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.24958615005016327, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.01115919121775106, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0030664443969726563, |
| "learning_rate": 0.01, |
| "loss": 1.3319, |
| "loss/crossentropy": 2.379546046257019, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.23225411772727966, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.01117646550756182, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0031198501586914063, |
| "learning_rate": 0.01, |
| "loss": 1.3696, |
| "loss/crossentropy": 2.4151222705841064, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.25382500886917114, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.01119373979737258, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0031198501586914063, |
| "learning_rate": 0.01, |
| "loss": 1.3725, |
| "loss/crossentropy": 2.4386223554611206, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.24172081053256989, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.01121101408718334, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.003135426839192708, |
| "learning_rate": 0.01, |
| "loss": 1.4136, |
| "loss/crossentropy": 2.4053245782852173, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2587142735719681, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.0112282883769941, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0031352837880452475, |
| "learning_rate": 0.01, |
| "loss": 1.3908, |
| "loss/crossentropy": 2.8473496437072754, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.24620139598846436, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.01124556266680486, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.003128496805826823, |
| "learning_rate": 0.01, |
| "loss": 1.346, |
| "loss/crossentropy": 2.4264625310897827, |
| "loss/fcd": 1.04296875, |
| "loss/logits": 0.22718993574380875, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.011262836956615622, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.003088871637980143, |
| "learning_rate": 0.01, |
| "loss": 1.3722, |
| "loss/crossentropy": 2.393033504486084, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2361084669828415, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.011280111246426382, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0030968825022379557, |
| "learning_rate": 0.01, |
| "loss": 1.3962, |
| "loss/crossentropy": 2.5740654468536377, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.27814269065856934, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.011297385536237142, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0030664443969726563, |
| "learning_rate": 0.01, |
| "loss": 1.3502, |
| "loss/crossentropy": 2.572822332382202, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.25307735800743103, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.011314659826047901, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.003061676025390625, |
| "learning_rate": 0.01, |
| "loss": 1.3652, |
| "loss/crossentropy": 2.36893892288208, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.24310748279094696, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.011331934115858661, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.003099505106608073, |
| "learning_rate": 0.01, |
| "loss": 1.382, |
| "loss/crossentropy": 2.453968048095703, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2507154792547226, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.011349208405669421, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0030968825022379557, |
| "learning_rate": 0.01, |
| "loss": 1.4208, |
| "loss/crossentropy": 2.3706772327423096, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.2801935374736786, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.011366482695480183, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 7.348060607910156e-05, |
| "learning_rate": 0.01, |
| "loss": 1.3745, |
| "loss/crossentropy": 2.3052316308021545, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.24023611843585968, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.011383756985290943, |
| "grad_norm": 0.263671875, |
| "grad_norm_var": 0.00013184547424316406, |
| "learning_rate": 0.01, |
| "loss": 1.3589, |
| "loss/crossentropy": 2.3989150524139404, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.23345524072647095, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.011401031275101703, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.000202178955078125, |
| "learning_rate": 0.01, |
| "loss": 1.4671, |
| "loss/crossentropy": 2.4908188581466675, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.22981490939855576, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.011418305564912462, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.000232696533203125, |
| "learning_rate": 0.01, |
| "loss": 1.3845, |
| "loss/crossentropy": 2.182092070579529, |
| "loss/fcd": 1.0390625, |
| "loss/logits": 0.22433090209960938, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.011435579854723222, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.00023331642150878907, |
| "learning_rate": 0.01, |
| "loss": 1.3832, |
| "loss/crossentropy": 2.557218909263611, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.26849667727947235, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.011452854144533982, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.0002688090006510417, |
| "learning_rate": 0.01, |
| "loss": 1.3516, |
| "loss/crossentropy": 2.4368367195129395, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.2600485235452652, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.011470128434344744, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.0002975304921468099, |
| "learning_rate": 0.01, |
| "loss": 1.3436, |
| "loss/crossentropy": 2.283419609069824, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2451685667037964, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.011487402724155504, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0003295262654622396, |
| "learning_rate": 0.01, |
| "loss": 1.3917, |
| "loss/crossentropy": 2.2501282691955566, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.23817522078752518, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.011504677013966264, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0003692468007405599, |
| "learning_rate": 0.01, |
| "loss": 1.3747, |
| "loss/crossentropy": 2.595417618751526, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.272259384393692, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.011521951303777023, |
| "grad_norm": 0.263671875, |
| "grad_norm_var": 0.0004292170206705729, |
| "learning_rate": 0.01, |
| "loss": 1.3477, |
| "loss/crossentropy": 2.3635072708129883, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.24695640057325363, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.011539225593587783, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0004246870676676432, |
| "learning_rate": 0.01, |
| "loss": 1.3744, |
| "loss/crossentropy": 2.310747981071472, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.2579839900135994, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.011556499883398543, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.00044193267822265623, |
| "learning_rate": 0.01, |
| "loss": 1.349, |
| "loss/crossentropy": 2.497538447380066, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.26720890402793884, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.011573774173209305, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.00044960975646972655, |
| "learning_rate": 0.01, |
| "loss": 1.3475, |
| "loss/crossentropy": 2.5883569717407227, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.29579465091228485, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.011591048463020065, |
| "grad_norm": 0.263671875, |
| "grad_norm_var": 0.0004998366038004557, |
| "learning_rate": 0.01, |
| "loss": 1.3349, |
| "loss/crossentropy": 2.2982797622680664, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.22655323147773743, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.011608322752830825, |
| "grad_norm": 0.359375, |
| "grad_norm_var": 0.0007800896962483724, |
| "learning_rate": 0.01, |
| "loss": 1.3753, |
| "loss/crossentropy": 2.4650286436080933, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.24685797840356827, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.011625597042641584, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.0007897535959879557, |
| "learning_rate": 0.01, |
| "loss": 1.3429, |
| "loss/crossentropy": 2.5849392414093018, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2600446939468384, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.011642871332452344, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0007954756418863932, |
| "learning_rate": 0.01, |
| "loss": 1.3721, |
| "loss/crossentropy": 2.4149436950683594, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.24952851235866547, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.011660145622263104, |
| "grad_norm": 0.34765625, |
| "grad_norm_var": 0.0008959452311197917, |
| "learning_rate": 0.01, |
| "loss": 1.4752, |
| "loss/crossentropy": 2.582419753074646, |
| "loss/fcd": 1.2578125, |
| "loss/logits": 0.2812621593475342, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.011677419912073864, |
| "grad_norm": 0.26171875, |
| "grad_norm_var": 0.0009247938791910808, |
| "learning_rate": 0.01, |
| "loss": 1.3666, |
| "loss/crossentropy": 2.3817840814590454, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.24483423680067062, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.011694694201884626, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0008938948313395183, |
| "learning_rate": 0.01, |
| "loss": 1.3642, |
| "loss/crossentropy": 2.4791339635849, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.25220367312431335, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.011711968491695385, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.0009230931599934895, |
| "learning_rate": 0.01, |
| "loss": 1.3435, |
| "loss/crossentropy": 2.3865939378738403, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.24416129291057587, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.011729242781506145, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0009215672810872396, |
| "learning_rate": 0.01, |
| "loss": 1.4158, |
| "loss/crossentropy": 2.514981508255005, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.27227045595645905, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.011746517071316905, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0008992513020833333, |
| "learning_rate": 0.01, |
| "loss": 1.399, |
| "loss/crossentropy": 2.660152792930603, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.2607909142971039, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.011763791361127665, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0008722305297851563, |
| "learning_rate": 0.01, |
| "loss": 1.4528, |
| "loss/crossentropy": 2.165284812450409, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.2606969401240349, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.011781065650938425, |
| "grad_norm": 0.330078125, |
| "grad_norm_var": 0.0008966922760009766, |
| "learning_rate": 0.01, |
| "loss": 1.4402, |
| "loss/crossentropy": 2.719216465950012, |
| "loss/fcd": 1.21875, |
| "loss/logits": 0.274374857544899, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.011798339940749187, |
| "grad_norm": 0.357421875, |
| "grad_norm_var": 0.0010416507720947266, |
| "learning_rate": 0.01, |
| "loss": 1.4226, |
| "loss/crossentropy": 2.405388355255127, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2703537493944168, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.011815614230559946, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0010400772094726562, |
| "learning_rate": 0.01, |
| "loss": 1.4291, |
| "loss/crossentropy": 2.7011595964431763, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.25208880007267, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.011832888520370706, |
| "grad_norm": 0.37109375, |
| "grad_norm_var": 0.0012689590454101562, |
| "learning_rate": 0.01, |
| "loss": 1.3541, |
| "loss/crossentropy": 2.5975828170776367, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.23054596036672592, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.011850162810181466, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.001270278294881185, |
| "learning_rate": 0.01, |
| "loss": 1.3724, |
| "loss/crossentropy": 2.202287197113037, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.24445781856775284, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.011867437099992226, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0011264642079671225, |
| "learning_rate": 0.01, |
| "loss": 1.3371, |
| "loss/crossentropy": 2.309388518333435, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.2442098781466484, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.011884711389802986, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0009780248006184896, |
| "learning_rate": 0.01, |
| "loss": 1.3841, |
| "loss/crossentropy": 2.499300003051758, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.26171083748340607, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.011901985679613748, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.0009480635325113932, |
| "learning_rate": 0.01, |
| "loss": 1.4675, |
| "loss/crossentropy": 2.35269558429718, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.2726883888244629, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.011919259969424507, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.000909868876139323, |
| "learning_rate": 0.01, |
| "loss": 1.403, |
| "loss/crossentropy": 2.78786039352417, |
| "loss/fcd": 1.25390625, |
| "loss/logits": 0.3147393763065338, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.011936534259235267, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0009186903635660808, |
| "learning_rate": 0.01, |
| "loss": 1.3294, |
| "loss/crossentropy": 2.0689194798469543, |
| "loss/fcd": 1.03515625, |
| "loss/logits": 0.234086312353611, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.011953808549046027, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0007778008778889974, |
| "learning_rate": 0.01, |
| "loss": 1.3331, |
| "loss/crossentropy": 2.290665626525879, |
| "loss/fcd": 1.046875, |
| "loss/logits": 0.23476862162351608, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.011971082838856787, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.0008422215779622396, |
| "learning_rate": 0.01, |
| "loss": 1.3703, |
| "loss/crossentropy": 2.4959352016448975, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.2350049912929535, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.011988357128667547, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0007624308268229167, |
| "learning_rate": 0.01, |
| "loss": 1.3499, |
| "loss/crossentropy": 2.3858295679092407, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.2346876710653305, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.012005631418478309, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0007703145345052083, |
| "learning_rate": 0.01, |
| "loss": 1.4174, |
| "loss/crossentropy": 2.5176814794540405, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2492477372288704, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.012022905708289068, |
| "grad_norm": 0.365234375, |
| "grad_norm_var": 0.0009376366933186848, |
| "learning_rate": 0.01, |
| "loss": 1.4472, |
| "loss/crossentropy": 2.553426146507263, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.25825950503349304, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.012040179998099828, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0009119510650634766, |
| "learning_rate": 0.01, |
| "loss": 1.3902, |
| "loss/crossentropy": 2.524499535560608, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.2615286335349083, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.012057454287910588, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.001028299331665039, |
| "learning_rate": 0.01, |
| "loss": 1.3468, |
| "loss/crossentropy": 2.234209656715393, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2630993127822876, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.012074728577721348, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0009722232818603516, |
| "learning_rate": 0.01, |
| "loss": 1.3397, |
| "loss/crossentropy": 2.595862627029419, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.25720856338739395, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.012092002867532108, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0009722232818603516, |
| "learning_rate": 0.01, |
| "loss": 1.3472, |
| "loss/crossentropy": 2.3556742668151855, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.23623445630073547, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.01210927715734287, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0007013797760009766, |
| "learning_rate": 0.01, |
| "loss": 1.3728, |
| "loss/crossentropy": 2.286816358566284, |
| "loss/fcd": 1.04296875, |
| "loss/logits": 0.24584627896547318, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.01212655144715363, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0007274468739827474, |
| "learning_rate": 0.01, |
| "loss": 1.3878, |
| "loss/crossentropy": 2.2807174921035767, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.25587528198957443, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.01214382573696439, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0007240136464436848, |
| "learning_rate": 0.01, |
| "loss": 1.3971, |
| "loss/crossentropy": 2.5250132083892822, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2833500802516937, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.01216110002677515, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0007322788238525391, |
| "learning_rate": 0.01, |
| "loss": 1.3972, |
| "loss/crossentropy": 2.5938040018081665, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2679053843021393, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.012178374316585909, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.000661468505859375, |
| "learning_rate": 0.01, |
| "loss": 1.3572, |
| "loss/crossentropy": 2.3809746503829956, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2514628916978836, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.012195648606396669, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0005812168121337891, |
| "learning_rate": 0.01, |
| "loss": 1.3698, |
| "loss/crossentropy": 2.3113526105880737, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.24198968708515167, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.01221292289620743, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0005541324615478515, |
| "learning_rate": 0.01, |
| "loss": 1.3485, |
| "loss/crossentropy": 2.465987205505371, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2991575300693512, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.01223019718601819, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0005623976389567058, |
| "learning_rate": 0.01, |
| "loss": 1.3754, |
| "loss/crossentropy": 2.4940463304519653, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2627300024032593, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.01224747147582895, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0005009810129801433, |
| "learning_rate": 0.01, |
| "loss": 1.378, |
| "loss/crossentropy": 2.6033318042755127, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2630281075835228, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.01226474576563971, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0005177656809488932, |
| "learning_rate": 0.01, |
| "loss": 1.3969, |
| "loss/crossentropy": 2.218273878097534, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.23244468122720718, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.01228202005545047, |
| "grad_norm": 0.3359375, |
| "grad_norm_var": 0.0005585829416910808, |
| "learning_rate": 0.01, |
| "loss": 1.4508, |
| "loss/crossentropy": 2.329068422317505, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.24251049757003784, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.01229929434526123, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.00029511451721191405, |
| "learning_rate": 0.01, |
| "loss": 1.4497, |
| "loss/crossentropy": 2.4693063497543335, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.2587638199329376, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.01231656863507199, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.00029511451721191405, |
| "learning_rate": 0.01, |
| "loss": 1.371, |
| "loss/crossentropy": 2.4224281311035156, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.27352161705493927, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.012333842924882751, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.00025577545166015624, |
| "learning_rate": 0.01, |
| "loss": 1.3508, |
| "loss/crossentropy": 2.5101382732391357, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.25151751190423965, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.012351117214693511, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0002010186513264974, |
| "learning_rate": 0.01, |
| "loss": 1.3949, |
| "loss/crossentropy": 2.765409469604492, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.23425965011119843, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.012368391504504271, |
| "grad_norm": 0.259765625, |
| "grad_norm_var": 0.0003284295399983724, |
| "learning_rate": 0.01, |
| "loss": 1.3346, |
| "loss/crossentropy": 2.446286678314209, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.23563802242279053, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.012385665794315031, |
| "grad_norm": 0.365234375, |
| "grad_norm_var": 0.0005666097005208333, |
| "learning_rate": 0.01, |
| "loss": 1.485, |
| "loss/crossentropy": 2.3494917154312134, |
| "loss/fcd": 1.47265625, |
| "loss/logits": 0.2857535183429718, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.01240294008412579, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0005729516347249349, |
| "learning_rate": 0.01, |
| "loss": 1.3814, |
| "loss/crossentropy": 2.3558719158172607, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.24474655091762543, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.01242021437393655, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.000646209716796875, |
| "learning_rate": 0.01, |
| "loss": 1.3125, |
| "loss/crossentropy": 2.364332675933838, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.24612490087747574, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.012437488663747312, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0006484826405843099, |
| "learning_rate": 0.01, |
| "loss": 1.3629, |
| "loss/crossentropy": 2.218404769897461, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.2676163464784622, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.012454762953558072, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.0007059574127197266, |
| "learning_rate": 0.01, |
| "loss": 1.3642, |
| "loss/crossentropy": 2.4319703578948975, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.25568731129169464, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.012472037243368832, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.0007715702056884765, |
| "learning_rate": 0.01, |
| "loss": 1.3565, |
| "loss/crossentropy": 2.603386163711548, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.23648831248283386, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.012489311533179592, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.000762033462524414, |
| "learning_rate": 0.01, |
| "loss": 1.4305, |
| "loss/crossentropy": 2.3345898389816284, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.2294597253203392, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.012506585822990352, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0007692813873291015, |
| "learning_rate": 0.01, |
| "loss": 1.3885, |
| "loss/crossentropy": 2.315110445022583, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.262426495552063, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.012523860112801112, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0007898807525634766, |
| "learning_rate": 0.01, |
| "loss": 1.2937, |
| "loss/crossentropy": 2.2987769842147827, |
| "loss/fcd": 1.0, |
| "loss/logits": 0.21975189447402954, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.012541134402611873, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0007562637329101562, |
| "learning_rate": 0.01, |
| "loss": 1.3775, |
| "loss/crossentropy": 2.5773731470108032, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.29223839938640594, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.012558408692422633, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0006650288899739584, |
| "learning_rate": 0.01, |
| "loss": 1.4041, |
| "loss/crossentropy": 2.138230562210083, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.24283046275377274, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.012575682982233393, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.0007525126139322917, |
| "learning_rate": 0.01, |
| "loss": 1.4611, |
| "loss/crossentropy": 2.521793842315674, |
| "loss/fcd": 1.2265625, |
| "loss/logits": 0.2588220089673996, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.012592957272044153, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0007710774739583333, |
| "learning_rate": 0.01, |
| "loss": 1.3833, |
| "loss/crossentropy": 2.5079206228256226, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.24896685779094696, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.012610231561854913, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0007460912068684896, |
| "learning_rate": 0.01, |
| "loss": 1.398, |
| "loss/crossentropy": 2.4435055255889893, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.2766249179840088, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.012627505851665673, |
| "grad_norm": 0.353515625, |
| "grad_norm_var": 0.0009387811024983724, |
| "learning_rate": 0.01, |
| "loss": 1.482, |
| "loss/crossentropy": 2.480614185333252, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.24479512870311737, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.012644780141476434, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.0008542219797770183, |
| "learning_rate": 0.01, |
| "loss": 1.3214, |
| "loss/crossentropy": 2.556125283241272, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.25190603733062744, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.012662054431287194, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0006001631418863933, |
| "learning_rate": 0.01, |
| "loss": 1.3992, |
| "loss/crossentropy": 2.2440203428268433, |
| "loss/fcd": 1.046875, |
| "loss/logits": 0.23071999847888947, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.012679328721097954, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0005847771962483723, |
| "learning_rate": 0.01, |
| "loss": 1.3884, |
| "loss/crossentropy": 2.366842269897461, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2621122822165489, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.012696603010908714, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0005288283030192057, |
| "learning_rate": 0.01, |
| "loss": 1.373, |
| "loss/crossentropy": 2.528809905052185, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2601289302110672, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.012713877300719474, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.0005879084269205729, |
| "learning_rate": 0.01, |
| "loss": 1.3657, |
| "loss/crossentropy": 2.1993446350097656, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.2357948124408722, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.012731151590530234, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0005395889282226562, |
| "learning_rate": 0.01, |
| "loss": 1.3611, |
| "loss/crossentropy": 2.5157347917556763, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2621786296367645, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.012748425880340995, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.00045566558837890626, |
| "learning_rate": 0.01, |
| "loss": 1.3787, |
| "loss/crossentropy": 2.463285803794861, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2661950886249542, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.012765700170151755, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.00044581095377604164, |
| "learning_rate": 0.01, |
| "loss": 1.3789, |
| "loss/crossentropy": 2.7613465785980225, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.24063792079687119, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.012782974459962515, |
| "grad_norm": 0.337890625, |
| "grad_norm_var": 0.0004956404368082683, |
| "learning_rate": 0.01, |
| "loss": 1.3809, |
| "loss/crossentropy": 2.3430649042129517, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.23180129379034042, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.012800248749773275, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0004435062408447266, |
| "learning_rate": 0.01, |
| "loss": 1.3546, |
| "loss/crossentropy": 2.347190737724304, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.23613610118627548, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.012817523039584035, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.00043320655822753906, |
| "learning_rate": 0.01, |
| "loss": 1.414, |
| "loss/crossentropy": 2.3196645975112915, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.27611708641052246, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.012834797329394795, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0004990736643473308, |
| "learning_rate": 0.01, |
| "loss": 1.3861, |
| "loss/crossentropy": 2.4212803840637207, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2471313625574112, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.012852071619205556, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0004806359608968099, |
| "learning_rate": 0.01, |
| "loss": 1.3723, |
| "loss/crossentropy": 2.527360200881958, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.24950604140758514, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.012869345909016316, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0004750569661458333, |
| "learning_rate": 0.01, |
| "loss": 1.3461, |
| "loss/crossentropy": 2.2922967672348022, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.23927000910043716, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.012886620198827076, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0004943688710530599, |
| "learning_rate": 0.01, |
| "loss": 1.3785, |
| "loss/crossentropy": 2.133127212524414, |
| "loss/fcd": 1.078125, |
| "loss/logits": 0.23443202674388885, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.012903894488637836, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.000366973876953125, |
| "learning_rate": 0.01, |
| "loss": 1.387, |
| "loss/crossentropy": 2.569379210472107, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.26725105941295624, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.012921168778448596, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.00033238728841145836, |
| "learning_rate": 0.01, |
| "loss": 1.4185, |
| "loss/crossentropy": 2.6103577613830566, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.27920565009117126, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.012938443068259356, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.00032145182291666666, |
| "learning_rate": 0.01, |
| "loss": 1.4161, |
| "loss/crossentropy": 2.3525288105010986, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.21820923686027527, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.012955717358070115, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0003083388010660807, |
| "learning_rate": 0.01, |
| "loss": 1.3429, |
| "loss/crossentropy": 2.563652276992798, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.25008824467658997, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.012972991647880877, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.00033416748046875, |
| "learning_rate": 0.01, |
| "loss": 1.3992, |
| "loss/crossentropy": 2.4368664026260376, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.2636963874101639, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.012990265937691637, |
| "grad_norm": 0.267578125, |
| "grad_norm_var": 0.00034173329671223957, |
| "learning_rate": 0.01, |
| "loss": 1.3548, |
| "loss/crossentropy": 2.47409451007843, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.26615823060274124, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.013007540227502397, |
| "grad_norm": 0.390625, |
| "grad_norm_var": 0.0008442560831705729, |
| "learning_rate": 0.01, |
| "loss": 1.4382, |
| "loss/crossentropy": 2.667958378791809, |
| "loss/fcd": 1.22265625, |
| "loss/logits": 0.29826460778713226, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.013024814517313157, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0008722941080729166, |
| "learning_rate": 0.01, |
| "loss": 1.3857, |
| "loss/crossentropy": 2.2399171590805054, |
| "loss/fcd": 1.30078125, |
| "loss/logits": 0.3064821809530258, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.013042088807123917, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0008643945058186849, |
| "learning_rate": 0.01, |
| "loss": 1.4191, |
| "loss/crossentropy": 2.4244364500045776, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.2772462069988251, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.013059363096934676, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.000800323486328125, |
| "learning_rate": 0.01, |
| "loss": 1.3482, |
| "loss/crossentropy": 2.6471344232559204, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2606939375400543, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.013076637386745438, |
| "grad_norm": 0.345703125, |
| "grad_norm_var": 0.000935220718383789, |
| "learning_rate": 0.01, |
| "loss": 1.4094, |
| "loss/crossentropy": 2.4318645000457764, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.2657194063067436, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.013093911676556198, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0009119510650634766, |
| "learning_rate": 0.01, |
| "loss": 1.4882, |
| "loss/crossentropy": 2.6587414741516113, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.25396668910980225, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.013111185966366958, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0008859634399414062, |
| "learning_rate": 0.01, |
| "loss": 1.3734, |
| "loss/crossentropy": 2.320420742034912, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.22045490145683289, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.013128460256177718, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0008935928344726562, |
| "learning_rate": 0.01, |
| "loss": 1.4048, |
| "loss/crossentropy": 2.43363881111145, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2532464414834976, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.013145734545988478, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0009287357330322266, |
| "learning_rate": 0.01, |
| "loss": 1.3617, |
| "loss/crossentropy": 2.5222312211990356, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.29095427691936493, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.013163008835799237, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0009198347727457682, |
| "learning_rate": 0.01, |
| "loss": 1.3893, |
| "loss/crossentropy": 2.265801191329956, |
| "loss/fcd": 1.02734375, |
| "loss/logits": 0.23643554002046585, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.013180283125609999, |
| "grad_norm": 0.390625, |
| "grad_norm_var": 0.0013386885325113933, |
| "learning_rate": 0.01, |
| "loss": 1.4154, |
| "loss/crossentropy": 2.1754260063171387, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.244869664311409, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.013197557415420759, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.001320330301920573, |
| "learning_rate": 0.01, |
| "loss": 1.3947, |
| "loss/crossentropy": 2.3228635787963867, |
| "loss/fcd": 1.03515625, |
| "loss/logits": 0.22359148412942886, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.013214831705231519, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0013203938802083333, |
| "learning_rate": 0.01, |
| "loss": 1.4051, |
| "loss/crossentropy": 2.5446053743362427, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.24661505222320557, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.013232105995042279, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0013085524241129556, |
| "learning_rate": 0.01, |
| "loss": 1.4116, |
| "loss/crossentropy": 2.4046772718429565, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.26322653889656067, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.013249380284853039, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.00127714474995931, |
| "learning_rate": 0.01, |
| "loss": 1.3094, |
| "loss/crossentropy": 2.397523880004883, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.2391202375292778, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.013266654574663798, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0012049357096354167, |
| "learning_rate": 0.01, |
| "loss": 1.3474, |
| "loss/crossentropy": 2.599183440208435, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2888915240764618, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.01328392886447456, |
| "grad_norm": 0.353515625, |
| "grad_norm_var": 0.0009141127268473307, |
| "learning_rate": 0.01, |
| "loss": 1.4331, |
| "loss/crossentropy": 2.1059322357177734, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.23741237819194794, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.01330120315428532, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0008471171061197917, |
| "learning_rate": 0.01, |
| "loss": 1.3643, |
| "loss/crossentropy": 2.697718620300293, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.26706932485103607, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.01331847744409608, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0008389631907145183, |
| "learning_rate": 0.01, |
| "loss": 1.45, |
| "loss/crossentropy": 2.4075610637664795, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.25129370391368866, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.01333575173390684, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0008669535319010417, |
| "learning_rate": 0.01, |
| "loss": 1.3877, |
| "loss/crossentropy": 2.7534801959991455, |
| "loss/fcd": 1.23828125, |
| "loss/logits": 0.30193065106868744, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.0133530260237176, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0008176008860270183, |
| "learning_rate": 0.01, |
| "loss": 1.3939, |
| "loss/crossentropy": 2.182551383972168, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.28344330191612244, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.01337030031352836, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.000862741470336914, |
| "learning_rate": 0.01, |
| "loss": 1.412, |
| "loss/crossentropy": 2.510794520378113, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.25014493614435196, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.013387574603339121, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.000925445556640625, |
| "learning_rate": 0.01, |
| "loss": 1.4564, |
| "loss/crossentropy": 2.479841709136963, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.25105684995651245, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.013404848893149881, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0009211063385009766, |
| "learning_rate": 0.01, |
| "loss": 1.3963, |
| "loss/crossentropy": 2.639458417892456, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2490757405757904, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.01342212318296064, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0008683363596598307, |
| "learning_rate": 0.01, |
| "loss": 1.4124, |
| "loss/crossentropy": 2.6080870628356934, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.24069885909557343, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.0134393974727714, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0008681615193684896, |
| "learning_rate": 0.01, |
| "loss": 1.3733, |
| "loss/crossentropy": 2.3055442571640015, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.2517802268266678, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.01345667176258216, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.00043328603108723957, |
| "learning_rate": 0.01, |
| "loss": 1.4781, |
| "loss/crossentropy": 2.5537742376327515, |
| "loss/fcd": 1.26953125, |
| "loss/logits": 0.30602647364139557, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.01347394605239292, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0004603068033854167, |
| "learning_rate": 0.01, |
| "loss": 1.3961, |
| "loss/crossentropy": 2.371378183364868, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.24987629055976868, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.013491220342203682, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.00048267046610514324, |
| "learning_rate": 0.01, |
| "loss": 1.3782, |
| "loss/crossentropy": 2.570296287536621, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.22898489236831665, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.013508494632014442, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0004997094472249349, |
| "learning_rate": 0.01, |
| "loss": 1.3685, |
| "loss/crossentropy": 2.282141923904419, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.2274707406759262, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.013525768921825202, |
| "grad_norm": 0.349609375, |
| "grad_norm_var": 0.0005658308664957683, |
| "learning_rate": 0.01, |
| "loss": 1.41, |
| "loss/crossentropy": 2.378341317176819, |
| "loss/fcd": 1.21484375, |
| "loss/logits": 0.3016776442527771, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.013543043211635962, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.0005829970041910808, |
| "learning_rate": 0.01, |
| "loss": 1.3398, |
| "loss/crossentropy": 2.6982511281967163, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.23673634231090546, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.013560317501446722, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.00046126047770182293, |
| "learning_rate": 0.01, |
| "loss": 1.3964, |
| "loss/crossentropy": 2.371803879737854, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.23621678352355957, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.013577591791257481, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.00047771135965983075, |
| "learning_rate": 0.01, |
| "loss": 1.3296, |
| "loss/crossentropy": 2.3509960174560547, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.23912984877824783, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.013594866081068241, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.0005536397298177083, |
| "learning_rate": 0.01, |
| "loss": 1.3796, |
| "loss/crossentropy": 2.4273725748062134, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.2564089596271515, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.013612140370879003, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0005254109700520833, |
| "learning_rate": 0.01, |
| "loss": 1.3708, |
| "loss/crossentropy": 2.4844895601272583, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.24952378869056702, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.013629414660689763, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0005256493886311848, |
| "learning_rate": 0.01, |
| "loss": 1.412, |
| "loss/crossentropy": 2.415653347969055, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2528124749660492, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.013646688950500523, |
| "grad_norm": 0.390625, |
| "grad_norm_var": 0.0008763472239176432, |
| "learning_rate": 0.01, |
| "loss": 1.4382, |
| "loss/crossentropy": 2.4079452753067017, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.2838260903954506, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.013663963240311282, |
| "grad_norm": 0.328125, |
| "grad_norm_var": 0.0008465925852457683, |
| "learning_rate": 0.01, |
| "loss": 1.446, |
| "loss/crossentropy": 2.3247077465057373, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2550275847315788, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.013681237530122042, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0008462905883789062, |
| "learning_rate": 0.01, |
| "loss": 1.3615, |
| "loss/crossentropy": 2.1464229822158813, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.25474052131175995, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.013698511819932802, |
| "grad_norm": 0.361328125, |
| "grad_norm_var": 0.0009821414947509765, |
| "learning_rate": 0.01, |
| "loss": 1.4535, |
| "loss/crossentropy": 2.4427038431167603, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.2672760635614395, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.013715786109743564, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.00098114013671875, |
| "learning_rate": 0.01, |
| "loss": 1.4375, |
| "loss/crossentropy": 2.502182126045227, |
| "loss/fcd": 1.203125, |
| "loss/logits": 0.30062489211559296, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.013733060399554324, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0010176976521809897, |
| "learning_rate": 0.01, |
| "loss": 1.3338, |
| "loss/crossentropy": 2.537824034690857, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.24768686294555664, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.013750334689365084, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.0011388142903645834, |
| "learning_rate": 0.01, |
| "loss": 1.3676, |
| "loss/crossentropy": 2.3750780820846558, |
| "loss/fcd": 1.076171875, |
| "loss/logits": 0.23160798847675323, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.013767608979175843, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.001122903823852539, |
| "learning_rate": 0.01, |
| "loss": 1.3812, |
| "loss/crossentropy": 2.628328800201416, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2566673457622528, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.013784883268986603, |
| "grad_norm": 0.353515625, |
| "grad_norm_var": 0.0012051900227864583, |
| "learning_rate": 0.01, |
| "loss": 1.3547, |
| "loss/crossentropy": 2.0953266620635986, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.23933346569538116, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.013802157558797363, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0011489232381184896, |
| "learning_rate": 0.01, |
| "loss": 1.4166, |
| "loss/crossentropy": 2.7266474962234497, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2656974792480469, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.013819431848608125, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0010843912760416666, |
| "learning_rate": 0.01, |
| "loss": 1.3191, |
| "loss/crossentropy": 2.459654688835144, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.2644665837287903, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.013836706138418885, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0010843912760416666, |
| "learning_rate": 0.01, |
| "loss": 1.4278, |
| "loss/crossentropy": 2.629300117492676, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.24821807444095612, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.013853980428229645, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0010678609212239583, |
| "learning_rate": 0.01, |
| "loss": 1.3413, |
| "loss/crossentropy": 2.5803698301315308, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.267608180642128, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.013871254718040404, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0010577996571858725, |
| "learning_rate": 0.01, |
| "loss": 1.3176, |
| "loss/crossentropy": 2.349183440208435, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.25479844957590103, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.013888529007851164, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0010577996571858725, |
| "learning_rate": 0.01, |
| "loss": 1.3783, |
| "loss/crossentropy": 2.618894100189209, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.2711791917681694, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.013905803297661924, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0010577996571858725, |
| "learning_rate": 0.01, |
| "loss": 1.3966, |
| "loss/crossentropy": 2.3134875893592834, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2539241313934326, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.013923077587472686, |
| "grad_norm": 0.330078125, |
| "grad_norm_var": 0.0006611506144205729, |
| "learning_rate": 0.01, |
| "loss": 1.441, |
| "loss/crossentropy": 2.837363600730896, |
| "loss/fcd": 1.2578125, |
| "loss/logits": 0.32089151442050934, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.013940351877283446, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0006586074829101563, |
| "learning_rate": 0.01, |
| "loss": 1.3525, |
| "loss/crossentropy": 2.377834916114807, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.23237647861242294, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.013957626167094206, |
| "grad_norm": 0.271484375, |
| "grad_norm_var": 0.0007306416829427083, |
| "learning_rate": 0.01, |
| "loss": 1.3753, |
| "loss/crossentropy": 2.520345091819763, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.26999618113040924, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.013974900456904965, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0005376180013020833, |
| "learning_rate": 0.01, |
| "loss": 1.3425, |
| "loss/crossentropy": 2.55434787273407, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.26515287160873413, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.013992174746715725, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0005302270253499349, |
| "learning_rate": 0.01, |
| "loss": 1.405, |
| "loss/crossentropy": 2.320609927177429, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2443319857120514, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.014009449036526485, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0005451043446858724, |
| "learning_rate": 0.01, |
| "loss": 1.3608, |
| "loss/crossentropy": 2.3824050426483154, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2653844952583313, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.014026723326337247, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0005200703938802084, |
| "learning_rate": 0.01, |
| "loss": 1.4786, |
| "loss/crossentropy": 2.459092617034912, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.2695985734462738, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.014043997616148007, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.0005698998769124349, |
| "learning_rate": 0.01, |
| "loss": 1.2888, |
| "loss/crossentropy": 2.4817110300064087, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.22882136702537537, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.014061271905958767, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0003539880116780599, |
| "learning_rate": 0.01, |
| "loss": 1.3681, |
| "loss/crossentropy": 2.556985020637512, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.25683027505874634, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.014078546195769526, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0003661473592122396, |
| "learning_rate": 0.01, |
| "loss": 1.4285, |
| "loss/crossentropy": 2.3824613094329834, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.24755095690488815, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.014095820485580286, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.00038094520568847655, |
| "learning_rate": 0.01, |
| "loss": 1.3345, |
| "loss/crossentropy": 2.1579148173332214, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.23608000576496124, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.014113094775391046, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0004091739654541016, |
| "learning_rate": 0.01, |
| "loss": 1.42, |
| "loss/crossentropy": 2.556256413459778, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.23912374675273895, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.014130369065201808, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.00040879249572753904, |
| "learning_rate": 0.01, |
| "loss": 1.4296, |
| "loss/crossentropy": 2.497371554374695, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.24960072338581085, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.014147643355012568, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.000379180908203125, |
| "learning_rate": 0.01, |
| "loss": 1.4164, |
| "loss/crossentropy": 2.6055017709732056, |
| "loss/fcd": 1.24609375, |
| "loss/logits": 0.30321623384952545, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.014164917644823328, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.00041667620340983075, |
| "learning_rate": 0.01, |
| "loss": 1.3435, |
| "loss/crossentropy": 2.520479202270508, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.24647565186023712, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.014182191934634087, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.00037789344787597656, |
| "learning_rate": 0.01, |
| "loss": 1.4303, |
| "loss/crossentropy": 2.4229378700256348, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.27616265416145325, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.014199466224444847, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.00028634071350097656, |
| "learning_rate": 0.01, |
| "loss": 1.4063, |
| "loss/crossentropy": 2.642806649208069, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.24927609413862228, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.014216740514255607, |
| "grad_norm": 0.341796875, |
| "grad_norm_var": 0.00044040679931640626, |
| "learning_rate": 0.01, |
| "loss": 1.4389, |
| "loss/crossentropy": 2.743402600288391, |
| "loss/fcd": 1.20703125, |
| "loss/logits": 0.2956629917025566, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.014234014804066367, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.00040079752604166665, |
| "learning_rate": 0.01, |
| "loss": 1.4283, |
| "loss/crossentropy": 2.5851320028305054, |
| "loss/fcd": 1.20703125, |
| "loss/logits": 0.26086658239364624, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.014251289093877129, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0003787835439046224, |
| "learning_rate": 0.01, |
| "loss": 1.3569, |
| "loss/crossentropy": 2.5595767498016357, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.24868559837341309, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.014268563383687888, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0003745873769124349, |
| "learning_rate": 0.01, |
| "loss": 1.3698, |
| "loss/crossentropy": 2.553021550178528, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.25030215084552765, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.014285837673498648, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0003688653310139974, |
| "learning_rate": 0.01, |
| "loss": 1.3934, |
| "loss/crossentropy": 2.459465980529785, |
| "loss/fcd": 1.2578125, |
| "loss/logits": 0.27228477597236633, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.014303111963309408, |
| "grad_norm": 0.373046875, |
| "grad_norm_var": 0.0006812890370686849, |
| "learning_rate": 0.01, |
| "loss": 1.403, |
| "loss/crossentropy": 2.5050086975097656, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.2527881860733032, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.014320386253120168, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0006002902984619141, |
| "learning_rate": 0.01, |
| "loss": 1.4282, |
| "loss/crossentropy": 2.5587570667266846, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.2506961077451706, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.014337660542930928, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.000600433349609375, |
| "learning_rate": 0.01, |
| "loss": 1.3663, |
| "loss/crossentropy": 2.433290719985962, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.23105743527412415, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.01435493483274169, |
| "grad_norm": 0.345703125, |
| "grad_norm_var": 0.0006985823313395182, |
| "learning_rate": 0.01, |
| "loss": 1.4033, |
| "loss/crossentropy": 2.2913233041763306, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2715977430343628, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.01437220912255245, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0007214864095052083, |
| "learning_rate": 0.01, |
| "loss": 1.3672, |
| "loss/crossentropy": 2.4408832788467407, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.23768731951713562, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.01438948341236321, |
| "grad_norm": 0.3515625, |
| "grad_norm_var": 0.0008374532063802083, |
| "learning_rate": 0.01, |
| "loss": 1.3927, |
| "loss/crossentropy": 2.273505926132202, |
| "loss/fcd": 1.05078125, |
| "loss/logits": 0.2371639683842659, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.01440675770217397, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0008224328358968099, |
| "learning_rate": 0.01, |
| "loss": 1.4174, |
| "loss/crossentropy": 2.304438829421997, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2705874443054199, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.014424031991984729, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0008061091105143229, |
| "learning_rate": 0.01, |
| "loss": 1.413, |
| "loss/crossentropy": 2.4857107400894165, |
| "loss/fcd": 1.265625, |
| "loss/logits": 0.2602947950363159, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.014441306281795489, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.0007237116495768229, |
| "learning_rate": 0.01, |
| "loss": 1.4423, |
| "loss/crossentropy": 2.4861044883728027, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.25615356862545013, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.01445858057160625, |
| "grad_norm": 0.267578125, |
| "grad_norm_var": 0.0008066177368164062, |
| "learning_rate": 0.01, |
| "loss": 1.3396, |
| "loss/crossentropy": 2.3363460302352905, |
| "loss/fcd": 1.03125, |
| "loss/logits": 0.2474212720990181, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.01447585486141701, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0007843017578125, |
| "learning_rate": 0.01, |
| "loss": 1.3885, |
| "loss/crossentropy": 2.332596778869629, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.2456573098897934, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.01449312915122777, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0007394790649414062, |
| "learning_rate": 0.01, |
| "loss": 1.3709, |
| "loss/crossentropy": 2.613990545272827, |
| "loss/fcd": 1.1953125, |
| "loss/logits": 0.2681911140680313, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.01451040344103853, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.000803375244140625, |
| "learning_rate": 0.01, |
| "loss": 1.3305, |
| "loss/crossentropy": 2.448235511779785, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.22328373789787292, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.01452767773084929, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0008455753326416015, |
| "learning_rate": 0.01, |
| "loss": 1.3552, |
| "loss/crossentropy": 2.4329841136932373, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.2812986671924591, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.01454495202066005, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0008448282877604167, |
| "learning_rate": 0.01, |
| "loss": 1.4049, |
| "loss/crossentropy": 2.366762161254883, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.2537970468401909, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.014562226310470812, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0008669535319010417, |
| "learning_rate": 0.01, |
| "loss": 1.3474, |
| "loss/crossentropy": 2.2118855714797974, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.2319856360554695, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.014579500600281571, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0005958398183186849, |
| "learning_rate": 0.01, |
| "loss": 1.3672, |
| "loss/crossentropy": 2.427622437477112, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.26083478331565857, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.014596774890092331, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.000603485107421875, |
| "learning_rate": 0.01, |
| "loss": 1.3868, |
| "loss/crossentropy": 2.6780372858047485, |
| "loss/fcd": 1.2578125, |
| "loss/logits": 0.2781776934862137, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.014614049179903091, |
| "grad_norm": 0.337890625, |
| "grad_norm_var": 0.0006572564442952473, |
| "learning_rate": 0.01, |
| "loss": 1.3767, |
| "loss/crossentropy": 2.36633038520813, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2774253934621811, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.014631323469713851, |
| "grad_norm": 0.263671875, |
| "grad_norm_var": 0.0006892999013264974, |
| "learning_rate": 0.01, |
| "loss": 1.3532, |
| "loss/crossentropy": 2.598803162574768, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.27754758298397064, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.014648597759524611, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.000730133056640625, |
| "learning_rate": 0.01, |
| "loss": 1.3245, |
| "loss/crossentropy": 2.323062777519226, |
| "loss/fcd": 1.0234375, |
| "loss/logits": 0.23049668222665787, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.014665872049335373, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0005938212076822916, |
| "learning_rate": 0.01, |
| "loss": 1.497, |
| "loss/crossentropy": 2.4116770029067993, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.25217771530151367, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.014683146339146132, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.0006493727366129557, |
| "learning_rate": 0.01, |
| "loss": 1.3825, |
| "loss/crossentropy": 2.784231662750244, |
| "loss/fcd": 1.23828125, |
| "loss/logits": 0.301376610994339, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.014700420628956892, |
| "grad_norm": 0.68359375, |
| "grad_norm_var": 0.009682146708170573, |
| "learning_rate": 0.01, |
| "loss": 1.5242, |
| "loss/crossentropy": 2.3721545934677124, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.2636701613664627, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.014717694918767652, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.009682146708170573, |
| "learning_rate": 0.01, |
| "loss": 1.3491, |
| "loss/crossentropy": 2.5496045351028442, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2594982087612152, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.014734969208578412, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.009457651774088542, |
| "learning_rate": 0.01, |
| "loss": 1.3208, |
| "loss/crossentropy": 2.211892247200012, |
| "loss/fcd": 1.0703125, |
| "loss/logits": 0.21089013665914536, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.014752243498389172, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.009530750910441081, |
| "learning_rate": 0.01, |
| "loss": 1.3612, |
| "loss/crossentropy": 2.3918616771698, |
| "loss/fcd": 1.046875, |
| "loss/logits": 0.2475578412413597, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.014769517788199932, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.009600178400675455, |
| "learning_rate": 0.01, |
| "loss": 1.3711, |
| "loss/crossentropy": 2.6660208702087402, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.25626226514577866, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.014786792078010693, |
| "grad_norm": 0.265625, |
| "grad_norm_var": 0.009698422749837239, |
| "learning_rate": 0.01, |
| "loss": 1.3272, |
| "loss/crossentropy": 2.4646941423416138, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.24187320470809937, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.014804066367821453, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.00958250363667806, |
| "learning_rate": 0.01, |
| "loss": 1.349, |
| "loss/crossentropy": 2.889734983444214, |
| "loss/fcd": 1.23046875, |
| "loss/logits": 0.28400754928588867, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.014821340657632213, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.009624671936035157, |
| "learning_rate": 0.01, |
| "loss": 1.3696, |
| "loss/crossentropy": 2.4632620811462402, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.24944238364696503, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.014838614947442973, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.00966332753499349, |
| "learning_rate": 0.01, |
| "loss": 1.3636, |
| "loss/crossentropy": 2.38780677318573, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.25044557452201843, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.014855889237253733, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.009620141983032227, |
| "learning_rate": 0.01, |
| "loss": 1.392, |
| "loss/crossentropy": 2.523656487464905, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2575480043888092, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.014873163527064493, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.009632619222005208, |
| "learning_rate": 0.01, |
| "loss": 1.3788, |
| "loss/crossentropy": 2.3901199102401733, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.22918711602687836, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.014890437816875254, |
| "grad_norm": 0.41015625, |
| "grad_norm_var": 0.010067224502563477, |
| "learning_rate": 0.01, |
| "loss": 1.392, |
| "loss/crossentropy": 2.2604238986968994, |
| "loss/fcd": 1.29296875, |
| "loss/logits": 0.28666311502456665, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.014907712106686014, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.00983727773030599, |
| "learning_rate": 0.01, |
| "loss": 1.3701, |
| "loss/crossentropy": 2.1219175457954407, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.2484002709388733, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.014924986396496774, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.00966490109761556, |
| "learning_rate": 0.01, |
| "loss": 1.4008, |
| "loss/crossentropy": 2.4230719804763794, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.2542117089033127, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.014942260686307534, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.009696563084920248, |
| "learning_rate": 0.01, |
| "loss": 1.3599, |
| "loss/crossentropy": 2.602153182029724, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.2338126003742218, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.014959534976118294, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.0097320556640625, |
| "learning_rate": 0.01, |
| "loss": 1.3659, |
| "loss/crossentropy": 2.3879982233047485, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.25103290379047394, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.014976809265929054, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.0010736465454101562, |
| "learning_rate": 0.01, |
| "loss": 1.348, |
| "loss/crossentropy": 2.4637222290039062, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2280896008014679, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.014994083555739815, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0010012149810791015, |
| "learning_rate": 0.01, |
| "loss": 1.3708, |
| "loss/crossentropy": 2.784236192703247, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.28741903603076935, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.015011357845550575, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.001000833511352539, |
| "learning_rate": 0.01, |
| "loss": 1.4288, |
| "loss/crossentropy": 2.6332989931106567, |
| "loss/fcd": 1.27734375, |
| "loss/logits": 0.3306438624858856, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.015028632135361335, |
| "grad_norm": 0.359375, |
| "grad_norm_var": 0.0011690616607666015, |
| "learning_rate": 0.01, |
| "loss": 1.4187, |
| "loss/crossentropy": 2.3606460094451904, |
| "loss/fcd": 1.0546875, |
| "loss/logits": 0.23307877779006958, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.015045906425172095, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0011728286743164062, |
| "learning_rate": 0.01, |
| "loss": 1.3553, |
| "loss/crossentropy": 2.324714183807373, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.2501022219657898, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.015063180714982855, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.0010920047760009765, |
| "learning_rate": 0.01, |
| "loss": 1.3623, |
| "loss/crossentropy": 2.328053116798401, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2553166151046753, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.015080455004793615, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.00108640988667806, |
| "learning_rate": 0.01, |
| "loss": 1.4392, |
| "loss/crossentropy": 2.377878785133362, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.25394026935100555, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.015097729294604376, |
| "grad_norm": 0.349609375, |
| "grad_norm_var": 0.0011700948079427084, |
| "learning_rate": 0.01, |
| "loss": 1.3398, |
| "loss/crossentropy": 2.542131185531616, |
| "loss/fcd": 1.0625, |
| "loss/logits": 0.24263548851013184, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.015115003584415136, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0011273701985677084, |
| "learning_rate": 0.01, |
| "loss": 1.3837, |
| "loss/crossentropy": 2.443636417388916, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.27580726146698, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.015132277874225896, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0011240005493164062, |
| "learning_rate": 0.01, |
| "loss": 1.4357, |
| "loss/crossentropy": 2.752240300178528, |
| "loss/fcd": 1.17578125, |
| "loss/logits": 0.2472759708762169, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.015149552164036656, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.00113067626953125, |
| "learning_rate": 0.01, |
| "loss": 1.3789, |
| "loss/crossentropy": 2.504664421081543, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.25199174135923386, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.015166826453847416, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.0004998366038004557, |
| "learning_rate": 0.01, |
| "loss": 1.3978, |
| "loss/crossentropy": 2.3523584604263306, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.26311442255973816, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.015184100743658176, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0005164941151936849, |
| "learning_rate": 0.01, |
| "loss": 1.3575, |
| "loss/crossentropy": 2.3136786818504333, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.25283563137054443, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.015201375033468937, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0005233605702718099, |
| "learning_rate": 0.01, |
| "loss": 1.4031, |
| "loss/crossentropy": 2.445231080055237, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.251323863863945, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.015218649323279697, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.0005263646443684895, |
| "learning_rate": 0.01, |
| "loss": 1.4241, |
| "loss/crossentropy": 2.5056021213531494, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2573155537247658, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.015235923613090457, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.0005330403645833333, |
| "learning_rate": 0.01, |
| "loss": 1.3779, |
| "loss/crossentropy": 2.4044970273971558, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.24030664563179016, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.015253197902901217, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.0005077203114827474, |
| "learning_rate": 0.01, |
| "loss": 1.3391, |
| "loss/crossentropy": 2.2992568016052246, |
| "loss/fcd": 1.037109375, |
| "loss/logits": 0.23432201147079468, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.015270472192711977, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0005009333292643229, |
| "learning_rate": 0.01, |
| "loss": 1.3742, |
| "loss/crossentropy": 2.346727728843689, |
| "loss/fcd": 1.05078125, |
| "loss/logits": 0.2232709527015686, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.015287746482522737, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0005459944407145182, |
| "learning_rate": 0.01, |
| "loss": 1.3237, |
| "loss/crossentropy": 1.982240617275238, |
| "loss/fcd": 1.0390625, |
| "loss/logits": 0.22034113854169846, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.015305020772333498, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0003636042277018229, |
| "learning_rate": 0.01, |
| "loss": 1.438, |
| "loss/crossentropy": 2.3263243436813354, |
| "loss/fcd": 1.1640625, |
| "loss/logits": 0.24902021139860153, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.015322295062144258, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0003649393717447917, |
| "learning_rate": 0.01, |
| "loss": 1.3869, |
| "loss/crossentropy": 2.56560879945755, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2558091878890991, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.015339569351955018, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.00032512346903483075, |
| "learning_rate": 0.01, |
| "loss": 1.3886, |
| "loss/crossentropy": 2.4856609106063843, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.24640005826950073, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.015356843641765778, |
| "grad_norm": 0.349609375, |
| "grad_norm_var": 0.0004208882649739583, |
| "learning_rate": 0.01, |
| "loss": 1.4196, |
| "loss/crossentropy": 2.55330491065979, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.25765371322631836, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.015374117931576538, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.0003218968709309896, |
| "learning_rate": 0.01, |
| "loss": 1.3971, |
| "loss/crossentropy": 2.6354317665100098, |
| "loss/fcd": 1.3203125, |
| "loss/logits": 0.3442998379468918, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.015391392221387298, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.00032755533854166664, |
| "learning_rate": 0.01, |
| "loss": 1.3998, |
| "loss/crossentropy": 2.034050762653351, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.23992937058210373, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.015408666511198058, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0003330866495768229, |
| "learning_rate": 0.01, |
| "loss": 1.4362, |
| "loss/crossentropy": 2.7760528326034546, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.2806248515844345, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.01542594080100882, |
| "grad_norm": 0.34375, |
| "grad_norm_var": 0.000394439697265625, |
| "learning_rate": 0.01, |
| "loss": 1.4516, |
| "loss/crossentropy": 2.26086688041687, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.31815242767333984, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.015443215090819579, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.0004140218098958333, |
| "learning_rate": 0.01, |
| "loss": 1.3702, |
| "loss/crossentropy": 2.5985008478164673, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2603040784597397, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.015460489380630339, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0003986199696858724, |
| "learning_rate": 0.01, |
| "loss": 1.3509, |
| "loss/crossentropy": 2.3431901335716248, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.22906331717967987, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.015477763670441099, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.0004066308339436849, |
| "learning_rate": 0.01, |
| "loss": 1.3537, |
| "loss/crossentropy": 2.4866254329681396, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.23776976764202118, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.015495037960251859, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.00040791829427083335, |
| "learning_rate": 0.01, |
| "loss": 1.3942, |
| "loss/crossentropy": 2.656658411026001, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.265699565410614, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.015512312250062619, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0004048506418863932, |
| "learning_rate": 0.01, |
| "loss": 1.398, |
| "loss/crossentropy": 2.508056640625, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.2679043859243393, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.01552958653987338, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.00033086140950520834, |
| "learning_rate": 0.01, |
| "loss": 1.3786, |
| "loss/crossentropy": 2.241898775100708, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.23984474688768387, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.01554686082968414, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0003639062245686849, |
| "learning_rate": 0.01, |
| "loss": 1.4062, |
| "loss/crossentropy": 2.563822388648987, |
| "loss/fcd": 1.12890625, |
| "loss/logits": 0.2376401573419571, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.0155641351194949, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0003350416819254557, |
| "learning_rate": 0.01, |
| "loss": 1.3943, |
| "loss/crossentropy": 2.4819493293762207, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.26604655385017395, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.01558140940930566, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0003422419230143229, |
| "learning_rate": 0.01, |
| "loss": 1.438, |
| "loss/crossentropy": 2.6099933385849, |
| "loss/fcd": 1.21484375, |
| "loss/logits": 0.2890657037496567, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.01559868369911642, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.00033817291259765627, |
| "learning_rate": 0.01, |
| "loss": 1.4034, |
| "loss/crossentropy": 2.5849201679229736, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.2732825428247452, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.01561595798892718, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0003444512685139974, |
| "learning_rate": 0.01, |
| "loss": 1.3811, |
| "loss/crossentropy": 2.3671282529830933, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.24938072264194489, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.01563323227873794, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.00024871826171875, |
| "learning_rate": 0.01, |
| "loss": 1.3843, |
| "loss/crossentropy": 2.1398147344589233, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.2394903600215912, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.0156505065685487, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0002757867177327474, |
| "learning_rate": 0.01, |
| "loss": 1.3808, |
| "loss/crossentropy": 2.3531702756881714, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.25511349737644196, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.01566778085835946, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0002784570058186849, |
| "learning_rate": 0.01, |
| "loss": 1.3835, |
| "loss/crossentropy": 2.5271737575531006, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.25303974002599716, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.015685055148170222, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0003013451894124349, |
| "learning_rate": 0.01, |
| "loss": 1.3966, |
| "loss/crossentropy": 2.50630259513855, |
| "loss/fcd": 1.25390625, |
| "loss/logits": 0.28888703882694244, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.015702329437980982, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.00022068023681640626, |
| "learning_rate": 0.01, |
| "loss": 1.3997, |
| "loss/crossentropy": 2.6066339015960693, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.24285603314638138, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.015719603727791742, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.00016541481018066405, |
| "learning_rate": 0.01, |
| "loss": 1.3514, |
| "loss/crossentropy": 2.349377751350403, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.2379670813679695, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.015736878017602502, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.00018677711486816406, |
| "learning_rate": 0.01, |
| "loss": 1.3697, |
| "loss/crossentropy": 2.493922233581543, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.2561178654432297, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.015754152307413262, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.000218963623046875, |
| "learning_rate": 0.01, |
| "loss": 1.3902, |
| "loss/crossentropy": 2.17154997587204, |
| "loss/fcd": 1.04296875, |
| "loss/logits": 0.22504562884569168, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.015771426597224022, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.00022525787353515624, |
| "learning_rate": 0.01, |
| "loss": 1.3458, |
| "loss/crossentropy": 2.4228713512420654, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.26753516495227814, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.01578870088703478, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.00022735595703125, |
| "learning_rate": 0.01, |
| "loss": 1.4741, |
| "loss/crossentropy": 2.25216805934906, |
| "loss/fcd": 1.23046875, |
| "loss/logits": 0.33171379566192627, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.01580597517684554, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.000212860107421875, |
| "learning_rate": 0.01, |
| "loss": 1.3429, |
| "loss/crossentropy": 2.1387062072753906, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.24266959726810455, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.0158232494666563, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.00027794837951660155, |
| "learning_rate": 0.01, |
| "loss": 1.4493, |
| "loss/crossentropy": 2.02074271440506, |
| "loss/fcd": 1.23828125, |
| "loss/logits": 0.25191547721624374, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.01584052375646706, |
| "grad_norm": 0.7734375, |
| "grad_norm_var": 0.01417692502339681, |
| "learning_rate": 0.01, |
| "loss": 1.4196, |
| "loss/crossentropy": 2.47384512424469, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.2742984741926193, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.01585779804627782, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.014222462972005209, |
| "learning_rate": 0.01, |
| "loss": 1.3766, |
| "loss/crossentropy": 2.5627119541168213, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.27059850841760635, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.01587507233608858, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.014214007059733073, |
| "learning_rate": 0.01, |
| "loss": 1.4257, |
| "loss/crossentropy": 2.5728260278701782, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.26422248035669327, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.015892346625899344, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.01424706776936849, |
| "learning_rate": 0.01, |
| "loss": 1.3441, |
| "loss/crossentropy": 2.3634893894195557, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2779320180416107, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.015909620915710104, |
| "grad_norm": 0.3359375, |
| "grad_norm_var": 0.01422723134358724, |
| "learning_rate": 0.01, |
| "loss": 1.4555, |
| "loss/crossentropy": 2.176904857158661, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.2693602591753006, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.015926895205520864, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.014169820149739583, |
| "learning_rate": 0.01, |
| "loss": 1.3614, |
| "loss/crossentropy": 2.611035466194153, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.2582136243581772, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.015944169495331624, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.014190610249837239, |
| "learning_rate": 0.01, |
| "loss": 1.3666, |
| "loss/crossentropy": 2.353346347808838, |
| "loss/fcd": 1.11328125, |
| "loss/logits": 0.24240678548812866, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.015961443785142384, |
| "grad_norm": 0.283203125, |
| "grad_norm_var": 0.014214579264322917, |
| "learning_rate": 0.01, |
| "loss": 1.3461, |
| "loss/crossentropy": 2.3549081087112427, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.2364579290151596, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.015978718074953144, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.014224227269490559, |
| "learning_rate": 0.01, |
| "loss": 1.3649, |
| "loss/crossentropy": 2.4736167192459106, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.26356005668640137, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.015995992364763904, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.01417382558186849, |
| "learning_rate": 0.01, |
| "loss": 1.4136, |
| "loss/crossentropy": 2.3580808639526367, |
| "loss/fcd": 1.2578125, |
| "loss/logits": 0.2911546379327774, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.016013266654574664, |
| "grad_norm": 0.298828125, |
| "grad_norm_var": 0.014087867736816407, |
| "learning_rate": 0.01, |
| "loss": 1.3582, |
| "loss/crossentropy": 2.476295828819275, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.267447791993618, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.016030540944385423, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.013896942138671875, |
| "learning_rate": 0.01, |
| "loss": 1.411, |
| "loss/crossentropy": 2.6316243410110474, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.2681735157966614, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.016047815234196183, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.013840230305989583, |
| "learning_rate": 0.01, |
| "loss": 1.3853, |
| "loss/crossentropy": 2.5550700426101685, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.25278639793395996, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.016065089524006943, |
| "grad_norm": 0.296875, |
| "grad_norm_var": 0.013876597086588541, |
| "learning_rate": 0.01, |
| "loss": 1.4125, |
| "loss/crossentropy": 2.511132836341858, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.26167523860931396, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.016082363813817703, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.01403514544169108, |
| "learning_rate": 0.01, |
| "loss": 1.351, |
| "loss/crossentropy": 2.468320608139038, |
| "loss/fcd": 1.1484375, |
| "loss/logits": 0.254236102104187, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.016099638103628463, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.014063119888305664, |
| "learning_rate": 0.01, |
| "loss": 1.3762, |
| "loss/crossentropy": 2.7182319164276123, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.25874409079551697, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.016116912393439226, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.00023280779520670574, |
| "learning_rate": 0.01, |
| "loss": 1.3703, |
| "loss/crossentropy": 2.3206039667129517, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.2651352882385254, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.016134186683249986, |
| "grad_norm": 0.310546875, |
| "grad_norm_var": 0.00022454261779785155, |
| "learning_rate": 0.01, |
| "loss": 1.3802, |
| "loss/crossentropy": 2.498626470565796, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.259146973490715, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.016151460973060746, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.00024628639221191406, |
| "learning_rate": 0.01, |
| "loss": 1.3612, |
| "loss/crossentropy": 2.3583563566207886, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.24286328256130219, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.016168735262871506, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.00025018056233723957, |
| "learning_rate": 0.01, |
| "loss": 1.4221, |
| "loss/crossentropy": 2.4976600408554077, |
| "loss/fcd": 1.234375, |
| "loss/logits": 0.274882972240448, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.016186009552682266, |
| "grad_norm": 0.376953125, |
| "grad_norm_var": 0.0005435784657796224, |
| "learning_rate": 0.01, |
| "loss": 1.4084, |
| "loss/crossentropy": 2.4435365200042725, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.2811162769794464, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.016203283842493026, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0005657037099202473, |
| "learning_rate": 0.01, |
| "loss": 1.3418, |
| "loss/crossentropy": 2.3197275400161743, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.26322872936725616, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.016220558132303785, |
| "grad_norm": 0.2890625, |
| "grad_norm_var": 0.0005706628163655599, |
| "learning_rate": 0.01, |
| "loss": 1.3881, |
| "loss/crossentropy": 2.6520742177963257, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.2682619243860245, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.016237832422114545, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0005757013956705729, |
| "learning_rate": 0.01, |
| "loss": 1.3772, |
| "loss/crossentropy": 2.4414173364639282, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.23820270597934723, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.016255106711925305, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0005737145741780599, |
| "learning_rate": 0.01, |
| "loss": 1.4165, |
| "loss/crossentropy": 2.4042497873306274, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2601849138736725, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.016272381001736065, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0005716323852539062, |
| "learning_rate": 0.01, |
| "loss": 1.4208, |
| "loss/crossentropy": 2.4315325021743774, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.2801144868135452, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.016289655291546825, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.0006620883941650391, |
| "learning_rate": 0.01, |
| "loss": 1.4608, |
| "loss/crossentropy": 2.545047879219055, |
| "loss/fcd": 1.2890625, |
| "loss/logits": 0.33230888843536377, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.016306929581357585, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.0006926854451497396, |
| "learning_rate": 0.01, |
| "loss": 1.329, |
| "loss/crossentropy": 2.259741187095642, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.2455529421567917, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.016324203871168348, |
| "grad_norm": 0.294921875, |
| "grad_norm_var": 0.0006779829661051432, |
| "learning_rate": 0.01, |
| "loss": 1.3409, |
| "loss/crossentropy": 2.3239141702651978, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.24660293757915497, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.016341478160979108, |
| "grad_norm": 0.33203125, |
| "grad_norm_var": 0.0007329146067301432, |
| "learning_rate": 0.01, |
| "loss": 1.4681, |
| "loss/crossentropy": 2.3145695328712463, |
| "loss/fcd": 1.1015625, |
| "loss/logits": 0.24830932170152664, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.016358752450789868, |
| "grad_norm": 0.330078125, |
| "grad_norm_var": 0.0007279555002848308, |
| "learning_rate": 0.01, |
| "loss": 1.5011, |
| "loss/crossentropy": 2.350569486618042, |
| "loss/fcd": 1.1875, |
| "loss/logits": 0.2759709805250168, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.016376026740600628, |
| "grad_norm": 0.470703125, |
| "grad_norm_var": 0.0024080912272135416, |
| "learning_rate": 0.01, |
| "loss": 1.52, |
| "loss/crossentropy": 2.034683883190155, |
| "loss/fcd": 1.2421875, |
| "loss/logits": 0.28756849467754364, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.016393301030411388, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.002434539794921875, |
| "learning_rate": 0.01, |
| "loss": 1.4182, |
| "loss/crossentropy": 2.5900092124938965, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.26620975136756897, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.016410575320222148, |
| "grad_norm": 0.314453125, |
| "grad_norm_var": 0.002431170145670573, |
| "learning_rate": 0.01, |
| "loss": 1.4163, |
| "loss/crossentropy": 2.458656430244446, |
| "loss/fcd": 1.1953125, |
| "loss/logits": 0.27218569815158844, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.016427849610032907, |
| "grad_norm": 0.326171875, |
| "grad_norm_var": 0.002330636978149414, |
| "learning_rate": 0.01, |
| "loss": 1.5638, |
| "loss/crossentropy": 2.581447720527649, |
| "loss/fcd": 1.2265625, |
| "loss/logits": 0.2988656759262085, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.016445123899843667, |
| "grad_norm": 0.412109375, |
| "grad_norm_var": 0.002758216857910156, |
| "learning_rate": 0.01, |
| "loss": 1.5667, |
| "loss/crossentropy": 2.21357798576355, |
| "loss/fcd": 1.2421875, |
| "loss/logits": 0.30781693756580353, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.016462398189654427, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0026659488677978514, |
| "learning_rate": 0.01, |
| "loss": 1.3711, |
| "loss/crossentropy": 2.1623282432556152, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.24749789386987686, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.016479672479465187, |
| "grad_norm": 0.306640625, |
| "grad_norm_var": 0.0025832494099934894, |
| "learning_rate": 0.01, |
| "loss": 1.3663, |
| "loss/crossentropy": 2.683838129043579, |
| "loss/fcd": 1.19921875, |
| "loss/logits": 0.2529330998659134, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.016496946769275947, |
| "grad_norm": 0.265625, |
| "grad_norm_var": 0.0027312596638997396, |
| "learning_rate": 0.01, |
| "loss": 1.3548, |
| "loss/crossentropy": 2.3420257568359375, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.253268837928772, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.016514221059086707, |
| "grad_norm": 0.35546875, |
| "grad_norm_var": 0.002652740478515625, |
| "learning_rate": 0.01, |
| "loss": 1.369, |
| "loss/crossentropy": 2.3265002965927124, |
| "loss/fcd": 1.1171875, |
| "loss/logits": 0.24975580722093582, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.01653149534889747, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.0025789737701416016, |
| "learning_rate": 0.01, |
| "loss": 1.3913, |
| "loss/crossentropy": 2.4944993257522583, |
| "loss/fcd": 1.16796875, |
| "loss/logits": 0.25516972690820694, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.01654876963870823, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0025911808013916017, |
| "learning_rate": 0.01, |
| "loss": 1.3542, |
| "loss/crossentropy": 2.583009362220764, |
| "loss/fcd": 1.15625, |
| "loss/logits": 0.26096589863300323, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.01656604392851899, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.002695465087890625, |
| "learning_rate": 0.01, |
| "loss": 1.4014, |
| "loss/crossentropy": 2.6060469150543213, |
| "loss/fcd": 1.1796875, |
| "loss/logits": 0.298343300819397, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.01658331821832975, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0024979750315348307, |
| "learning_rate": 0.01, |
| "loss": 1.4127, |
| "loss/crossentropy": 2.4206702709198, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2592027187347412, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.01660059250814051, |
| "grad_norm": 0.302734375, |
| "grad_norm_var": 0.002465550104777018, |
| "learning_rate": 0.01, |
| "loss": 1.4039, |
| "loss/crossentropy": 2.18042528629303, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.28101974725723267, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.01661786679795127, |
| "grad_norm": 0.353515625, |
| "grad_norm_var": 0.0024996439615885416, |
| "learning_rate": 0.01, |
| "loss": 1.3448, |
| "loss/crossentropy": 2.2248626947402954, |
| "loss/fcd": 1.07421875, |
| "loss/logits": 0.2191808819770813, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.01663514108776203, |
| "grad_norm": 0.357421875, |
| "grad_norm_var": 0.002541033426920573, |
| "learning_rate": 0.01, |
| "loss": 1.4061, |
| "loss/crossentropy": 2.476745128631592, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.26761066913604736, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.01665241537757279, |
| "grad_norm": 0.33984375, |
| "grad_norm_var": 0.00121305783589681, |
| "learning_rate": 0.01, |
| "loss": 1.427, |
| "loss/crossentropy": 2.3096065521240234, |
| "loss/fcd": 1.234375, |
| "loss/logits": 0.42609208822250366, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.01666968966738355, |
| "grad_norm": 0.31640625, |
| "grad_norm_var": 0.0012012322743733723, |
| "learning_rate": 0.01, |
| "loss": 1.3957, |
| "loss/crossentropy": 2.7282618284225464, |
| "loss/fcd": 1.20703125, |
| "loss/logits": 0.28854241967201233, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.01668696395719431, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0012641747792561848, |
| "learning_rate": 0.01, |
| "loss": 1.3752, |
| "loss/crossentropy": 2.339871048927307, |
| "loss/fcd": 1.08984375, |
| "loss/logits": 0.2586899399757385, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.01670423824700507, |
| "grad_norm": 0.263671875, |
| "grad_norm_var": 0.001474746068318685, |
| "learning_rate": 0.01, |
| "loss": 1.3006, |
| "loss/crossentropy": 2.3013978004455566, |
| "loss/fcd": 1.046875, |
| "loss/logits": 0.22273491322994232, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.01672151253681583, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0008559544881184896, |
| "learning_rate": 0.01, |
| "loss": 1.4225, |
| "loss/crossentropy": 2.47222638130188, |
| "loss/fcd": 1.2421875, |
| "loss/logits": 0.2986321449279785, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.01673878682662659, |
| "grad_norm": 0.29296875, |
| "grad_norm_var": 0.0008559544881184896, |
| "learning_rate": 0.01, |
| "loss": 1.4188, |
| "loss/crossentropy": 2.2383479475975037, |
| "loss/fcd": 1.2265625, |
| "loss/logits": 0.3132626414299011, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.016756061116437352, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0008643945058186849, |
| "learning_rate": 0.01, |
| "loss": 1.363, |
| "loss/crossentropy": 2.5179413557052612, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.2516755014657974, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.016773335406248112, |
| "grad_norm": 0.279296875, |
| "grad_norm_var": 0.0007908503214518229, |
| "learning_rate": 0.01, |
| "loss": 1.3967, |
| "loss/crossentropy": 1.9743611812591553, |
| "loss/fcd": 1.05859375, |
| "loss/logits": 0.24054741859436035, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.016790609696058872, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.000740671157836914, |
| "learning_rate": 0.01, |
| "loss": 1.3595, |
| "loss/crossentropy": 2.405099630355835, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.28836295008659363, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.01680788398586963, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.0007307529449462891, |
| "learning_rate": 0.01, |
| "loss": 1.4048, |
| "loss/crossentropy": 2.583898901939392, |
| "loss/fcd": 1.2109375, |
| "loss/logits": 0.2704490125179291, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.01682515827568039, |
| "grad_norm": 0.3125, |
| "grad_norm_var": 0.0007318973541259766, |
| "learning_rate": 0.01, |
| "loss": 1.4402, |
| "loss/crossentropy": 2.486370801925659, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2756696939468384, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.01684243256549115, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.0007501602172851563, |
| "learning_rate": 0.01, |
| "loss": 1.3591, |
| "loss/crossentropy": 2.421715497970581, |
| "loss/fcd": 1.0390625, |
| "loss/logits": 0.22876735776662827, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.01685970685530191, |
| "grad_norm": 0.318359375, |
| "grad_norm_var": 0.0007433573404947917, |
| "learning_rate": 0.01, |
| "loss": 1.3213, |
| "loss/crossentropy": 2.4171801805496216, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.23518116772174835, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.01687698114511267, |
| "grad_norm": 0.4140625, |
| "grad_norm_var": 0.0014527479807535807, |
| "learning_rate": 0.01, |
| "loss": 1.4776, |
| "loss/crossentropy": 2.080851912498474, |
| "loss/fcd": 1.26953125, |
| "loss/logits": 0.22676381468772888, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.01689425543492343, |
| "grad_norm": 0.2734375, |
| "grad_norm_var": 0.0014325459798177084, |
| "learning_rate": 0.01, |
| "loss": 1.3453, |
| "loss/crossentropy": 2.2649213075637817, |
| "loss/fcd": 1.09765625, |
| "loss/logits": 0.2382289096713066, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.01691152972473419, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.00134886105855306, |
| "learning_rate": 0.01, |
| "loss": 1.4216, |
| "loss/crossentropy": 2.4842547178268433, |
| "loss/fcd": 1.16015625, |
| "loss/logits": 0.27352918684482574, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.01692880401454495, |
| "grad_norm": 0.291015625, |
| "grad_norm_var": 0.0012618382771809897, |
| "learning_rate": 0.01, |
| "loss": 1.3782, |
| "loss/crossentropy": 2.163589835166931, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.26143455505371094, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.01694607830435571, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0012567520141601562, |
| "learning_rate": 0.01, |
| "loss": 1.3912, |
| "loss/crossentropy": 2.421532988548279, |
| "loss/fcd": 1.08203125, |
| "loss/logits": 0.23204928636550903, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.016963352594166474, |
| "grad_norm": 0.28125, |
| "grad_norm_var": 0.00127256711324056, |
| "learning_rate": 0.01, |
| "loss": 1.3826, |
| "loss/crossentropy": 2.607829451560974, |
| "loss/fcd": 1.125, |
| "loss/logits": 0.2582753002643585, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.016980626883977234, |
| "grad_norm": 0.3359375, |
| "grad_norm_var": 0.0012684504191080729, |
| "learning_rate": 0.01, |
| "loss": 1.3938, |
| "loss/crossentropy": 2.430111050605774, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2326122149825096, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.016997901173787994, |
| "grad_norm": 0.369140625, |
| "grad_norm_var": 0.001544936498006185, |
| "learning_rate": 0.01, |
| "loss": 1.4349, |
| "loss/crossentropy": 2.584348440170288, |
| "loss/fcd": 1.18359375, |
| "loss/logits": 0.27420538663864136, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.017015175463598754, |
| "grad_norm": 0.333984375, |
| "grad_norm_var": 0.0015746434529622397, |
| "learning_rate": 0.01, |
| "loss": 1.4002, |
| "loss/crossentropy": 2.6233400106430054, |
| "loss/fcd": 1.13671875, |
| "loss/logits": 0.2728031575679779, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.017032449753409513, |
| "grad_norm": 0.3203125, |
| "grad_norm_var": 0.001567840576171875, |
| "learning_rate": 0.01, |
| "loss": 1.3921, |
| "loss/crossentropy": 2.2127867937088013, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.24761803448200226, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.017049724043220273, |
| "grad_norm": 0.322265625, |
| "grad_norm_var": 0.0015125910441080729, |
| "learning_rate": 0.01, |
| "loss": 1.4117, |
| "loss/crossentropy": 2.4916510581970215, |
| "loss/fcd": 1.140625, |
| "loss/logits": 0.2528844252228737, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.017066998333031033, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0014711856842041016, |
| "learning_rate": 0.01, |
| "loss": 1.3702, |
| "loss/crossentropy": 2.076325237751007, |
| "loss/fcd": 1.109375, |
| "loss/logits": 0.24822547286748886, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.017084272622841793, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0015150547027587891, |
| "learning_rate": 0.01, |
| "loss": 1.375, |
| "loss/crossentropy": 2.2751649618148804, |
| "loss/fcd": 1.06640625, |
| "loss/logits": 0.2591545879840851, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.017101546912652553, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.001517470677693685, |
| "learning_rate": 0.01, |
| "loss": 1.3438, |
| "loss/crossentropy": 2.564236044883728, |
| "loss/fcd": 1.10546875, |
| "loss/logits": 0.2575865834951401, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.017118821202463313, |
| "grad_norm": 0.27734375, |
| "grad_norm_var": 0.0015337467193603516, |
| "learning_rate": 0.01, |
| "loss": 1.2948, |
| "loss/crossentropy": 2.322708249092102, |
| "loss/fcd": 1.0859375, |
| "loss/logits": 0.23693984001874924, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.017136095492274073, |
| "grad_norm": 0.30078125, |
| "grad_norm_var": 0.0015344619750976562, |
| "learning_rate": 0.01, |
| "loss": 1.4124, |
| "loss/crossentropy": 2.4255528450012207, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.25587616115808487, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.017153369782084833, |
| "grad_norm": 0.275390625, |
| "grad_norm_var": 0.0007997989654541015, |
| "learning_rate": 0.01, |
| "loss": 1.3437, |
| "loss/crossentropy": 2.5350613594055176, |
| "loss/fcd": 1.12109375, |
| "loss/logits": 0.25402751564979553, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.017170644071895596, |
| "grad_norm": 0.30859375, |
| "grad_norm_var": 0.0007494449615478516, |
| "learning_rate": 0.01, |
| "loss": 1.4055, |
| "loss/crossentropy": 2.5626988410949707, |
| "loss/fcd": 1.14453125, |
| "loss/logits": 0.25801587104797363, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.017187918361706356, |
| "grad_norm": 0.3046875, |
| "grad_norm_var": 0.000670480728149414, |
| "learning_rate": 0.01, |
| "loss": 1.3867, |
| "loss/crossentropy": 2.7328250408172607, |
| "loss/fcd": 1.171875, |
| "loss/logits": 0.28935085237026215, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.017205192651517116, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0006787459055582683, |
| "learning_rate": 0.01, |
| "loss": 1.3854, |
| "loss/crossentropy": 2.2958213090896606, |
| "loss/fcd": 1.1328125, |
| "loss/logits": 0.2697945237159729, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.017222466941327876, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0006787459055582683, |
| "learning_rate": 0.01, |
| "loss": 1.3576, |
| "loss/crossentropy": 2.314937472343445, |
| "loss/fcd": 1.09375, |
| "loss/logits": 0.24704495817422867, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.017239741231138635, |
| "grad_norm": 0.32421875, |
| "grad_norm_var": 0.0006591637929280598, |
| "learning_rate": 0.01, |
| "loss": 1.4405, |
| "loss/crossentropy": 2.582629084587097, |
| "loss/fcd": 1.26171875, |
| "loss/logits": 0.335773229598999, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.017257015520949395, |
| "grad_norm": 0.28515625, |
| "grad_norm_var": 0.0006277561187744141, |
| "learning_rate": 0.01, |
| "loss": 1.3605, |
| "loss/crossentropy": 2.299025297164917, |
| "loss/fcd": 1.052734375, |
| "loss/logits": 0.23469385504722595, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.017274289810760155, |
| "grad_norm": 0.26953125, |
| "grad_norm_var": 0.00038700103759765626, |
| "learning_rate": 0.01, |
| "loss": 1.3825, |
| "loss/crossentropy": 2.467602014541626, |
| "loss/fcd": 1.15234375, |
| "loss/logits": 0.2697184160351753, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 300000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": true, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.70040442617856e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|