| ------------> log file ==runs2/rte/1/log_bs32_lr3e-05_20221118_060236_793692.txt |
| Namespace(aug_train=False, data_dir='/home.local/jianwei/datasets/nlp/glue_data/RTE', do_eval=False, early_stop=True, early_stop_metric='accuracy', eval_step=120, gradient_accumulation_steps=1, learning_rate=3e-05, local_rank=0, lr_scheduler_type=<SchedulerType.CONSTANT_WITH_WARMUP: 'constant_with_warmup'>, max_length=128, max_train_steps=None, model_name_or_path='/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5', num_train_epochs=30, num_warmup_steps=0, output_dir='runs2/rte/1', pad_to_max_length=False, per_device_eval_batch_size=32, per_device_train_batch_size=32, print_step=5, save_last=False, seed=None, task_name='rte', train_file=None, use_slow_tokenizer=False, validation_file=None, weight_decay=0.0) |
| Distributed environment: NO |
| Num processes: 1 |
| Process index: 0 |
| Local process index: 0 |
| Device: cuda |
| Mixed precision type: fp16 |
|
|
| Sample 595 of the training set: (tensor([ 101, 11929, 1010, 5553, 1012, 2570, 1006, 8418, 25311, 13860, |
| 3388, 1007, 1011, 1011, 2019, 18410, 2140, 6187, 24887, 2080, |
| 11183, 1010, 1037, 2280, 3539, 2704, 1010, 2180, 5978, 1005, |
| 1055, 4883, 2602, 2006, 4465, 1012, 102, 2047, 5077, 3539, |
| 2704, 2003, 2700, 1012, 102, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)). |
| Sample 2375 of the training set: (tensor([ 101, 1996, 5611, 2390, 2749, 3344, 2041, 1010, 2006, 5095, |
| 1010, 1037, 6923, 2510, 3169, 2046, 1996, 2225, 2924, 2237, |
| 1997, 15419, 2378, 1998, 2049, 13141, 3409, 1010, 2334, 9302, |
| 4216, 2056, 1012, 102, 1996, 5611, 2390, 3344, 2041, 1037, |
| 6923, 3169, 1999, 15419, 2378, 1012, 102, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0]), tensor(0)). |
| Sample 149 of the training set: (tensor([ 101, 2048, 9767, 8461, 2379, 2019, 5499, 2082, 1999, 4501, |
| 2730, 2809, 2111, 1998, 5229, 4413, 2500, 7483, 1999, 1996, |
| 6745, 8293, 1997, 4808, 13940, 1996, 2670, 3417, 1997, 15381, |
| 1012, 102, 2809, 2111, 8461, 2048, 9767, 2379, 2019, 5499, |
| 2082, 1999, 4501, 1012, 102, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)). |
| ***** Running training ***** |
| Num examples = 2490 |
| Num Epochs = 30 |
| Instantaneous batch size per device = 32 |
| Total train batch size (w. parallel, distributed & accumulation) = 32 |
| Gradient Accumulation steps = 1 |
| Total optimization steps = 2340 |
| 000005/002340, loss: 0.694824, avg_loss: 0.691177 |
| 000010/002340, loss: 0.707565, avg_loss: 0.693715 |
| 000015/002340, loss: 0.699615, avg_loss: 0.693022 |
| 000020/002340, loss: 0.699615, avg_loss: 0.693939 |
| 000025/002340, loss: 0.699310, avg_loss: 0.694436 |
| 000030/002340, loss: 0.698532, avg_loss: 0.694941 |
| 000035/002340, loss: 0.686935, avg_loss: 0.694372 |
| 000040/002340, loss: 0.696411, avg_loss: 0.694273 |
| 000045/002340, loss: 0.692871, avg_loss: 0.693708 |
| 000050/002340, loss: 0.687256, avg_loss: 0.693756 |
| 000055/002340, loss: 0.701004, avg_loss: 0.693827 |
| 000060/002340, loss: 0.691040, avg_loss: 0.693579 |
| 000065/002340, loss: 0.689056, avg_loss: 0.693324 |
| 000070/002340, loss: 0.696518, avg_loss: 0.693440 |
| 000075/002340, loss: 0.696930, avg_loss: 0.693460 |
| 000080/002340, loss: 0.693802, avg_loss: 0.693340 |
| 000085/002340, loss: 0.688171, avg_loss: 0.693318 |
| 000090/002340, loss: 0.698029, avg_loss: 0.693154 |
| 000095/002340, loss: 0.689453, avg_loss: 0.692949 |
| 000100/002340, loss: 0.690857, avg_loss: 0.692921 |
| 000105/002340, loss: 0.689819, avg_loss: 0.692827 |
| 000110/002340, loss: 0.682220, avg_loss: 0.692768 |
| 000115/002340, loss: 0.700806, avg_loss: 0.692803 |
| 000120/002340, loss: 0.701385, avg_loss: 0.692652 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 1, step 120/2340: {'accuracy': 0.5523465703971119} |
| 000125/002340, loss: 0.693527, avg_loss: 0.692706 |
| 000130/002340, loss: 0.689957, avg_loss: 0.692658 |
| 000135/002340, loss: 0.685425, avg_loss: 0.692536 |
| 000140/002340, loss: 0.690201, avg_loss: 0.692434 |
| 000145/002340, loss: 0.686600, avg_loss: 0.692396 |
| 000150/002340, loss: 0.678986, avg_loss: 0.692177 |
| 000155/002340, loss: 0.679138, avg_loss: 0.691975 |
| 000160/002340, loss: 0.694275, avg_loss: 0.691769 |
| 000165/002340, loss: 0.692368, avg_loss: 0.691443 |
| 000170/002340, loss: 0.680664, avg_loss: 0.691252 |
| 000175/002340, loss: 0.666016, avg_loss: 0.690698 |
| 000180/002340, loss: 0.671844, avg_loss: 0.690296 |
| 000185/002340, loss: 0.651184, avg_loss: 0.689748 |
| 000190/002340, loss: 0.659752, avg_loss: 0.688919 |
| 000195/002340, loss: 0.662926, avg_loss: 0.688697 |
| 000200/002340, loss: 0.643776, avg_loss: 0.688136 |
| 000205/002340, loss: 0.693794, avg_loss: 0.687406 |
| 000210/002340, loss: 0.716675, avg_loss: 0.686937 |
| 000215/002340, loss: 0.665474, avg_loss: 0.686136 |
| 000220/002340, loss: 0.625298, avg_loss: 0.685308 |
| 000225/002340, loss: 0.656639, avg_loss: 0.685019 |
| 000230/002340, loss: 0.673508, avg_loss: 0.684550 |
| 000235/002340, loss: 0.575394, avg_loss: 0.682954 |
| 000240/002340, loss: 0.615173, avg_loss: 0.681390 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 3, step 240/2340: {'accuracy': 0.5884476534296029} |
| 000245/002340, loss: 0.566116, avg_loss: 0.679216 |
| 000250/002340, loss: 0.662231, avg_loss: 0.677990 |
| 000255/002340, loss: 0.742844, avg_loss: 0.677457 |
| 000260/002340, loss: 0.744896, avg_loss: 0.677289 |
| 000265/002340, loss: 0.524788, avg_loss: 0.675974 |
| 000270/002340, loss: 0.573128, avg_loss: 0.674871 |
| 000275/002340, loss: 0.698616, avg_loss: 0.674028 |
| 000280/002340, loss: 0.661125, avg_loss: 0.672997 |
| 000285/002340, loss: 0.577705, avg_loss: 0.671527 |
| 000290/002340, loss: 0.529144, avg_loss: 0.669498 |
| 000295/002340, loss: 0.548820, avg_loss: 0.668429 |
| 000300/002340, loss: 0.533775, avg_loss: 0.667589 |
| 000305/002340, loss: 0.724682, avg_loss: 0.666549 |
| 000310/002340, loss: 0.618702, avg_loss: 0.667052 |
| 000315/002340, loss: 0.600662, avg_loss: 0.666212 |
| 000320/002340, loss: 0.560127, avg_loss: 0.665015 |
| 000325/002340, loss: 0.667423, avg_loss: 0.663344 |
| 000330/002340, loss: 0.520096, avg_loss: 0.661692 |
| 000335/002340, loss: 0.589901, avg_loss: 0.659812 |
| 000340/002340, loss: 0.718616, avg_loss: 0.658405 |
| 000345/002340, loss: 0.523731, avg_loss: 0.657693 |
| 000350/002340, loss: 0.597912, avg_loss: 0.656364 |
| 000355/002340, loss: 0.510841, avg_loss: 0.654704 |
| 000360/002340, loss: 0.598392, avg_loss: 0.652629 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 4, step 360/2340: {'accuracy': 0.6137184115523465} |
| 000365/002340, loss: 0.509396, avg_loss: 0.650652 |
| 000370/002340, loss: 0.625957, avg_loss: 0.649372 |
| 000375/002340, loss: 0.632420, avg_loss: 0.648425 |
| 000380/002340, loss: 0.562641, avg_loss: 0.647222 |
| 000385/002340, loss: 0.649609, avg_loss: 0.645501 |
| 000390/002340, loss: 0.361694, avg_loss: 0.643182 |
| 000395/002340, loss: 0.425430, avg_loss: 0.642246 |
| 000400/002340, loss: 0.577938, avg_loss: 0.640067 |
| 000405/002340, loss: 0.554668, avg_loss: 0.638333 |
| 000410/002340, loss: 0.505466, avg_loss: 0.636457 |
| 000415/002340, loss: 0.531124, avg_loss: 0.634969 |
| 000420/002340, loss: 0.425911, avg_loss: 0.633147 |
| 000425/002340, loss: 0.532368, avg_loss: 0.632082 |
| 000430/002340, loss: 0.569756, avg_loss: 0.630961 |
| 000435/002340, loss: 0.451645, avg_loss: 0.629107 |
| 000440/002340, loss: 0.459530, avg_loss: 0.627486 |
| 000445/002340, loss: 0.380501, avg_loss: 0.625123 |
| 000450/002340, loss: 0.565880, avg_loss: 0.624122 |
| 000455/002340, loss: 0.422201, avg_loss: 0.621911 |
| 000460/002340, loss: 0.671333, avg_loss: 0.620993 |
| 000465/002340, loss: 0.427799, avg_loss: 0.618575 |
| 000470/002340, loss: 0.301590, avg_loss: 0.616753 |
| 000475/002340, loss: 0.517204, avg_loss: 0.614735 |
| 000480/002340, loss: 0.473822, avg_loss: 0.612666 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 6, step 480/2340: {'accuracy': 0.6209386281588448} |
| 000485/002340, loss: 0.235840, avg_loss: 0.610187 |
| 000490/002340, loss: 0.535803, avg_loss: 0.608769 |
| 000495/002340, loss: 0.447842, avg_loss: 0.606833 |
| 000500/002340, loss: 0.359915, avg_loss: 0.604468 |
| 000505/002340, loss: 0.473944, avg_loss: 0.601928 |
| 000510/002340, loss: 0.487707, avg_loss: 0.600405 |
| 000515/002340, loss: 0.280029, avg_loss: 0.599008 |
| 000520/002340, loss: 0.509848, avg_loss: 0.597484 |
| 000525/002340, loss: 0.646320, avg_loss: 0.596454 |
| 000530/002340, loss: 0.350674, avg_loss: 0.594710 |
| 000535/002340, loss: 0.480106, avg_loss: 0.593436 |
| 000540/002340, loss: 0.560251, avg_loss: 0.593214 |
| 000545/002340, loss: 0.387239, avg_loss: 0.591432 |
| 000550/002340, loss: 0.277430, avg_loss: 0.589320 |
| 000555/002340, loss: 0.280695, avg_loss: 0.587417 |
| 000560/002340, loss: 0.330351, avg_loss: 0.585310 |
| 000565/002340, loss: 0.391579, avg_loss: 0.583662 |
| 000570/002340, loss: 0.280355, avg_loss: 0.582107 |
| 000575/002340, loss: 0.359081, avg_loss: 0.580171 |
| 000580/002340, loss: 0.367201, avg_loss: 0.578450 |
| 000585/002340, loss: 0.430851, avg_loss: 0.577231 |
| 000590/002340, loss: 0.331879, avg_loss: 0.575557 |
| 000595/002340, loss: 0.333700, avg_loss: 0.573829 |
| 000600/002340, loss: 0.309275, avg_loss: 0.571686 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 7, step 600/2340: {'accuracy': 0.6425992779783394} |
| 000605/002340, loss: 0.461454, avg_loss: 0.570168 |
| 000610/002340, loss: 0.434152, avg_loss: 0.568408 |
| 000615/002340, loss: 0.565701, avg_loss: 0.567013 |
| 000620/002340, loss: 0.281487, avg_loss: 0.564378 |
| 000625/002340, loss: 0.183996, avg_loss: 0.562576 |
| 000630/002340, loss: 0.308249, avg_loss: 0.560548 |
| 000635/002340, loss: 0.492087, avg_loss: 0.558905 |
| 000640/002340, loss: 0.276144, avg_loss: 0.556907 |
| 000645/002340, loss: 0.379016, avg_loss: 0.555011 |
| 000650/002340, loss: 0.257240, avg_loss: 0.553119 |
| 000655/002340, loss: 0.260510, avg_loss: 0.550735 |
| 000660/002340, loss: 0.482807, avg_loss: 0.549067 |
| 000665/002340, loss: 0.313425, avg_loss: 0.547653 |
| 000670/002340, loss: 0.244961, avg_loss: 0.545744 |
| 000675/002340, loss: 0.386663, avg_loss: 0.544380 |
| 000680/002340, loss: 0.137331, avg_loss: 0.541812 |
| 000685/002340, loss: 0.301256, avg_loss: 0.539778 |
| 000690/002340, loss: 0.284186, avg_loss: 0.537928 |
| 000695/002340, loss: 0.521972, avg_loss: 0.536261 |
| 000700/002340, loss: 0.718600, avg_loss: 0.535717 |
| 000705/002340, loss: 0.237306, avg_loss: 0.534266 |
| 000710/002340, loss: 0.164028, avg_loss: 0.532027 |
| 000715/002340, loss: 0.235560, avg_loss: 0.530920 |
| 000720/002340, loss: 0.224425, avg_loss: 0.529428 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 9, step 720/2340: {'accuracy': 0.6462093862815884} |
| 000725/002340, loss: 0.250054, avg_loss: 0.527996 |
| 000730/002340, loss: 0.213790, avg_loss: 0.526521 |
| 000735/002340, loss: 0.339844, avg_loss: 0.525346 |
| 000740/002340, loss: 0.192316, avg_loss: 0.523399 |
| 000745/002340, loss: 0.322181, avg_loss: 0.521820 |
| 000750/002340, loss: 0.114270, avg_loss: 0.519722 |
| 000755/002340, loss: 0.242498, avg_loss: 0.517846 |
| 000760/002340, loss: 0.234197, avg_loss: 0.515497 |
| 000765/002340, loss: 0.332447, avg_loss: 0.513969 |
| 000770/002340, loss: 0.163693, avg_loss: 0.512496 |
| 000775/002340, loss: 0.260910, avg_loss: 0.511088 |
| 000780/002340, loss: 0.236919, avg_loss: 0.509495 |
| 000785/002340, loss: 0.151022, avg_loss: 0.507580 |
| 000790/002340, loss: 0.489914, avg_loss: 0.506298 |
| 000795/002340, loss: 0.175525, avg_loss: 0.504419 |
| 000800/002340, loss: 0.274471, avg_loss: 0.502310 |
| 000805/002340, loss: 0.308759, avg_loss: 0.500468 |
| 000810/002340, loss: 0.227170, avg_loss: 0.498888 |
| 000815/002340, loss: 0.112951, avg_loss: 0.496910 |
| 000820/002340, loss: 0.168542, avg_loss: 0.495333 |
| 000825/002340, loss: 0.163078, avg_loss: 0.493526 |
| 000830/002340, loss: 0.208418, avg_loss: 0.492144 |
| 000835/002340, loss: 0.204179, avg_loss: 0.490463 |
| 000840/002340, loss: 0.262290, avg_loss: 0.488488 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 10, step 840/2340: {'accuracy': 0.6245487364620939} |
| 000845/002340, loss: 0.166388, avg_loss: 0.486870 |
| 000850/002340, loss: 0.221429, avg_loss: 0.485510 |
| 000855/002340, loss: 0.376082, avg_loss: 0.484030 |
| 000860/002340, loss: 0.083231, avg_loss: 0.482307 |
| 000865/002340, loss: 0.161541, avg_loss: 0.480355 |
| 000870/002340, loss: 0.180701, avg_loss: 0.478405 |
| 000875/002340, loss: 0.175531, avg_loss: 0.476498 |
| 000880/002340, loss: 0.148172, avg_loss: 0.475174 |
| 000885/002340, loss: 0.110148, avg_loss: 0.473676 |
| 000890/002340, loss: 0.177225, avg_loss: 0.472175 |
| 000895/002340, loss: 0.051785, avg_loss: 0.470479 |
| 000900/002340, loss: 0.239419, avg_loss: 0.469122 |
| 000905/002340, loss: 0.294643, avg_loss: 0.467460 |
| 000910/002340, loss: 0.372546, avg_loss: 0.466119 |
| 000915/002340, loss: 0.160401, avg_loss: 0.464562 |
| 000920/002340, loss: 0.389829, avg_loss: 0.463444 |
| 000925/002340, loss: 0.461596, avg_loss: 0.462050 |
| 000930/002340, loss: 0.169349, avg_loss: 0.460443 |
| 000935/002340, loss: 0.274192, avg_loss: 0.459206 |
| 000940/002340, loss: 0.245536, avg_loss: 0.457409 |
| 000945/002340, loss: 0.124900, avg_loss: 0.455669 |
| 000950/002340, loss: 0.258810, avg_loss: 0.453951 |
| 000955/002340, loss: 0.328007, avg_loss: 0.452289 |
| 000960/002340, loss: 0.243825, avg_loss: 0.450600 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 12, step 960/2340: {'accuracy': 0.6389891696750902} |
| 000965/002340, loss: 0.201036, avg_loss: 0.449321 |
| 000970/002340, loss: 0.091728, avg_loss: 0.447797 |
| 000975/002340, loss: 0.182425, avg_loss: 0.446324 |
| 000980/002340, loss: 0.159452, avg_loss: 0.444909 |
| 000985/002340, loss: 0.142912, avg_loss: 0.443522 |
| 000990/002340, loss: 0.304327, avg_loss: 0.442004 |
| 000995/002340, loss: 0.117483, avg_loss: 0.440452 |
| 001000/002340, loss: 0.156437, avg_loss: 0.438837 |
| 001005/002340, loss: 0.032182, avg_loss: 0.437682 |
| 001010/002340, loss: 0.063084, avg_loss: 0.436744 |
| 001015/002340, loss: 0.258552, avg_loss: 0.435504 |
| 001020/002340, loss: 0.091414, avg_loss: 0.434340 |
| 001025/002340, loss: 0.100409, avg_loss: 0.432843 |
| 001030/002340, loss: 0.064708, avg_loss: 0.431516 |
| 001035/002340, loss: 0.459350, avg_loss: 0.430340 |
| 001040/002340, loss: 0.195770, avg_loss: 0.428896 |
| 001045/002340, loss: 0.101108, avg_loss: 0.427430 |
| 001050/002340, loss: 0.162723, avg_loss: 0.425868 |
| 001055/002340, loss: 0.170199, avg_loss: 0.424800 |
| 001060/002340, loss: 0.066082, avg_loss: 0.423415 |
| 001065/002340, loss: 0.139599, avg_loss: 0.422219 |
| 001070/002340, loss: 0.089475, avg_loss: 0.420665 |
| 001075/002340, loss: 0.115157, avg_loss: 0.419250 |
| 001080/002340, loss: 0.085939, avg_loss: 0.417821 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 13, step 1080/2340: {'accuracy': 0.6173285198555957} |
| 001085/002340, loss: 0.138964, avg_loss: 0.416740 |
| 001090/002340, loss: 0.385725, avg_loss: 0.415552 |
| 001095/002340, loss: 0.173466, avg_loss: 0.414612 |
| 001100/002340, loss: 0.101382, avg_loss: 0.413397 |
| 001105/002340, loss: 0.098917, avg_loss: 0.412091 |
| 001110/002340, loss: 0.088198, avg_loss: 0.410518 |
| 001115/002340, loss: 0.039977, avg_loss: 0.409207 |
| 001120/002340, loss: 0.126413, avg_loss: 0.407805 |
| 001125/002340, loss: 0.154641, avg_loss: 0.406540 |
| 001130/002340, loss: 0.221717, avg_loss: 0.405238 |
| 001135/002340, loss: 0.155590, avg_loss: 0.403870 |
| 001140/002340, loss: 0.072533, avg_loss: 0.402521 |
| 001145/002340, loss: 0.148947, avg_loss: 0.401401 |
| 001150/002340, loss: 0.202878, avg_loss: 0.400165 |
| 001155/002340, loss: 0.054971, avg_loss: 0.399305 |
| 001160/002340, loss: 0.058926, avg_loss: 0.398088 |
| 001165/002340, loss: 0.187665, avg_loss: 0.396901 |
| 001170/002340, loss: 0.091442, avg_loss: 0.395624 |
| 001175/002340, loss: 0.339817, avg_loss: 0.394529 |
| 001180/002340, loss: 0.029183, avg_loss: 0.393430 |
| 001185/002340, loss: 0.052091, avg_loss: 0.392348 |
| 001190/002340, loss: 0.175309, avg_loss: 0.391464 |
| 001195/002340, loss: 0.269615, avg_loss: 0.390438 |
| 001200/002340, loss: 0.042982, avg_loss: 0.389416 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 15, step 1200/2340: {'accuracy': 0.6353790613718412} |
| 001205/002340, loss: 0.029362, avg_loss: 0.388045 |
| 001210/002340, loss: 0.106356, avg_loss: 0.386842 |
| 001215/002340, loss: 0.055282, avg_loss: 0.385720 |
| 001220/002340, loss: 0.025587, avg_loss: 0.384474 |
| 001225/002340, loss: 0.017830, avg_loss: 0.383314 |
| 001230/002340, loss: 0.156192, avg_loss: 0.382166 |
| 001235/002340, loss: 0.017268, avg_loss: 0.381167 |
| 001240/002340, loss: 0.015908, avg_loss: 0.379919 |
| 001245/002340, loss: 0.024442, avg_loss: 0.378661 |
| 001250/002340, loss: 0.016508, avg_loss: 0.377585 |
| 001255/002340, loss: 0.021355, avg_loss: 0.376479 |
| 001260/002340, loss: 0.024076, avg_loss: 0.375165 |
| 001265/002340, loss: 0.202033, avg_loss: 0.374116 |
| 001270/002340, loss: 0.027793, avg_loss: 0.372882 |
| 001275/002340, loss: 0.027369, avg_loss: 0.372247 |
| 001280/002340, loss: 0.021813, avg_loss: 0.371052 |
| 001285/002340, loss: 0.021163, avg_loss: 0.370046 |
| 001290/002340, loss: 0.046603, avg_loss: 0.369336 |
| 001295/002340, loss: 0.076338, avg_loss: 0.368328 |
| 001300/002340, loss: 0.183380, avg_loss: 0.367225 |
| 001305/002340, loss: 0.169317, avg_loss: 0.366140 |
| 001310/002340, loss: 0.020987, avg_loss: 0.365018 |
| 001315/002340, loss: 0.169484, avg_loss: 0.364127 |
| 001320/002340, loss: 0.044023, avg_loss: 0.363106 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 16, step 1320/2340: {'accuracy': 0.6462093862815884} |
| 001325/002340, loss: 0.146640, avg_loss: 0.361943 |
| 001330/002340, loss: 0.053370, avg_loss: 0.360778 |
| 001335/002340, loss: 0.024849, avg_loss: 0.359785 |
| 001340/002340, loss: 0.040356, avg_loss: 0.358545 |
| 001345/002340, loss: 0.216520, avg_loss: 0.357564 |
| 001350/002340, loss: 0.020188, avg_loss: 0.356442 |
| 001355/002340, loss: 0.050854, avg_loss: 0.355434 |
| 001360/002340, loss: 0.013922, avg_loss: 0.354336 |
| 001365/002340, loss: 0.034302, avg_loss: 0.353537 |
| 001370/002340, loss: 0.083984, avg_loss: 0.352530 |
| 001375/002340, loss: 0.044313, avg_loss: 0.351671 |
| 001380/002340, loss: 0.197178, avg_loss: 0.350656 |
| 001385/002340, loss: 0.087372, avg_loss: 0.349721 |
| 001390/002340, loss: 0.122292, avg_loss: 0.348657 |
| 001395/002340, loss: 0.161705, avg_loss: 0.347780 |
| 001400/002340, loss: 0.014310, avg_loss: 0.346943 |
| 001405/002340, loss: 0.096345, avg_loss: 0.345930 |
| 001410/002340, loss: 0.142292, avg_loss: 0.345120 |
| 001415/002340, loss: 0.016984, avg_loss: 0.344193 |
| 001420/002340, loss: 0.014843, avg_loss: 0.343171 |
| 001425/002340, loss: 0.054250, avg_loss: 0.342329 |
| 001430/002340, loss: 0.049341, avg_loss: 0.341417 |
| 001435/002340, loss: 0.033567, avg_loss: 0.340340 |
| 001440/002340, loss: 0.108241, avg_loss: 0.339508 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 18, step 1440/2340: {'accuracy': 0.6137184115523465} |
| 001445/002340, loss: 0.148780, avg_loss: 0.338643 |
| 001450/002340, loss: 0.121979, avg_loss: 0.337871 |
| 001455/002340, loss: 0.015762, avg_loss: 0.337010 |
| 001460/002340, loss: 0.197943, avg_loss: 0.336178 |
| 001465/002340, loss: 0.019593, avg_loss: 0.335371 |
| 001470/002340, loss: 0.129545, avg_loss: 0.334404 |
| 001475/002340, loss: 0.015238, avg_loss: 0.333483 |
| 001480/002340, loss: 0.016869, avg_loss: 0.332625 |
| 001485/002340, loss: 0.011418, avg_loss: 0.331565 |
| 001490/002340, loss: 0.338315, avg_loss: 0.330893 |
| 001495/002340, loss: 0.288740, avg_loss: 0.330484 |
| 001500/002340, loss: 0.148870, avg_loss: 0.329575 |
| 001505/002340, loss: 0.013757, avg_loss: 0.328768 |
| 001510/002340, loss: 0.016786, avg_loss: 0.327894 |
| 001515/002340, loss: 0.013239, avg_loss: 0.326989 |
| 001520/002340, loss: 0.024581, avg_loss: 0.326006 |
| 001525/002340, loss: 0.017539, avg_loss: 0.325226 |
| 001530/002340, loss: 0.067678, avg_loss: 0.324287 |
| 001535/002340, loss: 0.024253, avg_loss: 0.323389 |
| 001540/002340, loss: 0.077925, avg_loss: 0.322495 |
| 001545/002340, loss: 0.024680, avg_loss: 0.321567 |
| 001550/002340, loss: 0.012920, avg_loss: 0.320824 |
| 001555/002340, loss: 0.023837, avg_loss: 0.320000 |
| 001560/002340, loss: 0.221982, avg_loss: 0.319304 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 19, step 1560/2340: {'accuracy': 0.6137184115523465} |
| 001565/002340, loss: 0.013699, avg_loss: 0.318449 |
| 001570/002340, loss: 0.011844, avg_loss: 0.317610 |
| 001575/002340, loss: 0.012580, avg_loss: 0.316855 |
| 001580/002340, loss: 0.037540, avg_loss: 0.316005 |
| 001585/002340, loss: 0.019229, avg_loss: 0.315232 |
| 001590/002340, loss: 0.048232, avg_loss: 0.314477 |
| 001595/002340, loss: 0.141452, avg_loss: 0.313963 |
| 001600/002340, loss: 0.015298, avg_loss: 0.313133 |
| 001605/002340, loss: 0.013662, avg_loss: 0.312229 |
| 001610/002340, loss: 0.160849, avg_loss: 0.311404 |
| 001615/002340, loss: 0.012301, avg_loss: 0.310524 |
| 001620/002340, loss: 0.063877, avg_loss: 0.309759 |
| 001625/002340, loss: 0.032892, avg_loss: 0.309026 |
| 001630/002340, loss: 0.177563, avg_loss: 0.308279 |
| 001635/002340, loss: 0.157313, avg_loss: 0.307644 |
| 001640/002340, loss: 0.130090, avg_loss: 0.306819 |
| 001645/002340, loss: 0.021889, avg_loss: 0.306081 |
| 001650/002340, loss: 0.152882, avg_loss: 0.305300 |
| 001655/002340, loss: 0.009122, avg_loss: 0.304627 |
| 001660/002340, loss: 0.015140, avg_loss: 0.303849 |
| 001665/002340, loss: 0.164985, avg_loss: 0.303089 |
| 001670/002340, loss: 0.008990, avg_loss: 0.302396 |
| 001675/002340, loss: 0.010757, avg_loss: 0.301671 |
| 001680/002340, loss: 0.009137, avg_loss: 0.300904 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 21, step 1680/2340: {'accuracy': 0.6173285198555957} |
| 001685/002340, loss: 0.053387, avg_loss: 0.300194 |
| 001690/002340, loss: 0.022511, avg_loss: 0.299502 |
| 001695/002340, loss: 0.105420, avg_loss: 0.298722 |
| 001700/002340, loss: 0.013549, avg_loss: 0.297988 |
| 001705/002340, loss: 0.073981, avg_loss: 0.297318 |
| 001710/002340, loss: 0.014491, avg_loss: 0.296600 |
| 001715/002340, loss: 0.154422, avg_loss: 0.295955 |
| 001720/002340, loss: 0.163267, avg_loss: 0.295310 |
| 001725/002340, loss: 0.136114, avg_loss: 0.294759 |
| 001730/002340, loss: 0.015310, avg_loss: 0.294064 |
| 001735/002340, loss: 0.087005, avg_loss: 0.293422 |
| 001740/002340, loss: 0.020296, avg_loss: 0.292756 |
| 001745/002340, loss: 0.018787, avg_loss: 0.292135 |
| 001750/002340, loss: 0.034191, avg_loss: 0.291526 |
| 001755/002340, loss: 0.045470, avg_loss: 0.290987 |
| 001760/002340, loss: 0.014372, avg_loss: 0.290662 |
| 001765/002340, loss: 0.015767, avg_loss: 0.289942 |
| 001770/002340, loss: 0.039629, avg_loss: 0.289302 |
| 001775/002340, loss: 0.016410, avg_loss: 0.288527 |
| 001780/002340, loss: 0.038289, avg_loss: 0.287933 |
| 001785/002340, loss: 0.017720, avg_loss: 0.287493 |
| 001790/002340, loss: 0.033570, avg_loss: 0.286735 |
| 001795/002340, loss: 0.012522, avg_loss: 0.286079 |
| 001800/002340, loss: 0.053891, avg_loss: 0.285344 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 23, step 1800/2340: {'accuracy': 0.6245487364620939} |
| 001805/002340, loss: 0.126177, avg_loss: 0.284716 |
| 001810/002340, loss: 0.011923, avg_loss: 0.284070 |
| 001815/002340, loss: 0.142181, avg_loss: 0.283613 |
| 001820/002340, loss: 0.010828, avg_loss: 0.282998 |
| 001825/002340, loss: 0.025087, avg_loss: 0.282492 |
| 001830/002340, loss: 0.273915, avg_loss: 0.281916 |
| 001835/002340, loss: 0.016827, avg_loss: 0.281382 |
| 001840/002340, loss: 0.010785, avg_loss: 0.280767 |
| 001845/002340, loss: 0.015339, avg_loss: 0.280337 |
| 001850/002340, loss: 0.020906, avg_loss: 0.279696 |
| 001855/002340, loss: 0.165239, avg_loss: 0.279069 |
| 001860/002340, loss: 0.053642, avg_loss: 0.278450 |
| 001865/002340, loss: 0.133574, avg_loss: 0.277862 |
| 001870/002340, loss: 0.097644, avg_loss: 0.277226 |
| 001875/002340, loss: 0.059441, avg_loss: 0.276570 |
| 001880/002340, loss: 0.016699, avg_loss: 0.275948 |
| 001885/002340, loss: 0.146401, avg_loss: 0.275488 |
| 001890/002340, loss: 0.011636, avg_loss: 0.274799 |
| 001895/002340, loss: 0.018686, avg_loss: 0.274214 |
| 001900/002340, loss: 0.026965, avg_loss: 0.273611 |
| 001905/002340, loss: 0.013933, avg_loss: 0.272935 |
| 001910/002340, loss: 0.125580, avg_loss: 0.272318 |
| 001915/002340, loss: 0.129783, avg_loss: 0.271802 |
| 001920/002340, loss: 0.116678, avg_loss: 0.271278 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 24, step 1920/2340: {'accuracy': 0.6173285198555957} |
| 001925/002340, loss: 0.254784, avg_loss: 0.270806 |
| 001930/002340, loss: 0.157526, avg_loss: 0.270238 |
| 001935/002340, loss: 0.031608, avg_loss: 0.269644 |
| 001940/002340, loss: 0.009236, avg_loss: 0.269169 |
| 001945/002340, loss: 0.009980, avg_loss: 0.268799 |
| 001950/002340, loss: 0.033835, avg_loss: 0.268168 |
| 001955/002340, loss: 0.051771, avg_loss: 0.267547 |
| 001960/002340, loss: 0.142184, avg_loss: 0.267055 |
| 001965/002340, loss: 0.046325, avg_loss: 0.266676 |
| 001970/002340, loss: 0.041966, avg_loss: 0.266192 |
| 001975/002340, loss: 0.020202, avg_loss: 0.265597 |
| 001980/002340, loss: 0.125195, avg_loss: 0.265071 |
| 001985/002340, loss: 0.019307, avg_loss: 0.264558 |
| 001990/002340, loss: 0.011511, avg_loss: 0.263954 |
| 001995/002340, loss: 0.092994, avg_loss: 0.263384 |
| 002000/002340, loss: 0.098703, avg_loss: 0.262809 |
| 002005/002340, loss: 0.017836, avg_loss: 0.262371 |
| 002010/002340, loss: 0.047947, avg_loss: 0.261831 |
| 002015/002340, loss: 0.157151, avg_loss: 0.261291 |
| 002020/002340, loss: 0.063095, avg_loss: 0.260695 |
| 002025/002340, loss: 0.239691, avg_loss: 0.260198 |
| 002030/002340, loss: 0.008953, avg_loss: 0.259652 |
| 002035/002340, loss: 0.008303, avg_loss: 0.259056 |
| 002040/002340, loss: 0.133496, avg_loss: 0.258505 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 26, step 2040/2340: {'accuracy': 0.6173285198555957} |
| 002045/002340, loss: 0.070495, avg_loss: 0.258069 |
| 002050/002340, loss: 0.082666, avg_loss: 0.257558 |
| 002055/002340, loss: 0.036117, avg_loss: 0.257011 |
| 002060/002340, loss: 0.018446, avg_loss: 0.256447 |
| 002065/002340, loss: 0.019938, avg_loss: 0.255982 |
| 002070/002340, loss: 0.010070, avg_loss: 0.255545 |
| 002075/002340, loss: 0.010592, avg_loss: 0.254990 |
| 002080/002340, loss: 0.047749, avg_loss: 0.254418 |
| 002085/002340, loss: 0.157273, avg_loss: 0.253991 |
| 002090/002340, loss: 0.012268, avg_loss: 0.253488 |
| 002095/002340, loss: 0.010397, avg_loss: 0.252964 |
| 002100/002340, loss: 0.152166, avg_loss: 0.252516 |
| 002105/002340, loss: 0.149034, avg_loss: 0.252077 |
| 002110/002340, loss: 0.022406, avg_loss: 0.251554 |
| 002115/002340, loss: 0.050635, avg_loss: 0.251001 |
| 002120/002340, loss: 0.101384, avg_loss: 0.250624 |
| 002125/002340, loss: 0.019535, avg_loss: 0.250064 |
| 002130/002340, loss: 0.017638, avg_loss: 0.249509 |
| 002135/002340, loss: 0.007454, avg_loss: 0.249097 |
| 002140/002340, loss: 0.170886, avg_loss: 0.248638 |
| 002145/002340, loss: 0.008658, avg_loss: 0.248148 |
| 002150/002340, loss: 0.018784, avg_loss: 0.247731 |
| 002155/002340, loss: 0.006945, avg_loss: 0.247294 |
| 002160/002340, loss: 0.149141, avg_loss: 0.246973 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 27, step 2160/2340: {'accuracy': 0.6173285198555957} |
| 002165/002340, loss: 0.070260, avg_loss: 0.246627 |
| 002170/002340, loss: 0.018735, avg_loss: 0.246110 |
| 002175/002340, loss: 0.011750, avg_loss: 0.245641 |
| 002180/002340, loss: 0.024557, avg_loss: 0.245194 |
| 002185/002340, loss: 0.022439, avg_loss: 0.244675 |
| 002190/002340, loss: 0.009183, avg_loss: 0.244218 |
| 002195/002340, loss: 0.147473, avg_loss: 0.243797 |
| 002200/002340, loss: 0.008439, avg_loss: 0.243311 |
| 002205/002340, loss: 0.009392, avg_loss: 0.242842 |
| 002210/002340, loss: 0.007260, avg_loss: 0.242363 |
| 002215/002340, loss: 0.006505, avg_loss: 0.241869 |
| 002220/002340, loss: 0.036663, avg_loss: 0.241415 |
| 002225/002340, loss: 0.010591, avg_loss: 0.240936 |
| 002230/002340, loss: 0.008057, avg_loss: 0.240418 |
| 002235/002340, loss: 0.005135, avg_loss: 0.240005 |
| 002240/002340, loss: 0.009763, avg_loss: 0.239661 |
| 002245/002340, loss: 0.009173, avg_loss: 0.239206 |
| 002250/002340, loss: 0.015700, avg_loss: 0.238819 |
| 002255/002340, loss: 0.021340, avg_loss: 0.238346 |
| 002260/002340, loss: 0.060185, avg_loss: 0.237882 |
| 002265/002340, loss: 0.038913, avg_loss: 0.237484 |
| 002270/002340, loss: 0.016376, avg_loss: 0.237112 |
| 002275/002340, loss: 0.010828, avg_loss: 0.236714 |
| 002280/002340, loss: 0.129731, avg_loss: 0.236370 |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| epoch 29, step 2280/2340: {'accuracy': 0.6064981949458483} |
| 002285/002340, loss: 0.044581, avg_loss: 0.235897 |
| 002290/002340, loss: 0.008923, avg_loss: 0.235524 |
| 002295/002340, loss: 0.011697, avg_loss: 0.235179 |
| 002300/002340, loss: 0.020234, avg_loss: 0.234708 |
| 002305/002340, loss: 0.024606, avg_loss: 0.234225 |
| 002310/002340, loss: 0.007431, avg_loss: 0.233798 |
| 002315/002340, loss: 0.006717, avg_loss: 0.233382 |
| 002320/002340, loss: 0.017990, avg_loss: 0.232940 |
| 002325/002340, loss: 0.145197, avg_loss: 0.232597 |
| 002330/002340, loss: 0.013951, avg_loss: 0.232139 |
| 002335/002340, loss: 0.014238, avg_loss: 0.231719 |
| 002340/002340, loss: 0.019154, avg_loss: 0.231268 |
| ***** Running train evaluation ***** |
| Num examples = 2490 |
| Instantaneous batch size per device = 32 |
| Train Dataset Result: {'accuracy': 0.9955823293172691} |
| ***** Running dev evaluation ***** |
| Num examples = 277 |
| Instantaneous batch size per device = 32 |
| Dev Dataset Result: {'accuracy': 0.6101083032490975} |
| DEV Best Result: accuracy, 0.6462093862815884 |
| Training time 0:02:36 |
|
|