diff --git a/checkpoint-112/config.json b/checkpoint-112/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-112/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-112/model.safetensors b/checkpoint-112/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1088104dfa597e23cb55b76e383f263ee36b8afc --- /dev/null +++ b/checkpoint-112/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d50eaf903cc1b3effc3fa1b21c2dab0e7fe892c26d1327dba045066152cf28a3 +size 44805376 diff --git a/checkpoint-112/optimizer.pt b/checkpoint-112/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d70508568972252bbe128f75d58dea745b27ec3f --- /dev/null +++ b/checkpoint-112/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1cdf3953f170e3f58eb6147e901bb3d799e51334b1229b8eab11a42eba11d9 +size 89553018 diff --git a/checkpoint-112/rng_state.pth b/checkpoint-112/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba1f15ed970c87c0163ba05c294cba85a48495da --- /dev/null +++ b/checkpoint-112/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c71ba54f0646bd8898ac2647e53f2aebe77a1fb7add37d0e3732a9964a9ec9b +size 13990 diff --git a/checkpoint-112/scheduler.pt b/checkpoint-112/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ef8e547c272dd6c7398f89708e7723a505c1594 --- /dev/null +++ b/checkpoint-112/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cafe8b04c0d7fda41811110fc07bf501996487f6b372108ae9857fe438b0444 +size 1064 diff --git a/checkpoint-112/trainer_state.json b/checkpoint-112/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e9714005f316decd50984e80468723d2c57bd456 --- /dev/null +++ b/checkpoint-112/trainer_state.json @@ -0,0 +1,183 @@ +{ + "best_global_step": 112, + "best_metric": 0.6909090909090909, + "best_model_checkpoint": "font-identifier/checkpoint-112", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 112, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7.113540645421056e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-112/training_args.bin b/checkpoint-112/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-112/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-126/config.json b/checkpoint-126/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-126/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-126/model.safetensors b/checkpoint-126/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ddecae90eb01af2c4d88469e66dd159e145edb6 --- /dev/null +++ b/checkpoint-126/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0fcafc481fe312b26f9166559a5cd78ba68d154a07574aa461db33ae4c3c8e2 +size 44805376 diff --git a/checkpoint-126/optimizer.pt b/checkpoint-126/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a1f1e03b9fc17e5a8c0923dc76c21cfa866015d --- /dev/null +++ b/checkpoint-126/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7604f32daf7f9e84e695d7d7f0448189f1f3c08241f0e4fe1069f93d6edbad16 +size 89553018 diff --git a/checkpoint-126/rng_state.pth b/checkpoint-126/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d87bd7bccb14ee641511dbd0e0abca9448355be1 --- /dev/null +++ b/checkpoint-126/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d0b15529f08e1f1286ee56f01d06fa36778d8027c24990355c44c99cb246d1 +size 13990 diff --git a/checkpoint-126/scheduler.pt b/checkpoint-126/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8320fe816b5d8e029a944fb3863c96f2f8234dd2 --- /dev/null +++ b/checkpoint-126/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6c463e68e18848078b2bd871c08a210b46e64a4e7cc64000912e3bb50156f7e +size 1064 diff --git a/checkpoint-126/trainer_state.json b/checkpoint-126/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..23acca4b0fb4dc479135c4e77886238a65acd643 --- /dev/null +++ b/checkpoint-126/trainer_state.json @@ -0,0 +1,199 @@ +{ + "best_global_step": 126, + "best_metric": 0.7545454545454545, + "best_model_checkpoint": "font-identifier/checkpoint-126", + "epoch": 9.0, + "eval_steps": 500, + "global_step": 126, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.002733226098688e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-126/training_args.bin b/checkpoint-126/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-126/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-14/config.json b/checkpoint-14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-14/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-14/model.safetensors b/checkpoint-14/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7cb93f5e95784aac481962c0c6c9861add80c82b --- /dev/null +++ b/checkpoint-14/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b801a1dadee481f447f87699b9993fb7c2af7032b5da5880d75383931f2ba8 +size 44805376 diff --git a/checkpoint-14/optimizer.pt b/checkpoint-14/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab221d9de65ecc835481f779e647af687f5f6782 --- /dev/null +++ b/checkpoint-14/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764c756ea32b0ef4c0b253dc7957edfba7d6b17ddabfca32606dda72a89b4e53 +size 89553018 diff --git a/checkpoint-14/rng_state.pth b/checkpoint-14/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ef8028e4efa9481a6a6a3deb7ee8ee165b1ed07 --- /dev/null +++ b/checkpoint-14/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31b8321cc5b493ab28d53ab857a0fc387b45c005ec910d38eab126e903578c5 +size 13990 diff --git a/checkpoint-14/scheduler.pt b/checkpoint-14/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d71b7662afc20fafcef37238804efce2c1affa9c --- /dev/null +++ b/checkpoint-14/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeff771eceba11ececa0a4a67b23421b70685132a4ed4ec5ec6b6cd2df076d18 +size 1064 diff --git a/checkpoint-14/trainer_state.json b/checkpoint-14/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..514f5abd76121c318bc63c8eda7ee3ef900b187c --- /dev/null +++ b/checkpoint-14/trainer_state.json @@ -0,0 +1,50 @@ +{ + "best_global_step": 14, + "best_metric": 0.06818181818181818, + "best_model_checkpoint": "font-identifier/checkpoint-14", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 14, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8891925806776320.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-14/training_args.bin b/checkpoint-14/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-14/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-140/config.json b/checkpoint-140/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-140/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-140/model.safetensors b/checkpoint-140/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31bcb0c986aa05ab1f4d36e35214d58b40453c25 --- /dev/null +++ b/checkpoint-140/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c904d0f15a96f314622f8ce9f71fda949f93a54202d1814cfa64964ba995f6f7 +size 44805376 diff --git a/checkpoint-140/optimizer.pt b/checkpoint-140/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0761d2ac9f12f47f8c47ce47d90cf46eb61c8397 --- /dev/null +++ b/checkpoint-140/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a470cfbaa1eecc70cf1468db51ac1445bfa58d244222b7f72cbf19b57856fcac +size 89553018 diff --git a/checkpoint-140/rng_state.pth b/checkpoint-140/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b664cc490b76a79be54b1b79ad4c69127ff43c70 --- /dev/null +++ b/checkpoint-140/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d440455cc448424e4c6d4cab742fba6d6c19f6d704a5e22c29757362b0a072 +size 13990 diff --git a/checkpoint-140/scheduler.pt b/checkpoint-140/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3a605a449ba85b145d290d8e322acc26e9a7eba --- /dev/null +++ b/checkpoint-140/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c51865cefe2f0444539ed235523536c8c919b9eb21d351c1952f953fe87b0e4 +size 1064 diff --git a/checkpoint-140/trainer_state.json b/checkpoint-140/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cb0795e776d7d3b3ab321c4165a94859876cc958 --- /dev/null +++ b/checkpoint-140/trainer_state.json @@ -0,0 +1,222 @@ +{ + "best_global_step": 140, + "best_metric": 0.7818181818181819, + "best_model_checkpoint": "font-identifier/checkpoint-140", + "epoch": 10.0, + "eval_steps": 500, + "global_step": 140, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.89192580677632e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-140/training_args.bin b/checkpoint-140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-154/config.json b/checkpoint-154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-154/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-154/model.safetensors b/checkpoint-154/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fb9bfde571711b4419a10af18bfd14d04d74cd2 --- /dev/null +++ b/checkpoint-154/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a2bd22b841b7078ee33091014d4bf5a7146a7f35bd8ca1ffb0f9f01add2aed +size 44805376 diff --git a/checkpoint-154/optimizer.pt b/checkpoint-154/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..156e3d3f4e600fa7f146d37e33b9f292494290d7 --- /dev/null +++ b/checkpoint-154/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9addc9bac617b291b0abdff3447bff155953f1de9a772a685af01bbd6e8f233 +size 89553018 diff --git a/checkpoint-154/rng_state.pth b/checkpoint-154/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..34ade1322c044372738546e98e588936034b94d3 --- /dev/null +++ b/checkpoint-154/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a29d3d61d48bbe3658978a32b0749a88ca50c99a737f8321cc85792846b1fe48 +size 13990 diff --git a/checkpoint-154/scheduler.pt b/checkpoint-154/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fe1dfc30e521685c83bd4a834a84ebb896f72eb --- /dev/null +++ b/checkpoint-154/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c163c3783012dd3c27dedcdcb77ea50be6a1acafde65225e3fdd7e016b7687b +size 1064 diff --git a/checkpoint-154/trainer_state.json b/checkpoint-154/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9ded2ede6cfec87476944f428cba47772e211012 --- /dev/null +++ b/checkpoint-154/trainer_state.json @@ -0,0 +1,238 @@ +{ + "best_global_step": 154, + "best_metric": 0.8, + "best_model_checkpoint": "font-identifier/checkpoint-154", + "epoch": 11.0, + "eval_steps": 500, + "global_step": 154, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9.781118387453952e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-154/training_args.bin b/checkpoint-154/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-154/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-168/config.json b/checkpoint-168/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-168/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-168/model.safetensors b/checkpoint-168/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c3136253a0acedba7ef2d269b8553617a94503d --- /dev/null +++ b/checkpoint-168/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:994db2ee4c064757cf621886f5d7442819b454425938e8b6c4f1205744c03228 +size 44805376 diff --git a/checkpoint-168/optimizer.pt b/checkpoint-168/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..98289158475575a66b1ee759880cc883b26fe18f --- /dev/null +++ b/checkpoint-168/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:742a6c62f4ba03d57b5e147a842189525839bf8928abdd2f4480aeaf6a2612ea +size 89553018 diff --git a/checkpoint-168/rng_state.pth b/checkpoint-168/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9be44495e4db3bcf9b647d47edbf327c5633a5fe --- /dev/null +++ b/checkpoint-168/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c82a69e27a889a84e9dac29179c963d320dd413216cf009b463851e4cb2cec1 +size 13990 diff --git a/checkpoint-168/scheduler.pt b/checkpoint-168/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..46236f6fe9bdca90945e2b9f837cb7b56d49aaa0 --- /dev/null +++ b/checkpoint-168/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec959d95150727eb55080b0fd5043f44374f20a9af18e119afa404cba10321d +size 1064 diff --git a/checkpoint-168/trainer_state.json b/checkpoint-168/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..874f184bbe1e8f65f81cec41bc148c66a2f935a6 --- /dev/null +++ b/checkpoint-168/trainer_state.json @@ -0,0 +1,254 @@ +{ + "best_global_step": 168, + "best_metric": 0.8772727272727273, + "best_model_checkpoint": "font-identifier/checkpoint-168", + "epoch": 12.0, + "eval_steps": 500, + "global_step": 168, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0670310968131584e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-168/training_args.bin b/checkpoint-168/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-168/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-182/config.json b/checkpoint-182/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-182/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-182/model.safetensors b/checkpoint-182/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2c6400452c6ea524de32a0964390797e1425da8 --- /dev/null +++ b/checkpoint-182/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04783c62c1e951592bd47021cc8f40405c44c10b33b25eceb52fb8888d3e3562 +size 44805376 diff --git a/checkpoint-182/optimizer.pt b/checkpoint-182/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e91cda282580373196334acedaee08831b27f6b8 --- /dev/null +++ b/checkpoint-182/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b758606025c6d643604731a13ed47748090b30ceea5a9c49c397fbf18f32cd +size 89553018 diff --git a/checkpoint-182/rng_state.pth b/checkpoint-182/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..de3562c32da2191ca25c190a9c501df598af70f4 --- /dev/null +++ b/checkpoint-182/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f53f0b8d547f2fb5901e4ba725558d6ccafdba1ecddebb259b1585640875d547 +size 13990 diff --git a/checkpoint-182/scheduler.pt b/checkpoint-182/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea3f560274b6d63925f444b4ee825fd36fe5f605 --- /dev/null +++ b/checkpoint-182/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b6644b860068fa4350a2825c82e6f306f5a289d29410d5882eb740ab2cc559e +size 1064 diff --git a/checkpoint-182/trainer_state.json b/checkpoint-182/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0797594636d1505a99de7382be23e60ea1ae18f7 --- /dev/null +++ b/checkpoint-182/trainer_state.json @@ -0,0 +1,277 @@ +{ + "best_global_step": 168, + "best_metric": 0.8772727272727273, + "best_model_checkpoint": "font-identifier/checkpoint-168", + "epoch": 13.0, + "eval_steps": 500, + "global_step": 182, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.1559503548809216e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-182/training_args.bin b/checkpoint-182/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-182/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-196/config.json b/checkpoint-196/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-196/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-196/model.safetensors b/checkpoint-196/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ced183de3a13259981dee017abed672687fea9bb --- /dev/null +++ b/checkpoint-196/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d3e8b20db96de5fd1d210369f3f2f7086a23d517127c286b4df08e93a420e58 +size 44805376 diff --git a/checkpoint-196/optimizer.pt b/checkpoint-196/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..013d353833c9fef16b5059baa9fd5b52a3ad2aea --- /dev/null +++ b/checkpoint-196/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9dca1c65c65b7a9dd6c5ec44771244ab662f662a517c9d9dc09084f7ca8399f +size 89553018 diff --git a/checkpoint-196/rng_state.pth b/checkpoint-196/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f6fb856806de744dd30110e5f10dfaf06ff41d49 --- /dev/null +++ b/checkpoint-196/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7263d6fb43a7b110970237ca159d53b8ee4411a6b2ed12077ee61d2a106cf758 +size 13990 diff --git a/checkpoint-196/scheduler.pt b/checkpoint-196/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f329fa541ed104b33680c011f2b5f870b5470232 --- /dev/null +++ b/checkpoint-196/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e386f17a09eb52a1103f44ddf9f804dfab726724eaae2c3b137c8e3a9bbf4399 +size 1064 diff --git a/checkpoint-196/trainer_state.json b/checkpoint-196/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..243502b6739b9ca44ddbebd8119914453540d254 --- /dev/null +++ b/checkpoint-196/trainer_state.json @@ -0,0 +1,293 @@ +{ + "best_global_step": 196, + "best_metric": 0.8818181818181818, + "best_model_checkpoint": "font-identifier/checkpoint-196", + "epoch": 14.0, + "eval_steps": 500, + "global_step": 196, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2448696129486848e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-196/training_args.bin b/checkpoint-196/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-196/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-210/config.json b/checkpoint-210/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-210/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-210/model.safetensors b/checkpoint-210/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8fcb3a7c44ae4624e4dca182f179617273e96d86 --- /dev/null +++ b/checkpoint-210/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f90192707e1a6d1ad2ca8933c726d7ab15b661a42143ba77097e793af2b7205 +size 44805376 diff --git a/checkpoint-210/optimizer.pt b/checkpoint-210/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c57886c2ee437116bc794b8cff3f517189e7f9a --- /dev/null +++ b/checkpoint-210/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382b04c4b6d3b782010dd3698d7df01d95501f4cf319e5238e44517c0d0e1c3f +size 89553018 diff --git a/checkpoint-210/rng_state.pth b/checkpoint-210/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..32571c77fc70c9bd83f5fbf0f7adf300f0701ca5 --- /dev/null +++ b/checkpoint-210/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:274db9ee757ea0f987ae64f2f87a0faacfb8fa28ce66eeb4d31f35334d1382f0 +size 13990 diff --git a/checkpoint-210/scheduler.pt b/checkpoint-210/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d36d6c9a7d8b5c4b4740ab0e3449af81b9a1363 --- /dev/null +++ b/checkpoint-210/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3ee1451d5ec94365350189c678cf5c71f7ce23886e559baae85efa5b75fe2f +size 1064 diff --git a/checkpoint-210/trainer_state.json b/checkpoint-210/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c0aa512fde36268bf88e8816eb9194c74a91c1b1 --- /dev/null +++ b/checkpoint-210/trainer_state.json @@ -0,0 +1,316 @@ +{ + "best_global_step": 196, + "best_metric": 0.8818181818181818, + "best_model_checkpoint": "font-identifier/checkpoint-196", + "epoch": 15.0, + "eval_steps": 500, + "global_step": 210, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.333788871016448e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-210/training_args.bin b/checkpoint-210/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-210/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-224/config.json b/checkpoint-224/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-224/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-224/model.safetensors b/checkpoint-224/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..860a00f8821b2da088d4a8cec615c763396cc302 --- /dev/null +++ b/checkpoint-224/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62004b17bd637cc34b1bde1cba1a3e49b67926bd2064ac76a370bf69399806df +size 44805376 diff --git a/checkpoint-224/optimizer.pt b/checkpoint-224/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..14cd2782591ea2e4654733a0e215a167d3aa205b --- /dev/null +++ b/checkpoint-224/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ead3299c52cc8121e5c35a1c8b467d332da6b6b7e54153aadc29af688362c34f +size 89553018 diff --git a/checkpoint-224/rng_state.pth b/checkpoint-224/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0144b820edf7f3cfbcdbc9d5406cdb69caa2b02c --- /dev/null +++ b/checkpoint-224/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63e7638d010039b06a8cc64ba3c33012ed63b9de7a9872bf037bbdb2f6d9be5b +size 13990 diff --git a/checkpoint-224/scheduler.pt b/checkpoint-224/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0a39c29df80d41e19127c3e94ec05281cd247fa --- /dev/null +++ b/checkpoint-224/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4277a6db6835d10242c63af9725c8393b608c770be355f470af52e5c9b2df4aa +size 1064 diff --git a/checkpoint-224/trainer_state.json b/checkpoint-224/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f1b52dd456d074d9242479860b474ae4ed158e32 --- /dev/null +++ b/checkpoint-224/trainer_state.json @@ -0,0 +1,332 @@ +{ + "best_global_step": 224, + "best_metric": 0.9227272727272727, + "best_model_checkpoint": "font-identifier/checkpoint-224", + "epoch": 16.0, + "eval_steps": 500, + "global_step": 224, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4227081290842112e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-224/training_args.bin b/checkpoint-224/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-224/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-238/config.json b/checkpoint-238/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-238/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-238/model.safetensors b/checkpoint-238/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..854ef8d24457422cba8250f28055e4ec4ad587ae --- /dev/null +++ b/checkpoint-238/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db106dd661343d2a4aeff78dd04d7745a9f22a44bd1a1171300925f2391e357 +size 44805376 diff --git a/checkpoint-238/optimizer.pt b/checkpoint-238/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3470f95d0ec540cc136513f886ea81f0b9879885 --- /dev/null +++ b/checkpoint-238/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdd9be36dba4e5ecb1c486300f9d5afc245c238c5b9037cd0559de0914f20e54 +size 89553018 diff --git a/checkpoint-238/rng_state.pth b/checkpoint-238/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4c8c563beb1ff055f4fdfefcee2dcf94673a582 --- /dev/null +++ b/checkpoint-238/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6aa565eb6766f58bd7a5a080cee8ac56c9e0bbc9706c944fdc84ebade8e84fe +size 13990 diff --git a/checkpoint-238/scheduler.pt b/checkpoint-238/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..faab60a418f9dbcbce70c08a9b16448f6a91a0aa --- /dev/null +++ b/checkpoint-238/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b78d36297b1847a0bc4d07bf2670396356ae38276bfb9a325cc93d201044fac +size 1064 diff --git a/checkpoint-238/trainer_state.json b/checkpoint-238/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..43ee06313b026935a04a583ea84005e5549ef99f --- /dev/null +++ b/checkpoint-238/trainer_state.json @@ -0,0 +1,348 @@ +{ + "best_global_step": 224, + "best_metric": 0.9227272727272727, + "best_model_checkpoint": "font-identifier/checkpoint-224", + "epoch": 17.0, + "eval_steps": 500, + "global_step": 238, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5116273871519744e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-238/training_args.bin b/checkpoint-238/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-238/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-252/config.json b/checkpoint-252/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-252/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-252/model.safetensors b/checkpoint-252/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cb7774360b44a3041c1b0f5fa6a7b601226f8f5 --- /dev/null +++ b/checkpoint-252/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eb3a1af477520b744c96f1a19314b4d05a49b659c912db66259576a715e6493 +size 44805376 diff --git a/checkpoint-252/optimizer.pt b/checkpoint-252/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe63b952b87111109d98d2b37e356940742a88f5 --- /dev/null +++ b/checkpoint-252/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:341ccb6335908b26b8b778369455c972ec12c588fe1fc87dd977e034e3f95b33 +size 89553018 diff --git a/checkpoint-252/rng_state.pth b/checkpoint-252/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e811183ec2d5d2864738dc942e242679172ef655 --- /dev/null +++ b/checkpoint-252/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7e4d0731ebf7f42f78e7befb50b88eae67e5c346cd1481ded2066d8946b729 +size 13990 diff --git a/checkpoint-252/scheduler.pt b/checkpoint-252/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bff5e3f4ade223a7460ef3768be549a5cc5bd6c1 --- /dev/null +++ b/checkpoint-252/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47f586ac808a01bfe819f2c7e3803fb522230a502a6e3a2048910618201558f3 +size 1064 diff --git a/checkpoint-252/trainer_state.json b/checkpoint-252/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..713d1d3bb18fd009ef52b9b6dca7923c97ab634d --- /dev/null +++ b/checkpoint-252/trainer_state.json @@ -0,0 +1,371 @@ +{ + "best_global_step": 224, + "best_metric": 0.9227272727272727, + "best_model_checkpoint": "font-identifier/checkpoint-224", + "epoch": 18.0, + "eval_steps": 500, + "global_step": 252, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6005466452197376e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-252/training_args.bin b/checkpoint-252/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-252/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-266/config.json b/checkpoint-266/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-266/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-266/model.safetensors b/checkpoint-266/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca46e1292ff7f63f266907acc3cb3710ba83eb4e --- /dev/null +++ b/checkpoint-266/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f449c9ad4cdd7de4346f2e05c9f9b49f947f43db746e86691c25009fbf7bf414 +size 44805376 diff --git a/checkpoint-266/optimizer.pt b/checkpoint-266/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..126637905ef44c40600d01852251dbda742ba447 --- /dev/null +++ b/checkpoint-266/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffba5cc4483d1f3564efacb512d8d874c67da1707fce7adfabbdec92b1f3d0c3 +size 89553018 diff --git a/checkpoint-266/rng_state.pth b/checkpoint-266/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..39bd7dc3b62eee56ed26d7b980a71505847607eb --- /dev/null +++ b/checkpoint-266/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e43cdfd244a4a190a1a3254ca567c3c2755018065379c842384fbed45bfbde6 +size 13990 diff --git a/checkpoint-266/scheduler.pt b/checkpoint-266/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b38ebf8994fbe83cad53dfaed2c079f00fca3408 --- /dev/null +++ b/checkpoint-266/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:950d862e4560c8beb385e182e1eb1a72a79855e3926004d03e1e4f55fd47e3f2 +size 1064 diff --git a/checkpoint-266/trainer_state.json b/checkpoint-266/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b7f9a5f2f6ac481587a364ea553cb3c2e38da8b7 --- /dev/null +++ b/checkpoint-266/trainer_state.json @@ -0,0 +1,387 @@ +{ + "best_global_step": 224, + "best_metric": 0.9227272727272727, + "best_model_checkpoint": "font-identifier/checkpoint-224", + "epoch": 19.0, + "eval_steps": 500, + "global_step": 266, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6894659032875008e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-266/training_args.bin b/checkpoint-266/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-266/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-28/config.json b/checkpoint-28/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-28/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-28/model.safetensors b/checkpoint-28/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31ac9e3b7ac05d2e4924c55ded93e556a4d075a1 --- /dev/null +++ b/checkpoint-28/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a798ab5cac5931bf41159bee9c10c751362ec84234bce876d590044c87ce5dc +size 44805376 diff --git a/checkpoint-28/optimizer.pt b/checkpoint-28/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6182a11e776e065ebd460a5f44e588af97734518 --- /dev/null +++ b/checkpoint-28/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2726b336c4df3979f6f00fb0fe2e309b1256ee01c58600ddc5e63f65be27c279 +size 89553018 diff --git a/checkpoint-28/rng_state.pth b/checkpoint-28/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..577451e420703614017c8f505d2b9f8cc5039a6e --- /dev/null +++ b/checkpoint-28/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e2fa0db023d9d26fddc8b93dc283380a10a7feeddbf1fea9540fef32cf21be7 +size 13990 diff --git a/checkpoint-28/scheduler.pt b/checkpoint-28/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cca3452d336aa3f27baef9f83f1fad4795e815a --- /dev/null +++ b/checkpoint-28/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ceae7a902f48af6de81cb1fed608449d22f479ac9db4329a9a8d1876759ca9 +size 1064 diff --git a/checkpoint-28/trainer_state.json b/checkpoint-28/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0348b1bfccbcf27891d394a6443e9bc92ec33547 --- /dev/null +++ b/checkpoint-28/trainer_state.json @@ -0,0 +1,66 @@ +{ + "best_global_step": 28, + "best_metric": 0.12727272727272726, + "best_model_checkpoint": "font-identifier/checkpoint-28", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 28, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.778385161355264e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-28/training_args.bin b/checkpoint-28/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-28/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-280/config.json b/checkpoint-280/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-280/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-280/model.safetensors b/checkpoint-280/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6417482beb31d67f8dfe3abb05c32e7b66fbc6b9 --- /dev/null +++ b/checkpoint-280/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae4b7d47c0f95d293e15dcac71ebcf80fd2d4995a0e69e2b6a34e4d82d7a369 +size 44805376 diff --git a/checkpoint-280/optimizer.pt b/checkpoint-280/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f482896bb3e2f7dfb6fed1d35f7930ae5c445764 --- /dev/null +++ b/checkpoint-280/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d764e0bdd017372a08a53d385d653d710541e2b0f64b68332d1e46ae456385 +size 89553018 diff --git a/checkpoint-280/rng_state.pth b/checkpoint-280/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1b3915ca9703b172a179368150a0dcb79c3b758f --- /dev/null +++ b/checkpoint-280/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c511b665b94fd2f5c789802a7eea18173049fa70eae78dbfb445e633123c83b +size 13990 diff --git a/checkpoint-280/scheduler.pt b/checkpoint-280/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8f3e0d584bdfff576b6128ca9791cbf701dcd5f --- /dev/null +++ b/checkpoint-280/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc29f5be00102ed1d274550539774cec3f489603620ff335196c0fcb03a7ff5f +size 1064 diff --git a/checkpoint-280/trainer_state.json b/checkpoint-280/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3ea1e0f96f4192fe5a202ebc9ef00cf88464eb00 --- /dev/null +++ b/checkpoint-280/trainer_state.json @@ -0,0 +1,410 @@ +{ + "best_global_step": 224, + "best_metric": 0.9227272727272727, + "best_model_checkpoint": "font-identifier/checkpoint-224", + "epoch": 20.0, + "eval_steps": 500, + "global_step": 280, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.778385161355264e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-280/training_args.bin b/checkpoint-280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-294/config.json b/checkpoint-294/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-294/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-294/model.safetensors b/checkpoint-294/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34a8e75367179e28fa948c5927832c33699bb1fa --- /dev/null +++ b/checkpoint-294/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54cb3c489989cdf6e1e95e47cbf77636f91f481007c628c5370d74d4cb6e118d +size 44805376 diff --git a/checkpoint-294/optimizer.pt b/checkpoint-294/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..927dd76f0ed5a8562b9dea0bd952676dbaea3cb6 --- /dev/null +++ b/checkpoint-294/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ce2fa9b207c522de868d2cb207c55508f5700ee1c2db7be3bedc7872099fa45 +size 89553018 diff --git a/checkpoint-294/rng_state.pth b/checkpoint-294/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..77ae6e2dc932695f75237d5ca56ae52a987e916b --- /dev/null +++ b/checkpoint-294/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e66fcbdeb3b1fbe26ff882ec79ad0cd2a34bbbfa2451c1d3494280985b513b76 +size 13990 diff --git a/checkpoint-294/scheduler.pt b/checkpoint-294/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a3accbeb2596125b3f0a7f8a556b79d1a3eac50 --- /dev/null +++ b/checkpoint-294/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ecad388e4ede5c900c37c3e4cc4e4ce6da9c01f820aca93848d14b4dcd2f8c0 +size 1064 diff --git a/checkpoint-294/trainer_state.json b/checkpoint-294/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..035940d002374169d08d950050047ae0ea72bb44 --- /dev/null +++ b/checkpoint-294/trainer_state.json @@ -0,0 +1,426 @@ +{ + "best_global_step": 224, + "best_metric": 0.9227272727272727, + "best_model_checkpoint": "font-identifier/checkpoint-224", + "epoch": 21.0, + "eval_steps": 500, + "global_step": 294, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.8673044194230272e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-294/training_args.bin b/checkpoint-294/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-294/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-308/config.json b/checkpoint-308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-308/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-308/model.safetensors b/checkpoint-308/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ade1a217f21426d345e1229bcc239f1193a8da6e --- /dev/null +++ b/checkpoint-308/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c88807076d53699aa83230cfc22eeca3da2faa2e665281501b91e1c53b4d077 +size 44805376 diff --git a/checkpoint-308/optimizer.pt b/checkpoint-308/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f05023e96b36c5d5706c41fc17e64b85e8b8ed96 --- /dev/null +++ b/checkpoint-308/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63d844ebec2c937832592078020e361fea749894c154b41f6a412e9b1bb818dc +size 89553018 diff --git a/checkpoint-308/rng_state.pth b/checkpoint-308/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3bb014f285fe1393f7e7f63920adcc9fb5a324ce --- /dev/null +++ b/checkpoint-308/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315f72a22d66c33d3401cd63d6470f3679dfcdae20b5184310181515dced0f4c +size 13990 diff --git a/checkpoint-308/scheduler.pt b/checkpoint-308/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..da96849d792752485075542d4363092991b82250 --- /dev/null +++ b/checkpoint-308/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0ad10d2b2114d7b848cacc143836d6d7d11953a934099d8e8a27171b975045 +size 1064 diff --git a/checkpoint-308/trainer_state.json b/checkpoint-308/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6ae83bc6d5c62d23e9e0490714cba52941ecfb82 --- /dev/null +++ b/checkpoint-308/trainer_state.json @@ -0,0 +1,442 @@ +{ + "best_global_step": 224, + "best_metric": 0.9227272727272727, + "best_model_checkpoint": "font-identifier/checkpoint-224", + "epoch": 22.0, + "eval_steps": 500, + "global_step": 308, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.9562236774907904e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-308/training_args.bin b/checkpoint-308/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-308/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-322/config.json b/checkpoint-322/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-322/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-322/model.safetensors b/checkpoint-322/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c72501ea639c02e7fafc2cc65898f7315c338c74 --- /dev/null +++ b/checkpoint-322/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03727d36fa9c9fddf232d2dda7fcd75b3dc9ae8a36b9aa238047a3af8414072d +size 44805376 diff --git a/checkpoint-322/optimizer.pt b/checkpoint-322/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..762ab2968fecbf19da0e667522ec7c080e9e5d00 --- /dev/null +++ b/checkpoint-322/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9934771dd638d9045211944050db32d3f1a3ac7439ae3830362271a51c5d964 +size 89553018 diff --git a/checkpoint-322/rng_state.pth b/checkpoint-322/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..78310908ae39a74694578d5f9427d4283828a5a4 --- /dev/null +++ b/checkpoint-322/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84bd2289b7b0831befd14c8ae8161168f67ca69a2e620e55ad9f8b42bc882dd9 +size 13990 diff --git a/checkpoint-322/scheduler.pt b/checkpoint-322/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..948df1d51e6a806b6e067ca36f6db3b071f3f572 --- /dev/null +++ b/checkpoint-322/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ca298d662558a31dcb65d734ce3047f36db34074535545e8bddb584a1c18fd +size 1064 diff --git a/checkpoint-322/trainer_state.json b/checkpoint-322/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3485c541c99dbeaa9dee149f4cc7ec0dff652da1 --- /dev/null +++ b/checkpoint-322/trainer_state.json @@ -0,0 +1,465 @@ +{ + "best_global_step": 322, + "best_metric": 0.95, + "best_model_checkpoint": "font-identifier/checkpoint-322", + "epoch": 23.0, + "eval_steps": 500, + "global_step": 322, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.0451429355585536e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-322/training_args.bin b/checkpoint-322/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-322/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-336/config.json b/checkpoint-336/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-336/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-336/model.safetensors b/checkpoint-336/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c85127fc4af639769ee7495dca434694c531d548 --- /dev/null +++ b/checkpoint-336/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5aa5c3a76ffdec8f80ad27a7ef1aee63783cc0233cdf5862fb23d64d5dc7ec +size 44805376 diff --git a/checkpoint-336/optimizer.pt b/checkpoint-336/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ba9cf9dc77a6018eccf332f0e18fac714307f86 --- /dev/null +++ b/checkpoint-336/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:024a63506663acff532e7131bbd2facdd64ce358f25ef865a7dc1bb2e3f9a445 +size 89553018 diff --git a/checkpoint-336/rng_state.pth b/checkpoint-336/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa3f3f8cd3832242aea0a09f04e623fd8ad9bf4a --- /dev/null +++ b/checkpoint-336/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c87b0641edd969feb347c23ac3250b31b28c8331d68208e5da06edf63dfc5b +size 13990 diff --git a/checkpoint-336/scheduler.pt b/checkpoint-336/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b2e24bdef671388bbf271402c1d50506518bcea --- /dev/null +++ b/checkpoint-336/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e754e038d7c2437c34a97e94b80e8d01ecf0f15b0808fa28e71fca1a95dad4b +size 1064 diff --git a/checkpoint-336/trainer_state.json b/checkpoint-336/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..aa38a43b79a169fa267b46dc770f133528f0104f --- /dev/null +++ b/checkpoint-336/trainer_state.json @@ -0,0 +1,481 @@ +{ + "best_global_step": 322, + "best_metric": 0.95, + "best_model_checkpoint": "font-identifier/checkpoint-322", + "epoch": 24.0, + "eval_steps": 500, + "global_step": 336, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.1340621936263168e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-336/training_args.bin b/checkpoint-336/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-336/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-350/config.json b/checkpoint-350/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-350/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-350/model.safetensors b/checkpoint-350/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b40b6b7835a7d1b78a71d3e9cf80d28812b19f7 --- /dev/null +++ b/checkpoint-350/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1ee37180f70ca60374ea42667f1f8c152cd94311e89a2eb656fe732d04f8c2e +size 44805376 diff --git a/checkpoint-350/optimizer.pt b/checkpoint-350/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..85064ebe3e0ccb97592c1d9793f8be5c9ff0fc41 --- /dev/null +++ b/checkpoint-350/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1af77cb6dfd2f4a08a0be7cac37cbbf46e6d3efeceba5efcfe29f76d9866f17d +size 89553018 diff --git a/checkpoint-350/rng_state.pth b/checkpoint-350/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..55e4c1b56651525aa24fa66411cf58b0da7b5b35 --- /dev/null +++ b/checkpoint-350/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f8523644e6e15cd02b0a3a3d027c4a1d706c1da1f8770bb62c1c4e8aa5e79c +size 13990 diff --git a/checkpoint-350/scheduler.pt b/checkpoint-350/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3368dc0d3ba1cb43e0d67fa3fb06530e6ff03bcb --- /dev/null +++ b/checkpoint-350/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36fe6f83c76039af56a3f2b43a9336ebf4e3fb07e2c707ea2386a749351f704a +size 1064 diff --git a/checkpoint-350/trainer_state.json b/checkpoint-350/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9ba4da37c51830e1460f06d40b13fd8cceacbf7b --- /dev/null +++ b/checkpoint-350/trainer_state.json @@ -0,0 +1,504 @@ +{ + "best_global_step": 322, + "best_metric": 0.95, + "best_model_checkpoint": "font-identifier/checkpoint-322", + "epoch": 25.0, + "eval_steps": 500, + "global_step": 350, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.22298145169408e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-350/training_args.bin b/checkpoint-350/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-350/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-364/config.json b/checkpoint-364/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-364/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-364/model.safetensors b/checkpoint-364/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0c3107a37235ef59d6a263ca7d31e37e4cd9279 --- /dev/null +++ b/checkpoint-364/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a730d71c5ed6feaabd3378af37405a52d658f5a2d2d28284bbfb8cbfc770d07 +size 44805376 diff --git a/checkpoint-364/optimizer.pt b/checkpoint-364/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a31db09fc0a58c0d26263986040dd56c729bb118 --- /dev/null +++ b/checkpoint-364/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375314a2d5d43812166f363d5a7d5e16ee6602d7926d37e82910741a78cc9b56 +size 89553018 diff --git a/checkpoint-364/rng_state.pth b/checkpoint-364/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8fbeb96337379477e219a07604ce5456edc48c53 --- /dev/null +++ b/checkpoint-364/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afe180d138ac44185b807c7b41ab30f855eef6e21502f77b2d338602be309418 +size 13990 diff --git a/checkpoint-364/scheduler.pt b/checkpoint-364/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b2ae522f003e444ed553194d499c219525d0094 --- /dev/null +++ b/checkpoint-364/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d8949f1d92c631bf5bf1b72813f078d94e6d9ee311b926f26e95357b0b1c8d +size 1064 diff --git a/checkpoint-364/trainer_state.json b/checkpoint-364/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..77a2b14f2b925eabd9c1eca9802dae9322eb2e17 --- /dev/null +++ b/checkpoint-364/trainer_state.json @@ -0,0 +1,520 @@ +{ + "best_global_step": 322, + "best_metric": 0.95, + "best_model_checkpoint": "font-identifier/checkpoint-322", + "epoch": 26.0, + "eval_steps": 500, + "global_step": 364, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.3119007097618432e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-364/training_args.bin b/checkpoint-364/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-364/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-378/config.json b/checkpoint-378/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-378/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-378/model.safetensors b/checkpoint-378/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1c193647d2ab3f18a4a7dcdadeee2247b6dd718 --- /dev/null +++ b/checkpoint-378/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a7aef10766412f1cc3c7af952cc1ed5813e2308cb5b06b8cb2def1fab1fcb4a +size 44805376 diff --git a/checkpoint-378/optimizer.pt b/checkpoint-378/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0dd3765411562b545e12c1d0a262ea96c61b090 --- /dev/null +++ b/checkpoint-378/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:084c2cd6d228cd1cba492ae600a06affc8395987cdf5b390453ba28bd182d06b +size 89553018 diff --git a/checkpoint-378/rng_state.pth b/checkpoint-378/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6ca1a64f2db7dc1b4754879e17fae3980f386c0c --- /dev/null +++ b/checkpoint-378/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d18f58694e01396107752ef36610b825adf39178b6244f5dd46215bc333e6358 +size 13990 diff --git a/checkpoint-378/scheduler.pt b/checkpoint-378/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b0095c4eb6ee2510b417f450359e0d97a5c1c0f --- /dev/null +++ b/checkpoint-378/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c83eff16cb0b1e957789b0a9fae18306a92f32606616668f381bf61bff2d0c7 +size 1064 diff --git a/checkpoint-378/trainer_state.json b/checkpoint-378/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ce1cb2fa8bf4709852ffb3591ad8174c553668fb --- /dev/null +++ b/checkpoint-378/trainer_state.json @@ -0,0 +1,536 @@ +{ + "best_global_step": 322, + "best_metric": 0.95, + "best_model_checkpoint": "font-identifier/checkpoint-322", + "epoch": 27.0, + "eval_steps": 500, + "global_step": 378, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.4008199678296064e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-378/training_args.bin b/checkpoint-378/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-378/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-392/config.json b/checkpoint-392/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-392/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-392/model.safetensors b/checkpoint-392/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1de46dab8edeecdeb0a2b95ab3ffe1e5b6b679d --- /dev/null +++ b/checkpoint-392/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a5adee10e6e439ab937b2187ba52bd6dc44975a131522765b8efe9491d934e +size 44805376 diff --git a/checkpoint-392/optimizer.pt b/checkpoint-392/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6dda3c87a8d6b6c42d433920ccef507dec862c9 --- /dev/null +++ b/checkpoint-392/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c20b913ab6c690cab47b0b9455e2b974b907f6a8a4d5c8936debf4e55cb83b51 +size 89553018 diff --git a/checkpoint-392/rng_state.pth b/checkpoint-392/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dc3b83e1b3e374a40885e7c151b5250c05fed450 --- /dev/null +++ b/checkpoint-392/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5bf4b2a79cbe21232cee1ccfbaae6614b33e5cf16bde5ab29b95cdb38a4b611 +size 13990 diff --git a/checkpoint-392/scheduler.pt b/checkpoint-392/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c89c0efa40d39322024ad31fab23a221f214effe --- /dev/null +++ b/checkpoint-392/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6aaa4272a3a151155cdc155a2d1dc28bee51466ae6ea0b003ae7bd68eafc998 +size 1064 diff --git a/checkpoint-392/trainer_state.json b/checkpoint-392/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..20d2f637a3fd307238a663dacb5cc5b9451ba741 --- /dev/null +++ b/checkpoint-392/trainer_state.json @@ -0,0 +1,559 @@ +{ + "best_global_step": 322, + "best_metric": 0.95, + "best_model_checkpoint": "font-identifier/checkpoint-322", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 392, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.4897392258973696e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-392/training_args.bin b/checkpoint-392/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-392/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-406/config.json b/checkpoint-406/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-406/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-406/model.safetensors b/checkpoint-406/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef7ef979c0461092bac3bca813f4d16b87db8e12 --- /dev/null +++ b/checkpoint-406/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a270b73a3bc1f52e17f9b45b8ad77306fe653563cce096c3bf65f213955cee7 +size 44805376 diff --git a/checkpoint-406/optimizer.pt b/checkpoint-406/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9abd07f14f35c2f423f83a4e35460da55a60dbea --- /dev/null +++ b/checkpoint-406/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7012145ba566ef01bd653bf99a3b8c7511d76ede2915941b02164dbda233f884 +size 89553018 diff --git a/checkpoint-406/rng_state.pth b/checkpoint-406/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b93935932e32bf3e32aba7e80357a6ae14d01a84 --- /dev/null +++ b/checkpoint-406/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f08ecee95e435e2edc9e18b31748156f43f880e57908aa800bf24bdf47a207c +size 13990 diff --git a/checkpoint-406/scheduler.pt b/checkpoint-406/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fa89d49d32daaf79f290e4ceb36541a52a7cb26 --- /dev/null +++ b/checkpoint-406/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7fdd7420f87e635a9a38e8b827c3ab79a3a60f9fd23f2c5b6002c2faa9b53ed +size 1064 diff --git a/checkpoint-406/trainer_state.json b/checkpoint-406/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..45bb195dd90786aa621d8cb6cf5ef98815a64d7d --- /dev/null +++ b/checkpoint-406/trainer_state.json @@ -0,0 +1,575 @@ +{ + "best_global_step": 406, + "best_metric": 0.9545454545454546, + "best_model_checkpoint": "font-identifier/checkpoint-406", + "epoch": 29.0, + "eval_steps": 500, + "global_step": 406, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.5786584839651328e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-406/training_args.bin b/checkpoint-406/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-406/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-42/config.json b/checkpoint-42/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-42/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-42/model.safetensors b/checkpoint-42/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..526766dcd9d1309be27e8c6ab4c42c23605ac7bf --- /dev/null +++ b/checkpoint-42/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c44318e7bda4a3cfdeacfc10e47457672a8bebc782ee8362d1cefb0582a60e4 +size 44805376 diff --git a/checkpoint-42/optimizer.pt b/checkpoint-42/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a7b7aceca19bc602f3b7ec1477cd870fd1e15a6 --- /dev/null +++ b/checkpoint-42/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d47ea6cf79e36a6da15c6565b9c93262ed0d176b626ab8c4594228f0195c1103 +size 89553018 diff --git a/checkpoint-42/rng_state.pth b/checkpoint-42/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9eaedb407a83655dd28bd7ada24285f0b77f365b --- /dev/null +++ b/checkpoint-42/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398cc0975a01c5b2f5c8aabeba696b1358acbb6320aa1581f8116607306894e9 +size 13990 diff --git a/checkpoint-42/scheduler.pt b/checkpoint-42/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..53aaa2469b1beeaf6b02a631afdf868f1a2f2628 --- /dev/null +++ b/checkpoint-42/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc0ffc87183e5a5a2e460084ef30facf49642a5ceca53e1dcd2630a09d379db +size 1064 diff --git a/checkpoint-42/trainer_state.json b/checkpoint-42/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..51695ca14ddb9e002e0ffbba2b07b40daefd23dc --- /dev/null +++ b/checkpoint-42/trainer_state.json @@ -0,0 +1,89 @@ +{ + "best_global_step": 42, + "best_metric": 0.20909090909090908, + "best_model_checkpoint": "font-identifier/checkpoint-42", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 42, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.667577742032896e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-42/training_args.bin b/checkpoint-42/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-42/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-420/config.json b/checkpoint-420/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-420/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-420/model.safetensors b/checkpoint-420/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32205e76feebd7ca811458f2b3ec2fef9b99e8ae --- /dev/null +++ b/checkpoint-420/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:261a5d0313dea6796a5c4578927bba9a02806e7c3cc1feba58e17fb202b99b0f +size 44805376 diff --git a/checkpoint-420/optimizer.pt b/checkpoint-420/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1925f2ec7a321357624de978a1931e9124457841 --- /dev/null +++ b/checkpoint-420/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b8d2eb1aa0390440c336a336bc2cbf8aa4059ea3673998a37cde5b5b58dbcd +size 89553018 diff --git a/checkpoint-420/rng_state.pth b/checkpoint-420/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..795e06d0103987999c07238b5e6ec441d372b764 --- /dev/null +++ b/checkpoint-420/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c651d984e23cb6dbb0e0ecd7da06f301e16efd76e0011b7c901a1569d1407f7 +size 13990 diff --git a/checkpoint-420/scheduler.pt b/checkpoint-420/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3292d203dd5e91c8c41daae8b56107fc7e71be0d --- /dev/null +++ b/checkpoint-420/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59c94a6e48860ceb40570447657f679e2d116538ad2c0f1ac7318593130bea69 +size 1064 diff --git a/checkpoint-420/trainer_state.json b/checkpoint-420/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1de3e6ab8f91969c2ce7e9bc14b1be8ac0b0cdff --- /dev/null +++ b/checkpoint-420/trainer_state.json @@ -0,0 +1,598 @@ +{ + "best_global_step": 406, + "best_metric": 0.9545454545454546, + "best_model_checkpoint": "font-identifier/checkpoint-406", + "epoch": 30.0, + "eval_steps": 500, + "global_step": 420, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.667577742032896e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-420/training_args.bin b/checkpoint-420/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-420/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-434/config.json b/checkpoint-434/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-434/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-434/model.safetensors b/checkpoint-434/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1381165b91de7d5f914170320643bac42ffc55c0 --- /dev/null +++ b/checkpoint-434/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7201389344ad8a9029987a630d7d7502daf06b788949c0703e2a8793707cad0c +size 44805376 diff --git a/checkpoint-434/optimizer.pt b/checkpoint-434/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c422134abba8edb710debaded2049f783519bab0 --- /dev/null +++ b/checkpoint-434/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:706d02a66c58a3c39c5c4f2296a1dfba85d609a60d5cbd1eebe41d4442d777c0 +size 89553018 diff --git a/checkpoint-434/rng_state.pth b/checkpoint-434/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4284befdb947597db1b8f3a165346e5908d94d2a --- /dev/null +++ b/checkpoint-434/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38cbb5c556ae3bb5116dcfd4924e8d39ce29128f795557f726d5ee0c3da42b9 +size 13990 diff --git a/checkpoint-434/scheduler.pt b/checkpoint-434/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c10f93701485fa12e24785f3d51d30afff76b0cd --- /dev/null +++ b/checkpoint-434/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3ae8fd2a710964042feddac23cef19213071b3822895114aa946e2a96c5bc +size 1064 diff --git a/checkpoint-434/trainer_state.json b/checkpoint-434/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a59c106ae0fe9fce3712066627c13206d188e140 --- /dev/null +++ b/checkpoint-434/trainer_state.json @@ -0,0 +1,614 @@ +{ + "best_global_step": 406, + "best_metric": 0.9545454545454546, + "best_model_checkpoint": "font-identifier/checkpoint-406", + "epoch": 31.0, + "eval_steps": 500, + "global_step": 434, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.7564970001006592e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-434/training_args.bin b/checkpoint-434/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-434/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-448/config.json b/checkpoint-448/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-448/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-448/model.safetensors b/checkpoint-448/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..658fe2bc37ea17c7979af13da0122763c821b6a5 --- /dev/null +++ b/checkpoint-448/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:552763e497e77c0afc0dc7d5b7513d5142a4922cd14b121d7f19154f21902024 +size 44805376 diff --git a/checkpoint-448/optimizer.pt b/checkpoint-448/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..44c9dac11871bd2a2ab4b8155c1c6860237fe5ab --- /dev/null +++ b/checkpoint-448/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4644e5f678fb92c449e69c9200836e9488f91d2c732a020c0a11b2abb6a4a7e5 +size 89553018 diff --git a/checkpoint-448/rng_state.pth b/checkpoint-448/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d2a8b75eeae492bf27b55fd060c1633d749c2d8 --- /dev/null +++ b/checkpoint-448/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d84cf93233d424ca113be4c4e1989f19eb01fab820286a23b34b23cf7e89d9a +size 13990 diff --git a/checkpoint-448/scheduler.pt b/checkpoint-448/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c79e621b887b22c0741a86dc47d12e6cfd92373 --- /dev/null +++ b/checkpoint-448/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23d2118915db6fc3478c8d3fa135e3ee8262361b103613468ee56ce074791ba0 +size 1064 diff --git a/checkpoint-448/trainer_state.json b/checkpoint-448/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a4fbc385d0e1180ca07c2dffddf660e2f15d0e1c --- /dev/null +++ b/checkpoint-448/trainer_state.json @@ -0,0 +1,630 @@ +{ + "best_global_step": 406, + "best_metric": 0.9545454545454546, + "best_model_checkpoint": "font-identifier/checkpoint-406", + "epoch": 32.0, + "eval_steps": 500, + "global_step": 448, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.8454162581684224e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-448/training_args.bin b/checkpoint-448/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-448/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-462/config.json b/checkpoint-462/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-462/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-462/model.safetensors b/checkpoint-462/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb1ebb7bfe8530c99bdbab11861f7babab9ffe6e --- /dev/null +++ b/checkpoint-462/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9523fe87277d9028d3894f3fead5f459297b0aa1f95e90e87e270f9d4606600b +size 44805376 diff --git a/checkpoint-462/optimizer.pt b/checkpoint-462/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f65da7bd7f04c3f68450e2a5be7a179c3badddf0 --- /dev/null +++ b/checkpoint-462/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbfe90ef7fa3c59fd2d1df036002921e93109b60cddc8eff6a31ef332b31c158 +size 89553018 diff --git a/checkpoint-462/rng_state.pth b/checkpoint-462/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2762c2a758e4bc42e251dbc9fc82e0fb962f361e --- /dev/null +++ b/checkpoint-462/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72365e47a5479260d6a0b8e3113d5f75ad7eb99ff25d4a77632ebece01d69de1 +size 13990 diff --git a/checkpoint-462/scheduler.pt b/checkpoint-462/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..893a0083449b0eac76163a1daf51ddb01dc9665c --- /dev/null +++ b/checkpoint-462/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a76995deacded8dfdecf53fdd8f2d6dea681494c9f5056422907d1254f36730d +size 1064 diff --git a/checkpoint-462/trainer_state.json b/checkpoint-462/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fb0762083952899e60ceb99d6c8766df844bfd6a --- /dev/null +++ b/checkpoint-462/trainer_state.json @@ -0,0 +1,653 @@ +{ + "best_global_step": 406, + "best_metric": 0.9545454545454546, + "best_model_checkpoint": "font-identifier/checkpoint-406", + "epoch": 33.0, + "eval_steps": 500, + "global_step": 462, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.9343355162361856e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-462/training_args.bin b/checkpoint-462/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-462/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-476/config.json b/checkpoint-476/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-476/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-476/model.safetensors b/checkpoint-476/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b84562073a7f9648852d8fad64cf741a9614f0f --- /dev/null +++ b/checkpoint-476/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:846c8403b418613893d87e0db298b252aaf380ecd380da0a0114d262edac3b51 +size 44805376 diff --git a/checkpoint-476/optimizer.pt b/checkpoint-476/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..189dc1e9feb8f0843d4426f928847d37520a6aa8 --- /dev/null +++ b/checkpoint-476/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43d14ca3205dd585d01ab2ea54c7a10ce4f29fbd1463cfd3aa8041403ff54ad0 +size 89553018 diff --git a/checkpoint-476/rng_state.pth b/checkpoint-476/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a19f06bfa561e441da60d7a0d7ec0884cfe929e6 --- /dev/null +++ b/checkpoint-476/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe75b90b3ff1ce085c5aa5a75c05dd958be7809a61ddaf1f03dd57b4e4354c28 +size 13990 diff --git a/checkpoint-476/scheduler.pt b/checkpoint-476/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3dab87c11281b9563a3e3281501e4ef47de336da --- /dev/null +++ b/checkpoint-476/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20adcbd7d963e19e2759dc9bb1b9bc70ae2eedee173d9c848537890961973fb6 +size 1064 diff --git a/checkpoint-476/trainer_state.json b/checkpoint-476/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..88ae77d1ebbc780719bd7c9a96acd90c0d2df19f --- /dev/null +++ b/checkpoint-476/trainer_state.json @@ -0,0 +1,669 @@ +{ + "best_global_step": 406, + "best_metric": 0.9545454545454546, + "best_model_checkpoint": "font-identifier/checkpoint-406", + "epoch": 34.0, + "eval_steps": 500, + "global_step": 476, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.023254774303949e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-476/training_args.bin b/checkpoint-476/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-476/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-490/config.json b/checkpoint-490/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-490/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-490/model.safetensors b/checkpoint-490/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36d80993767a8c323c463614011843f43528f771 --- /dev/null +++ b/checkpoint-490/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7792e3687d64f7bfac011fa2c139321c5aaf18c5ea7a23f81742d7cb2c5d8a +size 44805376 diff --git a/checkpoint-490/optimizer.pt b/checkpoint-490/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae5d7c55c1baf64628d14c09b92b5b78d7a91374 --- /dev/null +++ b/checkpoint-490/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ec40902084884f33ad6cfcd25f721d20680fd64fe5695feb36dc2c086e0d9b +size 89553018 diff --git a/checkpoint-490/rng_state.pth b/checkpoint-490/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..36aa6a0e2a30a550d7f622c9ebd634a0d78a3ef9 --- /dev/null +++ b/checkpoint-490/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09798ddc66c094ff4f197524ab95461837aa1df0c4961f150b1391a2f298b9de +size 13990 diff --git a/checkpoint-490/scheduler.pt b/checkpoint-490/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..adb71e017377b359ed3ef0af11e78732f6a5d363 --- /dev/null +++ b/checkpoint-490/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c392d0086aba9f6a7cea55f8dec6657b89575fde15a1f38894a909dc7af32a +size 1064 diff --git a/checkpoint-490/trainer_state.json b/checkpoint-490/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d139effc354019f27f7c63a60ec21bef20df9856 --- /dev/null +++ b/checkpoint-490/trainer_state.json @@ -0,0 +1,692 @@ +{ + "best_global_step": 490, + "best_metric": 0.9636363636363636, + "best_model_checkpoint": "font-identifier/checkpoint-490", + "epoch": 35.0, + "eval_steps": 500, + "global_step": 490, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.112174032371712e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-490/training_args.bin b/checkpoint-490/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-490/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-504/config.json b/checkpoint-504/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-504/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-504/model.safetensors b/checkpoint-504/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dad10e5a86b33bc03a3780982018580476ccbc21 --- /dev/null +++ b/checkpoint-504/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:267a174986b0296a98e8e431a83cfedf1c46472f3456f2c47d2885ed29be7e89 +size 44805376 diff --git a/checkpoint-504/optimizer.pt b/checkpoint-504/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1ae419acbe1738f1530a907e92d747eaf7a3ef1 --- /dev/null +++ b/checkpoint-504/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c918245bbdd7d52e9aa7dc0625b17785933889dae1906315dbfd6acd62ecbe4 +size 89553018 diff --git a/checkpoint-504/rng_state.pth b/checkpoint-504/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..17a1cc1e936080cfea3c7ec4f9fcfae1ac272cc0 --- /dev/null +++ b/checkpoint-504/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3158b097d3f9fafd58a52da4afa292f05906b447cb14558a185778cdaa8cc618 +size 13990 diff --git a/checkpoint-504/scheduler.pt b/checkpoint-504/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..47f3a15169fbd03489987c2532eb96e74202d856 --- /dev/null +++ b/checkpoint-504/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c053fd0be140588cd5b12ac781d484391dee05f137076d41dba9d180cc56e012 +size 1064 diff --git a/checkpoint-504/trainer_state.json b/checkpoint-504/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9840cbc2e3ce7c7ff8d85bef9371c06ff1ba6164 --- /dev/null +++ b/checkpoint-504/trainer_state.json @@ -0,0 +1,708 @@ +{ + "best_global_step": 490, + "best_metric": 0.9636363636363636, + "best_model_checkpoint": "font-identifier/checkpoint-490", + "epoch": 36.0, + "eval_steps": 500, + "global_step": 504, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.201093290439475e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-504/training_args.bin b/checkpoint-504/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-504/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-518/config.json b/checkpoint-518/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-518/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-518/model.safetensors b/checkpoint-518/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c23ebf75c8429919ee38d484e24f1017bd5c9871 --- /dev/null +++ b/checkpoint-518/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6dfb7d03f25a37c035cd026e39352f24bdf6407f31876ba7c2f3f6a776d5be +size 44805376 diff --git a/checkpoint-518/optimizer.pt b/checkpoint-518/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..22dd78c838a0e3d24720355fba0b8ef40a3bc911 --- /dev/null +++ b/checkpoint-518/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb20e77910242a5ed7c13c2d86e38add035cc2d8a1b6df6a6764cf3d68138638 +size 89553018 diff --git a/checkpoint-518/rng_state.pth b/checkpoint-518/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0735903a8fbb71fe74244efe8f7b80ac1b3cf81a --- /dev/null +++ b/checkpoint-518/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd684e2ff549b612f5deec4c468fa82ece1958f9771f2c30538ac0d3390841b3 +size 13990 diff --git a/checkpoint-518/scheduler.pt b/checkpoint-518/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..48375ccfdb1847b0c6f27c48a42103b0a7f49b68 --- /dev/null +++ b/checkpoint-518/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28706ba75b79e72edec41650d8456eb4229d48e829249c2075ecaa0860280b7f +size 1064 diff --git a/checkpoint-518/trainer_state.json b/checkpoint-518/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2b7d765cca5368b36661efaa538c9361f39abf6c --- /dev/null +++ b/checkpoint-518/trainer_state.json @@ -0,0 +1,724 @@ +{ + "best_global_step": 490, + "best_metric": 0.9636363636363636, + "best_model_checkpoint": "font-identifier/checkpoint-490", + "epoch": 37.0, + "eval_steps": 500, + "global_step": 518, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.2900125485072384e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-518/training_args.bin b/checkpoint-518/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-518/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-532/config.json b/checkpoint-532/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-532/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-532/model.safetensors b/checkpoint-532/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..918e6a04660640dba97b8c083004df3dc7a9ce5c --- /dev/null +++ b/checkpoint-532/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9c56208c5278daaec52da82565138e10480bf92e509b78c6f3f6fb6a181d59 +size 44805376 diff --git a/checkpoint-532/optimizer.pt b/checkpoint-532/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dcfd7a929c79df2a0323225de37a84b389069a27 --- /dev/null +++ b/checkpoint-532/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74bf83151c556237f2a0baeeba73d0b7192355b9fae70f56ae2bf02bfc819fcb +size 89553018 diff --git a/checkpoint-532/rng_state.pth b/checkpoint-532/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1678d1e098d3fea6b70313a10978bf55c977e99d --- /dev/null +++ b/checkpoint-532/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3674f8700b7baa06da893a92224e1feca96ee889df8b7cc13f99c3c497febf5c +size 13990 diff --git a/checkpoint-532/scheduler.pt b/checkpoint-532/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..546d0912ed19817527cdc174010a7d83ebdc3d38 --- /dev/null +++ b/checkpoint-532/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28c368696ad87f5ddb4d6f81c5a6ce00b9a235afc1bb0a8f4522cca7132e912b +size 1064 diff --git a/checkpoint-532/trainer_state.json b/checkpoint-532/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3410f4f37b316bb549b4e06304bb8a72f1b38c49 --- /dev/null +++ b/checkpoint-532/trainer_state.json @@ -0,0 +1,747 @@ +{ + "best_global_step": 490, + "best_metric": 0.9636363636363636, + "best_model_checkpoint": "font-identifier/checkpoint-490", + "epoch": 38.0, + "eval_steps": 500, + "global_step": 532, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.3789318065750016e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-532/training_args.bin b/checkpoint-532/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-532/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-546/config.json b/checkpoint-546/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-546/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-546/model.safetensors b/checkpoint-546/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9aee5f9ae7955c19a8b2b9fcbd478bd9644c2385 --- /dev/null +++ b/checkpoint-546/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c438a9508c4fd6f4c24c4deb92235c80a26aa56e669502acdd1e985fec737adf +size 44805376 diff --git a/checkpoint-546/optimizer.pt b/checkpoint-546/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..91ccc72a2c3a31364920ee175198edfb1ad84ff8 --- /dev/null +++ b/checkpoint-546/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfc8d53976ef362e3a863843f0c7c497426f25465d493940a9fb56f544bdf6bb +size 89553018 diff --git a/checkpoint-546/rng_state.pth b/checkpoint-546/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..63e4e35478cd39128b5570884cbb06018a7c65bf --- /dev/null +++ b/checkpoint-546/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:839f6d935699532b0d93551ca2d5f0a87f655870676a6c3983f756639fafef53 +size 13990 diff --git a/checkpoint-546/scheduler.pt b/checkpoint-546/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b93ec985f8a4945f97f99e234a3880c1b0040bb --- /dev/null +++ b/checkpoint-546/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b20be0e3a259b90e141cba020a818468f2e72ea327a674d8f66972d5807e44c9 +size 1064 diff --git a/checkpoint-546/trainer_state.json b/checkpoint-546/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bfb2321fa72eb29502cb5dcf35f389fda873c982 --- /dev/null +++ b/checkpoint-546/trainer_state.json @@ -0,0 +1,763 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 39.0, + "eval_steps": 500, + "global_step": 546, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.467851064642765e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-546/training_args.bin b/checkpoint-546/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-546/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-56/config.json b/checkpoint-56/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-56/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-56/model.safetensors b/checkpoint-56/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d0102ec21ad8d53cc2da4f1810076c042c24120 --- /dev/null +++ b/checkpoint-56/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0a5d63250d095bb1378c25da6ad08180d791b517ed82182543afa39b65264f9 +size 44805376 diff --git a/checkpoint-56/optimizer.pt b/checkpoint-56/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc47c94786e3683c397932233576a19ddea8bdb2 --- /dev/null +++ b/checkpoint-56/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d9c816fe612c612a757c6598bf2282bfed866c1a7a5fd288e97d13d5f44735 +size 89553018 diff --git a/checkpoint-56/rng_state.pth b/checkpoint-56/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e8090f336859fac8dbd7ae24908eb9f11796875 --- /dev/null +++ b/checkpoint-56/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:453aa5cbab4e8fc7d71376c0467d316d6ba2b414a30cc622e7c3845b257b72e2 +size 13990 diff --git a/checkpoint-56/scheduler.pt b/checkpoint-56/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c712900a19868b21a66cfd6fb2c7b5b6f982f602 --- /dev/null +++ b/checkpoint-56/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7fda463180ea08cd59a664a95fbfff2281b937a8a6cbfb867e876d5053ccf4d +size 1064 diff --git a/checkpoint-56/trainer_state.json b/checkpoint-56/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..497db4284736d209d75469dbaac463845991f517 --- /dev/null +++ b/checkpoint-56/trainer_state.json @@ -0,0 +1,105 @@ +{ + "best_global_step": 56, + "best_metric": 0.37727272727272726, + "best_model_checkpoint": "font-identifier/checkpoint-56", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 56, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.556770322710528e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-56/training_args.bin b/checkpoint-56/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-56/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-560/config.json b/checkpoint-560/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-560/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-560/model.safetensors b/checkpoint-560/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..318fa36195024212a7953088d54f6e838b099384 --- /dev/null +++ b/checkpoint-560/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31dfc17baf66e8adfeac7f7faea99ba185ae419dd5dea6cf06236eda919cbd24 +size 44805376 diff --git a/checkpoint-560/optimizer.pt b/checkpoint-560/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c48615c6ffc6db7ca088f4e37c8ba80f452109bd --- /dev/null +++ b/checkpoint-560/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d883c7a645470cd98bb3906666108508ddebee7fad70a459eac3f880fcc26361 +size 89553018 diff --git a/checkpoint-560/rng_state.pth b/checkpoint-560/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..74437d3c447c83703688c2889a34b05a5575463f --- /dev/null +++ b/checkpoint-560/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9131cbef7edfdbb3c15b2584d4879e615945ecc795a1f03585abc6940d8268 +size 13990 diff --git a/checkpoint-560/scheduler.pt b/checkpoint-560/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4ed8bba25e9d1007fd95b10a1fd3102c098819f --- /dev/null +++ b/checkpoint-560/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b33bed918be1c5464b7ab4fd41fc7f8396f217a7bc296928866c844f1d04e81 +size 1064 diff --git a/checkpoint-560/trainer_state.json b/checkpoint-560/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..34d57167b0c27ee229194f81e8b58ec054b7beaa --- /dev/null +++ b/checkpoint-560/trainer_state.json @@ -0,0 +1,786 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 40.0, + "eval_steps": 500, + "global_step": 560, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.556770322710528e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-560/training_args.bin b/checkpoint-560/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-560/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-574/config.json b/checkpoint-574/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-574/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-574/model.safetensors b/checkpoint-574/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65d74d21688959b9caff06f9cf8c8334ead04c0c --- /dev/null +++ b/checkpoint-574/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac91828f748a02f8bf7312405a585afeed04ec1f0922edde55ae47bac6ad0c9 +size 44805376 diff --git a/checkpoint-574/optimizer.pt b/checkpoint-574/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cd574a6bef26a8e70a1eb08c7d3e1791393c099 --- /dev/null +++ b/checkpoint-574/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4812012b4c97aee21798c47f0329c97c7d8b2f745b15bfab1899e7eaa57e8f +size 89553018 diff --git a/checkpoint-574/rng_state.pth b/checkpoint-574/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8246d263e54f69991a361a40d27c313d258dd23d --- /dev/null +++ b/checkpoint-574/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:404e2b045dfe1a1c2cc4b42e5d79122bbcf341a0a7879ca2e464f4aec996c897 +size 13990 diff --git a/checkpoint-574/scheduler.pt b/checkpoint-574/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ed0c2c6ee68c28d74a8ebd3d37bce8dfeeaf213 --- /dev/null +++ b/checkpoint-574/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d0e52db472c6a77116dc95cebb6ef0940c5451637d50d98140ba3d51fa9efc2 +size 1064 diff --git a/checkpoint-574/trainer_state.json b/checkpoint-574/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..095d4820af0048d9594084b72c7692fe9f7d2120 --- /dev/null +++ b/checkpoint-574/trainer_state.json @@ -0,0 +1,802 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 41.0, + "eval_steps": 500, + "global_step": 574, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + }, + { + "epoch": 40.72727272727273, + "grad_norm": 4.050150394439697, + "learning_rate": 1.0396825396825398e-05, + "loss": 0.1828, + "step": 570 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.13493339717388153, + "eval_runtime": 3.8663, + "eval_samples_per_second": 56.902, + "eval_steps_per_second": 3.621, + "step": 574 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.645689580778291e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-574/training_args.bin b/checkpoint-574/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-574/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-588/config.json b/checkpoint-588/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-588/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-588/model.safetensors b/checkpoint-588/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6fe6d37d0cab735fa9dd7ebb6b42a71f7686b26 --- /dev/null +++ b/checkpoint-588/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6e5345c816eee7c2957b3a2af935c3d1be0cd08b3d6faba2ac06bab5c754586 +size 44805376 diff --git a/checkpoint-588/optimizer.pt b/checkpoint-588/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b35fd4eea33b6d6804036f0d39ffc0e5fc815ba --- /dev/null +++ b/checkpoint-588/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b63bc21bbbb0e9427d5d2ee170565ac49268cfe4d2557d5d32e6545decc624a +size 89553018 diff --git a/checkpoint-588/rng_state.pth b/checkpoint-588/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c5d74d209f03337139e00f4fb24a948a30191bc2 --- /dev/null +++ b/checkpoint-588/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1efbe3eb87b7b52574bd77fd9cc6d84d691dccb910422a2799d55fc8bcf3232 +size 13990 diff --git a/checkpoint-588/scheduler.pt b/checkpoint-588/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..db64c9667881b7b5e0830bcc1b802d30a4eb3812 --- /dev/null +++ b/checkpoint-588/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5508a89056a5b0920290e87f02edf654ab51a74ab3fcac944f591da78017e9e +size 1064 diff --git a/checkpoint-588/trainer_state.json b/checkpoint-588/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b0f092be222f00cbf05c89681481ff0f39b4571f --- /dev/null +++ b/checkpoint-588/trainer_state.json @@ -0,0 +1,818 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 42.0, + "eval_steps": 500, + "global_step": 588, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + }, + { + "epoch": 40.72727272727273, + "grad_norm": 4.050150394439697, + "learning_rate": 1.0396825396825398e-05, + "loss": 0.1828, + "step": 570 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.13493339717388153, + "eval_runtime": 3.8663, + "eval_samples_per_second": 56.902, + "eval_steps_per_second": 3.621, + "step": 574 + }, + { + "epoch": 41.43636363636364, + "grad_norm": 2.726930856704712, + "learning_rate": 9.603174603174605e-06, + "loss": 0.2005, + "step": 580 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1417720913887024, + "eval_runtime": 4.2, + "eval_samples_per_second": 52.381, + "eval_steps_per_second": 3.333, + "step": 588 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.7346088388460544e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-588/training_args.bin b/checkpoint-588/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-588/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-602/config.json b/checkpoint-602/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-602/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-602/model.safetensors b/checkpoint-602/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bc320549d2226e30b4f6ad1b74b060e4f250cd3 --- /dev/null +++ b/checkpoint-602/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5a5b93ecc6d087c38cce1062abc5edc77898251bc32d21e889dbce7ab05273e +size 44805376 diff --git a/checkpoint-602/optimizer.pt b/checkpoint-602/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab651320017d17925461e32d642879ec96c8b408 --- /dev/null +++ b/checkpoint-602/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54cedd217052d5345ffc504f939ff57f060fc10c42412d064bc60ce99d8a25d +size 89553018 diff --git a/checkpoint-602/rng_state.pth b/checkpoint-602/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef374c8120146876acc81fe29848a39c13e61ecf --- /dev/null +++ b/checkpoint-602/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:020ea025d40b320488e5af196555cce07692d620e60b74458429ef25facfb4bd +size 13990 diff --git a/checkpoint-602/scheduler.pt b/checkpoint-602/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f14ce523bfc822c04d2e5d56ca0a0b2408a4d211 --- /dev/null +++ b/checkpoint-602/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95f68e4e1ea84b6fca6b0f4aa4929585ad7866329cf4b3854dc8dc4924c894bb +size 1064 diff --git a/checkpoint-602/trainer_state.json b/checkpoint-602/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..78853fc49f2002d13aa18d31fd51f11315988298 --- /dev/null +++ b/checkpoint-602/trainer_state.json @@ -0,0 +1,841 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 43.0, + "eval_steps": 500, + "global_step": 602, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + }, + { + "epoch": 40.72727272727273, + "grad_norm": 4.050150394439697, + "learning_rate": 1.0396825396825398e-05, + "loss": 0.1828, + "step": 570 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.13493339717388153, + "eval_runtime": 3.8663, + "eval_samples_per_second": 56.902, + "eval_steps_per_second": 3.621, + "step": 574 + }, + { + "epoch": 41.43636363636364, + "grad_norm": 2.726930856704712, + "learning_rate": 9.603174603174605e-06, + "loss": 0.2005, + "step": 580 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1417720913887024, + "eval_runtime": 4.2, + "eval_samples_per_second": 52.381, + "eval_steps_per_second": 3.333, + "step": 588 + }, + { + "epoch": 42.14545454545455, + "grad_norm": 5.61188268661499, + "learning_rate": 8.80952380952381e-06, + "loss": 0.1956, + "step": 590 + }, + { + "epoch": 42.872727272727275, + "grad_norm": 2.0175414085388184, + "learning_rate": 8.015873015873016e-06, + "loss": 0.1986, + "step": 600 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.16132618486881256, + "eval_runtime": 3.9372, + "eval_samples_per_second": 55.877, + "eval_steps_per_second": 3.556, + "step": 602 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.8235280969138176e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-602/training_args.bin b/checkpoint-602/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-602/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-616/config.json b/checkpoint-616/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-616/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-616/model.safetensors b/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3e6dffaed3dd2010eae4b4a8b602636e9fefa9f --- /dev/null +++ b/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cabec99f14876b344ded63c7b97fc8a4488865bc940ba6c0045cc5bb08ded88a +size 44805376 diff --git a/checkpoint-616/optimizer.pt b/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9564fa0b47095aac8d5b833cc4390bdf81d783ca --- /dev/null +++ b/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:596501e51f4d7f20bc095d6bad8b9ed5e3b7b5b641de8da71abb4b615f9568a6 +size 89553018 diff --git a/checkpoint-616/rng_state.pth b/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d4e587ac0fdc2399f63a14306ebd2528a58a372d --- /dev/null +++ b/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd35c68dd40954d90d4a377528fd8387f539f232ab42ef3ba340721f0d55f5d0 +size 13990 diff --git a/checkpoint-616/scheduler.pt b/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..34bc5c2e9d285384ee38c36a45901896a420ac1f --- /dev/null +++ b/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:906fab6d26b5259c583f9b6eb6fa266b9f60d76e5d43732d1877243915dd785b +size 1064 diff --git a/checkpoint-616/trainer_state.json b/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a63d5a663aeacbfd52e1fbd43c33f1cbf1ffa6c6 --- /dev/null +++ b/checkpoint-616/trainer_state.json @@ -0,0 +1,857 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 44.0, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + }, + { + "epoch": 40.72727272727273, + "grad_norm": 4.050150394439697, + "learning_rate": 1.0396825396825398e-05, + "loss": 0.1828, + "step": 570 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.13493339717388153, + "eval_runtime": 3.8663, + "eval_samples_per_second": 56.902, + "eval_steps_per_second": 3.621, + "step": 574 + }, + { + "epoch": 41.43636363636364, + "grad_norm": 2.726930856704712, + "learning_rate": 9.603174603174605e-06, + "loss": 0.2005, + "step": 580 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1417720913887024, + "eval_runtime": 4.2, + "eval_samples_per_second": 52.381, + "eval_steps_per_second": 3.333, + "step": 588 + }, + { + "epoch": 42.14545454545455, + "grad_norm": 5.61188268661499, + "learning_rate": 8.80952380952381e-06, + "loss": 0.1956, + "step": 590 + }, + { + "epoch": 42.872727272727275, + "grad_norm": 2.0175414085388184, + "learning_rate": 8.015873015873016e-06, + "loss": 0.1986, + "step": 600 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.16132618486881256, + "eval_runtime": 3.9372, + "eval_samples_per_second": 55.877, + "eval_steps_per_second": 3.556, + "step": 602 + }, + { + "epoch": 43.58181818181818, + "grad_norm": 3.885935068130493, + "learning_rate": 7.222222222222222e-06, + "loss": 0.2012, + "step": 610 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.12059630453586578, + "eval_runtime": 4.4119, + "eval_samples_per_second": 49.865, + "eval_steps_per_second": 3.173, + "step": 616 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.912447354981581e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-616/training_args.bin b/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-630/config.json b/checkpoint-630/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-630/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-630/model.safetensors b/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..feb4ef87a0b9b9f65a8bce70a73fce31823353a6 --- /dev/null +++ b/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4711b0171970836e8f96fc70ea0beddb88df5f6cfda4f39b27177ff79e433c2 +size 44805376 diff --git a/checkpoint-630/optimizer.pt b/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..79e2103ca2eb3fc76d1f6754dd2c9bfc0b7b574b --- /dev/null +++ b/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29d0aa9acf0eff721dc987f8879436674de3798f855fe8ac681ee29e96c8103 +size 89553018 diff --git a/checkpoint-630/rng_state.pth b/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5dd80a6f62d55da34bb1ccf0bd04310f1f43645a --- /dev/null +++ b/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf8c5df955bd8a86253c4128b3015343deec2e66d6f7d9a279da563d4631da8 +size 13990 diff --git a/checkpoint-630/scheduler.pt b/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a14ff274d2f236fa29fc15f2c72cc3a3fd293535 --- /dev/null +++ b/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6091fe9d8e1881b6e8d11e638dc2725ce7440fa52711706ae184fa1ed01cef7 +size 1064 diff --git a/checkpoint-630/trainer_state.json b/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0e74f7c3184a24d998168f490367adb65e3ef9c3 --- /dev/null +++ b/checkpoint-630/trainer_state.json @@ -0,0 +1,880 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 45.0, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + }, + { + "epoch": 40.72727272727273, + "grad_norm": 4.050150394439697, + "learning_rate": 1.0396825396825398e-05, + "loss": 0.1828, + "step": 570 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.13493339717388153, + "eval_runtime": 3.8663, + "eval_samples_per_second": 56.902, + "eval_steps_per_second": 3.621, + "step": 574 + }, + { + "epoch": 41.43636363636364, + "grad_norm": 2.726930856704712, + "learning_rate": 9.603174603174605e-06, + "loss": 0.2005, + "step": 580 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1417720913887024, + "eval_runtime": 4.2, + "eval_samples_per_second": 52.381, + "eval_steps_per_second": 3.333, + "step": 588 + }, + { + "epoch": 42.14545454545455, + "grad_norm": 5.61188268661499, + "learning_rate": 8.80952380952381e-06, + "loss": 0.1956, + "step": 590 + }, + { + "epoch": 42.872727272727275, + "grad_norm": 2.0175414085388184, + "learning_rate": 8.015873015873016e-06, + "loss": 0.1986, + "step": 600 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.16132618486881256, + "eval_runtime": 3.9372, + "eval_samples_per_second": 55.877, + "eval_steps_per_second": 3.556, + "step": 602 + }, + { + "epoch": 43.58181818181818, + "grad_norm": 3.885935068130493, + "learning_rate": 7.222222222222222e-06, + "loss": 0.2012, + "step": 610 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.12059630453586578, + "eval_runtime": 4.4119, + "eval_samples_per_second": 49.865, + "eval_steps_per_second": 3.173, + "step": 616 + }, + { + "epoch": 44.29090909090909, + "grad_norm": 4.978816986083984, + "learning_rate": 6.428571428571429e-06, + "loss": 0.2198, + "step": 620 + }, + { + "epoch": 45.0, + "grad_norm": 4.146751403808594, + "learning_rate": 5.634920634920635e-06, + "loss": 0.1494, + "step": 630 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.14049798250198364, + "eval_runtime": 3.9252, + "eval_samples_per_second": 56.048, + "eval_steps_per_second": 3.567, + "step": 630 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.001366613049344e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-630/training_args.bin b/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-644/config.json b/checkpoint-644/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-644/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-644/model.safetensors b/checkpoint-644/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec28aa4424e7ad821f7ccbd0fdaf4bb64cd41774 --- /dev/null +++ b/checkpoint-644/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49711d26607354db63412b0ddd0aff788032dd5aadc558cefe6f5e58a8bb8168 +size 44805376 diff --git a/checkpoint-644/optimizer.pt b/checkpoint-644/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e17c5f2b86b77ab924cbf34874077851815f0c9a --- /dev/null +++ b/checkpoint-644/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:659eaa80b1d6cefb676c356cd5ba8dd36c7557e3a7282ca570f7b82872e6984c +size 89553018 diff --git a/checkpoint-644/rng_state.pth b/checkpoint-644/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c6f3e0e9a9e82e52c68ff62c824d1a3af464fbb --- /dev/null +++ b/checkpoint-644/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048f12ef078366e26568476a4e9c0d6de1b39f5f3cf2d9e463fc90866cc8ccb0 +size 13990 diff --git a/checkpoint-644/scheduler.pt b/checkpoint-644/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf2f78da735df4a549480eabaea0a5db76f5e52d --- /dev/null +++ b/checkpoint-644/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d998306dbea3bba827fe943668b334d3a87c2a13c7ee9d196f13961f065f539e +size 1064 diff --git a/checkpoint-644/trainer_state.json b/checkpoint-644/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..357e0c9d11f96aec153e54b5054f6fa91bccc8a6 --- /dev/null +++ b/checkpoint-644/trainer_state.json @@ -0,0 +1,896 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 46.0, + "eval_steps": 500, + "global_step": 644, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + }, + { + "epoch": 40.72727272727273, + "grad_norm": 4.050150394439697, + "learning_rate": 1.0396825396825398e-05, + "loss": 0.1828, + "step": 570 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.13493339717388153, + "eval_runtime": 3.8663, + "eval_samples_per_second": 56.902, + "eval_steps_per_second": 3.621, + "step": 574 + }, + { + "epoch": 41.43636363636364, + "grad_norm": 2.726930856704712, + "learning_rate": 9.603174603174605e-06, + "loss": 0.2005, + "step": 580 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1417720913887024, + "eval_runtime": 4.2, + "eval_samples_per_second": 52.381, + "eval_steps_per_second": 3.333, + "step": 588 + }, + { + "epoch": 42.14545454545455, + "grad_norm": 5.61188268661499, + "learning_rate": 8.80952380952381e-06, + "loss": 0.1956, + "step": 590 + }, + { + "epoch": 42.872727272727275, + "grad_norm": 2.0175414085388184, + "learning_rate": 8.015873015873016e-06, + "loss": 0.1986, + "step": 600 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.16132618486881256, + "eval_runtime": 3.9372, + "eval_samples_per_second": 55.877, + "eval_steps_per_second": 3.556, + "step": 602 + }, + { + "epoch": 43.58181818181818, + "grad_norm": 3.885935068130493, + "learning_rate": 7.222222222222222e-06, + "loss": 0.2012, + "step": 610 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.12059630453586578, + "eval_runtime": 4.4119, + "eval_samples_per_second": 49.865, + "eval_steps_per_second": 3.173, + "step": 616 + }, + { + "epoch": 44.29090909090909, + "grad_norm": 4.978816986083984, + "learning_rate": 6.428571428571429e-06, + "loss": 0.2198, + "step": 620 + }, + { + "epoch": 45.0, + "grad_norm": 4.146751403808594, + "learning_rate": 5.634920634920635e-06, + "loss": 0.1494, + "step": 630 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.14049798250198364, + "eval_runtime": 3.9252, + "eval_samples_per_second": 56.048, + "eval_steps_per_second": 3.567, + "step": 630 + }, + { + "epoch": 45.72727272727273, + "grad_norm": 4.996974945068359, + "learning_rate": 4.841269841269842e-06, + "loss": 0.1891, + "step": 640 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.11218445748090744, + "eval_runtime": 3.493, + "eval_samples_per_second": 62.984, + "eval_steps_per_second": 4.008, + "step": 644 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.090285871117107e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-644/training_args.bin b/checkpoint-644/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-644/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-658/config.json b/checkpoint-658/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-658/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-658/model.safetensors b/checkpoint-658/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6cc2b57817ded3945dcafd2eef9ea49da07ef463 --- /dev/null +++ b/checkpoint-658/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:360d620bf016a360d6f509c805eb8538e024359a2e3d2fd4d3c60f2ae86c0ae9 +size 44805376 diff --git a/checkpoint-658/optimizer.pt b/checkpoint-658/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbef9cdb932144fcd897c21b80be70d475fa7840 --- /dev/null +++ b/checkpoint-658/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a126e5346e41c20b861097f7ab574057a692ce944d9f1da893145d04944d635 +size 89553018 diff --git a/checkpoint-658/rng_state.pth b/checkpoint-658/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..db2deabf313c49a92b2df8837ae10b1a32bc24ae --- /dev/null +++ b/checkpoint-658/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3893e8b23e998018aeccb235bc00b880a73d677caaa753de4d55fb040fea9976 +size 13990 diff --git a/checkpoint-658/scheduler.pt b/checkpoint-658/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..97b324a0af4e7e7a888f4d4fcb679e27aa66b156 --- /dev/null +++ b/checkpoint-658/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a008a1637d24635c989b1e5a5d72c521adf210b31d7a441e9f1b8bd5e12c5658 +size 1064 diff --git a/checkpoint-658/trainer_state.json b/checkpoint-658/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e45620e34443f365a8bd71e3619eb81009947938 --- /dev/null +++ b/checkpoint-658/trainer_state.json @@ -0,0 +1,912 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 47.0, + "eval_steps": 500, + "global_step": 658, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + }, + { + "epoch": 40.72727272727273, + "grad_norm": 4.050150394439697, + "learning_rate": 1.0396825396825398e-05, + "loss": 0.1828, + "step": 570 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.13493339717388153, + "eval_runtime": 3.8663, + "eval_samples_per_second": 56.902, + "eval_steps_per_second": 3.621, + "step": 574 + }, + { + "epoch": 41.43636363636364, + "grad_norm": 2.726930856704712, + "learning_rate": 9.603174603174605e-06, + "loss": 0.2005, + "step": 580 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1417720913887024, + "eval_runtime": 4.2, + "eval_samples_per_second": 52.381, + "eval_steps_per_second": 3.333, + "step": 588 + }, + { + "epoch": 42.14545454545455, + "grad_norm": 5.61188268661499, + "learning_rate": 8.80952380952381e-06, + "loss": 0.1956, + "step": 590 + }, + { + "epoch": 42.872727272727275, + "grad_norm": 2.0175414085388184, + "learning_rate": 8.015873015873016e-06, + "loss": 0.1986, + "step": 600 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.16132618486881256, + "eval_runtime": 3.9372, + "eval_samples_per_second": 55.877, + "eval_steps_per_second": 3.556, + "step": 602 + }, + { + "epoch": 43.58181818181818, + "grad_norm": 3.885935068130493, + "learning_rate": 7.222222222222222e-06, + "loss": 0.2012, + "step": 610 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.12059630453586578, + "eval_runtime": 4.4119, + "eval_samples_per_second": 49.865, + "eval_steps_per_second": 3.173, + "step": 616 + }, + { + "epoch": 44.29090909090909, + "grad_norm": 4.978816986083984, + "learning_rate": 6.428571428571429e-06, + "loss": 0.2198, + "step": 620 + }, + { + "epoch": 45.0, + "grad_norm": 4.146751403808594, + "learning_rate": 5.634920634920635e-06, + "loss": 0.1494, + "step": 630 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.14049798250198364, + "eval_runtime": 3.9252, + "eval_samples_per_second": 56.048, + "eval_steps_per_second": 3.567, + "step": 630 + }, + { + "epoch": 45.72727272727273, + "grad_norm": 4.996974945068359, + "learning_rate": 4.841269841269842e-06, + "loss": 0.1891, + "step": 640 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.11218445748090744, + "eval_runtime": 3.493, + "eval_samples_per_second": 62.984, + "eval_steps_per_second": 4.008, + "step": 644 + }, + { + "epoch": 46.43636363636364, + "grad_norm": 5.156983375549316, + "learning_rate": 4.047619047619048e-06, + "loss": 0.2012, + "step": 650 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12153752893209457, + "eval_runtime": 4.0405, + "eval_samples_per_second": 54.449, + "eval_steps_per_second": 3.465, + "step": 658 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.1792051291848704e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-658/training_args.bin b/checkpoint-658/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-658/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-672/config.json b/checkpoint-672/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-672/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-672/model.safetensors b/checkpoint-672/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77d91b9c9cd8e7bb6acc51fa3c951fee6c8b30c4 --- /dev/null +++ b/checkpoint-672/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec334aaf3d53a0a1b8c8a1cdeedd8dda02dd117c2646147ae83d8ae8bd579de4 +size 44805376 diff --git a/checkpoint-672/optimizer.pt b/checkpoint-672/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf76f4aa616443f289227e526c711c38c6e51f80 --- /dev/null +++ b/checkpoint-672/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd0fcf7d974b762f438201d6f32f93df667e4de3c1ed9c391b4a50b87849efb9 +size 89553018 diff --git a/checkpoint-672/rng_state.pth b/checkpoint-672/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f192c52806a39ebd2daf48c7503410e01019ec6c --- /dev/null +++ b/checkpoint-672/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7267ec6705af7fe46146a464217201d1f490ab4a485cc449b919c4f804e4b600 +size 13990 diff --git a/checkpoint-672/scheduler.pt b/checkpoint-672/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..81bd37ff489967cdf666314e96d98027693b1c10 --- /dev/null +++ b/checkpoint-672/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abfe7541daada90a977cd8c52d4dc664864d17c934908328ba62819f0551e58b +size 1064 diff --git a/checkpoint-672/trainer_state.json b/checkpoint-672/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ef6da9c082103b906f3ca8f6a51e17e825711445 --- /dev/null +++ b/checkpoint-672/trainer_state.json @@ -0,0 +1,935 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 48.0, + "eval_steps": 500, + "global_step": 672, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + }, + { + "epoch": 40.72727272727273, + "grad_norm": 4.050150394439697, + "learning_rate": 1.0396825396825398e-05, + "loss": 0.1828, + "step": 570 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.13493339717388153, + "eval_runtime": 3.8663, + "eval_samples_per_second": 56.902, + "eval_steps_per_second": 3.621, + "step": 574 + }, + { + "epoch": 41.43636363636364, + "grad_norm": 2.726930856704712, + "learning_rate": 9.603174603174605e-06, + "loss": 0.2005, + "step": 580 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1417720913887024, + "eval_runtime": 4.2, + "eval_samples_per_second": 52.381, + "eval_steps_per_second": 3.333, + "step": 588 + }, + { + "epoch": 42.14545454545455, + "grad_norm": 5.61188268661499, + "learning_rate": 8.80952380952381e-06, + "loss": 0.1956, + "step": 590 + }, + { + "epoch": 42.872727272727275, + "grad_norm": 2.0175414085388184, + "learning_rate": 8.015873015873016e-06, + "loss": 0.1986, + "step": 600 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.16132618486881256, + "eval_runtime": 3.9372, + "eval_samples_per_second": 55.877, + "eval_steps_per_second": 3.556, + "step": 602 + }, + { + "epoch": 43.58181818181818, + "grad_norm": 3.885935068130493, + "learning_rate": 7.222222222222222e-06, + "loss": 0.2012, + "step": 610 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.12059630453586578, + "eval_runtime": 4.4119, + "eval_samples_per_second": 49.865, + "eval_steps_per_second": 3.173, + "step": 616 + }, + { + "epoch": 44.29090909090909, + "grad_norm": 4.978816986083984, + "learning_rate": 6.428571428571429e-06, + "loss": 0.2198, + "step": 620 + }, + { + "epoch": 45.0, + "grad_norm": 4.146751403808594, + "learning_rate": 5.634920634920635e-06, + "loss": 0.1494, + "step": 630 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.14049798250198364, + "eval_runtime": 3.9252, + "eval_samples_per_second": 56.048, + "eval_steps_per_second": 3.567, + "step": 630 + }, + { + "epoch": 45.72727272727273, + "grad_norm": 4.996974945068359, + "learning_rate": 4.841269841269842e-06, + "loss": 0.1891, + "step": 640 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.11218445748090744, + "eval_runtime": 3.493, + "eval_samples_per_second": 62.984, + "eval_steps_per_second": 4.008, + "step": 644 + }, + { + "epoch": 46.43636363636364, + "grad_norm": 5.156983375549316, + "learning_rate": 4.047619047619048e-06, + "loss": 0.2012, + "step": 650 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12153752893209457, + "eval_runtime": 4.0405, + "eval_samples_per_second": 54.449, + "eval_steps_per_second": 3.465, + "step": 658 + }, + { + "epoch": 47.14545454545455, + "grad_norm": 5.211794853210449, + "learning_rate": 3.2539682539682544e-06, + "loss": 0.1896, + "step": 660 + }, + { + "epoch": 47.872727272727275, + "grad_norm": 3.83272123336792, + "learning_rate": 2.4603174603174605e-06, + "loss": 0.181, + "step": 670 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1783747524023056, + "eval_runtime": 4.3084, + "eval_samples_per_second": 51.063, + "eval_steps_per_second": 3.249, + "step": 672 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.2681243872526336e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-672/training_args.bin b/checkpoint-672/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-672/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-686/config.json b/checkpoint-686/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-686/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-686/model.safetensors b/checkpoint-686/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef02ef380311b2f59251160b4d61f3fbb46a46be --- /dev/null +++ b/checkpoint-686/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27c53be8fbc40168687be16ac5638abf0f37ddd69e747ada1d2ef9068044cf62 +size 44805376 diff --git a/checkpoint-686/optimizer.pt b/checkpoint-686/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..72542dfde882041e267684eca8b54a11e71eee0a --- /dev/null +++ b/checkpoint-686/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45e6349fdf9d352f00acbeb7f89d1702556fa7799206dc7caa5612f749514cff +size 89553018 diff --git a/checkpoint-686/rng_state.pth b/checkpoint-686/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..eb3ac38fed6a794dccf2e5285d148fffe4164cfb --- /dev/null +++ b/checkpoint-686/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56c1b4c5606308003683c164a07bbe9f38201b80caeb7eb9b84b154ceb47fc89 +size 13990 diff --git a/checkpoint-686/scheduler.pt b/checkpoint-686/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8b09651468c53e546af74b00a829055fe538317 --- /dev/null +++ b/checkpoint-686/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ad8e75e716b4acd7eb302f79380ea56b4811c52ed7376ab91e95759b2cf7c9 +size 1064 diff --git a/checkpoint-686/trainer_state.json b/checkpoint-686/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaf6d7e70f320d7c8ae9e56759cc649f4092678 --- /dev/null +++ b/checkpoint-686/trainer_state.json @@ -0,0 +1,951 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 49.0, + "eval_steps": 500, + "global_step": 686, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + }, + { + "epoch": 40.72727272727273, + "grad_norm": 4.050150394439697, + "learning_rate": 1.0396825396825398e-05, + "loss": 0.1828, + "step": 570 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.13493339717388153, + "eval_runtime": 3.8663, + "eval_samples_per_second": 56.902, + "eval_steps_per_second": 3.621, + "step": 574 + }, + { + "epoch": 41.43636363636364, + "grad_norm": 2.726930856704712, + "learning_rate": 9.603174603174605e-06, + "loss": 0.2005, + "step": 580 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1417720913887024, + "eval_runtime": 4.2, + "eval_samples_per_second": 52.381, + "eval_steps_per_second": 3.333, + "step": 588 + }, + { + "epoch": 42.14545454545455, + "grad_norm": 5.61188268661499, + "learning_rate": 8.80952380952381e-06, + "loss": 0.1956, + "step": 590 + }, + { + "epoch": 42.872727272727275, + "grad_norm": 2.0175414085388184, + "learning_rate": 8.015873015873016e-06, + "loss": 0.1986, + "step": 600 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.16132618486881256, + "eval_runtime": 3.9372, + "eval_samples_per_second": 55.877, + "eval_steps_per_second": 3.556, + "step": 602 + }, + { + "epoch": 43.58181818181818, + "grad_norm": 3.885935068130493, + "learning_rate": 7.222222222222222e-06, + "loss": 0.2012, + "step": 610 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.12059630453586578, + "eval_runtime": 4.4119, + "eval_samples_per_second": 49.865, + "eval_steps_per_second": 3.173, + "step": 616 + }, + { + "epoch": 44.29090909090909, + "grad_norm": 4.978816986083984, + "learning_rate": 6.428571428571429e-06, + "loss": 0.2198, + "step": 620 + }, + { + "epoch": 45.0, + "grad_norm": 4.146751403808594, + "learning_rate": 5.634920634920635e-06, + "loss": 0.1494, + "step": 630 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.14049798250198364, + "eval_runtime": 3.9252, + "eval_samples_per_second": 56.048, + "eval_steps_per_second": 3.567, + "step": 630 + }, + { + "epoch": 45.72727272727273, + "grad_norm": 4.996974945068359, + "learning_rate": 4.841269841269842e-06, + "loss": 0.1891, + "step": 640 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.11218445748090744, + "eval_runtime": 3.493, + "eval_samples_per_second": 62.984, + "eval_steps_per_second": 4.008, + "step": 644 + }, + { + "epoch": 46.43636363636364, + "grad_norm": 5.156983375549316, + "learning_rate": 4.047619047619048e-06, + "loss": 0.2012, + "step": 650 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12153752893209457, + "eval_runtime": 4.0405, + "eval_samples_per_second": 54.449, + "eval_steps_per_second": 3.465, + "step": 658 + }, + { + "epoch": 47.14545454545455, + "grad_norm": 5.211794853210449, + "learning_rate": 3.2539682539682544e-06, + "loss": 0.1896, + "step": 660 + }, + { + "epoch": 47.872727272727275, + "grad_norm": 3.83272123336792, + "learning_rate": 2.4603174603174605e-06, + "loss": 0.181, + "step": 670 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1783747524023056, + "eval_runtime": 4.3084, + "eval_samples_per_second": 51.063, + "eval_steps_per_second": 3.249, + "step": 672 + }, + { + "epoch": 48.58181818181818, + "grad_norm": 4.118733882904053, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.1757, + "step": 680 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.17033246159553528, + "eval_runtime": 4.1496, + "eval_samples_per_second": 53.017, + "eval_steps_per_second": 3.374, + "step": 686 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.357043645320397e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-686/training_args.bin b/checkpoint-686/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-686/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-70/config.json b/checkpoint-70/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-70/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-70/model.safetensors b/checkpoint-70/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c7ed0fc13bee635d46581f84176719d83c4c2e8 --- /dev/null +++ b/checkpoint-70/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7622451158c2051be176aa24757db135b9e08b41ae079d954a270a8282ddeee +size 44805376 diff --git a/checkpoint-70/optimizer.pt b/checkpoint-70/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..40a53157bdbbe9bd4c610b817312419de483a24b --- /dev/null +++ b/checkpoint-70/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e32bd9620376a8953355477e4eb05b08350e4c8a37f0836cca679be84865f61 +size 89553018 diff --git a/checkpoint-70/rng_state.pth b/checkpoint-70/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..40f17de13348dba0f84dfc884f96803ac218675c --- /dev/null +++ b/checkpoint-70/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0745187fc155f9ec36f9b7d509c778a859d0cc422de08c3f3e9056a2e72eeab +size 13990 diff --git a/checkpoint-70/scheduler.pt b/checkpoint-70/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d7c9d7951df539e7fe7843dcef7c570613d871f --- /dev/null +++ b/checkpoint-70/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8718b5e36d6e8be5a1f7827ad6560c77655adf7a440e2c7b21fcb0bb54ed010 +size 1064 diff --git a/checkpoint-70/trainer_state.json b/checkpoint-70/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..15e8019f05d6cf0af11a4c378eea959a333a43e1 --- /dev/null +++ b/checkpoint-70/trainer_state.json @@ -0,0 +1,128 @@ +{ + "best_global_step": 70, + "best_metric": 0.5363636363636364, + "best_model_checkpoint": "font-identifier/checkpoint-70", + "epoch": 5.0, + "eval_steps": 500, + "global_step": 70, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.44596290338816e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-70/training_args.bin b/checkpoint-70/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-70/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-700/config.json b/checkpoint-700/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-700/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-700/model.safetensors b/checkpoint-700/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c2d2a05e95b78b43b9ccafbb3a2badaac5522bb --- /dev/null +++ b/checkpoint-700/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76c2615fd5175cab0322f62e5932a8f6b4b685f492f60b73011c543a15372b1e +size 44805376 diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..40aa961c1a440b692345f2735b8e28db9f67e8d3 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4393f0de1c27fdaa70e801852ce49c2a4ef4eeacc4e4ed64807b881091cecd43 +size 89553018 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f5b6418e8c91a74b03c25609d223d4075a46b8a --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9fa8beaba7ef062bc00c42b808412ec328015242cfb692bcbfd5a2ea2b5e670 +size 13990 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f663f8bb5b81d81fd63167397553b1c23fc99e44 --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5f6ae3291c22c58bd9cc47d5dbc69751dd18ed7942915e3be3c0447b7f4fb81 +size 1064 diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..10e9dd65ade49bd51590dd57d5be8645f05a791b --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,974 @@ +{ + "best_global_step": 546, + "best_metric": 0.9727272727272728, + "best_model_checkpoint": "font-identifier/checkpoint-546", + "epoch": 50.0, + "eval_steps": 500, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + }, + { + "epoch": 7.1454545454545455, + "grad_norm": 6.231616973876953, + "learning_rate": 4.7698412698412706e-05, + "loss": 1.2501, + "step": 100 + }, + { + "epoch": 7.872727272727273, + "grad_norm": 5.9844584465026855, + "learning_rate": 4.690476190476191e-05, + "loss": 1.076, + "step": 110 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6909090909090909, + "eval_loss": 0.8790816068649292, + "eval_runtime": 4.5841, + "eval_samples_per_second": 47.992, + "eval_steps_per_second": 3.054, + "step": 112 + }, + { + "epoch": 8.581818181818182, + "grad_norm": 5.29907751083374, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.9811, + "step": 120 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7545454545454545, + "eval_loss": 0.7573429346084595, + "eval_runtime": 4.4185, + "eval_samples_per_second": 49.79, + "eval_steps_per_second": 3.168, + "step": 126 + }, + { + "epoch": 9.290909090909091, + "grad_norm": 5.92799711227417, + "learning_rate": 4.531746031746032e-05, + "loss": 0.8655, + "step": 130 + }, + { + "epoch": 10.0, + "grad_norm": 3.577127695083618, + "learning_rate": 4.4523809523809525e-05, + "loss": 0.7309, + "step": 140 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7818181818181819, + "eval_loss": 0.6194924712181091, + "eval_runtime": 4.0981, + "eval_samples_per_second": 53.683, + "eval_steps_per_second": 3.416, + "step": 140 + }, + { + "epoch": 10.727272727272727, + "grad_norm": 5.252236843109131, + "learning_rate": 4.373015873015873e-05, + "loss": 0.7776, + "step": 150 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.8, + "eval_loss": 0.5426079034805298, + "eval_runtime": 4.0014, + "eval_samples_per_second": 54.981, + "eval_steps_per_second": 3.499, + "step": 154 + }, + { + "epoch": 11.436363636363636, + "grad_norm": 4.398848533630371, + "learning_rate": 4.2936507936507935e-05, + "loss": 0.7365, + "step": 160 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8772727272727273, + "eval_loss": 0.40285420417785645, + "eval_runtime": 4.3238, + "eval_samples_per_second": 50.882, + "eval_steps_per_second": 3.238, + "step": 168 + }, + { + "epoch": 12.145454545454545, + "grad_norm": 5.8070549964904785, + "learning_rate": 4.214285714285714e-05, + "loss": 0.6116, + "step": 170 + }, + { + "epoch": 12.872727272727273, + "grad_norm": 6.383784770965576, + "learning_rate": 4.134920634920635e-05, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.8363636363636363, + "eval_loss": 0.4417766332626343, + "eval_runtime": 4.8355, + "eval_samples_per_second": 45.497, + "eval_steps_per_second": 2.895, + "step": 182 + }, + { + "epoch": 13.581818181818182, + "grad_norm": 4.959994316101074, + "learning_rate": 4.055555555555556e-05, + "loss": 0.5838, + "step": 190 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.35380780696868896, + "eval_runtime": 3.8077, + "eval_samples_per_second": 57.777, + "eval_steps_per_second": 3.677, + "step": 196 + }, + { + "epoch": 14.290909090909091, + "grad_norm": 5.2857770919799805, + "learning_rate": 3.976190476190476e-05, + "loss": 0.4826, + "step": 200 + }, + { + "epoch": 15.0, + "grad_norm": 3.971461296081543, + "learning_rate": 3.896825396825397e-05, + "loss": 0.4491, + "step": 210 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.3833666145801544, + "eval_runtime": 4.0015, + "eval_samples_per_second": 54.979, + "eval_steps_per_second": 3.499, + "step": 210 + }, + { + "epoch": 15.727272727272727, + "grad_norm": 4.3175764083862305, + "learning_rate": 3.817460317460317e-05, + "loss": 0.5056, + "step": 220 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9227272727272727, + "eval_loss": 0.27014854550361633, + "eval_runtime": 3.7487, + "eval_samples_per_second": 58.687, + "eval_steps_per_second": 3.735, + "step": 224 + }, + { + "epoch": 16.436363636363637, + "grad_norm": 4.672767162322998, + "learning_rate": 3.7380952380952386e-05, + "loss": 0.4364, + "step": 230 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3142429292201996, + "eval_runtime": 3.9413, + "eval_samples_per_second": 55.819, + "eval_steps_per_second": 3.552, + "step": 238 + }, + { + "epoch": 17.145454545454545, + "grad_norm": 5.194856643676758, + "learning_rate": 3.658730158730159e-05, + "loss": 0.4631, + "step": 240 + }, + { + "epoch": 17.87272727272727, + "grad_norm": 5.319342136383057, + "learning_rate": 3.5793650793650795e-05, + "loss": 0.364, + "step": 250 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.2616939842700958, + "eval_runtime": 3.7398, + "eval_samples_per_second": 58.827, + "eval_steps_per_second": 3.744, + "step": 252 + }, + { + "epoch": 18.581818181818182, + "grad_norm": 5.951942443847656, + "learning_rate": 3.5e-05, + "loss": 0.3845, + "step": 260 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.8818181818181818, + "eval_loss": 0.3091982901096344, + "eval_runtime": 4.1464, + "eval_samples_per_second": 53.059, + "eval_steps_per_second": 3.376, + "step": 266 + }, + { + "epoch": 19.29090909090909, + "grad_norm": 4.990575790405273, + "learning_rate": 3.420634920634921e-05, + "loss": 0.4096, + "step": 270 + }, + { + "epoch": 20.0, + "grad_norm": 4.905520439147949, + "learning_rate": 3.3412698412698413e-05, + "loss": 0.3873, + "step": 280 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.23085929453372955, + "eval_runtime": 4.0568, + "eval_samples_per_second": 54.23, + "eval_steps_per_second": 3.451, + "step": 280 + }, + { + "epoch": 20.727272727272727, + "grad_norm": 3.991994857788086, + "learning_rate": 3.261904761904762e-05, + "loss": 0.3397, + "step": 290 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.9181818181818182, + "eval_loss": 0.22669094800949097, + "eval_runtime": 3.7565, + "eval_samples_per_second": 58.566, + "eval_steps_per_second": 3.727, + "step": 294 + }, + { + "epoch": 21.436363636363637, + "grad_norm": 3.291044235229492, + "learning_rate": 3.182539682539682e-05, + "loss": 0.3731, + "step": 300 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9136363636363637, + "eval_loss": 0.22054576873779297, + "eval_runtime": 3.7364, + "eval_samples_per_second": 58.88, + "eval_steps_per_second": 3.747, + "step": 308 + }, + { + "epoch": 22.145454545454545, + "grad_norm": 5.276898384094238, + "learning_rate": 3.103174603174603e-05, + "loss": 0.3431, + "step": 310 + }, + { + "epoch": 22.87272727272727, + "grad_norm": 4.37393856048584, + "learning_rate": 3.0238095238095236e-05, + "loss": 0.329, + "step": 320 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.95, + "eval_loss": 0.1515827178955078, + "eval_runtime": 3.8186, + "eval_samples_per_second": 57.612, + "eval_steps_per_second": 3.666, + "step": 322 + }, + { + "epoch": 23.581818181818182, + "grad_norm": 6.394493103027344, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.3041, + "step": 330 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.20813795924186707, + "eval_runtime": 3.6425, + "eval_samples_per_second": 60.399, + "eval_steps_per_second": 3.844, + "step": 336 + }, + { + "epoch": 24.29090909090909, + "grad_norm": 4.260524272918701, + "learning_rate": 2.865079365079365e-05, + "loss": 0.2926, + "step": 340 + }, + { + "epoch": 25.0, + "grad_norm": 4.748322486877441, + "learning_rate": 2.785714285714286e-05, + "loss": 0.2996, + "step": 350 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.18759672343730927, + "eval_runtime": 3.5621, + "eval_samples_per_second": 61.761, + "eval_steps_per_second": 3.93, + "step": 350 + }, + { + "epoch": 25.727272727272727, + "grad_norm": 3.307917356491089, + "learning_rate": 2.7063492063492062e-05, + "loss": 0.2825, + "step": 360 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.224106565117836, + "eval_runtime": 3.6057, + "eval_samples_per_second": 61.015, + "eval_steps_per_second": 3.883, + "step": 364 + }, + { + "epoch": 26.436363636363637, + "grad_norm": 4.714457988739014, + "learning_rate": 2.626984126984127e-05, + "loss": 0.2929, + "step": 370 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.2054794728755951, + "eval_runtime": 3.627, + "eval_samples_per_second": 60.656, + "eval_steps_per_second": 3.86, + "step": 378 + }, + { + "epoch": 27.145454545454545, + "grad_norm": 3.81807541847229, + "learning_rate": 2.5476190476190476e-05, + "loss": 0.296, + "step": 380 + }, + { + "epoch": 27.87272727272727, + "grad_norm": 3.4400370121002197, + "learning_rate": 2.4682539682539684e-05, + "loss": 0.2574, + "step": 390 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9318181818181818, + "eval_loss": 0.16666613519191742, + "eval_runtime": 3.5829, + "eval_samples_per_second": 61.402, + "eval_steps_per_second": 3.907, + "step": 392 + }, + { + "epoch": 28.581818181818182, + "grad_norm": 4.114112854003906, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.2662, + "step": 400 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.1585608422756195, + "eval_runtime": 3.738, + "eval_samples_per_second": 58.855, + "eval_steps_per_second": 3.745, + "step": 406 + }, + { + "epoch": 29.29090909090909, + "grad_norm": 4.3739728927612305, + "learning_rate": 2.3095238095238097e-05, + "loss": 0.2585, + "step": 410 + }, + { + "epoch": 30.0, + "grad_norm": 2.7083423137664795, + "learning_rate": 2.2301587301587305e-05, + "loss": 0.2391, + "step": 420 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.9272727272727272, + "eval_loss": 0.17817389965057373, + "eval_runtime": 3.6137, + "eval_samples_per_second": 60.879, + "eval_steps_per_second": 3.874, + "step": 420 + }, + { + "epoch": 30.727272727272727, + "grad_norm": 5.332400321960449, + "learning_rate": 2.150793650793651e-05, + "loss": 0.2642, + "step": 430 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.9409090909090909, + "eval_loss": 0.15896284580230713, + "eval_runtime": 3.6786, + "eval_samples_per_second": 59.805, + "eval_steps_per_second": 3.806, + "step": 434 + }, + { + "epoch": 31.436363636363637, + "grad_norm": 2.9920341968536377, + "learning_rate": 2.0714285714285718e-05, + "loss": 0.2323, + "step": 440 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.1662004142999649, + "eval_runtime": 3.5572, + "eval_samples_per_second": 61.846, + "eval_steps_per_second": 3.936, + "step": 448 + }, + { + "epoch": 32.14545454545455, + "grad_norm": 4.736130714416504, + "learning_rate": 1.992063492063492e-05, + "loss": 0.2073, + "step": 450 + }, + { + "epoch": 32.872727272727275, + "grad_norm": 2.845208168029785, + "learning_rate": 1.9126984126984128e-05, + "loss": 0.2261, + "step": 460 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.15488994121551514, + "eval_runtime": 3.6011, + "eval_samples_per_second": 61.092, + "eval_steps_per_second": 3.888, + "step": 462 + }, + { + "epoch": 33.58181818181818, + "grad_norm": 3.733150005340576, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.2116, + "step": 470 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.95, + "eval_loss": 0.15380583703517914, + "eval_runtime": 3.5186, + "eval_samples_per_second": 62.525, + "eval_steps_per_second": 3.979, + "step": 476 + }, + { + "epoch": 34.29090909090909, + "grad_norm": 3.829446315765381, + "learning_rate": 1.7539682539682538e-05, + "loss": 0.2694, + "step": 480 + }, + { + "epoch": 35.0, + "grad_norm": 3.4856090545654297, + "learning_rate": 1.6746031746031746e-05, + "loss": 0.211, + "step": 490 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.14965535700321198, + "eval_runtime": 3.5432, + "eval_samples_per_second": 62.091, + "eval_steps_per_second": 3.951, + "step": 490 + }, + { + "epoch": 35.72727272727273, + "grad_norm": 3.3690829277038574, + "learning_rate": 1.595238095238095e-05, + "loss": 0.2472, + "step": 500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.15793798863887787, + "eval_runtime": 3.5484, + "eval_samples_per_second": 62.0, + "eval_steps_per_second": 3.945, + "step": 504 + }, + { + "epoch": 36.43636363636364, + "grad_norm": 4.752399444580078, + "learning_rate": 1.5158730158730159e-05, + "loss": 0.2185, + "step": 510 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12271636724472046, + "eval_runtime": 3.9513, + "eval_samples_per_second": 55.678, + "eval_steps_per_second": 3.543, + "step": 518 + }, + { + "epoch": 37.14545454545455, + "grad_norm": 3.8303024768829346, + "learning_rate": 1.4365079365079364e-05, + "loss": 0.1708, + "step": 520 + }, + { + "epoch": 37.872727272727275, + "grad_norm": 3.642642021179199, + "learning_rate": 1.357142857142857e-05, + "loss": 0.2123, + "step": 530 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.95, + "eval_loss": 0.13889062404632568, + "eval_runtime": 3.9208, + "eval_samples_per_second": 56.111, + "eval_steps_per_second": 3.571, + "step": 532 + }, + { + "epoch": 38.58181818181818, + "grad_norm": 2.8652360439300537, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.1691, + "step": 540 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.10398547351360321, + "eval_runtime": 4.7148, + "eval_samples_per_second": 46.662, + "eval_steps_per_second": 2.969, + "step": 546 + }, + { + "epoch": 39.29090909090909, + "grad_norm": 4.876323223114014, + "learning_rate": 1.1984126984126985e-05, + "loss": 0.1969, + "step": 550 + }, + { + "epoch": 40.0, + "grad_norm": 3.435255289077759, + "learning_rate": 1.1190476190476192e-05, + "loss": 0.1805, + "step": 560 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.9545454545454546, + "eval_loss": 0.14451798796653748, + "eval_runtime": 5.5215, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 2.536, + "step": 560 + }, + { + "epoch": 40.72727272727273, + "grad_norm": 4.050150394439697, + "learning_rate": 1.0396825396825398e-05, + "loss": 0.1828, + "step": 570 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.13493339717388153, + "eval_runtime": 3.8663, + "eval_samples_per_second": 56.902, + "eval_steps_per_second": 3.621, + "step": 574 + }, + { + "epoch": 41.43636363636364, + "grad_norm": 2.726930856704712, + "learning_rate": 9.603174603174605e-06, + "loss": 0.2005, + "step": 580 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1417720913887024, + "eval_runtime": 4.2, + "eval_samples_per_second": 52.381, + "eval_steps_per_second": 3.333, + "step": 588 + }, + { + "epoch": 42.14545454545455, + "grad_norm": 5.61188268661499, + "learning_rate": 8.80952380952381e-06, + "loss": 0.1956, + "step": 590 + }, + { + "epoch": 42.872727272727275, + "grad_norm": 2.0175414085388184, + "learning_rate": 8.015873015873016e-06, + "loss": 0.1986, + "step": 600 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.16132618486881256, + "eval_runtime": 3.9372, + "eval_samples_per_second": 55.877, + "eval_steps_per_second": 3.556, + "step": 602 + }, + { + "epoch": 43.58181818181818, + "grad_norm": 3.885935068130493, + "learning_rate": 7.222222222222222e-06, + "loss": 0.2012, + "step": 610 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.12059630453586578, + "eval_runtime": 4.4119, + "eval_samples_per_second": 49.865, + "eval_steps_per_second": 3.173, + "step": 616 + }, + { + "epoch": 44.29090909090909, + "grad_norm": 4.978816986083984, + "learning_rate": 6.428571428571429e-06, + "loss": 0.2198, + "step": 620 + }, + { + "epoch": 45.0, + "grad_norm": 4.146751403808594, + "learning_rate": 5.634920634920635e-06, + "loss": 0.1494, + "step": 630 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.9590909090909091, + "eval_loss": 0.14049798250198364, + "eval_runtime": 3.9252, + "eval_samples_per_second": 56.048, + "eval_steps_per_second": 3.567, + "step": 630 + }, + { + "epoch": 45.72727272727273, + "grad_norm": 4.996974945068359, + "learning_rate": 4.841269841269842e-06, + "loss": 0.1891, + "step": 640 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.9727272727272728, + "eval_loss": 0.11218445748090744, + "eval_runtime": 3.493, + "eval_samples_per_second": 62.984, + "eval_steps_per_second": 4.008, + "step": 644 + }, + { + "epoch": 46.43636363636364, + "grad_norm": 5.156983375549316, + "learning_rate": 4.047619047619048e-06, + "loss": 0.2012, + "step": 650 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.9636363636363636, + "eval_loss": 0.12153752893209457, + "eval_runtime": 4.0405, + "eval_samples_per_second": 54.449, + "eval_steps_per_second": 3.465, + "step": 658 + }, + { + "epoch": 47.14545454545455, + "grad_norm": 5.211794853210449, + "learning_rate": 3.2539682539682544e-06, + "loss": 0.1896, + "step": 660 + }, + { + "epoch": 47.872727272727275, + "grad_norm": 3.83272123336792, + "learning_rate": 2.4603174603174605e-06, + "loss": 0.181, + "step": 670 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.9454545454545454, + "eval_loss": 0.1783747524023056, + "eval_runtime": 4.3084, + "eval_samples_per_second": 51.063, + "eval_steps_per_second": 3.249, + "step": 672 + }, + { + "epoch": 48.58181818181818, + "grad_norm": 4.118733882904053, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.1757, + "step": 680 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.9363636363636364, + "eval_loss": 0.17033246159553528, + "eval_runtime": 4.1496, + "eval_samples_per_second": 53.017, + "eval_steps_per_second": 3.374, + "step": 686 + }, + { + "epoch": 49.29090909090909, + "grad_norm": 2.3046481609344482, + "learning_rate": 8.730158730158731e-07, + "loss": 0.1828, + "step": 690 + }, + { + "epoch": 50.0, + "grad_norm": 4.6628217697143555, + "learning_rate": 7.936507936507937e-08, + "loss": 0.1603, + "step": 700 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.9681818181818181, + "eval_loss": 0.1184125617146492, + "eval_runtime": 3.6328, + "eval_samples_per_second": 60.559, + "eval_steps_per_second": 3.854, + "step": 700 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.44596290338816e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-84/config.json b/checkpoint-84/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-84/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-84/model.safetensors b/checkpoint-84/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0fd2fd78f54a6ed19ef62e3a1835252f3b52a01 --- /dev/null +++ b/checkpoint-84/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d902a2b2b4055519f48525fd41893ec664802a549f99c794bd74c564fb0366 +size 44805376 diff --git a/checkpoint-84/optimizer.pt b/checkpoint-84/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..71447dc127896cc0c58bb04486b3919f763ecf2d --- /dev/null +++ b/checkpoint-84/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd138abff16b2733dcbdf9271fa5f91929a17c04601301b97a2c15d0538a8b87 +size 89553018 diff --git a/checkpoint-84/rng_state.pth b/checkpoint-84/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..81d2ccfd9aef83dc01df0b0ea013b0db21e1b1e0 --- /dev/null +++ b/checkpoint-84/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2469beb02534c08d00eac4e2ae587b76ca19d605c88e2076c5c8f3c07dc435c +size 13990 diff --git a/checkpoint-84/scheduler.pt b/checkpoint-84/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0806fa7c8b3fcdf199bb338673f7ed489b4dd62 --- /dev/null +++ b/checkpoint-84/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d23b943305348e3bb00bc3bdfc88888c91ec077e1c0d49426ed66f007f2314 +size 1064 diff --git a/checkpoint-84/trainer_state.json b/checkpoint-84/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7f7085209368a2cb8c4cb16273bc7df31c3cf962 --- /dev/null +++ b/checkpoint-84/trainer_state.json @@ -0,0 +1,144 @@ +{ + "best_global_step": 84, + "best_metric": 0.6681818181818182, + "best_model_checkpoint": "font-identifier/checkpoint-84", + "epoch": 6.0, + "eval_steps": 500, + "global_step": 84, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.335155484065792e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-84/training_args.bin b/checkpoint-84/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-84/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304 diff --git a/checkpoint-98/config.json b/checkpoint-98/config.json new file mode 100644 index 0000000000000000000000000000000000000000..832c8027a26aaad8102f4227c64f5687d7a2433e --- /dev/null +++ b/checkpoint-98/config.json @@ -0,0 +1,88 @@ +{ + "architectures": [ + "ResNetForImageClassification" + ], + "depths": [ + 2, + 2, + 2, + 2 + ], + "downsample_in_bottleneck": false, + "downsample_in_first_stage": false, + "embedding_size": 64, + "hidden_act": "relu", + "hidden_sizes": [ + 64, + 128, + 256, + 512 + ], + "id2label": { + "0": "Arial", + "1": "Arial Black", + "10": "Times New Roman", + "11": "Times New Roman Bold", + "12": "Times New Roman Bold Italic", + "13": "Times New Roman Italic", + "14": "Trebuchet MS", + "15": "Trebuchet MS Bold", + "16": "Trebuchet MS Bold Italic", + "17": "Trebuchet MS Italic", + "18": "Verdana", + "19": "Verdana Bold", + "2": "Arial Bold", + "20": "Verdana Bold Italic", + "21": "Verdana Italic", + "3": "Arial Bold Italic", + "4": "Avenir", + "5": "Courier", + "6": "Georgia", + "7": "Helvetica", + "8": "Tahoma", + "9": "Tahoma Bold" + }, + "label2id": { + "Arial": "0", + "Arial Black": "1", + "Arial Bold": "2", + "Arial Bold Italic": "3", + "Avenir": "4", + "Courier": "5", + "Georgia": "6", + "Helvetica": "7", + "Tahoma": "8", + "Tahoma Bold": "9", + "Times New Roman": "10", + "Times New Roman Bold": "11", + "Times New Roman Bold Italic": "12", + "Times New Roman Italic": "13", + "Trebuchet MS": "14", + "Trebuchet MS Bold": "15", + "Trebuchet MS Bold Italic": "16", + "Trebuchet MS Italic": "17", + "Verdana": "18", + "Verdana Bold": "19", + "Verdana Bold Italic": "20", + "Verdana Italic": "21" + }, + "layer_type": "basic", + "model_type": "resnet", + "num_channels": 3, + "out_features": [ + "stage4" + ], + "out_indices": [ + 4 + ], + "problem_type": "single_label_classification", + "stage_names": [ + "stem", + "stage1", + "stage2", + "stage3", + "stage4" + ], + "torch_dtype": "float32", + "transformers_version": "4.53.3" +} diff --git a/checkpoint-98/model.safetensors b/checkpoint-98/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84dc1979f1ddc15184b849531bbba4c2055efdf7 --- /dev/null +++ b/checkpoint-98/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4914e80347e4f08b3cbc3c2cc49ce2dd71e7f0b86b2a67d28df792a815b19c9b +size 44805376 diff --git a/checkpoint-98/optimizer.pt b/checkpoint-98/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..55b046134d6dda6a8967131da98bb3c0150e5876 --- /dev/null +++ b/checkpoint-98/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9566bfc28c47a5e55080134c85d1652baa68adeb86c08a0051698751ed7f9d28 +size 89553018 diff --git a/checkpoint-98/rng_state.pth b/checkpoint-98/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..24d7c76e807b9f0fd8200c0bc4ec141214874173 --- /dev/null +++ b/checkpoint-98/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad316766becf4f423f7f804b69741986aa1eb1bf94c2f3688ddb11298f8d1eb1 +size 13990 diff --git a/checkpoint-98/scheduler.pt b/checkpoint-98/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..243bba25f8be5cad6713dff3bfd83f3c94723705 --- /dev/null +++ b/checkpoint-98/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50bc125662bae4eedced5b0327324165da787a2fc31c0a4b3ba68091d332006d +size 1064 diff --git a/checkpoint-98/trainer_state.json b/checkpoint-98/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..23a95f82e3828c3b804080bf5962b9c1f361f478 --- /dev/null +++ b/checkpoint-98/trainer_state.json @@ -0,0 +1,160 @@ +{ + "best_global_step": 98, + "best_metric": 0.6818181818181818, + "best_model_checkpoint": "font-identifier/checkpoint-98", + "epoch": 7.0, + "eval_steps": 500, + "global_step": 98, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.7272727272727273, + "grad_norm": 9.430075645446777, + "learning_rate": 6.428571428571429e-06, + "loss": 3.3213, + "step": 10 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.06818181818181818, + "eval_loss": 3.2453081607818604, + "eval_runtime": 2.4333, + "eval_samples_per_second": 90.411, + "eval_steps_per_second": 5.753, + "step": 14 + }, + { + "epoch": 1.4363636363636363, + "grad_norm": 9.360711097717285, + "learning_rate": 1.357142857142857e-05, + "loss": 3.1711, + "step": 20 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.12727272727272726, + "eval_loss": 3.0051403045654297, + "eval_runtime": 2.0612, + "eval_samples_per_second": 106.735, + "eval_steps_per_second": 6.792, + "step": 28 + }, + { + "epoch": 2.1454545454545455, + "grad_norm": 8.500679016113281, + "learning_rate": 2.0714285714285718e-05, + "loss": 2.983, + "step": 30 + }, + { + "epoch": 2.8727272727272726, + "grad_norm": 9.82868766784668, + "learning_rate": 2.785714285714286e-05, + "loss": 2.8729, + "step": 40 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.20909090909090908, + "eval_loss": 2.6284613609313965, + "eval_runtime": 2.0503, + "eval_samples_per_second": 107.302, + "eval_steps_per_second": 6.828, + "step": 42 + }, + { + "epoch": 3.581818181818182, + "grad_norm": 7.700014114379883, + "learning_rate": 3.5e-05, + "loss": 2.562, + "step": 50 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.37727272727272726, + "eval_loss": 2.160020112991333, + "eval_runtime": 2.3625, + "eval_samples_per_second": 93.121, + "eval_steps_per_second": 5.926, + "step": 56 + }, + { + "epoch": 4.290909090909091, + "grad_norm": 7.527115821838379, + "learning_rate": 4.214285714285714e-05, + "loss": 2.2406, + "step": 60 + }, + { + "epoch": 5.0, + "grad_norm": 5.579476833343506, + "learning_rate": 4.928571428571429e-05, + "loss": 1.8675, + "step": 70 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.5363636363636364, + "eval_loss": 1.639161229133606, + "eval_runtime": 2.0879, + "eval_samples_per_second": 105.372, + "eval_steps_per_second": 6.705, + "step": 70 + }, + { + "epoch": 5.7272727272727275, + "grad_norm": 6.313197135925293, + "learning_rate": 4.928571428571429e-05, + "loss": 1.6359, + "step": 80 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6681818181818182, + "eval_loss": 1.2266921997070312, + "eval_runtime": 2.1098, + "eval_samples_per_second": 104.274, + "eval_steps_per_second": 6.636, + "step": 84 + }, + { + "epoch": 6.4363636363636365, + "grad_norm": 5.8209004402160645, + "learning_rate": 4.84920634920635e-05, + "loss": 1.3499, + "step": 90 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6818181818181818, + "eval_loss": 1.0587564706802368, + "eval_runtime": 2.9116, + "eval_samples_per_second": 75.56, + "eval_steps_per_second": 4.808, + "step": 98 + } + ], + "logging_steps": 10, + "max_steps": 700, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.224348064743424e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-98/training_args.bin b/checkpoint-98/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47d0e2fb89ceae4c8128f1026dc7d62880c2b5c4 --- /dev/null +++ b/checkpoint-98/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe902249061f5c6116674b4b3dc02d72fa3794a0c9100f91b35a0c0b09d98f6c +size 5304