privacy-filter-it / finetune_summary.json
capazme's picture
Upload fine-tuned checkpoint: checkpoint_step1_italian_docs_v2
041b204 verified
{
"artifacts": {
"config_json": "/kaggle/working/checkpoint_step1_italian_docs_v2/config.json",
"model_safetensors": "/kaggle/working/checkpoint_step1_italian_docs_v2/model.safetensors",
"summary_json": "/kaggle/working/checkpoint_step1_italian_docs_v2/finetune_summary.json"
},
"base_checkpoint": "/root/.opf/privacy_filter",
"batch_size": 1,
"best_epoch": 14,
"best_metric": 1.256955582021411e-06,
"best_metric_name": "validation_loss",
"checkpoint_category_version": "v2",
"device": "cuda",
"elapsed_s": 8991.771278478,
"encoding": "o200k_base",
"epoch_metrics": [
{
"elapsed_s": 611.6480500910002,
"epoch": 1,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.18406920914817018,
"train_token_accuracy": 0.9626421135608234,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.0010319853343994626,
"validation_token_accuracy": 0.9997995087965516,
"validation_tokens": 39902
},
{
"elapsed_s": 603.1925267070001,
"epoch": 2,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.0006751688251858842,
"train_token_accuracy": 0.9999309240733668,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.0004481305598764045,
"validation_token_accuracy": 0.9998746929978447,
"validation_tokens": 39902
},
{
"elapsed_s": 595.9333531040002,
"epoch": 3,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.0004284777879854081,
"train_token_accuracy": 0.9999349873631688,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.0011235129029634563,
"validation_token_accuracy": 0.9997995087965516,
"validation_tokens": 39902
},
{
"elapsed_s": 594.8147814399999,
"epoch": 4,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.0004589788315978451,
"train_token_accuracy": 0.999910607624357,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.0005426766982118603,
"validation_token_accuracy": 0.9997995087965516,
"validation_tokens": 39902
},
{
"elapsed_s": 594.5084605920001,
"epoch": 5,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.00025950950850595604,
"train_token_accuracy": 0.9999431139427726,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.0006384832375886648,
"validation_token_accuracy": 0.9998997543982758,
"validation_tokens": 39902
},
{
"elapsed_s": 601.9116796880007,
"epoch": 6,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.00033364914724159483,
"train_token_accuracy": 0.9999390506529706,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.00023689508041231628,
"validation_token_accuracy": 0.9999498771991379,
"validation_tokens": 39902
},
{
"elapsed_s": 599.1612815619992,
"epoch": 7,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.00021150960081863236,
"train_token_accuracy": 0.9999431139427726,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.0001638231382475539,
"validation_token_accuracy": 0.9999248157987068,
"validation_tokens": 39902
},
{
"elapsed_s": 595.1891281780008,
"epoch": 8,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.00011636061623750538,
"train_token_accuracy": 0.9999837468407922,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.00019882116281728446,
"validation_token_accuracy": 0.9999248157987068,
"validation_tokens": 39902
},
{
"elapsed_s": 593.3688169760007,
"epoch": 9,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.0001755927075592558,
"train_token_accuracy": 0.9999634303917824,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.00022523166237503278,
"validation_token_accuracy": 0.9998997543982758,
"validation_tokens": 39902
},
{
"elapsed_s": 593.3411462200002,
"epoch": 10,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.0001144706639999654,
"train_token_accuracy": 0.9999796835509902,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.00022547725828748192,
"validation_token_accuracy": 0.9999248157987068,
"validation_tokens": 39902
},
{
"elapsed_s": 603.6394337089987,
"epoch": 11,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.00017106058540362378,
"train_token_accuracy": 0.9999634303917824,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 5.802690870656621e-05,
"validation_token_accuracy": 1.0,
"validation_tokens": 39902
},
{
"elapsed_s": 596.6633162110011,
"epoch": 12,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 5.1230087411499746e-05,
"train_token_accuracy": 0.999995936710198,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.0001793956412968626,
"validation_token_accuracy": 0.999974938599569,
"validation_tokens": 39902
},
{
"elapsed_s": 597.4834135400015,
"epoch": 13,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 9.818313904666025e-05,
"train_token_accuracy": 0.9999756202611882,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 0.0001664656747663711,
"validation_token_accuracy": 0.999974938599569,
"validation_tokens": 39902
},
{
"elapsed_s": 605.1379507809997,
"epoch": 14,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 0.00013058107322193007,
"train_token_accuracy": 0.9999756202611882,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 1.256955582021411e-06,
"validation_token_accuracy": 1.0,
"validation_tokens": 39902
},
{
"elapsed_s": 597.8637465249994,
"epoch": 15,
"optimizer_steps": 1875,
"train_batches": 7500,
"train_loss": 3.845093296363575e-06,
"train_token_accuracy": 1.0,
"train_tokens": 246106,
"validation_batches": 1250,
"validation_loss": 1.754828393192001e-05,
"validation_token_accuracy": 1.0,
"validation_tokens": 39902
}
],
"epochs": 15,
"generated_at_unix": 1777048760.4942422,
"grad_accum_steps": 4,
"label_space_json_path": "/kaggle/working/custom_label_space.json",
"label_space_source": "label-space-json",
"learning_rate": 1e-05,
"max_grad_norm": 1.0,
"num_output_labels": 73,
"num_train_examples": 7500,
"num_train_windows": 7500,
"num_validation_examples": 1250,
"num_validation_windows": 1250,
"output_checkpoint_dir": "/kaggle/working/checkpoint_step1_italian_docs_v2",
"output_head_reinitialized": true,
"output_head_rows_copied": 73,
"output_head_rows_copied_exact": 33,
"output_head_rows_copied_fallback": 40,
"resolved_category_version": "italian_legal_v1",
"resolved_n_ctx": 128000,
"schema_version": 1,
"serialized_param_dtype": "bfloat16",
"span_class_names": [
"O",
"private_person",
"private_address",
"private_email",
"private_phone",
"private_url",
"private_date",
"account_number",
"secret",
"codice_fiscale",
"carta_identita",
"patente",
"passaporto",
"partita_iva",
"iban",
"tessera_sanitaria",
"numero_procedimento",
"riferimento_catastale",
"parte_in_causa"
],
"train_dataset": "/kaggle/working/datasets/step1_train.jsonl",
"train_dataset_variant": "full",
"validation_dataset": "/kaggle/working/datasets/step1_val.jsonl",
"validation_dataset_variant": "full",
"validation_split": null,
"weight_decay": 0.01
}