Upload final model artifacts

Browse files

Files changed (10) hide show

README.md +76 -0
config.json +56 -0
eval_run_config.json +35 -0
metrics_summary.json +561 -0
metrics_summary.txt +561 -0
model.safetensors +3 -0
run_config.json +35 -0
tokenizer.json +0 -0
tokenizer_config.json +14 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,76 @@

+---
+language:
+- en
+library_name: transformers
+license: mit
+pipeline_tag: text-classification
+tags:
+- arxiv
+- scientific-text-classification
+- scibert
+- streamlit-demo
+datasets:
+- librarian-bots/arxiv-metadata-snapshot
+metrics:
+- accuracy
+- f1
+---
+# Article Topic Service SciBERT
+SciBERT (`allenai/scibert_scivocab_uncased`) fine-tuned as a 12-class text classifier that predicts the topic of a scientific article from its title and abstract.
+## Labels
+- Artificial Intelligence
+- Natural Language Processing
+- Computer Vision
+- Machine Learning
+- Computer Science Theory and Algorithms
+- Mathematics
+- Statistics
+- Electrical Engineering
+- Astrophysics
+- Condensed Matter Physics
+- Quantum Physics
+- Quantitative Biology
+## Dataset
+Balanced 12-class subset built from `librarian-bots/arxiv-metadata-snapshot`.
+- Train: 30,000 examples
+- Validation: 3,600 examples
+- Test: 3,600 examples
+## Metrics
+- Validation accuracy: 0.8350
+- Validation macro F1: 0.8351
+- Test accuracy: 0.8356
+- Test macro F1: 0.8351
+- Title-only test accuracy: 0.7522
+- Title-only test macro F1: 0.7495
+## Usage
+```python
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+import torch
+model_id = "Ian-Khalzov/article-topic-service-scibert"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForSequenceClassification.from_pretrained(model_id)
+text = "Title: Large language models for scientific document classification\n\nAbstract: We study..."
+inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
+with torch.inference_mode():
+    probs = torch.softmax(model(**inputs).logits[0], dim=-1)
+predicted_label = model.config.id2label[int(probs.argmax())]
+print(predicted_label)
+```
+## Notes
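+The current baseline is strongest on physics-heavy classes (test F1 of 0.89–0.99 for Astrophysics, Condensed Matter Physics, and Quantum Physics) and weakest on the broad `Machine Learning` category (test F1 of 0.61), where topical overlap with AI, NLP, CV, and Statistics remains high.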

config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "add_cross_attention": false,
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": null,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "eos_token_id": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "Artificial Intelligence",
+    "1": "Natural Language Processing",
+    "2": "Computer Vision",
+    "3": "Machine Learning",
+    "4": "Computer Science Theory and Algorithms",
+    "5": "Mathematics",
+    "6": "Statistics",
+    "7": "Electrical Engineering",
+    "8": "Astrophysics",
+    "9": "Condensed Matter Physics",
+    "10": "Quantum Physics",
+    "11": "Quantitative Biology"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "is_decoder": false,
+  "label2id": {
+    "Artificial Intelligence": 0,
+    "Astrophysics": 8,
+    "Computer Science Theory and Algorithms": 4,
+    "Computer Vision": 2,
+    "Condensed Matter Physics": 9,
+    "Electrical Engineering": 7,
+    "Machine Learning": 3,
+    "Mathematics": 5,
+    "Natural Language Processing": 1,
+    "Quantitative Biology": 11,
+    "Quantum Physics": 10,
+    "Statistics": 6
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.5.0",
+  "type_vocab_size": 2,
+  "use_cache": false,
+  "vocab_size": 31090
+}

eval_run_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "prepared_at_utc": "2026-04-06T15:01:43.444774+00:00",
+  "model_name": "/home/yakh/ML/ML2/article_topic_service/artifacts/scibert_topics12_run1",
+  "max_length": 256,
+  "per_device_train_batch_size": 4,
+  "per_device_eval_batch_size": 8,
+  "gradient_accumulation_steps": 8,
+  "num_train_epochs": 12,
+  "learning_rate": 2e-05,
+  "weight_decay": 0.01,
+  "warmup_ratio": 0.1,
+  "label_smoothing_factor": 0.05,
+  "title_only_prob": 0.2,
+  "early_stopping_patience": 3,
+  "seed": 42,
+  "resume_from_checkpoint": null,
+  "use_bf16": true,
+  "use_fp16": false,
+  "taxonomy_profile": "topics12",
+  "num_labels": 12,
+  "label_order": [
+    "artificial_intelligence",
+    "natural_language_processing",
+    "computer_vision",
+    "machine_learning",
+    "computer_science_theory",
+    "mathematics",
+    "statistics",
+    "electrical_engineering",
+    "astrophysics",
+    "condensed_matter_physics",
+    "quantum_physics",
+    "quantitative_biology"
+  ]
+}

metrics_summary.json ADDED Viewed

	@@ -0,0 +1,561 @@

+{
+  "train_runtime": {
+    "skipped_train": true
+  },
+  "eval_metrics": {
+    "train": {
+      "train_loss": 0.5018106698989868,
+      "train_model_preparation_time": 0.0013,
+      "train_accuracy": 0.9251333333333334,
+      "train_macro_f1": 0.9246237953755961,
+      "train_runtime": 90.3107,
+      "train_samples_per_second": 332.186,
+      "train_steps_per_second": 41.523,
+      "epoch": 0
+    },
+    "validation": {
+      "validation_loss": 0.709372878074646,
+      "validation_model_preparation_time": 0.0013,
+      "validation_accuracy": 0.835,
+      "validation_macro_f1": 0.8351179604209936,
+      "validation_runtime": 11.0597,
+      "validation_samples_per_second": 325.505,
+      "validation_steps_per_second": 40.688,
+      "epoch": 0
+    },
+    "test_full": {
+      "test_full_loss": 0.7299672365188599,
+      "test_full_model_preparation_time": 0.0013,
+      "test_full_accuracy": 0.8355555555555556,
+      "test_full_macro_f1": 0.835052453623228,
+      "test_full_runtime": 10.953,
+      "test_full_samples_per_second": 328.676,
+      "test_full_steps_per_second": 41.084,
+      "epoch": 0
+    },
+    "test_title_only": {
+      "test_title_only_loss": 1.0085811614990234,
+      "test_title_only_model_preparation_time": 0.0013,
+      "test_title_only_accuracy": 0.7522222222222222,
+      "test_title_only_macro_f1": 0.7495277786217475,
+      "test_title_only_runtime": 6.6335,
+      "test_title_only_samples_per_second": 542.697,
+      "test_title_only_steps_per_second": 67.837,
+      "epoch": 0
+    }
+  },
+  "test_full_classification_report": {
+    "Artificial Intelligence": {
+      "precision": 0.7289719626168224,
+      "recall": 0.78,
+      "f1-score": 0.7536231884057971,
+      "support": 300.0
+    },
+    "Natural Language Processing": {
+      "precision": 0.852760736196319,
+      "recall": 0.9266666666666666,
+      "f1-score": 0.8881789137380192,
+      "support": 300.0
+    },
+    "Computer Vision": {
+      "precision": 0.8379310344827586,
+      "recall": 0.81,
+      "f1-score": 0.823728813559322,
+      "support": 300.0
+    },
+    "Machine Learning": {
+      "precision": 0.6564885496183206,
+      "recall": 0.5733333333333334,
+      "f1-score": 0.6120996441281139,
+      "support": 300.0
+    },
+    "Computer Science Theory and Algorithms": {
+      "precision": 0.8657718120805369,
+      "recall": 0.86,
+      "f1-score": 0.862876254180602,
+      "support": 300.0
+    },
+    "Mathematics": {
+      "precision": 0.9622641509433962,
+      "recall": 0.85,
+      "f1-score": 0.9026548672566371,
+      "support": 300.0
+    },
+    "Statistics": {
+      "precision": 0.7125748502994012,
+      "recall": 0.7933333333333333,
+      "f1-score": 0.750788643533123,
+      "support": 300.0
+    },
+    "Electrical Engineering": {
+      "precision": 0.7736486486486487,
+      "recall": 0.7633333333333333,
+      "f1-score": 0.7684563758389261,
+      "support": 300.0
+    },
+    "Astrophysics": {
+      "precision": 0.9966329966329966,
+      "recall": 0.9866666666666667,
+      "f1-score": 0.9916247906197655,
+      "support": 300.0
+    },
+    "Condensed Matter Physics": {
+      "precision": 0.916083916083916,
+      "recall": 0.8733333333333333,
+      "f1-score": 0.89419795221843,
+      "support": 300.0
+    },
+    "Quantum Physics": {
+      "precision": 0.8676923076923077,
+      "recall": 0.94,
+      "f1-score": 0.9024,
+      "support": 300.0
+    },
+    "Quantitative Biology": {
+      "precision": 0.87,
+      "recall": 0.87,
+      "f1-score": 0.87,
+      "support": 300.0
+    },
+    "accuracy": 0.8355555555555556,
+    "macro avg": {
+      "precision": 0.8367350804412852,
+      "recall": 0.8355555555555555,
+      "f1-score": 0.835052453623228,
+      "support": 3600.0
+    },
+    "weighted avg": {
+      "precision": 0.8367350804412854,
+      "recall": 0.8355555555555556,
+      "f1-score": 0.835052453623228,
+      "support": 3600.0
+    }
+  },
+  "test_full_confusion_matrix": [
+    [
+      234,
+      15,
+      4,
+      22,
+      11,
+      0,
+      10,
+      0,
+      0,
+      0,
+      2,
+      2
+    ],
+    [
+      9,
+      278,
+      5,
+      4,
+      0,
+      0,
+      1,
+      1,
+      0,
+      0,
+      0,
+      2
+    ],
+    [
+      3,
+      5,
+      243,
+      15,
+      0,
+      0,
+      1,
+      31,
+      0,
+      0,
+      0,
+      2
+    ],
+    [
+      32,
+      14,
+      19,
+      172,
+      8,
+      1,
+      40,
+      9,
+      0,
+      0,
+      0,
+      5
+    ],
+    [
+      25,
+      1,
+      0,
+      1,
+      258,
+      2,
+      3,
+      2,
+      0,
+      2,
+      5,
+      1
+    ],
+    [
+      1,
+      0,
+      0,
+      3,
+      12,
+      255,
+      13,
+      6,
+      0,
+      3,
+      3,
+      4
+    ],
+    [
+      4,
+      3,
+      0,
+      31,
+      2,
+      0,
+      238,
+      8,
+      0,
+      0,
+      1,
+      13
+    ],
+    [
+      6,
+      7,
+      18,
+      8,
+      5,
+      5,
+      15,
+      229,
+      0,
+      1,
+      0,
+      6
+    ],
+    [
+      1,
+      0,
+      1,
+      0,
+      0,
+      0,
+      0,
+      1,
+      296,
+      0,
+      1,
+      0
+    ],
+    [
+      0,
+      0,
+      0,
+      1,
+      1,
+      0,
+      1,
+      0,
+      1,
+      262,
+      30,
+      4
+    ],
+    [
+      0,
+      0,
+      0,
+      2,
+      0,
+      0,
+      0,
+      1,
+      0,
+      15,
+      282,
+      0
+    ],
+    [
+      6,
+      3,
+      0,
+      3,
+      1,
+      2,
+      12,
+      8,
+      0,
+      3,
+      1,
+      261
+    ]
+  ],
+  "test_title_only_classification_report": {
+    "Artificial Intelligence": {
+      "precision": 0.6678321678321678,
+      "recall": 0.6366666666666667,
+      "f1-score": 0.6518771331058021,
+      "support": 300.0
+    },
+    "Natural Language Processing": {
+      "precision": 0.7485207100591716,
+      "recall": 0.8433333333333334,
+      "f1-score": 0.7931034482758621,
+      "support": 300.0
+    },
+    "Computer Vision": {
+      "precision": 0.7210884353741497,
+      "recall": 0.7066666666666667,
+      "f1-score": 0.7138047138047138,
+      "support": 300.0
+    },
+    "Machine Learning": {
+      "precision": 0.5879828326180258,
+      "recall": 0.45666666666666667,
+      "f1-score": 0.5140712945590994,
+      "support": 300.0
+    },
+    "Computer Science Theory and Algorithms": {
+      "precision": 0.7033639143730887,
+      "recall": 0.7666666666666667,
+      "f1-score": 0.733652312599681,
+      "support": 300.0
+    },
+    "Mathematics": {
+      "precision": 0.837037037037037,
+      "recall": 0.7533333333333333,
+      "f1-score": 0.7929824561403509,
+      "support": 300.0
+    },
+    "Statistics": {
+      "precision": 0.6128048780487805,
+      "recall": 0.67,
+      "f1-score": 0.6401273885350318,
+      "support": 300.0
+    },
+    "Electrical Engineering": {
+      "precision": 0.7295373665480427,
+      "recall": 0.6833333333333333,
+      "f1-score": 0.7056798623063684,
+      "support": 300.0
+    },
+    "Astrophysics": {
+      "precision": 0.9534883720930233,
+      "recall": 0.9566666666666667,
+      "f1-score": 0.9550748752079867,
+      "support": 300.0
+    },
+    "Condensed Matter Physics": {
+      "precision": 0.8430034129692833,
+      "recall": 0.8233333333333334,
+      "f1-score": 0.8330522765598651,
+      "support": 300.0
+    },
+    "Quantum Physics": {
+      "precision": 0.7834757834757835,
+      "recall": 0.9166666666666666,
+      "f1-score": 0.8448540706605223,
+      "support": 300.0
+    },
+    "Quantitative Biology": {
+      "precision": 0.8187919463087249,
+      "recall": 0.8133333333333334,
+      "f1-score": 0.8160535117056856,
+      "support": 300.0
+    },
+    "accuracy": 0.7522222222222222,
+    "macro avg": {
+      "precision": 0.7505772380614398,
+      "recall": 0.7522222222222222,
+      "f1-score": 0.7495277786217475,
+      "support": 3600.0
+    },
+    "weighted avg": {
+      "precision": 0.7505772380614398,
+      "recall": 0.7522222222222222,
+      "f1-score": 0.7495277786217475,
+      "support": 3600.0
+    }
+  },
+  "test_title_only_confusion_matrix": [
+    [
+      191,
+      30,
+      10,
+      19,
+      18,
+      2,
+      17,
+      3,
+      1,
+      1,
+      5,
+      3
+    ],
+    [
+      12,
+      253,
+      11,
+      13,
+      0,
+      0,
+      5,
+      1,
+      0,
+      1,
+      1,
+      3
+    ],
+    [
+      6,
+      11,
+      212,
+      19,
+      5,
+      2,
+      3,
+      37,
+      2,
+      0,
+      1,
+      2
+    ],
+    [
+      21,
+      22,
+      28,
+      137,
+      21,
+      1,
+      48,
+      9,
+      1,
+      2,
+      1,
+      9
+    ],
+    [
+      24,
+      4,
+      0,
+      6,
+      230,
+      10,
+      12,
+      4,
+      0,
+      1,
+      7,
+      2
+    ],
+    [
+      4,
+      0,
+      2,
+      3,
+      25,
+      226,
+      11,
+      7,
+      1,
+      11,
+      5,
+      5
+    ],
+    [
+      11,
+      4,
+      2,
+      28,
+      11,
+      9,
+      201,
+      10,
+      1,
+      5,
+      2,
+      16
+    ],
+    [
+      3,
+      8,
+      24,
+      4,
+      9,
+      8,
+      16,
+      205,
+      3,
+      3,
+      4,
+      13
+    ],
+    [
+      2,
+      0,
+      3,
+      0,
+      0,
+      3,
+      0,
+      0,
+      287,
+      3,
+      2,
+      0
+    ],
+    [
+      1,
+      0,
+      0,
+      0,
+      0,
+      5,
+      1,
+      0,
+      2,
+      247,
+      43,
+      1
+    ],
+    [
+      1,
+      1,
+      0,
+      1,
+      5,
+      2,
+      2,
+      0,
+      2,
+      11,
+      275,
+      0
+    ],
+    [
+      10,
+      5,
+      2,
+      3,
+      3,
+      2,
+      12,
+      5,
+      1,
+      8,
+      5,
+      244
+    ]
+  ]
+}

metrics_summary.txt ADDED Viewed

	@@ -0,0 +1,561 @@

+{
+  "train_runtime": {
+    "skipped_train": true
+  },
+  "eval_metrics": {
+    "train": {
+      "train_loss": 0.5018106698989868,
+      "train_model_preparation_time": 0.0013,
+      "train_accuracy": 0.9251333333333334,
+      "train_macro_f1": 0.9246237953755961,
+      "train_runtime": 90.3107,
+      "train_samples_per_second": 332.186,
+      "train_steps_per_second": 41.523,
+      "epoch": 0
+    },
+    "validation": {
+      "validation_loss": 0.709372878074646,
+      "validation_model_preparation_time": 0.0013,
+      "validation_accuracy": 0.835,
+      "validation_macro_f1": 0.8351179604209936,
+      "validation_runtime": 11.0597,
+      "validation_samples_per_second": 325.505,
+      "validation_steps_per_second": 40.688,
+      "epoch": 0
+    },
+    "test_full": {
+      "test_full_loss": 0.7299672365188599,
+      "test_full_model_preparation_time": 0.0013,
+      "test_full_accuracy": 0.8355555555555556,
+      "test_full_macro_f1": 0.835052453623228,
+      "test_full_runtime": 10.953,
+      "test_full_samples_per_second": 328.676,
+      "test_full_steps_per_second": 41.084,
+      "epoch": 0
+    },
+    "test_title_only": {
+      "test_title_only_loss": 1.0085811614990234,
+      "test_title_only_model_preparation_time": 0.0013,
+      "test_title_only_accuracy": 0.7522222222222222,
+      "test_title_only_macro_f1": 0.7495277786217475,
+      "test_title_only_runtime": 6.6335,
+      "test_title_only_samples_per_second": 542.697,
+      "test_title_only_steps_per_second": 67.837,
+      "epoch": 0
+    }
+  },
+  "test_full_classification_report": {
+    "Artificial Intelligence": {
+      "precision": 0.7289719626168224,
+      "recall": 0.78,
+      "f1-score": 0.7536231884057971,
+      "support": 300.0
+    },
+    "Natural Language Processing": {
+      "precision": 0.852760736196319,
+      "recall": 0.9266666666666666,
+      "f1-score": 0.8881789137380192,
+      "support": 300.0
+    },
+    "Computer Vision": {
+      "precision": 0.8379310344827586,
+      "recall": 0.81,
+      "f1-score": 0.823728813559322,
+      "support": 300.0
+    },
+    "Machine Learning": {
+      "precision": 0.6564885496183206,
+      "recall": 0.5733333333333334,
+      "f1-score": 0.6120996441281139,
+      "support": 300.0
+    },
+    "Computer Science Theory and Algorithms": {
+      "precision": 0.8657718120805369,
+      "recall": 0.86,
+      "f1-score": 0.862876254180602,
+      "support": 300.0
+    },
+    "Mathematics": {
+      "precision": 0.9622641509433962,
+      "recall": 0.85,
+      "f1-score": 0.9026548672566371,
+      "support": 300.0
+    },
+    "Statistics": {
+      "precision": 0.7125748502994012,
+      "recall": 0.7933333333333333,
+      "f1-score": 0.750788643533123,
+      "support": 300.0
+    },
+    "Electrical Engineering": {
+      "precision": 0.7736486486486487,
+      "recall": 0.7633333333333333,
+      "f1-score": 0.7684563758389261,
+      "support": 300.0
+    },
+    "Astrophysics": {
+      "precision": 0.9966329966329966,
+      "recall": 0.9866666666666667,
+      "f1-score": 0.9916247906197655,
+      "support": 300.0
+    },
+    "Condensed Matter Physics": {
+      "precision": 0.916083916083916,
+      "recall": 0.8733333333333333,
+      "f1-score": 0.89419795221843,
+      "support": 300.0
+    },
+    "Quantum Physics": {
+      "precision": 0.8676923076923077,
+      "recall": 0.94,
+      "f1-score": 0.9024,
+      "support": 300.0
+    },
+    "Quantitative Biology": {
+      "precision": 0.87,
+      "recall": 0.87,
+      "f1-score": 0.87,
+      "support": 300.0
+    },
+    "accuracy": 0.8355555555555556,
+    "macro avg": {
+      "precision": 0.8367350804412852,
+      "recall": 0.8355555555555555,
+      "f1-score": 0.835052453623228,
+      "support": 3600.0
+    },
+    "weighted avg": {
+      "precision": 0.8367350804412854,
+      "recall": 0.8355555555555556,
+      "f1-score": 0.835052453623228,
+      "support": 3600.0
+    }
+  },
+  "test_full_confusion_matrix": [
+    [
+      234,
+      15,
+      4,
+      22,
+      11,
+      0,
+      10,
+      0,
+      0,
+      0,
+      2,
+      2
+    ],
+    [
+      9,
+      278,
+      5,
+      4,
+      0,
+      0,
+      1,
+      1,
+      0,
+      0,
+      0,
+      2
+    ],
+    [
+      3,
+      5,
+      243,
+      15,
+      0,
+      0,
+      1,
+      31,
+      0,
+      0,
+      0,
+      2
+    ],
+    [
+      32,
+      14,
+      19,
+      172,
+      8,
+      1,
+      40,
+      9,
+      0,
+      0,
+      0,
+      5
+    ],
+    [
+      25,
+      1,
+      0,
+      1,
+      258,
+      2,
+      3,
+      2,
+      0,
+      2,
+      5,
+      1
+    ],
+    [
+      1,
+      0,
+      0,
+      3,
+      12,
+      255,
+      13,
+      6,
+      0,
+      3,
+      3,
+      4
+    ],
+    [
+      4,
+      3,
+      0,
+      31,
+      2,
+      0,
+      238,
+      8,
+      0,
+      0,
+      1,
+      13
+    ],
+    [
+      6,
+      7,
+      18,
+      8,
+      5,
+      5,
+      15,
+      229,
+      0,
+      1,
+      0,
+      6
+    ],
+    [
+      1,
+      0,
+      1,
+      0,
+      0,
+      0,
+      0,
+      1,
+      296,
+      0,
+      1,
+      0
+    ],
+    [
+      0,
+      0,
+      0,
+      1,
+      1,
+      0,
+      1,
+      0,
+      1,
+      262,
+      30,
+      4
+    ],
+    [
+      0,
+      0,
+      0,
+      2,
+      0,
+      0,
+      0,
+      1,
+      0,
+      15,
+      282,
+      0
+    ],
+    [
+      6,
+      3,
+      0,
+      3,
+      1,
+      2,
+      12,
+      8,
+      0,
+      3,
+      1,
+      261
+    ]
+  ],
+  "test_title_only_classification_report": {
+    "Artificial Intelligence": {
+      "precision": 0.6678321678321678,
+      "recall": 0.6366666666666667,
+      "f1-score": 0.6518771331058021,
+      "support": 300.0
+    },
+    "Natural Language Processing": {
+      "precision": 0.7485207100591716,
+      "recall": 0.8433333333333334,
+      "f1-score": 0.7931034482758621,
+      "support": 300.0
+    },
+    "Computer Vision": {
+      "precision": 0.7210884353741497,
+      "recall": 0.7066666666666667,
+      "f1-score": 0.7138047138047138,
+      "support": 300.0
+    },
+    "Machine Learning": {
+      "precision": 0.5879828326180258,
+      "recall": 0.45666666666666667,
+      "f1-score": 0.5140712945590994,
+      "support": 300.0
+    },
+    "Computer Science Theory and Algorithms": {
+      "precision": 0.7033639143730887,
+      "recall": 0.7666666666666667,
+      "f1-score": 0.733652312599681,
+      "support": 300.0
+    },
+    "Mathematics": {
+      "precision": 0.837037037037037,
+      "recall": 0.7533333333333333,
+      "f1-score": 0.7929824561403509,
+      "support": 300.0
+    },
+    "Statistics": {
+      "precision": 0.6128048780487805,
+      "recall": 0.67,
+      "f1-score": 0.6401273885350318,
+      "support": 300.0
+    },
+    "Electrical Engineering": {
+      "precision": 0.7295373665480427,
+      "recall": 0.6833333333333333,
+      "f1-score": 0.7056798623063684,
+      "support": 300.0
+    },
+    "Astrophysics": {
+      "precision": 0.9534883720930233,
+      "recall": 0.9566666666666667,
+      "f1-score": 0.9550748752079867,
+      "support": 300.0
+    },
+    "Condensed Matter Physics": {
+      "precision": 0.8430034129692833,
+      "recall": 0.8233333333333334,
+      "f1-score": 0.8330522765598651,
+      "support": 300.0
+    },
+    "Quantum Physics": {
+      "precision": 0.7834757834757835,
+      "recall": 0.9166666666666666,
+      "f1-score": 0.8448540706605223,
+      "support": 300.0
+    },
+    "Quantitative Biology": {
+      "precision": 0.8187919463087249,
+      "recall": 0.8133333333333334,
+      "f1-score": 0.8160535117056856,
+      "support": 300.0
+    },
+    "accuracy": 0.7522222222222222,
+    "macro avg": {
+      "precision": 0.7505772380614398,
+      "recall": 0.7522222222222222,
+      "f1-score": 0.7495277786217475,
+      "support": 3600.0
+    },
+    "weighted avg": {
+      "precision": 0.7505772380614398,
+      "recall": 0.7522222222222222,
+      "f1-score": 0.7495277786217475,
+      "support": 3600.0
+    }
+  },
+  "test_title_only_confusion_matrix": [
+    [
+      191,
+      30,
+      10,
+      19,
+      18,
+      2,
+      17,
+      3,
+      1,
+      1,
+      5,
+      3
+    ],
+    [
+      12,
+      253,
+      11,
+      13,
+      0,
+      0,
+      5,
+      1,
+      0,
+      1,
+      1,
+      3
+    ],
+    [
+      6,
+      11,
+      212,
+      19,
+      5,
+      2,
+      3,
+      37,
+      2,
+      0,
+      1,
+      2
+    ],
+    [
+      21,
+      22,
+      28,
+      137,
+      21,
+      1,
+      48,
+      9,
+      1,
+      2,
+      1,
+      9
+    ],
+    [
+      24,
+      4,
+      0,
+      6,
+      230,
+      10,
+      12,
+      4,
+      0,
+      1,
+      7,
+      2
+    ],
+    [
+      4,
+      0,
+      2,
+      3,
+      25,
+      226,
+      11,
+      7,
+      1,
+      11,
+      5,
+      5
+    ],
+    [
+      11,
+      4,
+      2,
+      28,
+      11,
+      9,
+      201,
+      10,
+      1,
+      5,
+      2,
+      16
+    ],
+    [
+      3,
+      8,
+      24,
+      4,
+      9,
+      8,
+      16,
+      205,
+      3,
+      3,
+      4,
+      13
+    ],
+    [
+      2,
+      0,
+      3,
+      0,
+      0,
+      3,
+      0,
+      0,
+      287,
+      3,
+      2,
+      0
+    ],
+    [
+      1,
+      0,
+      0,
+      0,
+      0,
+      5,
+      1,
+      0,
+      2,
+      247,
+      43,
+      1
+    ],
+    [
+      1,
+      1,
+      0,
+      1,
+      5,
+      2,
+      2,
+      0,
+      2,
+      11,
+      275,
+      0
+    ],
+    [
+      10,
+      5,
+      2,
+      3,
+      3,
+      2,
+      12,
+      5,
+      1,
+      8,
+      5,
+      244
+    ]
+  ]
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bb43480cebf6a605d7ea6fc34268b5caac9fdc6b31296205b6b58bae8e579b5
+size 439734280

run_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "prepared_at_utc": "2026-04-06T14:00:00+00:00",
+  "model_name": "allenai/scibert_scivocab_uncased",
+  "max_length": 256,
+  "per_device_train_batch_size": 4,
+  "per_device_eval_batch_size": 8,
+  "gradient_accumulation_steps": 8,
+  "num_train_epochs": 12,
+  "learning_rate": 2e-05,
+  "weight_decay": 0.01,
+  "warmup_ratio": 0.1,
+  "label_smoothing_factor": 0.05,
+  "title_only_prob": 0.2,
+  "early_stopping_patience": 3,
+  "seed": 42,
+  "resume_from_checkpoint": null,
+  "use_bf16": true,
+  "use_fp16": false,
+  "taxonomy_profile": "topics12",
+  "num_labels": 12,
+  "label_order": [
+    "artificial_intelligence",
+    "natural_language_processing",
+    "computer_vision",
+    "machine_learning",
+    "computer_science_theory",
+    "mathematics",
+    "statistics",
+    "electrical_engineering",
+    "astrophysics",
+    "condensed_matter_physics",
+    "quantum_physics",
+    "quantitative_biology"
+  ]
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "backend": "tokenizers",
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "is_local": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9ea637bcad0c6445dbfe1a0110faae0f86e61c5a830e59ef2821d192ab5ae31
+size 5265