Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .ipynb_checkpoints/code-checkpoint.ipynb +6 -0
- .ipynb_checkpoints/new-checkpoint.ipynb +1113 -0
- bert-quantization/finetuned-bert-emotion-baseline/config.json +42 -0
- bert-quantization/finetuned-bert-emotion-baseline/model.safetensors +3 -0
- bert-quantization/finetuned-bert-emotion-baseline/special_tokens_map.json +7 -0
- bert-quantization/finetuned-bert-emotion-baseline/tokenizer.json +0 -0
- bert-quantization/finetuned-bert-emotion-baseline/tokenizer_config.json +56 -0
- bert-quantization/finetuned-bert-emotion-baseline/vocab.txt +0 -0
- bert-quantization/finetuned-bert-emotion-baseline_old/config.json +42 -0
- bert-quantization/finetuned-bert-emotion-baseline_old/model.safetensors +3 -0
- bert-quantization/finetuned-bert-emotion-baseline_old/special_tokens_map.json +7 -0
- bert-quantization/finetuned-bert-emotion-baseline_old/tokenizer.json +0 -0
- bert-quantization/finetuned-bert-emotion-baseline_old/tokenizer_config.json +56 -0
- bert-quantization/finetuned-bert-emotion-baseline_old/vocab.txt +0 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/config.json +42 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/model.safetensors +3 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/optimizer.pt +3 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/rng_state.pth +3 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/scaler.pt +3 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/scheduler.pt +3 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/special_tokens_map.json +7 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/tokenizer.json +0 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/tokenizer_config.json +56 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/trainer_state.json +51 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/training_args.bin +3 -0
- bert-quantization/finetuned-bert-emotion_old/checkpoint-500/vocab.txt +0 -0
- bert-quantization/finetuned-bert-emotion_old/runs/Nov19_01-26-07_bionlp/events.out.tfevents.1763495771.bionlp.1753567.0 +3 -0
- bert-quantization/finetuned-bert-emotion_old/runs/Nov19_01-27-12_bionlp/events.out.tfevents.1763495834.bionlp.1754079.0 +3 -0
- bert-quantization/finetuned-bert-emotion_old/runs/Nov19_01-27-45_bionlp/events.out.tfevents.1763495865.bionlp.1754079.1 +3 -0
- bert-quantization/finetuned-bert-emotion_old/runs/Nov19_01-28-00_bionlp/events.out.tfevents.1763495880.bionlp.1754079.2 +3 -0
- bert-quantization/quantized-bert-emotion-ptq/model.pth +3 -0
- bert-quantization/quantized-bert-emotion-qat/model.pth +3 -0
- bert-quantization/readme.md +1 -0
- bert-quantization/test-00000-of-00001.parquet +3 -0
- bert-quantization/train-00000-of-00001.parquet +3 -0
- bert-quantization/validation-00000-of-00001.parquet +3 -0
- code.ipynb +341 -0
- finetuned-bert-emotion-baseline/config.json +42 -0
- finetuned-bert-emotion-baseline/model.safetensors +3 -0
- finetuned-bert-emotion-baseline/special_tokens_map.json +7 -0
- finetuned-bert-emotion-baseline/tokenizer.json +0 -0
- finetuned-bert-emotion-baseline/tokenizer_config.json +56 -0
- finetuned-bert-emotion-baseline/vocab.txt +0 -0
- finetuned-bert-emotion-baseline_old/config.json +42 -0
- finetuned-bert-emotion-baseline_old/model.safetensors +3 -0
- finetuned-bert-emotion-baseline_old/special_tokens_map.json +7 -0
- finetuned-bert-emotion-baseline_old/tokenizer.json +0 -0
- finetuned-bert-emotion-baseline_old/tokenizer_config.json +56 -0
- finetuned-bert-emotion-baseline_old/vocab.txt +0 -0
- finetuned-bert-emotion_old/checkpoint-500/config.json +42 -0
.ipynb_checkpoints/code-checkpoint.ipynb
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [],
|
| 3 |
+
"metadata": {},
|
| 4 |
+
"nbformat": 4,
|
| 5 |
+
"nbformat_minor": 5
|
| 6 |
+
}
|
.ipynb_checkpoints/new-checkpoint.ipynb
ADDED
|
@@ -0,0 +1,1113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "ed3c9aa0-655a-49b8-a3e1-104c7267d05f",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"# Nayana Barai"
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "markdown",
|
| 13 |
+
"id": "126eb6d5-5bb4-4493-8788-74b29a8a5c7a",
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"source": [
|
| 16 |
+
"# Part 1: Baseline Model Fine-Tuning (FP32/FP16)"
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "markdown",
|
| 21 |
+
"id": "d56957e4-e901-4283-b0fa-4177437f970e",
|
| 22 |
+
"metadata": {
|
| 23 |
+
"jp-MarkdownHeadingCollapsed": true
|
| 24 |
+
},
|
| 25 |
+
"source": [
|
| 26 |
+
"## 1.1. Install and Import Libraries"
|
| 27 |
+
]
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "code",
|
| 31 |
+
"execution_count": 1,
|
| 32 |
+
"id": "aa750fb7-b8cc-4364-adde-dc886e6dcc2e",
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"outputs": [
|
| 35 |
+
{
|
| 36 |
+
"name": "stdout",
|
| 37 |
+
"output_type": "stream",
|
| 38 |
+
"text": [
|
| 39 |
+
"Requirement already up-to-date: transformers in /home/saisab/.local/lib/python3.8/site-packages (4.46.3)\n",
|
| 40 |
+
"Requirement already up-to-date: datasets in /home/saisab/.local/lib/python3.8/site-packages (3.1.0)\n",
|
| 41 |
+
"Requirement already up-to-date: scikit-learn in /home/saisab/.local/lib/python3.8/site-packages (1.3.2)\n",
|
| 42 |
+
"Requirement already up-to-date: accelerate in /home/saisab/.local/lib/python3.8/site-packages (1.0.1)\n",
|
| 43 |
+
"Requirement already satisfied, skipping upgrade: tokenizers<0.21,>=0.20 in /home/saisab/.local/lib/python3.8/site-packages (from transformers) (0.20.3)\n",
|
| 44 |
+
"Requirement already satisfied, skipping upgrade: safetensors>=0.4.1 in /home/saisab/.local/lib/python3.8/site-packages (from transformers) (0.5.3)\n",
|
| 45 |
+
"Requirement already satisfied, skipping upgrade: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (1.24.4)\n",
|
| 46 |
+
"Requirement already satisfied, skipping upgrade: huggingface-hub<1.0,>=0.23.2 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.26.2)\n",
|
| 47 |
+
"Requirement already satisfied, skipping upgrade: filelock in /usr/local/lib/python3.8/dist-packages (from transformers) (3.15.4)\n",
|
| 48 |
+
"Requirement already satisfied, skipping upgrade: pyyaml>=5.1 in /usr/lib/python3/dist-packages (from transformers) (5.3.1)\n",
|
| 49 |
+
"Requirement already satisfied, skipping upgrade: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers) (4.66.2)\n",
|
| 50 |
+
"Requirement already satisfied, skipping upgrade: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (2024.5.15)\n",
|
| 51 |
+
"Requirement already satisfied, skipping upgrade: requests in /home/saisab/.local/lib/python3.8/site-packages (from transformers) (2.32.4)\n",
|
| 52 |
+
"Requirement already satisfied, skipping upgrade: packaging>=20.0 in /home/saisab/.local/lib/python3.8/site-packages (from transformers) (24.2)\n",
|
| 53 |
+
"Requirement already satisfied, skipping upgrade: fsspec[http]<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (2024.6.1)\n",
|
| 54 |
+
"Requirement already satisfied, skipping upgrade: aiohttp in /home/saisab/.local/lib/python3.8/site-packages (from datasets) (3.10.11)\n",
|
| 55 |
+
"Requirement already satisfied, skipping upgrade: xxhash in /usr/local/lib/python3.8/dist-packages (from datasets) (3.4.1)\n",
|
| 56 |
+
"Requirement already satisfied, skipping upgrade: multiprocess<0.70.17 in /home/saisab/.local/lib/python3.8/site-packages (from datasets) (0.70.16)\n",
|
| 57 |
+
"Requirement already satisfied, skipping upgrade: dill<0.3.9,>=0.3.0 in /home/saisab/.local/lib/python3.8/site-packages (from datasets) (0.3.8)\n",
|
| 58 |
+
"Requirement already satisfied, skipping upgrade: pandas in /usr/local/lib/python3.8/dist-packages (from datasets) (2.0.3)\n",
|
| 59 |
+
"Requirement already satisfied, skipping upgrade: pyarrow>=15.0.0 in /home/saisab/.local/lib/python3.8/site-packages (from datasets) (17.0.0)\n",
|
| 60 |
+
"Requirement already satisfied, skipping upgrade: joblib>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.4.2)\n",
|
| 61 |
+
"Requirement already satisfied, skipping upgrade: scipy>=1.5.0 in /home/saisab/.local/lib/python3.8/site-packages (from scikit-learn) (1.10.1)\n",
|
| 62 |
+
"Requirement already satisfied, skipping upgrade: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (3.5.0)\n",
|
| 63 |
+
"Requirement already satisfied, skipping upgrade: torch>=1.10.0 in /usr/local/lib/python3.8/dist-packages (from accelerate) (2.4.1+cpu)\n",
|
| 64 |
+
"Requirement already satisfied, skipping upgrade: psutil in /usr/lib/python3/dist-packages (from accelerate) (5.5.1)\n",
|
| 65 |
+
"Requirement already satisfied, skipping upgrade: typing-extensions>=3.7.4.3 in /home/saisab/.local/lib/python3.8/site-packages (from huggingface-hub<1.0,>=0.23.2->transformers) (4.13.2)\n",
|
| 66 |
+
"Requirement already satisfied, skipping upgrade: charset_normalizer<4,>=2 in /home/saisab/.local/lib/python3.8/site-packages (from requests->transformers) (3.4.4)\n",
|
| 67 |
+
"Requirement already satisfied, skipping upgrade: urllib3<3,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2.2.3)\n",
|
| 68 |
+
"Requirement already satisfied, skipping upgrade: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests->transformers) (2.8)\n",
|
| 69 |
+
"Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests->transformers) (2019.11.28)\n",
|
| 70 |
+
"Requirement already satisfied, skipping upgrade: async-timeout<6.0,>=4.0; python_version < \"3.11\" in /home/saisab/.local/lib/python3.8/site-packages (from aiohttp->datasets) (4.0.3)\n",
|
| 71 |
+
"Requirement already satisfied, skipping upgrade: aiohappyeyeballs>=2.3.0 in /home/saisab/.local/lib/python3.8/site-packages (from aiohttp->datasets) (2.4.4)\n",
|
| 72 |
+
"Requirement already satisfied, skipping upgrade: aiosignal>=1.1.2 in /home/saisab/.local/lib/python3.8/site-packages (from aiohttp->datasets) (1.3.1)\n",
|
| 73 |
+
"Requirement already satisfied, skipping upgrade: yarl<2.0,>=1.12.0 in /home/saisab/.local/lib/python3.8/site-packages (from aiohttp->datasets) (1.15.2)\n",
|
| 74 |
+
"Requirement already satisfied, skipping upgrade: attrs>=17.3.0 in /home/saisab/.local/lib/python3.8/site-packages (from aiohttp->datasets) (25.3.0)\n",
|
| 75 |
+
"Requirement already satisfied, skipping upgrade: multidict<7.0,>=4.5 in /home/saisab/.local/lib/python3.8/site-packages (from aiohttp->datasets) (6.1.0)\n",
|
| 76 |
+
"Requirement already satisfied, skipping upgrade: frozenlist>=1.1.1 in /home/saisab/.local/lib/python3.8/site-packages (from aiohttp->datasets) (1.5.0)\n",
|
| 77 |
+
"Requirement already satisfied, skipping upgrade: python-dateutil>=2.8.2 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2.9.0.post0)\n",
|
| 78 |
+
"Requirement already satisfied, skipping upgrade: tzdata>=2022.1 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2024.1)\n",
|
| 79 |
+
"Requirement already satisfied, skipping upgrade: pytz>=2020.1 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2024.1)\n",
|
| 80 |
+
"Requirement already satisfied, skipping upgrade: networkx in /usr/local/lib/python3.8/dist-packages (from torch>=1.10.0->accelerate) (3.1)\n",
|
| 81 |
+
"Requirement already satisfied, skipping upgrade: sympy in /usr/local/lib/python3.8/dist-packages (from torch>=1.10.0->accelerate) (1.12.1)\n",
|
| 82 |
+
"Requirement already satisfied, skipping upgrade: jinja2 in /home/saisab/.local/lib/python3.8/site-packages (from torch>=1.10.0->accelerate) (3.1.6)\n",
|
| 83 |
+
"Requirement already satisfied, skipping upgrade: propcache>=0.2.0 in /home/saisab/.local/lib/python3.8/site-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)\n",
|
| 84 |
+
"Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n",
|
| 85 |
+
"Requirement already satisfied, skipping upgrade: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.8/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n",
|
| 86 |
+
"Requirement already satisfied, skipping upgrade: MarkupSafe>=2.0 in /home/saisab/.local/lib/python3.8/site-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.5)\n"
|
| 87 |
+
]
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "stderr",
|
| 91 |
+
"output_type": "stream",
|
| 92 |
+
"text": [
|
| 93 |
+
"/home/saisab/py10/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 94 |
+
" from .autonotebook import tqdm as notebook_tqdm\n",
|
| 95 |
+
"/home/saisab/py10/lib/python3.10/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/home/saisab/py10/lib/python3.10/site-packages/torchvision/image.so: undefined symbol: _ZN3c1017RegisterOperatorsD1Ev'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n",
|
| 96 |
+
" warn(\n",
|
| 97 |
+
"2025-11-19 17:40:23.856880: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
|
| 98 |
+
"2025-11-19 17:40:23.901681: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
|
| 99 |
+
"2025-11-19 17:40:23.901728: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
|
| 100 |
+
"2025-11-19 17:40:23.903288: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
|
| 101 |
+
"2025-11-19 17:40:23.911990: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
|
| 102 |
+
"To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
|
| 103 |
+
"2025-11-19 17:40:25.468369: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"name": "stdout",
|
| 108 |
+
"output_type": "stream",
|
| 109 |
+
"text": [
|
| 110 |
+
"------------------------------------------------------------\n",
|
| 111 |
+
"✅ Transformers version: 4.55.4\n",
|
| 112 |
+
"✅ Datasets version: 4.3.0\n",
|
| 113 |
+
"✅ PyTorch version: 2.7.0+cu126\n",
|
| 114 |
+
"------------------------------------------------------------\n"
|
| 115 |
+
]
|
| 116 |
+
}
|
| 117 |
+
],
|
| 118 |
+
"source": [
|
| 119 |
+
"# 1. INSTALL LIBRARIES\n",
|
| 120 |
+
"!pip install transformers datasets scikit-learn accelerate -U\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"# 2. IMPORT EVERYTHING\n",
|
| 123 |
+
"import transformers\n",
|
| 124 |
+
"import datasets\n",
|
| 125 |
+
"import torch\n",
|
| 126 |
+
"import numpy as np\n",
|
| 127 |
+
"from datasets import load_dataset\n",
|
| 128 |
+
"from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
|
| 129 |
+
"from transformers import TrainingArguments, Trainer\n",
|
| 130 |
+
"from sklearn.metrics import accuracy_score, f1_score\n",
|
| 131 |
+
"from tqdm.auto import tqdm\n",
|
| 132 |
+
"\n",
|
| 133 |
+
"# 3. VERIFY THE VERSION (This is the critical check)\n",
|
| 134 |
+
"print(\"---\" * 20)\n",
|
| 135 |
+
"print(f\"✅ Transformers version: {transformers.__version__}\")\n",
|
| 136 |
+
"print(f\"✅ Datasets version: {datasets.__version__}\")\n",
|
| 137 |
+
"print(f\"✅ PyTorch version: {torch.__version__}\")\n",
|
| 138 |
+
"print(\"---\" * 20)\n",
|
| 139 |
+
"\n",
|
| 140 |
+
"# If the transformers version is 4.x.x or higher, the rest of the code will work.\n",
|
| 141 |
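+
"# If it is still 3.x.x or lower, the upgrade did not reach this kernel's environment (for example, pip installed into a different Python than the one running the notebook)."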
|
| 142 |
+
]
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"cell_type": "markdown",
|
| 146 |
+
"id": "d6f182e1-1331-4248-a9ee-34e9d2cac30f",
|
| 147 |
+
"metadata": {
|
| 148 |
+
"jp-MarkdownHeadingCollapsed": true
|
| 149 |
+
},
|
| 150 |
+
"source": [
|
| 151 |
+
"## 1.2. Load the Dataset\n"
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"cell_type": "code",
|
| 156 |
+
"execution_count": 2,
|
| 157 |
+
"id": "f7ab2507-4b6f-4999-ad0c-11f1718f904f",
|
| 158 |
+
"metadata": {},
|
| 159 |
+
"outputs": [
|
| 160 |
+
{
|
| 161 |
+
"name": "stderr",
|
| 162 |
+
"output_type": "stream",
|
| 163 |
+
"text": [
|
| 164 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
|
| 165 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 166 |
+
]
|
| 167 |
+
}
|
| 168 |
+
],
|
| 169 |
+
"source": [
|
| 170 |
+
"# Load dataset\n",
|
| 171 |
+
"emotion_dataset = load_dataset(\"dair-ai/emotion\")\n",
|
| 172 |
+
"\n",
|
| 173 |
+
"# Load tokenizer\n",
|
| 174 |
+
"model_checkpoint = \"bert-base-uncased\"\n",
|
| 175 |
+
"tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)\n",
|
| 176 |
+
"\n",
|
| 177 |
+
"def tokenize_function(examples):\n",
|
| 178 |
+
" return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True)\n",
|
| 179 |
+
"\n",
|
| 180 |
+
"tokenized_datasets = emotion_dataset.map(tokenize_function, batched=True)\n",
|
| 181 |
+
"\n",
|
| 182 |
+
"# Load model\n",
|
| 183 |
+
"labels = emotion_dataset[\"train\"].features[\"label\"].names\n",
|
| 184 |
+
"num_labels = len(labels)\n",
|
| 185 |
+
"id2label = {i: label for i, label in enumerate(labels)}\n",
|
| 186 |
+
"label2id = {label: i for i, label in enumerate(labels)}\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"model = AutoModelForSequenceClassification.from_pretrained(\n",
|
| 189 |
+
" model_checkpoint,\n",
|
| 190 |
+
" num_labels=num_labels,\n",
|
| 191 |
+
" id2label=id2label,\n",
|
| 192 |
+
" label2id=label2id\n",
|
| 193 |
+
")"
|
| 194 |
+
]
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"cell_type": "markdown",
|
| 198 |
+
"id": "6c396ac7-db63-40eb-9b40-3dd0b11cd255",
|
| 199 |
+
"metadata": {},
|
| 200 |
+
"source": [
|
| 201 |
+
"## 1.3. Entire Training"
|
| 202 |
+
]
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"cell_type": "code",
|
| 206 |
+
"execution_count": 3,
|
| 207 |
+
"id": "ae2f0908-c375-4f72-9940-1e2f5ea59107",
|
| 208 |
+
"metadata": {},
|
| 209 |
+
"outputs": [
|
| 210 |
+
{
|
| 211 |
+
"name": "stderr",
|
| 212 |
+
"output_type": "stream",
|
| 213 |
+
"text": [
|
| 214 |
+
" 0%| | 0/3000 [00:00<?, ?it/s]"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"name": "stdout",
|
| 219 |
+
"output_type": "stream",
|
| 220 |
+
"text": [
|
| 221 |
+
"Starting manual training loop...\n"
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"name": "stderr",
|
| 226 |
+
"output_type": "stream",
|
| 227 |
+
"text": [
|
| 228 |
+
" 33%|████████████▋ | 1001/3000 [27:00<10:54:13, 19.64s/it]"
|
| 229 |
+
]
|
| 230 |
+
},
|
| 231 |
+
{
|
| 232 |
+
"name": "stdout",
|
| 233 |
+
"output_type": "stream",
|
| 234 |
+
"text": [
|
| 235 |
+
"\n",
|
| 236 |
+
"--- Epoch 1 / 3 ---\n",
|
| 237 |
+
"Validation Macro F1: 0.9084\n",
|
| 238 |
+
"Validation Accuracy: 0.9325\n",
|
| 239 |
+
"Best model saved at epoch 1\n"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"name": "stderr",
|
| 244 |
+
"output_type": "stream",
|
| 245 |
+
"text": [
|
| 246 |
+
" 67%|██████████████████████████ | 2001/3000 [54:32<5:38:14, 20.32s/it]"
|
| 247 |
+
]
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"name": "stdout",
|
| 251 |
+
"output_type": "stream",
|
| 252 |
+
"text": [
|
| 253 |
+
"\n",
|
| 254 |
+
"--- Epoch 2 / 3 ---\n",
|
| 255 |
+
"Validation Macro F1: 0.9168\n",
|
| 256 |
+
"Validation Accuracy: 0.9390\n",
|
| 257 |
+
"Best model saved at epoch 2\n"
|
| 258 |
+
]
|
| 259 |
+
},
|
| 260 |
+
{
|
| 261 |
+
"name": "stderr",
|
| 262 |
+
"output_type": "stream",
|
| 263 |
+
"text": [
|
| 264 |
+
"100%|███████████████████████████████████████| 3000/3000 [1:19:34<00:00, 1.48s/it]"
|
| 265 |
+
]
|
| 266 |
+
},
|
| 267 |
+
{
|
| 268 |
+
"name": "stdout",
|
| 269 |
+
"output_type": "stream",
|
| 270 |
+
"text": [
|
| 271 |
+
"\n",
|
| 272 |
+
"--- Epoch 3 / 3 ---\n",
|
| 273 |
+
"Validation Macro F1: 0.9159\n",
|
| 274 |
+
"Validation Accuracy: 0.9390\n",
|
| 275 |
+
"\n",
|
| 276 |
+
"🎉 Training complete. Best model has been loaded.\n",
|
| 277 |
+
"Baseline model saved to ./finetuned-bert-emotion-baseline\n"
|
| 278 |
+
]
|
| 279 |
+
}
|
| 280 |
+
],
|
| 281 |
+
"source": [
|
| 282 |
+
"import torch\n",
|
| 283 |
+
"import numpy as np\n",
|
| 284 |
+
"from torch.utils.data import DataLoader\n",
|
| 285 |
+
"from transformers import DataCollatorWithPadding, get_scheduler\n",
|
| 286 |
+
"from sklearn.metrics import accuracy_score, f1_score\n",
|
| 287 |
+
"from tqdm.auto import tqdm\n",
|
| 288 |
+
"import copy\n",
|
| 289 |
+
"\n",
|
| 290 |
+
"# --- 1. Prepare DataLoaders ---\n",
|
| 291 |
+
"# We must re-run this preparation in case the notebook state was lost.\n",
|
| 292 |
+
"tokenized_datasets.set_format(\"torch\")\n",
|
| 293 |
+
"tokenized_datasets = tokenized_datasets.remove_columns([\"text\"])\n",
|
| 294 |
+
"tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n",
|
| 295 |
+
"\n",
|
| 296 |
+
"data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
|
| 297 |
+
"\n",
|
| 298 |
+
"train_dataloader = DataLoader(\n",
|
| 299 |
+
" tokenized_datasets[\"train\"], shuffle=True, batch_size=16, collate_fn=data_collator\n",
|
| 300 |
+
")\n",
|
| 301 |
+
"eval_dataloader = DataLoader(\n",
|
| 302 |
+
" tokenized_datasets[\"validation\"], batch_size=64, collate_fn=data_collator\n",
|
| 303 |
+
")\n",
|
| 304 |
+
"\n",
|
| 305 |
+
"# --- 2. Set up Optimizer and Device ---\n",
|
| 306 |
+
"optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)\n",
|
| 307 |
+
"device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
|
| 308 |
+
"model.to(device)\n",
|
| 309 |
+
"\n",
|
| 310 |
+
"# --- 3. Set up Scheduler and Training Parameters ---\n",
|
| 311 |
+
"num_epochs = 3\n",
|
| 312 |
+
"num_training_steps = num_epochs * len(train_dataloader)\n",
|
| 313 |
+
"lr_scheduler = get_scheduler(\n",
|
| 314 |
+
" name=\"linear\", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps\n",
|
| 315 |
+
")\n",
|
| 316 |
+
"\n",
|
| 317 |
+
"# --- 4. The Training and Evaluation Loop ---\n",
|
| 318 |
+
"progress_bar = tqdm(range(num_training_steps))\n",
|
| 319 |
+
"best_metric = -1.0\n",
|
| 320 |
+
"best_model_state = None\n",
|
| 321 |
+
"\n",
|
| 322 |
+
"print(\"Starting manual training loop...\")\n",
|
| 323 |
+
"for epoch in range(num_epochs):\n",
|
| 324 |
+
" model.train()\n",
|
| 325 |
+
" for batch in train_dataloader:\n",
|
| 326 |
+
" batch = {k: v.to(device) for k, v in batch.items()}\n",
|
| 327 |
+
" outputs = model(**batch)\n",
|
| 328 |
+
" loss = outputs.loss\n",
|
| 329 |
+
" loss.backward()\n",
|
| 330 |
+
"\n",
|
| 331 |
+
" optimizer.step()\n",
|
| 332 |
+
" lr_scheduler.step()\n",
|
| 333 |
+
" optimizer.zero_grad()\n",
|
| 334 |
+
" progress_bar.update(1)\n",
|
| 335 |
+
"\n",
|
| 336 |
+
" # --- Evaluation after each epoch ---\n",
|
| 337 |
+
" model.eval()\n",
|
| 338 |
+
" all_predictions = []\n",
|
| 339 |
+
" all_labels = []\n",
|
| 340 |
+
" for batch in eval_dataloader:\n",
|
| 341 |
+
" batch = {k: v.to(device) for k, v in batch.items()}\n",
|
| 342 |
+
" with torch.no_grad():\n",
|
| 343 |
+
" outputs = model(**batch)\n",
|
| 344 |
+
" \n",
|
| 345 |
+
" logits = outputs.logits\n",
|
| 346 |
+
" predictions = torch.argmax(logits, dim=-1).cpu().numpy()\n",
|
| 347 |
+
" labels = batch[\"labels\"].cpu().numpy()\n",
|
| 348 |
+
" all_predictions.extend(predictions)\n",
|
| 349 |
+
" all_labels.extend(labels)\n",
|
| 350 |
+
"\n",
|
| 351 |
+
" macro_f1 = f1_score(all_labels, all_predictions, average=\"macro\")\n",
|
| 352 |
+
" accuracy = accuracy_score(all_labels, all_predictions)\n",
|
| 353 |
+
" \n",
|
| 354 |
+
" print(f\"\\n--- Epoch {epoch + 1} / {num_epochs} ---\")\n",
|
| 355 |
+
" print(f\"Validation Macro F1: {macro_f1:.4f}\")\n",
|
| 356 |
+
" print(f\"Validation Accuracy: {accuracy:.4f}\")\n",
|
| 357 |
+
"\n",
|
| 358 |
+
" # Save the best model\n",
|
| 359 |
+
" if macro_f1 > best_metric:\n",
|
| 360 |
+
" best_metric = macro_f1\n",
|
| 361 |
+
" best_model_state = copy.deepcopy(model.state_dict())\n",
|
| 362 |
+
" print(f\"Best model saved at epoch {epoch + 1}\")\n",
|
| 363 |
+
"\n",
|
| 364 |
+
"# --- 5. Load the Best Model ---\n",
|
| 365 |
+
"model.load_state_dict(best_model_state)\n",
|
| 366 |
+
"print(\"\\n🎉 Training complete. Best model has been loaded.\")\n",
|
| 367 |
+
"\n",
|
| 368 |
+
"# --- 6. Save the Final Baseline Model ---\n",
|
| 369 |
+
"baseline_model_dir = \"./finetuned-bert-emotion-baseline\"\n",
|
| 370 |
+
"model.save_pretrained(baseline_model_dir)\n",
|
| 371 |
+
"tokenizer.save_pretrained(baseline_model_dir)\n",
|
| 372 |
+
"print(f\"Baseline model saved to {baseline_model_dir}\")"
|
| 373 |
+
]
|
| 374 |
+
},
|
| 375 |
+
{
|
| 376 |
+
"cell_type": "markdown",
|
| 377 |
+
"id": "2a48c9f2-6503-4d3d-8541-14963654e5dc",
|
| 378 |
+
"metadata": {},
|
| 379 |
+
"source": [
|
| 380 |
+
"## 1.8. Evaluate the Baseline Model on the Test Set\n",
|
| 381 |
+
"Now that we have our best model, we must evaluate its performance on the test split of the dataset. This provides the final, unbiased measure of our model's performance. We will create a test_dataloader and run a similar evaluation loop."
|
| 382 |
+
]
|
| 383 |
+
},
|
| 384 |
+
{
|
| 385 |
+
"cell_type": "code",
|
| 386 |
+
"execution_count": 4,
|
| 387 |
+
"id": "b27ff9b5-1dfd-4b11-ad9c-1da187224648",
|
| 388 |
+
"metadata": {},
|
| 389 |
+
"outputs": [
|
| 390 |
+
{
|
| 391 |
+
"name": "stdout",
|
| 392 |
+
"output_type": "stream",
|
| 393 |
+
"text": [
|
| 394 |
+
"Training is commented out. Loading the saved baseline model directly.\n"
|
| 395 |
+
]
|
| 396 |
+
}
|
| 397 |
+
],
|
| 398 |
+
"source": [
|
| 399 |
+
"import time\n",
|
| 400 |
+
"import os\n",
|
| 401 |
+
"import pandas as pd\n",
|
| 402 |
+
"from sklearn.metrics import classification_report, confusion_matrix\n",
|
| 403 |
+
"\n",
|
| 404 |
+
"# --- 1. Create the Test DataLoader ---\n",
|
| 405 |
+
"test_dataloader = DataLoader(\n",
|
| 406 |
+
" tokenized_datasets[\"test\"], batch_size=64, collate_fn=data_collator\n",
|
| 407 |
+
")\n",
|
| 408 |
+
"\n",
|
| 409 |
+
"# --- 2. Run Evaluation on the Test Set ---\n",
|
| 410 |
+
"model.eval()\n",
|
| 411 |
+
"all_predictions = []\n",
|
| 412 |
+
"all_labels = []\n",
|
| 413 |
+
"inference_times = []\n",
|
| 414 |
+
"\n",
|
| 415 |
+
"print(\"Evaluating on the test set...\")\n",
|
| 416 |
+
"for batch in tqdm(test_dataloader):\n",
|
| 417 |
+
" batch = {k: v.to(device) for k, v in batch.items()}\n",
|
| 418 |
+
" start_time = time.time()\n",
|
| 419 |
+
" \n",
|
| 420 |
+
" with torch.no_grad():\n",
|
| 421 |
+
" outputs = model(**batch)\n",
|
| 422 |
+
" \n",
|
| 423 |
+
" end_time = time.time()\n",
|
| 424 |
+
" inference_times.append(end_time - start_time)\n",
|
| 425 |
+
"\n",
|
| 426 |
+
" logits = outputs.logits\n",
|
| 427 |
+
" predictions = torch.argmax(logits, dim=-1).cpu().numpy()\n",
|
| 428 |
+
" labels = batch[\"labels\"].cpu().numpy()\n",
|
| 429 |
+
" all_predictions.extend(predictions)\n",
|
| 430 |
+
" all_labels.extend(labels)\n",
|
| 431 |
+
"\n",
|
| 432 |
+
"# --- 3. Calculate Final Metrics ---\n",
|
| 433 |
+
"# Performance Metrics\n",
|
| 434 |
+
"test_macro_f1 = f1_score(all_labels, all_predictions, average=\"macro\")\n",
|
| 435 |
+
"test_accuracy = accuracy_score(all_labels, all_predictions)\n",
|
| 436 |
+
"\n",
|
| 437 |
+
"# Model Size (CORRECTED FILENAME)\n",
|
| 438 |
+
"model_size_mb = os.path.getsize(f\"{baseline_model_dir}/model.safetensors\") / (1024 * 1024)\n",
|
| 439 |
+
"\n",
|
| 440 |
+
"# Latency Metrics\n",
|
| 441 |
+
"avg_batch_latency_ms = (sum(inference_times) / len(inference_times)) * 1000\n",
|
| 442 |
+
"avg_example_latency_ms = avg_batch_latency_ms / test_dataloader.batch_size\n",
|
| 443 |
+
"\n",
|
| 444 |
+
"# Store results in a dictionary for later\n",
|
| 445 |
+
"baseline_results = {\n",
|
| 446 |
+
" \"model\": \"Baseline (FP32)\",\n",
|
| 447 |
+
" \"accuracy\": test_accuracy,\n",
|
| 448 |
+
" \"macro_f1\": test_macro_f1,\n",
|
| 449 |
+
" \"model_size_mb\": model_size_mb,\n",
|
| 450 |
+
" \"latency_ms_per_batch\": avg_batch_latency_ms\n",
|
| 451 |
+
"}\n",
|
| 452 |
+
"\n",
|
| 453 |
+
"print(\"\\n--- Baseline Model Test Results ---\")\n",
|
| 454 |
+
"print(f\"Accuracy: {test_accuracy:.4f}\")\n",
|
| 455 |
+
"print(f\"Macro F1-Score: {test_macro_f1:.4f}\")\n",
|
| 456 |
+
"print(f\"Model Size: {model_size_mb:.2f} MB\")\n",
|
| 457 |
+
"print(f\"Avg. Latency per Batch: {avg_batch_latency_ms:.2f} ms\")\n",
|
| 458 |
+
"\n",
|
| 459 |
+
"\n",
|
| 460 |
+
"# \"\"\"\n",
|
| 461 |
+
"# import torch\n",
|
| 462 |
+
"# import numpy as np\n",
|
| 463 |
+
"# from torch.utils.data import DataLoader\n",
|
| 464 |
+
"# from transformers import DataCollatorWithPadding, get_scheduler\n",
|
| 465 |
+
"# from sklearn.metrics import accuracy_score, f1_score\n",
|
| 466 |
+
"# from tqdm.auto import tqdm\n",
|
| 467 |
+
"# import copy\n",
|
| 468 |
+
"\n",
|
| 469 |
+
"# # --- 1. Prepare DataLoaders ---\n",
|
| 470 |
+
"# # We must re-run this preparation in case the notebook state was lost.\n",
|
| 471 |
+
"# tokenized_datasets.set_format(\"torch\")\n",
|
| 472 |
+
"# tokenized_datasets = tokenized_datasets.remove_columns([\"text\"])\n",
|
| 473 |
+
"# tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n",
|
| 474 |
+
"\n",
|
| 475 |
+
"# data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
|
| 476 |
+
"\n",
|
| 477 |
+
"# train_dataloader = DataLoader(\n",
|
| 478 |
+
"# tokenized_datasets[\"train\"], shuffle=True, batch_size=16, collate_fn=data_collator\n",
|
| 479 |
+
"# )\n",
|
| 480 |
+
"# eval_dataloader = DataLoader(\n",
|
| 481 |
+
"# tokenized_datasets[\"validation\"], batch_size=64, collate_fn=data_collator\n",
|
| 482 |
+
"# )\n",
|
| 483 |
+
"\n",
|
| 484 |
+
"# # --- 2. Set up Optimizer and Device ---\n",
|
| 485 |
+
"# optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)\n",
|
| 486 |
+
"# device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
|
| 487 |
+
"# model.to(device)\n",
|
| 488 |
+
"\n",
|
| 489 |
+
"# # --- 3. Set up Scheduler and Training Parameters ---\n",
|
| 490 |
+
"# num_epochs = 3\n",
|
| 491 |
+
"# num_training_steps = num_epochs * len(train_dataloader)\n",
|
| 492 |
+
"# lr_scheduler = get_scheduler(\n",
|
| 493 |
+
"# name=\"linear\", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps\n",
|
| 494 |
+
"# )\n",
|
| 495 |
+
"\n",
|
| 496 |
+
"# # --- 4. The Training and Evaluation Loop ---\n",
|
| 497 |
+
"# progress_bar = tqdm(range(num_training_steps))\n",
|
| 498 |
+
"# best_metric = -1.0\n",
|
| 499 |
+
"# best_model_state = None\n",
|
| 500 |
+
"\n",
|
| 501 |
+
"# print(\"Starting manual training loop...\")\n",
|
| 502 |
+
"# for epoch in range(num_epochs):\n",
|
| 503 |
+
"# model.train()\n",
|
| 504 |
+
"# for batch in train_dataloader:\n",
|
| 505 |
+
"# batch = {k: v.to(device) for k, v in batch.items()}\n",
|
| 506 |
+
"# outputs = model(**batch)\n",
|
| 507 |
+
"# loss = outputs.loss\n",
|
| 508 |
+
"# loss.backward()\n",
|
| 509 |
+
"\n",
|
| 510 |
+
"# optimizer.step()\n",
|
| 511 |
+
"# lr_scheduler.step()\n",
|
| 512 |
+
"# optimizer.zero_grad()\n",
|
| 513 |
+
"# progress_bar.update(1)\n",
|
| 514 |
+
"\n",
|
| 515 |
+
"# # --- Evaluation after each epoch ---\n",
|
| 516 |
+
"# model.eval()\n",
|
| 517 |
+
"# all_predictions = []\n",
|
| 518 |
+
"# all_labels = []\n",
|
| 519 |
+
"# for batch in eval_dataloader:\n",
|
| 520 |
+
"# batch = {k: v.to(device) for k, v in batch.items()}\n",
|
| 521 |
+
"# with torch.no_grad():\n",
|
| 522 |
+
"# outputs = model(**batch)\n",
|
| 523 |
+
" \n",
|
| 524 |
+
"# logits = outputs.logits\n",
|
| 525 |
+
"# predictions = torch.argmax(logits, dim=-1).cpu().numpy()\n",
|
| 526 |
+
"# labels = batch[\"labels\"].cpu().numpy()\n",
|
| 527 |
+
"# all_predictions.extend(predictions)\n",
|
| 528 |
+
"# all_labels.extend(labels)\n",
|
| 529 |
+
"\n",
|
| 530 |
+
"# macro_f1 = f1_score(all_labels, all_predictions, average=\"macro\")\n",
|
| 531 |
+
"# accuracy = accuracy_score(all_labels, all_predictions)\n",
|
| 532 |
+
" \n",
|
| 533 |
+
"# print(f\"\\n--- Epoch {epoch + 1} / {num_epochs} ---\")\n",
|
| 534 |
+
"# print(f\"Validation Macro F1: {macro_f1:.4f}\")\n",
|
| 535 |
+
"# print(f\"Validation Accuracy: {accuracy:.4f}\")\n",
|
| 536 |
+
"\n",
|
| 537 |
+
"# # Save the best model\n",
|
| 538 |
+
"# if macro_f1 > best_metric:\n",
|
| 539 |
+
"# best_metric = macro_f1\n",
|
| 540 |
+
"# best_model_state = copy.deepcopy(model.state_dict())\n",
|
| 541 |
+
"# print(f\"Best model saved at epoch {epoch + 1}\")\n",
|
| 542 |
+
"\n",
|
| 543 |
+
"# # --- 5. Load the Best Model ---\n",
|
| 544 |
+
"# model.load_state_dict(best_model_state)\n",
|
| 545 |
+
"# print(\"\\n🎉 Training complete. Best model has been loaded.\")\n",
|
| 546 |
+
"\n",
|
| 547 |
+
"# # --- 6. Save the Final Baseline Model ---\n",
|
| 548 |
+
"# baseline_model_dir = \"./finetuned-bert-emotion-baseline\"\n",
|
| 549 |
+
"# model.save_pretrained(baseline_model_dir)\n",
|
| 550 |
+
"# tokenizer.save_pretrained(baseline_model_dir)\n",
|
| 551 |
+
"# print(f\"Baseline model saved to {baseline_model_dir}\")\n",
|
| 552 |
+
"# \"\"\"\n",
|
| 553 |
+
"\n",
|
| 554 |
+
"# # --- TRAINING SKIPPED: Load the already fine-tuned model from disk ---\n",
|
| 555 |
+
"# print(\"Training is commented out. Loading the saved baseline model directly.\")\n",
|
| 556 |
+
"# baseline_model_dir = \"./finetuned-bert-emotion-baseline\"\n",
|
| 557 |
+
"# # The 'model' variable is what the next cells expect. We load our saved model into it.\n",
|
| 558 |
+
"# model = AutoModelForSequenceClassification.from_pretrained(baseline_model_dir)\n",
|
| 559 |
+
"# # Also need to define the device for the evaluation cell\n",
|
| 560 |
+
"# device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
|
| 561 |
+
"# model.to(device)\n",
|
| 562 |
+
"\n",
|
| 563 |
+
"# # We still need the DataCollator for the evaluation cell\n",
|
| 564 |
+
"# from transformers import DataCollatorWithPadding\n",
|
| 565 |
+
"# data_collator = DataCollatorWithPadding(tokenizer=tokenizer)"
|
| 566 |
+
]
|
| 567 |
+
},
|
| 568 |
+
{
|
| 569 |
+
"cell_type": "markdown",
|
| 570 |
+
"id": "f01d7baf-7080-488e-bf62-fc4df9c6e738",
|
| 571 |
+
"metadata": {},
|
| 572 |
+
"source": [
|
| 573 |
+
"## 1.9. Analysis: Confusion Matrix and Classification Report"
|
| 574 |
+
]
|
| 575 |
+
},
|
| 576 |
+
{
|
| 577 |
+
"cell_type": "code",
|
| 578 |
+
"execution_count": 5,
|
| 579 |
+
"id": "2f17fc36-936c-42b6-a2ab-a3310e217a5e",
|
| 580 |
+
"metadata": {},
|
| 581 |
+
"outputs": [
|
| 582 |
+
{
|
| 583 |
+
"name": "stdout",
|
| 584 |
+
"output_type": "stream",
|
| 585 |
+
"text": [
|
| 586 |
+
"--- Classification Report ---\n",
|
| 587 |
+
" precision recall f1-score support\n",
|
| 588 |
+
"\n",
|
| 589 |
+
" sadness 0.95 0.97 0.96 550\n",
|
| 590 |
+
" joy 0.96 0.95 0.95 704\n",
|
| 591 |
+
" love 0.86 0.88 0.87 178\n",
|
| 592 |
+
" anger 0.98 0.92 0.95 275\n",
|
| 593 |
+
" fear 0.89 0.92 0.90 212\n",
|
| 594 |
+
" surprise 0.83 0.89 0.86 81\n",
|
| 595 |
+
"\n",
|
| 596 |
+
" accuracy 0.94 2000\n",
|
| 597 |
+
" macro avg 0.91 0.92 0.92 2000\n",
|
| 598 |
+
"weighted avg 0.94 0.94 0.94 2000\n",
|
| 599 |
+
"\n",
|
| 600 |
+
"\n",
|
| 601 |
+
"--- Confusion Matrix ---\n",
|
| 602 |
+
" sadness joy love anger fear surprise\n",
|
| 603 |
+
"sadness 536 1 1 4 8 0\n",
|
| 604 |
+
"joy 4 668 23 0 2 7\n",
|
| 605 |
+
"love 0 22 156 0 0 0\n",
|
| 606 |
+
"anger 14 2 1 252 6 0\n",
|
| 607 |
+
"fear 7 1 0 2 194 8\n",
|
| 608 |
+
"surprise 1 1 0 0 7 72\n"
|
| 609 |
+
]
|
| 610 |
+
}
|
| 611 |
+
],
|
| 612 |
+
"source": [
|
| 613 |
+
"from sklearn.metrics import classification_report, confusion_matrix\n",
|
| 614 |
+
"import pandas as pd\n",
|
| 615 |
+
"\n",
|
| 616 |
+
"# Get the label names from our label mapping\n",
|
| 617 |
+
"label_names = list(label2id.keys())\n",
|
| 618 |
+
"\n",
|
| 619 |
+
"# Generate and print the classification report\n",
|
| 620 |
+
"print(\"--- Classification Report ---\")\n",
|
| 621 |
+
"report = classification_report(all_labels, all_predictions, target_names=label_names)\n",
|
| 622 |
+
"print(report)\n",
|
| 623 |
+
"\n",
|
| 624 |
+
"# Generate and print the confusion matrix\n",
|
| 625 |
+
"print(\"\\n--- Confusion Matrix ---\")\n",
|
| 626 |
+
"conf_matrix = confusion_matrix(all_labels, all_predictions)\n",
|
| 627 |
+
"\n",
|
| 628 |
+
"# Use pandas for a more readable confusion matrix\n",
|
| 629 |
+
"conf_matrix_df = pd.DataFrame(conf_matrix, index=label_names, columns=label_names)\n",
|
| 630 |
+
"print(conf_matrix_df)"
|
| 631 |
+
]
|
| 632 |
+
},
|
| 633 |
+
{
|
| 634 |
+
"cell_type": "markdown",
|
| 635 |
+
"id": "144c6d74-ea3c-44da-8ab7-82a5430c73b8",
|
| 636 |
+
"metadata": {},
|
| 637 |
+
"source": [
|
| 638 |
+
"# Part 2: Post-Training Quantization (PTQ)"
|
| 639 |
+
]
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"cell_type": "markdown",
|
| 643 |
+
"id": "7a22fbad-64be-41b1-a455-d570573933db",
|
| 644 |
+
"metadata": {},
|
| 645 |
+
"source": [
|
| 646 |
+
"## 2.1. Load the Baseline Model and Apply Dynamic Quantization"
|
| 647 |
+
]
|
| 648 |
+
},
|
| 649 |
+
{
|
| 650 |
+
"cell_type": "code",
|
| 651 |
+
"execution_count": 6,
|
| 652 |
+
"id": "b8139f89-73a5-43f9-9f71-4c2112499f48",
|
| 653 |
+
"metadata": {},
|
| 654 |
+
"outputs": [
|
| 655 |
+
{
|
| 656 |
+
"name": "stdout",
|
| 657 |
+
"output_type": "stream",
|
| 658 |
+
"text": [
|
| 659 |
+
"--- Original FP32 Model ---\n",
|
| 660 |
+
"BertForSequenceClassification(\n",
|
| 661 |
+
" (bert): BertModel(\n",
|
| 662 |
+
" (embeddings): BertEmbeddings(\n",
|
| 663 |
+
" (word_embeddings): Embedding(30522, 768, padding_idx=0)\n",
|
| 664 |
+
" (position_embeddings): Embedding(512, 768)\n",
|
| 665 |
+
" (token_type_embeddings): Embedding(2, 768)\n",
|
| 666 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 667 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 668 |
+
" )\n",
|
| 669 |
+
" (encoder): BertEncoder(\n",
|
| 670 |
+
" (layer): ModuleList(\n",
|
| 671 |
+
" (0-11): 12 x BertLayer(\n",
|
| 672 |
+
" (attention): BertAttention(\n",
|
| 673 |
+
" (self): BertSdpaSelfAttention(\n",
|
| 674 |
+
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 675 |
+
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 676 |
+
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 677 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 678 |
+
" )\n",
|
| 679 |
+
" (output): BertSelfOutput(\n",
|
| 680 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 681 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 682 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 683 |
+
" )\n",
|
| 684 |
+
" )\n",
|
| 685 |
+
" (intermediate): BertIntermediate(\n",
|
| 686 |
+
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
| 687 |
+
" (intermediate_act_fn): GELUActivation()\n",
|
| 688 |
+
" )\n",
|
| 689 |
+
" (output): BertOutput(\n",
|
| 690 |
+
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
| 691 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 692 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 693 |
+
" )\n",
|
| 694 |
+
" )\n",
|
| 695 |
+
" )\n",
|
| 696 |
+
" )\n",
|
| 697 |
+
" (pooler): BertPooler(\n",
|
| 698 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 699 |
+
" (activation): Tanh()\n",
|
| 700 |
+
" )\n",
|
| 701 |
+
" )\n",
|
| 702 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 703 |
+
" (classifier): Linear(in_features=768, out_features=6, bias=True)\n",
|
| 704 |
+
")\n",
|
| 705 |
+
"\n",
|
| 706 |
+
"--- Quantized INT8 Model ---\n",
|
| 707 |
+
"BertForSequenceClassification(\n",
|
| 708 |
+
" (bert): BertModel(\n",
|
| 709 |
+
" (embeddings): BertEmbeddings(\n",
|
| 710 |
+
" (word_embeddings): Embedding(30522, 768, padding_idx=0)\n",
|
| 711 |
+
" (position_embeddings): Embedding(512, 768)\n",
|
| 712 |
+
" (token_type_embeddings): Embedding(2, 768)\n",
|
| 713 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 714 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 715 |
+
" )\n",
|
| 716 |
+
" (encoder): BertEncoder(\n",
|
| 717 |
+
" (layer): ModuleList(\n",
|
| 718 |
+
" (0-11): 12 x BertLayer(\n",
|
| 719 |
+
" (attention): BertAttention(\n",
|
| 720 |
+
" (self): BertSdpaSelfAttention(\n",
|
| 721 |
+
" (query): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
|
| 722 |
+
" (key): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
|
| 723 |
+
" (value): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
|
| 724 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 725 |
+
" )\n",
|
| 726 |
+
" (output): BertSelfOutput(\n",
|
| 727 |
+
" (dense): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
|
| 728 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 729 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 730 |
+
" )\n",
|
| 731 |
+
" )\n",
|
| 732 |
+
" (intermediate): BertIntermediate(\n",
|
| 733 |
+
" (dense): DynamicQuantizedLinear(in_features=768, out_features=3072, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
|
| 734 |
+
" (intermediate_act_fn): GELUActivation()\n",
|
| 735 |
+
" )\n",
|
| 736 |
+
" (output): BertOutput(\n",
|
| 737 |
+
" (dense): DynamicQuantizedLinear(in_features=3072, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
|
| 738 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 739 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 740 |
+
" )\n",
|
| 741 |
+
" )\n",
|
| 742 |
+
" )\n",
|
| 743 |
+
" )\n",
|
| 744 |
+
" (pooler): BertPooler(\n",
|
| 745 |
+
" (dense): DynamicQuantizedLinear(in_features=768, out_features=768, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
|
| 746 |
+
" (activation): Tanh()\n",
|
| 747 |
+
" )\n",
|
| 748 |
+
" )\n",
|
| 749 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 750 |
+
" (classifier): DynamicQuantizedLinear(in_features=768, out_features=6, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
|
| 751 |
+
")\n"
|
| 752 |
+
]
|
| 753 |
+
}
|
| 754 |
+
],
|
| 755 |
+
"source": [
|
| 756 |
+
"import torch\n",
|
| 757 |
+
"from torch.quantization import quantize_dynamic\n",
|
| 758 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 759 |
+
"\n",
|
| 760 |
+
"# --- 1. Load the fine-tuned FP32 model ---\n",
|
| 761 |
+
"baseline_model_dir = \"./finetuned-bert-emotion-baseline\"\n",
|
| 762 |
+
"model_fp32 = AutoModelForSequenceClassification.from_pretrained(baseline_model_dir)\n",
|
| 763 |
+
"\n",
|
| 764 |
+
"# --- 2. Apply dynamic quantization ---\n",
|
| 765 |
+
"# We specify that we want to quantize the Linear layers of the model.\n",
|
| 766 |
+
"# The model must be in evaluation mode and on the CPU.\n",
|
| 767 |
+
"model_quantized = quantize_dynamic(\n",
|
| 768 |
+
" model=model_fp32.to(\"cpu\").eval(),\n",
|
| 769 |
+
" qconfig_spec={torch.nn.Linear},\n",
|
| 770 |
+
" dtype=torch.qint8\n",
|
| 771 |
+
")\n",
|
| 772 |
+
"\n",
|
| 773 |
+
"print(\"--- Original FP32 Model ---\")\n",
|
| 774 |
+
"print(model_fp32)\n",
|
| 775 |
+
"print(\"\\n--- Quantized INT8 Model ---\")\n",
|
| 776 |
+
"print(model_quantized)"
|
| 777 |
+
]
|
| 778 |
+
},
|
| 779 |
+
{
|
| 780 |
+
"cell_type": "markdown",
|
| 781 |
+
"id": "1030dff5-668f-4178-a360-b0cd067aefb3",
|
| 782 |
+
"metadata": {},
|
| 783 |
+
"source": [
|
| 784 |
+
"## 2.2. Evaluate the Quantized Model"
|
| 785 |
+
]
|
| 786 |
+
},
|
| 787 |
+
{
|
| 788 |
+
"cell_type": "code",
|
| 789 |
+
"execution_count": 7,
|
| 790 |
+
"id": "b235484a-04e6-4bc9-b509-6929d4f2b12f",
|
| 791 |
+
"metadata": {},
|
| 792 |
+
"outputs": [
|
| 793 |
+
{
|
| 794 |
+
"name": "stdout",
|
| 795 |
+
"output_type": "stream",
|
| 796 |
+
"text": [
|
| 797 |
+
"Evaluating quantized model on the CPU...\n"
|
| 798 |
+
]
|
| 799 |
+
},
|
| 800 |
+
{
|
| 801 |
+
"name": "stderr",
|
| 802 |
+
"output_type": "stream",
|
| 803 |
+
"text": [
|
| 804 |
+
"\n",
|
| 805 |
+
" 0%| | 0/32 [00:00<?, ?it/s]\u001b[A\n",
|
| 806 |
+
" 3%|█▍ | 1/32 [00:06<03:23, 6.57s/it]\u001b[A\n",
|
| 807 |
+
" 6%|██▉ | 2/32 [00:13<03:18, 6.62s/it]\u001b[A\n",
|
| 808 |
+
" 9%|████▎ | 3/32 [00:20<03:16, 6.78s/it]\u001b[A\n",
|
| 809 |
+
" 12%|█████▊ | 4/32 [00:27<03:14, 6.95s/it]\u001b[A\n",
|
| 810 |
+
" 16%|███████▏ | 5/32 [00:34<03:12, 7.14s/it]\u001b[A\n",
|
| 811 |
+
" 19%|████████▋ | 6/32 [00:42<03:06, 7.19s/it]\u001b[A\n",
|
| 812 |
+
" 22%|██████████ | 7/32 [00:49<02:58, 7.12s/it]\u001b[A\n",
|
| 813 |
+
" 25%|███████████▌ | 8/32 [00:55<02:48, 7.00s/it]\u001b[A\n",
|
| 814 |
+
" 28%|████████████▉ | 9/32 [01:03<02:42, 7.08s/it]\u001b[A\n",
|
| 815 |
+
" 31%|██████████████ | 10/32 [01:10<02:36, 7.12s/it]\u001b[A\n",
|
| 816 |
+
" 34%|███████████████▍ | 11/32 [01:17<02:28, 7.09s/it]\u001b[A\n",
|
| 817 |
+
" 38%|████████████████▉ | 12/32 [01:24<02:19, 6.96s/it]\u001b[A\n",
|
| 818 |
+
" 41%|██████████████████▎ | 13/32 [01:30<02:10, 6.87s/it]\u001b[A\n",
|
| 819 |
+
" 44%|███████████████████▋ | 14/32 [01:37<02:02, 6.82s/it]\u001b[A\n",
|
| 820 |
+
" 47%|█████████████████████ | 15/32 [01:44<01:55, 6.79s/it]\u001b[A\n",
|
| 821 |
+
" 50%|██████████████████████▌ | 16/32 [01:51<01:50, 6.90s/it]\u001b[A\n",
|
| 822 |
+
" 53%|███████████████████████▉ | 17/32 [01:58<01:45, 7.00s/it]\u001b[A\n",
|
| 823 |
+
" 56%|████████████████��████████▎ | 18/32 [02:06<01:40, 7.19s/it]\u001b[A\n",
|
| 824 |
+
" 59%|██████████████████████████▋ | 19/32 [02:13<01:34, 7.27s/it]\u001b[A\n",
|
| 825 |
+
" 62%|████████████████████████████▏ | 20/32 [02:21<01:27, 7.30s/it]\u001b[A\n",
|
| 826 |
+
" 66%|█████████████████████████████▌ | 21/32 [02:28<01:20, 7.36s/it]\u001b[A\n",
|
| 827 |
+
" 69%|██████████████████████████████▉ | 22/32 [02:35<01:13, 7.37s/it]\u001b[A\n",
|
| 828 |
+
" 72%|████████████████████████████████▎ | 23/32 [02:43<01:06, 7.40s/it]\u001b[A\n",
|
| 829 |
+
" 75%|█████████████████████████████████▊ | 24/32 [02:50<00:58, 7.37s/it]\u001b[A\n",
|
| 830 |
+
" 78%|███████████████████████████████████▏ | 25/32 [02:58<00:51, 7.39s/it]\u001b[A\n",
|
| 831 |
+
" 81%|████████████████████████████████████▌ | 26/32 [03:05<00:44, 7.40s/it]\u001b[A\n",
|
| 832 |
+
" 84%|█████████████████████████████████████▉ | 27/32 [03:12<00:36, 7.40s/it]\u001b[A\n",
|
| 833 |
+
" 88%|███████████████████████████████████████▍ | 28/32 [03:20<00:29, 7.40s/it]\u001b[A\n",
|
| 834 |
+
" 91%|████████████████████████████████████████▊ | 29/32 [03:27<00:21, 7.31s/it]\u001b[A\n",
|
| 835 |
+
" 94%|██████████████████████████████████████████▏ | 30/32 [03:34<00:14, 7.30s/it]\u001b[A\n",
|
| 836 |
+
" 97%|███████████████████████████████████████████▌ | 31/32 [03:42<00:07, 7.35s/it]\u001b[A\n",
|
| 837 |
+
"100%|█████████████████████████████████████████████| 32/32 [03:43<00:00, 7.00s/it]\u001b[A"
|
| 838 |
+
]
|
| 839 |
+
},
|
| 840 |
+
{
|
| 841 |
+
"name": "stdout",
|
| 842 |
+
"output_type": "stream",
|
| 843 |
+
"text": [
|
| 844 |
+
"\n",
|
| 845 |
+
"--- PTQ Model Test Results ---\n",
|
| 846 |
+
"Accuracy: 0.9265\n",
|
| 847 |
+
"Macro F1-Score: 0.8869\n",
|
| 848 |
+
"Model Size: 173.08 MB\n",
|
| 849 |
+
"Avg. Latency per Batch (CPU): 6970.90 ms\n"
|
| 850 |
+
]
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"name": "stderr",
|
| 854 |
+
"output_type": "stream",
|
| 855 |
+
"text": [
|
| 856 |
+
"\n"
|
| 857 |
+
]
|
| 858 |
+
}
|
| 859 |
+
],
|
| 860 |
+
"source": [
|
| 861 |
+
"import os\n",
|
| 862 |
+
"import time\n",
|
| 863 |
+
"\n",
|
| 864 |
+
"# --- 1. Save the quantized model to disk to measure its size ---\n",
|
| 865 |
+
"quantized_model_dir = \"./quantized-bert-emotion-ptq\"\n",
|
| 866 |
+
"os.makedirs(quantized_model_dir, exist_ok=True)\n",
|
| 867 |
+
"quantized_model_path = f\"{quantized_model_dir}/model.pth\"\n",
|
| 868 |
+
"torch.save(model_quantized.state_dict(), quantized_model_path)\n",
|
| 869 |
+
"\n",
|
| 870 |
+
"# --- 2. Set up the evaluation device and dataloader ---\n",
|
| 871 |
+
"# IMPORTANT: Evaluation must be on the CPU\n",
|
| 872 |
+
"device_cpu = torch.device(\"cpu\")\n",
|
| 873 |
+
"model_quantized.to(device_cpu)\n",
|
| 874 |
+
"model_quantized.eval()\n",
|
| 875 |
+
"\n",
|
| 876 |
+
"# Re-create the test dataloader just to be safe\n",
|
| 877 |
+
"test_dataloader = DataLoader(\n",
|
| 878 |
+
" tokenized_datasets[\"test\"], batch_size=64, collate_fn=data_collator\n",
|
| 879 |
+
")\n",
|
| 880 |
+
"\n",
|
| 881 |
+
"# --- 3. Run Evaluation on the Test Set ---\n",
|
| 882 |
+
"all_predictions = []\n",
|
| 883 |
+
"all_labels = []\n",
|
| 884 |
+
"inference_times = []\n",
|
| 885 |
+
"\n",
|
| 886 |
+
"print(\"Evaluating quantized model on the CPU...\")\n",
|
| 887 |
+
"for batch in tqdm(test_dataloader):\n",
|
| 888 |
+
" # No need to move to device, as it's already CPU data\n",
|
| 889 |
+
" batch = {k: v.to(device_cpu) for k, v in batch.items()}\n",
|
| 890 |
+
" start_time = time.time()\n",
|
| 891 |
+
" \n",
|
| 892 |
+
" with torch.no_grad():\n",
|
| 893 |
+
" outputs = model_quantized(**batch)\n",
|
| 894 |
+
" \n",
|
| 895 |
+
" end_time = time.time()\n",
|
| 896 |
+
" inference_times.append(end_time - start_time)\n",
|
| 897 |
+
"\n",
|
| 898 |
+
" logits = outputs.logits\n",
|
| 899 |
+
" predictions = torch.argmax(logits, dim=-1).numpy()\n",
|
| 900 |
+
" labels = batch[\"labels\"].numpy()\n",
|
| 901 |
+
" all_predictions.extend(predictions)\n",
|
| 902 |
+
" all_labels.extend(labels)\n",
|
| 903 |
+
"\n",
|
| 904 |
+
"# --- 4. Calculate Final Metrics ---\n",
|
| 905 |
+
"# Performance Metrics\n",
|
| 906 |
+
"ptq_macro_f1 = f1_score(all_labels, all_predictions, average=\"macro\")\n",
|
| 907 |
+
"ptq_accuracy = accuracy_score(all_labels, all_predictions)\n",
|
| 908 |
+
"\n",
|
| 909 |
+
"# Model Size\n",
|
| 910 |
+
"ptq_model_size_mb = os.path.getsize(quantized_model_path) / (1024 * 1024)\n",
|
| 911 |
+
"\n",
|
| 912 |
+
"# Latency Metrics\n",
|
| 913 |
+
"ptq_avg_batch_latency_ms = (sum(inference_times) / len(inference_times)) * 1000\n",
|
| 914 |
+
"\n",
|
| 915 |
+
"# Store results for the final comparison table\n",
|
| 916 |
+
"ptq_results = {\n",
|
| 917 |
+
" \"model\": \"PTQ (INT8)\",\n",
|
| 918 |
+
" \"accuracy\": ptq_accuracy,\n",
|
| 919 |
+
" \"macro_f1\": ptq_macro_f1,\n",
|
| 920 |
+
" \"model_size_mb\": ptq_model_size_mb,\n",
|
| 921 |
+
" \"latency_ms_per_batch\": ptq_avg_batch_latency_ms\n",
|
| 922 |
+
"}\n",
|
| 923 |
+
"\n",
|
| 924 |
+
"print(\"\\n--- PTQ Model Test Results ---\")\n",
|
| 925 |
+
"print(f\"Accuracy: {ptq_accuracy:.4f}\")\n",
|
| 926 |
+
"print(f\"Macro F1-Score: {ptq_macro_f1:.4f}\")\n",
|
| 927 |
+
"print(f\"Model Size: {ptq_model_size_mb:.2f} MB\")\n",
|
| 928 |
+
"print(f\"Avg. Latency per Batch (CPU): {ptq_avg_batch_latency_ms:.2f} ms\")"
|
| 929 |
+
]
|
| 930 |
+
},
|
| 931 |
+
{
|
| 932 |
+
"cell_type": "markdown",
|
| 933 |
+
"id": "ae54a97c-99f3-4a50-bcfd-d3763f6f03f2",
|
| 934 |
+
"metadata": {},
|
| 935 |
+
"source": [
|
| 936 |
+
"## 2.3. Comparison: Baseline vs. PTQ"
|
| 937 |
+
]
|
| 938 |
+
},
|
| 939 |
+
{
|
| 940 |
+
"cell_type": "code",
|
| 941 |
+
"execution_count": 8,
|
| 942 |
+
"id": "e40a6927-cbec-45e2-808b-478dc5a2f3d5",
|
| 943 |
+
"metadata": {},
|
| 944 |
+
"outputs": [
|
| 945 |
+
{
|
| 946 |
+
"ename": "NameError",
|
| 947 |
+
"evalue": "name 'baseline_results' is not defined",
|
| 948 |
+
"output_type": "error",
|
| 949 |
+
"traceback": [
|
| 950 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
| 951 |
+
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
| 952 |
+
"Cell \u001b[0;32mIn[8], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Create a DataFrame from our stored results\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m comparison_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame([\u001b[43mbaseline_results\u001b[49m, ptq_results])\n\u001b[1;32m 5\u001b[0m comparison_df\u001b[38;5;241m.\u001b[39mset_index(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m'\u001b[39m, inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Add a note about the latency measurement context\u001b[39;00m\n",
|
| 953 |
+
"\u001b[0;31mNameError\u001b[0m: name 'baseline_results' is not defined"
|
| 954 |
+
]
|
| 955 |
+
}
|
| 956 |
+
],
|
| 957 |
+
"source": [
|
| 958 |
+
"import pandas as pd\n",
|
| 959 |
+
"\n",
|
| 960 |
+
"# Create a DataFrame from our stored results\n",
|
| 961 |
+
"comparison_df = pd.DataFrame([baseline_results, ptq_results])\n",
|
| 962 |
+
"comparison_df.set_index('model', inplace=True)\n",
|
| 963 |
+
"\n",
|
| 964 |
+
"# Add a note about the latency measurement context\n",
|
| 965 |
+
"comparison_df.rename(columns={'latency_ms_per_batch': 'Latency (ms/batch)'}, inplace=True)\n",
|
| 966 |
+
"comparison_df['Latency (ms/batch)'] = comparison_df['Latency (ms/batch)'].round(2).astype(str)\n",
|
| 967 |
+
"comparison_df.loc['Baseline (FP32)', 'Latency (ms/batch)'] += ' (GPU)'\n",
|
| 968 |
+
"comparison_df.loc['PTQ (INT8)', 'Latency (ms/batch)'] += ' (CPU)'\n",
|
| 969 |
+
"\n",
|
| 970 |
+
"\n",
|
| 971 |
+
"print(\"--- Performance and Resource Comparison ---\")\n",
|
| 972 |
+
"print(comparison_df)"
|
| 973 |
+
]
|
| 974 |
+
},
|
| 975 |
+
{
|
| 976 |
+
"cell_type": "markdown",
|
| 977 |
+
"id": "25229ca4-4f12-4422-845a-24a8cddfe65d",
|
| 978 |
+
"metadata": {},
|
| 979 |
+
"source": [
|
| 980 |
+
"# **Part 3: Quantization-Aware Training (QAT)**\n",
|
| 981 |
+
"Quantization-Aware Training (QAT) is a more advanced technique. Instead of quantizing a fully trained model, we introduce the \"simulation\" of quantization *during* the fine-tuning process. This allows the model to learn and adapt to the effects of reduced precision, which can lead to significantly better accuracy compared to PTQ.\n"
|
| 982 |
+
]
|
| 983 |
+
},
|
| 984 |
+
{
|
| 985 |
+
"cell_type": "markdown",
|
| 986 |
+
"id": "a550134b-a9ef-416d-a87d-93ca72629d63",
|
| 987 |
+
"metadata": {},
|
| 988 |
+
"source": [
|
| 989 |
+
"## 3.1. Prepare the Model for QAT\n",
|
| 990 |
+
"We start by loading a fresh copy of our fine-tuned FP32 model. Then, we attach a quantization configuration and use a PyTorch helper function, `prepare_qat`, to insert special modules (`FakeQuantize`) into the model. These modules will simulate the effect of INT8 quantization during training."
|
| 991 |
+
]
|
| 992 |
+
},
|
| 993 |
+
{
|
| 994 |
+
"cell_type": "code",
|
| 995 |
+
"execution_count": null,
|
| 996 |
+
"id": "69926273-f2d8-493e-ac00-6652f6eb9680",
|
| 997 |
+
"metadata": {},
|
| 998 |
+
"outputs": [],
|
| 999 |
+
"source": [
|
| 1000 |
+
"from torch.quantization import get_default_qat_qconfig, prepare_qat\n",
|
| 1001 |
+
"\n",
|
| 1002 |
+
"# --- 1. Load a fresh copy of the fine-tuned FP32 model ---\n",
|
| 1003 |
+
"model_for_qat = AutoModelForSequenceClassification.from_pretrained(baseline_model_dir)\n",
|
| 1004 |
+
"model_for_qat.train()\n",
|
| 1005 |
+
"\n",
|
| 1006 |
+
"# --- 2. Get the default QAT configuration ---\n",
|
| 1007 |
+
"qconfig = get_default_qat_qconfig('fbgemm')\n",
|
| 1008 |
+
"model_for_qat.qconfig = qconfig\n",
|
| 1009 |
+
"\n",
|
| 1010 |
+
"# --- 3. CRITICAL FIX: Exclude embeddings from quantization ---\n",
|
| 1011 |
+
"# By setting the qconfig of the embeddings module to None, we tell PyTorch to skip it.\n",
|
| 1012 |
+
"model_for_qat.bert.embeddings.qconfig = None\n",
|
| 1013 |
+
"print(\"Disabled quantization for the following module:\")\n",
|
| 1014 |
+
"print(model_for_qat.bert.embeddings)\n",
|
| 1015 |
+
"\n",
|
| 1016 |
+
"\n",
|
| 1017 |
+
"# --- 4. Prepare the model for QAT ---\n",
|
| 1018 |
+
"model_qat_prepared = prepare_qat(model_for_qat)\n",
|
| 1019 |
+
"\n",
|
| 1020 |
+
"# You can inspect the model below and see that the Embedding layers\n",
|
| 1021 |
+
"# no longer have FakeQuantize modules attached.\n",
|
| 1022 |
+
"print(\"\\n--- Model Prepared for QAT (Embeddings Skipped) ---\")\n",
|
| 1023 |
+
"print(model_qat_prepared.bert.embeddings)"
|
| 1024 |
+
]
|
| 1025 |
+
},
|
| 1026 |
+
{
|
| 1027 |
+
"cell_type": "markdown",
|
| 1028 |
+
"id": "a4f1c146-708a-4560-a446-b22d24590974",
|
| 1029 |
+
"metadata": {},
|
| 1030 |
+
"source": [
|
| 1031 |
+
"## 3.2. Fine-Tune the QAT-Prepared Model"
|
| 1032 |
+
]
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"cell_type": "code",
|
| 1036 |
+
"execution_count": null,
|
| 1037 |
+
"id": "bab9b605-0fca-4ef9-834c-e1fed6c05e83",
|
| 1038 |
+
"metadata": {},
|
| 1039 |
+
"outputs": [],
|
| 1040 |
+
"source": [
|
| 1041 |
+
"import optimum\n",
|
| 1042 |
+
"from optimum.intel import INCQuantizer\n",
|
| 1043 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 1044 |
+
"\n",
|
| 1045 |
+
"# --- 1. Load the fine-tuned FP32 model ---\n",
|
| 1046 |
+
"# This is the same model we started with in Part 2\n",
|
| 1047 |
+
"baseline_model_dir = \"./finetuned-bert-emotion-baseline\"\n",
|
| 1048 |
+
"model_fp32 = AutoModelForSequenceClassification.from_pretrained(baseline_model_dir)\n",
|
| 1049 |
+
"\n",
|
| 1050 |
+
"# --- 2. Initialize the Quantizer ---\n",
|
| 1051 |
+
"quantizer = INCQuantizer.from_pretrained(model_fp32)\n",
|
| 1052 |
+
"\n",
|
| 1053 |
+
"# --- 3. Create a calibration function ---\n",
|
| 1054 |
+
"# The quantizer needs a small sample of data to observe the model's behavior.\n",
|
| 1055 |
+
"# We'll use 100 examples from the training set.\n",
|
| 1056 |
+
"calibration_dataset = tokenized_datasets[\"train\"].select(range(100))\n",
|
| 1057 |
+
"# Remove columns the model doesn't expect\n",
|
| 1058 |
+
"calibration_dataset = calibration_dataset.remove_columns([\"labels\", \"text\"])\n",
|
| 1059 |
+
"\n",
|
| 1060 |
+
"def calibration_func(model):\n",
|
| 1061 |
+
" data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
|
| 1062 |
+
" dataloader = DataLoader(calibration_dataset, batch_size=8, collate_fn=data_collator)\n",
|
| 1063 |
+
" for batch in dataloader:\n",
|
| 1064 |
+
" try:\n",
|
| 1065 |
+
" model(**batch)\n",
|
| 1066 |
+
" except Exception:\n",
|
| 1067 |
+
" continue\n",
|
| 1068 |
+
"\n",
|
| 1069 |
+
"# --- 4. Define the quantization configuration and run quantization ---\n",
|
| 1070 |
+
"from optimum.intel.neural_compressor import INCConfig\n",
|
| 1071 |
+
"# This is a default static quantization configuration\n",
|
| 1072 |
+
"quantization_config = INCConfig(quantization={\"approach\": \"static\"})\n",
|
| 1073 |
+
"\n",
|
| 1074 |
+
"static_quantized_model_dir = \"./quantized-bert-emotion-static-optimum\"\n",
|
| 1075 |
+
"\n",
|
| 1076 |
+
"quantizer.quantize(\n",
|
| 1077 |
+
" quantization_config=quantization_config,\n",
|
| 1078 |
+
" calibration_function=calibration_func,\n",
|
| 1079 |
+
" save_directory=static_quantized_model_dir,\n",
|
| 1080 |
+
")"
|
| 1081 |
+
]
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"cell_type": "code",
|
| 1085 |
+
"execution_count": null,
|
| 1086 |
+
"id": "2a26880e-6b30-4b81-8d93-a5fad4d89c3f",
|
| 1087 |
+
"metadata": {},
|
| 1088 |
+
"outputs": [],
|
| 1089 |
+
"source": []
|
| 1090 |
+
}
|
| 1091 |
+
],
|
| 1092 |
+
"metadata": {
|
| 1093 |
+
"kernelspec": {
|
| 1094 |
+
"display_name": "Python (py10)",
|
| 1095 |
+
"language": "python",
|
| 1096 |
+
"name": "py10"
|
| 1097 |
+
},
|
| 1098 |
+
"language_info": {
|
| 1099 |
+
"codemirror_mode": {
|
| 1100 |
+
"name": "ipython",
|
| 1101 |
+
"version": 3
|
| 1102 |
+
},
|
| 1103 |
+
"file_extension": ".py",
|
| 1104 |
+
"mimetype": "text/x-python",
|
| 1105 |
+
"name": "python",
|
| 1106 |
+
"nbconvert_exporter": "python",
|
| 1107 |
+
"pygments_lexer": "ipython3",
|
| 1108 |
+
"version": "3.10.18"
|
| 1109 |
+
}
|
| 1110 |
+
},
|
| 1111 |
+
"nbformat": 4,
|
| 1112 |
+
"nbformat_minor": 5
|
| 1113 |
+
}
|
bert-quantization/finetuned-bert-emotion-baseline/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"gradient_checkpointing": false,
|
| 8 |
+
"hidden_act": "gelu",
|
| 9 |
+
"hidden_dropout_prob": 0.1,
|
| 10 |
+
"hidden_size": 768,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "sadness",
|
| 13 |
+
"1": "joy",
|
| 14 |
+
"2": "love",
|
| 15 |
+
"3": "anger",
|
| 16 |
+
"4": "fear",
|
| 17 |
+
"5": "surprise"
|
| 18 |
+
},
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"intermediate_size": 3072,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"anger": 3,
|
| 23 |
+
"fear": 4,
|
| 24 |
+
"joy": 1,
|
| 25 |
+
"love": 2,
|
| 26 |
+
"sadness": 0,
|
| 27 |
+
"surprise": 5
|
| 28 |
+
},
|
| 29 |
+
"layer_norm_eps": 1e-12,
|
| 30 |
+
"max_position_embeddings": 512,
|
| 31 |
+
"model_type": "bert",
|
| 32 |
+
"num_attention_heads": 12,
|
| 33 |
+
"num_hidden_layers": 12,
|
| 34 |
+
"pad_token_id": 0,
|
| 35 |
+
"position_embedding_type": "absolute",
|
| 36 |
+
"problem_type": "single_label_classification",
|
| 37 |
+
"torch_dtype": "float32",
|
| 38 |
+
"transformers_version": "4.55.4",
|
| 39 |
+
"type_vocab_size": 2,
|
| 40 |
+
"use_cache": true,
|
| 41 |
+
"vocab_size": 30522
|
| 42 |
+
}
|
bert-quantization/finetuned-bert-emotion-baseline/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab149b23bbbacdf66c277f4175862c73511815a7645b59f989e477a984e0e495
|
| 3 |
+
size 437970952
|
bert-quantization/finetuned-bert-emotion-baseline/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
bert-quantization/finetuned-bert-emotion-baseline/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
bert-quantization/finetuned-bert-emotion-baseline/tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_lower_case": true,
|
| 47 |
+
"extra_special_tokens": {},
|
| 48 |
+
"mask_token": "[MASK]",
|
| 49 |
+
"model_max_length": 512,
|
| 50 |
+
"pad_token": "[PAD]",
|
| 51 |
+
"sep_token": "[SEP]",
|
| 52 |
+
"strip_accents": null,
|
| 53 |
+
"tokenize_chinese_chars": true,
|
| 54 |
+
"tokenizer_class": "BertTokenizer",
|
| 55 |
+
"unk_token": "[UNK]"
|
| 56 |
+
}
|
bert-quantization/finetuned-bert-emotion-baseline/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
bert-quantization/finetuned-bert-emotion-baseline_old/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "sadness",
|
| 14 |
+
"1": "joy",
|
| 15 |
+
"2": "love",
|
| 16 |
+
"3": "anger",
|
| 17 |
+
"4": "fear",
|
| 18 |
+
"5": "surprise"
|
| 19 |
+
},
|
| 20 |
+
"initializer_range": 0.02,
|
| 21 |
+
"intermediate_size": 3072,
|
| 22 |
+
"label2id": {
|
| 23 |
+
"anger": 3,
|
| 24 |
+
"fear": 4,
|
| 25 |
+
"joy": 1,
|
| 26 |
+
"love": 2,
|
| 27 |
+
"sadness": 0,
|
| 28 |
+
"surprise": 5
|
| 29 |
+
},
|
| 30 |
+
"layer_norm_eps": 1e-12,
|
| 31 |
+
"max_position_embeddings": 512,
|
| 32 |
+
"model_type": "bert",
|
| 33 |
+
"num_attention_heads": 12,
|
| 34 |
+
"num_hidden_layers": 12,
|
| 35 |
+
"pad_token_id": 0,
|
| 36 |
+
"position_embedding_type": "absolute",
|
| 37 |
+
"problem_type": "single_label_classification",
|
| 38 |
+
"transformers_version": "4.57.1",
|
| 39 |
+
"type_vocab_size": 2,
|
| 40 |
+
"use_cache": true,
|
| 41 |
+
"vocab_size": 30522
|
| 42 |
+
}
|
bert-quantization/finetuned-bert-emotion-baseline_old/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b8931e43e01f5ba449f4998f02fb548519d5c2b0f5a89cd48a29c40ff83ef9a
|
| 3 |
+
size 437970952
|
bert-quantization/finetuned-bert-emotion-baseline_old/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
bert-quantization/finetuned-bert-emotion-baseline_old/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
bert-quantization/finetuned-bert-emotion-baseline_old/tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_lower_case": true,
|
| 47 |
+
"extra_special_tokens": {},
|
| 48 |
+
"mask_token": "[MASK]",
|
| 49 |
+
"model_max_length": 512,
|
| 50 |
+
"pad_token": "[PAD]",
|
| 51 |
+
"sep_token": "[SEP]",
|
| 52 |
+
"strip_accents": null,
|
| 53 |
+
"tokenize_chinese_chars": true,
|
| 54 |
+
"tokenizer_class": "BertTokenizer",
|
| 55 |
+
"unk_token": "[UNK]"
|
| 56 |
+
}
|
bert-quantization/finetuned-bert-emotion-baseline_old/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "sadness",
|
| 14 |
+
"1": "joy",
|
| 15 |
+
"2": "love",
|
| 16 |
+
"3": "anger",
|
| 17 |
+
"4": "fear",
|
| 18 |
+
"5": "surprise"
|
| 19 |
+
},
|
| 20 |
+
"initializer_range": 0.02,
|
| 21 |
+
"intermediate_size": 3072,
|
| 22 |
+
"label2id": {
|
| 23 |
+
"anger": 3,
|
| 24 |
+
"fear": 4,
|
| 25 |
+
"joy": 1,
|
| 26 |
+
"love": 2,
|
| 27 |
+
"sadness": 0,
|
| 28 |
+
"surprise": 5
|
| 29 |
+
},
|
| 30 |
+
"layer_norm_eps": 1e-12,
|
| 31 |
+
"max_position_embeddings": 512,
|
| 32 |
+
"model_type": "bert",
|
| 33 |
+
"num_attention_heads": 12,
|
| 34 |
+
"num_hidden_layers": 12,
|
| 35 |
+
"pad_token_id": 0,
|
| 36 |
+
"position_embedding_type": "absolute",
|
| 37 |
+
"problem_type": "single_label_classification",
|
| 38 |
+
"transformers_version": "4.57.1",
|
| 39 |
+
"type_vocab_size": 2,
|
| 40 |
+
"use_cache": true,
|
| 41 |
+
"vocab_size": 30522
|
| 42 |
+
}
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfb89d4afcc1b6abc5e147ee6ad3e74675c03c8cabc51d91428ef08b0758eb4e
|
| 3 |
+
size 437970952
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef6ae23b525de6cbe4e15f770d3681a243c2a287e0efa910f8fab54f2320641f
|
| 3 |
+
size 876063371
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1847caf3f93cc1c06d1cf28b70a94087e5805e268de0f04105c9dd58248bf978
|
| 3 |
+
size 14645
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a199a0cf14b7dfda361456ff6ae43c0038ba3f9ae1ead9e16b596859030bdbc
|
| 3 |
+
size 1383
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54217a04340a9098239906778a680ae34f72ffa7b952279102ca38030f69bd9a
|
| 3 |
+
size 1465
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_lower_case": true,
|
| 47 |
+
"extra_special_tokens": {},
|
| 48 |
+
"mask_token": "[MASK]",
|
| 49 |
+
"model_max_length": 512,
|
| 50 |
+
"pad_token": "[PAD]",
|
| 51 |
+
"sep_token": "[SEP]",
|
| 52 |
+
"strip_accents": null,
|
| 53 |
+
"tokenize_chinese_chars": true,
|
| 54 |
+
"tokenizer_class": "BertTokenizer",
|
| 55 |
+
"unk_token": "[UNK]"
|
| 56 |
+
}
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/trainer_state.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 500,
|
| 3 |
+
"best_metric": 0.9016178091296787,
|
| 4 |
+
"best_model_checkpoint": "finetuned-bert-emotion/checkpoint-500",
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 500,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.0,
|
| 14 |
+
"grad_norm": 10.136147499084473,
|
| 15 |
+
"learning_rate": 3.343333333333333e-05,
|
| 16 |
+
"loss": 0.4592,
|
| 17 |
+
"step": 500
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 1.0,
|
| 21 |
+
"eval_accuracy": 0.9265,
|
| 22 |
+
"eval_loss": 0.17461606860160828,
|
| 23 |
+
"eval_macro_f1": 0.9016178091296787,
|
| 24 |
+
"eval_runtime": 11.2103,
|
| 25 |
+
"eval_samples_per_second": 178.407,
|
| 26 |
+
"eval_steps_per_second": 2.855,
|
| 27 |
+
"step": 500
|
| 28 |
+
}
|
| 29 |
+
],
|
| 30 |
+
"logging_steps": 500,
|
| 31 |
+
"max_steps": 1500,
|
| 32 |
+
"num_input_tokens_seen": 0,
|
| 33 |
+
"num_train_epochs": 3,
|
| 34 |
+
"save_steps": 500,
|
| 35 |
+
"stateful_callbacks": {
|
| 36 |
+
"TrainerControl": {
|
| 37 |
+
"args": {
|
| 38 |
+
"should_epoch_stop": false,
|
| 39 |
+
"should_evaluate": false,
|
| 40 |
+
"should_log": false,
|
| 41 |
+
"should_save": true,
|
| 42 |
+
"should_training_stop": false
|
| 43 |
+
},
|
| 44 |
+
"attributes": {}
|
| 45 |
+
}
|
| 46 |
+
},
|
| 47 |
+
"total_flos": 4209928077312000.0,
|
| 48 |
+
"train_batch_size": 32,
|
| 49 |
+
"trial_name": null,
|
| 50 |
+
"trial_params": null
|
| 51 |
+
}
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df95bf391b6af85792f90b6b04ab2088777d3e9f3a7acea45898a6041e71603c
|
| 3 |
+
size 5841
|
bert-quantization/finetuned-bert-emotion_old/checkpoint-500/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
bert-quantization/finetuned-bert-emotion_old/runs/Nov19_01-26-07_bionlp/events.out.tfevents.1763495771.bionlp.1753567.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b846558373f571238cc00969ab893df840d213f1cb59541621709b66d2512130
|
| 3 |
+
size 4184
|
bert-quantization/finetuned-bert-emotion_old/runs/Nov19_01-27-12_bionlp/events.out.tfevents.1763495834.bionlp.1754079.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d827c1ce5573d53bbae78648be165cc0e4cda9e62044e3a1a6837f52a695d32
|
| 3 |
+
size 5258
|
bert-quantization/finetuned-bert-emotion_old/runs/Nov19_01-27-45_bionlp/events.out.tfevents.1763495865.bionlp.1754079.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43ba619fdd16b7b00d7556c07c9a8d480c6c3865b8cca6d3f3b4367623501925
|
| 3 |
+
size 5257
|
bert-quantization/finetuned-bert-emotion_old/runs/Nov19_01-28-00_bionlp/events.out.tfevents.1763495880.bionlp.1754079.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4850f9e11d229731509698790f60630cac575e758a2a3865bcfaf039bbca8792
|
| 3 |
+
size 5842
|
bert-quantization/quantized-bert-emotion-ptq/model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ffc5ee138342c8ff471cf82efef3e46154f80d30f1cc7aa02f6f32aed4ed95b
|
| 3 |
+
size 181483583
|
bert-quantization/quantized-bert-emotion-qat/model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a698c5f127a0bdbb6c30bedeaf6fa3d9f8c52d058f22cf327145e896cf67903
|
| 3 |
+
size 182875639
|
bert-quantization/readme.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
bert-quantization/test-00000-of-00001.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f8407fa1ca9c310f55781f082ed73812f6551e8dda2c61973123a121869245b
|
| 3 |
+
size 128987
|
bert-quantization/train-00000-of-00001.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10817f0f2ea42358bc62f69a09dfb8bd71701727df6d5a387bea742f3ea06417
|
| 3 |
+
size 1030740
|
bert-quantization/validation-00000-of-00001.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c70f0e660b5ebd1ea9a37d2a851f516f08a6d6477cdfc11be204e22a2f1102fd
|
| 3 |
+
size 127466
|
code.ipynb
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "f74714d6-2f2c-4578-820c-165b24b384e4",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"# !pip install transformers datasets torch scikit-learn"
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"execution_count": 2,
|
| 16 |
+
"id": "4d408bd1-630e-44bd-a29f-272b70897479",
|
| 17 |
+
"metadata": {},
|
| 18 |
+
"outputs": [
|
| 19 |
+
{
|
| 20 |
+
"name": "stderr",
|
| 21 |
+
"output_type": "stream",
|
| 22 |
+
"text": [
|
| 23 |
+
"/home/saisab/py10/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 24 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 25 |
+
]
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"name": "stdout",
|
| 29 |
+
"output_type": "stream",
|
| 30 |
+
"text": [
|
| 31 |
+
"DatasetDict({\n",
|
| 32 |
+
" train: Dataset({\n",
|
| 33 |
+
" features: ['text', 'label'],\n",
|
| 34 |
+
" num_rows: 16000\n",
|
| 35 |
+
" })\n",
|
| 36 |
+
" validation: Dataset({\n",
|
| 37 |
+
" features: ['text', 'label'],\n",
|
| 38 |
+
" num_rows: 2000\n",
|
| 39 |
+
" })\n",
|
| 40 |
+
" test: Dataset({\n",
|
| 41 |
+
" features: ['text', 'label'],\n",
|
| 42 |
+
" num_rows: 2000\n",
|
| 43 |
+
" })\n",
|
| 44 |
+
"})\n"
|
| 45 |
+
]
|
| 46 |
+
}
|
| 47 |
+
],
|
| 48 |
+
"source": [
|
| 49 |
+
"from datasets import load_dataset\n",
|
| 50 |
+
"data_files = {\n",
|
| 51 |
+
" \"train\": \"train-00000-of-00001.parquet\",\n",
|
| 52 |
+
" \"validation\": \"validation-00000-of-00001.parquet\",\n",
|
| 53 |
+
" \"test\": \"test-00000-of-00001.parquet\"\n",
|
| 54 |
+
"}\n",
|
| 55 |
+
"emotion_dataset = load_dataset(\"parquet\", data_files=data_files)\n",
|
| 56 |
+
"print(emotion_dataset)"
|
| 57 |
+
]
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"cell_type": "markdown",
|
| 61 |
+
"id": "866924e3-ae0a-4e54-b52d-b55e39a61993",
|
| 62 |
+
"metadata": {},
|
| 63 |
+
"source": [
|
| 64 |
+
"Task 1: Baseline Fine-tuning (FP32/FP16)\n",
|
| 65 |
+
"In this step, we will fine-tune a pre-trained bert-base-uncased model on our emotion dataset. This will serve as our baseline for performance and model size, against which we'll compare our quantized models later.\n",
|
| 66 |
+
"1.1. Preprocessing the Data\n"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"cell_type": "code",
|
| 71 |
+
"execution_count": 3,
|
| 72 |
+
"id": "8dee48a4-7c22-417e-a19b-02b422a0909a",
|
| 73 |
+
"metadata": {},
|
| 74 |
+
"outputs": [
|
| 75 |
+
{
|
| 76 |
+
"name": "stderr",
|
| 77 |
+
"output_type": "stream",
|
| 78 |
+
"text": [
|
| 79 |
+
"Map: 100%|█| 2000/2000 [00:00<00:00, 3358.52 examp"
|
| 80 |
+
]
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"name": "stdout",
|
| 84 |
+
"output_type": "stream",
|
| 85 |
+
"text": [
|
| 86 |
+
"DatasetDict({\n",
|
| 87 |
+
" train: Dataset({\n",
|
| 88 |
+
" features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 89 |
+
" num_rows: 16000\n",
|
| 90 |
+
" })\n",
|
| 91 |
+
" validation: Dataset({\n",
|
| 92 |
+
" features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 93 |
+
" num_rows: 2000\n",
|
| 94 |
+
" })\n",
|
| 95 |
+
" test: Dataset({\n",
|
| 96 |
+
" features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 97 |
+
" num_rows: 2000\n",
|
| 98 |
+
" })\n",
|
| 99 |
+
"})\n"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"name": "stderr",
|
| 104 |
+
"output_type": "stream",
|
| 105 |
+
"text": [
|
| 106 |
+
"\n"
|
| 107 |
+
]
|
| 108 |
+
}
|
| 109 |
+
],
|
| 110 |
+
"source": [
|
| 111 |
+
"from transformers import AutoTokenizer\n",
|
| 112 |
+
"model_checkpoint = \"bert-base-uncased\"\n",
|
| 113 |
+
"tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)\n",
|
| 114 |
+
"def tokenize_function(examples):\n",
|
| 115 |
+
" return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True)\n",
|
| 116 |
+
"tokenized_datasets = emotion_dataset.map(tokenize_function, batched=True)\n",
|
| 117 |
+
"print(tokenized_datasets)"
|
| 118 |
+
]
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"cell_type": "markdown",
|
| 122 |
+
"id": "071fd634-7077-45da-b6a9-de7abe935082",
|
| 123 |
+
"metadata": {},
|
| 124 |
+
"source": [
|
| 125 |
+
"1.2. Loading the Model\n",
|
| 126 |
+
"Now, we'll load the bert-base-uncased model. We need to configure it for sequence classification with 6 labels, corresponding to the 6 emotions in our dataset. We'll also create mappings between the label IDs (0, 1, 2...) and their names (\"sadness\", \"joy\", etc.) for better readability."
|
| 127 |
+
]
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"cell_type": "code",
|
| 131 |
+
"execution_count": 4,
|
| 132 |
+
"id": "eb817928-c546-46b4-b231-c571a5d31f0b",
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"outputs": [
|
| 135 |
+
{
|
| 136 |
+
"name": "stderr",
|
| 137 |
+
"output_type": "stream",
|
| 138 |
+
"text": [
|
| 139 |
+
"/home/saisab/py10/lib/python3.10/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/home/saisab/py10/lib/python3.10/site-packages/torchvision/image.so: undefined symbol: _ZN3c1017RegisterOperatorsD1Ev'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n",
|
| 140 |
+
" warn(\n",
|
| 141 |
+
"2025-11-19 01:27:11.237598: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
|
| 142 |
+
"2025-11-19 01:27:11.273783: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
|
| 143 |
+
"2025-11-19 01:27:11.273822: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
|
| 144 |
+
"2025-11-19 01:27:11.275099: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
|
| 145 |
+
"2025-11-19 01:27:11.281657: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
|
| 146 |
+
"To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
|
| 147 |
+
"2025-11-19 01:27:12.147896: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
|
| 148 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
|
| 149 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 150 |
+
]
|
| 151 |
+
}
|
| 152 |
+
],
|
| 153 |
+
"source": [
|
| 154 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 155 |
+
"num_labels = 6\n",
|
| 156 |
+
"labels = emotion_dataset[\"train\"].features[\"label\"].names\n",
|
| 157 |
+
"id2label = {i: label for i, label in enumerate(labels)}\n",
|
| 158 |
+
"label2id = {label: i for i, label in enumerate(labels)}\n",
|
| 159 |
+
"model = AutoModelForSequenceClassification.from_pretrained(\n",
|
| 160 |
+
" model_checkpoint,\n",
|
| 161 |
+
" num_labels=num_labels,\n",
|
| 162 |
+
" id2label=id2label,\n",
|
| 163 |
+
" label2id=label2id\n",
|
| 164 |
+
")"
|
| 165 |
+
]
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"cell_type": "markdown",
|
| 169 |
+
"id": "092d2f5f-f161-474d-9a7d-0f76029be4bc",
|
| 170 |
+
"metadata": {},
|
| 171 |
+
"source": [
|
| 172 |
+
"1.3. Defining Metrics\n",
|
| 173 |
+
"The assignment requires us to report the macro F1-score and accuracy. We'll create a function that computes these metrics during the evaluation phase of our training."
|
| 174 |
+
]
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"cell_type": "code",
|
| 178 |
+
"execution_count": 5,
|
| 179 |
+
"id": "f8309e7e-ff28-450d-b7ff-94ddc7c86838",
|
| 180 |
+
"metadata": {},
|
| 181 |
+
"outputs": [],
|
| 182 |
+
"source": [
|
| 183 |
+
"import numpy as np\n",
|
| 184 |
+
"from sklearn.metrics import accuracy_score, f1_score\n",
|
| 185 |
+
"\n",
|
| 186 |
+
"def compute_metrics(eval_pred):\n",
|
| 187 |
+
" \"\"\"Computes accuracy and F1 score for a given set of predictions.\"\"\"\n",
|
| 188 |
+
" logits, labels = eval_pred\n",
|
| 189 |
+
" predictions = np.argmax(logits, axis=-1)\n",
|
| 190 |
+
" \n",
|
| 191 |
+
" # Calculate metrics\n",
|
| 192 |
+
" accuracy = accuracy_score(labels, predictions)\n",
|
| 193 |
+
" macro_f1 = f1_score(labels, predictions, average=\"macro\")\n",
|
| 194 |
+
" \n",
|
| 195 |
+
" return {\n",
|
| 196 |
+
" \"accuracy\": accuracy,\n",
|
| 197 |
+
" \"macro_f1\": macro_f1,\n",
|
| 198 |
+
" }"
|
| 199 |
+
]
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"cell_type": "markdown",
|
| 203 |
+
"id": "674cb718-82f4-4500-8733-2078137264fe",
|
| 204 |
+
"metadata": {},
|
| 205 |
+
"source": [
|
| 206 |
+
"1.4. Fine-Tuning the Model\n",
|
| 207 |
+
"This is where we set up and run the training process. We'll use the Trainer API from the Hugging Face transformers library, which simplifies the training loop.\n",
|
| 208 |
+
"The assignment suggests using FP16 (mixed-precision training) to speed things up, which we'll enable. We'll train for 3 epochs, which is usually sufficient for fine-tuning tasks like this."
|
| 209 |
+
]
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"cell_type": "code",
|
| 213 |
+
"execution_count": 6,
|
| 214 |
+
"id": "9445d846-ae7a-43ac-92b8-d470baf34346",
|
| 215 |
+
"metadata": {},
|
| 216 |
+
"outputs": [],
|
| 217 |
+
"source": [
|
| 218 |
+
"# !pip install --upgrade transformers"
|
| 219 |
+
]
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"cell_type": "code",
|
| 223 |
+
"execution_count": null,
|
| 224 |
+
"id": "dc905f68-0e26-413b-9281-62246dc80af6",
|
| 225 |
+
"metadata": {},
|
| 226 |
+
"outputs": [
|
| 227 |
+
{
|
| 228 |
+
"name": "stderr",
|
| 229 |
+
"output_type": "stream",
|
| 230 |
+
"text": [
|
| 231 |
+
"/tmp/ipykernel_1754079/4283579479.py:34: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
|
| 232 |
+
" trainer = Trainer(\n"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"data": {
|
| 237 |
+
"text/html": [
|
| 238 |
+
"\n",
|
| 239 |
+
" <div>\n",
|
| 240 |
+
" \n",
|
| 241 |
+
" <progress value='263' max='1500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
| 242 |
+
" [ 263/1500 02:32 < 12:04, 1.71 it/s, Epoch 0.52/3]\n",
|
| 243 |
+
" </div>\n",
|
| 244 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
| 245 |
+
" <thead>\n",
|
| 246 |
+
" <tr style=\"text-align: left;\">\n",
|
| 247 |
+
" <th>Epoch</th>\n",
|
| 248 |
+
" <th>Training Loss</th>\n",
|
| 249 |
+
" <th>Validation Loss</th>\n",
|
| 250 |
+
" </tr>\n",
|
| 251 |
+
" </thead>\n",
|
| 252 |
+
" <tbody>\n",
|
| 253 |
+
" </tbody>\n",
|
| 254 |
+
"</table><p>"
|
| 255 |
+
],
|
| 256 |
+
"text/plain": [
|
| 257 |
+
"<IPython.core.display.HTML object>"
|
| 258 |
+
]
|
| 259 |
+
},
|
| 260 |
+
"metadata": {},
|
| 261 |
+
"output_type": "display_data"
|
| 262 |
+
}
|
| 263 |
+
],
|
| 264 |
+
"source": [
|
| 265 |
+
"from transformers import TrainingArguments, Trainer\n",
|
| 266 |
+
"import numpy as np\n",
|
| 267 |
+
"from sklearn.metrics import accuracy_score, f1_score\n",
|
| 268 |
+
"\n",
|
| 269 |
+
"# Re-defining the compute_metrics function to be safe\n",
|
| 270 |
+
"def compute_metrics(eval_pred):\n",
|
| 271 |
+
" logits, labels = eval_pred\n",
|
| 272 |
+
" predictions = np.argmax(logits, axis=-1)\n",
|
| 273 |
+
" accuracy = accuracy_score(labels, predictions)\n",
|
| 274 |
+
" macro_f1 = f1_score(labels, predictions, average=\"macro\")\n",
|
| 275 |
+
" return {\"accuracy\": accuracy, \"macro_f1\": macro_f1}\n",
|
| 276 |
+
"\n",
|
| 277 |
+
"model_name = \"finetuned-bert-emotion\"\n",
|
| 278 |
+
"\n",
|
| 279 |
+
"# Updated TrainingArguments with modern parameters\n",
|
| 280 |
+
"training_args = TrainingArguments(\n",
|
| 281 |
+
" output_dir=model_name,\n",
|
| 282 |
+
" num_train_epochs=3,\n",
|
| 283 |
+
" per_device_train_batch_size=32,\n",
|
| 284 |
+
" per_device_eval_batch_size=64,\n",
|
| 285 |
+
" \n",
|
| 286 |
+
" # --- Modern arguments (replacements) ---\n",
|
| 287 |
+
" eval_strategy=\"epoch\", # Replaces evaluate_during_training\n",
|
| 288 |
+
" save_strategy=\"epoch\", # Save at end of each epoch\n",
|
| 289 |
+
" logging_strategy=\"epoch\", # Log at end of each epoch\n",
|
| 290 |
+
" # --------------------------------------\n",
|
| 291 |
+
" \n",
|
| 292 |
+
" load_best_model_at_end=True,\n",
|
| 293 |
+
" metric_for_best_model=\"macro_f1\",\n",
|
| 294 |
+
" fp16=True,\n",
|
| 295 |
+
" push_to_hub=False,\n",
|
| 296 |
+
")\n",
|
| 297 |
+
"\n",
|
| 298 |
+
"trainer = Trainer(\n",
|
| 299 |
+
" model=model,\n",
|
| 300 |
+
" args=training_args,\n",
|
| 301 |
+
" train_dataset=tokenized_datasets[\"train\"],\n",
|
| 302 |
+
" eval_dataset=tokenized_datasets[\"validation\"],\n",
|
| 303 |
+
" tokenizer=tokenizer,\n",
|
| 304 |
+
" compute_metrics=compute_metrics,\n",
|
| 305 |
+
")\n",
|
| 306 |
+
"\n",
|
| 307 |
+
"# Train the model\n",
|
| 308 |
+
"trainer.train()"
|
| 309 |
+
]
|
| 310 |
+
},
|
| 311 |
+
{
|
| 312 |
+
"cell_type": "code",
|
| 313 |
+
"execution_count": null,
|
| 314 |
+
"id": "54aeb214-3a1f-4108-b58a-5bcea3f91e18",
|
| 315 |
+
"metadata": {},
|
| 316 |
+
"outputs": [],
|
| 317 |
+
"source": []
|
| 318 |
+
}
|
| 319 |
+
],
|
| 320 |
+
"metadata": {
|
| 321 |
+
"kernelspec": {
|
| 322 |
+
"display_name": "Python (py10)",
|
| 323 |
+
"language": "python",
|
| 324 |
+
"name": "py10"
|
| 325 |
+
},
|
| 326 |
+
"language_info": {
|
| 327 |
+
"codemirror_mode": {
|
| 328 |
+
"name": "ipython",
|
| 329 |
+
"version": 3
|
| 330 |
+
},
|
| 331 |
+
"file_extension": ".py",
|
| 332 |
+
"mimetype": "text/x-python",
|
| 333 |
+
"name": "python",
|
| 334 |
+
"nbconvert_exporter": "python",
|
| 335 |
+
"pygments_lexer": "ipython3",
|
| 336 |
+
"version": "3.10.18"
|
| 337 |
+
}
|
| 338 |
+
},
|
| 339 |
+
"nbformat": 4,
|
| 340 |
+
"nbformat_minor": 5
|
| 341 |
+
}
|
finetuned-bert-emotion-baseline/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"gradient_checkpointing": false,
|
| 8 |
+
"hidden_act": "gelu",
|
| 9 |
+
"hidden_dropout_prob": 0.1,
|
| 10 |
+
"hidden_size": 768,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "sadness",
|
| 13 |
+
"1": "joy",
|
| 14 |
+
"2": "love",
|
| 15 |
+
"3": "anger",
|
| 16 |
+
"4": "fear",
|
| 17 |
+
"5": "surprise"
|
| 18 |
+
},
|
| 19 |
+
"initializer_range": 0.02,
|
| 20 |
+
"intermediate_size": 3072,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"anger": 3,
|
| 23 |
+
"fear": 4,
|
| 24 |
+
"joy": 1,
|
| 25 |
+
"love": 2,
|
| 26 |
+
"sadness": 0,
|
| 27 |
+
"surprise": 5
|
| 28 |
+
},
|
| 29 |
+
"layer_norm_eps": 1e-12,
|
| 30 |
+
"max_position_embeddings": 512,
|
| 31 |
+
"model_type": "bert",
|
| 32 |
+
"num_attention_heads": 12,
|
| 33 |
+
"num_hidden_layers": 12,
|
| 34 |
+
"pad_token_id": 0,
|
| 35 |
+
"position_embedding_type": "absolute",
|
| 36 |
+
"problem_type": "single_label_classification",
|
| 37 |
+
"torch_dtype": "float32",
|
| 38 |
+
"transformers_version": "4.55.4",
|
| 39 |
+
"type_vocab_size": 2,
|
| 40 |
+
"use_cache": true,
|
| 41 |
+
"vocab_size": 30522
|
| 42 |
+
}
|
finetuned-bert-emotion-baseline/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab149b23bbbacdf66c277f4175862c73511815a7645b59f989e477a984e0e495
|
| 3 |
+
size 437970952
|
finetuned-bert-emotion-baseline/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
finetuned-bert-emotion-baseline/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
finetuned-bert-emotion-baseline/tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_lower_case": true,
|
| 47 |
+
"extra_special_tokens": {},
|
| 48 |
+
"mask_token": "[MASK]",
|
| 49 |
+
"model_max_length": 512,
|
| 50 |
+
"pad_token": "[PAD]",
|
| 51 |
+
"sep_token": "[SEP]",
|
| 52 |
+
"strip_accents": null,
|
| 53 |
+
"tokenize_chinese_chars": true,
|
| 54 |
+
"tokenizer_class": "BertTokenizer",
|
| 55 |
+
"unk_token": "[UNK]"
|
| 56 |
+
}
|
finetuned-bert-emotion-baseline/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
finetuned-bert-emotion-baseline_old/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "sadness",
|
| 14 |
+
"1": "joy",
|
| 15 |
+
"2": "love",
|
| 16 |
+
"3": "anger",
|
| 17 |
+
"4": "fear",
|
| 18 |
+
"5": "surprise"
|
| 19 |
+
},
|
| 20 |
+
"initializer_range": 0.02,
|
| 21 |
+
"intermediate_size": 3072,
|
| 22 |
+
"label2id": {
|
| 23 |
+
"anger": 3,
|
| 24 |
+
"fear": 4,
|
| 25 |
+
"joy": 1,
|
| 26 |
+
"love": 2,
|
| 27 |
+
"sadness": 0,
|
| 28 |
+
"surprise": 5
|
| 29 |
+
},
|
| 30 |
+
"layer_norm_eps": 1e-12,
|
| 31 |
+
"max_position_embeddings": 512,
|
| 32 |
+
"model_type": "bert",
|
| 33 |
+
"num_attention_heads": 12,
|
| 34 |
+
"num_hidden_layers": 12,
|
| 35 |
+
"pad_token_id": 0,
|
| 36 |
+
"position_embedding_type": "absolute",
|
| 37 |
+
"problem_type": "single_label_classification",
|
| 38 |
+
"transformers_version": "4.57.1",
|
| 39 |
+
"type_vocab_size": 2,
|
| 40 |
+
"use_cache": true,
|
| 41 |
+
"vocab_size": 30522
|
| 42 |
+
}
|
finetuned-bert-emotion-baseline_old/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b8931e43e01f5ba449f4998f02fb548519d5c2b0f5a89cd48a29c40ff83ef9a
|
| 3 |
+
size 437970952
|
finetuned-bert-emotion-baseline_old/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
finetuned-bert-emotion-baseline_old/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
finetuned-bert-emotion-baseline_old/tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_lower_case": true,
|
| 47 |
+
"extra_special_tokens": {},
|
| 48 |
+
"mask_token": "[MASK]",
|
| 49 |
+
"model_max_length": 512,
|
| 50 |
+
"pad_token": "[PAD]",
|
| 51 |
+
"sep_token": "[SEP]",
|
| 52 |
+
"strip_accents": null,
|
| 53 |
+
"tokenize_chinese_chars": true,
|
| 54 |
+
"tokenizer_class": "BertTokenizer",
|
| 55 |
+
"unk_token": "[UNK]"
|
| 56 |
+
}
|
finetuned-bert-emotion-baseline_old/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
finetuned-bert-emotion_old/checkpoint-500/config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "sadness",
|
| 14 |
+
"1": "joy",
|
| 15 |
+
"2": "love",
|
| 16 |
+
"3": "anger",
|
| 17 |
+
"4": "fear",
|
| 18 |
+
"5": "surprise"
|
| 19 |
+
},
|
| 20 |
+
"initializer_range": 0.02,
|
| 21 |
+
"intermediate_size": 3072,
|
| 22 |
+
"label2id": {
|
| 23 |
+
"anger": 3,
|
| 24 |
+
"fear": 4,
|
| 25 |
+
"joy": 1,
|
| 26 |
+
"love": 2,
|
| 27 |
+
"sadness": 0,
|
| 28 |
+
"surprise": 5
|
| 29 |
+
},
|
| 30 |
+
"layer_norm_eps": 1e-12,
|
| 31 |
+
"max_position_embeddings": 512,
|
| 32 |
+
"model_type": "bert",
|
| 33 |
+
"num_attention_heads": 12,
|
| 34 |
+
"num_hidden_layers": 12,
|
| 35 |
+
"pad_token_id": 0,
|
| 36 |
+
"position_embedding_type": "absolute",
|
| 37 |
+
"problem_type": "single_label_classification",
|
| 38 |
+
"transformers_version": "4.57.1",
|
| 39 |
+
"type_vocab_size": 2,
|
| 40 |
+
"use_cache": true,
|
| 41 |
+
"vocab_size": 30522
|
| 42 |
+
}
|