rkoh committed on
Commit
e08895b
·
verified ·
1 Parent(s): c0cb40e
Files changed (4)
  1. README.md +16 -11
  2. config.json +10 -10
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: transformers
3
  license: apache-2.0
4
- base_model: knowledgator/gliclass-large-v1.0
5
  tags:
6
  - generated_from_trainer
7
  model-index:
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # results
16
 
17
- This model is a fine-tuned version of [knowledgator/gliclass-large-v1.0](https://huggingface.co/knowledgator/gliclass-large-v1.0) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
  - Loss: 0.0
20
 
@@ -36,25 +36,30 @@ More information needed
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 1e-05
39
- - train_batch_size: 2
40
- - eval_batch_size: 2
41
  - seed: 42
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: linear
44
- - num_epochs: 3
45
 
46
  ### Training results
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
- | 0.1308 | 1.0 | 55 | 0.2450 |
51
- | 0.0 | 2.0 | 110 | 0.0001 |
52
- | 0.0001 | 3.0 | 165 | 0.0 |
 
 
 
 
 
53
 
54
 
55
  ### Framework versions
56
 
57
- - Transformers 4.44.2
58
- - Pytorch 2.5.0+cu121
59
  - Datasets 3.1.0
60
- - Tokenizers 0.19.1
 
1
  ---
2
  library_name: transformers
3
  license: apache-2.0
4
+ base_model: knowledgator/gliclass-small-v1.0
5
  tags:
6
  - generated_from_trainer
7
  model-index:
 
14
 
15
  # results
16
 
17
+ This model is a fine-tuned version of [knowledgator/gliclass-small-v1.0](https://huggingface.co/knowledgator/gliclass-small-v1.0) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
  - Loss: 0.0
20
 
 
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 1e-05
39
+ - train_batch_size: 8
40
+ - eval_batch_size: 8
41
  - seed: 42
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: linear
44
+ - num_epochs: 8
45
 
46
  ### Training results
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
+ | 0.967 | 1.0 | 14 | 0.0061 |
51
+ | 0.1299 | 2.0 | 28 | 0.0000 |
52
+ | 0.0213 | 3.0 | 42 | 0.0000 |
53
+ | 0.1641 | 4.0 | 56 | 0.0000 |
54
+ | 0.0434 | 5.0 | 70 | 0.0 |
55
+ | 0.066 | 6.0 | 84 | 0.0000 |
56
+ | 0.061 | 7.0 | 98 | 0.0000 |
57
+ | 0.0265 | 8.0 | 112 | 0.0 |
58
 
59
 
60
  ### Framework versions
61
 
62
+ - Transformers 4.45.2
63
+ - Pytorch 2.5.1+cu121
64
  - Datasets 3.1.0
65
+ - Tokenizers 0.20.3
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "knowledgator/gliclass-large-v1.0",
3
  "architecture_type": "uni-encoder",
4
  "architectures": [
5
  "GLiClassModel"
@@ -8,7 +8,7 @@
8
  "contrastive_loss_coef": 0,
9
  "embed_class_token": true,
10
  "encoder_config": {
11
- "_name_or_path": "microsoft/deberta-v3-large",
12
  "add_cross_attention": false,
13
  "architectures": null,
14
  "attention_probs_dropout_prob": 0.1,
@@ -29,13 +29,13 @@
29
  "forced_eos_token_id": null,
30
  "hidden_act": "gelu",
31
  "hidden_dropout_prob": 0.1,
32
- "hidden_size": 1024,
33
  "id2label": {
34
  "0": "LABEL_0",
35
  "1": "LABEL_1"
36
  },
37
  "initializer_range": 0.02,
38
- "intermediate_size": 4096,
39
  "is_decoder": false,
40
  "is_encoder_decoder": false,
41
  "label2id": {
@@ -51,10 +51,10 @@
51
  "model_type": "deberta-v2",
52
  "no_repeat_ngram_size": 0,
53
  "norm_rel_ebd": "layer_norm",
54
- "num_attention_heads": 16,
55
  "num_beam_groups": 1,
56
  "num_beams": 1,
57
- "num_hidden_layers": 24,
58
  "num_return_sequences": 1,
59
  "output_attentions": false,
60
  "output_hidden_states": false,
@@ -62,7 +62,7 @@
62
  "pad_token_id": 0,
63
  "pooler_dropout": 0,
64
  "pooler_hidden_act": "gelu",
65
- "pooler_hidden_size": 1024,
66
  "pos_att_type": [
67
  "p2c",
68
  "c2p"
@@ -95,11 +95,11 @@
95
  "use_bfloat16": false,
96
  "vocab_size": 128003
97
  },
98
- "encoder_model_name": "microsoft/deberta-v3-large",
99
  "extract_text_features": false,
100
  "focal_loss_alpha": -1,
101
  "focal_loss_gamma": -1,
102
- "hidden_size": 1024,
103
  "ignore_index": -100,
104
  "initializer_range": 0.03,
105
  "logit_scale_init_value": 2.6592,
@@ -113,7 +113,7 @@
113
  "squeeze_layers": false,
114
  "text_token_index": 128004,
115
  "torch_dtype": "float32",
116
- "transformers_version": "4.44.2",
117
  "use_lstm": false,
118
  "vocab_size": 128003
119
  }
 
1
  {
2
+ "_name_or_path": "knowledgator/gliclass-small-v1.0",
3
  "architecture_type": "uni-encoder",
4
  "architectures": [
5
  "GLiClassModel"
 
8
  "contrastive_loss_coef": 0,
9
  "embed_class_token": true,
10
  "encoder_config": {
11
+ "_name_or_path": "microsoft/deberta-v3-small",
12
  "add_cross_attention": false,
13
  "architectures": null,
14
  "attention_probs_dropout_prob": 0.1,
 
29
  "forced_eos_token_id": null,
30
  "hidden_act": "gelu",
31
  "hidden_dropout_prob": 0.1,
32
+ "hidden_size": 768,
33
  "id2label": {
34
  "0": "LABEL_0",
35
  "1": "LABEL_1"
36
  },
37
  "initializer_range": 0.02,
38
+ "intermediate_size": 3072,
39
  "is_decoder": false,
40
  "is_encoder_decoder": false,
41
  "label2id": {
 
51
  "model_type": "deberta-v2",
52
  "no_repeat_ngram_size": 0,
53
  "norm_rel_ebd": "layer_norm",
54
+ "num_attention_heads": 12,
55
  "num_beam_groups": 1,
56
  "num_beams": 1,
57
+ "num_hidden_layers": 6,
58
  "num_return_sequences": 1,
59
  "output_attentions": false,
60
  "output_hidden_states": false,
 
62
  "pad_token_id": 0,
63
  "pooler_dropout": 0,
64
  "pooler_hidden_act": "gelu",
65
+ "pooler_hidden_size": 768,
66
  "pos_att_type": [
67
  "p2c",
68
  "c2p"
 
95
  "use_bfloat16": false,
96
  "vocab_size": 128003
97
  },
98
+ "encoder_model_name": "microsoft/deberta-v3-small",
99
  "extract_text_features": false,
100
  "focal_loss_alpha": -1,
101
  "focal_loss_gamma": -1,
102
+ "hidden_size": 768,
103
  "ignore_index": -100,
104
  "initializer_range": 0.03,
105
  "logit_scale_init_value": 2.6592,
 
113
  "squeeze_layers": false,
114
  "text_token_index": 128004,
115
  "torch_dtype": "float32",
116
+ "transformers_version": "4.45.2",
117
  "use_lstm": false,
118
  "vocab_size": 128003
119
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0148bfefc5c4190f9f39cdec229daeb58c9500b8ee2a01db70e4c44e2550e22f
3
- size 1752499444
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ee7dbf14d73a842029c53d4b1b97844a763a62a81f5dc4469a5d920be2f2d8c
3
+ size 574383356
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f43be2d5484608363df5690505ab8d988e85530a9f6ba9a7191023a2c5beed9e
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d82a94ab66401fecc01c7fa5a9074444bc244a8c7a4a392703cd4a84c172212
3
  size 5240