hanslab37 committed on
Commit
e3f8208
·
verified ·
1 Parent(s): 70d375d

Training in progress, epoch 0

Browse files
Files changed (5) hide show
  1. README.md +3 -5
  2. config.json +51 -63
  3. model.safetensors +2 -2
  4. preprocessor_config.json +10 -18
  5. training_args.bin +1 -1
README.md CHANGED
@@ -30,20 +30,18 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # architectural_styles_classifier
32
 
33
- This model is a fine-tuned version of [nvidia/mit-b0](https://huggingface.co/nvidia/mit-b0) on the Architectural style dataset, retrieved from https://www.kaggle.com/datasets/dumitrux/architectural-styles-dataset.
34
  It achieves the following results on the evaluation set:
35
  - Loss: 1.0414
36
  - Accuracy: 0.7252
37
 
38
  ## Model description
39
 
40
- Presentation link: https://www.canva.com/design/DAGLBMAs1K4/d8qvLN2nchSYVmnrwYzx0w/edit?utm_content=DAGLBMAs1K4&utm_campaign=designshare&utm_medium=link2&utm_source=sharebutton
41
-
42
- You can try the model inference in this link: https://huggingface.co/spaces/hanslab37/technospire
43
 
44
  ## Intended uses & limitations
45
 
46
- This model was developed only as part of a personal experiment and portfolio to learn about developing an image classification model with Huggingface. For now, it can be used for experimentation only. Not recommended for daily use.
47
 
48
  ## Training and evaluation data
49
 
 
30
 
31
  # architectural_styles_classifier
32
 
33
+ This model is a fine-tuned version of [nvidia/mit-b0](https://huggingface.co/nvidia/mit-b0) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
  - Loss: 1.0414
36
  - Accuracy: 0.7252
37
 
38
  ## Model description
39
 
40
+ More information needed
 
 
41
 
42
  ## Intended uses & limitations
43
 
44
+ More information needed
45
 
46
  ## Training and evaluation data
47
 
config.json CHANGED
@@ -1,29 +1,32 @@
1
  {
2
- "_name_or_path": "google/efficientnet-b3",
3
  "architectures": [
4
- "EfficientNetForImageClassification"
5
  ],
6
- "batch_norm_eps": 0.001,
7
- "batch_norm_momentum": 0.99,
8
- "depth_coefficient": 1.4,
9
- "depth_divisor": 8,
10
- "depthwise_padding": [
11
- 5,
12
- 18
 
13
  ],
14
- "drop_connect_rate": 0.2,
15
- "dropout_rate": 0.3,
16
- "expand_ratios": [
17
  1,
18
- 6,
19
- 6,
20
- 6,
21
- 6,
22
- 6,
23
- 6
 
 
 
 
 
 
24
  ],
25
- "hidden_act": "swish",
26
- "hidden_dim": 1536,
27
  "id2label": {
28
  "0": "Achaemenid architecture",
29
  "1": "American Foursquare architecture",
@@ -51,26 +54,8 @@
51
  "8": "Beaux-Arts architecture",
52
  "9": "Byzantine architecture"
53
  },
54
- "image_size": 300,
55
- "in_channels": [
56
- 32,
57
- 16,
58
- 24,
59
- 40,
60
- 80,
61
- 112,
62
- 192
63
- ],
64
  "initializer_range": 0.02,
65
- "kernel_sizes": [
66
- 3,
67
- 3,
68
- 5,
69
- 3,
70
- 5,
71
- 5,
72
- 3
73
- ],
74
  "label2id": {
75
  "Achaemenid architecture": "0",
76
  "American Foursquare architecture": "1",
@@ -98,40 +83,43 @@
98
  "Russian Revival architecture": "23",
99
  "Tudor Revival architecture": "24"
100
  },
101
- "model_type": "efficientnet",
102
- "num_block_repeats": [
 
 
 
 
 
 
 
103
  1,
104
  2,
105
- 2,
 
 
 
 
 
 
106
  3,
107
  3,
 
 
 
 
 
 
 
108
  4,
 
109
  1
110
  ],
111
- "num_channels": 3,
112
- "num_hidden_layers": 64,
113
- "out_channels": [
114
- 16,
115
- 24,
116
- 40,
117
- 80,
118
- 112,
119
- 192,
120
- 320
121
- ],
122
- "pooling_type": "mean",
123
- "problem_type": "single_label_classification",
124
- "squeeze_expansion_ratio": 0.25,
125
  "strides": [
126
- 1,
127
- 2,
128
- 2,
129
  2,
130
- 1,
131
  2,
132
- 1
133
  ],
134
  "torch_dtype": "float32",
135
- "transformers_version": "4.41.1",
136
- "width_coefficient": 1.2
137
  }
 
1
  {
2
+ "_name_or_path": "nvidia/mit-b0",
3
  "architectures": [
4
+ "SegformerForImageClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "classifier_dropout_prob": 0.1,
8
+ "decoder_hidden_size": 256,
9
+ "depths": [
10
+ 2,
11
+ 2,
12
+ 2,
13
+ 2
14
  ],
15
+ "downsampling_rates": [
 
 
16
  1,
17
+ 4,
18
+ 8,
19
+ 16
20
+ ],
21
+ "drop_path_rate": 0.1,
22
+ "hidden_act": "gelu",
23
+ "hidden_dropout_prob": 0.0,
24
+ "hidden_sizes": [
25
+ 32,
26
+ 64,
27
+ 160,
28
+ 256
29
  ],
 
 
30
  "id2label": {
31
  "0": "Achaemenid architecture",
32
  "1": "American Foursquare architecture",
 
54
  "8": "Beaux-Arts architecture",
55
  "9": "Byzantine architecture"
56
  },
57
+ "image_size": 224,
 
 
 
 
 
 
 
 
 
58
  "initializer_range": 0.02,
 
 
 
 
 
 
 
 
 
59
  "label2id": {
60
  "Achaemenid architecture": "0",
61
  "American Foursquare architecture": "1",
 
83
  "Russian Revival architecture": "23",
84
  "Tudor Revival architecture": "24"
85
  },
86
+ "layer_norm_eps": 1e-06,
87
+ "mlp_ratios": [
88
+ 4,
89
+ 4,
90
+ 4,
91
+ 4
92
+ ],
93
+ "model_type": "segformer",
94
+ "num_attention_heads": [
95
  1,
96
  2,
97
+ 5,
98
+ 8
99
+ ],
100
+ "num_channels": 3,
101
+ "num_encoder_blocks": 4,
102
+ "patch_sizes": [
103
+ 7,
104
  3,
105
  3,
106
+ 3
107
+ ],
108
+ "problem_type": "single_label_classification",
109
+ "reshape_last_stage": true,
110
+ "semantic_loss_ignore_index": 255,
111
+ "sr_ratios": [
112
+ 8,
113
  4,
114
+ 2,
115
  1
116
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  "strides": [
118
+ 4,
 
 
119
  2,
 
120
  2,
121
+ 2
122
  ],
123
  "torch_dtype": "float32",
124
+ "transformers_version": "4.41.1"
 
125
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e6c2eaaa25e67aaa55cede1bbdf928b6f85cc88902dbb7fe1450a4fd0a0bdab
3
- size 43362788
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f7f2c0acbbf16f27ac01e53966d03fa366e8b3d3bd099165bd0558471dd627
3
+ size 13325828
preprocessor_config.json CHANGED
@@ -1,28 +1,22 @@
1
  {
2
  "_valid_processor_keys": [
3
  "images",
 
4
  "do_resize",
5
  "size",
6
  "resample",
7
- "do_center_crop",
8
- "crop_size",
9
  "do_rescale",
10
  "rescale_factor",
11
- "rescale_offset",
12
  "do_normalize",
13
  "image_mean",
14
  "image_std",
15
- "include_top",
16
  "return_tensors",
17
  "data_format",
18
  "input_data_format"
19
  ],
20
- "crop_size": {
21
- "height": 289,
22
- "width": 289
23
- },
24
- "do_center_crop": false,
25
  "do_normalize": true,
 
26
  "do_rescale": true,
27
  "do_resize": true,
28
  "image_mean": [
@@ -30,18 +24,16 @@
30
  0.456,
31
  0.406
32
  ],
33
- "image_processor_type": "EfficientNetImageProcessor",
34
  "image_std": [
35
- 0.47853944,
36
- 0.4732864,
37
- 0.47434163
38
  ],
39
- "include_top": true,
40
- "resample": 0,
41
  "rescale_factor": 0.00392156862745098,
42
- "rescale_offset": false,
43
  "size": {
44
- "height": 300,
45
- "width": 300
46
  }
47
  }
 
1
  {
2
  "_valid_processor_keys": [
3
  "images",
4
+ "segmentation_maps",
5
  "do_resize",
6
  "size",
7
  "resample",
 
 
8
  "do_rescale",
9
  "rescale_factor",
 
10
  "do_normalize",
11
  "image_mean",
12
  "image_std",
13
+ "do_reduce_labels",
14
  "return_tensors",
15
  "data_format",
16
  "input_data_format"
17
  ],
 
 
 
 
 
18
  "do_normalize": true,
19
+ "do_reduce_labels": false,
20
  "do_rescale": true,
21
  "do_resize": true,
22
  "image_mean": [
 
24
  0.456,
25
  0.406
26
  ],
27
+ "image_processor_type": "SegformerImageProcessor",
28
  "image_std": [
29
+ 0.229,
30
+ 0.224,
31
+ 0.225
32
  ],
33
+ "resample": 2,
 
34
  "rescale_factor": 0.00392156862745098,
 
35
  "size": {
36
+ "height": 512,
37
+ "width": 512
38
  }
39
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0f75eb877dbf4cd27fef65056fb69b6d89c9112a693d5d60f456b0ac42fbe15
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf7c5c1182d58471883402b8933ffb4a7a19706bf9b3b2d2557f4466c986f5b
3
  size 5112