hanslab37 committed on
Commit
e3f8208
·
verified ·
1 Parent(s): 70d375d

Training in progress, epoch 0

Browse files
Files changed (5) hide show
  1. README.md +3 -5
  2. config.json +51 -63
  3. model.safetensors +2 -2
  4. preprocessor_config.json +10 -18
  5. training_args.bin +1 -1
README.md CHANGED
@@ -30,20 +30,18 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # architectural_styles_classifier
32
 
33
- This model is a fine-tuned version of [nvidia/mit-b0](https://huggingface.co/nvidia/mit-b0) on the Architectural style dataset, retrieved from https://www.kaggle.com/datasets/dumitrux/architectural-styles-dataset.
34
  It achieves the following results on the evaluation set:
35
  - Loss: 1.0414
36
  - Accuracy: 0.7252
37
 
38
  ## Model description
39
 
40
- Presentation link: https://www.canva.com/design/DAGLBMAs1K4/d8qvLN2nchSYVmnrwYzx0w/edit?utm_content=DAGLBMAs1K4&utm_campaign=designshare&utm_medium=link2&utm_source=sharebutton
41
-
42
- You can try the model inference in this link: https://huggingface.co/spaces/hanslab37/technospire
43
 
44
  ## Intended uses & limitations
45
 
46
- This model was developed only as part of a personal experiment and portfolio to learn about developing an image classification model with Huggingface. For now, it can be used for experimentation only. Not recommended for daily use.
47
 
48
  ## Training and evaluation data
49
 
 
30
 
31
  # architectural_styles_classifier
32
 
33
+ This model is a fine-tuned version of [nvidia/mit-b0](https://huggingface.co/nvidia/mit-b0) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
  - Loss: 1.0414
36
  - Accuracy: 0.7252
37
 
38
  ## Model description
39
 
40
+ More information needed
 
 
41
 
42
  ## Intended uses & limitations
43
 
44
+ More information needed
45
 
46
  ## Training and evaluation data
47
 
config.json CHANGED
@@ -1,29 +1,32 @@
1
  {
2
- "_name_or_path": "google/efficientnet-b3",
3
  "architectures": [
4
- "EfficientNetForImageClassification"
5
  ],
6
- "batch_norm_eps": 0.001,
7
- "batch_norm_momentum": 0.99,
8
- "depth_coefficient": 1.4,
9
- "depth_divisor": 8,
10
- "depthwise_padding": [
11
- 5,
12
- 18
 
13
  ],
14
- "drop_connect_rate": 0.2,
15
- "dropout_rate": 0.3,
16
- "expand_ratios": [
17
  1,
18
- 6,
19
- 6,
20
- 6,
21
- 6,
22
- 6,
23
- 6
 
 
 
 
 
 
24
  ],
25
- "hidden_act": "swish",
26
- "hidden_dim": 1536,
27
  "id2label": {
28
  "0": "Achaemenid architecture",
29
  "1": "American Foursquare architecture",
@@ -51,26 +54,8 @@
51
  "8": "Beaux-Arts architecture",
52
  "9": "Byzantine architecture"
53
  },
54
- "image_size": 300,
55
- "in_channels": [
56
- 32,
57
- 16,
58
- 24,
59
- 40,
60
- 80,
61
- 112,
62
- 192
63
- ],
64
  "initializer_range": 0.02,
65
- "kernel_sizes": [
66
- 3,
67
- 3,
68
- 5,
69
- 3,
70
- 5,
71
- 5,
72
- 3
73
- ],
74
  "label2id": {
75
  "Achaemenid architecture": "0",
76
  "American Foursquare architecture": "1",
@@ -98,40 +83,43 @@
98
  "Russian Revival architecture": "23",
99
  "Tudor Revival architecture": "24"
100
  },
101
- "model_type": "efficientnet",
102
- "num_block_repeats": [
 
 
 
 
 
 
 
103
  1,
104
  2,
105
- 2,
 
 
 
 
 
 
106
  3,
107
  3,
 
 
 
 
 
 
 
108
  4,
 
109
  1
110
  ],
111
- "num_channels": 3,
112
- "num_hidden_layers": 64,
113
- "out_channels": [
114
- 16,
115
- 24,
116
- 40,
117
- 80,
118
- 112,
119
- 192,
120
- 320
121
- ],
122
- "pooling_type": "mean",
123
- "problem_type": "single_label_classification",
124
- "squeeze_expansion_ratio": 0.25,
125
  "strides": [
126
- 1,
127
- 2,
128
- 2,
129
  2,
130
- 1,
131
  2,
132
- 1
133
  ],
134
  "torch_dtype": "float32",
135
- "transformers_version": "4.41.1",
136
- "width_coefficient": 1.2
137
  }
 
1
  {
2
+ "_name_or_path": "nvidia/mit-b0",
3
  "architectures": [
4
+ "SegformerForImageClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "classifier_dropout_prob": 0.1,
8
+ "decoder_hidden_size": 256,
9
+ "depths": [
10
+ 2,
11
+ 2,
12
+ 2,
13
+ 2
14
  ],
15
+ "downsampling_rates": [
 
 
16
  1,
17
+ 4,
18
+ 8,
19
+ 16
20
+ ],
21
+ "drop_path_rate": 0.1,
22
+ "hidden_act": "gelu",
23
+ "hidden_dropout_prob": 0.0,
24
+ "hidden_sizes": [
25
+ 32,
26
+ 64,
27
+ 160,
28
+ 256
29
  ],
 
 
30
  "id2label": {
31
  "0": "Achaemenid architecture",
32
  "1": "American Foursquare architecture",
 
54
  "8": "Beaux-Arts architecture",
55
  "9": "Byzantine architecture"
56
  },
57
+ "image_size": 224,
 
 
 
 
 
 
 
 
 
58
  "initializer_range": 0.02,
 
 
 
 
 
 
 
 
 
59
  "label2id": {
60
  "Achaemenid architecture": "0",
61
  "American Foursquare architecture": "1",
 
83
  "Russian Revival architecture": "23",
84
  "Tudor Revival architecture": "24"
85
  },
86
+ "layer_norm_eps": 1e-06,
87
+ "mlp_ratios": [
88
+ 4,
89
+ 4,
90
+ 4,
91
+ 4
92
+ ],
93
+ "model_type": "segformer",
94
+ "num_attention_heads": [
95
  1,
96
  2,
97
+ 5,
98
+ 8
99
+ ],
100
+ "num_channels": 3,
101
+ "num_encoder_blocks": 4,
102
+ "patch_sizes": [
103
+ 7,
104
  3,
105
  3,
106
+ 3
107
+ ],
108
+ "problem_type": "single_label_classification",
109
+ "reshape_last_stage": true,
110
+ "semantic_loss_ignore_index": 255,
111
+ "sr_ratios": [
112
+ 8,
113
  4,
114
+ 2,
115
  1
116
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  "strides": [
118
+ 4,
 
 
119
  2,
 
120
  2,
121
+ 2
122
  ],
123
  "torch_dtype": "float32",
124
+ "transformers_version": "4.41.1"
 
125
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e6c2eaaa25e67aaa55cede1bbdf928b6f85cc88902dbb7fe1450a4fd0a0bdab
3
- size 43362788
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f7f2c0acbbf16f27ac01e53966d03fa366e8b3d3bd099165bd0558471dd627
3
+ size 13325828
preprocessor_config.json CHANGED
@@ -1,28 +1,22 @@
1
  {
2
  "_valid_processor_keys": [
3
  "images",
 
4
  "do_resize",
5
  "size",
6
  "resample",
7
- "do_center_crop",
8
- "crop_size",
9
  "do_rescale",
10
  "rescale_factor",
11
- "rescale_offset",
12
  "do_normalize",
13
  "image_mean",
14
  "image_std",
15
- "include_top",
16
  "return_tensors",
17
  "data_format",
18
  "input_data_format"
19
  ],
20
- "crop_size": {
21
- "height": 289,
22
- "width": 289
23
- },
24
- "do_center_crop": false,
25
  "do_normalize": true,
 
26
  "do_rescale": true,
27
  "do_resize": true,
28
  "image_mean": [
@@ -30,18 +24,16 @@
30
  0.456,
31
  0.406
32
  ],
33
- "image_processor_type": "EfficientNetImageProcessor",
34
  "image_std": [
35
- 0.47853944,
36
- 0.4732864,
37
- 0.47434163
38
  ],
39
- "include_top": true,
40
- "resample": 0,
41
  "rescale_factor": 0.00392156862745098,
42
- "rescale_offset": false,
43
  "size": {
44
- "height": 300,
45
- "width": 300
46
  }
47
  }
 
1
  {
2
  "_valid_processor_keys": [
3
  "images",
4
+ "segmentation_maps",
5
  "do_resize",
6
  "size",
7
  "resample",
 
 
8
  "do_rescale",
9
  "rescale_factor",
 
10
  "do_normalize",
11
  "image_mean",
12
  "image_std",
13
+ "do_reduce_labels",
14
  "return_tensors",
15
  "data_format",
16
  "input_data_format"
17
  ],
 
 
 
 
 
18
  "do_normalize": true,
19
+ "do_reduce_labels": false,
20
  "do_rescale": true,
21
  "do_resize": true,
22
  "image_mean": [
 
24
  0.456,
25
  0.406
26
  ],
27
+ "image_processor_type": "SegformerImageProcessor",
28
  "image_std": [
29
+ 0.229,
30
+ 0.224,
31
+ 0.225
32
  ],
33
+ "resample": 2,
 
34
  "rescale_factor": 0.00392156862745098,
 
35
  "size": {
36
+ "height": 512,
37
+ "width": 512
38
  }
39
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0f75eb877dbf4cd27fef65056fb69b6d89c9112a693d5d60f456b0ac42fbe15
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf7c5c1182d58471883402b8933ffb4a7a19706bf9b3b2d2557f4466c986f5b
3
  size 5112