File size: 2,173 Bytes
5a64d6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
{
  "model_type": "baramnuri",
  "architectures": ["BaramNuri"],
  "model_name": "baramnuri-beta",
  "version": "0.1.0-beta",

  "num_classes": 5,
  "class_names": ["정상", "졸음운전", "물건찾기", "휴대폰 사용", "운전자 폭행"],
  "class_names_en": ["normal", "drowsy_driving", "searching_object", "phone_usage", "driver_assault"],

  "backbone": {
    "type": "video_swin_t",
    "pretrained_on": "kinetics-400",
    "stages_used": [1, 2, 3],
    "feature_dim": 384
  },

  "ssm_block": {
    "type": "selective_ssm",
    "d_state": 16,
    "d_conv": 4,
    "expand": 2,
    "n_layers": 2,
    "dropout": 0.2
  },

  "input_spec": {
    "channels": 3,
    "num_frames": 30,
    "height": 224,
    "width": 224,
    "fps": 30,
    "format": "BCTHW"
  },

  "model_stats": {
    "total_parameters": 14203205,
    "total_parameters_readable": "14.20M",
    "model_size_fp32_mb": 54,
    "model_size_fp16_mb": 27,
    "model_size_int8_mb": 13
  },

  "training": {
    "method": "knowledge_distillation",
    "teacher_model": "Video Swin-T (27.86M)",
    "teacher_accuracy": 0.9805,
    "teacher_f1": 0.9757,
    "epochs_trained": 6,
    "best_accuracy": 0.9617,
    "best_macro_f1": 0.9504,
    "optimizer": "AdamW",
    "learning_rate": 1e-4,
    "weight_decay": 0.05,
    "batch_size": 96,
    "data_augmentation": ["resize", "normalize"]
  },

  "performance": {
    "accuracy": 0.9617,
    "macro_f1": 0.9504,
    "per_class_f1": {
      "정상": 0.93,
      "졸음운전": 0.97,
      "물건찾기": 0.94,
      "휴대폰 사용": 0.94,
      "운전자 폭행": 0.99
    }
  },

  "comparison_with_teacher": {
    "parameter_reduction": "49%",
    "size_reduction": "49%",
    "accuracy_retention": "98.1%",
    "f1_retention": "97.4%",
    "training_speed_improvement": "40%"
  },

  "license": "Apache-2.0",
  "language": ["ko", "en"],
  "tags": [
    "video-classification",
    "driver-behavior",
    "knowledge-distillation",
    "video-swin-transformer",
    "state-space-model",
    "ssm",
    "mamba-style",
    "lightweight",
    "korean"
  ]
}