File size: 1,372 Bytes
5fee096
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Dataset: CIFAR-100
#
# Shared experiment values. Each key is declared exactly once and tagged with
# an anchor (&name) so later sections of this file can reuse the value through
# an alias (*name).
# Do not re-declare these keys further down (e.g. `epoch: *epoch`): duplicate
# mapping keys are invalid YAML, are rejected by strict loaders, and only load
# on parsers that silently keep the last occurrence.
# NOTE(review): 10 initial classes plus increments of 10 over 10 tasks
# presumably covers all 100 classes — confirm against the data loader.
dataset: &dataset cifar100
init_cls_num: &init_cls_num 10
inc_cls_num: &inc_cls_num 10
task_num: &task_num 10
epoch: &epoch 20
image_size: &image_size 224

# Reuses the epoch value (20) through its alias.
val_per_epoch: *epoch

# Run-level settings.
batch_size: 128
# NOTE(review): the trailing "10" looks like a previously used value — confirm.
testing_times: 1  # 10

# Evaluation setting label, stored as a plain string.
setting: task-agnostic

# Distributed training
# Booleans use the canonical lowercase spelling; `true` loads as the same
# boolean value as `True`.
n_gpu: 1
pin_memory: true

# Training-time transforms: a sequence of single-key mappings of the form
# <transform name>: <keyword arguments>, where {} means "no arguments".
# NOTE(review): the names match torchvision.transforms classes and are
# presumably applied in the listed order — confirm against the loader.
train_trfms:
  - RandomResizedCrop:
      size: *image_size  # alias of the top-level image_size (224)
  - RandomHorizontalFlip: {}
  - ToTensor: {}
  # NOTE(review): mean 0 / std 1 would be an identity normalization under the
  # usual (x - mean) / std definition — confirm this is intended.
  - Normalize:
      mean: [0.0, 0.0, 0.0]
      std: [1.0, 1.0, 1.0]

# Test-time transforms, in the same <transform name>: <keyword arguments>
# layout as train_trfms; no random transforms are listed here.
test_trfms:
  - Resize:
      size: *image_size  # alias of the top-level image_size (224)
  - ToTensor: {}
  # Same mean/std values as the Normalize entry in train_trfms.
  - Normalize:
      mean: [0.0, 0.0, 0.0]
      std: [1.0, 1.0, 1.0]

# Optimizer: `name` selects the optimizer and `kwargs` carries its arguments.
# NOTE(review): the argument names match torch.optim.Adam — confirm that is
# the class the framework resolves `Adam` to.
optimizer:
  name: Adam
  kwargs:
    lr: 0.0005
    weight_decay: 0.0  # weight decay set to zero
    betas: [0.9, 0.999]

# Learning-rate scheduler: `name` selects the scheduler and `kwargs` carries
# its arguments. CosineSchedule is not defined in this file.
lr_scheduler:
  name: CosineSchedule
  kwargs:
    K: *epoch  # alias of the top-level epoch (20)

# Backbone network: `name` selects the builder and `kwargs` carries its
# arguments. The *_layer values name classes that are not defined in this
# file.
backbone:
  name: vit_pt_imnet
  kwargs:
    pretrained: true
    # NOTE(review): looks like a timm model identifier (ViT-Base, 16x16
    # patches, 224px input, ImageNet-21k weights) — confirm.
    model_name: vit_base_patch16_224_in21k
    attn_layer: MultiHeadAttention_MultiMaskedLoRA3
    block_layer: ResidualAttentionBiBlock  # Bi
    transformer_layer: Transformer_Proj
    lora_rank: 10

# Classifier / method configuration: `name` selects the method and `kwargs`
# carries its arguments.
classifier:
  name: MInfLoRA3
  kwargs:
    # The next four values are aliases of the top-level keys of the same name.
    dataset: *dataset
    init_cls_num: *init_cls_num
    inc_cls_num: *inc_cls_num
    task_num: *task_num
    # NOTE(review): the meaning of `lame` and `lamb` is not visible in this
    # file — see the MInfLoRA3 implementation.
    lame: 1.0
    lamb: 0.95
    # NOTE(review): 768 presumably matches the backbone's embedding width —
    # confirm.
    embd_dim: 768
    use_ca: false
    eval_mat: false