| AMP: true |
| CUDNN_BENCHMARK: true |
| CUDNN_DETERMINISTIC: false |
| DATA: |
| EOS_INDEX: 2 |
| IMAGE_CROP_SIZE: 224 |
| IMAGE_TRANSFORM_TRAIN: |
| - random_resized_crop |
| - horizontal_flip |
| - color_jitter |
| - normalize |
| IMAGE_TRANSFORM_VAL: |
| - smallest_resize |
| - center_crop |
| - normalize |
| MASKED_LM: |
| MASK_PROBABILITY: 0.85 |
| MASK_PROPORTION: 0.15 |
| REPLACE_PROBABILITY: 0.1 |
| MASK_INDEX: 3 |
| MAX_CAPTION_LENGTH: 50 |
| ROOT: datasets/redcaps/tarfiles/*.tar |
| SOS_INDEX: 1 |
| TOKENIZER_MODEL: datasets/common_30k.model |
| UNK_INDEX: 0 |
| USE_PERCENTAGE: 100.0 |
| USE_SINGLE_CAPTION: false |
| VOCAB_SIZE: 30000 |
| MODEL: |
| DECODER: |
| BEAM_SIZE: 5 |
| MAX_DECODING_STEPS: 30 |
| NAME: nucleus_sampling |
| NUCLEUS_SIZE: 0.9 |
| LABEL_SMOOTHING: 0.1 |
| NAME: virtex_web |
| TEXTUAL: |
| DROPOUT: 0.1 |
| NAME: transdec_prenorm::L6_H512_A8_F2048 |
| VISUAL: |
| FEATURE_SIZE: 2048 |
| FROZEN: false |
| NAME: torchvision::resnet50 |
| PRETRAINED: false |
| OPTIM: |
| BATCH_SIZE: 256 |
| CLIP_GRAD_NORM: 10.0 |
| CNN_LR: 0.0005 |
| LOOKAHEAD: |
| ALPHA: 0.5 |
| STEPS: 5 |
| USE: false |
| LR: 0.0005 |
| LR_DECAY_NAME: cosine |
| LR_GAMMA: 0.1 |
| LR_STEPS: [] |
| NO_DECAY: .*textual.(embedding|transformer).*(norm.*|bias) |
| NUM_ITERATIONS: 1500000 |
| OPTIMIZER_NAME: adamw |
| SGD_MOMENTUM: 0.9 |
| WARMUP_STEPS: 10000 |
| WEIGHT_DECAY: 0.01 |
| RANDOM_SEED: 0 |
|
|