| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| model: |
| base_learning_rate: 4.5e-06 |
| params: |
| ddconfig: |
| attn_resolutions: |
| - 16 |
| ch: 128 |
| ch_mult: |
| - 1 |
| - 1 |
| - 2 |
| - 2 |
| - 4 |
| double_z: false |
| dropout: 0.0 |
| in_channels: 3 |
| num_res_blocks: 2 |
| out_ch: 3 |
| resolution: 256 |
| z_channels: 256 |
| embed_dim: 256 |
| lossconfig: |
| params: |
| codebook_weight: 1.0 |
| disc_conditional: false |
| disc_in_channels: 3 |
| disc_num_layers: 2 |
| disc_start: 0 |
| disc_weight: 0.75 |
| target: vqloss.VQLPIPSWithDiscriminator |
| monitor: val/rec_loss |
| n_embed: 16384 |
| target: vqmodel.VQModel |
|
|
| Working with z of shape (1, 256, 16, 16) = 65536 dimensions. |
| loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth |
| VQLPIPSWithDiscriminator running with hinge loss. |
| Loaded VQ encoder. |
| Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch. |
| Number of parameters: 1528398400 |
| Running on 16 GPUs total |
| => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt' |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| Epoch: 0 | Training loss: 5.36132043386911 | Elapsed time: 5611.475602149963 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 1 | Training loss: 5.353246255544992 | Elapsed time: 5609.176076889038 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 2 | Training loss: 5.352081888848609 | Elapsed time: 5609.765173435211 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 3 | Training loss: 5.348592715401511 | Elapsed time: 5609.775065898895 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 4 | Training loss: 5.345363831496263 | Elapsed time: 5608.818708658218 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 5 | Training loss: 5.339491759766113 | Elapsed time: 5609.52224946022 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 6 | Training loss: 5.33984013451682 | Elapsed time: 5609.668743610382 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 7 | Training loss: 5.330916097471407 | Elapsed time: 5609.77694439888 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 8 | Training loss: 5.328678440642761 | Elapsed time: 5609.147026062012 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 9 | Training loss: 5.322817993259335 | Elapsed time: 5610.099377155304 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 10 | Training loss: 5.322999638491696 | Elapsed time: 5609.647800445557 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 11 | Training loss: 5.3166510385709564 | Elapsed time: 5609.4286053180695 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 12 | Training loss: 5.314551228552789 | Elapsed time: 5609.531573057175 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 13 | Training loss: 5.310544481096449 | Elapsed time: 5609.943645954132 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 14 | Training loss: 5.307691298760139 | Elapsed time: 5610.385461807251 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 15 | Training loss: 5.304726327406419 | Elapsed time: 5609.1782310009 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 16 | Training loss: 5.3027097130393415 | Elapsed time: 5608.50914645195 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 17 | Training loss: 5.2987160214891915 | Elapsed time: 5609.189682483673 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 18 | Training loss: 5.295181529648178 | Elapsed time: 5609.2520797252655 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 19 | Training loss: 5.2975093622426765 | Elapsed time: 5609.4860327243805 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 20 | Training loss: 5.286686991120909 | Elapsed time: 5610.3333423137665 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 21 | Training loss: 5.290533660294174 | Elapsed time: 5609.560243368149 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 22 | Training loss: 5.285777743069918 | Elapsed time: 5609.129464626312 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 23 | Training loss: 5.282636421996278 | Elapsed time: 5609.90997338295 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 24 | Training loss: 5.279898757248611 | Elapsed time: 5609.894840478897 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 25 | Training loss: 5.275894250307645 | Elapsed time: 5609.922780275345 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 26 | Training loss: 5.273422160324874 | Elapsed time: 5608.922451257706 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 27 | Training loss: 5.273311212560633 | Elapsed time: 5609.505341529846 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 28 | Training loss: 5.267515561011407 | Elapsed time: 5609.848466873169 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 29 | Training loss: 5.2649664964590155 | Elapsed time: 5608.95155954361 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| srun: Job step aborted: Waiting up to 32 seconds for job step to finish. |
| slurmstepd: error: *** JOB 25928917 ON ga001 CANCELLED AT 2022-10-17T22:21:40 *** |
| slurmstepd: error: *** STEP 25928917.0 ON ga001 CANCELLED AT 2022-10-17T22:21:40 *** |
| |