| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) |
| model: |
| base_learning_rate: 4.5e-06 |
| params: |
| ddconfig: |
| attn_resolutions: |
| - 16 |
| ch: 128 |
| ch_mult: |
| - 1 |
| - 1 |
| - 2 |
| - 2 |
| - 4 |
| double_z: false |
| dropout: 0.0 |
| in_channels: 3 |
| num_res_blocks: 2 |
| out_ch: 3 |
| resolution: 256 |
| z_channels: 256 |
| embed_dim: 256 |
| lossconfig: |
| params: |
| codebook_weight: 1.0 |
| disc_conditional: false |
| disc_in_channels: 3 |
| disc_num_layers: 2 |
| disc_start: 0 |
| disc_weight: 0.75 |
| target: vqloss.VQLPIPSWithDiscriminator |
| monitor: val/rec_loss |
| n_embed: 16384 |
| target: vqmodel.VQModel |
|
|
| Working with z of shape (1, 256, 16, 16) = 65536 dimensions. |
| loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth |
| VQLPIPSWithDiscriminator running with hinge loss. |
| Loaded VQ encoder. |
| Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch. |
| Number of parameters: 1528398400 |
| Running on 16 GPUs total |
| => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt' |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. |
| warnings.warn(warning.format(ret)) |
| Epoch: 0 | Training loss: 5.265875637805188 | Elapsed time: 5567.522572040558 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 1 | Training loss: 5.259369312823712 | Elapsed time: 5564.492578029633 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 2 | Training loss: 5.2597381899525955 | Elapsed time: 5564.743162870407 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 3 | Training loss: 5.258067237366211 | Elapsed time: 5564.359503269196 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 4 | Training loss: 5.256607461618734 | Elapsed time: 5564.1615924835205 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 5 | Training loss: 5.25204824624838 | Elapsed time: 5564.931565761566 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 6 | Training loss: 5.2539677929568604 | Elapsed time: 5563.973826169968 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 7 | Training loss: 5.24619766086727 | Elapsed time: 5564.175024271011 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 8 | Training loss: 5.245521523211743 | Elapsed time: 5565.000099182129 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 9 | Training loss: 5.2409252663116 | Elapsed time: 5566.062009334564 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 10 | Training loss: 5.242174459813715 | Elapsed time: 5564.810876607895 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 11 | Training loss: 5.236654115080476 | Elapsed time: 5564.1297216415405 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 12 | Training loss: 5.236161358706601 | Elapsed time: 5564.3255116939545 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 13 | Training loss: 5.233254426294988 | Elapsed time: 5563.92977309227 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 14 | Training loss: 5.23119745626078 | Elapsed time: 5564.57776761055 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 15 | Training loss: 5.229216562570273 | Elapsed time: 5564.952026605606 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 16 | Training loss: 5.22834527714031 | Elapsed time: 5564.028384447098 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 17 | Training loss: 5.22506249367774 | Elapsed time: 5564.405800104141 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 18 | Training loss: 5.222301427896444 | Elapsed time: 5564.323853731155 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 19 | Training loss: 5.225507423141739 | Elapsed time: 5564.729813575745 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 20 | Training loss: 5.21564470618874 | Elapsed time: 5564.293010473251 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 21 | Training loss: 5.22011399159541 | Elapsed time: 5564.039561748505 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 22 | Training loss: 5.216228381641857 | Elapsed time: 5563.3787343502045 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 23 | Training loss: 5.213790066401799 | Elapsed time: 5564.072102308273 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 24 | Training loss: 5.211814184789057 | Elapsed time: 5564.09782910347 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 25 | Training loss: 5.208216408225564 | Elapsed time: 5564.173808813095 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 26 | Training loss: 5.206609721950718 | Elapsed time: 5564.133508682251 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 27 | Training loss: 5.2072193223875125 | Elapsed time: 5564.5976548194885 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 28 | Training loss: 5.202074414842969 | Elapsed time: 5564.557286977768 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| Epoch: 29 | Training loss: 5.200261769880663 | Elapsed time: 5564.025668859482 |
| Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt |
| srun: Job step aborted: Waiting up to 32 seconds for job step to finish. |
| slurmstepd: error: *** JOB 25995681 ON ga001 CANCELLED AT 2022-10-19T21:05:48 *** |
| slurmstepd: error: *** STEP 25995681.0 ON ga001 CANCELLED AT 2022-10-19T21:05:48 *** |
| |