Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
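The Namespace line above is the training script's argparse dump (each of the 8 distributed workers echoes an identical copy at startup). A minimal sketch of a parser that would produce it is below; flag names and default values are taken directly from the dump, while the types and help comments are assumptions.

    # Hypothetical reconstruction of the script's CLI from the Namespace
    # dump above; values are from the log, types are inferred.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, default='/scratch/work/public/imagenet/train')
    parser.add_argument('--gpt_config', type=str, default='GPT_gimel')
    parser.add_argument('--vocab_size', type=int, default=16384)  # = VQ codebook size (n_embed below)
    parser.add_argument('--block_size', type=int, default=255)    # GPT context length in VQ tokens
    parser.add_argument('--batch_size', type=int, default=32)     # presumably per process
    parser.add_argument('--lr', type=float, default=0.0003)
    parser.add_argument('--optimizer', type=str, default='Adam')
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--resume', type=str, default='')         # checkpoint path to resume from
    # ...the remaining flags in the dump (num_workers, seed, save_dir,
    # save_prefix, vqconfig_path, vqmodel_path, and the distributed flags
    # gpu/world_size/rank/dist_url/dist_backend/local_rank) follow the
    # same pattern.
    print(parser.parse_args([]))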
model:
  base_learning_rate: 4.5e-06
  params:
    ddconfig:
      attn_resolutions:
      - 16
      ch: 128
      ch_mult:
      - 1
      - 1
      - 2
      - 2
      - 4
      double_z: false
      dropout: 0.0
      in_channels: 3
      num_res_blocks: 2
      out_ch: 3
      resolution: 256
      z_channels: 256
    embed_dim: 256
    lossconfig:
      params:
        codebook_weight: 1.0
        disc_conditional: false
        disc_in_channels: 3
        disc_num_layers: 2
        disc_start: 0
        disc_weight: 0.75
      target: vqloss.VQLPIPSWithDiscriminator
    monitor: val/rec_loss
    n_embed: 16384
  target: vqmodel.VQModel
Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
VQLPIPSWithDiscriminator running with hinge loss.
Loaded VQ encoder.
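The YAML above is a taming-transformers-style VQGAN config. A sketch of how such a config/checkpoint pair is typically loaded follows; it assumes the repo's vqmodel.VQModel takes the model.params block as constructor keyword arguments (as taming's VQModel does) and that the .ckpt is a PyTorch Lightning checkpoint storing weights under 'state_dict'. Both are conventions, not facts shown in this log.

    # Sketch: loading the VQ encoder named in the config dump above.
    import torch
    from omegaconf import OmegaConf
    from vqmodel import VQModel  # 'target: vqmodel.VQModel' in the config

    cfg = OmegaConf.load('/scratch/eo41/visual-recognition-memory/'
                         'vqgan_pretrained_models/imagenet_16x16_16384.yaml')
    vq = VQModel(**cfg.model.params)  # builds encoder/decoder/codebook
    sd = torch.load('/scratch/eo41/visual-recognition-memory/'
                    'vqgan_pretrained_models/imagenet_16x16_16384.ckpt',
                    map_location='cpu')['state_dict']  # Lightning convention
    vq.load_state_dict(sd, strict=False)  # strict=False tolerates loss-net keys
    vq.eval()

A 256x256 input lands on a 16x16 latent grid (the "z of shape (1, 256, 16, 16)" line), i.e. 256 code indices per image drawn from the 16384-entry codebook; a 255-token context predicting the next code then lines up with block_size=255 and vocab_size=16384 in the GPT arguments.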
Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
Number of parameters: 750659840
Running on 8 GPUs total
=> no checkpoint loaded, will train from scratch
/scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
  warnings.warn(warning.format(ret))
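The bookkeeping numbers above are mutually consistent, as a quick check shows. The only inference is that the effective batch size is the per-process batch size times the worker count; 256 is plausibly also the "256b" tag in the checkpoint filenames below.

    # Quick arithmetic check of the logged dataset/iteration numbers.
    import math

    images = 1281167            # 'dataset contains 1281167 images'
    per_gpu = 32                # batch_size in the Namespace dump
    gpus = 8                    # 'Running on 8 GPUs total'
    effective = per_gpu * gpus  # 256 (inferred effective batch size)
    print(math.ceil(images / effective))  # 5005 iterations per epoch, as logged
    print(round(4448.6 / 5005, 3))        # ~0.889 s/iteration at ~4448.6 s/epoch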
Epoch: 0 | Training loss: 6.135367824123813 | Elapsed time: 4453.789637804031
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 1 | Training loss: 5.798131484299392 | Elapsed time: 4448.56702375412
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 2 | Training loss: 5.7218508500319265 | Elapsed time: 4448.635702133179
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 3 | Training loss: 5.675868084475949 | Elapsed time: 4448.570371866226
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 4 | Training loss: 5.64415309231479 | Elapsed time: 4448.48592376709
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 5 | Training loss: 5.617594873083459 | Elapsed time: 4448.740148067474
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 6 | Training loss: 5.5994461619770615 | Elapsed time: 4449.281894683838
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 7 | Training loss: 5.580984299856943 | Elapsed time: 4448.514730215073
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 8 | Training loss: 5.568105853306545 | Elapsed time: 4448.5986959934235
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 9 | Training loss: 5.553497776713643 | Elapsed time: 4449.114318370819
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 10 | Training loss: 5.545027699884954 | Elapsed time: 4449.138834238052
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 11 | Training loss: 5.531521415519905 | Elapsed time: 4449.098812580109
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 12 | Training loss: 5.522672693498365 | Elapsed time: 4448.901001691818
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 13 | Training loss: 5.515013064823665 | Elapsed time: 4448.462759017944
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 14 | Training loss: 5.508660832556573 | Elapsed time: 4448.8206622600555
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 15 | Training loss: 5.500996496865561 | Elapsed time: 4448.373802423477
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 16 | Training loss: 5.494677847463053 | Elapsed time: 4449.025486946106
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 17 | Training loss: 5.488317275642753 | Elapsed time: 4448.813071966171
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 18 | Training loss: 5.482922156159575 | Elapsed time: 4448.179989337921
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 19 | Training loss: 5.480782058879688 | Elapsed time: 4448.937339067459
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 20 | Training loss: 5.471766509876384 | Elapsed time: 4448.567655324936
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 21 | Training loss: 5.468871520973228 | Elapsed time: 4448.808972358704
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 22 | Training loss: 5.463682885698743 | Elapsed time: 4447.909594774246
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 23 | Training loss: 5.459242056466483 | Elapsed time: 4447.975906133652
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 24 | Training loss: 5.454185632654242 | Elapsed time: 4447.988601446152
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 25 | Training loss: 5.451898510329849 | Elapsed time: 4448.234513998032
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 26 | Training loss: 5.446100732496569 | Elapsed time: 4448.3813943862915
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 27 | Training loss: 5.443605179839082 | Elapsed time: 4448.738905668259
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 28 | Training loss: 5.440151975633619 | Elapsed time: 4448.119179487228
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 29 | Training loss: 5.435839955123154 | Elapsed time: 4447.811242103577
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 30 | Training loss: 5.43510546612811 | Elapsed time: 4447.706588983536
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 31 | Training loss: 5.428354823505962 | Elapsed time: 4448.152802705765
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 32 | Training loss: 5.4250246925430226 | Elapsed time: 4448.008017539978
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 33 | Training loss: 5.425318639833372 | Elapsed time: 4448.51774430275
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 34 | Training loss: 5.419239971187565 | Elapsed time: 4447.93063378334
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 35 | Training loss: 5.418465005815565 | Elapsed time: 4447.893654823303
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 36 | Training loss: 5.416968753978566 | Elapsed time: 4447.303329944611
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
Epoch: 37 | Training loss: 5.414549265088854 | Elapsed time: 4447.923640966415
Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
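For plotting the loss curve, the per-epoch lines above can be scraped with a few lines of stdlib Python. The sketch below assumes this output has been captured to a file; 'train.log' is a hypothetical filename.

    # Extract (epoch, loss, seconds) triples from a saved copy of this log.
    import re

    pattern = re.compile(
        r'Epoch: (\d+) \| Training loss: ([\d.]+) \| Elapsed time: ([\d.]+)')
    with open('train.log') as f:
        rows = [(int(e), float(l), float(t))
                for e, l, t in pattern.findall(f.read())]
    for epoch, loss, secs in rows[:3]:
        print(f'epoch {epoch:3d}  loss {loss:.4f}  {secs:.0f} s')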
slurmstepd: error: *** JOB 25434829 ON ga007 CANCELLED AT 2022-09-27T05:16:34 DUE TO TIME LIMIT ***
slurmstepd: error: *** STEP 25434829.0 ON ga007 CANCELLED AT 2022-09-27T05:16:34 DUE TO TIME LIMIT ***
srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
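At roughly 4,448 s per epoch, the 38 completed epochs (0 through 37) account for about 47 hours of training before SLURM killed the job at its time limit. The script's --resume flag (empty in this run, hence "=> no checkpoint loaded, will train from scratch" above) is presumably how a follow-up submission continues from model_037_*. Since the .pt layout is never shown in this log, a sketch should inspect the checkpoint rather than assume key names:

    # Inspect the last checkpoint before resuming; the .pt layout is not
    # shown in this log, so print the keys instead of assuming names like
    # 'model_state_dict' or 'optimizer'.
    import torch

    ckpt = torch.load('/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/'
                      'model_037_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt',
                      map_location='cpu')
    print(list(ckpt.keys()) if isinstance(ckpt, dict) else type(ckpt))
    # A follow-up run would then pass this path via --resume so training
    # picks up at epoch 38 instead of from scratch.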