DYunt
/

SUMMIT

Model card Files Files and versions

SUMMIT / util /lr_decay.py

DYunt's picture

Upload 26 files

2659b26 verified 6 months ago

history blame contribute delete

2.54 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.

	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.
	# --------------------------------------------------------
	# References:
	# ELECTRA https://github.com/google-research/electra
	# BEiT: https://github.com/microsoft/unilm/tree/master/beit
	# --------------------------------------------------------

	import json


	def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=(), layer_decay=.75):
	    """
	    Build parameter groups for layer-wise lr decay.

	    Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58

	    Every trainable parameter of ``model`` is placed in a group identified by
	    its layer id (as returned by ``get_layer_id_for_vit``) and by whether
	    weight decay applies to it.

	    Args:
	        model: object exposing ``blocks`` (only its length is used) and
	            ``named_parameters()`` yielding ``(name, parameter)`` pairs.
	        weight_decay: value stored as "weight_decay" on the "decay" groups.
	        no_weight_decay_list: container of parameter names that must not be
	            decayed, in addition to every parameter with ``ndim == 1``.
	            Only membership tests are performed on it.
	        layer_decay: base of the per-layer scale; layer id ``i`` receives
	            ``layer_decay ** (num_layers - i)``.

	    Returns:
	        A list of dicts with the keys "lr_scale", "weight_decay" and
	        "params", in the order in which each group was first encountered.
	        NOTE(review): "lr_scale" is only recorded here; presumably the
	        training loop multiplies the base lr by it -- confirm against caller.
	    """
	    # Valid layer ids span 0 .. num_layers inclusive, hence num_layers + 1 scales.
	    num_layers = len(model.blocks) + 1
	    layer_scales = [layer_decay ** (num_layers - i) for i in range(num_layers + 1)]

	    param_groups = {}
	    for n, p in model.named_parameters():
	        # Frozen parameters are left out of every group.
	        if not p.requires_grad:
	            continue

	        # no decay: all 1D parameters and model specific ones
	        if p.ndim == 1 or n in no_weight_decay_list:
	            g_decay = "no_decay"
	            this_decay = 0.
	        else:
	            g_decay = "decay"
	            this_decay = weight_decay

	        layer_id = get_layer_id_for_vit(n, num_layers)
	        group_name = "layer_%d_%s" % (layer_id, g_decay)

	        # Create the group lazily the first time its (layer, decay) pair appears.
	        if group_name not in param_groups:
	            param_groups[group_name] = {
	                "lr_scale": layer_scales[layer_id],
	                "weight_decay": this_decay,
	                "params": [],
	            }

	        param_groups[group_name]["params"].append(p)

	    return list(param_groups.values())


	def get_layer_id_for_vit(name, num_layers):
	    """
	    Map a parameter name to the id of the layer it belongs to.

	    Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33

	    Returns 0 for 'cls_token', 'pos_embed' and any name starting with
	    'patch_embed'; the integer in the second dot-separated field plus one
	    for names starting with 'blocks'; ``num_layers`` for everything else.
	    """
	    # Embedding-side parameters all share layer id 0.
	    is_embedding = name in ('cls_token', 'pos_embed') or name.startswith('patch_embed')
	    if is_embedding:
	        return 0

	    # Anything that is neither an embedding nor a block gets the last id.
	    if not name.startswith('blocks'):
	        return num_layers

	    # e.g. 'blocks.3.attn.qkv.weight' -> field '3' -> layer id 4
	    block_index = name.split('.')[1]
	    return 1 + int(block_index)