| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
| import pytest |
| import torch |
|
|
|
|
@pytest.mark.run_only_on('GPU')
def test_replace_number_add_offset():
    """Verify layer-number rewriting in checkpoint key names."""
    from nemo.export.utils.lora_converter import replace_number_add_offset

    layered_key = "layers.0.self_attention.lora_kqv_adapter.linear_in.weight"

    # A zero offset must leave the key untouched.
    assert replace_number_add_offset(layered_key, 0) == layered_key

    # Positive and negative offsets shift the embedded layer index accordingly.
    for offset, shifted_key in (
        (1, "layers.1.self_attention.lora_kqv_adapter.linear_in.weight"),
        (-1, "layers.-1.self_attention.lora_kqv_adapter.linear_in.weight"),
    ):
        assert replace_number_add_offset(layered_key, offset) == shifted_key

    # Keys that carry no layer number come back unchanged regardless of offset.
    unlayered_key = "embedding.word_embeddings.weight"
    assert replace_number_add_offset(unlayered_key, 1) == unlayered_key
|
|
|
|
@pytest.mark.run_only_on('GPU')
def test_rename_qkv_keys():
    """Verify a fused KQV adapter key expands into separate q/k/v adapter keys."""
    from nemo.export.utils.lora_converter import rename_qkv_keys

    fused_key = "layers.0.self_attention.lora_kqv_adapter.linear_in.weight"
    unfused_keys = rename_qkv_keys(fused_key)

    # Exactly one key per projection, emitted in q, k, v order.
    assert len(unfused_keys) == 3
    for position, expected_key in enumerate(
        (
            "layers.0.self_attention.lora_unfused_kqv_adapter.q_adapter.linear_in.weight",
            "layers.0.self_attention.lora_unfused_kqv_adapter.k_adapter.linear_in.weight",
            "layers.0.self_attention.lora_unfused_kqv_adapter.v_adapter.linear_in.weight",
        )
    ):
        assert unfused_keys[position] == expected_key
|
|
|
|
@pytest.mark.run_only_on('GPU')
def test_reformat_module_names_to_hf():
    """Verify NeMo adapter tensor names are mapped to HF module / LoRA naming."""
    from nemo.export.utils.lora_converter import reformat_module_names_to_hf

    # One tensor per NeMo adapter style the converter is expected to handle.
    nemo_tensors = {
        "q_adapter.linear_in.weight": torch.randn(10, 10),
        "k_adapter.linear_out.weight": torch.randn(10, 10),
        "v_adapter.linear_in.weight": torch.randn(10, 10),
        "lora_dense_attention_adapter.linear_out.weight": torch.randn(10, 10),
        "lora_4htoh_adapter.linear_in.weight": torch.randn(10, 10),
        "gate_adapter.linear_out.weight": torch.randn(10, 10),
        "up_adapter.linear_in.weight": torch.randn(10, 10),
    }

    renamed_tensors, hf_modules = reformat_module_names_to_hf(nemo_tensors)

    # Renaming must neither drop nor duplicate tensors.
    assert len(renamed_tensors) == len(nemo_tensors)

    # Each NeMo adapter maps onto its HF projection module name.
    assert set(hf_modules) == {"q_proj", "k_proj", "v_proj", "o_proj", "down_proj", "gate_proj", "up_proj"}

    # linear_in -> lora_A and linear_out -> lora_B, under the base_model prefix.
    for hf_key in (
        "base_model.q_proj.lora_A.weight",
        "base_model.k_proj.lora_B.weight",
        "base_model.v_proj.lora_A.weight",
    ):
        assert hf_key in renamed_tensors
|
|
|
@pytest.mark.run_only_on('GPU')
def test_convert_lora_weights_to_canonical():
    """Verify fused NeMo LoRA weights are split into canonical per-projection adapters."""
    from nemo.export.utils.lora_converter import convert_lora_weights_to_canonical

    # Minimal model config: grouped-query attention plus a rank-16 LoRA.
    model_config = {
        "hidden_size": 512,
        "num_attention_heads": 8,
        "num_query_groups": 4,
        "peft": {"lora_tuning": {"adapter_dim": 16}},
    }

    # Fused inputs: one KQV adapter pair and one h-to-4h adapter pair on layer 0.
    fused_weights = {
        "layers.0.self_attention.lora_kqv_adapter.linear_in.weight": torch.randn(16, 1024),
        "layers.0.self_attention.lora_kqv_adapter.linear_out.weight": torch.randn(1024, 16),
        "layers.0.lora_hto4h_adapter.linear_in.weight": torch.randn(16, 1024),
        "layers.0.lora_hto4h_adapter.linear_out.weight": torch.randn(2048, 16),
    }

    canonical_weights = convert_lora_weights_to_canonical(model_config, fused_weights)

    # The fused KQV adapter must be split into separate q/k/v adapters...
    for projection in ("q", "k", "v"):
        assert (
            f"layers.0.self_attention.lora_unfused_kqv_adapter.{projection}_adapter.linear_in.weight"
            in canonical_weights
        )

    # ...and the fused h-to-4h adapter into gate/up adapters.
    for half in ("gate", "up"):
        assert f"layers.0.lora_unfused_hto4h_adapter.{half}_adapter.linear_in.weight" in canonical_weights
|
|