| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
| import pytest |
| import torch |
|
|
|
|
@pytest.mark.run_only_on('GPU')
def test_replace_number_add_offset():
    """Verify layer-number rewriting in checkpoint key names."""
    from nemo.export.utils.lora_converter import replace_number_add_offset

    layered_key = "layers.0.self_attention.lora_kqv_adapter.linear_in.weight"

    # A zero offset must leave the key untouched.
    assert replace_number_add_offset(layered_key, 0) == layered_key

    # Positive and negative offsets shift the embedded layer index accordingly.
    for offset, shifted_key in (
        (1, "layers.1.self_attention.lora_kqv_adapter.linear_in.weight"),
        (-1, "layers.-1.self_attention.lora_kqv_adapter.linear_in.weight"),
    ):
        assert replace_number_add_offset(layered_key, offset) == shifted_key

    # Keys that carry no layer number come back unchanged regardless of offset.
    unlayered_key = "embedding.word_embeddings.weight"
    assert replace_number_add_offset(unlayered_key, 1) == unlayered_key
|
|
|
|
@pytest.mark.run_only_on('GPU')
def test_rename_qkv_keys():
    """Verify a fused KQV adapter key expands into separate q/k/v adapter keys."""
    from nemo.export.utils.lora_converter import rename_qkv_keys

    fused_key = "layers.0.self_attention.lora_kqv_adapter.linear_in.weight"
    unfused_keys = rename_qkv_keys(fused_key)

    # Exactly one key per projection, emitted in q, k, v order.
    assert len(unfused_keys) == 3
    for position, expected_key in enumerate(
        (
            "layers.0.self_attention.lora_unfused_kqv_adapter.q_adapter.linear_in.weight",
            "layers.0.self_attention.lora_unfused_kqv_adapter.k_adapter.linear_in.weight",
            "layers.0.self_attention.lora_unfused_kqv_adapter.v_adapter.linear_in.weight",
        )
    ):
        assert unfused_keys[position] == expected_key
|
|
|
|
@pytest.mark.run_only_on('GPU')
def test_reformat_module_names_to_hf():
    """Verify NeMo adapter tensor names are mapped to HF module / LoRA naming."""
    from nemo.export.utils.lora_converter import reformat_module_names_to_hf

    # One tensor per NeMo adapter style the converter is expected to handle.
    nemo_tensors = {
        "q_adapter.linear_in.weight": torch.randn(10, 10),
        "k_adapter.linear_out.weight": torch.randn(10, 10),
        "v_adapter.linear_in.weight": torch.randn(10, 10),
        "lora_dense_attention_adapter.linear_out.weight": torch.randn(10, 10),
        "lora_4htoh_adapter.linear_in.weight": torch.randn(10, 10),
        "gate_adapter.linear_out.weight": torch.randn(10, 10),
        "up_adapter.linear_in.weight": torch.randn(10, 10),
    }

    renamed_tensors, hf_modules = reformat_module_names_to_hf(nemo_tensors)

    # Renaming must neither drop nor duplicate tensors.
    assert len(renamed_tensors) == len(nemo_tensors)

    # Each NeMo adapter maps onto its HF projection module name.
    assert set(hf_modules) == {"q_proj", "k_proj", "v_proj", "o_proj", "down_proj", "gate_proj", "up_proj"}

    # linear_in -> lora_A and linear_out -> lora_B, under the base_model prefix.
    for hf_key in (
        "base_model.q_proj.lora_A.weight",
        "base_model.k_proj.lora_B.weight",
        "base_model.v_proj.lora_A.weight",
    ):
        assert hf_key in renamed_tensors
|
|
|
@pytest.mark.run_only_on('GPU')
def test_convert_lora_weights_to_canonical():
    """Verify fused NeMo LoRA weights are split into canonical per-projection adapters."""
    from nemo.export.utils.lora_converter import convert_lora_weights_to_canonical

    # Minimal model config: grouped-query attention plus a rank-16 LoRA.
    model_config = {
        "hidden_size": 512,
        "num_attention_heads": 8,
        "num_query_groups": 4,
        "peft": {"lora_tuning": {"adapter_dim": 16}},
    }

    # Fused inputs: one KQV adapter pair and one h-to-4h adapter pair on layer 0.
    fused_weights = {
        "layers.0.self_attention.lora_kqv_adapter.linear_in.weight": torch.randn(16, 1024),
        "layers.0.self_attention.lora_kqv_adapter.linear_out.weight": torch.randn(1024, 16),
        "layers.0.lora_hto4h_adapter.linear_in.weight": torch.randn(16, 1024),
        "layers.0.lora_hto4h_adapter.linear_out.weight": torch.randn(2048, 16),
    }

    canonical_weights = convert_lora_weights_to_canonical(model_config, fused_weights)

    # The fused KQV adapter must be split into separate q/k/v adapters...
    for projection in ("q", "k", "v"):
        assert (
            f"layers.0.self_attention.lora_unfused_kqv_adapter.{projection}_adapter.linear_in.weight"
            in canonical_weights
        )

    # ...and the fused h-to-4h adapter into gate/up adapters.
    for half in ("gate", "up"):
        assert f"layers.0.lora_unfused_hto4h_adapter.{half}_adapter.linear_in.weight" in canonical_weights
|
|