| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import pytest |
|
|
| from nemo.utils.flops_formulas import FLOPSConfig, bert, gpt3, llama2, llama3, mixtral, nemotron, transformer |
| from nemo.utils.hyena_flops_formulas import hyena |
|
|
|
|
@pytest.fixture
def flops_config():
    """Build the ``FLOPSConfig`` handed to every test that requests ``flops_config``.

    The keyword values below are the fixed inputs against which the pinned
    reference FLOPs numbers in this module were recorded.
    """
    settings = {
        "gbs": 1,
        "enc_seq_len": 128,
        "hs": 768,
        "layers": 12,
        "ffn_hs": 3072,
        "attention_heads": 12,
        "moe_router_topk": 2,
        "query_groups": 12,
        "vocab_size": 50257,
        "model_pattern": "SDH*",
    }
    return FLOPSConfig(**settings)
|
|
|
|
def test_gpt3(flops_config):
    """Pin the value ``gpt3`` returns for the shared fixture configuration."""
    actual = gpt3(flops_config)
    assert actual == 97240743936
|
|
|
|
def test_llama2(flops_config):
    """Pin the value ``llama2`` returns for the shared fixture configuration."""
    actual = llama2(flops_config)
    assert actual == 107659395072.0
|
|
|
|
def test_llama3(flops_config):
    """Pin the value ``llama3`` returns for the shared fixture configuration."""
    actual = llama3(flops_config)
    assert actual == 164433494016.0
|
|
|
|
def test_nemotron(flops_config):
    """Pin the value ``nemotron`` returns for the shared fixture configuration."""
    actual = nemotron(flops_config)
    assert actual == 218036699136.0
|
|
|
|
def test_mixtral(flops_config):
    """Pin the value ``mixtral`` returns for the shared fixture configuration."""
    actual = mixtral(flops_config)
    assert actual == 172889210880.0
|
|
|
|
def test_bert(flops_config):
    """Check the value ``bert`` returns for the shared fixture configuration.

    The recorded reference is a non-integral float (it sits a hair below a
    whole number, which looks like accumulated floating-point rounding), so
    an exact ``==`` would tie the test to one particular evaluation order.
    A tight relative tolerance keeps the check robust to harmless rounding
    differences while still rejecting any change of a whole FLOP or more.
    """
    expected_flops = 84146651135.99998
    # rel=1e-12 allows under 0.1 FLOP of drift at this magnitude.
    assert bert(flops_config) == pytest.approx(expected_flops, rel=1e-12)
|
|
|
|
def test_hyena(flops_config):
    """Pin the value ``hyena`` returns for the shared fixture configuration."""
    actual = hyena(flops_config)
    assert actual == 116883062784.0
|
|
|
|
def test_transformer(flops_config):
    """Pin the value ``transformer`` returns for the unmodified fixture configuration."""
    actual = transformer(flops_config)
    assert actual == 118427811840.0
|
|
def test_transformer_no_moe(flops_config):
    """Pin the value ``transformer`` returns once ``moe_router_topk`` is set to 0."""
    # Override the fixture's value of 2 before evaluating the formula.
    flops_config.moe_router_topk = 0
    actual = transformer(flops_config)
    assert actual == 96684539904.0
|
|