| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
# Reusable workflow: builds the CI container, then runs every NeMo 2 E2E
# script listed in the matrix below as its own job.
name: NeMo E2E NeMo2 Tests
on:
  workflow_call:
    inputs:
      # Forwarded unchanged to the test-template action as `tests_to_run`.
      test_to_run:
        required: true
        type: string
      # Container image name; used by both the build job and the test jobs.
      image-name:
        required: false
        default: nemo_container_nemo2
        type: string

jobs:
  # Delegates the image build to the shared container-build workflow.
  build:
    uses: ./.github/workflows/_build_container.yml
    with:
      image-name: ${{ inputs.image-name }}
      dockerfile: docker/Dockerfile.ci

  e2e-tests:
    strategy:
      # Let the remaining matrix jobs finish even when one of them fails.
      fail-fast: false
      matrix:
        # One entry per test job. Keys:
        #   script       - test script identifier passed to the test-template action
        #   runner       - runner label the job is scheduled on (`runs-on`)
        #   timeout      - optional; overrides the default of 10 passed to the
        #                  action (units are defined by the action - TODO confirm)
        #   is-optional  - optional; when true the job name is prefixed with
        #                  `PLEASEFIXME_` and `is_optional: true` is passed on
        include:
          - script: L2_NeMo_2_GPT_Pretraining_no_transformer_engine
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_pretraining_recipe
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_pytorch_profiler
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_fault_tolerance_plugin
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_straggler_detection
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_local_ckpt
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_DDP_Param_Parity_check
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_Conversion_from_HF
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_DDP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_Mixer_Test
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_Hyena_PP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_TP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_CP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_SSM_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_SSM_Finetuning
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_HF_MODEL_IMPORT
            runner: self-hosted-azure
          - script: L2_NeMo_2_jit_callback
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_MockData_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_Finetuning
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_Squad
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_LoRA
            runner: self-hosted-azure
          - script: L2_NeMo_2_BERT_Pretraining_Megatron
            runner: self-hosted-azure
          - script: L2_NeMo_2_BERT_Pretraining_HuggingFace
            runner: self-hosted-azure
          - script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_TP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_PP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_CP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_FINETUNE_TP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_ENERGON_FINETUNE_TP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_FINETUNE_PP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_FINETUNE_CP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_PRELOADED_FINETUNE_PP2_SEQPACK_PAD
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_PRELOADED_FINETUNE_PP2_SEQPACK_TRUNC
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_LOAD_GENERATE
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_LLAVA_IMPORT
            runner: self-hosted-azure-gpus-1
          - script: L2_NEMO_2_MLLAMA_Inference
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_MLLAMA_MOCK_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_MLLAMA_PRELOADED_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_MLLAMA_ENERGON_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_MLLAMA_IMPORT
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Mixtral_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP2_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP2PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2_exclude
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1_exclude
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NEMO_2_LoRA_MERGE
            runner: self-hosted-azure
          - script: L2_NEMO_2_LoRA_Export
            runner: self-hosted-azure-gpus-1
          - script: L2_NEMO_2_LoRA_Inference
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact
            runner: self-hosted-azure
          - script: L2_NeMo_2_Automodel_PTQ_trtllm
            runner: self-hosted-azure
          - script: L2_NeMo_2_Automodel_PTQ_hf
            runner: self-hosted-azure
          - script: L2_NeMo_2_PTQ_Llama2_FP8_trtllm
            runner: self-hosted-azure
          - script: L2_NeMo_2_PTQ_Llama2_FP8_nemo
            runner: self-hosted-azure
          - script: L2_NeMo_2_PTQ_Unified_Export
            runner: self-hosted-azure
          - script: L2_NeMo_2_Distill_Llama3_TP1PP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Prune_Llama_TP1PP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_Speculative_Llama3_TP2PP1
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_HF_CONVERSION
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_ENERGON_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_ENERGON_PACKED_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_AVLM_MOCK_TRAINING
            runner: self-hosted-azure
          - script: L2_NeMo_2_AVLM_ENERGON_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_AVLM_ENERGON_CP2_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_CLIP_PRETRAIN
            runner: self-hosted-azure
            timeout: 20
          - script: L2_NeMo_2_CLIP_INFER
            runner: self-hosted-azure
          - script: L2_NeMo_2_Auto_Configurator_llama_TP1_PP1_MBS124
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Auto_Configurator_bert_TP1_PP1_MBS124
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Auto_Configurator_t5_TP1_PP1_MBS124
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Auto_Configurator_callbacks
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Conversion_Test_Baichuan2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_ChatGLM
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_DeepSeek
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma3_llm
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma3_vlm
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Mistral
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama_Embedding
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama4
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama4_Text
            runner: self-hosted-azure
          - script: L2_NeMo_2_PTQ_Llama4_FP8_nemo
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Nemotron
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Nemotron_H_4B
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Phi3Mini
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Qwen2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Qwen3
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Starcoder
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Starcoder2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_BERT
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_T5
            runner: self-hosted-azure
          - script: L2_NeMo_2_QWEN2VL_MOCK_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_QWEN2VL_PRELOADED_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_QWEN2VL_ENERGON_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAMA4_MOCK_FINETUNE_PP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAMA4_MOCK_FINETUNE_CP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAMA4_ENERGON_FINETUNE_EP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Diffusion_Recipe_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Diffusion_Taskencoder_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_Import_Test
            runner: self-hosted-azure
            is-optional: true
          - script: L2_NeMo_2_Flux_Inference_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_Training_DDP_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_Training_FSDP_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_ControlNet_Training_DDP_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_ControlNet_Training_FSDP_Test
            runner: self-hosted-azure

    # Tests only start once the container build job has succeeded.
    needs: [build]
    runs-on: ${{ matrix.runner }}
    # Job name is the script name, prefixed with PLEASEFIXME_ for optional entries.
    name: ${{ matrix.is-optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
    steps:
      # Check the repository out into a directory named after the run id.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          path: ${{ github.run_id }}
      # NOTE(review): the action is resolved from NVIDIA/NeMo at `main`, not
      # from the checkout above - confirm this pinning is intended.
      - name: main
        uses: NVIDIA/NeMo/.github/actions/test-template@main
        with:
          runner: ${{ runner.name }}
          script: ${{ matrix.script }}
          tests_to_run: ${{ inputs.test_to_run }}
          image: ${{ inputs.image-name }}
          # Matrix entries that omit these keys fall back to false / 10.
          is_optional: ${{ matrix.is-optional || false }}
          timeout: ${{ matrix.timeout || 10 }}
|
|