# NeMo_Canary/.github/workflows/cicd-main-nemo2.yml
# Respair's picture
# Upload folder using huggingface_hub
# b386992 verified
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: NeMo E2E NeMo2 Tests

# Reusable workflow: the only trigger is `workflow_call`, so it runs when
# another workflow invokes it and supplies the inputs below.
on:
  workflow_call:
    inputs:
      # Required. Forwarded as-is to the test-template action's
      # `tests_to_run` input in the e2e-tests job.
      test_to_run:
        required: true
        type: string
      # Optional. Passed to the container build workflow as `image-name` and
      # to the test-template action as `image`.
      image-name:
        required: false
        default: nemo_container_nemo2
        type: string
jobs:
  # Delegates to the repository-local reusable workflow
  # `_build_container.yml`, passing the image name and the CI Dockerfile path.
  build:
    uses: ./.github/workflows/_build_container.yml
    with:
      image-name: ${{ inputs.image-name }}
      dockerfile: docker/Dockerfile.ci

  # One job per matrix entry. Each entry provides:
  #   script      - forwarded to the test-template action as `script`
  #   runner      - runner label used for `runs-on`
  #   timeout     - optional; forwarded as `timeout`, falls back to 10 when
  #                 unset (unit is defined by the test-template action)
  #   is-optional - optional; prefixes the job name with `PLEASEFIXME_` and is
  #                 forwarded as `is_optional` (false when unset)
  e2e-tests:
    strategy:
      # Keep the remaining matrix jobs running when one of them fails.
      fail-fast: false
      matrix:
        include:
          - script: L2_NeMo_2_GPT_Pretraining_no_transformer_engine
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_pretraining_recipe
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_pytorch_profiler
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_fault_tolerance_plugin
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_straggler_detection
            runner: self-hosted-azure
          - script: L2_NeMo_2_llama3_local_ckpt
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_DDP_Param_Parity_check
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_Conversion_from_HF
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_DDP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_Mixer_Test
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_Hyena_PP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_TP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Hyena_CP_Pretraining_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_SSM_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_SSM_Finetuning
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_HF_MODEL_IMPORT
            runner: self-hosted-azure
          - script: L2_NeMo_2_jit_callback
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_MockData_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_Finetuning
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_Squad
            runner: self-hosted-azure
          - script: L2_NeMo_2_T5_LoRA
            runner: self-hosted-azure
          - script: L2_NeMo_2_BERT_Pretraining_Megatron
            runner: self-hosted-azure
          - script: L2_NeMo_2_BERT_Pretraining_HuggingFace
            runner: self-hosted-azure
          - script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_TP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_PP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_CP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_FINETUNE_TP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_ENERGON_FINETUNE_TP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_FINETUNE_PP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_MOCK_FINETUNE_CP2
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_PRELOADED_FINETUNE_PP2_SEQPACK_PAD
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_PRELOADED_FINETUNE_PP2_SEQPACK_TRUNC
            runner: self-hosted-azure-gpus-2-h100
          - script: L2_NeMo_2_NEVA_LOAD_GENERATE
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_LLAVA_IMPORT
            runner: self-hosted-azure-gpus-1
          - script: L2_NEMO_2_MLLAMA_Inference
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_MLLAMA_MOCK_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_MLLAMA_PRELOADED_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_MLLAMA_ENERGON_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_MLLAMA_IMPORT
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Mixtral_Pretraining
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP2_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP2PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2_exclude
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1_exclude
            runner: self-hosted-azure
          - script: L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1
            runner: self-hosted-azure
          - script: L2_NEMO_2_LoRA_MERGE
            runner: self-hosted-azure
          - script: L2_NEMO_2_LoRA_Export
            runner: self-hosted-azure-gpus-1
          - script: L2_NEMO_2_LoRA_Inference
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact
            runner: self-hosted-azure
          - script: L2_NeMo_2_Automodel_PTQ_trtllm
            runner: self-hosted-azure
          - script: L2_NeMo_2_Automodel_PTQ_hf
            runner: self-hosted-azure
          - script: L2_NeMo_2_PTQ_Llama2_FP8_trtllm
            runner: self-hosted-azure
          - script: L2_NeMo_2_PTQ_Llama2_FP8_nemo
            runner: self-hosted-azure
          - script: L2_NeMo_2_PTQ_Unified_Export
            runner: self-hosted-azure
          - script: L2_NeMo_2_Distill_Llama3_TP1PP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Prune_Llama_TP1PP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_GPT_Speculative_Llama3_TP2PP1
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_HF_CONVERSION
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_ENERGON_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAVA_NEXT_ENERGON_PACKED_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_AVLM_MOCK_TRAINING
            runner: self-hosted-azure
          - script: L2_NeMo_2_AVLM_ENERGON_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_AVLM_ENERGON_CP2_TRAIN
            runner: self-hosted-azure
          - script: L2_NeMo_2_CLIP_PRETRAIN
            runner: self-hosted-azure
            # Overrides the default of 10 applied in the `main` step.
            timeout: 20
          - script: L2_NeMo_2_CLIP_INFER
            runner: self-hosted-azure
          - script: L2_NeMo_2_Auto_Configurator_llama_TP1_PP1_MBS124
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Auto_Configurator_bert_TP1_PP1_MBS124
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Auto_Configurator_t5_TP1_PP1_MBS124
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Auto_Configurator_callbacks
            runner: self-hosted-azure-gpus-1
          - script: L2_NeMo_2_Conversion_Test_Baichuan2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_ChatGLM
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_DeepSeek
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma3_llm
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Gemma3_vlm
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Mistral
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama_Embedding
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama4
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Llama4_Text
            runner: self-hosted-azure
          - script: L2_NeMo_2_PTQ_Llama4_FP8_nemo
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Nemotron
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Nemotron_H_4B
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Phi3Mini
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Qwen2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Qwen3
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Starcoder
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_Starcoder2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_BERT
            runner: self-hosted-azure
          - script: L2_NeMo_2_Conversion_Test_T5
            runner: self-hosted-azure
          - script: L2_NeMo_2_QWEN2VL_MOCK_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_QWEN2VL_PRELOADED_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_QWEN2VL_ENERGON_FINETUNE_TP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAMA4_MOCK_FINETUNE_PP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAMA4_MOCK_FINETUNE_CP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_LLAMA4_ENERGON_FINETUNE_EP2
            runner: self-hosted-azure
          - script: L2_NeMo_2_Diffusion_Recipe_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Diffusion_Taskencoder_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_Import_Test
            runner: self-hosted-azure
            # Marks this entry optional: the job name gets the `PLEASEFIXME_`
            # prefix and `is_optional: true` is passed to the test action.
            is-optional: true
          - script: L2_NeMo_2_Flux_Inference_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_Training_DDP_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_Training_FSDP_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_ControlNet_Training_DDP_Test
            runner: self-hosted-azure
          - script: L2_NeMo_2_Flux_ControlNet_Training_FSDP_Test
            runner: self-hosted-azure
    # Tests start only after the container build job has finished.
    needs: [build]
    runs-on: ${{ matrix.runner }}
    # Job name is the script name, prefixed with `PLEASEFIXME_` when the
    # matrix entry sets `is-optional`.
    name: ${{ matrix.is-optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
    steps:
      # Check the repository out into a directory named after the run id.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          path: ${{ github.run_id }}
      # NOTE(review): the action is referenced at `@main`, i.e. it tracks the
      # branch head rather than a pinned commit - confirm this is intended.
      - name: main
        uses: NVIDIA/NeMo/.github/actions/test-template@main
        with:
          runner: ${{ runner.name }}
          script: ${{ matrix.script }}
          tests_to_run: ${{ inputs.test_to_run }}
          image: ${{ inputs.image-name }}
          # Unset matrix fields fall back to `false` and `10` respectively.
          is_optional: ${{ matrix.is-optional || false }}
          timeout: ${{ matrix.timeout || 10 }}