Rename arch class: NemotronLabsDiffusionEncoderModel → NemotronLabsDiffusionModel (#5)

- Rename arch class: NemotronLabsDiffusionEncoderModel → NemotronLabsDiffusionModel (540c304e355592ceee472c947337b3b5dd8f5948)

Co-authored-by: Khadkevich <mkhadkevich@users.noreply.huggingface.co>

Files changed (3)

config.json +3 -3
configuration_nemotron_labs_diffusion.py +1 -1
modeling_nemotron_labs_diffusion.py +2 -2

config.json CHANGED

@@ -5,14 +5,14 @@
   "adaptive_mask_rate": false,
   "ar_loss_weight": 1.0,
   "architectures": [
-    "NemotronLabsDiffusionEncoderModel"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "attn_implementation": null,
   "auto_map": {
     "AutoConfig": "configuration_nemotron_labs_diffusion.NemotronLabsDiffusionConfig",
-    "AutoModel": "modeling_nemotron_labs_diffusion.NemotronLabsDiffusionEncoderModel"
   },
   "block_size": 32,
   "bos_token_id": 1,
@@ -77,4 +77,4 @@
     "rope_type": "yarn",
     "type": "yarn"
   }
-}

   "adaptive_mask_rate": false,
   "ar_loss_weight": 1.0,
   "architectures": [
+    "NemotronLabsDiffusionModel"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "attn_implementation": null,
   "auto_map": {
     "AutoConfig": "configuration_nemotron_labs_diffusion.NemotronLabsDiffusionConfig",
+    "AutoModel": "modeling_nemotron_labs_diffusion.NemotronLabsDiffusionModel"
   },
   "block_size": 32,
   "bos_token_id": 1,
     "rope_type": "yarn",
     "type": "yarn"
   }
+}

configuration_nemotron_labs_diffusion.py CHANGED

@@ -24,7 +24,7 @@ logger = logging.get_logger(__name__)
 class NemotronLabsDiffusionConfig(PretrainedConfig):
     r"""
-    This is the configuration class to store the configuration of a [`NemotronLabsDiffusionEncoderModel`] (a Ministral3-based diffusion language model).
     It is used to instantiate a Nemotron Labs Diffusion model according to the specified arguments, defining the model architecture.
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the

 class NemotronLabsDiffusionConfig(PretrainedConfig):
     r"""
+    This is the configuration class to store the configuration of a [`NemotronLabsDiffusionModel`] (a Ministral3-based diffusion language model).
     It is used to instantiate a Nemotron Labs Diffusion model according to the specified arguments, defining the model architecture.
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the

modeling_nemotron_labs_diffusion.py CHANGED

@@ -434,7 +434,7 @@ def gumbel_topk(log_w: torch.Tensor, k: int) -> torch.Tensor:
     return mask
-class NemotronLabsDiffusionEncoderModel(Ministral3PreTrainedModel, GenerationMixin):
     """
     A single model with:
       - a bidirectional encoder + diffusion‐LM head over A
@@ -1109,4 +1109,4 @@ class NemotronLabsDiffusionEncoderModel(Ministral3PreTrainedModel, GenerationMix
         return x[:, : -(block_length * 2)], nfe
-__all__ = ["NemotronLabsDiffusionEncoderModel", "NemotronLabsDiffusionFlexAttention"]

     return mask
+class NemotronLabsDiffusionModel(Ministral3PreTrainedModel, GenerationMixin):
     """
     A single model with:
       - a bidirectional encoder + diffusion‐LM head over A
         return x[:, : -(block_length * 2)], nfe
+__all__ = ["NemotronLabsDiffusionModel", "NemotronLabsDiffusionFlexAttention"]