MMaghoumi mkhadkevich committed on
Commit
2058ebc
·
1 Parent(s): 77f1c46

Rename arch class: NemotronLabsDiffusionEncoderModel → NemotronLabsDiffusionModel (#5)

Browse files

- Rename arch class: NemotronLabsDiffusionEncoderModel → NemotronLabsDiffusionModel (540c304e355592ceee472c947337b3b5dd8f5948)


Co-authored-by: Khadkevich <mkhadkevich@users.noreply.huggingface.co>

config.json CHANGED
@@ -5,14 +5,14 @@
5
  "adaptive_mask_rate": false,
6
  "ar_loss_weight": 1.0,
7
  "architectures": [
8
- "NemotronLabsDiffusionEncoderModel"
9
  ],
10
  "attention_bias": false,
11
  "attention_dropout": 0.0,
12
  "attn_implementation": null,
13
  "auto_map": {
14
  "AutoConfig": "configuration_nemotron_labs_diffusion.NemotronLabsDiffusionConfig",
15
- "AutoModel": "modeling_nemotron_labs_diffusion.NemotronLabsDiffusionEncoderModel"
16
  },
17
  "block_size": 32,
18
  "bos_token_id": 1,
@@ -77,4 +77,4 @@
77
  "rope_type": "yarn",
78
  "type": "yarn"
79
  }
80
- }
 
5
  "adaptive_mask_rate": false,
6
  "ar_loss_weight": 1.0,
7
  "architectures": [
8
+ "NemotronLabsDiffusionModel"
9
  ],
10
  "attention_bias": false,
11
  "attention_dropout": 0.0,
12
  "attn_implementation": null,
13
  "auto_map": {
14
  "AutoConfig": "configuration_nemotron_labs_diffusion.NemotronLabsDiffusionConfig",
15
+ "AutoModel": "modeling_nemotron_labs_diffusion.NemotronLabsDiffusionModel"
16
  },
17
  "block_size": 32,
18
  "bos_token_id": 1,
 
77
  "rope_type": "yarn",
78
  "type": "yarn"
79
  }
80
+ }
configuration_nemotron_labs_diffusion.py CHANGED
@@ -24,7 +24,7 @@ logger = logging.get_logger(__name__)
24
 
25
  class NemotronLabsDiffusionConfig(PretrainedConfig):
26
  r"""
27
- This is the configuration class to store the configuration of a [`NemotronLabsDiffusionEncoderModel`] (a Ministral3-based diffusion language model).
28
  It is used to instantiate a Nemotron Labs Diffusion model according to the specified arguments, defining the model architecture.
29
 
30
  Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
 
24
 
25
  class NemotronLabsDiffusionConfig(PretrainedConfig):
26
  r"""
27
+ This is the configuration class to store the configuration of a [`NemotronLabsDiffusionModel`] (a Ministral3-based diffusion language model).
28
  It is used to instantiate a Nemotron Labs Diffusion model according to the specified arguments, defining the model architecture.
29
 
30
  Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
modeling_nemotron_labs_diffusion.py CHANGED
@@ -434,7 +434,7 @@ def gumbel_topk(log_w: torch.Tensor, k: int) -> torch.Tensor:
434
  return mask
435
 
436
 
437
- class NemotronLabsDiffusionEncoderModel(Ministral3PreTrainedModel, GenerationMixin):
438
  """
439
  A single model with:
440
  - a bidirectional encoder + diffusion‐LM head over A
@@ -1109,4 +1109,4 @@ class NemotronLabsDiffusionEncoderModel(Ministral3PreTrainedModel, GenerationMix
1109
  return x[:, : -(block_length * 2)], nfe
1110
 
1111
 
1112
- __all__ = ["NemotronLabsDiffusionEncoderModel", "NemotronLabsDiffusionFlexAttention"]
 
434
  return mask
435
 
436
 
437
+ class NemotronLabsDiffusionModel(Ministral3PreTrainedModel, GenerationMixin):
438
  """
439
  A single model with:
440
  - a bidirectional encoder + diffusion‐LM head over A
 
1109
  return x[:, : -(block_length * 2)], nfe
1110
 
1111
 
1112
+ __all__ = ["NemotronLabsDiffusionModel", "NemotronLabsDiffusionFlexAttention"]