Commit ·
2058ebc
1
Parent(s): 77f1c46
Rename arch class: NemotronLabsDiffusionEncoderModel → NemotronLabsDiffusionModel (#5)
Browse files
- Rename arch class: NemotronLabsDiffusionEncoderModel → NemotronLabsDiffusionModel (540c304e355592ceee472c947337b3b5dd8f5948)
Co-authored-by: Khadkevich <mkhadkevich@users.noreply.huggingface.co>
config.json
CHANGED
|
@@ -5,14 +5,14 @@
|
|
| 5 |
"adaptive_mask_rate": false,
|
| 6 |
"ar_loss_weight": 1.0,
|
| 7 |
"architectures": [
|
| 8 |
-
"NemotronLabsDiffusionEncoderModel"
|
| 9 |
],
|
| 10 |
"attention_bias": false,
|
| 11 |
"attention_dropout": 0.0,
|
| 12 |
"attn_implementation": null,
|
| 13 |
"auto_map": {
|
| 14 |
"AutoConfig": "configuration_nemotron_labs_diffusion.NemotronLabsDiffusionConfig",
|
| 15 |
-
"AutoModel": "modeling_nemotron_labs_diffusion.NemotronLabsDiffusionEncoderModel"
|
| 16 |
},
|
| 17 |
"block_size": 32,
|
| 18 |
"bos_token_id": 1,
|
|
@@ -77,4 +77,4 @@
|
|
| 77 |
"rope_type": "yarn",
|
| 78 |
"type": "yarn"
|
| 79 |
}
|
| 80 |
-
}
|
|
|
|
| 5 |
"adaptive_mask_rate": false,
|
| 6 |
"ar_loss_weight": 1.0,
|
| 7 |
"architectures": [
|
| 8 |
+
"NemotronLabsDiffusionModel"
|
| 9 |
],
|
| 10 |
"attention_bias": false,
|
| 11 |
"attention_dropout": 0.0,
|
| 12 |
"attn_implementation": null,
|
| 13 |
"auto_map": {
|
| 14 |
"AutoConfig": "configuration_nemotron_labs_diffusion.NemotronLabsDiffusionConfig",
|
| 15 |
+
"AutoModel": "modeling_nemotron_labs_diffusion.NemotronLabsDiffusionModel"
|
| 16 |
},
|
| 17 |
"block_size": 32,
|
| 18 |
"bos_token_id": 1,
|
|
|
|
| 77 |
"rope_type": "yarn",
|
| 78 |
"type": "yarn"
|
| 79 |
}
|
| 80 |
+
}
|
configuration_nemotron_labs_diffusion.py
CHANGED
|
@@ -24,7 +24,7 @@ logger = logging.get_logger(__name__)
|
|
| 24 |
|
| 25 |
class NemotronLabsDiffusionConfig(PretrainedConfig):
|
| 26 |
r"""
|
| 27 |
-
This is the configuration class to store the configuration of a [`NemotronLabsDiffusionEncoderModel`] (a Ministral3-based diffusion language model).
|
| 28 |
It is used to instantiate a Nemotron Labs Diffusion model according to the specified arguments, defining the model architecture.
|
| 29 |
|
| 30 |
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
|
|
|
|
| 24 |
|
| 25 |
class NemotronLabsDiffusionConfig(PretrainedConfig):
|
| 26 |
r"""
|
| 27 |
+
This is the configuration class to store the configuration of a [`NemotronLabsDiffusionModel`] (a Ministral3-based diffusion language model).
|
| 28 |
It is used to instantiate a Nemotron Labs Diffusion model according to the specified arguments, defining the model architecture.
|
| 29 |
|
| 30 |
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
|
modeling_nemotron_labs_diffusion.py
CHANGED
|
@@ -434,7 +434,7 @@ def gumbel_topk(log_w: torch.Tensor, k: int) -> torch.Tensor:
|
|
| 434 |
return mask
|
| 435 |
|
| 436 |
|
| 437 |
-
class NemotronLabsDiffusionEncoderModel(Ministral3PreTrainedModel, GenerationMixin):
|
| 438 |
"""
|
| 439 |
A single model with:
|
| 440 |
- a bidirectional encoder + diffusion‐LM head over A
|
|
@@ -1109,4 +1109,4 @@ class NemotronLabsDiffusionEncoderModel(Ministral3PreTrainedModel, GenerationMix
|
|
| 1109 |
return x[:, : -(block_length * 2)], nfe
|
| 1110 |
|
| 1111 |
|
| 1112 |
-
__all__ = ["NemotronLabsDiffusionEncoderModel", "NemotronLabsDiffusionFlexAttention"]
|
|
|
|
| 434 |
return mask
|
| 435 |
|
| 436 |
|
| 437 |
+
class NemotronLabsDiffusionModel(Ministral3PreTrainedModel, GenerationMixin):
|
| 438 |
"""
|
| 439 |
A single model with:
|
| 440 |
- a bidirectional encoder + diffusion‐LM head over A
|
|
|
|
| 1109 |
return x[:, : -(block_length * 2)], nfe
|
| 1110 |
|
| 1111 |
|
| 1112 |
+
__all__ = ["NemotronLabsDiffusionModel", "NemotronLabsDiffusionFlexAttention"]
|