Instructions to use FrontiersMind/Nandi-Mini-150M with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use FrontiersMind/Nandi-Mini-150M with Transformers:

# Two independent ways to load the model with Transformers; use either one.
#
# Option 1: use a pipeline as a high-level helper.
# NOTE(review): trust_remote_code=True is presumably needed because the
# repository ships its own configuration_nandi.py (see the commit shown
# further down this page) — confirm before running code from an untrusted
# revision.
from transformers import pipeline

pipe = pipeline("text-generation", model="FrontiersMind/Nandi-Mini-150M", trust_remote_code=True)

# Option 2: load the model directly.
# This snippet only creates the model object; no tokenizer is loaded here.
# NOTE(review): dtype="auto" presumably lets the library choose the dtype from
# the checkpoint — confirm against the installed transformers version.
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("FrontiersMind/Nandi-Mini-150M", trust_remote_code=True, dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use FrontiersMind/Nandi-Mini-150M with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "FrontiersMind/Nandi-Mini-150M"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "FrontiersMind/Nandi-Mini-150M",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker

docker model run hf.co/FrontiersMind/Nandi-Mini-150M

SGLang

How to use FrontiersMind/Nandi-Mini-150M with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "FrontiersMind/Nandi-Mini-150M" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "FrontiersMind/Nandi-Mini-150M",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "FrontiersMind/Nandi-Mini-150M" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "FrontiersMind/Nandi-Mini-150M",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Docker Model Runner
How to use FrontiersMind/Nandi-Mini-150M with Docker Model Runner:
```
docker model run hf.co/FrontiersMind/Nandi-Mini-150M
```

HemanthSai7 committed on Apr 1

Commit

094f4e0

verified ·

1 Parent(s): 3749884

Fix configuration_nandi.py: use __init__ instead of @strict /__post_init__ for compatibility with released transformers

Browse files

Files changed (1) hide show

configuration_nandi.py +67 -58

configuration_nandi.py CHANGED Viewed

@@ -1,10 +1,4 @@
-#                🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
-#           This file was automatically generated from src/transformers/models/nandi/modular_nandi.py.
-#               Do NOT edit this file manually as any edits will be overwritten by the generation of
-#             the file from the modular. If any change should be done, please apply the change to the
-#                          modular_nandi.py file directly. One of our CI enforces this.
-#                🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
-# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,29 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from huggingface_hub.dataclasses import strict
 from transformers.configuration_utils import PretrainedConfig
-from transformers.modeling_rope_utils import RopeParameters
-@strict(accept_kwargs=True)
 class NandiConfig(PretrainedConfig):
     r"""
     Example:
     ```python
-    >>> from transformers import NandiConfig, NandiForCausalLM
-    >>> # Initializing a Nandi style configuration
-    >>> configuration = NandiConfig()
-    >>> # Initializing a model from the Nandi style configuration
-    >>> model = NandiForCausalLM(configuration)
-    >>> # Accessing the model configuration
     >>> configuration = model.config
-    ```"""
     model_type = "nandi"
     keys_to_ignore_at_inference = ["past_key_values"]
@@ -55,44 +45,56 @@ class NandiConfig(PretrainedConfig):
         "layers.*.mlp.down_proj": "rowwise",
     }
-    # Defaults from the provided Nanotron training config.
-    vocab_size: int = 131072
-    hidden_size: int = 832
-    intermediate_size: int = 2496
-    num_hidden_layers: int = 16
-    num_attention_heads: int = 16
-    num_key_value_heads: int | None = 4
-    head_dim: int | None = None
-    hidden_act: str = "silu"
-    max_position_embeddings: int = 2048
-    initializer_range: float = 0.008
-    rms_norm_eps: float = 1e-5
-    use_cache: bool = True
-    pad_token_id: int | None = None
-    bos_token_id: int | None = 1
-    eos_token_id: int | list[int] | None = 0
-    pretraining_tp: int | None = 1
-    tie_word_embeddings: bool = True
-    rope_parameters: RopeParameters | dict | None = None
-    attention_bias: bool = False
-    attention_dropout: float = 0.0
-    mlp_bias: bool = False
-    # Nandi-specific options.
-    factorized_embedding: bool = True
-    embedding_rank: int = 196
-    layer_sharing: bool = True
-    layer_sharing_repeats: int = 2
-    def __post_init__(self, **kwargs):
-        if self.num_key_value_heads is None:
-            self.num_key_value_heads = self.num_attention_heads
-        if self.head_dim is None:
-            self.head_dim = self.hidden_size // self.num_attention_heads
-        if self.rope_parameters is None:
-            self.rope_parameters = {"rope_theta": 100000.0}
-        if not self.layer_sharing:
-            self.layer_sharing_repeats = 1
         if self.factorized_embedding and self.embedding_rank <= 0:
             raise ValueError(
@@ -100,12 +102,19 @@ class NandiConfig(PretrainedConfig):
             )
         if self.hidden_size % self.num_attention_heads != 0:
             raise ValueError(
-                f"`hidden_size` ({self.hidden_size}) must be divisible by `num_attention_heads` ({self.num_attention_heads})."
             )
         if self.layer_sharing_repeats < 1:
             raise ValueError(f"`layer_sharing_repeats` must be >= 1, got {self.layer_sharing_repeats}.")
-        super().__post_init__(**kwargs)
 __all__ = ["NandiConfig"]

+# Copyright 2026 RTA AI Labs. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from transformers.configuration_utils import PretrainedConfig
 class NandiConfig(PretrainedConfig):
     r"""
+    Configuration class for the Nandi model.
     Example:
     ```python
+    >>> from transformers import AutoConfig, AutoModelForCausalLM
+    >>> configuration = AutoConfig.from_pretrained("Rta-AILabs/Nandi-150M-remote", trust_remote_code=True)
+    >>> model = AutoModelForCausalLM.from_pretrained("Rta-AILabs/Nandi-150M-remote", trust_remote_code=True)
     >>> configuration = model.config
+    ```
+    """
     model_type = "nandi"
     keys_to_ignore_at_inference = ["past_key_values"]
         "layers.*.mlp.down_proj": "rowwise",
     }
+    def __init__(
+        self,
+        vocab_size=131072,
+        hidden_size=832,
+        intermediate_size=2496,
+        num_hidden_layers=16,
+        num_attention_heads=16,
+        num_key_value_heads=4,
+        head_dim=None,
+        hidden_act="silu",
+        max_position_embeddings=2048,
+        initializer_range=0.008,
+        rms_norm_eps=1e-5,
+        use_cache=True,
+        pad_token_id=None,
+        bos_token_id=1,
+        eos_token_id=0,
+        pretraining_tp=1,
+        tie_word_embeddings=True,
+        rope_parameters=None,
+        attention_bias=False,
+        attention_dropout=0.0,
+        mlp_bias=False,
+        factorized_embedding=True,
+        embedding_rank=196,
+        layer_sharing=True,
+        layer_sharing_repeats=2,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads if num_key_value_heads is not None else num_attention_heads
+        self.head_dim = head_dim if head_dim is not None else hidden_size // num_attention_heads
+        self.hidden_act = hidden_act
+        self.max_position_embeddings = max_position_embeddings
+        self.initializer_range = initializer_range
+        self.rms_norm_eps = rms_norm_eps
+        self.use_cache = use_cache
+        self.pretraining_tp = pretraining_tp
+        self.rope_parameters = rope_parameters if rope_parameters is not None else {"rope_theta": 100000.0}
+        self.attention_bias = attention_bias
+        self.attention_dropout = attention_dropout
+        self.mlp_bias = mlp_bias
+        self.factorized_embedding = factorized_embedding
+        self.embedding_rank = embedding_rank
+        self.layer_sharing = layer_sharing
+        self.layer_sharing_repeats = layer_sharing_repeats if layer_sharing else 1
         if self.factorized_embedding and self.embedding_rank <= 0:
             raise ValueError(
             )
         if self.hidden_size % self.num_attention_heads != 0:
             raise ValueError(
+                f"`hidden_size` ({self.hidden_size}) must be divisible by "
+                f"`num_attention_heads` ({self.num_attention_heads})."
             )
         if self.layer_sharing_repeats < 1:
             raise ValueError(f"`layer_sharing_repeats` must be >= 1, got {self.layer_sharing_repeats}.")
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
 __all__ = ["NandiConfig"]

Fix configuration_nandi.py: use __init__ instead of @strict /__post_init__ for compatibility with released transformers

Fix configuration_nandi.py: use __init__ instead of @strict /__post_init__ for compatibility with released transformers