trohrbaugh committed on
Commit
ace7f2c
·
verified ·
1 Parent(s): bbd83fb

Add NemotronH to KNOWN_BASES; handle llm_config nesting pattern

Browse files
Files changed (1) hide show
  1. scan.py +15 -5
scan.py CHANGED
@@ -66,6 +66,11 @@ KNOWN_BASES = {
66
  "vocab_size": [256000, 262144],
67
  "model_type_patterns": ["gemma"],
68
  },
 
 
 
 
 
69
  }
70
 
71
 
@@ -137,11 +142,16 @@ def stage1_screen(model_id: str, config: dict) -> dict:
137
  Returns a structured verdict without downloading any weights.
138
  Handles nested text_config (Qwen3.5/3.6, Mistral3, MiMo-V2.5 pattern).
139
  """
140
- # Merge text_config into top-level if present (multimodal nested configs)
141
- if config.get("text_config") and not config.get("vocab_size"):
142
- tc = config["text_config"]
143
- config = {**tc, **{k: v for k, v in config.items()
144
- if k not in ("text_config", "vision_config", "audio_config")}}
 
 
 
 
 
145
 
146
  vocab = config.get("vocab_size")
147
  model_type = (config.get("model_type") or "").lower()
 
66
  "vocab_size": [256000, 262144],
67
  "model_type_patterns": ["gemma"],
68
  },
69
+ "nemotron_h": {
70
+ "name": "NemotronH (NVIDIA Mamba+MoE hybrid)",
71
+ "vocab_size": 131072,
72
+ "model_type_patterns": ["nemotron_h", "nemotronh"],
73
+ },
74
  }
75
 
76
 
 
142
  Returns a structured verdict without downloading any weights.
143
  Handles nested text_config (Qwen3.5/3.6, Mistral3, MiMo-V2.5 pattern).
144
  """
145
+ # Lift nested LLM config into top-level when top-level vocab/hidden is absent.
146
+ # Handles: text_config (Qwen3.5/3.6, Mistral3, MiMo-V2.5), llm_config (NemotronH Omni)
147
+ _NESTED_KEYS = ("text_config", "llm_config")
148
+ _SKIP_KEYS = ("text_config", "llm_config", "vision_config", "audio_config", "sound_config")
149
+ if not config.get("vocab_size"):
150
+ for nested_key in _NESTED_KEYS:
151
+ if config.get(nested_key) and config[nested_key].get("vocab_size"):
152
+ tc = config[nested_key]
153
+ config = {**tc, **{k: v for k, v in config.items() if k not in _SKIP_KEYS}}
154
+ break
155
 
156
  vocab = config.get("vocab_size")
157
  model_type = (config.get("model_type") or "").lower()