""" Custom tokenizer for Nemotron-Diffusion-Exp-Ministral-8B-Instruct (final-template). Extends PreTrainedTokenizerFast with a `process_messages` method that handles image token expansion and pixel value preprocessing, analogous to MistralCommonBackend.apply_chat_template(return_dict=True). Usage: tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True) result = tokenizer.process_messages(messages) # result["input_ids"] – (1, seq_len) with expanded image tokens # result["pixel_values"] – (N, 3, H, W) if images present # result["image_sizes"] – list of (H, W) tuples """ from typing import Any, Dict, List from transformers import PreTrainedTokenizerFast from .image_processing import process_messages as _process_messages class NemotronLabsDiffusionVLMTokenizerFast(PreTrainedTokenizerFast): """PreTrainedTokenizerFast + image-aware process_messages().""" def process_messages( self, messages: List[Dict[str, Any]], **kwargs, ) -> Dict[str, Any]: """ Process chat messages with optional images. Renders the chat template, expands image placeholders based on actual image dimensions, preprocesses pixel values, and tokenizes. Args: messages: OpenAI-style list of message dicts. **kwargs: forwarded to image_processing.process_messages (patch_size, spatial_merge_size, max_image_size, return_tensors, enable_thinking). Returns: dict with input_ids, and optionally pixel_values + image_sizes. """ return _process_messages(self, messages, **kwargs)