sql_env / server /mock_tokenizer.py
hjerpe's picture
Upload folder using huggingface_hub
9e64e71 verified
from openenv.core.env_server.interfaces import Message
class MockTokenizer:
"""Mock tokenizer for testing without requiring transformers library."""
def apply_chat_template(
self,
conversation: list[Message],
tokenize: bool = True,
return_tensors: str | None = None,
**kwargs,
):
"""Mock implementation that creates deterministic token IDs from text."""
text = " ".join([msg["content"] for msg in conversation])
tokens = [ord(c) % 256 for c in text]
if return_tensors == "pt":
try:
import torch
return torch.tensor([tokens])
except ImportError:
return [tokens]
return tokens
def decode(self, token_ids, skip_special_tokens: bool = False, **kwargs) -> str:
"""Mock decode that reverses the encoding process."""
if hasattr(token_ids, "tolist"):
token_ids = token_ids.tolist()
return "".join(chr(t) for t in token_ids)