# motoko-embedding-1-1b / preprocessor/feature_extractor.py
# Author: hrudu — "Add Hugging Face model scaffold" (commit 14e9a9f)
import json
from pathlib import Path
import numpy as np
class HapticFeatureExtractor:
    """Turn raw haptic channel readings into fixed-length model inputs.

    Sequences longer than ``window_size`` are truncated; shorter ones are
    padded with ``padding_value``. Optional per-channel normalization is
    applied first when the config enables it and provides statistics.
    """

    def __init__(self, config):
        # Keep the raw config: _normalize consults the "normalize" flag lazily.
        self.config = config
        self.window_size = int(config["window_size"])
        self.padding_value = float(config.get("padding_value", 0.0))
        self.return_attention_mask = bool(config.get("return_attention_mask", True))
        stats = config.get("normalization", {})
        self.mean = np.asarray(stats.get("mean", []), dtype=np.float32)
        self.std = np.asarray(stats.get("std", []), dtype=np.float32)

    @classmethod
    def from_pretrained(cls, root):
        """Build an extractor from ``root/preprocessor/preprocessor_config.json``."""
        cfg_file = Path(root) / "preprocessor" / "preprocessor_config.json"
        with cfg_file.open("r", encoding="utf-8") as fh:
            return cls(json.load(fh))

    def _normalize(self, values):
        """Apply per-channel ``(values - mean) / std``.

        Returns the input unchanged when normalization is disabled or when
        mean/std statistics are absent; a zero std is replaced with 1 to
        avoid division by zero.
        """
        disabled = not self.config.get("normalize", True)
        missing_stats = self.mean.size == 0 or self.std.size == 0
        if disabled or missing_stats:
            return values
        safe_std = np.where(self.std == 0, 1.0, self.std)
        return (values - self.mean) / safe_std

    def __call__(self, values):
        """Normalize, then pad or truncate to exactly ``window_size`` frames.

        Returns a dict with "input_values" (float32 array of shape
        [window_size, num_channels]) and, when enabled, an int64
        "attention_mask" where 1 marks a real frame and 0 marks padding.
        Raises ValueError for any input that is not 2-D.
        """
        values = np.asarray(values, dtype=np.float32)
        if values.ndim != 2:
            raise ValueError("Expected input shape [sequence_length, num_channels].")
        values = self._normalize(values)
        seq_len, num_channels = values.shape
        if seq_len < self.window_size:
            missing = self.window_size - seq_len
            filler = np.full((missing, num_channels), self.padding_value, dtype=np.float32)
            window = np.concatenate([values, filler], axis=0)
            mask = np.concatenate(
                [
                    np.ones(seq_len, dtype=np.int64),
                    np.zeros(missing, dtype=np.int64),
                ]
            )
        else:
            window = values[: self.window_size]
            mask = np.ones(self.window_size, dtype=np.int64)
        output = {"input_values": window}
        if self.return_attention_mask:
            output["attention_mask"] = mask
        return output