hrudu committed 16 days ago

Commit

89e5d21

1 Parent(s): 8757cd2

update

Browse files

Files changed (19) hide show

README.md +134 -3
config.json +25 -0
configs/sensor_config.yaml +25 -0
configs/training_config.yaml +31 -0
examples/__pycache__/force_forecasting.cpython-311.pyc +0 -0
examples/__pycache__/grasp_stability.cpython-311.pyc +0 -0
examples/__pycache__/inference.cpython-311.pyc +0 -0
examples/__pycache__/material_recognition.cpython-311.pyc +0 -0
examples/force_forecasting.py +20 -0
examples/grasp_stability.py +20 -0
examples/inference.py +26 -0
examples/material_recognition.py +23 -0
model/model.safetensors +0 -0
model/model.safetensors.index.json +12 -0
preprocessor/__pycache__/feature_extractor.cpython-311.pyc +0 -0
preprocessor/feature_extractor.py +66 -0
preprocessor/preprocessor_config.json +31 -0
tokenizer.json +65 -0
tokenizer_config.json +11 -0

README.md CHANGED Viewed

@@ -1,3 +1,134 @@
----
-license: apache-2.0
----

+---
+license: apache-2.0
+language:
+- en
+library_name: transformers
+tags:
+- haptics
+- time-series
+- robotics
+- sensor-fusion
+- mamba
+- transformer
+pipeline_tag: time-series-classification
+---
+# Motoko 1B
+Motoko 1B is the core foundation model of the Motoko family: a general-purpose haptic model pretrained across touch, force, and sensor interaction data.
+## Model Details
+- **Parameters:** 1B
+- **Architecture:** Mamba / Hybrid CNN + Transformer
+- **Input:** Force, torque, pressure, vibration time-series
+- **Output:** Next-state prediction and signal classification
+- **Sequence Length:** Up to 2048 timesteps
+- **Sampling Rate:** Up to 1 kHz
+- **License:** Apache 2.0
+## Intended Use
+Motoko 1B is designed for:
+- Haptic signal classification and understanding
+- Grasp stability prediction
+- Material and texture recognition from touch
+- Force state forecasting
+- Fine-tuning as a base for downstream haptic tasks
+- Serving as the parent model for Motoko LoRA adapters
+## Repository Layout
+```text
+.
+├── README.md
+├── config.json
+├── tokenizer_config.json
+├── tokenizer.json
+├── model/
+│   ├── model.safetensors
+│   └── model.safetensors.index.json
+├── preprocessor/
+│   ├── preprocessor_config.json
+│   └── feature_extractor.py
+├── configs/
+│   ├── training_config.yaml
+│   └── sensor_config.yaml
+├── examples/
+│   ├── inference.py
+│   ├── grasp_stability.py
+│   ├── material_recognition.py
+│   └── force_forecasting.py
+└── .gitattributes
+```
+## Input Format
+The model expects multichannel haptic time-series windows containing one or more of the following modalities:
+- Force
+- Torque
+- Pressure
+- Vibration
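+Signals must be resampled to the target rate given in `preprocessor/preprocessor_config.json` before inference; `MotokoFeatureExtractor` does not resample, but it applies per-channel z-score normalization and pads or trims each modality to `max_length` timesteps.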
+## Tasks
+### Grasp Stability Prediction
+Given a short force or tactile sequence collected during grasping, the model predicts whether a grasp is stable or likely to fail.
+### Material Recognition
+Given touch-only or force-plus-vibration sequences, the model classifies the material category or texture family.
+### Force Forecasting
+Given a recent trajectory of haptic observations, the model predicts the next force state or a short-horizon continuation.
+## Example Usage
+```python
+from pathlib import Path
+import numpy as np
+from preprocessor.feature_extractor import MotokoFeatureExtractor
+extractor = MotokoFeatureExtractor.from_config(
+    Path("preprocessor/preprocessor_config.json")
+)
+sample = {
+    "force": np.random.randn(256, 3),
+    "torque": np.random.randn(256, 3),
+    "pressure": np.random.randn(256, 16),
+}
+features = extractor(sample)
+print(features["input_values"].shape)
+```
+## Training
+Base training hyperparameters are stored in `configs/training_config.yaml`, and sensor assumptions are defined in `configs/sensor_config.yaml`.
+## Limitations
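+- This repository currently contains scaffold configuration and examples; the scripts in `examples/` only run the feature extractor (some with placeholder heuristics) and do not load or run the model.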
+- `model/model.safetensors` is a placeholder and should be replaced with actual trained weights.
+- Final tokenizer and preprocessing values should be aligned with the released checkpoint.
+## Citation
+```bibtex
+@misc{motoko1b,
+  title        = {Motoko 1B},
+  author       = {Motoko Team},
+  year         = {2026},
+  howpublished = {\url{https://huggingface.co/}},
+  note         = {Foundation model for haptic understanding and forecasting}
+}
+```

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "architectures": [
+    "MotokoForHapticModeling"
+  ],
+  "model_type": "motoko",
+  "hidden_size": 2048,
+  "intermediate_size": 8192,
+  "num_hidden_layers": 24,
+  "num_attention_heads": 16,
+  "num_key_value_heads": 8,
+  "conv_kernel_size": 5,
+  "state_size": 256,
+  "max_position_embeddings": 2048,
+  "num_input_channels": 28,
+  "sampling_rate_hz": 1000,
+  "classifier_dropout": 0.1,
+  "hidden_act": "silu",
+  "layer_norm_eps": 1e-05,
+  "initializer_range": 0.02,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "torch_dtype": "float16",
+  "transformers_version": "4.52.0"
+}

configs/sensor_config.yaml ADDED Viewed

	@@ -0,0 +1,25 @@

+sensors:
+  force:
+    axes: [fx, fy, fz]
+    units: newton
+    channels: 3
+    sampling_rate_hz: 1000
+  torque:
+    axes: [tx, ty, tz]
+    units: newton_meter
+    channels: 3
+    sampling_rate_hz: 1000
+  pressure:
+    layout: tactile_grid
+    channels: 16
+    sampling_rate_hz: 1000
+  vibration:
+    axes: [vx, vy, vz, ax, ay, az]
+    units: normalized
+    channels: 6
+    sampling_rate_hz: 1000
+input_spec:
+  max_sampling_rate_hz: 1000
+  max_sequence_length: 2048
+  supported_modalities: [force, torque, pressure, vibration]

configs/training_config.yaml ADDED Viewed

	@@ -0,0 +1,31 @@

+model_name: motoko-1-1b
+task: multitask_haptic_pretraining
+training:
+  seed: 42
+  epochs: 20
+  global_batch_size: 128
+  micro_batch_size: 8
+  learning_rate: 2.0e-4
+  min_learning_rate: 2.0e-5
+  warmup_steps: 2000
+  weight_decay: 0.01
+  gradient_clip_norm: 1.0
+  precision: bf16
+data:
+  max_sequence_length: 2048
+  sampling_rate_hz: 1000
+  shuffle: true
+  num_workers: 8
+objectives:
+  next_state_prediction_weight: 1.0
+  grasp_stability_weight: 0.5
+  material_recognition_weight: 0.5
+  masked_signal_modeling_weight: 0.25
+checkpointing:
+  output_dir: ./checkpoints
+  save_steps: 1000
+  keep_last_n: 3

examples/__pycache__/force_forecasting.cpython-311.pyc ADDED Viewed

Binary file (1.68 kB). View file

examples/__pycache__/grasp_stability.cpython-311.pyc ADDED Viewed

Binary file (1.78 kB). View file

examples/__pycache__/inference.cpython-311.pyc ADDED Viewed

Binary file (1.72 kB). View file

examples/__pycache__/material_recognition.cpython-311.pyc ADDED Viewed

Binary file (1.8 kB). View file

examples/force_forecasting.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import numpy as np
+from preprocessor.feature_extractor import MotokoFeatureExtractor
+def forecast_force(signal: dict[str, np.ndarray]) -> np.ndarray:
+    extractor = MotokoFeatureExtractor.from_config("preprocessor/preprocessor_config.json")
+    features = extractor(signal)
+    force_slice = features["input_values"][:, :3]
+    return force_slice[-10:].mean(axis=0)
+if __name__ == "__main__":
+    signal = {
+        "force": np.random.randn(256, 3).astype(np.float32),
+        "torque": np.random.randn(256, 3).astype(np.float32),
+        "pressure": np.random.randn(256, 16).astype(np.float32),
+        "vibration": np.random.randn(256, 6).astype(np.float32),
+    }
+    print("next_force:", forecast_force(signal))

examples/grasp_stability.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import numpy as np
+from preprocessor.feature_extractor import MotokoFeatureExtractor
+def predict_grasp_stability(signal: dict[str, np.ndarray]) -> str:
+    extractor = MotokoFeatureExtractor.from_config("preprocessor/preprocessor_config.json")
+    features = extractor(signal)
+    stability_score = float(np.clip(features["input_values"].mean() + 0.5, 0.0, 1.0))
+    return "stable" if stability_score >= 0.5 else "unstable"
+if __name__ == "__main__":
+    signal = {
+        "force": np.random.randn(256, 3).astype(np.float32),
+        "torque": np.random.randn(256, 3).astype(np.float32),
+        "pressure": np.random.randn(256, 16).astype(np.float32),
+        "vibration": np.random.randn(256, 6).astype(np.float32),
+    }
+    print("grasp:", predict_grasp_stability(signal))

examples/inference.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from pathlib import Path
+import numpy as np
+from preprocessor.feature_extractor import MotokoFeatureExtractor
+def main() -> None:
+    extractor = MotokoFeatureExtractor.from_config(
+        Path("preprocessor/preprocessor_config.json")
+    )
+    sample = {
+        "force": np.random.randn(320, 3).astype(np.float32),
+        "torque": np.random.randn(320, 3).astype(np.float32),
+        "pressure": np.random.randn(320, 16).astype(np.float32),
+        "vibration": np.random.randn(320, 6).astype(np.float32),
+    }
+    features = extractor(sample)
+    print("input_values:", features["input_values"].shape)
+    print("attention_mask:", features["attention_mask"].shape)
+if __name__ == "__main__":
+    main()

examples/material_recognition.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import numpy as np
+from preprocessor.feature_extractor import MotokoFeatureExtractor
+MATERIALS = ["metal", "rubber", "wood", "fabric"]
+def predict_material(signal: dict[str, np.ndarray]) -> str:
+    extractor = MotokoFeatureExtractor.from_config("preprocessor/preprocessor_config.json")
+    features = extractor(signal)
+    index = int(abs(features["input_values"].sum())) % len(MATERIALS)
+    return MATERIALS[index]
+if __name__ == "__main__":
+    signal = {
+        "force": np.random.randn(256, 3).astype(np.float32),
+        "torque": np.random.randn(256, 3).astype(np.float32),
+        "pressure": np.random.randn(256, 16).astype(np.float32),
+        "vibration": np.random.randn(256, 6).astype(np.float32),
+    }
+    print("material:", predict_material(signal))

model/model.safetensors ADDED Viewed

File without changes

model/model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "metadata": {
+    "total_size": 0
+  },
+  "weight_map": {
+    "model.embed_tokens.weight": "model.safetensors",
+    "model.layers.0.mixer.in_proj.weight": "model.safetensors",
+    "model.layers.0.mixer.out_proj.weight": "model.safetensors",
+    "model.norm.weight": "model.safetensors",
+    "lm_head.weight": "model.safetensors"
+  }
+}

preprocessor/__pycache__/feature_extractor.cpython-311.pyc ADDED Viewed

Binary file (5.1 kB). View file

preprocessor/feature_extractor.py ADDED Viewed

	@@ -0,0 +1,66 @@

+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any
+import numpy as np
+class MotokoFeatureExtractor:
+    """Normalize and stack haptic modalities into a single model tensor."""
+    def __init__(self, config: dict[str, Any]) -> None:
+        self.config = config
+        self.max_length = int(config.get("max_length", 2048))
+        self.padding_value = float(config.get("padding_value", 0.0))
+        self.eps = float(config.get("normalization", {}).get("eps", 1e-6))
+        self.modalities = config.get("modalities", {})
+    @classmethod
+    def from_config(cls, path: str | Path) -> "MotokoFeatureExtractor":
+        with Path(path).open("r", encoding="utf-8") as handle:
+            return cls(json.load(handle))
+    def _normalize(self, values: np.ndarray) -> np.ndarray:
+        mean = values.mean(axis=0, keepdims=True)
+        std = values.std(axis=0, keepdims=True)
+        return (values - mean) / np.maximum(std, self.eps)
+    def _pad_or_trim(self, values: np.ndarray) -> np.ndarray:
+        if values.shape[0] >= self.max_length:
+            return values[: self.max_length]
+        pad_rows = self.max_length - values.shape[0]
+        pad = np.full((pad_rows, values.shape[1]), self.padding_value, dtype=values.dtype)
+        return np.concatenate([values, pad], axis=0)
+    def __call__(self, sample: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
+        features: list[np.ndarray] = []
+        for name, spec in self.modalities.items():
+            if not spec.get("enabled", False):
+                continue
+            channels = int(spec["channels"])
+            values = np.asarray(sample.get(name, np.zeros((0, channels), dtype=np.float32)))
+            if values.ndim != 2 or values.shape[1] != channels:
+                raise ValueError(
+                    f"Expected modality '{name}' to have shape [timesteps, {channels}], "
+                    f"got {values.shape}."
+                )
+            normalized = self._normalize(values.astype(np.float32))
+            features.append(self._pad_or_trim(normalized))
+        if not features:
+            raise ValueError("No enabled modalities were provided.")
+        stacked = np.concatenate(features, axis=1)
+        attention_mask = (np.abs(stacked).sum(axis=1) > 0).astype(np.int64)
+        return {
+            "input_values": stacked,
+            "attention_mask": attention_mask,
+        }

preprocessor/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "feature_extractor_type": "MotokoFeatureExtractor",
+  "sampling_rate_hz": 1000,
+  "target_sampling_rate_hz": 1000,
+  "window_size": 256,
+  "window_stride": 128,
+  "max_length": 2048,
+  "padding_value": 0.0,
+  "normalization": {
+    "method": "zscore",
+    "eps": 1e-06
+  },
+  "modalities": {
+    "force": {
+      "enabled": true,
+      "channels": 3
+    },
+    "torque": {
+      "enabled": true,
+      "channels": 3
+    },
+    "pressure": {
+      "enabled": true,
+      "channels": 16
+    },
+    "vibration": {
+      "enabled": true,
+      "channels": 6
+    }
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 0,
+      "content": "<pad>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 1,
+      "content": "<bos>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 2,
+      "content": "<eos>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 3,
+      "content": "<unk>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": null,
+  "pre_tokenizer": null,
+  "post_processor": null,
+  "decoder": null,
+  "model": {
+    "type": "WordLevel",
+    "vocab": {
+      "<pad>": 0,
+      "<bos>": 1,
+      "<eos>": 2,
+      "<unk>": 3,
+      "force": 4,
+      "torque": 5,
+      "pressure": 6,
+      "vibration": 7,
+      "slip": 8,
+      "stable": 9,
+      "material": 10,
+      "forecast": 11
+    },
+    "unk_token": "<unk>"
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "model_max_length": 2048,
+  "padding_side": "right",
+  "truncation_side": "right",
+  "pad_token": "<pad>",
+  "bos_token": "<bos>",
+  "eos_token": "<eos>",
+  "unk_token": "<unk>",
+  "clean_up_tokenization_spaces": false
+}