jingyaogong commited on
Commit
477708d
·
verified ·
1 Parent(s): bce19b2

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +10 -3
  2. config.json +49 -0
  3. model.safetensors +3 -0
  4. preprocessor_config.json +10 -0
README.md CHANGED
@@ -1,3 +1,10 @@
1
- ---
2
- license: cc-by-4.0
3
- ---
 
 
 
 
 
 
 
 
1
+ # Mimi FP16
2
+
3
+ This directory contains an FP16-converted version of [Kyutai Mimi](https://huggingface.co/kyutai/mimi), used as an audio codec for encoding and decoding. The original model was released by Kyutai, and the model architecture, weights, authorship, and license remain attributed to the original authors.
4
+
5
+ The original Mimi model is released under the `CC-BY-4.0` license. The files here have only been converted to FP16 to reduce memory usage and make local usage more convenient. When using, redistributing, or citing these files, please keep the original Kyutai attribution, model link, and license notice.
6
+
7
+ Original links:
8
+
9
+ - Hugging Face: https://huggingface.co/kyutai/mimi
10
+ - License: https://creativecommons.org/licenses/by/4.0/
config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MimiModel"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "audio_channels": 1,
7
+ "codebook_dim": 256,
8
+ "codebook_size": 2048,
9
+ "compress": 2,
10
+ "dilation_growth_rate": 2,
11
+ "frame_rate": 12.5,
12
+ "head_dim": 64,
13
+ "hidden_act": "gelu",
14
+ "hidden_size": 512,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 2048,
17
+ "kernel_size": 7,
18
+ "last_kernel_size": 3,
19
+ "layer_scale_initial_scale": 0.01,
20
+ "max_position_embeddings": 8000,
21
+ "model_type": "mimi",
22
+ "norm_eps": 1e-05,
23
+ "normalize": false,
24
+ "num_attention_heads": 8,
25
+ "num_filters": 64,
26
+ "num_hidden_layers": 8,
27
+ "num_key_value_heads": 8,
28
+ "num_residual_layers": 1,
29
+ "num_semantic_quantizers": 1,
30
+ "pad_mode": "constant",
31
+ "residual_kernel_size": 3,
32
+ "rope_theta": 10000.0,
33
+ "sampling_rate": 24000,
34
+ "sliding_window": 250,
35
+ "torch_dtype": "float16",
36
+ "transformers_version": "4.45.0.dev0",
37
+ "trim_right_ratio": 1.0,
38
+ "upsample_groups": 512,
39
+ "upsampling_ratios": [
40
+ 8,
41
+ 6,
42
+ 5,
43
+ 4
44
+ ],
45
+ "use_cache": false,
46
+ "use_causal_conv": true,
47
+ "use_conv_shortcut": false,
48
+ "vector_quantization_hidden_dimension": 256
49
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7542ee039d3025d5089cf227d21df64b6b8eff08fcd376a11a1fbd178dd9d3f5
3
+ size 192346842
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length_s": null,
3
+ "feature_extractor_type": "EncodecFeatureExtractor",
4
+ "feature_size": 1,
5
+ "overlap": null,
6
+ "padding_side": "right",
7
+ "padding_value": 0.0,
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 24000
10
+ }