wdga committed on
Commit
d7bdcbf
·
verified ·
1 Parent(s): 000d20b

Upload Kokoro LiteRT runtime preview

Browse files
.gitattributes CHANGED
@@ -1,35 +1,3 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.tflite filter=lfs diff=lfs merge=lfs -text
2
+ *.npz filter=lfs diff=lfs merge=lfs -text
3
+ *.so filter=lfs diff=lfs merge=lfs -text
 
 
 
 
LICENSE ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
MANIFEST.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "kokoro-82m-litert-runtime-preview",
3
+ "artifact_kind": "LiteRT/TFLite Kokoro text-to-audio runtime preview",
4
+ "source_model": "hexgrad/Kokoro-82M",
5
+ "source_checkpoint": "kokoro-v1_0.pth",
6
+ "source_checkpoint_sha256": "496dba118d1a58f5f3db2efc88dbdc216e0483fc89fe6e47ee1f2c53f18ad1e4",
7
+ "generated_by_repo": "https://github.com/will-deines/robot",
8
+ "sample_rate_hz": 24000,
9
+ "runtime_shape": "text -> KPipeline G2P/tokenization -> LiteRT frontend bucket -> LiteRT decoder/vocoder -> WAV",
10
+ "text_frontend": {
11
+ "package": "kokoro",
12
+ "component": "KPipeline",
13
+ "methods": ["g2p", "en_tokenize"],
14
+ "loads_pytorch_model_weights": false,
15
+ "forbidden_in_request_path": ["KModel"]
16
+ },
17
+ "frontend": {
18
+ "kind": "bucketed_full_frontend",
19
+ "buckets": [
20
+ {
21
+ "tokens": 48,
22
+ "max_frames": 128,
23
+ "max_f0_frames": 256,
24
+ "path": "frontend/kokoro_full_frontend_masked_b48_f128_f0256.tflite",
25
+ "sha256": "d075924f0f0be81c382f4a68b2799ac3a2142e650ac0ceb730aea7f6f4f5f4da",
26
+ "bytes": 128007356
27
+ }
28
+ ]
29
+ },
30
+ "decoder_vocoder": {
31
+ "kind": "merged_decoder_source_stft",
32
+ "path": "kokoro_decoder_source_stft_merged.tflite",
33
+ "sha256": "7111687d4513189c959adee16f4436e9c48f1c6285a02db8de126011d09cb8d0",
34
+ "bytes": 216280440,
35
+ "custom_op": {
36
+ "name": "KokoroSourceStft",
37
+ "source_path": "custom_ops/kokoro_source_stft_custom_op_native.cc",
38
+ "source_sha256": "40a6d3ed03548fe2d5d4e8381cb66287d2dbc304a8906cf07b2711942b7f2ad6",
39
+ "linux_x86_64_path": "custom_ops/linux-x86_64/kokoro_source_stft_custom_op_native.so",
40
+ "linux_x86_64_sha256": "c2f62be3925c21cb21fb41d66f4e0a227785ad4cc4ec2d10a6770a64ebc47519",
41
+ "linux_aarch64_path": "custom_ops/linux-aarch64/kokoro_source_stft_custom_op_native.so",
42
+ "linux_aarch64_status": "pending_jetson_build"
43
+ }
44
+ },
45
+ "supporting_files": [
46
+ {
47
+ "path": "kokoro_litert_manifest.json",
48
+ "sha256": "1dd3a6f0a79e29515acdd325118c6e05771db4f325a6ad6e72719bd3284f7170",
49
+ "bytes": 2523,
50
+ "role": "robot-agent runtime manifest"
51
+ },
52
+ {
53
+ "path": "config.json",
54
+ "sha256": "5abb01e2403b072bf03d04fde160443e209d7a0dad49a423be15196b9b43c17f",
55
+ "bytes": 2351,
56
+ "role": "Kokoro vocab/config used by KPipeline token packing"
57
+ },
58
+ {
59
+ "path": "voices/af_heart.npz",
60
+ "sha256": "1e3e7efeb4d30c354eef539d13f35aebc59e599a65257fb290a1b80755500c29",
61
+ "bytes": 522502,
62
+ "role": "runtime voice style pack"
63
+ }
64
+ ],
65
+ "reports": [
66
+ {
67
+ "path": "reports/kokoro_bucketed_frontend_litert_parity_report.json",
68
+ "sha256": "5aa69b92e832dc4603774234066070ffed00918cde1a708a5d4c07b11a9bda8b",
69
+ "bytes": 190141
70
+ },
71
+ {
72
+ "path": "reports/kokoro_decoder_source_stft_merged_probe.json",
73
+ "sha256": "6c9f76c2bc7be2bfa688390cd870ac85902ce9adbead776d89086b04406b28ee",
74
+ "bytes": 5865
75
+ }
76
+ ],
77
+ "acceptance": {
78
+ "bucketed_frontend_passed": true,
79
+ "max_observed_frontend_float_abs_error": 0.000812530517578125,
80
+ "pred_dur_exact": true,
81
+ "alignment_exact": true,
82
+ "valid_frames_exact": true
83
+ },
84
+ "runtime_contract": {
85
+ "compile_or_export_in_request_path": false,
86
+ "warm_interpreters_at_boot": true,
87
+ "fallback_when_token_count_exceeds_buckets": "deterministic_chunking_then_repack"
88
+ }
89
+ }
NOTICE ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Kokoro 82M LiteRT Decoder/Vocoder Preview
2
+
3
+ This package contains a converted LiteRT/TFLite decoder-vocoder artifact derived
4
+ from the Kokoro 82M model:
5
+
6
+ Source model: https://huggingface.co/hexgrad/Kokoro-82M
7
+ Source file: kokoro-v1_0.pth
8
+ License: Apache License 2.0
9
+
10
+ The conversion was produced from the robot edge-runtime research repository:
11
+
12
+ Repository: https://github.com/will-deines/robot
13
+ Commit: bb72dce
14
+
15
+ The generated artifact contains a custom operator named KokoroSourceStft. The
16
+ included Linux x86-64 shared object is provided for validation convenience only.
17
+ Other platforms should rebuild the custom operator from the included C++ source.
18
+
19
+ This package is not endorsed by the Kokoro authors, Hugging Face, Google,
20
+ LiteRT, TensorFlow, or Pollen Robotics.
README.md ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: hexgrad/Kokoro-82M
4
+ pipeline_tag: text-to-speech
5
+ tags:
6
+ - kokoro
7
+ - tflite
8
+ - litert
9
+ - ai-edge-litert
10
+ - text-to-speech
11
+ - custom-op
12
+ - edge-ai
13
+ - experimental
14
+ ---
15
+
16
+ # Kokoro 82M LiteRT Runtime Preview
17
+
18
+ This repository packages the current Kokoro 82M LiteRT/TFLite runtime used by
19
+ the Reachy edge robot-agent project.
20
+
21
+ It is sourced from [`hexgrad/Kokoro-82M`](https://huggingface.co/hexgrad/Kokoro-82M)
22
+ and contains the accepted text-to-decoder-input frontend bucket plus the accepted
23
+ merged decoder/vocoder graph.
24
+
25
+ ## Runtime Shape
26
+
27
+ ```text
28
+ text
29
+ -> Kokoro KPipeline G2P/tokenization
30
+ -> frontend/kokoro_full_frontend_masked_b48_f128_f0256.tflite
31
+ -> kokoro_decoder_source_stft_merged.tflite + KokoroSourceStft
32
+ -> WAV bytes
33
+ ```
34
+
35
+ The runtime still uses the `kokoro` Python package for `KPipeline.g2p()` and
36
+ `KPipeline.en_tokenize()`. It must not instantiate Kokoro `KModel` in the
37
+ request path. Neural inference is served by the LiteRT frontend bucket and the
38
+ LiteRT decoder/vocoder.
39
+
40
+ ## Included Artifacts
41
+
42
+ ```text
43
+ kokoro_litert_manifest.json
44
+ config.json
45
+ voices/af_heart.npz
46
+ frontend/kokoro_full_frontend_masked_b48_f128_f0256.tflite
47
+ kokoro_decoder_source_stft_merged.tflite
48
+ custom_ops/kokoro_source_stft_custom_op_native.cc
49
+ custom_ops/linux-x86_64/kokoro_source_stft_custom_op_native.so
50
+ reports/kokoro_bucketed_frontend_litert_parity_report.json
51
+ reports/kokoro_decoder_source_stft_merged_probe.json
52
+ ```
53
+
54
+ The current frontend bucket is `T=48`, with max `128` decoder frames and `256`
55
+ F0/noise frames. Longer or multi-segment text must be deterministically chunked
56
+ and repacked before inference.
57
+
58
+ ## Jetson / ARM64 Status
59
+
60
+ The Linux x86-64 custom op build is included for local development. Jetson needs
61
+ the Linux aarch64 build:
62
+
63
+ ```text
64
+ custom_ops/linux-aarch64/kokoro_source_stft_custom_op_native.so
65
+ ```
66
+
67
+ That file is intentionally not present in this preview package yet. Build it
68
+ from:
69
+
70
+ ```text
71
+ custom_ops/kokoro_source_stft_custom_op_native.cc
72
+ ```
73
+
74
+ with equivalent floating-point flags:
75
+
76
+ ```bash
77
# The linux-aarch64 output directory is not shipped in this package, and the
# linker will not create it, so make it before building.
mkdir -p custom_ops/linux-aarch64

# Build the KokoroSourceStft custom op as a position-independent shared object.
g++ -std=c++17 -O2 -fPIC \
  -fno-math-errno \
  -fno-trapping-math \
  -ffp-contract=fast \
  -shared \
  custom_ops/kokoro_source_stft_custom_op_native.cc \
  -o custom_ops/linux-aarch64/kokoro_source_stft_custom_op_native.so
84
+ ```
85
+
86
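+ After building the aarch64 shared object, add its SHA-256 to
87
+ `kokoro_litert_manifest.json` under `decoder_vocoder.custom_op`, and update the matching `custom_op` entry in `MANIFEST.json`, which currently records `"linux_aarch64_status": "pending_jetson_build"` instead of a hash.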
88
+
89
+ ## Validation
90
+
91
+ Frontend bucket acceptance is recorded in:
92
+
93
+ ```text
94
+ reports/kokoro_bucketed_frontend_litert_parity_report.json
95
+ ```
96
+
97
+ The local acceptance result for this package:
98
+
99
+ ```text
100
+ passed: true
101
+ bucket: T=48
102
+ max observed frontend float abs error: 0.000812530517578125
103
+ pred_dur exact: true
104
+ alignment exact: true
105
+ valid_frames exact: true
106
+ ```
107
+
108
+ Decoder/vocoder acceptance is recorded in:
109
+
110
+ ```text
111
+ reports/kokoro_decoder_source_stft_merged_probe.json
112
+ ```
113
+
114
+ The merged decoder is a one-interpreter graph connected through the
115
+ `KokoroSourceStft` custom op. The custom op remains a CPU custom-op island unless
116
+ implemented as a GPU-capable custom kernel or delegate.
117
+
118
+ ## Minimal Local Smoke
119
+
120
+ In the Reachy robot-agent repo:
121
+
122
+ ```bash
123
+ PYTHONPATH=src uv run --extra tts --extra kokoro-frontend \
124
+ python scripts/kokoro_litert_runtime_smoke.py \
125
+ --text "Hi Will." \
126
+ --output /tmp/robot-kokoro-litert/runtime_smoke.wav
127
+ ```
128
+
129
+ Expected output is a mono 24 kHz WAV file.
130
+
131
+ ## License
132
+
133
+ The upstream Kokoro model card lists `hexgrad/Kokoro-82M` under Apache-2.0. This
134
+ converted runtime package is distributed under Apache-2.0 as a derived runtime
135
+ form. See `LICENSE` and `NOTICE`.
config.json ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "istftnet": {
3
+ "upsample_kernel_sizes": [20, 12],
4
+ "upsample_rates": [10, 6],
5
+ "gen_istft_hop_size": 5,
6
+ "gen_istft_n_fft": 20,
7
+ "resblock_dilation_sizes": [
8
+ [1, 3, 5],
9
+ [1, 3, 5],
10
+ [1, 3, 5]
11
+ ],
12
+ "resblock_kernel_sizes": [3, 7, 11],
13
+ "upsample_initial_channel": 512
14
+ },
15
+ "dim_in": 64,
16
+ "dropout": 0.2,
17
+ "hidden_dim": 512,
18
+ "max_conv_dim": 512,
19
+ "max_dur": 50,
20
+ "multispeaker": true,
21
+ "n_layer": 3,
22
+ "n_mels": 80,
23
+ "n_token": 178,
24
+ "style_dim": 128,
25
+ "text_encoder_kernel_size": 5,
26
+ "plbert": {
27
+ "hidden_size": 768,
28
+ "num_attention_heads": 12,
29
+ "intermediate_size": 2048,
30
+ "max_position_embeddings": 512,
31
+ "num_hidden_layers": 12,
32
+ "dropout": 0.1
33
+ },
34
+ "vocab": {
35
+ ";": 1,
36
+ ":": 2,
37
+ ",": 3,
38
+ ".": 4,
39
+ "!": 5,
40
+ "?": 6,
41
+ "—": 9,
42
+ "…": 10,
43
+ "\"": 11,
44
+ "(": 12,
45
+ ")": 13,
46
+ "“": 14,
47
+ "”": 15,
48
+ " ": 16,
49
+ "\u0303": 17,
50
+ "ʣ": 18,
51
+ "ʥ": 19,
52
+ "ʦ": 20,
53
+ "ʨ": 21,
54
+ "ᵝ": 22,
55
+ "\uAB67": 23,
56
+ "A": 24,
57
+ "I": 25,
58
+ "O": 31,
59
+ "Q": 33,
60
+ "S": 35,
61
+ "T": 36,
62
+ "W": 39,
63
+ "Y": 41,
64
+ "ᵊ": 42,
65
+ "a": 43,
66
+ "b": 44,
67
+ "c": 45,
68
+ "d": 46,
69
+ "e": 47,
70
+ "f": 48,
71
+ "h": 50,
72
+ "i": 51,
73
+ "j": 52,
74
+ "k": 53,
75
+ "l": 54,
76
+ "m": 55,
77
+ "n": 56,
78
+ "o": 57,
79
+ "p": 58,
80
+ "q": 59,
81
+ "r": 60,
82
+ "s": 61,
83
+ "t": 62,
84
+ "u": 63,
85
+ "v": 64,
86
+ "w": 65,
87
+ "x": 66,
88
+ "y": 67,
89
+ "z": 68,
90
+ "ɑ": 69,
91
+ "ɐ": 70,
92
+ "ɒ": 71,
93
+ "æ": 72,
94
+ "β": 75,
95
+ "ɔ": 76,
96
+ "ɕ": 77,
97
+ "ç": 78,
98
+ "ɖ": 80,
99
+ "ð": 81,
100
+ "ʤ": 82,
101
+ "ə": 83,
102
+ "ɚ": 85,
103
+ "ɛ": 86,
104
+ "ɜ": 87,
105
+ "ɟ": 90,
106
+ "ɡ": 92,
107
+ "ɥ": 99,
108
+ "ɨ": 101,
109
+ "ɪ": 102,
110
+ "ʝ": 103,
111
+ "ɯ": 110,
112
+ "ɰ": 111,
113
+ "ŋ": 112,
114
+ "ɳ": 113,
115
+ "ɲ": 114,
116
+ "ɴ": 115,
117
+ "ø": 116,
118
+ "ɸ": 118,
119
+ "θ": 119,
120
+ "œ": 120,
121
+ "ɹ": 123,
122
+ "ɾ": 125,
123
+ "ɻ": 126,
124
+ "ʁ": 128,
125
+ "ɽ": 129,
126
+ "ʂ": 130,
127
+ "ʃ": 131,
128
+ "ʈ": 132,
129
+ "ʧ": 133,
130
+ "ʊ": 135,
131
+ "ʋ": 136,
132
+ "ʌ": 138,
133
+ "ɣ": 139,
134
+ "ɤ": 140,
135
+ "χ": 142,
136
+ "ʎ": 143,
137
+ "ʒ": 147,
138
+ "ʔ": 148,
139
+ "ˈ": 156,
140
+ "ˌ": 157,
141
+ "ː": 158,
142
+ "ʰ": 162,
143
+ "ʲ": 164,
144
+ "↓": 169,
145
+ "→": 171,
146
+ "↗": 172,
147
+ "↘": 173,
148
+ "ᵻ": 177
149
+ }
150
+ }
custom_ops/kokoro_source_stft_custom_op_native.cc ADDED
@@ -0,0 +1,488 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #include <algorithm>
3
+ #include <cmath>
4
+ #include <cstdarg>
5
+ #include <cstddef>
6
+ #include <cstdint>
7
+ #include <cstdio>
8
+ #include <functional>
9
+ #include <memory>
10
+ #include <string>
11
+ #include <unordered_map>
12
+ #include <utility>
13
+ #include <vector>
14
+
15
+ extern "C" {  // C linkage for every declaration up to the matching closing brace.
16
+ // NOTE(review): these look like locally declared copies of TensorFlow Lite C API types (no TFLite header is included) — confirm each layout matches the runtime that loads this op.
17
+ typedef enum TfLiteStatus {  // Status codes; the functions visible in this file return kTfLiteOk or kTfLiteError.
18
+ kTfLiteOk = 0,
19
+ kTfLiteError = 1,
20
+ kTfLiteDelegateError = 2,
21
+ kTfLiteApplicationError = 3,
22
+ kTfLiteDelegateDataNotFound = 4,
23
+ kTfLiteDelegateDataWriteError = 5,
24
+ kTfLiteDelegateDataReadError = 6,
25
+ kTfLiteUnresolvedOps = 7,
26
+ kTfLiteCancelled = 8,
27
+ kTfLiteOutputShapeNotKnown = 9,
28
+ } TfLiteStatus;
29
+
30
+ typedef enum {  // Only three element-type codes are declared here.
31
+ kTfLiteNoType = 0,
32
+ kTfLiteFloat32 = 1,
33
+ kTfLiteInt32 = 2,
34
+ } TfLiteType;
35
+
36
+ typedef struct TfLiteQuantizationParams {
37
+ float scale;
38
+ int32_t zero_point;
39
+ } TfLiteQuantizationParams;
40
+
41
+ typedef enum TfLiteQuantizationType {
42
+ kTfLiteNoQuantization = 0,
43
+ kTfLiteAffineQuantization = 1,
44
+ kTfLiteBlockwiseQuantization = 2,
45
+ } TfLiteQuantizationType;
46
+
47
+ typedef struct TfLiteQuantization {
48
+ TfLiteQuantizationType type;
49
+ void* params;  // untyped pointer; presumably interpreted according to `type` — confirm
50
+ } TfLiteQuantization;
51
+
52
+ typedef union TfLitePtrUnion {  // One data pointer viewed as int32, float, raw bytes, or void.
53
+ int32_t* i32;
54
+ float* f;
55
+ char* raw;
56
+ const char* raw_const;
57
+ void* data;
58
+ } TfLitePtrUnion;
59
+
60
+ typedef enum TfLiteAllocationType {  // Only the first enumerator has an explicit value; the rest count up from it.
61
+ kTfLiteMemNone = 0,
62
+ kTfLiteMmapRo,
63
+ kTfLiteArenaRw,
64
+ kTfLiteArenaRwPersistent,
65
+ kTfLiteDynamic,
66
+ kTfLitePersistentRo,
67
+ kTfLiteCustom,
68
+ kTfLiteVariantObject,
69
+ kTfLiteNonCpu,
70
+ } TfLiteAllocationType;
71
+
72
+ typedef int TfLiteBufferHandle;
73
+ enum { kTfLiteNullBufferHandle = -1 };
74
+
75
+ typedef struct TfLiteIntArray {
76
+ int size;
77
+ int data[];  // flexible array member; presumably holds `size` entries — confirm
78
+ } TfLiteIntArray;
79
+
80
+ typedef struct TfLiteSparsity TfLiteSparsity;  // Opaque forward declarations: the structs below refer to these types only through pointers.
81
+ typedef struct TfLiteDelegate TfLiteDelegate;
82
+ typedef struct TfLiteExternalContext TfLiteExternalContext;
83
+ typedef struct TfLiteOperator TfLiteOperator;
84
+ typedef struct TfLiteAsyncKernel TfLiteAsyncKernel;
85
+
86
+ typedef struct TfLiteTensor {  // Tensor record; InputTensor/OutputTensor return references into TfLiteContext::tensors.
87
+ TfLiteType type;
88
+ TfLitePtrUnion data;
89
+ TfLiteIntArray* dims;
90
+ TfLiteQuantizationParams params;
91
+ TfLiteAllocationType allocation_type;
92
+ size_t bytes;
93
+ const void* allocation;
94
+ const char* name;
95
+ TfLiteDelegate* delegate;
96
+ TfLiteBufferHandle buffer_handle;
97
+ bool data_is_stale;
98
+ bool is_variable;
99
+ TfLiteQuantization quantization;
100
+ TfLiteSparsity* sparsity;
101
+ const TfLiteIntArray* dims_signature;
102
+ } TfLiteTensor;
103
+
104
+ typedef struct TfLiteNode {  // Per-op node; CheckTensorCount reads the sizes of `inputs` and `outputs`.
105
+ TfLiteIntArray* inputs;
106
+ TfLiteIntArray* outputs;
107
+ TfLiteIntArray* intermediates;
108
+ TfLiteIntArray* temporaries;
109
+ void* user_data;
110
+ void* builtin_data;
111
+ const void* custom_initial_data;
112
+ int custom_initial_data_size;
113
+ TfLiteDelegate* delegate;
114
+ bool might_have_side_effect;
115
+ } TfLiteNode;
116
+
117
+ typedef struct TfLiteContext {  // NOTE(review): member order must match the host interpreter's TfLiteContext for `tensors` and `ReportError` to be read correctly — verify against the TFLite version in use.
118
+ size_t tensors_size;
119
+ TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext*, TfLiteIntArray**);
120
+ TfLiteTensor* tensors;
121
+ void* impl_;
122
+ TfLiteStatus (*ResizeTensor)(struct TfLiteContext*, TfLiteTensor*, TfLiteIntArray*);
123
+ void (*ReportError)(struct TfLiteContext*, const char*, ...);  // printf-style variadic reporter; Report() calls it with a "%s" format
124
+ } TfLiteContext;
125
+
126
+ typedef struct TfLiteRegistration {  // Function-pointer table (init/free/prepare/invoke, ...) plus identifying fields for one op.
127
+ void* (*init)(TfLiteContext* context, const char* buffer, size_t length);
128
+ void (*free)(TfLiteContext* context, void* buffer);
129
+ TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node);
130
+ TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node);
131
+ const char* (*profiling_string)(const TfLiteContext* context, const TfLiteNode* node);
132
+ int32_t builtin_code;
133
+ const char* custom_name;
134
+ int version;
135
+ TfLiteOperator* registration_external;
136
+ TfLiteAsyncKernel* (*async_kernel)(TfLiteContext* context, TfLiteNode* node);
137
+ uint64_t inplace_operator;
138
+ } TfLiteRegistration;
139
+
140
+ } // extern "C"
141
+
142
+ namespace kokoro_source_stft {
143
+
144
+ constexpr int kSamples = 76800;  // equals kF0Frames * kUpsampleScale (256 * 300); upper clamp bound in ReflectIndex
144
+ constexpr int kF0Frames = 256;  // same value as max_f0_frames in MANIFEST.json; frame-index clamp bound in InterpolatePhase
145
+ constexpr int kDim = 9;  // row stride used when indexing phase_frames in InterpolatePhase
146
+ constexpr int kUpsampleScale = 300;  // NOTE(review): presumably upsample_rates (10 * 6) times gen_istft_hop_size (5) from config.json — confirm
147
+ constexpr int kNfft = 20;  // same value as gen_istft_n_fft in config.json
148
+ constexpr int kHop = 5;  // same value as gen_istft_hop_size in config.json
149
+ constexpr int kFreqBins = 11;  // equals kNfft / 2 + 1
150
+ constexpr int kStftFrames = 15361;  // equals kSamples / kHop + 1
151
+ constexpr float kSampleRate = 24000.0f;  // same value as sample_rate_hz in MANIFEST.json
152
+ constexpr float kSineAmp = 0.1f;
153
+ constexpr float kNoiseStd = 0.003f;
154
+ constexpr float kVoicedThreshold = 10.0f;
155
+ constexpr float kTwoPi = 6.28318530717958647692f;  // 2 * pi
156
+ constexpr float kPi = 3.14159265358979323846f;  // pi
158
+
159
+ void Report(TfLiteContext* context, const char* message) {
160
+ if (context != nullptr && context->ReportError != nullptr) {
161
+ context->ReportError(context, "%s", message);
162
+ }
163
+ }
164
+
165
+ TfLiteStatus CheckTensorCount(TfLiteContext* context, TfLiteNode* node) {
166
+ if (node == nullptr || node->inputs == nullptr || node->outputs == nullptr) {
167
+ Report(context, "KokoroSourceStft received a null node or tensor list");
168
+ return kTfLiteError;
169
+ }
170
+ if (node->inputs->size != 14 || node->outputs->size != 7) {
171
+ Report(context, "KokoroSourceStft expects 14 inputs and 7 outputs");
172
+ return kTfLiteError;
173
+ }
174
+ return kTfLiteOk;
175
+ }
176
+
177
+ inline const TfLiteTensor& InputTensor(TfLiteContext* context, TfLiteNode* node, int index) {
178
+ return context->tensors[node->inputs->data[index]];
179
+ }
180
+
181
+ inline TfLiteTensor& OutputTensor(TfLiteContext* context, TfLiteNode* node, int index) {
182
+ return context->tensors[node->outputs->data[index]];
183
+ }
184
+
185
+ inline float InterpolatePhase(
186
+ const float* phase_frames,
187
+ const int32_t* left_indices,
188
+ const int32_t* right_indices,
189
+ const float* left_weights,
190
+ const float* right_weights,
191
+ int sample,
192
+ int harmonic) {
193
+ const int left = std::min(std::max(left_indices[sample], 0), kF0Frames - 1);
194
+ const int right = std::min(std::max(right_indices[sample], 0), kF0Frames - 1);
195
+ const float left_weight = left_weights[sample];
196
+ const float right_weight = right_weights[sample];
197
+ const float left_phase = phase_frames[left * kDim + harmonic];
198
+ const float right_phase = phase_frames[right * kDim + harmonic];
199
+ return std::fmaf(left_phase, left_weight, right_phase * right_weight);
200
+ }
201
+
202
+ inline int ReflectIndex(int centered_position, int valid_samples) {
203
+ int source_position = centered_position;
204
+ if (centered_position < 0) {
205
+ source_position = -centered_position;
206
+ } else if (centered_position >= valid_samples) {
207
+ source_position = 2 * valid_samples - centered_position - 2;
208
+ }
209
+ if (source_position < 0) {
210
+ source_position = 0;
211
+ }
212
+ if (source_position >= kSamples) {
213
+ source_position = kSamples - 1;
214
+ }
215
+ return source_position;
216
+ }
217
+
218
+ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
219
+ return CheckTensorCount(context, node);
220
+ }
221
+
222
+ TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) {
223
+ if (CheckTensorCount(context, node) != kTfLiteOk) {
224
+ return kTfLiteError;
225
+ }
226
+ const TfLiteTensor& f0_tensor = InputTensor(context, node, 0);
227
+ const TfLiteTensor& sine_noise_tensor = InputTensor(context, node, 2);
228
+ const TfLiteTensor& valid_f0_frames_tensor = InputTensor(context, node, 3);
229
+ const TfLiteTensor& weight_tensor = InputTensor(context, node, 4);
230
+ const TfLiteTensor& bias_tensor = InputTensor(context, node, 5);
231
+ const TfLiteTensor& forward_real_tensor = InputTensor(context, node, 6);
232
+ const TfLiteTensor& forward_imag_tensor = InputTensor(context, node, 7);
233
+ const TfLiteTensor& branch_mask_tensor = InputTensor(context, node, 8);
234
+ const TfLiteTensor& branch_sign_tensor = InputTensor(context, node, 9);
235
+ const TfLiteTensor& interp_left_index_tensor = InputTensor(context, node, 10);
236
+ const TfLiteTensor& interp_right_index_tensor = InputTensor(context, node, 11);
237
+ const TfLiteTensor& interp_left_weight_tensor = InputTensor(context, node, 12);
238
+ const TfLiteTensor& interp_right_weight_tensor = InputTensor(context, node, 13);
239
+
240
+ TfLiteTensor& harmonic_tensor = OutputTensor(context, node, 0);
241
+ TfLiteTensor& stft_tensor = OutputTensor(context, node, 1);
242
+ TfLiteTensor& valid_samples_tensor = OutputTensor(context, node, 2);
243
+ TfLiteTensor& phase_frames_tensor = OutputTensor(context, node, 3);
244
+ TfLiteTensor& phase_samples_tensor = OutputTensor(context, node, 4);
245
+ TfLiteTensor& sine_samples_tensor = OutputTensor(context, node, 5);
246
+ TfLiteTensor& mixed_sine_tensor = OutputTensor(context, node, 6);
247
+
248
+ const float* f0 = f0_tensor.data.f;
249
+ const float* sine_noise = sine_noise_tensor.data.f;
250
+ const int32_t* valid_f0_frames_ptr = valid_f0_frames_tensor.data.i32;
251
+ const float* weight = weight_tensor.data.f;
252
+ const float* bias = bias_tensor.data.f;
253
+ const float* forward_real = forward_real_tensor.data.f;
254
+ const float* forward_imag = forward_imag_tensor.data.f;
255
+ const float* branch_mask = branch_mask_tensor.data.f;
256
+ const float* branch_sign = branch_sign_tensor.data.f;
257
+ const int32_t* interp_left_index = interp_left_index_tensor.data.i32;
258
+ const int32_t* interp_right_index = interp_right_index_tensor.data.i32;
259
+ const float* interp_left_weight = interp_left_weight_tensor.data.f;
260
+ const float* interp_right_weight = interp_right_weight_tensor.data.f;
261
+ float* harmonic = harmonic_tensor.data.f;
262
+ float* stft_stack = stft_tensor.data.f;
263
+ int32_t* valid_samples_output = valid_samples_tensor.data.i32;
264
+ float* debug_phase_frames = phase_frames_tensor.data.f;
265
+ float* debug_phase_samples = phase_samples_tensor.data.f;
266
+ float* debug_sine_samples = sine_samples_tensor.data.f;
267
+ float* debug_mixed_sine = mixed_sine_tensor.data.f;
268
+
269
+ if (f0 == nullptr || sine_noise == nullptr || valid_f0_frames_ptr == nullptr ||
270
+ weight == nullptr || bias == nullptr || forward_real == nullptr ||
271
+ forward_imag == nullptr || branch_mask == nullptr || branch_sign == nullptr ||
272
+ interp_left_index == nullptr || interp_right_index == nullptr ||
273
+ interp_left_weight == nullptr || interp_right_weight == nullptr ||
274
+ harmonic == nullptr || stft_stack == nullptr || valid_samples_output == nullptr ||
275
+ debug_phase_frames == nullptr || debug_phase_samples == nullptr ||
276
+ debug_sine_samples == nullptr || debug_mixed_sine == nullptr) {
277
+ Report(context, "KokoroSourceStft received a null tensor buffer");
278
+ return kTfLiteError;
279
+ }
280
+
281
+ const int valid_f0_frames = std::max(
282
+ 0, std::min(kF0Frames, static_cast<int>(valid_f0_frames_ptr[0])));
283
+ const int valid_samples = valid_f0_frames * kUpsampleScale;
284
+ valid_samples_output[0] = valid_samples;
285
+
286
+ float phase_frames[kF0Frames * kDim];
287
+ for (int harmonic_index = 0; harmonic_index < kDim; ++harmonic_index) {
288
+ double cumulative = 0.0;
289
+ const float multiplier = static_cast<float>(harmonic_index + 1);
290
+ for (int frame = 0; frame < kF0Frames; ++frame) {
291
+ const float f0_value = frame < valid_f0_frames ? f0[frame] : 0.0f;
292
+ float rad = f0_value * multiplier / kSampleRate;
293
+ rad = rad - std::floor(rad);
294
+ cumulative += static_cast<double>(rad);
295
+ const float cumulative_float = static_cast<float>(cumulative);
296
+ const float phase = cumulative_float * kTwoPi;
297
+ phase_frames[frame * kDim + harmonic_index] = phase * kUpsampleScale;
298
+ debug_phase_frames[frame * kDim + harmonic_index] =
299
+ phase_frames[frame * kDim + harmonic_index];
300
+ }
301
+ }
302
+
303
+ for (int sample = 0; sample < kSamples; ++sample) {
304
+ if (sample >= valid_samples) {
305
+ harmonic[sample] = 0.0f;
306
+ for (int harmonic_index = 0; harmonic_index < kDim; ++harmonic_index) {
307
+ const int debug_index = sample * kDim + harmonic_index;
308
+ debug_phase_samples[debug_index] = 0.0f;
309
+ debug_sine_samples[debug_index] = 0.0f;
310
+ debug_mixed_sine[debug_index] = 0.0f;
311
+ }
312
+ continue;
313
+ }
314
+ const int frame = sample / kUpsampleScale;
315
+ const float f0_value = f0[frame];
316
+ const float uv = f0_value > kVoicedThreshold ? 1.0f : 0.0f;
317
+ const float noise_amp = uv * kNoiseStd + (1.0f - uv) * kSineAmp / 3.0f;
318
+ float linear = bias[0];
319
+ for (int harmonic_index = 0; harmonic_index < kDim; ++harmonic_index) {
320
+ const int debug_index = sample * kDim + harmonic_index;
321
+ const float phase = InterpolatePhase(
322
+ phase_frames,
323
+ interp_left_index,
324
+ interp_right_index,
325
+ interp_left_weight,
326
+ interp_right_weight,
327
+ sample,
328
+ harmonic_index);
329
+ const float sine_sample = std::sin(phase);
330
+ const float sine = sine_sample * kSineAmp;
331
+ const float sine_wave =
332
+ sine * uv + noise_amp * sine_noise[sample * kDim + harmonic_index];
333
+ debug_phase_samples[debug_index] = phase;
334
+ debug_sine_samples[debug_index] = sine_sample;
335
+ debug_mixed_sine[debug_index] = sine_wave;
336
+ linear += sine_wave * weight[harmonic_index];
337
+ }
338
+ harmonic[sample] = std::tanh(linear);
339
+ }
340
+
341
+ constexpr int plane_size = kFreqBins * kStftFrames;
342
+ for (int frame = 0; frame < kStftFrames; ++frame) {
343
+ const int window_start = frame * kHop - kNfft / 2;
344
+ float window[kNfft];
345
+ for (int offset = 0; offset < kNfft; ++offset) {
346
+ const int source_index = ReflectIndex(window_start + offset, valid_samples);
347
+ window[offset] = harmonic[source_index];
348
+ }
349
+ for (int freq = 0; freq < kFreqBins; ++freq) {
350
+ double real_accumulator = 0.0;
351
+ double imag_accumulator = 0.0;
352
+ for (int offset = 0; offset < kNfft; ++offset) {
353
+ const int filter_index = freq * kNfft + offset;
354
+ real_accumulator +=
355
+ static_cast<double>(window[offset]) * static_cast<double>(forward_real[filter_index]);
356
+ imag_accumulator +=
357
+ static_cast<double>(window[offset]) * static_cast<double>(forward_imag[filter_index]);
358
+ }
359
+ const float real = static_cast<float>(real_accumulator);
360
+ const float imag = static_cast<float>(imag_accumulator);
361
+ const float magnitude = std::sqrt(real * real + imag * imag);
362
+ float phase = std::atan2(imag, real);
363
+ const float threshold = branch_mask[freq] * 1.0e-6f;
364
+ if (branch_mask[freq] > 0.0f && std::fabs(imag) <= threshold && real < 0.0f) {
365
+ phase = branch_sign[freq] * kPi;
366
+ }
367
+ const int output_index = freq * kStftFrames + frame;
368
+ stft_stack[output_index] = magnitude;
369
+ stft_stack[plane_size + output_index] = phase;
370
+ stft_stack[2 * plane_size + output_index] = real;
371
+ stft_stack[3 * plane_size + output_index] = imag;
372
+ }
373
+ }
374
+
375
+ return kTfLiteOk;
376
+ }
377
+
378
+ TfLiteRegistration* Registration() {
379
+ static TfLiteRegistration registration = {};
380
+ registration.prepare = Prepare;
381
+ registration.invoke = Invoke;
382
+ registration.builtin_code = 32;
383
+ registration.custom_name = "KokoroSourceStft";
384
+ registration.version = 1;
385
+ return &registration;
386
+ }
387
+
388
+ } // namespace kokoro_source_stft
389
+
390
+ namespace tflite {
391
+
392
// Local stand-in that declares only the enumerator this file needs.
// 32 is the same number Registration() stores in builtin_code.
// NOTE(review): presumably this must equal the CUSTOM operator code of the
// runtime's schema -- confirm against the targeted runtime version.
+ enum BuiltinOperator : int32_t {
393
+ BuiltinOperator_CUSTOM = 32,
394
+ };
395
+
396
// Folds a sequence of hash values into one, left to right.
// An empty list yields 0. The mixing constant and the shift amounts define
// the resulting value and are therefore kept exactly as they are.
inline size_t CombineHashes(std::initializer_list<size_t> hashes) {
  size_t combined = 0;
  for (const size_t* it = hashes.begin(); it != hashes.end(); ++it) {
    const size_t mixed =
        *it + 0x9e3779b97f4a7800ULL + (combined << 10) + (combined >> 4);
    combined ^= mixed;
  }
  return combined;
}
404
+
405
// Hash functors for the (operator, version) pair keys of the maps declared in
// MutableOpResolverHack below.
// NOTE(review): those maps appear to be shared with the runtime's own resolver
// code; if so, these functors must produce exactly the same values as the
// runtime's hashers -- confirm before changing anything here.
+ namespace op_resolver_hasher {
406
+ template <typename V>
407
+ struct ValueHasher {  // generic case: defer to std::hash<V>
408
+ size_t operator()(const V& v) const { return std::hash<V>()(v); }
409
+ };
410
+
411
+ template <>
412
+ struct ValueHasher<tflite::BuiltinOperator> {  // enum case: hash the value as a plain int
413
+ size_t operator()(const tflite::BuiltinOperator& v) const {
414
+ return std::hash<int>()(static_cast<int>(v));
415
+ }
416
+ };
417
+
418
+ template <typename T>
419
+ struct OperatorKeyHasher {  // T is a std::pair-like key; hashes .first and .second, then mixes them
420
+ size_t operator()(const T& x) const {
421
+ size_t a = ValueHasher<typename T::first_type>()(x.first);
422
+ size_t b = ValueHasher<typename T::second_type>()(x.second);
423
+ return CombineHashes({a, b});  // order matters: first, then second
424
+ }
425
+ };
426
+ } // namespace op_resolver_hasher
427
+
428
// Abstract resolver interface declared locally: two pure-virtual FindOp
// overloads, three virtual delegate getters that return empty containers by
// default, a virtual destructor and one private cache pointer.
// NOTE(review): the name, together with the reinterpret_cast in
// RegisterKokoroSourceStft, suggests this is meant to be layout-compatible
// with the runtime's own op-resolver base class. If so, the order of the
// virtual functions and of the data members must not change -- confirm
// against the targeted runtime version.
+ class OpResolverHack {
429
+ public:
430
+ using TfLiteDelegatePtrVector =
431
+ std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
432
+ using TfLiteDelegateCreator =
433
+ std::function<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
434
+ TfLiteContext*)>;
435
+ using TfLiteDelegateCreators = std::vector<TfLiteDelegateCreator>;
436
+ using TfLiteOpaqueDelegatePtr =
437
+ std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>;
438
+ using TfLiteOpaqueDelegateCreator = std::function<TfLiteOpaqueDelegatePtr(int)>;
439
+ using TfLiteOpaqueDelegateCreators = std::vector<TfLiteOpaqueDelegateCreator>;
440
+
441
+ virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op, int version) const = 0;
442
+ virtual const TfLiteRegistration* FindOp(const char* op, int version) const = 0;
443
+ virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const { return {}; }
444
+ virtual TfLiteDelegateCreators GetDelegateCreators() const { return {}; }
445
+ virtual TfLiteOpaqueDelegateCreators GetOpaqueDelegateCreators() const { return {}; }
446
+ virtual ~OpResolverHack() = default;
447
+
448
+ private:
449
+ struct OperatorsCache;  // declared only; never defined in the visible code
450
+ mutable std::shared_ptr<OperatorsCache> registration_externals_cache_;
451
+ };
452
+
453
// Resolver subclass with one operation: insert the KokoroSourceStft
// registration into custom_ops_. FindOp is not overridden, so the class stays
// abstract and cannot be instantiated; in this file it is only reached through
// the reinterpret_cast in RegisterKokoroSourceStft.
// NOTE(review): presumably the data members mirror the runtime's mutable
// resolver so that custom_ops_ lands at the right offset; if so, their order
// and types must not change -- confirm against the targeted runtime version.
+ class MutableOpResolverHack : public OpResolverHack {
454
+ public:
455
+ void AddKokoroSourceStft(const TfLiteRegistration* registration) {  // registration must not be null
456
+ may_directly_contain_user_defined_ops_ = true;
457
+ TfLiteRegistration copy = *registration;  // work on a copy; the caller's record is left untouched
458
+ copy.builtin_code = BuiltinOperator_CUSTOM;
459
+ copy.custom_name = "KokoroSourceStft";
460
+ copy.version = 1;
461
+ custom_ops_[CustomOperatorKey("KokoroSourceStft", 1)] = copy;  // inserts, or overwrites an existing entry
462
+ }
463
+
464
+ protected:
465
+ bool may_directly_contain_user_defined_ops_ = false;
466
+ TfLiteDelegateCreators delegate_creators_;  // not touched by the visible code
467
+ TfLiteOpaqueDelegateCreators opaque_delegate_creators_;  // not touched by the visible code
468
+
469
+ private:
470
+ using BuiltinOperatorKey = std::pair<tflite::BuiltinOperator, int>;  // (operator code, version)
471
+ using CustomOperatorKey = std::pair<std::string, int>;  // (custom op name, version)
472
+
473
+ std::unordered_map<BuiltinOperatorKey, TfLiteRegistration,
474
+ op_resolver_hasher::OperatorKeyHasher<BuiltinOperatorKey>>
475
+ builtins_;
476
+ std::unordered_map<CustomOperatorKey, TfLiteRegistration,
477
+ op_resolver_hasher::OperatorKeyHasher<CustomOperatorKey>>
478
+ custom_ops_;
479
+ std::vector<const OpResolverHack*> other_op_resolvers_;  // not touched by the visible code
480
+ };
481
+
482
+ } // namespace tflite
483
+
484
+ extern "C" __attribute__((visibility("default"))) void RegisterKokoroSourceStft(
485
+ uintptr_t resolver_ptr) {
486
+ auto* resolver = reinterpret_cast<tflite::MutableOpResolverHack*>(resolver_ptr);
487
+ resolver->AddKokoroSourceStft(kokoro_source_stft::Registration());
488
+ }
custom_ops/linux-x86_64/kokoro_source_stft_custom_op_native.so ADDED
Binary file (26.4 kB). View file
 
examples/run_merged_decoder.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Run the merged Kokoro decoder/vocoder LiteRT artifact with custom op.
2
+
3
+ The input NPZ must contain these arrays:
4
+
5
+ asr, f0_curve, noise, style, valid_frames, initial_phase, sine_noise
6
+
7
+ This example is intentionally decoder-only. It does not perform Kokoro text
8
+ normalization, phonemization, duration prediction, or frontend inference.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ import ctypes
15
+ from pathlib import Path
16
+
17
+ import numpy as np
18
+ from ai_edge_litert import interpreter as litert_interpreter
19
+
20
# Names of the arrays that must be present in the input NPZ.
# Order matters: main() pairs these positionally with the entries returned by
# interpreter.get_input_details().
+ INPUT_NAMES = (
21
+ "asr",
22
+ "f0_curve",
23
+ "noise",
24
+ "style",
25
+ "valid_frames",
26
+ "initial_phase",
27
+ "sine_noise",
28
+ )
29
+
30
+
31
def register_kokoro_source_stft(shared_object: Path):
    """Load the custom-op shared object and return a registerer callback.

    Args:
        shared_object: Path to the library exporting ``RegisterKokoroSourceStft``.

    Returns:
        A callable taking the resolver address as an ``int`` and forwarding it
        to the native registration function.

    Raises:
        OSError: If the shared object cannot be loaded.
        AttributeError: If the library does not export the expected symbol.
    """
    library = ctypes.CDLL(str(shared_object), mode=ctypes.RTLD_GLOBAL)
    register_native = library.RegisterKokoroSourceStft
    # The native parameter is a uintptr_t, i.e. pointer-sized. c_void_p has
    # that width on every platform, whereas a fixed c_uint64 is only correct
    # on 64-bit targets.
    register_native.argtypes = [ctypes.c_void_p]
    register_native.restype = None

    def registerer(resolver_pointer: int) -> None:
        register_native(int(resolver_pointer))

    return registerer
41
+
42
+
43
def parse_args() -> argparse.Namespace:
    """Parse the command line of the decoder example.

    ``--model`` and ``--custom-op`` default to files located relative to the
    directory one level above this script; ``--inputs`` is mandatory and
    ``--output`` defaults to ``waveform.npy`` in the working directory.
    """
    root = Path(__file__).resolve().parents[1]
    default_model = root / "kokoro_decoder_source_stft_merged.tflite"
    default_custom_op = root.joinpath(
        "custom_ops",
        "linux-x86_64",
        "kokoro_source_stft_custom_op_native.so",
    )

    cli = argparse.ArgumentParser()
    cli.add_argument("--model", type=Path, default=default_model)
    cli.add_argument("--custom-op", type=Path, default=default_custom_op)
    cli.add_argument("--inputs", type=Path, required=True)
    cli.add_argument("--output", type=Path, default=Path("waveform.npy"))
    return cli.parse_args()
62
+
63
+
64
def main() -> int:
    """Run the merged decoder once and save the valid part of the waveform.

    Reads the arrays named in ``INPUT_NAMES`` from the ``--inputs`` NPZ, feeds
    them to the interpreter in that order, invokes it, and writes output 0
    truncated to the sample count reported by output 1.

    Returns:
        0 on success.

    Raises:
        KeyError: If the NPZ lacks any required array.
    """
    args = parse_args()

    # np.load keeps the archive file open until the NpzFile is closed, so read
    # everything needed inside a context manager and release the handle early.
    with np.load(args.inputs) as archive:
        missing = [name for name in INPUT_NAMES if name not in archive]
        if missing:
            raise KeyError(f"input NPZ is missing required arrays: {missing}")
        arrays = {name: archive[name] for name in INPUT_NAMES}

    interpreter = litert_interpreter.InterpreterWithCustomOps(
        model_path=str(args.model),
        custom_op_registerers=[register_kokoro_source_stft(args.custom_op)],
    )
    interpreter.allocate_tensors()

    # Inputs are matched by position; strict=True turns a count mismatch into
    # an error instead of silently dropping tensors.
    for detail, name in zip(interpreter.get_input_details(), INPUT_NAMES, strict=True):
        interpreter.set_tensor(detail["index"], arrays[name])

    interpreter.invoke()
    outputs = interpreter.get_output_details()
    waveform = interpreter.get_tensor(outputs[0]["index"])
    valid_samples = int(interpreter.get_tensor(outputs[1]["index"])[0])
    args.output.parent.mkdir(parents=True, exist_ok=True)
    np.save(args.output, waveform[..., :valid_samples])
    print(f"wrote {args.output} with {valid_samples} valid samples at 24000 Hz")
    return 0
88
+
89
+
90
# When executed as a script, exit the process with main()'s return value.
+ if __name__ == "__main__":
91
+ raise SystemExit(main())
frontend/kokoro_full_frontend_masked_b48_f128_f0256.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d075924f0f0be81c382f4a68b2799ac3a2142e650ac0ceb730aea7f6f4f5f4da
3
+ size 128007356
kokoro_decoder_source_stft_merged.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7111687d4513189c959adee16f4436e9c48f1c6285a02db8de126011d09cb8d0
3
+ size 216280440
kokoro_litert_manifest.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "schema_version": 1,
3
+ "name": "kokoro-82m-litert-runtime-preview",
4
+ "source_model": {
5
+ "repo_id": "hexgrad/Kokoro-82M",
6
+ "voice": "af_heart",
7
+ "sample_rate_hz": 24000
8
+ },
9
+ "text_frontend": {
10
+ "package": "kokoro",
11
+ "component": "KPipeline",
12
+ "methods": [
13
+ "g2p",
14
+ "en_tokenize"
15
+ ],
16
+ "loads_pytorch_model_weights": false,
17
+ "forbidden_in_request_path": [
18
+ "KModel"
19
+ ]
20
+ },
21
+ "frontend": {
22
+ "kind": "bucketed_full_frontend",
23
+ "buckets": [
24
+ {
25
+ "tokens": 48,
26
+ "max_frames": 128,
27
+ "max_f0_frames": 256,
28
+ "path": "frontend/kokoro_full_frontend_masked_b48_f128_f0256.tflite",
29
+ "bytes": 128007356,
30
+ "sha256": "d075924f0f0be81c382f4a68b2799ac3a2142e650ac0ceb730aea7f6f4f5f4da"
31
+ }
32
+ ],
33
+ "packing": {
34
+ "pad_input_id": 0,
35
+ "text_mask_true_means_padding": true,
36
+ "select_bucket": "smallest_bucket_gte_token_count"
37
+ },
38
+ "outputs": {
39
+ "decoder_inputs": [
40
+ "f0_curve",
41
+ "noise",
42
+ "text_encoded",
43
+ "asr",
44
+ "valid_frames",
45
+ "valid_f0_frames"
46
+ ],
47
+ "debug_outputs": [
48
+ "bert_hidden",
49
+ "bert_encoder",
50
+ "duration_encoded",
51
+ "predictor_lstm",
52
+ "duration_logits",
53
+ "duration",
54
+ "pred_dur",
55
+ "alignment",
56
+ "prosody_en"
57
+ ]
58
+ }
59
+ },
60
+ "decoder_vocoder": {
61
+ "kind": "merged_decoder_source_stft",
62
+ "path": "kokoro_decoder_source_stft_merged.tflite",
63
+ "bytes": 216280440,
64
+ "sha256": "7111687d4513189c959adee16f4436e9c48f1c6285a02db8de126011d09cb8d0",
65
+ "custom_op": {
66
+ "name": "KokoroSourceStft",
67
+ "local_linux_x86_64_path": "custom_ops/linux-x86_64/kokoro_source_stft_custom_op_native.so",
68
+ "local_linux_x86_64_sha256": "c2f62be3925c21cb21fb41d66f4e0a227785ad4cc4ec2d10a6770a64ebc47519",
69
+ "linux_aarch64_path": "custom_ops/linux-aarch64/kokoro_source_stft_custom_op_native.so",
70
+ "linux_aarch64_status": "pending_jetson_build"
71
+ }
72
+ },
73
+ "acceptance": {
74
+ "frontend_bucketed_report": "reports/kokoro_bucketed_frontend_litert_parity_report.json",
75
+ "bucketed_frontend_passed": true,
76
+ "max_observed_frontend_float_abs_error": 0.000812530517578125,
77
+ "pred_dur_exact": true,
78
+ "alignment_exact": true,
79
+ "valid_frames_exact": true
80
+ },
81
+ "runtime_contract": {
82
+ "compile_or_export_in_request_path": false,
83
+ "warm_interpreters_at_boot": true,
84
+ "fallback_when_token_count_exceeds_buckets": "deterministic_chunking_then_repack"
85
+ }
86
+ }
reports/kokoro_bucketed_frontend_litert_parity_report.json ADDED
The diff for this file is too large to render. See raw diff
 
reports/kokoro_decoder_source_stft_merged_probe.json ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "acceptance_criteria": {
3
+ "all_outputs_finite": true,
4
+ "merged_vs_split_waveform_max_abs_error": 1e-06,
5
+ "tail_after_valid_max_abs": 1e-07,
6
+ "valid_samples_exact": true
7
+ },
8
+ "artifact": "/tmp/robot-kokoro-litert/kokoro_decoder_source_stft_merged.tflite",
9
+ "artifact_bytes": 216280440,
10
+ "candidate_acceptance": {
11
+ "accepted": true,
12
+ "failures": []
13
+ },
14
+ "fixture_results": {
15
+ "counting": {
16
+ "merged_output_finite": true,
17
+ "merged_tail_after_valid": 0.0,
18
+ "merged_vs_split_composed_waveform": {
19
+ "all_outputs_finite": true,
20
+ "candidate_rms": 0.04797517691684405,
21
+ "candidate_shape": [
22
+ 1,
23
+ 1,
24
+ 64200
25
+ ],
26
+ "max_abs_error": 0.0,
27
+ "mean_abs_error": 0.0,
28
+ "reference_rms": 0.04797517691684405,
29
+ "reference_shape": [
30
+ 1,
31
+ 1,
32
+ 64200
33
+ ],
34
+ "rms_error": 0.0,
35
+ "snr_db": Infinity
36
+ },
37
+ "merged_vs_split_valid_samples": {
38
+ "candidate_shape": [
39
+ 1
40
+ ],
41
+ "max_abs_error": 0.0,
42
+ "mean_abs_error": 0.0,
43
+ "reference_shape": [
44
+ 1
45
+ ]
46
+ },
47
+ "split_composition_metrics_against_pytorch": {
48
+ "all_outputs_finite": true,
49
+ "candidate_rms": 0.04797517691684405,
50
+ "candidate_shape": [
51
+ 1,
52
+ 1,
53
+ 64200
54
+ ],
55
+ "max_abs_error": 5.751848220825195e-06,
56
+ "mean_abs_error": 3.035724242542549e-07,
57
+ "reference_rms": 0.04797517190639091,
58
+ "reference_shape": [
59
+ 1,
60
+ 1,
61
+ 64200
62
+ ],
63
+ "rms_error": 5.925892404353504e-07,
64
+ "snr_db": 98.16525555604315
65
+ },
66
+ "valid_samples": 64200
67
+ },
68
+ "hi_will": {
69
+ "merged_output_finite": true,
70
+ "merged_tail_after_valid": 0.0,
71
+ "merged_vs_split_composed_waveform": {
72
+ "all_outputs_finite": true,
73
+ "candidate_rms": 0.04477709541035824,
74
+ "candidate_shape": [
75
+ 1,
76
+ 1,
77
+ 36000
78
+ ],
79
+ "max_abs_error": 0.0,
80
+ "mean_abs_error": 0.0,
81
+ "reference_rms": 0.04477709541035824,
82
+ "reference_shape": [
83
+ 1,
84
+ 1,
85
+ 36000
86
+ ],
87
+ "rms_error": 0.0,
88
+ "snr_db": Infinity
89
+ },
90
+ "merged_vs_split_valid_samples": {
91
+ "candidate_shape": [
92
+ 1
93
+ ],
94
+ "max_abs_error": 0.0,
95
+ "mean_abs_error": 0.0,
96
+ "reference_shape": [
97
+ 1
98
+ ]
99
+ },
100
+ "split_composition_metrics_against_pytorch": {
101
+ "all_outputs_finite": true,
102
+ "candidate_rms": 0.04477709541035824,
103
+ "candidate_shape": [
104
+ 1,
105
+ 1,
106
+ 36000
107
+ ],
108
+ "max_abs_error": 6.459653377532959e-06,
109
+ "mean_abs_error": 2.8408125768818335e-07,
110
+ "reference_rms": 0.04477708741488783,
111
+ "reference_shape": [
112
+ 1,
113
+ 1,
114
+ 36000
115
+ ],
116
+ "rms_error": 6.784404965979393e-07,
117
+ "snr_db": 96.39088155060315
118
+ },
119
+ "valid_samples": 36000
120
+ }
121
+ },
122
+ "gpu_delegate_note": "This proves one-buffer graph packaging. GPU persistence still depends on delegate partitioning; KokoroSourceStft remains a CPU custom op unless implemented as a GPU-capable delegate/kernel.",
123
+ "merge_summary": {
124
+ "buffer_count": 2571,
125
+ "connected_tensors": {
126
+ "front_f0_curve_to_source_f0_curve": [
127
+ "front/serving_default_args_1",
128
+ "front/serving_default_args_1"
129
+ ],
130
+ "front_valid_to_source_valid_f0_frames": [
131
+ "front/serving_default_output_4_output",
132
+ "front/serving_default_output_4_output"
133
+ ],
134
+ "source_stack_to_generator_stack": [
135
+ "source/harmonic_stft_stack",
136
+ "source/harmonic_stft_stack"
137
+ ]
138
+ },
139
+ "input_names": [
140
+ "front/serving_default_args_0",
141
+ "front/serving_default_args_1",
142
+ "front/serving_default_args_2",
143
+ "front/serving_default_args_3",
144
+ "front/serving_default_args_4",
145
+ "source/initial_phase",
146
+ "source/sine_noise"
147
+ ],
148
+ "opcode_count": 48,
149
+ "operator_count": 2087,
150
+ "output_names": [
151
+ "generator/serving_default_output_0_output",
152
+ "source/valid_samples",
153
+ "generator/serving_default_output_2_output",
154
+ "generator/serving_default_output_3_output",
155
+ "generator/serving_default_output_4_output",
156
+ "generator/serving_default_output_5_output",
157
+ "generator/serving_default_output_6_output",
158
+ "generator/serving_default_output_7_output",
159
+ "generator/serving_default_output_8_output",
160
+ "generator/serving_default_output_9_output",
161
+ "generator/serving_default_output_10_output",
162
+ "generator/serving_default_output_11_output",
163
+ "generator/serving_default_output_12_output",
164
+ "generator/serving_default_output_13_output",
165
+ "generator/serving_default_output_14_output",
166
+ "generator/serving_default_output_15_output",
167
+ "generator/serving_default_output_16_output",
168
+ "generator/serving_default_output_17_output",
169
+ "generator/serving_default_output_18_output",
170
+ "generator/serving_default_output_19_output"
171
+ ],
172
+ "tensor_count": 2569
173
+ },
174
+ "runtime_contract": "one TFLite FlatBuffer and one interpreter; front and generator builtins are connected through the KokoroSourceStft custom op",
175
+ "source_stft_custom_op_shared_object": "/tmp/robot-kokoro-litert/kokoro_source_stft_custom_op_native.so",
176
+ "split_artifacts": {
177
+ "decoder_front": "/tmp/robot-kokoro-litert/kokoro_decoder_front_masked_f128.tflite",
178
+ "generator_source_stft": "/tmp/robot-kokoro-litert/kokoro_generator_source_stft_f256.tflite",
179
+ "source_stft_custom_op_native": "/tmp/robot-kokoro-litert/kokoro_source_stft_custom_op_native.tflite",
180
+ "source_stft_custom_op_shared_object": "/tmp/robot-kokoro-litert/kokoro_source_stft_custom_op_native.so"
181
+ }
182
+ }
upload.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Upload the directory containing this script to a Hugging Face model repo.
#
# Usage: upload.sh [repo_id]
#   repo_id  target repository (default: wdga/kokoro-82m-litert-runtime-preview)
set -euo pipefail

#######################################
# Resolve the script directory and run the upload.
# Arguments: $1 - optional repository id
# Returns:   exit status of the upload command
#######################################
main() {
  local repo_id
  local root_dir
  repo_id="${1:-wdga/kokoro-82m-litert-runtime-preview}"
  # Declared separately from the assignment so a failing cd is not masked.
  root_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

  local -a upload_args=(
    "${repo_id}"
    "${root_dir}"
    .
    --repo-type model
    --commit-message "Upload Kokoro LiteRT runtime preview"
  )

  uv run --with huggingface_hub hf upload "${upload_args[@]}"
}

main "$@"
voices/af_heart.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e3e7efeb4d30c354eef539d13f35aebc59e599a65257fb290a1b80755500c29
3
+ size 522502