Duplicate from ResembleAI/Dramabox
Browse files
Co-authored-by: Manmay Nakhashi <Manmay@users.noreply.huggingface.co>
- .gitattributes +44 -0
- LICENSE +381 -0
- README.md +193 -0
- assets/Dramabox.png +3 -0
- assets/silence_latent_frame.pt +3 -0
- config.json +42 -0
- dramabox-audio-components.safetensors +3 -0
- dramabox-dit-v1.safetensors +3 -0
- samples/01_queen_sighs_rage.wav +3 -0
- samples/04_catgirl_giggles_snort.wav +3 -0
- samples/06_arnie_panting_triumph.wav +3 -0
- samples/09_villain_sinister_laugh.wav +3 -0
- samples/13_conan_wheezing_laughter.wav +3 -0
- samples/refs/01_queen_sighs_rage.wav +3 -0
- samples/refs/04_catgirl_giggles_snort.wav +3 -0
- samples/refs/09_villain_sinister_laugh.wav +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
samples/01_queen_sighs_rage.wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
samples/refs/01_queen_sighs_rage.wav filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
samples/04_catgirl_giggles_snort.wav filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
samples/refs/04_catgirl_giggles_snort.wav filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
samples/06_arnie_panting_triumph.wav filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
samples/09_villain_sinister_laugh.wav filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
samples/refs/09_villain_sinister_laugh.wav filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
samples/13_conan_wheezing_laughter.wav filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
assets/Dramabox.png filter=lfs diff=lfs merge=lfs -text
|
LICENSE
ADDED
|
@@ -0,0 +1,381 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LTX-2 Community License Agreement
|
| 2 |
+
License date: January 5, 2026
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
By using or distributing any portion or element of LTX-2, you agree
|
| 6 |
+
to be bound by this Agreement.
|
| 7 |
+
|
| 8 |
+
1. Definitions.
|
| 9 |
+
|
| 10 |
+
"Agreement" means the terms and conditions for the license, use,
|
| 11 |
+
reproduction, and distribution of LTX-2 and the Complementary
|
| 12 |
+
Materials, as specified in this document.
|
| 13 |
+
|
| 14 |
+
"Control" means the direct or indirect ownership of more than
|
| 15 |
+
fifty percent (50%) of the voting securities or other ownership
|
| 16 |
+
interests, or the power to direct the management and policies of
|
| 17 |
+
such Entity through voting rights, contract, or otherwise.
|
| 18 |
+
|
| 19 |
+
"Data" means a collection of information and/or content extracted
|
| 20 |
+
from the dataset used with LTX-2, including to train, pretrain,
|
| 21 |
+
or otherwise evaluate LTX-2. The Data is not licensed under this
|
| 22 |
+
Agreement.
|
| 23 |
+
|
| 24 |
+
"Derivatives of LTX-2" means all modifications to LTX-2, works
|
| 25 |
+
based on LTX-2, or any other model which is created or initialized
|
| 26 |
+
by transfer of patterns of the weights, parameters, activations or
|
| 27 |
+
output of LTX-2, to the other model, in order to cause the other
|
| 28 |
+
model to perform similarly to LTX-2, including – but not limited
|
| 29 |
+
to - distillation methods entailing the use of intermediate data
|
| 30 |
+
representations or methods based on the generation of synthetic
|
| 31 |
+
data by LTX-2 for training the other model. For clarity, Derivatives
|
| 32 |
+
of LTX-2 include: (i) any fine-tuned or adapted weights, parameters,
|
| 33 |
+
or checkpoints derived from LTX-2; (ii) derivative model architectures
|
| 34 |
+
that incorporate or are based upon LTX-2's architecture; and
|
| 35 |
+
(iii) any modified or extended versions of the Complementary
|
| 36 |
+
Materials. All intellectual property rights in Derivatives of LTX-2
|
| 37 |
+
shall be subject to the terms of this Agreement, and you may not
|
| 38 |
+
claim exclusive ownership rights in any Derivatives of LTX-2 that
|
| 39 |
+
would restrict the rights granted herein.
|
| 40 |
+
|
| 41 |
+
"Entity" means any individual, corporation, partnership, limited
|
| 42 |
+
liability company, or other legal entity. For purposes of this
|
| 43 |
+
Agreement, an Entity shall be deemed to include, on an aggregative
|
| 44 |
+
basis, all subsidiaries, affiliates, and other companies under
|
| 45 |
+
common Control with such Entity. When determining whether an Entity
|
| 46 |
+
meets any threshold under this Agreement (including revenue
|
| 47 |
+
thresholds), all subsidiaries, affiliates, and companies under
|
| 48 |
+
common Control shall be considered collectively.
|
| 49 |
+
|
| 50 |
+
"Harm" includes but is not limited to physical, mental,
|
| 51 |
+
psychological, financial and reputational damage, pain, or loss.
|
| 52 |
+
|
| 53 |
+
"Licensor" or "Lightricks" means the owner that is granting the
|
| 54 |
+
license under this Agreement. For the purposes of this Agreement,
|
| 55 |
+
the Licensor is Lightricks Ltd.
|
| 56 |
+
|
| 57 |
+
"LTX-2" means the large language models, text/image/video/audio/3D
|
| 58 |
+
generation models, and multimodal large language models and their
|
| 59 |
+
software and algorithms, including trained model weights, parameters
|
| 60 |
+
(including optimizer states), machine-learning model code,
|
| 61 |
+
inference-enabling code, training-enabling code, fine-tuning
|
| 62 |
+
enabling code, accompanying source code, scripts, documentation,
|
| 63 |
+
tutorials, examples, and all other elements of the foregoing
|
| 64 |
+
distributed and made publicly available by Lightricks (including,
|
| 65 |
+
for example, at https://github.com/Lightricks/LTX-2) for the LTX-2
|
| 66 |
+
model released on January 5, 2026. This license is applicable to
|
| 67 |
+
all LTX-2 versions released since January 5, 2026, and all future
|
| 68 |
+
releases of LTX-2 under this license.
|
| 69 |
+
|
| 70 |
+
"Output" means the results of operating LTX-2 as embodied in
|
| 71 |
+
informational content resulting therefrom.
|
| 72 |
+
|
| 73 |
+
"you" (or "your") means an individual or legal Entity licensing
|
| 74 |
+
LTX-2 in accordance with this Agreement and/or making use of LTX-2
|
| 75 |
+
for whichever purpose and in any field of use, including usage of
|
| 76 |
+
LTX-2 in an end-use application - e.g. chatbot, translator, image
|
| 77 |
+
generator.
|
| 78 |
+
|
| 79 |
+
2. Grant of License. Subject to the terms and conditions of this
|
| 80 |
+
Agreement, you are granted a non-exclusive, worldwide,
|
| 81 |
+
non-transferable and royalty-free limited license under Licensor's
|
| 82 |
+
intellectual property or other rights owned by Licensor embodied
|
| 83 |
+
in LTX-2 to use, reproduce, prepare, distribute, publicly display,
|
| 84 |
+
publicly perform, sublicense, copy, create derivative works of,
|
| 85 |
+
and make modifications to LTX-2, for any purpose, subject to the
|
| 86 |
+
restrictions set forth in Attachment A; provided however, that
|
| 87 |
+
Entities with annual revenues of at least $10,000,000 (the
|
| 88 |
+
"Commercial Entities") are required to obtain a paid commercial
|
| 89 |
+
use license in order to use LTX-2 and Derivatives of LTX-2,
|
| 90 |
+
subject to the terms and provisions of a different license (the
|
| 91 |
+
"Commercial Use Agreement"), as will be provided by the Licensor.
|
| 92 |
+
Commercial Entities interested in such a commercial license are
|
| 93 |
+
required to [contact Licensor](https://ltx.io/model/licensing).
|
| 94 |
+
Any commercial use of LTX-2 or Derivatives of LTX-2 by the
|
| 95 |
+
Commercial Entities not in accordance with this Agreement and/or
|
| 96 |
+
the Commercial Use Agreement is strictly prohibited and shall be
|
| 97 |
+
deemed a material breach of this Agreement. Such material breach
|
| 98 |
+
will be subject, in addition to any license fees owed to Licensor
|
| 99 |
+
for the period such Commercial Entity used LTX-2 (as will be
|
| 100 |
+
determined by Licensor), to liquidated damages, which will be paid
|
| 101 |
+
to Licensor immediately upon demand, in an amount equal to double
|
| 102 |
+
the amount that would otherwise have been paid by you for the
|
| 103 |
+
relevant period of time. Such amount reflects a reasonable estimation
|
| 104 |
+
of the losses and administrative costs incurred due to such breach.
|
| 105 |
+
You agree and understand that this remedy does not limit the Licensor's
|
| 106 |
+
right to pursue other remedies available at law or equity.
|
| 107 |
+
|
| 108 |
+
3. Distribution and Redistribution. You may host for third parties
|
| 109 |
+
remote access purposes (e.g. software-as-a-service), reproduce
|
| 110 |
+
and distribute copies of LTX-2 or Derivatives of LTX-2 thereof in
|
| 111 |
+
any medium, with or without modifications, provided that you meet
|
| 112 |
+
the following conditions:
|
| 113 |
+
|
| 114 |
+
(a) Use-based restrictions as referenced in paragraph 4 and all
|
| 115 |
+
provisions of Attachment A MUST be included as an enforceable
|
| 116 |
+
provision by you in any type of legal agreement (e.g. a
|
| 117 |
+
license) governing the use and/or distribution of LTX-2 or
|
| 118 |
+
Derivatives of LTX-2, and you shall give notice to subsequent
|
| 119 |
+
users you distribute to, that LTX-2 or Derivatives of LTX-2
|
| 120 |
+
are subject to paragraph 4 and Attachment A in their entirety,
|
| 121 |
+
including all use restrictions and acceptable use policies;
|
| 122 |
+
|
| 123 |
+
(b) You must provide any third party recipients of LTX-2 or
|
| 124 |
+
Derivatives of LTX-2 a copy of this Agreement, including all
|
| 125 |
+
attachments and use policies. Any Derivative of LTX-2 (as
|
| 126 |
+
defined in Section 1, including but not limited to fine-tuned
|
| 127 |
+
weights, modified training code, models trained on Outputs, or
|
| 128 |
+
any other derivative) must be distributed exclusively under
|
| 129 |
+
the terms of this Agreement with a complete copy of this
|
| 130 |
+
license included;
|
| 131 |
+
|
| 132 |
+
(c) You must cause any modified files to carry prominent notices
|
| 133 |
+
stating that you changed the files;
|
| 134 |
+
|
| 135 |
+
(d) You must retain all copyright, patent, trademark, and
|
| 136 |
+
attribution notices excluding those notices that do not
|
| 137 |
+
pertain to any part of LTX-2, Derivatives of LTX-2.
|
| 138 |
+
|
| 139 |
+
You may add your own copyright statement to your modifications and
|
| 140 |
+
may provide additional or different license terms and conditions -
|
| 141 |
+
respecting paragraph 3(a) - for use, reproduction, or distribution
|
| 142 |
+
of your modifications, or for any such Derivatives of LTX-2 as a
|
| 143 |
+
whole, provided your use, reproduction, and distribution of LTX-2
|
| 144 |
+
otherwise complies with the conditions stated in this Agreement,
|
| 145 |
+
and you provide a complete copy of this Agreement with any such
|
| 146 |
+
use, reproduction and distribution of LTX-2 and any Derivatives
|
| 147 |
+
thereof.
|
| 148 |
+
|
| 149 |
+
4. Use-based restrictions. The restrictions set forth in Attachment A
|
| 150 |
+
are considered Use-based restrictions. Therefore, you cannot use
|
| 151 |
+
LTX-2 and the Derivatives of LTX-2 in violation of the specified
|
| 152 |
+
restricted uses. You may use LTX-2 subject to this Agreement,
|
| 153 |
+
including only for lawful purposes and in accordance with the
|
| 154 |
+
Agreement. "Use" may include creating any content with, fine-tuning,
|
| 155 |
+
updating, running, training, evaluating and/or re-parametrizing
|
| 156 |
+
LTX-2. You shall require all of your users who use LTX-2 or a
|
| 157 |
+
Derivative of LTX-2 to comply with the terms of this paragraph 4.
|
| 158 |
+
|
| 159 |
+
5. The Output You Generate. Except as set forth herein, Licensor
|
| 160 |
+
claims no rights in the Output you generate using LTX-2. You are
|
| 161 |
+
accountable for input you insert into LTX-2, the Output you
|
| 162 |
+
generate and its subsequent uses. No use of the Output can
|
| 163 |
+
contravene any provision as stated in the Agreement.
|
| 164 |
+
|
| 165 |
+
6. Updates and Runtime Restrictions. To the maximum extent permitted
|
| 166 |
+
by law, Licensor reserves the right to restrict (remotely or
|
| 167 |
+
otherwise) usage of LTX-2 in violation of this Agreement, update
|
| 168 |
+
LTX-2 through electronic means, or modify the Output of LTX-2
|
| 169 |
+
based on updates. You shall undertake reasonable efforts to use
|
| 170 |
+
the latest version of LTX-2. Any use of the non-current version
|
| 171 |
+
of LTX-2 is done solely at your risk.
|
| 172 |
+
|
| 173 |
+
7. Export Controls and Sanctions Compliance. You acknowledge that
|
| 174 |
+
LTX-2, Derivatives of LTX-2 may be subject to export control laws
|
| 175 |
+
and regulations, including but not limited to the U.S. Export
|
| 176 |
+
Administration Regulations and sanctions programs administered by
|
| 177 |
+
the Office of Foreign Assets Control (OFAC). You represent and
|
| 178 |
+
warrant that you and any users of LTX-2 are not (i) located in,
|
| 179 |
+
organized under the laws of, or ordinarily resident in any country
|
| 180 |
+
or territory subject to comprehensive sanctions; (ii) identified
|
| 181 |
+
on any U.S. government restricted party list, including the
|
| 182 |
+
Specially Designated Nationals and Blocked Persons List; or
|
| 183 |
+
(iii) otherwise prohibited from receiving LTX-2 under applicable
|
| 184 |
+
law. You shall not export, re-export, or transfer LTX-2, directly
|
| 185 |
+
or indirectly, in violation of any applicable export control or
|
| 186 |
+
sanctions laws or regulations. You agree to comply with all
|
| 187 |
+
applicable trade control laws and shall indemnify and hold
|
| 188 |
+
Licensor harmless from any claims arising from your failure to
|
| 189 |
+
comply with such laws.
|
| 190 |
+
|
| 191 |
+
8. Trademarks and related. Nothing in this Agreement permits you to
|
| 192 |
+
make use of Licensor's trademarks, trade names, logos or to
|
| 193 |
+
otherwise suggest endorsement or misrepresent the relationship
|
| 194 |
+
between the parties; and any rights not expressly granted herein
|
| 195 |
+
are reserved by the Licensor.
|
| 196 |
+
|
| 197 |
+
9. Disclaimer of Warranty. Unless required by applicable law or
|
| 198 |
+
agreed to in writing, Licensor provides LTX-2 on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 200 |
+
implied, including, without limitation, any warranties or
|
| 201 |
+
conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS
|
| 202 |
+
FOR A PARTICULAR PURPOSE. You are solely responsible for
|
| 203 |
+
determining the appropriateness of using or redistributing LTX-2
|
| 204 |
+
and Derivatives of LTX-2 and assume any risks associated with
|
| 205 |
+
your exercise of permissions under this Agreement.
|
| 206 |
+
|
| 207 |
+
10. Limitation of Liability. In no event and under no legal theory,
|
| 208 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 209 |
+
unless required by applicable law (such as deliberate and grossly
|
| 210 |
+
negligent acts) or agreed to in writing, shall Licensor be liable
|
| 211 |
+
to you for damages, including any direct, indirect, special,
|
| 212 |
+
incidental, or consequential damages of any character arising as
|
| 213 |
+
a result of this Agreement or out of the use or inability to use
|
| 214 |
+
LTX-2 (including but not limited to damages for loss of goodwill,
|
| 215 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 216 |
+
other commercial damages or losses), even if Licensor has been
|
| 217 |
+
advised of the possibility of such damages.
|
| 218 |
+
|
| 219 |
+
11. Accepting Warranty or Additional Liability. While redistributing
|
| 220 |
+
LTX-2 and Derivatives of LTX-2, you may, provided you do not
|
| 221 |
+
violate the terms of this Agreement, choose to offer and charge
|
| 222 |
+
a fee for, acceptance of support, warranty, indemnity, or other
|
| 223 |
+
liability obligations. However, in accepting such obligations,
|
| 224 |
+
you may act only on your own behalf and on your sole
|
| 225 |
+
responsibility, not on behalf of Licensor, and only if you agree
|
| 226 |
+
to indemnify, defend, and hold Licensor harmless for any liability
|
| 227 |
+
incurred by, or claims asserted against Licensor, by reason of
|
| 228 |
+
your accepting any such warranty or additional liability.
|
| 229 |
+
|
| 230 |
+
12. Governing Law. This Agreement and all relations, disputes, claims
|
| 231 |
+
and other matters arising hereunder (including non-contractual
|
| 232 |
+
disputes or claims) will be governed exclusively by, and construed
|
| 233 |
+
exclusively in accordance with, the laws of the State of New York.
|
| 234 |
+
To the extent permitted by law, choice of laws rules and the
|
| 235 |
+
United Nations Convention on Contracts for the International Sale
|
| 236 |
+
of Goods will not apply. For the purposes of adjudicating any
|
| 237 |
+
action or proceeding to enforce the terms of this Agreement, you
|
| 238 |
+
hereby irrevocably consent to the exclusive jurisdiction of, and
|
| 239 |
+
venue in, the federal and state courts located in the County of
|
| 240 |
+
New York within the State of New York. The prevailing party in
|
| 241 |
+
any claim or dispute between the parties under this Agreement
|
| 242 |
+
will be entitled to reimbursement of its reasonable attorneys'
|
| 243 |
+
fees and costs. You hereby waive the right to a trial by jury,
|
| 244 |
+
to participate in a class or representative action (including in
|
| 245 |
+
arbitration), or to combine individual proceedings in court or
|
| 246 |
+
in arbitration without the consent of all parties.
|
| 247 |
+
|
| 248 |
+
13. Term and Termination. This Agreement is effective upon your
|
| 249 |
+
acceptance and continues until terminated. Licensor may terminate
|
| 250 |
+
this Agreement immediately upon written notice to you if you
|
| 251 |
+
breach any provision of this Agreement, including but not limited
|
| 252 |
+
to violations of the use restrictions in Attachment A or
|
| 253 |
+
unauthorized commercial use. Upon termination: (a) all rights
|
| 254 |
+
granted to you under this Agreement will immediately cease;
|
| 255 |
+
(b) you must immediately cease all use of LTX-2 and Derivatives
|
| 256 |
+
of LTX-2; (c) you must delete or destroy all copies of LTX-2
|
| 257 |
+
and Derivatives of LTX-2 in your possession or control; and
|
| 258 |
+
(d) you must notify any third parties to whom you distributed
|
| 259 |
+
LTX-2 or Derivatives of LTX-2 of the termination. Sections 8-13,
|
| 260 |
+
and Section 15 shall survive termination of this Agreement.
|
| 261 |
+
Termination does not relieve you of any obligations incurred
|
| 262 |
+
prior to termination, including payment obligations under
|
| 263 |
+
Section 2. In addition, if You commence a lawsuit or other
|
| 264 |
+
proceedings (including a cross-claim or counterclaim in a lawsuit)
|
| 265 |
+
against Licensor or any person or entity alleging that LTX-2 or
|
| 266 |
+
any Output, or any portion of any of the foregoing, infringe any
|
| 267 |
+
intellectual property or other right owned or licensable by you,
|
| 268 |
+
then all licenses granted to you under this Agreement shall
|
| 269 |
+
terminate as of the date such lawsuit or other proceeding is filed.
|
| 270 |
+
|
| 271 |
+
14. Disputes and Arbitration. All disputes arising in connection with
|
| 272 |
+
this Agreement shall be finally settled by arbitration under the
|
| 273 |
+
Rules of Arbitration of the International Chamber of Commerce
|
| 274 |
+
("ICC Rules"), by one (1) arbitrator appointed in accordance with
|
| 275 |
+
the ICC Rules. The seat of arbitration shall be New York, NY, USA,
|
| 276 |
+
and the proceedings shall be conducted in English. The arbitrator
|
| 277 |
+
shall be empowered to grant any relief that a court could grant.
|
| 278 |
+
Judgment on the arbitration award may be entered by any court
|
| 279 |
+
having jurisdiction thereof. Each party waives its right to a
|
| 280 |
+
trial by jury and to participate in any class or representative
|
| 281 |
+
action.
|
| 282 |
+
|
| 283 |
+
15. If any provision of this Agreement is held to be
|
| 284 |
+
invalid, illegal
|
| 285 |
+
or unenforceable, the remaining provisions shall be unaffected
|
| 286 |
+
thereby and remain valid as if such provision had not been set
|
| 287 |
+
forth herein.
|
| 288 |
+
|
| 289 |
+
END OF TERMS AND CONDITIONS
|
| 290 |
+
|
| 291 |
+
ATTACHMENT A: Use Restrictions
|
| 292 |
+
|
| 293 |
+
When using the Outputs, LTX-2 and any Derivatives thereof, you
|
| 294 |
+
will comply with the Acceptable Use Policy. In addition, you
|
| 295 |
+
agree not to use the Outputs, LTX-2 or its Derivatives in any
|
| 296 |
+
of the following ways:
|
| 297 |
+
|
| 298 |
+
1. In any way that violates any applicable national, federal,
|
| 299 |
+
state, local or international law or regulation;
|
| 300 |
+
|
| 301 |
+
2. For the purpose of exploiting, Harming or attempting to
|
| 302 |
+
exploit or Harm minors in any way;
|
| 303 |
+
|
| 304 |
+
3. To generate or disseminate false information and/or content
|
| 305 |
+
with the purpose of Harming others;
|
| 306 |
+
|
| 307 |
+
4. To generate or disseminate personal identifiable information
|
| 308 |
+
that can be used to Harm an individual;
|
| 309 |
+
|
| 310 |
+
5. To generate or disseminate information and/or content (e.g.
|
| 311 |
+
images, code, posts, articles), and place the information
|
| 312 |
+
and/or content in any context (e.g. bot generating tweets)
|
| 313 |
+
without expressly and intelligibly disclaiming that the
|
| 314 |
+
information and/or content is machine generated;
|
| 315 |
+
|
| 316 |
+
6. To defame, disparage or otherwise harass others;
|
| 317 |
+
|
| 318 |
+
7. To impersonate or attempt to impersonate (e.g. deepfakes)
|
| 319 |
+
others without their consent;
|
| 320 |
+
|
| 321 |
+
8. For fully automated decision making that adversely impacts an
|
| 322 |
+
individual's legal rights or otherwise creates or modifies a
|
| 323 |
+
binding, enforceable obligation;
|
| 324 |
+
|
| 325 |
+
9. For any use intended to or which has the effect of
|
| 326 |
+
discriminating against or Harming individuals or groups based
|
| 327 |
+
on online or offline social behavior or known or predicted
|
| 328 |
+
personal or personality characteristics;
|
| 329 |
+
|
| 330 |
+
10. To exploit any of the vulnerabilities of a specific group of
|
| 331 |
+
persons based on their age, social, physical or mental
|
| 332 |
+
characteristics, in order to materially distort the behavior
|
| 333 |
+
of a person pertaining to that group in a manner that causes
|
| 334 |
+
or is likely to cause that person or another person physical
|
| 335 |
+
or psychological Harm;
|
| 336 |
+
|
| 337 |
+
11. For any use intended to or which has the effect of
|
| 338 |
+
discriminating against individuals or groups based on legally
|
| 339 |
+
protected characteristics or categories;
|
| 340 |
+
|
| 341 |
+
12. To provide medical advice and medical results interpretation;
|
| 342 |
+
|
| 343 |
+
13. To generate or disseminate information for the purpose to be
|
| 344 |
+
used for administration of justice, law enforcement,
|
| 345 |
+
immigration or asylum processes, such as predicting an
|
| 346 |
+
individual will commit fraud/crime commitment (e.g. by text
|
| 347 |
+
profiling, drawing causal relationships between assertions
|
| 348 |
+
made in documents, indiscriminate and arbitrarily-targeted use);
|
| 349 |
+
|
| 350 |
+
14. To generate and/or disseminate malware (including – but not
|
| 351 |
+
limited to – ransomware) or any other content to be used for
|
| 352 |
+
the purpose of harming electronic systems;
|
| 353 |
+
|
| 354 |
+
15. To engage in, promote, incite, or facilitate discrimination
|
| 355 |
+
or other unlawful or harmful conduct in the provision of
|
| 356 |
+
employment, employment benefits, credit, housing, or other
|
| 357 |
+
essential goods and services;
|
| 358 |
+
|
| 359 |
+
16. To engage in, promote, incite, or facilitate the harassment,
|
| 360 |
+
abuse, threatening, or bullying of individuals or groups of
|
| 361 |
+
individuals;
|
| 362 |
+
|
| 363 |
+
17. For military, warfare, nuclear industries or applications,
|
| 364 |
+
weapons development, or any use in connection with activities
|
| 365 |
+
that may cause death, personal injury, or severe physical or
|
| 366 |
+
environmental damage;
|
| 367 |
+
|
| 368 |
+
18. For commercial use only: To train, improve, or fine-tune any
|
| 369 |
+
other machine learning model, artificial intelligence system,
|
| 370 |
+
or competing model, except for Derivatives of LTX-2 as
|
| 371 |
+
expressly permitted under this Agreement;
|
| 372 |
+
|
| 373 |
+
19. To circumvent, disable, or interfere with any technical
|
| 374 |
+
limitations, safety features, content filters, or use
|
| 375 |
+
restrictions implemented in LTX-2 by Licensor;
|
| 376 |
+
|
| 377 |
+
20. To use LTX-2 or Derivatives of LTX-2 in any product, service,
|
| 378 |
+
or application that directly competes with Licensor's
|
| 379 |
+
commercial products or services, or is designed to replace or
|
| 380 |
+
substitute Licensor's offerings in the market, without
|
| 381 |
+
obtaining a separate commercial license from Licensor.
|
README.md
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
license: other
|
| 5 |
+
license_name: ltx-2-community
|
| 6 |
+
license_link: https://huggingface.co/ResembleAI/Dramabox/blob/main/LICENSE
|
| 7 |
+
pipeline_tag: text-to-speech
|
| 8 |
+
tags:
|
| 9 |
+
- tts
|
| 10 |
+
- voice-cloning
|
| 11 |
+
- audio-generation
|
| 12 |
+
- diffusion-transformer
|
| 13 |
+
- flow-matching
|
| 14 |
+
- ltx-2
|
| 15 |
+
library_name: ltx-audio-tts
|
| 16 |
+
base_model: Lightricks/LTX-2.3
|
| 17 |
+
base_model_relation: finetune
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
<p align="center">
|
| 21 |
+
<a href="https://www.resemble.ai/learn/models/dramabox">
|
| 22 |
+
<img src="https://huggingface.co/ResembleAI/Dramabox/resolve/main/assets/Dramabox.png" alt="DramaBox" width="720"/>
|
| 23 |
+
</a>
|
| 24 |
+
</p>
|
| 25 |
+
|
| 26 |
+
# Dramabox — Expressive TTS with Voice Cloning
|
| 27 |
+
|
| 28 |
+
[Join us on Discord](https://discord.gg/rJq9cRJBJ6)
|
| 29 |
+
|
| 30 |
+
> **Built on [LTX-2](https://github.com/Lightricks/LTX-2) by Lightricks.**
|
| 31 |
+
> Dramabox is **Resemble AI's** expressive TTS, trained on top of the LTX-2.3 audio branch under the LTX-2 Community License. Huge thanks to the Lightricks team for open-sourcing the base.
|
| 32 |
+
|
| 33 |
+
*Made with ♥️ by* <a href="https://www.resemble.ai/learn/models/dramabox" target="_blank"><img width="100" alt="resemble-logo-horizontal" src="https://github.com/user-attachments/assets/35cf756b-3506-4943-9c72-c05ddfa4e525" /></a>
|
| 34 |
+
|
| 35 |
+
Dramabox is a prompt-driven TTS where **the prompt itself controls everything** — speaker identity, emotion, delivery, laughs, sighs, breaths, pauses, transitions. An optional voice reference of 10 seconds or more clones the target timbre. It is an IC-LoRA fine-tune of the **LTX-2.3 3.3B audio-only** model (Diffusion Transformer + flow matching), conditioned on Gemma 3 12B text embeddings.
|
| 36 |
+
|
| 37 |
+
| | |
|
| 38 |
+
|---|---|
|
| 39 |
+
| 🤗 Model | [`ResembleAI/Dramabox`](https://huggingface.co/ResembleAI/Dramabox) |
|
| 40 |
+
| 🎭 Demo Space | [`ResembleAI/Dramabox`](https://huggingface.co/spaces/ResembleAI/Dramabox) (ZeroGPU) |
|
| 41 |
+
| 💻 Code | [`resemble-ai/DramaBox`](https://github.com/resemble-ai/DramaBox) |
|
| 42 |
+
| 🏗️ Base model | [`Lightricks/LTX-2.3`](https://huggingface.co/Lightricks/LTX-2.3) |
|
| 43 |
+
| 📜 License | LTX-2 Community License — see [LICENSE](https://huggingface.co/ResembleAI/Dramabox/blob/main/LICENSE) |
|
| 44 |
+
|
| 45 |
+
## Quick start
|
| 46 |
+
|
| 47 |
+
### Python (warm server — recommended, ~2.5 s / generation)
|
| 48 |
+
|
| 49 |
+
```python
|
| 50 |
+
from src.inference_server import TTSServer
|
| 51 |
+
|
| 52 |
+
server = TTSServer(device="cuda") # downloads weights on first run
|
| 53 |
+
|
| 54 |
+
server.generate_to_file(
|
| 55 |
+
prompt='A woman speaks warmly, "Hello, how are you today?" '
|
| 56 |
+
'She laughs, "Hahaha, it is so good to see you!"',
|
| 57 |
+
output="output.wav",
|
| 58 |
+
voice_ref="reference.wav", # optional, 10+ seconds of target voice
|
| 59 |
+
cfg_scale=2.5,
|
| 60 |
+
stg_scale=1.5,
|
| 61 |
+
duration_multiplier=1.1,
|
| 62 |
+
seed=42,
|
| 63 |
+
)
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
### CLI
|
| 67 |
+
|
| 68 |
+
```bash
|
| 69 |
+
python src/inference.py \
|
| 70 |
+
--prompt 'A woman speaks warmly, "Hello, how are you today?"' \
|
| 71 |
+
--voice-sample reference.wav \
|
| 72 |
+
--output output.wav \
|
| 73 |
+
--cfg-scale 2.5 --stg-scale 1.5
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
## Inference parameters
|
| 77 |
+
|
| 78 |
+
| Parameter | Default | What it does |
|
| 79 |
+
|---|---|---|
|
| 80 |
+
| `prompt` | — | The scene description. Dialogue inside `"double quotes"`, stage directions outside. See "Prompt format" below. |
|
| 81 |
+
| `voice_ref` (`--voice-sample`) | `None` | Optional 10+ s audio clip whose timbre the model clones. Without it, the model picks a voice that fits the description. |
|
| 82 |
+
| `cfg_scale` | 2.5 | Classifier-free guidance — how strictly the output follows the prompt. Lower = more natural, higher = more text-faithful but more dramatic. Auto-rescaled internally to prevent clipping at high cfg (see `rescale_scale` below). |
|
| 83 |
+
| `stg_scale` | 1.5 | Skip-token guidance — applied through the perturbed transformer block path (block 29). Increases expressive emphasis without saturating like cfg. |
|
| 84 |
+
| `duration_multiplier` (`--duration-multiplier`) | 1.1 | Multiplier on the auto-estimated speech length (10 % breathing-room headroom). Only used when `gen_duration` (or `--gen-duration`) is 0. |
|
| 85 |
+
| `gen_duration` (`--gen-duration`, "Target duration" slider) | 0 (auto) | Explicit output duration in seconds. Set to 20–60 s for music or long scenes. Overrides the prompt-based estimate when > 0. |
|
| 86 |
+
| `ref_duration` (`--ref-duration`, "Reference duration" slider) | 10.0 | How many seconds of the voice reference the model conditions on (3–30 s). Longer ref → richer timbre capture, shorter ref → faster encode. |
|
| 87 |
+
| `seed` | 42 | Reproducibility. |
|
| 88 |
+
| `rescale_scale` (`--rescale-scale`) | `"auto"` | Latent-side CFG std-rescale. The default is a cfg-aware schedule (0 below cfg=2, ramping to 1.0 by cfg=10) that keeps the output peak below 0 dBFS at every cfg. Pass any float in [0, 1] to override the schedule; 0 disables rescaling. |
|
| 89 |
+
| `watermark` (`--no-watermark` to disable) | `True` | Apply [Resemble Perth](https://github.com/resemble-ai/Perth) imperceptible neural watermark to the output. Survives MP3/AAC, common edits; ≈ 100 % detection accuracy. |
|
| 90 |
+
|
| 91 |
+
## Prompt format
|
| 92 |
+
|
| 93 |
+
```
|
| 94 |
+
<speaker description>, "<dialogue>" <action direction> "<more dialogue>"
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
**Inside double quotes** — the model speaks these literally:
|
| 98 |
+
- Dialogue: `"Hello, how are you?"`
|
| 99 |
+
- Phonetic vocalisations (one word, no separators): `"Hahaha"`, `"Hehehe"`, `"Mmmmm"`, `"Ugh"`, `"Argh"`, `"Hmm"`
|
| 100 |
+
|
| 101 |
+
**Outside quotes** — stage directions interpreted as performance cues, never spoken:
|
| 102 |
+
- `She sighs deeply.` · `He clears his throat.` · `A long pause.` · `Her voice cracks.` · `He gulps nervously.`
|
| 103 |
+
|
| 104 |
+
**Avoid inside quotes** (the model will speak the word literally): `Sigh`, `Gasp`, `Cough`, `Ahem`, `Pfft`.
|
| 105 |
+
|
| 106 |
+
## Sample outputs
|
| 107 |
+
|
| 108 |
+
### Regal Queen — Cold Fury to Venomous Whisper
|
| 109 |
+
|
| 110 |
+
> A regal woman speaks with cold fury in a measured, low voice. She sighs deeply, "I have told you a thousand times, and yet here we are again." Her voice sharpens with rising anger, "Do you honestly think I enjoy repeating myself?! Do you?!" She lets out a cold, mocking laugh, "Hahaha, how utterly pathetic you are." She drops to a venomous whisper, leaning close, "Now get out of my sight before I do something we will both regret."
|
| 111 |
+
|
| 112 |
+
**Reference**
|
| 113 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr16/efqN7-b6HWE/ltx-tts-eval/expressive/refs/01_queen_sighs_rage.wav"></audio>
|
| 114 |
+
|
| 115 |
+
**Generated**
|
| 116 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr16/efqN7-b6HWE/ltx-tts-eval/expressive/generated/01_queen_sighs_rage.wav"></audio>
|
| 117 |
+
|
| 118 |
+
### Catgirl — Uncontrollable Giggling
|
| 119 |
+
|
| 120 |
+
> A playful girl speaks in a bright, singsong voice, already mid-giggle, "Hehehe, oh my gosh you should see your face right now, it is priceless!" She gasps for air between giggles, "Oh my, hehe, oh my, I cannot stop laughing!" She tries to compose herself with a long sigh, "Ahhhhh okay okay okay, I will stop, I promise I will stop." She leans in and whispers conspiratorially, "But seriously though, between you and me," then immediately loses it again, "Haha, no I, hehehe, I just cannot! You are way too funny, haha!" She snorts mid-laugh, "Pfft, oh no no no, that was so embarrassing, pretend you did not hear that!"
|
| 121 |
+
|
| 122 |
+
**Reference**
|
| 123 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr16/efqN7-b6HWE/ltx-tts-eval/expressive/refs/04_catgirl_giggles_snort.wav"></audio>
|
| 124 |
+
|
| 125 |
+
**Generated**
|
| 126 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr16/efqN7-b6HWE/ltx-tts-eval/expressive/generated/04_catgirl_giggles_snort.wav"></audio>
|
| 127 |
+
|
| 128 |
+
### Villain — Sinister Laugh
|
| 129 |
+
|
| 130 |
+
> A deep-voiced villain speaks with theatrical menace, chuckling softly at first, "Heh heh heh, ha ha ha ha ha! Oh, forgive me, forgive me." He catches his breath with a sinister grin, He clears his throat. "It is just SO amusing when they struggle, is it not?" His voice drips with contempt, "I expected more from you, truly I did. How disappointing." He leans in close and whispers with vicious intensity, "But fear not, my dear. The REAL entertainment has only just begun." He chuckles one last time, "Heh heh heh."
|
| 131 |
+
|
| 132 |
+
**Reference**
|
| 133 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr16/efqN7-b6HWE/ltx-tts-eval/expressive/refs/09_villain_sinister_laugh.wav"></audio>
|
| 134 |
+
|
| 135 |
+
**Generated**
|
| 136 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr16/efqN7-b6HWE/ltx-tts-eval/expressive/generated/09_villain_sinister_laugh.wav"></audio>
|
| 137 |
+
|
| 138 |
+
### Talk Show Host — Wheezing Laughter
|
| 139 |
+
|
| 140 |
+
> A talk show host speaks with animated enthusiasm. He gasps with exaggerated shock, "No! You did NOT just say that, tell me you did not just say that!" He bursts into uncontrollable laughter, "HAHAHA! Oh my god, oh my god!" He wheezes, barely getting words out, "I cannot, I literally cannot breathe right now!" He wipes his eyes, sniffling, "Oh that is so good, that is really genuinely good." He sighs happily, "Ahhh okay okay, let me compose myself, I am a professional." He takes one breath then immediately cracks up again, "Pfft hehehe, no I absolutely cannot, I am so sorry everybody!" He claps, "Folks, THIS, this right here, is why I love my job!"
|
| 141 |
+
|
| 142 |
+
**Reference**
|
| 143 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr16/efqN7-b6HWE/ltx-tts-eval/expressive/refs/13_conan_wheezing_laughter.mp3"></audio>
|
| 144 |
+
|
| 145 |
+
**Generated**
|
| 146 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr16/efqN7-b6HWE/ltx-tts-eval/expressive/generated/13_conan_wheezing_laughter.wav"></audio>
|
| 147 |
+
|
| 148 |
+
### Football Commentator — Martin Tyler
|
| 149 |
+
|
| 150 |
+
> Martin Tyler, a calm, authoritative English football commentator with a smooth, measured delivery, building tension gradually with precise timing and understated drama. "And here he comes… into the kitchen… opens the fridge…" he says evenly as a faint murmur of an imaginary crowd begins to rise. "You sense a moment here… the options are there…" his voice steady, observational. "Milk… eggs… leftovers… he considers them…" a slight pause, the crowd beginning to anticipate. "No… he moves past them…" a hint of intrigue enters his tone. "Now this is interesting…" The crowd grows, a low hum building behind the moment. "He's taking his time… weighing every option…" he continues calmly. A sudden hush falls. "Wait a moment… he's reaching…" The pause stretches—then— "He's got the juice!" his voice lifts, controlled but clearly excited. For a split second, silence—then the crowd detonates. "And listen to that! The place has absolutely erupted!" he says as roaring cheers, shouting, and thunderous applause fill the air. "They're on their feet—what a reaction to a moment of pure decision-making!" his voice rises just slightly above the chaos. The roar continues, echoing and relentless. "Extraordinary scenes… simply extraordinary…" he adds, letting the sound carry the moment. "And in the end… it's the juice that wins it…" he concludes as the crowd slowly begins to fade, still buzzing.
|
| 151 |
+
|
| 152 |
+
**Reference**
|
| 153 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr24/mLbkPu2Qzwo/refs/002_ltx_tts_8ng372ra.wav"></audio>
|
| 154 |
+
|
| 155 |
+
**Generated**
|
| 156 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr24/mLbkPu2Qzwo/generated/002_ltx_tts_8ng372ra.wav"></audio>
|
| 157 |
+
|
| 158 |
+
### Backstreet Boys — Pop Harmony
|
| 159 |
+
|
| 160 |
+
> Backstreet Boys, a polished late-90s boy band with five smooth, harmonizing male voices, blending in rich, emotional layers with clean pop production. "Step by step… out the door… new day… ready for more…" they sing in soft, synchronized harmony. One voice steps forward with a warm, heartfelt lead. "Keys in my hand… got my plan… heading out right on time…" The others swell behind him with lush backing vocals. "Don't be late… gotta move…" and then "city's calling my name…" Their voices rise together, smooth and uplifting. "Tell me why… every morning feels the same…" they sing with nostalgic warmth. Harmonies tighten with polished precision. "But I know… I'm on my way again…" and then "Coffee in my hand…" A brief pause, softer now. "I'm ready to go…" The full group returns in a bright, unified chorus. "We'll make it our way…" they sing with confident energy. "Through the rush, through the noise, we keep moving strong, yeah!" they finish with smooth, layered harmony and feel-good momentum.
|
| 161 |
+
|
| 162 |
+
**Reference**
|
| 163 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr24/mLbkPu2Qzwo/refs/004_00_ltx_tts_ttop_woi.wav"></audio>
|
| 164 |
+
|
| 165 |
+
**Generated**
|
| 166 |
+
<audio controls src="https://storage.googleapis.com/resemble-sampletables/Apr24/mLbkPu2Qzwo/generated/004_00_ltx_tts_ttop_woi.wav"></audio>
|
| 167 |
+
|
| 168 |
+
## Files
|
| 169 |
+
|
| 170 |
+
| File | Size | Contents |
|
| 171 |
+
|---|---|---|
|
| 172 |
+
| `dramabox-dit-v1.safetensors` | 6.6 GB | Audio-only DiT (LoRA already merged into base) |
|
| 173 |
+
| `dramabox-audio-components.safetensors` | 1.9 GB | Audio embeddings connector + audio text projection + audio VAE + vocoder |
|
| 174 |
+
| [`unsloth/gemma-3-12b-it-bnb-4bit`](https://huggingface.co/unsloth/gemma-3-12b-it-bnb-4bit) | ~8 GB | Text encoder (auto-downloaded on first run) |
|
| 175 |
+
|
| 176 |
+
**VRAM**: ~24 GB peak, warm server. **Speed**: ~2.5 s / generation on H100 once warm.
|
| 177 |
+
|
| 178 |
+
## Watermarking
|
| 179 |
+
|
| 180 |
+
Every output of `inference.py` and `TTSServer.generate_to_file` is automatically watermarked with [Resemble Perth](https://github.com/resemble-ai/Perth) — an imperceptible neural watermark that survives MP3 compression, audio editing, and common manipulations while maintaining nearly 100 % detection accuracy.
|
| 181 |
+
|
| 182 |
+
```python
|
| 183 |
+
import perth, librosa
|
| 184 |
+
wav, sr = librosa.load("output.wav", sr=None, mono=True)
|
| 185 |
+
detector = perth.PerthImplicitWatermarker()
|
| 186 |
+
print(detector.get_watermark(wav, sample_rate=sr)) # ≈ 1.0 for our outputs
|
| 187 |
+
```
|
| 188 |
+
|
| 189 |
+
Pass `--no-watermark` (CLI) or `watermark=False` (Python) to disable for debugging.
|
| 190 |
+
|
| 191 |
+
## License & acknowledgement
|
| 192 |
+
|
| 193 |
+
Dramabox is a Resemble AI fine-tune of [LTX-2](https://github.com/Lightricks/LTX-2). Distributed under the LTX-2 Community License Agreement — see [LICENSE](https://huggingface.co/ResembleAI/Dramabox/blob/main/LICENSE). Thanks again to Lightricks for releasing the base model.
|
assets/Dramabox.png
ADDED
|
Git LFS Details
|
assets/silence_latent_frame.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f73746d2163f8f1742c5de89005404ccaeeff05154bbb10a3337bf9bd13f161c
|
| 3 |
+
size 1501
|
config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "dramabox-tts",
|
| 3 |
+
"architecture": "DiT-FlowMatching",
|
| 4 |
+
"base_model": "ltx-2.3-22b-dev-audio-only",
|
| 5 |
+
"parameters": "3.3B",
|
| 6 |
+
"num_layers": 48,
|
| 7 |
+
"audio_inner_dim": 2048,
|
| 8 |
+
"audio_num_attention_heads": 32,
|
| 9 |
+
"audio_attention_head_dim": 64,
|
| 10 |
+
"audio_cross_attention_dim": 2048,
|
| 11 |
+
"denoising_steps": 30,
|
| 12 |
+
"scheduler": "euler_flow_matching",
|
| 13 |
+
"text_encoder": "google/gemma-3-12b-it-qat-q4_0-unquantized",
|
| 14 |
+
"text_encoder_hidden_size": 3840,
|
| 15 |
+
"ic_lora": {
|
| 16 |
+
"rank": 128,
|
| 17 |
+
"alpha": 128,
|
| 18 |
+
"merged": true,
|
| 19 |
+
"training_version": "v13",
|
| 20 |
+
"text_dropout": 0.4,
|
| 21 |
+
"training_steps": "v12@3000 + v13@1000"
|
| 22 |
+
},
|
| 23 |
+
"audio": {
|
| 24 |
+
"sample_rate": 48000,
|
| 25 |
+
"vae_channels": 8,
|
| 26 |
+
"mel_bins": 16,
|
| 27 |
+
"fps": 25.0
|
| 28 |
+
},
|
| 29 |
+
"inference_defaults": {
|
| 30 |
+
"cfg_scale": 2.5,
|
| 31 |
+
"stg_scale": 1.5,
|
| 32 |
+
"rescale_scale": 0.0,
|
| 33 |
+
"modality_scale": 1.0,
|
| 34 |
+
"duration_multiplier": 1.1,
|
| 35 |
+
"seed": 42
|
| 36 |
+
},
|
| 37 |
+
"files": {
|
| 38 |
+
"transformer": "dramabox-dit-v1.safetensors",
|
| 39 |
+
"audio_components": "dramabox-audio-components.safetensors",
|
| 40 |
+
"silence_latent": "assets/silence_latent_frame.pt"
|
| 41 |
+
}
|
| 42 |
+
}
|
dramabox-audio-components.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73d50dd3e913fd1d2511a09e4a2225f60f2ede43ef629764e6d4a389422bf7d1
|
| 3 |
+
size 1942831020
|
dramabox-dit-v1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01a626525d935e8c9fb0efe124334d1e4970aeda82215d2e14ca9fe904b5c25d
|
| 3 |
+
size 6575225528
|
samples/01_queen_sighs_rage.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:758fb1412f9af73721e59a6e4c949bbd14aeda802d52786471fe3130a84a447e
|
| 3 |
+
size 4855758
|
samples/04_catgirl_giggles_snort.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d12ac377fceed7488493e6ee4ae9c8d7f9294bb64822068fc54e2e6350ca1453
|
| 3 |
+
size 7620558
|
samples/06_arnie_panting_triumph.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68b67153ba93c500254f963840b46da1439785e50b3ff432df8f9d8c3f47a035
|
| 3 |
+
size 6268878
|
samples/09_villain_sinister_laugh.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd2368c3fad976f9ce54cf8d0608a78574ab83a7584d5754c3703c9ade64fb69
|
| 3 |
+
size 5285838
|
samples/13_conan_wheezing_laughter.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8ee6e28c11c7844599213f8eebe72ab20dc43c55621ca453e16cad0609d45d3
|
| 3 |
+
size 7190478
|
samples/refs/01_queen_sighs_rage.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0bf624251cc325098863e3b5e280505c4dccfd5591e6312a1844a467b1a3f14
|
| 3 |
+
size 351616
|
samples/refs/04_catgirl_giggles_snort.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6e4a21b962c30a2644a6e7f6b5e2b0a7db8b63d2cf2efa69b009bd9b62b0bf3
|
| 3 |
+
size 414478
|
samples/refs/09_villain_sinister_laugh.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41f266980881a7c61027f73831b559dde846469e74966d37bb06c52992ae472c
|
| 3 |
+
size 349946
|