Update app.py
Browse files
app.py
CHANGED
|
@@ -65,6 +65,7 @@ from ltx_pipelines.utils.constants import DISTILLED_SIGMA_VALUES, STAGE_2_DISTIL
|
|
| 65 |
from ltx_pipelines.utils.helpers import (
|
| 66 |
cleanup_memory,
|
| 67 |
combined_image_conditionings,
|
|
|
|
| 68 |
denoise_video_only,
|
| 69 |
encode_prompts,
|
| 70 |
simple_denoising_func,
|
|
@@ -92,7 +93,7 @@ DEFAULT_PROMPT = (
|
|
| 92 |
"Fine lunar dust lifts and drifts outward with each movement, floating "
|
| 93 |
"in slow arcs before settling back onto the ground."
|
| 94 |
)
|
| 95 |
-
DEFAULT_FRAME_RATE =
|
| 96 |
|
| 97 |
# Resolution presets: (width, height)
|
| 98 |
RESOLUTIONS = {
|
|
@@ -196,7 +197,7 @@ class LTX23DistilledA2VPipeline(DistilledPipeline):
|
|
| 196 |
dtype=dtype,
|
| 197 |
device=self.device,
|
| 198 |
)
|
| 199 |
-
video_state =
|
| 200 |
output_shape=output_shape,
|
| 201 |
conditionings=conditionings,
|
| 202 |
noiser=noiser,
|
|
@@ -221,7 +222,18 @@ class LTX23DistilledA2VPipeline(DistilledPipeline):
|
|
| 221 |
generator,
|
| 222 |
)
|
| 223 |
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
|
| 227 |
# Model repos
|
|
|
|
| 65 |
from ltx_pipelines.utils.helpers import (
|
| 66 |
cleanup_memory,
|
| 67 |
combined_image_conditionings,
|
| 68 |
+
denoise_audio_video,
|
| 69 |
denoise_video_only,
|
| 70 |
encode_prompts,
|
| 71 |
simple_denoising_func,
|
|
|
|
| 93 |
"Fine lunar dust lifts and drifts outward with each movement, floating "
|
| 94 |
"in slow arcs before settling back onto the ground."
|
| 95 |
)
|
| 96 |
+
DEFAULT_FRAME_RATE = 24.0
|
| 97 |
|
| 98 |
# Resolution presets: (width, height)
|
| 99 |
RESOLUTIONS = {
|
|
|
|
| 197 |
dtype=dtype,
|
| 198 |
device=self.device,
|
| 199 |
)
|
| 200 |
+
video_state, audio_state = denoise_audio_video(
|
| 201 |
output_shape=output_shape,
|
| 202 |
conditionings=conditionings,
|
| 203 |
noiser=noiser,
|
|
|
|
| 222 |
generator,
|
| 223 |
)
|
| 224 |
|
| 225 |
+
# If audio was provided as input, return it as-is (higher fidelity than the decoded audio).
|
| 226 |
+
# If no audio input was provided, decode the generated audio latent produced by the denoising step.
|
| 227 |
+
if original_audio is not None:
|
| 228 |
+
return decoded_video, original_audio
|
| 229 |
+
else:
|
| 230 |
+
from ltx_core.model.audio_vae import decode_audio as vae_decode_audio
|
| 231 |
+
generated_audio = vae_decode_audio(
|
| 232 |
+
audio_state.latent,
|
| 233 |
+
self.model_ledger.audio_decoder(),
|
| 234 |
+
self.model_ledger.vocoder(),
|
| 235 |
+
)
|
| 236 |
+
return decoded_video, generated_audio
|
| 237 |
|
| 238 |
|
| 239 |
# Model repos
|