Spaces:

dagloop5
/

Testing2

Sleeping

dagloop5 committed 6 days ago

Commit

f88c8f3

verified ·

1 Parent(s): 4e8337c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -65,6 +65,7 @@ from ltx_pipelines.utils.constants import DISTILLED_SIGMA_VALUES, STAGE_2_DISTIL
 from ltx_pipelines.utils.helpers import (
     cleanup_memory,
     combined_image_conditionings,
     denoise_video_only,
     encode_prompts,
     simple_denoising_func,
@@ -92,7 +93,7 @@ DEFAULT_PROMPT = (
     "Fine lunar dust lifts and drifts outward with each movement, floating "
     "in slow arcs before settling back onto the ground."
 )
-DEFAULT_FRAME_RATE = 25.0
 # Resolution presets: (width, height)
 RESOLUTIONS = {
@@ -196,7 +197,7 @@ class LTX23DistilledA2VPipeline(DistilledPipeline):
             dtype=dtype,
             device=self.device,
         )
-        video_state = denoise_video_only(
             output_shape=output_shape,
             conditionings=conditionings,
             noiser=noiser,
@@ -221,7 +222,18 @@ class LTX23DistilledA2VPipeline(DistilledPipeline):
             generator,
         )
-        return decoded_video, original_audio
 # Model repos

 from ltx_pipelines.utils.helpers import (
     cleanup_memory,
     combined_image_conditionings,
+    denoise_audio_video,
     denoise_video_only,
     encode_prompts,
     simple_denoising_func,
     "Fine lunar dust lifts and drifts outward with each movement, floating "
     "in slow arcs before settling back onto the ground."
 )
+DEFAULT_FRAME_RATE = 24.0
 # Resolution presets: (width, height)
 RESOLUTIONS = {
             dtype=dtype,
             device=self.device,
         )
+        video_state, audio_state = denoise_audio_video(
             output_shape=output_shape,
             conditionings=conditionings,
             noiser=noiser,
             generator,
         )
+        # If audio was provided as input, return it as-is (higher fidelity than decoded)
+        # If no audio input, decode the generated audio latent from the denoising
+        if original_audio is not None:
+            return decoded_video, original_audio
+        else:
+            from ltx_core.model.audio_vae import decode_audio as vae_decode_audio
+            generated_audio = vae_decode_audio(
+                audio_state.latent,
+                self.model_ledger.audio_decoder(),
+                self.model_ledger.vocoder(),
+            )
+            return decoded_video, generated_audio
 # Model repos