Testing0

Running on Zero

App Files Files Community

dagloop5 committed on Mar 18

Commit

c1892c6

verified ·

1 Parent(s): c21a9b3

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -12

app.py CHANGED Viewed

@@ -60,6 +60,10 @@ from ltx_pipelines.utils.helpers import (
     encode_prompts,
     simple_denoising_func,
 )
 from ltx_pipelines.utils.media_io import decode_audio_from_file, encode_video
 # Force-patch xformers attention into the LTX attention module.
@@ -271,12 +275,34 @@ print(f"Checkpoint: {checkpoint_path}")
 print(f"Spatial upsampler: {spatial_upsampler_path}")
 print(f"Gemma root: {gemma_root}")
 # Initialize pipeline WITH text encoder and optional audio support
 pipeline = LTX23DistilledA2VPipeline(
     distilled_checkpoint_path=checkpoint_path,
     spatial_upsampler_path=spatial_upsampler_path,
     gemma_root=gemma_root,
-    loras=[],
     quantization=QuantizationPolicy.fp8_cast(),
 )
@@ -293,15 +319,6 @@ _spatial_upsampler = ledger.spatial_upsampler()
 _text_encoder = ledger.text_encoder()
 _embeddings_processor = ledger.gemma_embeddings_processor()
-ledger.transformer = lambda: _transformer
-ledger.video_encoder = lambda: _video_encoder
-ledger.video_decoder = lambda: _video_decoder
-ledger.audio_encoder = lambda: _audio_encoder
-ledger.audio_decoder = lambda: _audio_decoder
-ledger.vocoder = lambda: _vocoder
-ledger.spatial_upsampler = lambda: _spatial_upsampler
-ledger.text_encoder = lambda: _text_encoder
-ledger.gemma_embeddings_processor = lambda: _embeddings_processor
 print("All models preloaded (including Gemma text encoder and audio encoder)!")
 print("=" * 80)
@@ -347,7 +364,7 @@ def on_highres_toggle(first_image, last_image, high_res):
     return gr.update(value=w), gr.update(value=h)
-@spaces.GPU(duration=75)
 @torch.inference_mode()
 def generate_video(
     first_image,
@@ -360,6 +377,9 @@ def generate_video(
     randomize_seed: bool = True,
     height: int = 1024,
     width: int = 1536,
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
@@ -368,6 +388,42 @@ def generate_video(
         current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
         frame_rate = DEFAULT_FRAME_RATE
         num_frames = int(duration * frame_rate) + 1
         num_frames = ((num_frames - 1 + 7) // 8) * 8 + 1
@@ -464,9 +520,12 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
                 with gr.Row():
                     enhance_prompt = gr.Checkbox(label="Enhance Prompt", value=False)
                     high_res = gr.Checkbox(label="High Resolution", value=True)
         with gr.Column():
-            output_video = gr.Video(label="Generated Video", autoplay=True)
     gr.Examples(
         examples=[
@@ -517,6 +576,7 @@ with gr.Blocks(title="LTX-2.3 Heretic Distilled") as demo:
         inputs=[
             first_image, last_image, input_audio, prompt, duration, enhance_prompt,
             seed, randomize_seed, height, width,
         ],
         outputs=[output_video, seed],
     )

     encode_prompts,
     simple_denoising_func,
 )
+from ltx_core.loader.primitives import LoraPathStrengthAndSDOps
+from ltx_core.loader.sd_ops import LTXV_LORA_COMFY_RENAMING_MAP
 from ltx_pipelines.utils.media_io import decode_audio_from_file, encode_video
 # Force-patch xformers attention into the LTX attention module.
 print(f"Spatial upsampler: {spatial_upsampler_path}")
 print(f"Gemma root: {gemma_root}")
+# Download the LoRAs we want to support and prepare helper to create LoraPathStrengthAndSDOps
+# All three LoRA files are requested from the same Hub repository.
+LORA_REPO = "dagloop5/LoRA"
+# One hf_hub_download call per LoRA file; the results are kept at module level
+# because build_loras_tuple reads these three names when it fills in `path=`.
+pose_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="pose_enhancer.safetensors")
+general_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="general_enhancer.safetensors")
+motion_lora_path = hf_hub_download(repo_id=LORA_REPO, filename="motion_helper.safetensors")
+# Log the three resolved values so the startup output shows which files are in use.
+print(f"Downloaded LoRAs: {pose_lora_path}, {general_lora_path}, {motion_lora_path}")
+def build_loras_tuple(pose_strength: float, general_strength: float, motion_strength: float):
+    """
+    Build the LoRA descriptors for the pose, general and motion LoRAs.
+
+    Each strength is coerced with float() and paired with the matching
+    module-level LoRA path (coerced with str()); every descriptor uses
+    LTXV_LORA_COMFY_RENAMING_MAP as its sd_ops.
+
+    Returns a 3-tuple of LoraPathStrengthAndSDOps ordered pose, general, motion.
+    """
+    # Keep the order fixed: callers compare strengths positionally.
+    path_strength_pairs = (
+        (pose_lora_path, pose_strength),
+        (general_lora_path, general_strength),
+        (motion_lora_path, motion_strength),
+    )
+    return tuple(
+        LoraPathStrengthAndSDOps(
+            path=str(lora_path),
+            strength=float(lora_strength),
+            sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
+        )
+        for lora_path, lora_strength in path_strength_pairs
+    )
+# Initial LoRA strengths in (pose, general, motion) order; change the defaults here.
+# build_loras_tuple returns a tuple of three LoraPathStrengthAndSDOps descriptors.
+INITIAL_LORAS = build_loras_tuple(1.0, 1.0, 1.0)
 # Initialize pipeline WITH text encoder and optional audio support
 pipeline = LTX23DistilledA2VPipeline(
     distilled_checkpoint_path=checkpoint_path,
     spatial_upsampler_path=spatial_upsampler_path,
     gemma_root=gemma_root,
+    # Pass the descriptors as a flat list. Wrapping the tuple as [INITIAL_LORAS]
+    # would hand the pipeline a single tuple element instead of three LoRA
+    # descriptors, and code that reads `lora.strength` per entry would fail.
+    loras=list(INITIAL_LORAS),
     quantization=QuantizationPolicy.fp8_cast(),
 )
 _text_encoder = ledger.text_encoder()
 _embeddings_processor = ledger.gemma_embeddings_processor()
 print("All models preloaded (including Gemma text encoder and audio encoder)!")
 print("=" * 80)
     return gr.update(value=w), gr.update(value=h)
+@spaces.GPU(duration=80)
 @torch.inference_mode()
 def generate_video(
     first_image,
     randomize_seed: bool = True,
     height: int = 1024,
     width: int = 1536,
+    pose_lora_strength: float = 1.0,
+    general_lora_strength: float = 1.0,
+    motion_lora_strength: float = 1.0,
     progress=gr.Progress(track_tqdm=True),
 ):
     try:
         current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+                # --- LoRA dynamic update: rebuild ledger models in-place when strengths change ---
+        try:
+            current_ledger = pipeline.model_ledger
+            # helper to compare strengths quickly
+            def _get_current_strengths(ledger_obj):
+                return tuple(float(lora.strength) for lora in getattr(ledger_obj, "loras", ()))
+            requested_strengths = (float(pose_lora_strength), float(general_lora_strength), float(motion_lora_strength))
+            if _get_current_strengths(current_ledger) != requested_strengths:
+                # build new tuple and replace ledger.loras
+                current_ledger.loras = build_loras_tuple(*requested_strengths)
+                # clear cached model instances so new models are constructed with the new LoRAs
+                # (ModelLedger builds models on first access using its configured `loras`)
+                try:
+                    current_ledger.clear_vram()
+                except Exception:
+                    # `clear_vram` should exist; if it doesn't, fall back to deleting cached attrs
+                    for k in list(vars(current_ledger).keys()):
+                        if k in ("_transformer", "_video_encoder", "_video_decoder", "_audio_encoder", "_audio_decoder", "_vocoder", "_spatial_upsampler", "_text_encoder", "_gemma_embeddings_processor"):
+                            vars(current_ledger).pop(k, None)
+                # Now pre-load the models again (ensures they are on-device before pipeline call)
+                _ = current_ledger.transformer()
+                _ = current_ledger.video_encoder()
+                _ = current_ledger.video_decoder()
+                _ = current_ledger.audio_encoder()
+                _ = current_ledger.audio_decoder()
+                _ = current_ledger.vocoder()
+                _ = current_ledger.spatial_upsampler()
+                _ = current_ledger.text_encoder()
+                _ = current_ledger.gemma_embeddings_processor()
+                torch.cuda.empty_cache()
+        except Exception as e:
+            # if this fails, we still proceed with the existing pipeline (safer to continue than to crash)
+            print(f"[LoRA rebuild warning] Could not update LoRA strengths in-place: {e}")
+        # --- end LoRA update ---
         frame_rate = DEFAULT_FRAME_RATE
         num_frames = int(duration * frame_rate) + 1
         num_frames = ((num_frames - 1 + 7) // 8) * 8 + 1
                 with gr.Row():
                     enhance_prompt = gr.Checkbox(label="Enhance Prompt", value=False)
                     high_res = gr.Checkbox(label="High Resolution", value=True)
+                pose_lora_strength = gr.Slider(label="Pose LoRA Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
+                general_lora_strength = gr.Slider(label="General LoRA Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
+                motion_lora_strength = gr.Slider(label="Motion LoRA Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
         with gr.Column():
+            output_video = gr.Video(label="Generated Video", autoplay=False)
     gr.Examples(
         examples=[
         inputs=[
             first_image, last_image, input_audio, prompt, duration, enhance_prompt,
             seed, randomize_seed, height, width,
+            pose_lora_strength, general_lora_strength, motion_lora_strength,
         ],
         outputs=[output_video, seed],
     )