Add files using upload-large-folder tool
Browse files- distillation_data/compute_vae_latent.py +97 -0
- distillation_data/download_mixkit.py +58 -0
- distillation_data/ode_gen.sh +6 -0
- distillation_data/process_mixkit.py +161 -0
- kill_processes.sh +8 -0
- minimal_inference/bidirectional_inference.py +50 -0
- minimal_inference/longvideo_autoregressive_inference.py +88 -0
- output/wandb/debug-internal.log +6 -0
- output/wandb/debug.log +22 -0
- output/wandb/run-20250908_062833-1l2wnyo3/files/config.yaml +159 -0
- output/wandb/run-20250908_062833-1l2wnyo3/files/output.log +29 -0
- output/wandb/run-20250908_062833-1l2wnyo3/files/requirements.txt +145 -0
- output/wandb/run-20250908_062833-1l2wnyo3/files/wandb-metadata.json +93 -0
- output/wandb/run-20250908_062833-1l2wnyo3/files/wandb-summary.json +1 -0
- output/wandb/run-20250908_062833-1l2wnyo3/logs/debug-core.log +15 -0
- output/wandb/run-20250908_062833-1l2wnyo3/logs/debug-internal.log +11 -0
- output/wandb/run-20250908_062833-1l2wnyo3/logs/debug.log +24 -0
- output/wandb/run-20250908_062833-1l2wnyo3/run-1l2wnyo3.wandb +0 -0
- output/wandb/run-20250908_064634-xd44venm/files/output.log +26 -0
- output/wandb/run-20250908_064634-xd44venm/files/requirements.txt +145 -0
- output/wandb/run-20250908_064634-xd44venm/logs/debug-internal.log +6 -0
- output/wandb/run-20250908_064634-xd44venm/logs/debug.log +22 -0
- output/wandb/run-20250908_082236-gjh65qzq/files/config.yaml +159 -0
- output/wandb/run-20250908_082236-gjh65qzq/files/output.log +36 -0
- output/wandb/run-20250908_082236-gjh65qzq/files/requirements.txt +145 -0
- output/wandb/run-20250908_082236-gjh65qzq/files/wandb-metadata.json +93 -0
- output/wandb/run-20250908_082236-gjh65qzq/files/wandb-summary.json +1 -0
- output/wandb/run-20250908_082236-gjh65qzq/logs/debug-core.log +14 -0
- output/wandb/run-20250908_082236-gjh65qzq/logs/debug-internal.log +11 -0
- output/wandb/run-20250908_082236-gjh65qzq/logs/debug.log +24 -0
- output/wandb/run-20250908_082236-gjh65qzq/run-gjh65qzq.wandb +0 -0
- output/wandb/run-20250908_091215-tz5j30tc/files/config.yaml +160 -0
- output/wandb/run-20250908_091215-tz5j30tc/files/output.log +53 -0
- output/wandb/run-20250908_091215-tz5j30tc/files/requirements.txt +145 -0
- output/wandb/run-20250908_091215-tz5j30tc/files/wandb-metadata.json +93 -0
- output/wandb/run-20250908_091215-tz5j30tc/files/wandb-summary.json +1 -0
- output/wandb/run-20250908_091215-tz5j30tc/logs/debug-core.log +14 -0
- output/wandb/run-20250908_091215-tz5j30tc/logs/debug-internal.log +11 -0
- output/wandb/run-20250908_091215-tz5j30tc/logs/debug.log +24 -0
- output/wandb/run-20250908_091215-tz5j30tc/run-tz5j30tc.wandb +0 -0
- output/wandb/run-20250908_091534-f394z0xa/logs/debug-core.log +14 -0
- output/wandb/run-20250908_091953-n3vl9u22/logs/debug-core.log +7 -0
- output/wandb/run-20250909_031406-fvhxlznm/files/output.log +113 -0
- output/wandb/run-20250909_031406-fvhxlznm/files/requirements.txt +145 -0
- output/wandb/run-20250909_031406-fvhxlznm/files/wandb-metadata.json +93 -0
- output/wandb/run-20250909_031406-fvhxlznm/logs/debug-core.log +7 -0
- output/wandb/run-20250909_031406-fvhxlznm/logs/debug-internal.log +6 -0
- output/wandb/run-20250909_031406-fvhxlznm/logs/debug.log +22 -0
- train.log +0 -0
- video_processing.log +2 -0
distillation_data/compute_vae_latent.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from causvid.models.wan.wan_wrapper import WanVAEWrapper
|
| 2 |
+
from causvid.util import launch_distributed_job
|
| 3 |
+
import torch.distributed as dist
|
| 4 |
+
import imageio.v3 as iio
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
import argparse
|
| 7 |
+
import torch
|
| 8 |
+
import json
|
| 9 |
+
import math
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
torch.set_grad_enabled(False)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def video_to_numpy(video_path):
    """
    Decode every frame of a video file into a single NumPy array.

    :param video_path: Path to the video file.
    :return: NumPy array of shape (num_frames, height, width, channels)
    """
    # The pyav plugin decodes the whole clip in one call, so the full video
    # is held in memory at once.
    frames = iio.imread(video_path, plugin="pyav")
    return frames
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def encode(self, videos: torch.Tensor) -> torch.Tensor:
    """
    Encode a batch of videos one item at a time and stack the results.

    :param self: Object exposing ``mean``, ``std`` and ``model.encode`` (this
        is a free function; the object is passed in explicitly).
    :param videos: Batched tensor; iterated along its first dimension.
    :return: The per-item encodings, cast to float32 and stacked on dim 0.
    """
    reference = videos[0]
    # The underlying encoder receives [mean, 1 / std], both moved to the
    # device and dtype of the input.
    mean = self.mean.to(device=reference.device, dtype=reference.dtype)
    inv_std = 1.0 / self.std.to(device=reference.device, dtype=reference.dtype)
    scale = [mean, inv_std]

    latents = []
    for clip in videos:
        # Add a batch dimension of 1 for the encoder, then drop it again.
        encoded = self.model.encode(clip.unsqueeze(0), scale)
        latents.append(encoded.float().squeeze(0))

    return torch.stack(latents, dim=0)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def main():
    """
    Encode every video listed in the info file into VAE latents.

    The info file is a JSON object mapping a video path (relative to
    ``--input_video_folder``) to its prompt. Videos are distributed across
    ranks round-robin; each rank writes one ``{global_index:08d}.pt`` file per
    video containing ``{prompt: latents}``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_video_folder", type=str,
                        help="Path to the folder containing input videos.")
    parser.add_argument("--output_latent_folder", type=str,
                        help="Path to the folder where output latents will be saved.")
    parser.add_argument("--info_path", type=str,
                        help="Path to the info file containing video metadata.")

    args = parser.parse_args()

    # Step 1: Setup the environment
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    torch.set_grad_enabled(False)

    # Step 2: Create the generator
    launch_distributed_job()
    device = torch.cuda.current_device()

    with open(args.info_path, "r") as f:
        video_info = json.load(f)

    model = WanVAEWrapper().to(device=device, dtype=torch.bfloat16)

    # Sorting gives every rank the same ordering, so global indices agree.
    video_paths = sorted(video_info)

    os.makedirs(args.output_latent_folder, exist_ok=True)

    world_size = dist.get_world_size()
    rank = dist.get_rank()
    num_rounds = int(math.ceil(len(video_paths) / world_size))

    for index in tqdm(range(num_rounds), disable=rank != 0):
        # Round-robin assignment: rank r handles indices r, r + W, r + 2W, ...
        global_index = index * world_size + rank
        if global_index >= len(video_paths):
            break

        video_path = video_paths[global_index]
        prompt = video_info[video_path]

        try:
            array = video_to_numpy(os.path.join(
                args.input_video_folder, video_path))
        except Exception as exc:
            # Skip unreadable videos, but do not swallow KeyboardInterrupt /
            # SystemExit, and report why the read failed.
            print(f"Failed to read video: {video_path} ({exc!r})")
            continue

        # (frames, H, W, C) -> (1, C, frames, H, W), then rescale by
        # x / 255 * 2 - 1 (assumes 8-bit input frames -- TODO confirm).
        video_tensor = torch.tensor(array, dtype=torch.float32, device=device).unsqueeze(0).permute(
            0, 4, 1, 2, 3
        ) / 255.0
        video_tensor = video_tensor * 2 - 1
        video_tensor = video_tensor.to(torch.bfloat16)
        # Swap dims 1 and 2 of the encoder output before saving.
        encoded_latents = encode(model, video_tensor).transpose(2, 1)

        torch.save(
            {prompt: encoded_latents.cpu().detach()},
            os.path.join(args.output_latent_folder, f"{global_index:08d}.pt")
        )

    # Synchronise once after the loop: ranks may process different numbers of
    # videos, so a barrier inside the loop could never be matched.
    dist.barrier()


if __name__ == "__main__":
    main()
|
distillation_data/download_mixkit.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
import tarfile
|
| 4 |
+
from huggingface_hub import snapshot_download
|
| 5 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def extract_tar(tar_path, dest_dir):
    """
    Extracts a .tar file to the specified destination directory.

    :param tar_path: Path of the archive to read.
    :param dest_dir: Directory the members are extracted into.
    :raises tarfile.TarError: If the archive is invalid or, on interpreters
        with extraction filters, a member would be written outside
        ``dest_dir``.
    """
    with tarfile.open(tar_path, 'r') as tar:
        # The archives are downloaded, so refuse members that escape dest_dir
        # (absolute paths, "..", unsafe links) where the interpreter supports
        # extraction filters; older interpreters keep the previous behaviour.
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=dest_dir, filter="data")
        else:
            tar.extractall(path=dest_dir)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def main():
    """
    Download the ``.tar`` shards of a Hugging Face dataset folder and extract
    them into ``<local_dir>/videos``.

    :raises Exception: Re-raises the first extraction error, so a failed
        archive is not silently reported as success.
    """
    parser = argparse.ArgumentParser(
        description="Download and extract dataset.")
    parser.add_argument("--local_dir", type=str, default="/mnt/localssd/",
                        help="Local directory to save the dataset.")
    parser.add_argument("--repo_id", type=str,
                        default="Languagebind/Open-Sora-Plan-v1.1.0", help="Hugging Face repository ID.")
    parser.add_argument("--folder_name", type=str, default="all_mixkit",
                        help="Folder name of the huggingface repo.")

    args = parser.parse_args()

    # Only fetch the tar shards inside the requested folder.
    allow_patterns = [f"{args.folder_name}/*.tar"]

    snapshot_download(
        repo_id=args.repo_id,
        local_dir=args.local_dir,
        revision="main",  # or the branch/tag/commit you want
        allow_patterns=allow_patterns,
        repo_type="dataset"
    )

    # Collect all .tar files recursively from the downloaded folder
    tar_files = []
    for root, _dirs, files in os.walk(args.local_dir):
        for file in files:
            if file.endswith(".tar"):
                tar_files.append(os.path.join(root, file))

    # Destination folder for extracted files
    output_dir = os.path.join(args.local_dir, "videos")
    os.makedirs(output_dir, exist_ok=True)

    # Extract each tar file in parallel
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(extract_tar, tar_path, output_dir)
                   for tar_path in tar_files]
        # Future.result() re-raises any exception from the worker; without
        # this, extraction failures would be dropped unnoticed.
        for future in futures:
            future.result()

    print("All .tar files have been downloaded and extracted to:", output_dir)


if __name__ == "__main__":
    main()
|
distillation_data/ode_gen.sh
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Prepend the NCCL, CUDA, EFA and aws-ofi-nccl library directories to the
# dynamic loader search path.
export LD_LIBRARY_PATH=/usr/local/lib/:/opt/nccl/build/lib:/usr/local/cuda/lib64:/opt/amazon/efa/lib:/opt/aws-ofi-nccl/lib:$LD_LIBRARY_PATH

# Launch generate_ode_pairs.py through torchrun on 8 nodes with 8 processes
# each, using the c10d rendezvous backend. MASTER_ADDR must already be set in
# the environment; it is used as the rendezvous endpoint.
torchrun --nnodes 8 --nproc_per_node=8 --rdzv_id=5235 \
--rdzv_backend=c10d \
--rdzv_endpoint $MASTER_ADDR causvid/models/wan/generate_ode_pairs.py \
--output_folder mixkit_ode --caption_path sample_dataset/mixkit_prompts.txt
|
distillation_data/process_mixkit.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# the following code is taken from FastVideo https://github.com/hao-ai-lab/FastVideo/tree/main
|
| 2 |
+
# Apache-2.0 License
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import logging
|
| 6 |
+
import time
|
| 7 |
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
from moviepy.editor import VideoFileClip
|
| 12 |
+
from skimage.transform import resize
|
| 13 |
+
from tqdm import tqdm
|
| 14 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 15 |
+
|
| 16 |
+
# Configure logging
|
| 17 |
+
logging.basicConfig(level=logging.INFO,
|
| 18 |
+
format='%(asctime)s - %(levelname)s - %(message)s',
|
| 19 |
+
handlers=[logging.FileHandler('video_processing.log')])
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def is_16_9_ratio(width: int, height: int, tolerance: float = 0.1) -> bool:
    """
    Report whether ``width / height`` is within a relative tolerance of 16:9.

    :param width: Frame width in pixels.
    :param height: Frame height in pixels (must be non-zero).
    :param tolerance: Allowed deviation as a fraction of 16/9.
    :return: True when the aspect ratio deviates from 16/9 by at most
        ``tolerance * 16/9``.
    """
    target_ratio = 16 / 9
    deviation = abs(width / height - target_ratio)
    allowed = target_ratio * tolerance
    return deviation <= allowed
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def resize_video(args_tuple):
    """
    Resize a single video file.

    args_tuple: (input_file, output_dir, width, height, fps)

    Any existing output with the same name is removed first. Clips that are
    not approximately 16:9 are skipped; otherwise every frame is resized to
    (height, width), the clip is cut to its first 81 / 16 seconds, and the
    result is written with libx264 / aac at ``fps``.

    :return: ``(file name, status, message)`` where status is ``"success"``,
        ``"skipped"`` or ``"failed"``; message is None on success.
    """
    input_file, output_dir, width, height, fps = args_tuple
    video = None
    output_file = output_dir / f"{input_file.name}"

    def process_frame(frame):
        # Resize in [0, 1] floats, then convert back to 8-bit.
        frame_float = frame.astype(float) / 255.0
        scaled = resize(frame_float, (height, width, 3),
                        mode='reflect', anti_aliasing=True, preserve_range=True)
        return (scaled * 255).astype(np.uint8)

    try:
        if output_file.exists():
            output_file.unlink()

        video = VideoFileClip(str(input_file))

        if not is_16_9_ratio(video.w, video.h):
            return (input_file.name, "skipped", "Not 16:9")

        resized = video.fl_image(process_frame)

        start_time = 0
        end_time = (81 / 16)

        # Crop the clip temporally using subclip
        resized = resized.subclip(start_time, end_time)

        resized.write_videofile(str(output_file),
                                codec='libx264',
                                audio_codec='aac',
                                temp_audiofile=f'temp-audio-{input_file.stem}.m4a',
                                remove_temp=True,
                                verbose=False,
                                logger=None,
                                fps=fps)

        return (input_file.name, "success", None)
    except Exception as exc:
        # Report the failure to the caller instead of aborting the whole
        # batch; the traceback goes to the configured log.
        logging.exception("Failed to process %s", input_file)
        return (input_file.name, "failed", str(exc))
    finally:
        # Always release the source clip, including on the "skipped" path.
        if video is not None:
            video.close()
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def process_folder(args):
    """
    Resize every video file found directly inside ``args.input_dir``.

    :param args: Namespace with ``input_dir``, ``output_dir``, ``width``,
        ``height`` and ``fps``.

    Files are processed concurrently in a thread pool; a summary of
    processed / skipped / failed counts is printed at the end.
    """
    input_path = Path(args.input_dir)
    output_path = Path(args.output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    video_extensions = {'.mp4', '.avi', '.mov', '.mkv', '.webm'}
    video_files = [f for f in input_path.iterdir() if f.is_file()
                   and f.suffix.lower() in video_extensions]

    if not video_files:
        print(f"No video files found in {args.input_dir}")
        return

    print(f"Found {len(video_files)} videos")
    print(f"Target: {args.width}x{args.height} at {args.fps}fps")

    # Prepare arguments for parallel processing
    process_args = [(video_file, output_path, args.width,
                     args.height, args.fps) for video_file in video_files]

    successful = 0
    skipped = 0
    failed = []

    # Each file is submitted exactly once; the first file is not processed
    # separately beforehand.
    with tqdm(total=len(video_files), desc="Converting videos", dynamic_ncols=True) as pbar:
        with ThreadPoolExecutor() as executor:
            # Submit all tasks
            future_to_file = {executor.submit(
                resize_video, arg): arg[0] for arg in process_args}

            # Process completed tasks
            for future in as_completed(future_to_file):
                try:
                    filename, status, message = future.result()
                except Exception as exc:
                    # A worker that raised counts as a failure for its file
                    # rather than aborting the remaining conversions.
                    filename = future_to_file[future].name
                    status, message = "failed", str(exc)

                if status == "success":
                    successful += 1
                elif status == "skipped":
                    skipped += 1
                else:
                    failed.append((filename, message))
                pbar.update(1)

    # Print final summary
    print(
        f"\nDone! Processed: {successful}, Skipped: {skipped}, Failed: {len(failed)}")
    if failed:
        print("Failed files:")
        for fname, error in failed:
            print(f"- {fname}: {error}")
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def parse_args():
    """
    Parse the command-line options for batch video resizing.

    :return: Namespace with ``input_dir``, ``output_dir``, ``width``,
        ``height``, ``fps`` and ``log_level``.
    """
    parser = argparse.ArgumentParser(
        description='Batch resize videos to specified resolution and FPS (16:9 only)')
    parser.add_argument('--input_dir', required=True,
                        help='Input directory containing video files')
    parser.add_argument('--output_dir', required=True,
                        help='Output directory for processed videos')
    # %(default)s is expanded by argparse, so the help text always shows the
    # real default instead of a hand-written number.
    parser.add_argument('--width', type=int, default=1280,
                        help='Target width in pixels (default: %(default)s)')
    parser.add_argument('--height', type=int, default=720,
                        help='Target height in pixels (default: %(default)s)')
    parser.add_argument('--fps', type=int, default=30,
                        help='Target frames per second (default: %(default)s)')
    parser.add_argument('--log-level',
                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
                        default='INFO',
                        help='Set the logging level (default: %(default)s)')
    return parser.parse_args()
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def main():
    """
    Entry point: parse options, apply the log level, run the batch resize and
    log how long it took. Returns early if the input directory is missing.
    """
    args = parse_args()

    root_logger = logging.getLogger()
    root_logger.setLevel(getattr(logging, args.log_level))

    input_dir = Path(args.input_dir)
    if not input_dir.exists():
        logging.error(f"Input directory not found: {args.input_dir}")
        return

    began = time.time()
    process_folder(args)
    elapsed = time.time() - began
    logging.info(f"Batch processing completed in {elapsed:.2f} seconds")


if __name__ == "__main__":
    main()
|
kill_processes.sh
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Collect the PID (second column of `ps aux`) of every process whose listing
# line contains "python"; the second grep drops the grep process itself.
# NOTE(review): `ps aux` lists processes of all users and the match is a plain
# substring, so this is not limited to the current user's training jobs.
PIDS=$(ps aux | grep python | grep -v grep | awk '{print $2}')

# Send SIGKILL to each collected PID.
for PID in $PIDS; do
    # echo "Killing Python process with PID: $PID"
    kill -9 $PID
done

# Printed unconditionally, whether or not any kill succeeded.
echo "All Python processes have been terminated."
|
minimal_inference/bidirectional_inference.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from causvid.models.wan.bidirectional_inference import BidirectionalInferencePipeline
|
| 2 |
+
from huggingface_hub import hf_hub_download
|
| 3 |
+
from diffusers.utils import export_to_video
|
| 4 |
+
from causvid.data import TextDataset
|
| 5 |
+
from omegaconf import OmegaConf
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
import argparse
|
| 8 |
+
import torch
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
# Command-line options: model config, checkpoint folder (must contain
# model.pt), output folder for the videos, and the prompt file.
parser = argparse.ArgumentParser()
parser.add_argument("--config_path", type=str)
parser.add_argument("--checkpoint_folder", type=str)
parser.add_argument("--output_folder", type=str)
parser.add_argument("--prompt_file_path", type=str)

args = parser.parse_args()

# Allow TF32 for matmul and cuDNN kernels.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

# Inference only: disable autograd globally.
torch.set_grad_enabled(False)

config = OmegaConf.load(args.config_path)

pipe = BidirectionalInferencePipeline(config, device="cuda")

# Load the checkpoint on CPU and keep only its 'generator' entry.
state_dict = torch.load(os.path.join(args.checkpoint_folder, "model.pt"), map_location="cpu")[
    'generator']

pipe.generator.load_state_dict(state_dict)

# Move the pipeline to the GPU in bfloat16 after the weights are loaded.
pipe = pipe.to(device="cuda", dtype=torch.bfloat16)

dataset = TextDataset(args.prompt_file_path)

os.makedirs(args.output_folder, exist_ok=True)

# Generate one video per prompt.
for index in tqdm(range(len(dataset))):
    prompt = dataset[index]
    # A fresh generator seeded with 42 is created on every iteration, so
    # every prompt starts from the same noise tensor.
    video = pipe.inference(
        noise=torch.randn(
            1, 21, 16, 60, 104, generator=torch.Generator(device="cuda").manual_seed(42),
            dtype=torch.bfloat16, device="cuda"
        ),
        text_prompts=[prompt]
    )[0].permute(0, 2, 3, 1).cpu().numpy()
    # The permute presumably turns (frames, C, H, W) into (frames, H, W, C)
    # for export -- TODO confirm against the pipeline's output layout.

    export_to_video(
        video, os.path.join(args.output_folder, f"output_{index:03d}.mp4"), fps=16)
|
minimal_inference/longvideo_autoregressive_inference.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from causvid.models.wan.causal_inference import InferencePipeline
|
| 2 |
+
from diffusers.utils import export_to_video
|
| 3 |
+
from causvid.data import TextDataset
|
| 4 |
+
from omegaconf import OmegaConf
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
import numpy as np
|
| 7 |
+
import argparse
|
| 8 |
+
import torch
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
# Command-line options: model config, checkpoint folder (must contain
# model.pt), prompt file, output folder, number of rollouts per prompt, and
# how many latent frames consecutive rollouts share.
parser = argparse.ArgumentParser()
parser.add_argument("--config_path", type=str)
parser.add_argument("--checkpoint_folder", type=str)
parser.add_argument("--prompt_file_path", type=str)
parser.add_argument("--output_folder", type=str)
parser.add_argument("--num_rollout", type=int, default=3)
parser.add_argument("--num_overlap_frames", type=int, default=3)

args = parser.parse_args()

# Inference only: disable autograd globally.
torch.set_grad_enabled(False)

config = OmegaConf.load(args.config_path)

pipeline = InferencePipeline(config, device="cuda")
pipeline.to(device="cuda", dtype=torch.bfloat16)
# NOTE(review): this input check is an assert, so it is skipped under
# `python -O`.
assert args.num_overlap_frames % pipeline.num_frame_per_block == 0, "num_overlap_frames must be divisible by num_frame_per_block"

# Load the checkpoint on CPU and keep only its 'generator' entry.
state_dict = torch.load(os.path.join(args.checkpoint_folder, "model.pt"), map_location="cpu")[
    'generator']

# strict=True: the checkpoint keys must match the generator exactly.
pipeline.generator.load_state_dict(
    state_dict, strict=True
)

dataset = TextDataset(args.prompt_file_path)

num_rollout = args.num_rollout

os.makedirs(args.output_folder, exist_ok=True)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def encode(self, videos: torch.Tensor) -> torch.Tensor:
    """
    Encode a batch of videos one item at a time and stack the results.

    :param self: Object exposing ``mean``, ``std`` and ``model.encode`` (this
        is a free function; the object is passed in explicitly).
    :param videos: Batched tensor; iterated along its first dimension.
    :return: The per-item encodings, cast to float32 and stacked on dim 0.
    """
    reference = videos[0]
    # The underlying encoder receives [mean, 1 / std], both moved to the
    # device and dtype of the input.
    mean = self.mean.to(device=reference.device, dtype=reference.dtype)
    inv_std = 1.0 / self.std.to(device=reference.device, dtype=reference.dtype)
    scale = [mean, inv_std]

    latents = []
    for clip in videos:
        # Add a batch dimension of 1 for the encoder, then drop it again.
        encoded = self.model.encode(clip.unsqueeze(0), scale)
        latents.append(encoded.float().squeeze(0))

    return torch.stack(latents, dim=0)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# For each prompt, generate `num_rollout` consecutive segments and join them
# into one long video.
for prompt_index in tqdm(range(len(dataset))):
    prompts = [dataset[prompt_index]]
    # The first rollout has no conditioning latents.
    start_latents = None
    all_video = []

    for rollout_index in range(num_rollout):
        sampled_noise = torch.randn(
            [1, 21, 16, 60, 104], device="cuda", dtype=torch.bfloat16
        )

        video, latents = pipeline.inference(
            noise=sampled_noise,
            text_prompts=prompts,
            return_latents=True,
            start_latents=start_latents
        )

        # Presumably (frames, C, H, W) -> (frames, H, W, C) for export --
        # TODO confirm against the pipeline's output layout.
        current_video = video[0].permute(0, 2, 3, 1).cpu().numpy()

        # Re-encode the single frame at the start of the overlap window
        # (rescaled with x * 2 - 1) so it can seed the next rollout.
        # NOTE(review): with num_overlap_frames == 1 the slice becomes
        # video[:, -1:0], which is empty -- confirm that value is never used.
        start_frame = encode(pipeline.vae, (
            video[:, -4 * (args.num_overlap_frames - 1) - 1:-4 * (args.num_overlap_frames - 1), :] * 2.0 - 1.0
        ).transpose(2, 1).to(torch.bfloat16)).transpose(2, 1).to(torch.bfloat16)

        # Conditioning for the next rollout: the re-encoded frame followed by
        # the last (num_overlap_frames - 1) latent frames of this rollout.
        start_latents = torch.cat(
            [start_frame, latents[:, -(args.num_overlap_frames - 1):]], dim=1
        )

        # Drop the trailing overlap frames from this segment; this also
        # applies to the final rollout.
        all_video.append(current_video[:-(4 * (args.num_overlap_frames - 1) + 1)])

    video = np.concatenate(all_video, axis=0)

    export_to_video(
        video, os.path.join(args.output_folder, f"long_video_output_{prompt_index:03d}.mp4"), fps=16)
|
output/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-09T03:14:06.827346748Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
|
| 2 |
+
{"time":"2025-09-09T03:14:07.041670446Z","level":"INFO","msg":"stream: created new stream","id":"fvhxlznm"}
|
| 3 |
+
{"time":"2025-09-09T03:14:07.04170638Z","level":"INFO","msg":"stream: started","id":"fvhxlznm"}
|
| 4 |
+
{"time":"2025-09-09T03:14:07.04171651Z","level":"INFO","msg":"writer: started","stream_id":"fvhxlznm"}
|
| 5 |
+
{"time":"2025-09-09T03:14:07.041726675Z","level":"INFO","msg":"handler: started","stream_id":"fvhxlznm"}
|
| 6 |
+
{"time":"2025-09-09T03:14:07.041739753Z","level":"INFO","msg":"sender: started","stream_id":"fvhxlznm"}
|
output/wandb/debug.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
|
| 2 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Configure stats pid to 1234529
|
| 3 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
|
| 4 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
|
| 5 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from environment variables
|
| 6 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250909_031406-fvhxlznm/logs/debug.log
|
| 7 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250909_031406-fvhxlznm/logs/debug-internal.log
|
| 8 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():813] calling init triggers
|
| 9 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model_name': 'causal_wan', 'generator_ckpt': 'pretrained_ode.pt', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 6553852, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
|
| 11 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():854] starting backend
|
| 12 |
+
2025-09-09 03:14:06,816 INFO MainThread:1234529 [wandb_init.py:init():857] sending inform_init request
|
| 13 |
+
2025-09-09 03:14:06,821 INFO MainThread:1234529 [wandb_init.py:init():865] backend started and connected
|
| 14 |
+
2025-09-09 03:14:06,825 INFO MainThread:1234529 [wandb_init.py:init():936] updated telemetry
|
| 15 |
+
2025-09-09 03:14:06,830 INFO MainThread:1234529 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-09-09 03:14:07,304 INFO MainThread:1234529 [wandb_init.py:init():1011] starting run threads in backend
|
| 17 |
+
2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_console_start():2494] atexit reg
|
| 18 |
+
2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2342] redirect: wrap_raw
|
| 19 |
+
2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2411] Wrapping output streams.
|
| 20 |
+
2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2434] Redirects installed.
|
| 21 |
+
2025-09-09 03:14:07,417 INFO MainThread:1234529 [wandb_init.py:init():1057] run started, returning control to user process
|
| 22 |
+
2025-09-09 03:14:07,902 INFO MainThread:1234529 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
|
output/wandb/run-20250908_062833-1l2wnyo3/files/config.yaml
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.21.3
|
| 4 |
+
code_path: source-self-forcing-causvid_train_ode.py
|
| 5 |
+
e:
|
| 6 |
+
aqq0exsxdyb4vz63hwlotexs67e6hip5:
|
| 7 |
+
args:
|
| 8 |
+
- --config_path
|
| 9 |
+
- configs/wan_causal_ode.yaml
|
| 10 |
+
codePath: causvid/train_ode.py
|
| 11 |
+
codePathLocal: causvid/train_ode.py
|
| 12 |
+
cpu_count: 48
|
| 13 |
+
cpu_count_logical: 96
|
| 14 |
+
cudaVersion: "12.8"
|
| 15 |
+
disk:
|
| 16 |
+
/:
|
| 17 |
+
total: "2079114358784"
|
| 18 |
+
used: "1366588243968"
|
| 19 |
+
email: liyitong.thu@gmail.com
|
| 20 |
+
executable: /home/yitongli/miniconda3/envs/causvid/bin/python3.10
|
| 21 |
+
git:
|
| 22 |
+
commit: adb6a5ecd07666b4d0290042915c8406e6d5ce22
|
| 23 |
+
remote: https://github.com/tianweiy/CausVid.git
|
| 24 |
+
gpu: NVIDIA A100-SXM4-80GB
|
| 25 |
+
gpu_count: 8
|
| 26 |
+
gpu_nvidia:
|
| 27 |
+
- architecture: Ampere
|
| 28 |
+
cudaCores: 6912
|
| 29 |
+
memoryTotal: "85899345920"
|
| 30 |
+
name: NVIDIA A100-SXM4-80GB
|
| 31 |
+
uuid: GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949
|
| 32 |
+
- architecture: Ampere
|
| 33 |
+
cudaCores: 6912
|
| 34 |
+
memoryTotal: "85899345920"
|
| 35 |
+
name: NVIDIA A100-SXM4-80GB
|
| 36 |
+
uuid: GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42
|
| 37 |
+
- architecture: Ampere
|
| 38 |
+
cudaCores: 6912
|
| 39 |
+
memoryTotal: "85899345920"
|
| 40 |
+
name: NVIDIA A100-SXM4-80GB
|
| 41 |
+
uuid: GPU-1f47056a-9c2c-8af1-8232-636203622830
|
| 42 |
+
- architecture: Ampere
|
| 43 |
+
cudaCores: 6912
|
| 44 |
+
memoryTotal: "85899345920"
|
| 45 |
+
name: NVIDIA A100-SXM4-80GB
|
| 46 |
+
uuid: GPU-38de1dff-a680-ec66-625b-49f2a31e3b42
|
| 47 |
+
- architecture: Ampere
|
| 48 |
+
cudaCores: 6912
|
| 49 |
+
memoryTotal: "85899345920"
|
| 50 |
+
name: NVIDIA A100-SXM4-80GB
|
| 51 |
+
uuid: GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204
|
| 52 |
+
- architecture: Ampere
|
| 53 |
+
cudaCores: 6912
|
| 54 |
+
memoryTotal: "85899345920"
|
| 55 |
+
name: NVIDIA A100-SXM4-80GB
|
| 56 |
+
uuid: GPU-194ed489-5bfa-defc-85b1-812953e7d84a
|
| 57 |
+
- architecture: Ampere
|
| 58 |
+
cudaCores: 6912
|
| 59 |
+
memoryTotal: "85899345920"
|
| 60 |
+
name: NVIDIA A100-SXM4-80GB
|
| 61 |
+
uuid: GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4
|
| 62 |
+
- architecture: Ampere
|
| 63 |
+
cudaCores: 6912
|
| 64 |
+
memoryTotal: "85899345920"
|
| 65 |
+
name: NVIDIA A100-SXM4-80GB
|
| 66 |
+
uuid: GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057
|
| 67 |
+
host: ip-172-31-3-169
|
| 68 |
+
memory:
|
| 69 |
+
total: "1204521443328"
|
| 70 |
+
os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.39
|
| 71 |
+
program: /home/yitongli/CausVid/causvid/train_ode.py
|
| 72 |
+
python: CPython 3.10.18
|
| 73 |
+
root: ./output
|
| 74 |
+
startedAt: "2025-09-08T06:28:33.977989Z"
|
| 75 |
+
writerId: aqq0exsxdyb4vz63hwlotexs67e6hip5
|
| 76 |
+
m: []
|
| 77 |
+
python_version: 3.10.18
|
| 78 |
+
t:
|
| 79 |
+
"1":
|
| 80 |
+
- 1
|
| 81 |
+
- 11
|
| 82 |
+
- 41
|
| 83 |
+
- 49
|
| 84 |
+
- 71
|
| 85 |
+
- 83
|
| 86 |
+
- 105
|
| 87 |
+
"2":
|
| 88 |
+
- 1
|
| 89 |
+
- 11
|
| 90 |
+
- 41
|
| 91 |
+
- 49
|
| 92 |
+
- 71
|
| 93 |
+
- 83
|
| 94 |
+
- 105
|
| 95 |
+
"3":
|
| 96 |
+
- 16
|
| 97 |
+
- 17
|
| 98 |
+
"4": 3.10.18
|
| 99 |
+
"5": 0.21.3
|
| 100 |
+
"6": 4.56.1
|
| 101 |
+
"12": 0.21.3
|
| 102 |
+
"13": linux-x86_64
|
| 103 |
+
batch_size:
|
| 104 |
+
value: 2
|
| 105 |
+
beta1:
|
| 106 |
+
value: 0.9
|
| 107 |
+
beta2:
|
| 108 |
+
value: 0.999
|
| 109 |
+
data_path:
|
| 110 |
+
value: ../mixkit_ode_lmdb
|
| 111 |
+
denoising_step_list:
|
| 112 |
+
value:
|
| 113 |
+
- 1000
|
| 114 |
+
- 757
|
| 115 |
+
- 522
|
| 116 |
+
- 0
|
| 117 |
+
distillation_loss:
|
| 118 |
+
value: ode
|
| 119 |
+
generator_fsdp_wrap_strategy:
|
| 120 |
+
value: size
|
| 121 |
+
generator_grad:
|
| 122 |
+
value:
|
| 123 |
+
model: true
|
| 124 |
+
generator_task:
|
| 125 |
+
value: causal_video
|
| 126 |
+
gradient_checkpointing:
|
| 127 |
+
value: true
|
| 128 |
+
log_iters:
|
| 129 |
+
value: 200
|
| 130 |
+
lr:
|
| 131 |
+
value: 2e-06
|
| 132 |
+
mixed_precision:
|
| 133 |
+
value: true
|
| 134 |
+
model_name:
|
| 135 |
+
value: causal_wan
|
| 136 |
+
no_save:
|
| 137 |
+
value: false
|
| 138 |
+
num_frame_per_block:
|
| 139 |
+
value: 3
|
| 140 |
+
output_path:
|
| 141 |
+
value: ./output
|
| 142 |
+
seed:
|
| 143 |
+
value: 8706203
|
| 144 |
+
sharding_strategy:
|
| 145 |
+
value: hybrid_full
|
| 146 |
+
text_encoder_fsdp_wrap_strategy:
|
| 147 |
+
value: size
|
| 148 |
+
wandb_entity:
|
| 149 |
+
value: liyitong-Tsinghua University
|
| 150 |
+
wandb_host:
|
| 151 |
+
value: https://api.wandb.ai
|
| 152 |
+
wandb_key:
|
| 153 |
+
value: 5409d3b960b01b25cec0f6abb5361b4022f0cc41
|
| 154 |
+
wandb_name:
|
| 155 |
+
value: wan_causal_ode
|
| 156 |
+
wandb_project:
|
| 157 |
+
value: self-forcing
|
| 158 |
+
warp_denoising_step:
|
| 159 |
+
value: false
|
output/wandb/run-20250908_062833-1l2wnyo3/files/output.log
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run dir: ./output/wandb/run-20250908_062833-1l2wnyo3/files
|
| 2 |
+
Traceback (most recent call last):
|
| 3 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 211, in <module>
|
| 4 |
+
main()
|
| 5 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 204, in main
|
| 6 |
+
trainer = Trainer(config)
|
| 7 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 51, in __init__
|
| 8 |
+
self.distillation_model = ODERegression(config, device=self.device)
|
| 9 |
+
File "/home/yitongli/CausVid/causvid/ode_regression.py", line 39, in __init__
|
| 10 |
+
self.generator.enable_gradient_checkpointing()
|
| 11 |
+
File "/home/yitongli/CausVid/causvid/models/wan/wan_wrapper.py", line 113, in enable_gradient_checkpointing
|
| 12 |
+
self.model.enable_gradient_checkpointing()
|
| 13 |
+
File "/home/yitongli/miniconda3/envs/causvid/lib/python3.10/site-packages/diffusers/models/modeling_utils.py", line 310, in enable_gradient_checkpointing
|
| 14 |
+
self._set_gradient_checkpointing(enable=True, gradient_checkpointing_func=gradient_checkpointing_func)
|
| 15 |
+
TypeError: CausalWanModel._set_gradient_checkpointing() got an unexpected keyword argument 'enable'
|
| 16 |
+
[rank0]: Traceback (most recent call last):
|
| 17 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 211, in <module>
|
| 18 |
+
[rank0]: main()
|
| 19 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 204, in main
|
| 20 |
+
[rank0]: trainer = Trainer(config)
|
| 21 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 51, in __init__
|
| 22 |
+
[rank0]: self.distillation_model = ODERegression(config, device=self.device)
|
| 23 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/ode_regression.py", line 39, in __init__
|
| 24 |
+
[rank0]: self.generator.enable_gradient_checkpointing()
|
| 25 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/models/wan/wan_wrapper.py", line 113, in enable_gradient_checkpointing
|
| 26 |
+
[rank0]: self.model.enable_gradient_checkpointing()
|
| 27 |
+
[rank0]: File "/home/yitongli/miniconda3/envs/causvid/lib/python3.10/site-packages/diffusers/models/modeling_utils.py", line 310, in enable_gradient_checkpointing
|
| 28 |
+
[rank0]: self._set_gradient_checkpointing(enable=True, gradient_checkpointing_func=gradient_checkpointing_func)
|
| 29 |
+
[rank0]: TypeError: CausalWanModel._set_gradient_checkpointing() got an unexpected keyword argument 'enable'
|
output/wandb/run-20250908_062833-1l2wnyo3/files/requirements.txt
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nvidia-cuda-nvrtc-cu12==12.8.93
|
| 2 |
+
filelock==3.19.1
|
| 3 |
+
aiofiles==24.1.0
|
| 4 |
+
PyYAML==6.0.2
|
| 5 |
+
httpx==0.28.1
|
| 6 |
+
multidict==6.6.4
|
| 7 |
+
tifffile==2025.5.10
|
| 8 |
+
tzdata==2025.2
|
| 9 |
+
urllib3==2.5.0
|
| 10 |
+
decord==0.6.0
|
| 11 |
+
certifi==2025.8.3
|
| 12 |
+
setuptools==78.1.1
|
| 13 |
+
websocket-client==1.8.0
|
| 14 |
+
gitdb==4.0.12
|
| 15 |
+
yarl==1.20.1
|
| 16 |
+
tqdm==4.67.1
|
| 17 |
+
open_clip_torch==3.1.0
|
| 18 |
+
pyparsing==3.2.3
|
| 19 |
+
fastapi==0.116.1
|
| 20 |
+
nvidia-curand-cu12==10.3.9.90
|
| 21 |
+
mdurl==0.1.2
|
| 22 |
+
torchvision==0.23.0
|
| 23 |
+
h11==0.16.0
|
| 24 |
+
pytz==2025.2
|
| 25 |
+
six==1.17.0
|
| 26 |
+
nvidia-cufile-cu12==1.13.1.3
|
| 27 |
+
aiohappyeyeballs==2.6.1
|
| 28 |
+
wandb==0.21.3
|
| 29 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 30 |
+
cycler==0.12.1
|
| 31 |
+
anyio==4.10.0
|
| 32 |
+
scikit-image==0.25.2
|
| 33 |
+
nvidia-cuda-runtime-cu12==12.8.90
|
| 34 |
+
sentencepiece==0.2.1
|
| 35 |
+
rich==14.1.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
GitPython==3.1.45
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
accelerate==1.10.1
|
| 40 |
+
proglog==0.1.12
|
| 41 |
+
sentry-sdk==2.37.0
|
| 42 |
+
nvidia-cusparselt-cu12==0.7.1
|
| 43 |
+
dashscope==1.24.3
|
| 44 |
+
platformdirs==4.4.0
|
| 45 |
+
safehttpx==0.1.6
|
| 46 |
+
fsspec==2025.9.0
|
| 47 |
+
lazy_loader==0.4
|
| 48 |
+
typing_extensions==4.15.0
|
| 49 |
+
semantic-version==2.10.0
|
| 50 |
+
numpy==1.24.4
|
| 51 |
+
diffusers==0.35.1
|
| 52 |
+
nvidia-cufft-cu12==11.3.3.83
|
| 53 |
+
ruff==0.12.12
|
| 54 |
+
nvidia-cudnn-cu12==9.10.2.21
|
| 55 |
+
annotated-types==0.7.0
|
| 56 |
+
decorator==4.4.2
|
| 57 |
+
antlr4-python3-runtime==4.9.3
|
| 58 |
+
psutil==7.0.0
|
| 59 |
+
Brotli==1.1.0
|
| 60 |
+
tomlkit==0.13.3
|
| 61 |
+
httpcore==1.0.9
|
| 62 |
+
kiwisolver==1.4.9
|
| 63 |
+
nvidia-cusparse-cu12==12.5.8.93
|
| 64 |
+
groovy==0.1.2
|
| 65 |
+
protobuf==6.32.0
|
| 66 |
+
orjson==3.11.3
|
| 67 |
+
scipy==1.15.3
|
| 68 |
+
regex==2025.9.1
|
| 69 |
+
MarkupSafe==3.0.2
|
| 70 |
+
av==13.1.0
|
| 71 |
+
timm==1.0.19
|
| 72 |
+
uvicorn==0.35.0
|
| 73 |
+
wheel==0.45.1
|
| 74 |
+
Pygments==2.19.2
|
| 75 |
+
websockets==15.0.1
|
| 76 |
+
lmdb==1.7.3
|
| 77 |
+
sympy==1.14.0
|
| 78 |
+
einops==0.8.1
|
| 79 |
+
idna==3.10
|
| 80 |
+
triton==3.4.0
|
| 81 |
+
torch==2.8.0
|
| 82 |
+
moviepy==1.0.3
|
| 83 |
+
nvidia-nvtx-cu12==12.8.90
|
| 84 |
+
matplotlib==3.10.6
|
| 85 |
+
pillow==11.3.0
|
| 86 |
+
charset-normalizer==3.4.3
|
| 87 |
+
attrs==25.3.0
|
| 88 |
+
aiosignal==1.4.0
|
| 89 |
+
markdown-it-py==4.0.0
|
| 90 |
+
requests==2.32.5
|
| 91 |
+
typer==0.17.4
|
| 92 |
+
huggingface-hub==0.34.4
|
| 93 |
+
nvidia-nccl-cu12==2.27.3
|
| 94 |
+
propcache==0.3.2
|
| 95 |
+
opencv-python==4.11.0.86
|
| 96 |
+
ffmpy==0.6.1
|
| 97 |
+
jmespath==1.0.1
|
| 98 |
+
botocore==1.40.25
|
| 99 |
+
pydantic_core==2.33.2
|
| 100 |
+
fonttools==4.59.2
|
| 101 |
+
omegaconf==2.3.0
|
| 102 |
+
pycparser==2.22
|
| 103 |
+
mpmath==1.3.0
|
| 104 |
+
flash_attn==2.8.3
|
| 105 |
+
smmap==5.0.2
|
| 106 |
+
gradio_client==1.12.1
|
| 107 |
+
exceptiongroup==1.3.0
|
| 108 |
+
cffi==1.17.1
|
| 109 |
+
nvidia-cusolver-cu12==11.7.3.90
|
| 110 |
+
contourpy==1.3.2
|
| 111 |
+
cryptography==45.0.7
|
| 112 |
+
nvidia-cuda-cupti-cu12==12.8.90
|
| 113 |
+
wcwidth==0.2.13
|
| 114 |
+
zipp==3.23.0
|
| 115 |
+
safetensors==0.6.2
|
| 116 |
+
gradio==5.44.1
|
| 117 |
+
click==8.2.1
|
| 118 |
+
frozenlist==1.7.0
|
| 119 |
+
networkx==3.4.2
|
| 120 |
+
s3transfer==0.13.1
|
| 121 |
+
shellingham==1.5.4
|
| 122 |
+
starlette==0.47.3
|
| 123 |
+
packaging==25.0
|
| 124 |
+
ftfy==6.3.1
|
| 125 |
+
importlib_metadata==8.7.0
|
| 126 |
+
transformers==4.56.1
|
| 127 |
+
nvidia-cublas-cu12==12.8.4.1
|
| 128 |
+
Jinja2==3.1.6
|
| 129 |
+
pycocotools==2.0.10
|
| 130 |
+
sniffio==1.3.1
|
| 131 |
+
hf-xet==1.1.9
|
| 132 |
+
typing-inspection==0.4.1
|
| 133 |
+
pandas==2.3.2
|
| 134 |
+
python-multipart==0.0.20
|
| 135 |
+
aiohttp==3.12.15
|
| 136 |
+
clip==1.0
|
| 137 |
+
pydub==0.25.1
|
| 138 |
+
easydict==1.13
|
| 139 |
+
pip==25.2
|
| 140 |
+
tokenizers==0.22.0
|
| 141 |
+
imageio==2.37.0
|
| 142 |
+
async-timeout==5.0.1
|
| 143 |
+
boto3==1.40.25
|
| 144 |
+
imageio-ffmpeg==0.6.0
|
| 145 |
+
CausVid==0.0.1
|
output/wandb/run-20250908_062833-1l2wnyo3/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.10.18",
|
| 4 |
+
"startedAt": "2025-09-08T06:28:33.977989Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_path",
|
| 7 |
+
"configs/wan_causal_ode.yaml"
|
| 8 |
+
],
|
| 9 |
+
"program": "/home/yitongli/CausVid/causvid/train_ode.py",
|
| 10 |
+
"codePath": "causvid/train_ode.py",
|
| 11 |
+
"codePathLocal": "causvid/train_ode.py",
|
| 12 |
+
"git": {
|
| 13 |
+
"remote": "https://github.com/tianweiy/CausVid.git",
|
| 14 |
+
"commit": "adb6a5ecd07666b4d0290042915c8406e6d5ce22"
|
| 15 |
+
},
|
| 16 |
+
"email": "liyitong.thu@gmail.com",
|
| 17 |
+
"root": "./output",
|
| 18 |
+
"host": "ip-172-31-3-169",
|
| 19 |
+
"executable": "/home/yitongli/miniconda3/envs/causvid/bin/python3.10",
|
| 20 |
+
"cpu_count": 48,
|
| 21 |
+
"cpu_count_logical": 96,
|
| 22 |
+
"gpu": "NVIDIA A100-SXM4-80GB",
|
| 23 |
+
"gpu_count": 8,
|
| 24 |
+
"disk": {
|
| 25 |
+
"/": {
|
| 26 |
+
"total": "2079114358784",
|
| 27 |
+
"used": "1366588243968"
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"memory": {
|
| 31 |
+
"total": "1204521443328"
|
| 32 |
+
},
|
| 33 |
+
"gpu_nvidia": [
|
| 34 |
+
{
|
| 35 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 36 |
+
"memoryTotal": "85899345920",
|
| 37 |
+
"cudaCores": 6912,
|
| 38 |
+
"architecture": "Ampere",
|
| 39 |
+
"uuid": "GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949"
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 43 |
+
"memoryTotal": "85899345920",
|
| 44 |
+
"cudaCores": 6912,
|
| 45 |
+
"architecture": "Ampere",
|
| 46 |
+
"uuid": "GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42"
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 50 |
+
"memoryTotal": "85899345920",
|
| 51 |
+
"cudaCores": 6912,
|
| 52 |
+
"architecture": "Ampere",
|
| 53 |
+
"uuid": "GPU-1f47056a-9c2c-8af1-8232-636203622830"
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 57 |
+
"memoryTotal": "85899345920",
|
| 58 |
+
"cudaCores": 6912,
|
| 59 |
+
"architecture": "Ampere",
|
| 60 |
+
"uuid": "GPU-38de1dff-a680-ec66-625b-49f2a31e3b42"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 64 |
+
"memoryTotal": "85899345920",
|
| 65 |
+
"cudaCores": 6912,
|
| 66 |
+
"architecture": "Ampere",
|
| 67 |
+
"uuid": "GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204"
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 71 |
+
"memoryTotal": "85899345920",
|
| 72 |
+
"cudaCores": 6912,
|
| 73 |
+
"architecture": "Ampere",
|
| 74 |
+
"uuid": "GPU-194ed489-5bfa-defc-85b1-812953e7d84a"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 78 |
+
"memoryTotal": "85899345920",
|
| 79 |
+
"cudaCores": 6912,
|
| 80 |
+
"architecture": "Ampere",
|
| 81 |
+
"uuid": "GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 85 |
+
"memoryTotal": "85899345920",
|
| 86 |
+
"cudaCores": 6912,
|
| 87 |
+
"architecture": "Ampere",
|
| 88 |
+
"uuid": "GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057"
|
| 89 |
+
}
|
| 90 |
+
],
|
| 91 |
+
"cudaVersion": "12.8",
|
| 92 |
+
"writerId": "aqq0exsxdyb4vz63hwlotexs67e6hip5"
|
| 93 |
+
}
|
output/wandb/run-20250908_062833-1l2wnyo3/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":0},"_runtime":0}
|
output/wandb/run-20250908_062833-1l2wnyo3/logs/debug-core.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-08T06:28:34.253598263Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqgyjn6i1/port-3409973.txt","pid":3409973,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-09-08T06:28:34.257091825Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3409973}
|
| 3 |
+
{"time":"2025-09-08T06:28:34.256253546Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3409973-3410209-1263294975/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2025-09-08T06:28:34.391672548Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-09-08T06:28:34.406857393Z","level":"INFO","msg":"handleInformInit: received","streamId":"1l2wnyo3","id":"1(@)"}
|
| 6 |
+
{"time":"2025-09-08T06:28:34.736516792Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"1l2wnyo3","id":"1(@)"}
|
| 7 |
+
{"time":"2025-09-08T06:28:35.917810249Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
|
| 8 |
+
{"time":"2025-09-08T06:28:35.917878105Z","level":"INFO","msg":"server is shutting down"}
|
| 9 |
+
{"time":"2025-09-08T06:28:35.917865065Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
|
| 10 |
+
{"time":"2025-09-08T06:28:35.917988283Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
|
| 11 |
+
{"time":"2025-09-08T06:28:35.917976111Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3409973-3410209-1263294975/socket","Net":"unix"}}
|
| 12 |
+
{"time":"2025-09-08T06:28:36.754084432Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write unix /tmp/wandb-3409973-3410209-1263294975/socket->@: use of closed network connection","id":"1(@)"}
|
| 13 |
+
{"time":"2025-09-08T06:28:36.932139033Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
|
| 14 |
+
{"time":"2025-09-08T06:28:36.932153507Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
|
| 15 |
+
{"time":"2025-09-08T06:28:36.932164872Z","level":"INFO","msg":"server is closed"}
|
output/wandb/run-20250908_062833-1l2wnyo3/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-08T06:28:34.406945302Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
|
| 2 |
+
{"time":"2025-09-08T06:28:34.736482072Z","level":"INFO","msg":"stream: created new stream","id":"1l2wnyo3"}
|
| 3 |
+
{"time":"2025-09-08T06:28:34.736512607Z","level":"INFO","msg":"stream: started","id":"1l2wnyo3"}
|
| 4 |
+
{"time":"2025-09-08T06:28:34.736522488Z","level":"INFO","msg":"writer: started","stream_id":"1l2wnyo3"}
|
| 5 |
+
{"time":"2025-09-08T06:28:34.7365292Z","level":"INFO","msg":"handler: started","stream_id":"1l2wnyo3"}
|
| 6 |
+
{"time":"2025-09-08T06:28:34.736574661Z","level":"INFO","msg":"sender: started","stream_id":"1l2wnyo3"}
|
| 7 |
+
{"time":"2025-09-08T06:28:35.91787343Z","level":"INFO","msg":"stream: closing","id":"1l2wnyo3"}
|
| 8 |
+
{"time":"2025-09-08T06:28:36.804459652Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 9 |
+
{"time":"2025-09-08T06:28:36.9184071Z","level":"INFO","msg":"handler: closed","stream_id":"1l2wnyo3"}
|
| 10 |
+
{"time":"2025-09-08T06:28:36.918464588Z","level":"INFO","msg":"sender: closed","stream_id":"1l2wnyo3"}
|
| 11 |
+
{"time":"2025-09-08T06:28:36.918471349Z","level":"INFO","msg":"stream: closed","id":"1l2wnyo3"}
|
output/wandb/run-20250908_062833-1l2wnyo3/logs/debug.log
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
|
| 2 |
+
2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_setup.py:_flush():81] Configure stats pid to 3409973
|
| 3 |
+
2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
|
| 4 |
+
2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
|
| 5 |
+
2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_setup.py:_flush():81] Loading settings from environment variables
|
| 6 |
+
2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250908_062833-1l2wnyo3/logs/debug.log
|
| 7 |
+
2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250908_062833-1l2wnyo3/logs/debug-internal.log
|
| 8 |
+
2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_init.py:init():813] calling init triggers
|
| 9 |
+
2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model_name': 'causal_wan', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 8706203, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
|
| 11 |
+
2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_init.py:init():854] starting backend
|
| 12 |
+
2025-09-08 06:28:34,391 INFO MainThread:3409973 [wandb_init.py:init():857] sending inform_init request
|
| 13 |
+
2025-09-08 06:28:34,398 INFO MainThread:3409973 [wandb_init.py:init():865] backend started and connected
|
| 14 |
+
2025-09-08 06:28:34,400 INFO MainThread:3409973 [wandb_init.py:init():936] updated telemetry
|
| 15 |
+
2025-09-08 06:28:34,405 INFO MainThread:3409973 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-09-08 06:28:35,106 INFO MainThread:3409973 [wandb_init.py:init():1011] starting run threads in backend
|
| 17 |
+
2025-09-08 06:28:35,269 INFO MainThread:3409973 [wandb_run.py:_console_start():2494] atexit reg
|
| 18 |
+
2025-09-08 06:28:35,270 INFO MainThread:3409973 [wandb_run.py:_redirect():2342] redirect: wrap_raw
|
| 19 |
+
2025-09-08 06:28:35,270 INFO MainThread:3409973 [wandb_run.py:_redirect():2411] Wrapping output streams.
|
| 20 |
+
2025-09-08 06:28:35,270 INFO MainThread:3409973 [wandb_run.py:_redirect():2434] Redirects installed.
|
| 21 |
+
2025-09-08 06:28:35,273 INFO MainThread:3409973 [wandb_init.py:init():1057] run started, returning control to user process
|
| 22 |
+
2025-09-08 06:28:35,672 INFO MainThread:3409973 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
|
| 23 |
+
2025-09-08 06:28:35,917 INFO wandb-AsyncioManager-main:3409973 [service_client.py:_forward_responses():84] Reached EOF.
|
| 24 |
+
2025-09-08 06:28:35,918 INFO wandb-AsyncioManager-main:3409973 [mailbox.py:close():137] Closing mailbox, abandoning 2 handles.
|
output/wandb/run-20250908_062833-1l2wnyo3/run-1l2wnyo3.wandb
ADDED
|
Binary file (7.27 kB). View file
|
|
|
output/wandb/run-20250908_064634-xd44venm/files/output.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run dir: ./output/wandb/run-20250908_064634-xd44venm/files
|
| 2 |
+
Start training step 0...
|
| 3 |
+
cache a block wise causal mask with block size of 3 frames
|
| 4 |
+
BlockMask(shape=(1, 1, 32768, 32768), sparsity=42.52%,
|
| 5 |
+
(0, 0)
|
| 6 |
+
████░░
|
| 7 |
+
████░░
|
| 8 |
+
████░░░░░░░░
|
| 9 |
+
██████████░░
|
| 10 |
+
██████████░░
|
| 11 |
+
██████████░░░░░░░░
|
| 12 |
+
████████████████░░
|
| 13 |
+
████████████████░░
|
| 14 |
+
████████████████░░░░░░░░
|
| 15 |
+
██████████████████████░░
|
| 16 |
+
██████████████████████░░
|
| 17 |
+
██████████████████████░░░░░░░░
|
| 18 |
+
████████████████████████████░░
|
| 19 |
+
████████████████████████████░░
|
| 20 |
+
████████████████████████████████░░
|
| 21 |
+
████████████████████████████████░░
|
| 22 |
+
████████████████████████████████░░░░░░░░
|
| 23 |
+
████████████████████████████████████████
|
| 24 |
+
████████████████████████████████████████
|
| 25 |
+
████████████████████████████████████████
|
| 26 |
+
)
|
output/wandb/run-20250908_064634-xd44venm/files/requirements.txt
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nvidia-cuda-nvrtc-cu12==12.8.93
|
| 2 |
+
filelock==3.19.1
|
| 3 |
+
aiofiles==24.1.0
|
| 4 |
+
PyYAML==6.0.2
|
| 5 |
+
httpx==0.28.1
|
| 6 |
+
multidict==6.6.4
|
| 7 |
+
tifffile==2025.5.10
|
| 8 |
+
tzdata==2025.2
|
| 9 |
+
urllib3==2.5.0
|
| 10 |
+
decord==0.6.0
|
| 11 |
+
certifi==2025.8.3
|
| 12 |
+
setuptools==78.1.1
|
| 13 |
+
websocket-client==1.8.0
|
| 14 |
+
gitdb==4.0.12
|
| 15 |
+
yarl==1.20.1
|
| 16 |
+
tqdm==4.67.1
|
| 17 |
+
open_clip_torch==3.1.0
|
| 18 |
+
pyparsing==3.2.3
|
| 19 |
+
fastapi==0.116.1
|
| 20 |
+
nvidia-curand-cu12==10.3.9.90
|
| 21 |
+
mdurl==0.1.2
|
| 22 |
+
torchvision==0.23.0
|
| 23 |
+
h11==0.16.0
|
| 24 |
+
pytz==2025.2
|
| 25 |
+
six==1.17.0
|
| 26 |
+
nvidia-cufile-cu12==1.13.1.3
|
| 27 |
+
aiohappyeyeballs==2.6.1
|
| 28 |
+
wandb==0.21.3
|
| 29 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 30 |
+
cycler==0.12.1
|
| 31 |
+
anyio==4.10.0
|
| 32 |
+
scikit-image==0.25.2
|
| 33 |
+
nvidia-cuda-runtime-cu12==12.8.90
|
| 34 |
+
sentencepiece==0.2.1
|
| 35 |
+
rich==14.1.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
GitPython==3.1.45
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
accelerate==1.10.1
|
| 40 |
+
proglog==0.1.12
|
| 41 |
+
sentry-sdk==2.37.0
|
| 42 |
+
nvidia-cusparselt-cu12==0.7.1
|
| 43 |
+
dashscope==1.24.3
|
| 44 |
+
platformdirs==4.4.0
|
| 45 |
+
safehttpx==0.1.6
|
| 46 |
+
fsspec==2025.9.0
|
| 47 |
+
lazy_loader==0.4
|
| 48 |
+
typing_extensions==4.15.0
|
| 49 |
+
semantic-version==2.10.0
|
| 50 |
+
numpy==1.24.4
|
| 51 |
+
nvidia-cufft-cu12==11.3.3.83
|
| 52 |
+
ruff==0.12.12
|
| 53 |
+
nvidia-cudnn-cu12==9.10.2.21
|
| 54 |
+
annotated-types==0.7.0
|
| 55 |
+
decorator==4.4.2
|
| 56 |
+
antlr4-python3-runtime==4.9.3
|
| 57 |
+
psutil==7.0.0
|
| 58 |
+
Brotli==1.1.0
|
| 59 |
+
tomlkit==0.13.3
|
| 60 |
+
httpcore==1.0.9
|
| 61 |
+
kiwisolver==1.4.9
|
| 62 |
+
nvidia-cusparse-cu12==12.5.8.93
|
| 63 |
+
groovy==0.1.2
|
| 64 |
+
protobuf==6.32.0
|
| 65 |
+
orjson==3.11.3
|
| 66 |
+
scipy==1.15.3
|
| 67 |
+
regex==2025.9.1
|
| 68 |
+
MarkupSafe==3.0.2
|
| 69 |
+
av==13.1.0
|
| 70 |
+
timm==1.0.19
|
| 71 |
+
uvicorn==0.35.0
|
| 72 |
+
wheel==0.45.1
|
| 73 |
+
Pygments==2.19.2
|
| 74 |
+
websockets==15.0.1
|
| 75 |
+
lmdb==1.7.3
|
| 76 |
+
sympy==1.14.0
|
| 77 |
+
einops==0.8.1
|
| 78 |
+
idna==3.10
|
| 79 |
+
triton==3.4.0
|
| 80 |
+
torch==2.8.0
|
| 81 |
+
moviepy==1.0.3
|
| 82 |
+
nvidia-nvtx-cu12==12.8.90
|
| 83 |
+
matplotlib==3.10.6
|
| 84 |
+
pillow==11.3.0
|
| 85 |
+
charset-normalizer==3.4.3
|
| 86 |
+
attrs==25.3.0
|
| 87 |
+
aiosignal==1.4.0
|
| 88 |
+
markdown-it-py==4.0.0
|
| 89 |
+
requests==2.32.5
|
| 90 |
+
typer==0.17.4
|
| 91 |
+
huggingface-hub==0.34.4
|
| 92 |
+
nvidia-nccl-cu12==2.27.3
|
| 93 |
+
propcache==0.3.2
|
| 94 |
+
opencv-python==4.11.0.86
|
| 95 |
+
ffmpy==0.6.1
|
| 96 |
+
jmespath==1.0.1
|
| 97 |
+
botocore==1.40.25
|
| 98 |
+
pydantic_core==2.33.2
|
| 99 |
+
fonttools==4.59.2
|
| 100 |
+
omegaconf==2.3.0
|
| 101 |
+
pycparser==2.22
|
| 102 |
+
mpmath==1.3.0
|
| 103 |
+
flash_attn==2.8.3
|
| 104 |
+
smmap==5.0.2
|
| 105 |
+
gradio_client==1.12.1
|
| 106 |
+
exceptiongroup==1.3.0
|
| 107 |
+
cffi==1.17.1
|
| 108 |
+
diffusers==0.31.0
|
| 109 |
+
nvidia-cusolver-cu12==11.7.3.90
|
| 110 |
+
contourpy==1.3.2
|
| 111 |
+
cryptography==45.0.7
|
| 112 |
+
nvidia-cuda-cupti-cu12==12.8.90
|
| 113 |
+
wcwidth==0.2.13
|
| 114 |
+
zipp==3.23.0
|
| 115 |
+
safetensors==0.6.2
|
| 116 |
+
gradio==5.44.1
|
| 117 |
+
click==8.2.1
|
| 118 |
+
frozenlist==1.7.0
|
| 119 |
+
networkx==3.4.2
|
| 120 |
+
s3transfer==0.13.1
|
| 121 |
+
shellingham==1.5.4
|
| 122 |
+
starlette==0.47.3
|
| 123 |
+
packaging==25.0
|
| 124 |
+
ftfy==6.3.1
|
| 125 |
+
importlib_metadata==8.7.0
|
| 126 |
+
transformers==4.56.1
|
| 127 |
+
nvidia-cublas-cu12==12.8.4.1
|
| 128 |
+
Jinja2==3.1.6
|
| 129 |
+
pycocotools==2.0.10
|
| 130 |
+
sniffio==1.3.1
|
| 131 |
+
hf-xet==1.1.9
|
| 132 |
+
typing-inspection==0.4.1
|
| 133 |
+
pandas==2.3.2
|
| 134 |
+
python-multipart==0.0.20
|
| 135 |
+
aiohttp==3.12.15
|
| 136 |
+
clip==1.0
|
| 137 |
+
pydub==0.25.1
|
| 138 |
+
easydict==1.13
|
| 139 |
+
pip==25.2
|
| 140 |
+
tokenizers==0.22.0
|
| 141 |
+
imageio==2.37.0
|
| 142 |
+
async-timeout==5.0.1
|
| 143 |
+
boto3==1.40.25
|
| 144 |
+
imageio-ffmpeg==0.6.0
|
| 145 |
+
CausVid==0.0.1
|
output/wandb/run-20250908_064634-xd44venm/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-08T06:46:35.017276807Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
|
| 2 |
+
{"time":"2025-09-08T06:46:35.219581537Z","level":"INFO","msg":"stream: created new stream","id":"xd44venm"}
|
| 3 |
+
{"time":"2025-09-08T06:46:35.219621685Z","level":"INFO","msg":"stream: started","id":"xd44venm"}
|
| 4 |
+
{"time":"2025-09-08T06:46:35.219634304Z","level":"INFO","msg":"sender: started","stream_id":"xd44venm"}
|
| 5 |
+
{"time":"2025-09-08T06:46:35.219635253Z","level":"INFO","msg":"handler: started","stream_id":"xd44venm"}
|
| 6 |
+
{"time":"2025-09-08T06:46:35.219656757Z","level":"INFO","msg":"writer: started","stream_id":"xd44venm"}
|
output/wandb/run-20250908_064634-xd44venm/logs/debug.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
|
| 2 |
+
2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_setup.py:_flush():81] Configure stats pid to 3454108
|
| 3 |
+
2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
|
| 4 |
+
2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
|
| 5 |
+
2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_setup.py:_flush():81] Loading settings from environment variables
|
| 6 |
+
2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250908_064634-xd44venm/logs/debug.log
|
| 7 |
+
2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250908_064634-xd44venm/logs/debug-internal.log
|
| 8 |
+
2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_init.py:init():813] calling init triggers
|
| 9 |
+
2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model_name': 'causal_wan', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 6790074, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
|
| 11 |
+
2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_init.py:init():854] starting backend
|
| 12 |
+
2025-09-08 06:46:35,006 INFO MainThread:3454108 [wandb_init.py:init():857] sending inform_init request
|
| 13 |
+
2025-09-08 06:46:35,010 INFO MainThread:3454108 [wandb_init.py:init():865] backend started and connected
|
| 14 |
+
2025-09-08 06:46:35,015 INFO MainThread:3454108 [wandb_init.py:init():936] updated telemetry
|
| 15 |
+
2025-09-08 06:46:35,020 INFO MainThread:3454108 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-09-08 06:46:35,404 INFO MainThread:3454108 [wandb_init.py:init():1011] starting run threads in backend
|
| 17 |
+
2025-09-08 06:46:35,515 INFO MainThread:3454108 [wandb_run.py:_console_start():2494] atexit reg
|
| 18 |
+
2025-09-08 06:46:35,516 INFO MainThread:3454108 [wandb_run.py:_redirect():2342] redirect: wrap_raw
|
| 19 |
+
2025-09-08 06:46:35,516 INFO MainThread:3454108 [wandb_run.py:_redirect():2411] Wrapping output streams.
|
| 20 |
+
2025-09-08 06:46:35,516 INFO MainThread:3454108 [wandb_run.py:_redirect():2434] Redirects installed.
|
| 21 |
+
2025-09-08 06:46:35,518 INFO MainThread:3454108 [wandb_init.py:init():1057] run started, returning control to user process
|
| 22 |
+
2025-09-08 06:46:35,894 INFO MainThread:3454108 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
|
output/wandb/run-20250908_082236-gjh65qzq/files/config.yaml
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.21.3
|
| 4 |
+
code_path: source-self-forcing-causvid_train_ode.py
|
| 5 |
+
e:
|
| 6 |
+
ffoakmd0zn8f6sjv7nu2r58nmtyazqxg:
|
| 7 |
+
args:
|
| 8 |
+
- --config_path
|
| 9 |
+
- configs/wan_causal_ode.yaml
|
| 10 |
+
codePath: causvid/train_ode.py
|
| 11 |
+
codePathLocal: causvid/train_ode.py
|
| 12 |
+
cpu_count: 48
|
| 13 |
+
cpu_count_logical: 96
|
| 14 |
+
cudaVersion: "12.8"
|
| 15 |
+
disk:
|
| 16 |
+
/:
|
| 17 |
+
total: "2079114358784"
|
| 18 |
+
used: "1376086970368"
|
| 19 |
+
email: liyitong.thu@gmail.com
|
| 20 |
+
executable: /home/yitongli/miniconda3/envs/causvid/bin/python3.10
|
| 21 |
+
git:
|
| 22 |
+
commit: adb6a5ecd07666b4d0290042915c8406e6d5ce22
|
| 23 |
+
remote: https://github.com/tianweiy/CausVid.git
|
| 24 |
+
gpu: NVIDIA A100-SXM4-80GB
|
| 25 |
+
gpu_count: 8
|
| 26 |
+
gpu_nvidia:
|
| 27 |
+
- architecture: Ampere
|
| 28 |
+
cudaCores: 6912
|
| 29 |
+
memoryTotal: "85899345920"
|
| 30 |
+
name: NVIDIA A100-SXM4-80GB
|
| 31 |
+
uuid: GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949
|
| 32 |
+
- architecture: Ampere
|
| 33 |
+
cudaCores: 6912
|
| 34 |
+
memoryTotal: "85899345920"
|
| 35 |
+
name: NVIDIA A100-SXM4-80GB
|
| 36 |
+
uuid: GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42
|
| 37 |
+
- architecture: Ampere
|
| 38 |
+
cudaCores: 6912
|
| 39 |
+
memoryTotal: "85899345920"
|
| 40 |
+
name: NVIDIA A100-SXM4-80GB
|
| 41 |
+
uuid: GPU-1f47056a-9c2c-8af1-8232-636203622830
|
| 42 |
+
- architecture: Ampere
|
| 43 |
+
cudaCores: 6912
|
| 44 |
+
memoryTotal: "85899345920"
|
| 45 |
+
name: NVIDIA A100-SXM4-80GB
|
| 46 |
+
uuid: GPU-38de1dff-a680-ec66-625b-49f2a31e3b42
|
| 47 |
+
- architecture: Ampere
|
| 48 |
+
cudaCores: 6912
|
| 49 |
+
memoryTotal: "85899345920"
|
| 50 |
+
name: NVIDIA A100-SXM4-80GB
|
| 51 |
+
uuid: GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204
|
| 52 |
+
- architecture: Ampere
|
| 53 |
+
cudaCores: 6912
|
| 54 |
+
memoryTotal: "85899345920"
|
| 55 |
+
name: NVIDIA A100-SXM4-80GB
|
| 56 |
+
uuid: GPU-194ed489-5bfa-defc-85b1-812953e7d84a
|
| 57 |
+
- architecture: Ampere
|
| 58 |
+
cudaCores: 6912
|
| 59 |
+
memoryTotal: "85899345920"
|
| 60 |
+
name: NVIDIA A100-SXM4-80GB
|
| 61 |
+
uuid: GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4
|
| 62 |
+
- architecture: Ampere
|
| 63 |
+
cudaCores: 6912
|
| 64 |
+
memoryTotal: "85899345920"
|
| 65 |
+
name: NVIDIA A100-SXM4-80GB
|
| 66 |
+
uuid: GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057
|
| 67 |
+
host: ip-172-31-3-169
|
| 68 |
+
memory:
|
| 69 |
+
total: "1204521443328"
|
| 70 |
+
os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.39
|
| 71 |
+
program: /home/yitongli/CausVid/causvid/train_ode.py
|
| 72 |
+
python: CPython 3.10.18
|
| 73 |
+
root: ./output
|
| 74 |
+
startedAt: "2025-09-08T08:22:36.803576Z"
|
| 75 |
+
writerId: ffoakmd0zn8f6sjv7nu2r58nmtyazqxg
|
| 76 |
+
m: []
|
| 77 |
+
python_version: 3.10.18
|
| 78 |
+
t:
|
| 79 |
+
"1":
|
| 80 |
+
- 1
|
| 81 |
+
- 11
|
| 82 |
+
- 41
|
| 83 |
+
- 49
|
| 84 |
+
- 71
|
| 85 |
+
- 83
|
| 86 |
+
- 105
|
| 87 |
+
"2":
|
| 88 |
+
- 1
|
| 89 |
+
- 11
|
| 90 |
+
- 41
|
| 91 |
+
- 49
|
| 92 |
+
- 71
|
| 93 |
+
- 83
|
| 94 |
+
- 105
|
| 95 |
+
"3":
|
| 96 |
+
- 16
|
| 97 |
+
- 17
|
| 98 |
+
"4": 3.10.18
|
| 99 |
+
"5": 0.21.3
|
| 100 |
+
"6": 4.56.1
|
| 101 |
+
"12": 0.21.3
|
| 102 |
+
"13": linux-x86_64
|
| 103 |
+
batch_size:
|
| 104 |
+
value: 2
|
| 105 |
+
beta1:
|
| 106 |
+
value: 0.9
|
| 107 |
+
beta2:
|
| 108 |
+
value: 0.999
|
| 109 |
+
data_path:
|
| 110 |
+
value: ../mixkit_ode_lmdb
|
| 111 |
+
denoising_step_list:
|
| 112 |
+
value:
|
| 113 |
+
- 1000
|
| 114 |
+
- 757
|
| 115 |
+
- 522
|
| 116 |
+
- 0
|
| 117 |
+
distillation_loss:
|
| 118 |
+
value: ode
|
| 119 |
+
generator_fsdp_wrap_strategy:
|
| 120 |
+
value: size
|
| 121 |
+
generator_grad:
|
| 122 |
+
value:
|
| 123 |
+
model: true
|
| 124 |
+
generator_task:
|
| 125 |
+
value: causal_video
|
| 126 |
+
gradient_checkpointing:
|
| 127 |
+
value: true
|
| 128 |
+
log_iters:
|
| 129 |
+
value: 200
|
| 130 |
+
lr:
|
| 131 |
+
value: 2e-06
|
| 132 |
+
mixed_precision:
|
| 133 |
+
value: true
|
| 134 |
+
model_name:
|
| 135 |
+
value: causal_wan
|
| 136 |
+
no_save:
|
| 137 |
+
value: false
|
| 138 |
+
num_frame_per_block:
|
| 139 |
+
value: 3
|
| 140 |
+
output_path:
|
| 141 |
+
value: ./output
|
| 142 |
+
seed:
|
| 143 |
+
value: 7735925
|
| 144 |
+
sharding_strategy:
|
| 145 |
+
value: hybrid_full
|
| 146 |
+
text_encoder_fsdp_wrap_strategy:
|
| 147 |
+
value: size
|
| 148 |
+
wandb_entity:
|
| 149 |
+
value: liyitong-Tsinghua University
|
| 150 |
+
wandb_host:
|
| 151 |
+
value: https://api.wandb.ai
|
| 152 |
+
wandb_key:
|
| 153 |
+
value: 5409d3b960b01b25cec0f6abb5361b4022f0cc41
|
| 154 |
+
wandb_name:
|
| 155 |
+
value: wan_causal_ode
|
| 156 |
+
wandb_project:
|
| 157 |
+
value: self-forcing
|
| 158 |
+
warp_denoising_step:
|
| 159 |
+
value: false
|
output/wandb/run-20250908_082236-gjh65qzq/files/output.log
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run dir: ./output/wandb/run-20250908_082236-gjh65qzq/files
|
| 2 |
+
KV inference with 3 frames per block
|
| 3 |
+
ODERegression initialized.
|
| 4 |
+
########### torch.Size([2, 21, 16, 60, 104])
|
| 5 |
+
Traceback (most recent call last):
|
| 6 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 229, in <module>
|
| 7 |
+
main()
|
| 8 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 223, in main
|
| 9 |
+
trainer.train()
|
| 10 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 187, in train
|
| 11 |
+
self.generate_video()
|
| 12 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 181, in generate_video
|
| 13 |
+
self.distillation_model.generate_video(ode_latent, conditional_dict)
|
| 14 |
+
File "/home/yitongli/CausVid/causvid/ode_regression.py", line 199, in generate_video
|
| 15 |
+
video = self.inference_pipeline.inference(
|
| 16 |
+
File "/home/yitongli/CausVid/causvid/models/wan/causal_inference.py", line 186, in inference
|
| 17 |
+
noisy_input = self.scheduler.add_noise(
|
| 18 |
+
File "/home/yitongli/CausVid/causvid/models/wan/flow_match.py", line 72, in add_noise
|
| 19 |
+
sample = (1 - sigma) * original_samples + sigma * noise
|
| 20 |
+
RuntimeError: The size of tensor a (2) must match the size of tensor b (6) at non-singleton dimension 0
|
| 21 |
+
[rank0]: Traceback (most recent call last):
|
| 22 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 229, in <module>
|
| 23 |
+
[rank0]: main()
|
| 24 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 223, in main
|
| 25 |
+
[rank0]: trainer.train()
|
| 26 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 187, in train
|
| 27 |
+
[rank0]: self.generate_video()
|
| 28 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 181, in generate_video
|
| 29 |
+
[rank0]: self.distillation_model.generate_video(ode_latent, conditional_dict)
|
| 30 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/ode_regression.py", line 199, in generate_video
|
| 31 |
+
[rank0]: video = self.inference_pipeline.inference(
|
| 32 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/models/wan/causal_inference.py", line 186, in inference
|
| 33 |
+
[rank0]: noisy_input = self.scheduler.add_noise(
|
| 34 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/models/wan/flow_match.py", line 72, in add_noise
|
| 35 |
+
[rank0]: sample = (1 - sigma) * original_samples + sigma * noise
|
| 36 |
+
[rank0]: RuntimeError: The size of tensor a (2) must match the size of tensor b (6) at non-singleton dimension 0
|
output/wandb/run-20250908_082236-gjh65qzq/files/requirements.txt
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nvidia-cuda-nvrtc-cu12==12.8.93
|
| 2 |
+
filelock==3.19.1
|
| 3 |
+
aiofiles==24.1.0
|
| 4 |
+
PyYAML==6.0.2
|
| 5 |
+
httpx==0.28.1
|
| 6 |
+
multidict==6.6.4
|
| 7 |
+
tifffile==2025.5.10
|
| 8 |
+
tzdata==2025.2
|
| 9 |
+
urllib3==2.5.0
|
| 10 |
+
decord==0.6.0
|
| 11 |
+
certifi==2025.8.3
|
| 12 |
+
setuptools==78.1.1
|
| 13 |
+
websocket-client==1.8.0
|
| 14 |
+
gitdb==4.0.12
|
| 15 |
+
yarl==1.20.1
|
| 16 |
+
tqdm==4.67.1
|
| 17 |
+
open_clip_torch==3.1.0
|
| 18 |
+
pyparsing==3.2.3
|
| 19 |
+
fastapi==0.116.1
|
| 20 |
+
nvidia-curand-cu12==10.3.9.90
|
| 21 |
+
mdurl==0.1.2
|
| 22 |
+
torchvision==0.23.0
|
| 23 |
+
h11==0.16.0
|
| 24 |
+
pytz==2025.2
|
| 25 |
+
six==1.17.0
|
| 26 |
+
nvidia-cufile-cu12==1.13.1.3
|
| 27 |
+
aiohappyeyeballs==2.6.1
|
| 28 |
+
wandb==0.21.3
|
| 29 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 30 |
+
cycler==0.12.1
|
| 31 |
+
anyio==4.10.0
|
| 32 |
+
scikit-image==0.25.2
|
| 33 |
+
nvidia-cuda-runtime-cu12==12.8.90
|
| 34 |
+
sentencepiece==0.2.1
|
| 35 |
+
rich==14.1.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
GitPython==3.1.45
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
accelerate==1.10.1
|
| 40 |
+
proglog==0.1.12
|
| 41 |
+
sentry-sdk==2.37.0
|
| 42 |
+
nvidia-cusparselt-cu12==0.7.1
|
| 43 |
+
dashscope==1.24.3
|
| 44 |
+
platformdirs==4.4.0
|
| 45 |
+
safehttpx==0.1.6
|
| 46 |
+
fsspec==2025.9.0
|
| 47 |
+
lazy_loader==0.4
|
| 48 |
+
typing_extensions==4.15.0
|
| 49 |
+
semantic-version==2.10.0
|
| 50 |
+
numpy==1.24.4
|
| 51 |
+
nvidia-cufft-cu12==11.3.3.83
|
| 52 |
+
ruff==0.12.12
|
| 53 |
+
nvidia-cudnn-cu12==9.10.2.21
|
| 54 |
+
annotated-types==0.7.0
|
| 55 |
+
decorator==4.4.2
|
| 56 |
+
antlr4-python3-runtime==4.9.3
|
| 57 |
+
psutil==7.0.0
|
| 58 |
+
Brotli==1.1.0
|
| 59 |
+
tomlkit==0.13.3
|
| 60 |
+
httpcore==1.0.9
|
| 61 |
+
kiwisolver==1.4.9
|
| 62 |
+
nvidia-cusparse-cu12==12.5.8.93
|
| 63 |
+
groovy==0.1.2
|
| 64 |
+
protobuf==6.32.0
|
| 65 |
+
orjson==3.11.3
|
| 66 |
+
scipy==1.15.3
|
| 67 |
+
regex==2025.9.1
|
| 68 |
+
MarkupSafe==3.0.2
|
| 69 |
+
av==13.1.0
|
| 70 |
+
timm==1.0.19
|
| 71 |
+
uvicorn==0.35.0
|
| 72 |
+
wheel==0.45.1
|
| 73 |
+
Pygments==2.19.2
|
| 74 |
+
websockets==15.0.1
|
| 75 |
+
lmdb==1.7.3
|
| 76 |
+
sympy==1.14.0
|
| 77 |
+
einops==0.8.1
|
| 78 |
+
idna==3.10
|
| 79 |
+
triton==3.4.0
|
| 80 |
+
torch==2.8.0
|
| 81 |
+
moviepy==1.0.3
|
| 82 |
+
nvidia-nvtx-cu12==12.8.90
|
| 83 |
+
matplotlib==3.10.6
|
| 84 |
+
pillow==11.3.0
|
| 85 |
+
charset-normalizer==3.4.3
|
| 86 |
+
attrs==25.3.0
|
| 87 |
+
aiosignal==1.4.0
|
| 88 |
+
markdown-it-py==4.0.0
|
| 89 |
+
requests==2.32.5
|
| 90 |
+
typer==0.17.4
|
| 91 |
+
huggingface-hub==0.34.4
|
| 92 |
+
nvidia-nccl-cu12==2.27.3
|
| 93 |
+
propcache==0.3.2
|
| 94 |
+
opencv-python==4.11.0.86
|
| 95 |
+
ffmpy==0.6.1
|
| 96 |
+
jmespath==1.0.1
|
| 97 |
+
botocore==1.40.25
|
| 98 |
+
pydantic_core==2.33.2
|
| 99 |
+
fonttools==4.59.2
|
| 100 |
+
omegaconf==2.3.0
|
| 101 |
+
pycparser==2.22
|
| 102 |
+
mpmath==1.3.0
|
| 103 |
+
flash_attn==2.8.3
|
| 104 |
+
smmap==5.0.2
|
| 105 |
+
gradio_client==1.12.1
|
| 106 |
+
exceptiongroup==1.3.0
|
| 107 |
+
cffi==1.17.1
|
| 108 |
+
diffusers==0.31.0
|
| 109 |
+
nvidia-cusolver-cu12==11.7.3.90
|
| 110 |
+
contourpy==1.3.2
|
| 111 |
+
cryptography==45.0.7
|
| 112 |
+
nvidia-cuda-cupti-cu12==12.8.90
|
| 113 |
+
wcwidth==0.2.13
|
| 114 |
+
zipp==3.23.0
|
| 115 |
+
safetensors==0.6.2
|
| 116 |
+
gradio==5.44.1
|
| 117 |
+
click==8.2.1
|
| 118 |
+
frozenlist==1.7.0
|
| 119 |
+
networkx==3.4.2
|
| 120 |
+
s3transfer==0.13.1
|
| 121 |
+
shellingham==1.5.4
|
| 122 |
+
starlette==0.47.3
|
| 123 |
+
packaging==25.0
|
| 124 |
+
ftfy==6.3.1
|
| 125 |
+
importlib_metadata==8.7.0
|
| 126 |
+
transformers==4.56.1
|
| 127 |
+
nvidia-cublas-cu12==12.8.4.1
|
| 128 |
+
Jinja2==3.1.6
|
| 129 |
+
pycocotools==2.0.10
|
| 130 |
+
sniffio==1.3.1
|
| 131 |
+
hf-xet==1.1.9
|
| 132 |
+
typing-inspection==0.4.1
|
| 133 |
+
pandas==2.3.2
|
| 134 |
+
python-multipart==0.0.20
|
| 135 |
+
aiohttp==3.12.15
|
| 136 |
+
clip==1.0
|
| 137 |
+
pydub==0.25.1
|
| 138 |
+
easydict==1.13
|
| 139 |
+
pip==25.2
|
| 140 |
+
tokenizers==0.22.0
|
| 141 |
+
imageio==2.37.0
|
| 142 |
+
async-timeout==5.0.1
|
| 143 |
+
boto3==1.40.25
|
| 144 |
+
imageio-ffmpeg==0.6.0
|
| 145 |
+
CausVid==0.0.1
|
output/wandb/run-20250908_082236-gjh65qzq/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.10.18",
|
| 4 |
+
"startedAt": "2025-09-08T08:22:36.803576Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_path",
|
| 7 |
+
"configs/wan_causal_ode.yaml"
|
| 8 |
+
],
|
| 9 |
+
"program": "/home/yitongli/CausVid/causvid/train_ode.py",
|
| 10 |
+
"codePath": "causvid/train_ode.py",
|
| 11 |
+
"codePathLocal": "causvid/train_ode.py",
|
| 12 |
+
"git": {
|
| 13 |
+
"remote": "https://github.com/tianweiy/CausVid.git",
|
| 14 |
+
"commit": "adb6a5ecd07666b4d0290042915c8406e6d5ce22"
|
| 15 |
+
},
|
| 16 |
+
"email": "liyitong.thu@gmail.com",
|
| 17 |
+
"root": "./output",
|
| 18 |
+
"host": "ip-172-31-3-169",
|
| 19 |
+
"executable": "/home/yitongli/miniconda3/envs/causvid/bin/python3.10",
|
| 20 |
+
"cpu_count": 48,
|
| 21 |
+
"cpu_count_logical": 96,
|
| 22 |
+
"gpu": "NVIDIA A100-SXM4-80GB",
|
| 23 |
+
"gpu_count": 8,
|
| 24 |
+
"disk": {
|
| 25 |
+
"/": {
|
| 26 |
+
"total": "2079114358784",
|
| 27 |
+
"used": "1376086970368"
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"memory": {
|
| 31 |
+
"total": "1204521443328"
|
| 32 |
+
},
|
| 33 |
+
"gpu_nvidia": [
|
| 34 |
+
{
|
| 35 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 36 |
+
"memoryTotal": "85899345920",
|
| 37 |
+
"cudaCores": 6912,
|
| 38 |
+
"architecture": "Ampere",
|
| 39 |
+
"uuid": "GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949"
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 43 |
+
"memoryTotal": "85899345920",
|
| 44 |
+
"cudaCores": 6912,
|
| 45 |
+
"architecture": "Ampere",
|
| 46 |
+
"uuid": "GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42"
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 50 |
+
"memoryTotal": "85899345920",
|
| 51 |
+
"cudaCores": 6912,
|
| 52 |
+
"architecture": "Ampere",
|
| 53 |
+
"uuid": "GPU-1f47056a-9c2c-8af1-8232-636203622830"
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 57 |
+
"memoryTotal": "85899345920",
|
| 58 |
+
"cudaCores": 6912,
|
| 59 |
+
"architecture": "Ampere",
|
| 60 |
+
"uuid": "GPU-38de1dff-a680-ec66-625b-49f2a31e3b42"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 64 |
+
"memoryTotal": "85899345920",
|
| 65 |
+
"cudaCores": 6912,
|
| 66 |
+
"architecture": "Ampere",
|
| 67 |
+
"uuid": "GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204"
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 71 |
+
"memoryTotal": "85899345920",
|
| 72 |
+
"cudaCores": 6912,
|
| 73 |
+
"architecture": "Ampere",
|
| 74 |
+
"uuid": "GPU-194ed489-5bfa-defc-85b1-812953e7d84a"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 78 |
+
"memoryTotal": "85899345920",
|
| 79 |
+
"cudaCores": 6912,
|
| 80 |
+
"architecture": "Ampere",
|
| 81 |
+
"uuid": "GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 85 |
+
"memoryTotal": "85899345920",
|
| 86 |
+
"cudaCores": 6912,
|
| 87 |
+
"architecture": "Ampere",
|
| 88 |
+
"uuid": "GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057"
|
| 89 |
+
}
|
| 90 |
+
],
|
| 91 |
+
"cudaVersion": "12.8",
|
| 92 |
+
"writerId": "ffoakmd0zn8f6sjv7nu2r58nmtyazqxg"
|
| 93 |
+
}
|
output/wandb/run-20250908_082236-gjh65qzq/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":98},"_runtime":98}
|
output/wandb/run-20250908_082236-gjh65qzq/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-08T08:22:36.825042468Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpk_l8zwe3/port-3836172.txt","pid":3836172,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-09-08T08:22:36.826033664Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3836172}
|
| 3 |
+
{"time":"2025-09-08T08:22:36.825970015Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3836172-3836464-2285782236/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2025-09-08T08:22:37.011146283Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-09-08T08:22:37.01985984Z","level":"INFO","msg":"handleInformInit: received","streamId":"gjh65qzq","id":"1(@)"}
|
| 6 |
+
{"time":"2025-09-08T08:22:37.227697259Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"gjh65qzq","id":"1(@)"}
|
| 7 |
+
{"time":"2025-09-08T08:24:15.728157025Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
|
| 8 |
+
{"time":"2025-09-08T08:24:15.728207682Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
|
| 9 |
+
{"time":"2025-09-08T08:24:15.728220917Z","level":"INFO","msg":"server is shutting down"}
|
| 10 |
+
{"time":"2025-09-08T08:24:15.728290134Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
|
| 11 |
+
{"time":"2025-09-08T08:24:15.728328201Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3836172-3836464-2285782236/socket","Net":"unix"}}
|
| 12 |
+
{"time":"2025-09-08T08:24:16.267531359Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
|
| 13 |
+
{"time":"2025-09-08T08:24:16.267567371Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
|
| 14 |
+
{"time":"2025-09-08T08:24:16.26757774Z","level":"INFO","msg":"server is closed"}
|
output/wandb/run-20250908_082236-gjh65qzq/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-08T08:22:37.019990235Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
|
| 2 |
+
{"time":"2025-09-08T08:22:37.227508836Z","level":"INFO","msg":"stream: created new stream","id":"gjh65qzq"}
|
| 3 |
+
{"time":"2025-09-08T08:22:37.227691977Z","level":"INFO","msg":"stream: started","id":"gjh65qzq"}
|
| 4 |
+
{"time":"2025-09-08T08:22:37.227709815Z","level":"INFO","msg":"writer: started","stream_id":"gjh65qzq"}
|
| 5 |
+
{"time":"2025-09-08T08:22:37.227733552Z","level":"INFO","msg":"handler: started","stream_id":"gjh65qzq"}
|
| 6 |
+
{"time":"2025-09-08T08:22:37.227762439Z","level":"INFO","msg":"sender: started","stream_id":"gjh65qzq"}
|
| 7 |
+
{"time":"2025-09-08T08:24:15.728213817Z","level":"INFO","msg":"stream: closing","id":"gjh65qzq"}
|
| 8 |
+
{"time":"2025-09-08T08:24:16.029737674Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 9 |
+
{"time":"2025-09-08T08:24:16.167542431Z","level":"INFO","msg":"handler: closed","stream_id":"gjh65qzq"}
|
| 10 |
+
{"time":"2025-09-08T08:24:16.167636838Z","level":"INFO","msg":"sender: closed","stream_id":"gjh65qzq"}
|
| 11 |
+
{"time":"2025-09-08T08:24:16.167654281Z","level":"INFO","msg":"stream: closed","id":"gjh65qzq"}
|
output/wandb/run-20250908_082236-gjh65qzq/logs/debug.log
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-09-08 08:22:36,804 INFO MainThread:3836172 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
|
| 2 |
+
2025-09-08 08:22:36,804 INFO MainThread:3836172 [wandb_setup.py:_flush():81] Configure stats pid to 3836172
|
| 3 |
+
2025-09-08 08:22:36,804 INFO MainThread:3836172 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
|
| 4 |
+
2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
|
| 5 |
+
2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_setup.py:_flush():81] Loading settings from environment variables
|
| 6 |
+
2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250908_082236-gjh65qzq/logs/debug.log
|
| 7 |
+
2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250908_082236-gjh65qzq/logs/debug-internal.log
|
| 8 |
+
2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_init.py:init():813] calling init triggers
|
| 9 |
+
2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model_name': 'causal_wan', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 7735925, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
|
| 11 |
+
2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_init.py:init():854] starting backend
|
| 12 |
+
2025-09-08 08:22:37,011 INFO MainThread:3836172 [wandb_init.py:init():857] sending inform_init request
|
| 13 |
+
2025-09-08 08:22:37,015 INFO MainThread:3836172 [wandb_init.py:init():865] backend started and connected
|
| 14 |
+
2025-09-08 08:22:37,018 INFO MainThread:3836172 [wandb_init.py:init():936] updated telemetry
|
| 15 |
+
2025-09-08 08:22:37,022 INFO MainThread:3836172 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-09-08 08:22:37,413 INFO MainThread:3836172 [wandb_init.py:init():1011] starting run threads in backend
|
| 17 |
+
2025-09-08 08:22:37,526 INFO MainThread:3836172 [wandb_run.py:_console_start():2494] atexit reg
|
| 18 |
+
2025-09-08 08:22:37,526 INFO MainThread:3836172 [wandb_run.py:_redirect():2342] redirect: wrap_raw
|
| 19 |
+
2025-09-08 08:22:37,526 INFO MainThread:3836172 [wandb_run.py:_redirect():2411] Wrapping output streams.
|
| 20 |
+
2025-09-08 08:22:37,526 INFO MainThread:3836172 [wandb_run.py:_redirect():2434] Redirects installed.
|
| 21 |
+
2025-09-08 08:22:37,528 INFO MainThread:3836172 [wandb_init.py:init():1057] run started, returning control to user process
|
| 22 |
+
2025-09-08 08:22:37,914 INFO MainThread:3836172 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
|
| 23 |
+
2025-09-08 08:24:15,728 INFO wandb-AsyncioManager-main:3836172 [service_client.py:_forward_responses():84] Reached EOF.
|
| 24 |
+
2025-09-08 08:24:15,729 INFO wandb-AsyncioManager-main:3836172 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
|
output/wandb/run-20250908_082236-gjh65qzq/run-gjh65qzq.wandb
ADDED
|
Binary file (28.8 kB). View file
|
|
|
output/wandb/run-20250908_091215-tz5j30tc/files/config.yaml
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.21.3
|
| 4 |
+
code_path: source-self-forcing-causvid_train_ode.py
|
| 5 |
+
e:
|
| 6 |
+
dzykhs2dp41ohf8z5ba4wgauzbq2dbq2:
|
| 7 |
+
args:
|
| 8 |
+
- --config_path
|
| 9 |
+
- configs/wan_causal_ode.yaml
|
| 10 |
+
codePath: causvid/train_ode.py
|
| 11 |
+
codePathLocal: causvid/train_ode.py
|
| 12 |
+
cpu_count: 48
|
| 13 |
+
cpu_count_logical: 96
|
| 14 |
+
cudaVersion: "12.8"
|
| 15 |
+
disk:
|
| 16 |
+
/:
|
| 17 |
+
total: "2079114358784"
|
| 18 |
+
used: "1400048283648"
|
| 19 |
+
email: liyitong.thu@gmail.com
|
| 20 |
+
executable: /home/yitongli/miniconda3/envs/causvid/bin/python3.10
|
| 21 |
+
git:
|
| 22 |
+
commit: adb6a5ecd07666b4d0290042915c8406e6d5ce22
|
| 23 |
+
remote: https://github.com/tianweiy/CausVid.git
|
| 24 |
+
gpu: NVIDIA A100-SXM4-80GB
|
| 25 |
+
gpu_count: 8
|
| 26 |
+
gpu_nvidia:
|
| 27 |
+
- architecture: Ampere
|
| 28 |
+
cudaCores: 6912
|
| 29 |
+
memoryTotal: "85899345920"
|
| 30 |
+
name: NVIDIA A100-SXM4-80GB
|
| 31 |
+
uuid: GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949
|
| 32 |
+
- architecture: Ampere
|
| 33 |
+
cudaCores: 6912
|
| 34 |
+
memoryTotal: "85899345920"
|
| 35 |
+
name: NVIDIA A100-SXM4-80GB
|
| 36 |
+
uuid: GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42
|
| 37 |
+
- architecture: Ampere
|
| 38 |
+
cudaCores: 6912
|
| 39 |
+
memoryTotal: "85899345920"
|
| 40 |
+
name: NVIDIA A100-SXM4-80GB
|
| 41 |
+
uuid: GPU-1f47056a-9c2c-8af1-8232-636203622830
|
| 42 |
+
- architecture: Ampere
|
| 43 |
+
cudaCores: 6912
|
| 44 |
+
memoryTotal: "85899345920"
|
| 45 |
+
name: NVIDIA A100-SXM4-80GB
|
| 46 |
+
uuid: GPU-38de1dff-a680-ec66-625b-49f2a31e3b42
|
| 47 |
+
- architecture: Ampere
|
| 48 |
+
cudaCores: 6912
|
| 49 |
+
memoryTotal: "85899345920"
|
| 50 |
+
name: NVIDIA A100-SXM4-80GB
|
| 51 |
+
uuid: GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204
|
| 52 |
+
- architecture: Ampere
|
| 53 |
+
cudaCores: 6912
|
| 54 |
+
memoryTotal: "85899345920"
|
| 55 |
+
name: NVIDIA A100-SXM4-80GB
|
| 56 |
+
uuid: GPU-194ed489-5bfa-defc-85b1-812953e7d84a
|
| 57 |
+
- architecture: Ampere
|
| 58 |
+
cudaCores: 6912
|
| 59 |
+
memoryTotal: "85899345920"
|
| 60 |
+
name: NVIDIA A100-SXM4-80GB
|
| 61 |
+
uuid: GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4
|
| 62 |
+
- architecture: Ampere
|
| 63 |
+
cudaCores: 6912
|
| 64 |
+
memoryTotal: "85899345920"
|
| 65 |
+
name: NVIDIA A100-SXM4-80GB
|
| 66 |
+
uuid: GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057
|
| 67 |
+
host: ip-172-31-3-169
|
| 68 |
+
memory:
|
| 69 |
+
total: "1204521443328"
|
| 70 |
+
os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.39
|
| 71 |
+
program: /home/yitongli/CausVid/causvid/train_ode.py
|
| 72 |
+
python: CPython 3.10.18
|
| 73 |
+
root: ./output
|
| 74 |
+
startedAt: "2025-09-08T09:12:15.567002Z"
|
| 75 |
+
writerId: dzykhs2dp41ohf8z5ba4wgauzbq2dbq2
|
| 76 |
+
m: []
|
| 77 |
+
python_version: 3.10.18
|
| 78 |
+
t:
|
| 79 |
+
"1":
|
| 80 |
+
- 1
|
| 81 |
+
- 11
|
| 82 |
+
- 41
|
| 83 |
+
- 49
|
| 84 |
+
- 71
|
| 85 |
+
- 83
|
| 86 |
+
- 105
|
| 87 |
+
"2":
|
| 88 |
+
- 1
|
| 89 |
+
- 11
|
| 90 |
+
- 41
|
| 91 |
+
- 49
|
| 92 |
+
- 71
|
| 93 |
+
- 83
|
| 94 |
+
- 105
|
| 95 |
+
"3":
|
| 96 |
+
- 16
|
| 97 |
+
- 17
|
| 98 |
+
- 61
|
| 99 |
+
"4": 3.10.18
|
| 100 |
+
"5": 0.21.3
|
| 101 |
+
"6": 4.56.1
|
| 102 |
+
"12": 0.21.3
|
| 103 |
+
"13": linux-x86_64
|
| 104 |
+
batch_size:
|
| 105 |
+
value: 2
|
| 106 |
+
beta1:
|
| 107 |
+
value: 0.9
|
| 108 |
+
beta2:
|
| 109 |
+
value: 0.999
|
| 110 |
+
data_path:
|
| 111 |
+
value: ../mixkit_ode_lmdb
|
| 112 |
+
denoising_step_list:
|
| 113 |
+
value:
|
| 114 |
+
- 1000
|
| 115 |
+
- 757
|
| 116 |
+
- 522
|
| 117 |
+
- 0
|
| 118 |
+
distillation_loss:
|
| 119 |
+
value: ode
|
| 120 |
+
generator_fsdp_wrap_strategy:
|
| 121 |
+
value: size
|
| 122 |
+
generator_grad:
|
| 123 |
+
value:
|
| 124 |
+
model: true
|
| 125 |
+
generator_task:
|
| 126 |
+
value: causal_video
|
| 127 |
+
gradient_checkpointing:
|
| 128 |
+
value: true
|
| 129 |
+
log_iters:
|
| 130 |
+
value: 200
|
| 131 |
+
lr:
|
| 132 |
+
value: 2e-06
|
| 133 |
+
mixed_precision:
|
| 134 |
+
value: true
|
| 135 |
+
model_name:
|
| 136 |
+
value: causal_wan
|
| 137 |
+
no_save:
|
| 138 |
+
value: false
|
| 139 |
+
num_frame_per_block:
|
| 140 |
+
value: 3
|
| 141 |
+
output_path:
|
| 142 |
+
value: ./output
|
| 143 |
+
seed:
|
| 144 |
+
value: 550819
|
| 145 |
+
sharding_strategy:
|
| 146 |
+
value: hybrid_full
|
| 147 |
+
text_encoder_fsdp_wrap_strategy:
|
| 148 |
+
value: size
|
| 149 |
+
wandb_entity:
|
| 150 |
+
value: liyitong-Tsinghua University
|
| 151 |
+
wandb_host:
|
| 152 |
+
value: https://api.wandb.ai
|
| 153 |
+
wandb_key:
|
| 154 |
+
value: 5409d3b960b01b25cec0f6abb5361b4022f0cc41
|
| 155 |
+
wandb_name:
|
| 156 |
+
value: wan_causal_ode
|
| 157 |
+
wandb_project:
|
| 158 |
+
value: self-forcing
|
| 159 |
+
warp_denoising_step:
|
| 160 |
+
value: false
|
output/wandb/run-20250908_091215-tz5j30tc/files/output.log
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run dir: ./output/wandb/run-20250908_091215-tz5j30tc/files
|
| 2 |
+
KV inference with 3 frames per block
|
| 3 |
+
ODERegression initialized.
|
| 4 |
+
cache a block wise causal mask with block size of 3 frames
|
| 5 |
+
BlockMask(shape=(1, 1, 32768, 32768), sparsity=42.52%,
|
| 6 |
+
(0, 0)
|
| 7 |
+
████░░
|
| 8 |
+
████░░
|
| 9 |
+
████░░░░░░░░
|
| 10 |
+
██████████░░
|
| 11 |
+
██████████░░
|
| 12 |
+
██████████░░░░░░░░
|
| 13 |
+
████████████████░░
|
| 14 |
+
████████████████░░
|
| 15 |
+
████████████████░░░░░░░░
|
| 16 |
+
██████████████████████░░
|
| 17 |
+
██████████████████████░░
|
| 18 |
+
██████████████████████░░░░░░░░
|
| 19 |
+
████████████████████████████░░
|
| 20 |
+
████████████████████████████░░
|
| 21 |
+
████████████████████████████████░░
|
| 22 |
+
████████████████████████████████░░
|
| 23 |
+
████████████████████████████████░░░░░░░░
|
| 24 |
+
████████████████████████████████████████
|
| 25 |
+
████████████████████████████████████████
|
| 26 |
+
████████████████████████████████████████
|
| 27 |
+
)
|
| 28 |
+
arrive
|
| 29 |
+
Start gathering distributed model states...
|
| 30 |
+
/home/yitongli/miniconda3/envs/causvid/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:678: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
|
| 31 |
+
warnings.warn(
|
| 32 |
+
Model saved to ./output/2025-09-08-09-12-15.318383_seed550819/checkpoint_model_000000/model.pt
|
| 33 |
+
training step 0...
|
| 34 |
+
Traceback (most recent call last):
|
| 35 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 283, in <module>
|
| 36 |
+
main()
|
| 37 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 277, in main
|
| 38 |
+
trainer.train()
|
| 39 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 241, in train
|
| 40 |
+
self.generate_video()
|
| 41 |
+
File "/home/yitongli/CausVid/causvid/train_ode.py", line 213, in generate_video
|
| 42 |
+
output_path = os.path.join("tmp", f"teacher_{self.step:06d}_{base_name}.mp4")
|
| 43 |
+
UnboundLocalError: local variable 'base_name' referenced before assignment
|
| 44 |
+
[rank0]: Traceback (most recent call last):
|
| 45 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 283, in <module>
|
| 46 |
+
[rank0]: main()
|
| 47 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 277, in main
|
| 48 |
+
[rank0]: trainer.train()
|
| 49 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 241, in train
|
| 50 |
+
[rank0]: self.generate_video()
|
| 51 |
+
[rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 213, in generate_video
|
| 52 |
+
[rank0]: output_path = os.path.join("tmp", f"teacher_{self.step:06d}_{base_name}.mp4")
|
| 53 |
+
[rank0]: UnboundLocalError: local variable 'base_name' referenced before assignment
|
output/wandb/run-20250908_091215-tz5j30tc/files/requirements.txt
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nvidia-cuda-nvrtc-cu12==12.8.93
|
| 2 |
+
filelock==3.19.1
|
| 3 |
+
aiofiles==24.1.0
|
| 4 |
+
PyYAML==6.0.2
|
| 5 |
+
httpx==0.28.1
|
| 6 |
+
multidict==6.6.4
|
| 7 |
+
tifffile==2025.5.10
|
| 8 |
+
tzdata==2025.2
|
| 9 |
+
urllib3==2.5.0
|
| 10 |
+
decord==0.6.0
|
| 11 |
+
certifi==2025.8.3
|
| 12 |
+
setuptools==78.1.1
|
| 13 |
+
websocket-client==1.8.0
|
| 14 |
+
gitdb==4.0.12
|
| 15 |
+
yarl==1.20.1
|
| 16 |
+
tqdm==4.67.1
|
| 17 |
+
open_clip_torch==3.1.0
|
| 18 |
+
pyparsing==3.2.3
|
| 19 |
+
fastapi==0.116.1
|
| 20 |
+
nvidia-curand-cu12==10.3.9.90
|
| 21 |
+
mdurl==0.1.2
|
| 22 |
+
torchvision==0.23.0
|
| 23 |
+
h11==0.16.0
|
| 24 |
+
pytz==2025.2
|
| 25 |
+
six==1.17.0
|
| 26 |
+
nvidia-cufile-cu12==1.13.1.3
|
| 27 |
+
aiohappyeyeballs==2.6.1
|
| 28 |
+
wandb==0.21.3
|
| 29 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 30 |
+
cycler==0.12.1
|
| 31 |
+
anyio==4.10.0
|
| 32 |
+
scikit-image==0.25.2
|
| 33 |
+
nvidia-cuda-runtime-cu12==12.8.90
|
| 34 |
+
sentencepiece==0.2.1
|
| 35 |
+
rich==14.1.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
GitPython==3.1.45
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
accelerate==1.10.1
|
| 40 |
+
proglog==0.1.12
|
| 41 |
+
sentry-sdk==2.37.0
|
| 42 |
+
nvidia-cusparselt-cu12==0.7.1
|
| 43 |
+
dashscope==1.24.3
|
| 44 |
+
platformdirs==4.4.0
|
| 45 |
+
safehttpx==0.1.6
|
| 46 |
+
fsspec==2025.9.0
|
| 47 |
+
lazy_loader==0.4
|
| 48 |
+
typing_extensions==4.15.0
|
| 49 |
+
semantic-version==2.10.0
|
| 50 |
+
numpy==1.24.4
|
| 51 |
+
nvidia-cufft-cu12==11.3.3.83
|
| 52 |
+
ruff==0.12.12
|
| 53 |
+
nvidia-cudnn-cu12==9.10.2.21
|
| 54 |
+
annotated-types==0.7.0
|
| 55 |
+
decorator==4.4.2
|
| 56 |
+
antlr4-python3-runtime==4.9.3
|
| 57 |
+
psutil==7.0.0
|
| 58 |
+
Brotli==1.1.0
|
| 59 |
+
tomlkit==0.13.3
|
| 60 |
+
httpcore==1.0.9
|
| 61 |
+
kiwisolver==1.4.9
|
| 62 |
+
nvidia-cusparse-cu12==12.5.8.93
|
| 63 |
+
groovy==0.1.2
|
| 64 |
+
protobuf==6.32.0
|
| 65 |
+
orjson==3.11.3
|
| 66 |
+
scipy==1.15.3
|
| 67 |
+
regex==2025.9.1
|
| 68 |
+
MarkupSafe==3.0.2
|
| 69 |
+
av==13.1.0
|
| 70 |
+
timm==1.0.19
|
| 71 |
+
uvicorn==0.35.0
|
| 72 |
+
wheel==0.45.1
|
| 73 |
+
Pygments==2.19.2
|
| 74 |
+
websockets==15.0.1
|
| 75 |
+
lmdb==1.7.3
|
| 76 |
+
sympy==1.14.0
|
| 77 |
+
einops==0.8.1
|
| 78 |
+
idna==3.10
|
| 79 |
+
triton==3.4.0
|
| 80 |
+
torch==2.8.0
|
| 81 |
+
moviepy==1.0.3
|
| 82 |
+
nvidia-nvtx-cu12==12.8.90
|
| 83 |
+
matplotlib==3.10.6
|
| 84 |
+
pillow==11.3.0
|
| 85 |
+
charset-normalizer==3.4.3
|
| 86 |
+
attrs==25.3.0
|
| 87 |
+
aiosignal==1.4.0
|
| 88 |
+
markdown-it-py==4.0.0
|
| 89 |
+
requests==2.32.5
|
| 90 |
+
typer==0.17.4
|
| 91 |
+
huggingface-hub==0.34.4
|
| 92 |
+
nvidia-nccl-cu12==2.27.3
|
| 93 |
+
propcache==0.3.2
|
| 94 |
+
opencv-python==4.11.0.86
|
| 95 |
+
ffmpy==0.6.1
|
| 96 |
+
jmespath==1.0.1
|
| 97 |
+
botocore==1.40.25
|
| 98 |
+
pydantic_core==2.33.2
|
| 99 |
+
fonttools==4.59.2
|
| 100 |
+
omegaconf==2.3.0
|
| 101 |
+
pycparser==2.22
|
| 102 |
+
mpmath==1.3.0
|
| 103 |
+
flash_attn==2.8.3
|
| 104 |
+
smmap==5.0.2
|
| 105 |
+
gradio_client==1.12.1
|
| 106 |
+
exceptiongroup==1.3.0
|
| 107 |
+
cffi==1.17.1
|
| 108 |
+
diffusers==0.31.0
|
| 109 |
+
nvidia-cusolver-cu12==11.7.3.90
|
| 110 |
+
contourpy==1.3.2
|
| 111 |
+
cryptography==45.0.7
|
| 112 |
+
nvidia-cuda-cupti-cu12==12.8.90
|
| 113 |
+
wcwidth==0.2.13
|
| 114 |
+
zipp==3.23.0
|
| 115 |
+
safetensors==0.6.2
|
| 116 |
+
gradio==5.44.1
|
| 117 |
+
click==8.2.1
|
| 118 |
+
frozenlist==1.7.0
|
| 119 |
+
networkx==3.4.2
|
| 120 |
+
s3transfer==0.13.1
|
| 121 |
+
shellingham==1.5.4
|
| 122 |
+
starlette==0.47.3
|
| 123 |
+
packaging==25.0
|
| 124 |
+
ftfy==6.3.1
|
| 125 |
+
importlib_metadata==8.7.0
|
| 126 |
+
transformers==4.56.1
|
| 127 |
+
nvidia-cublas-cu12==12.8.4.1
|
| 128 |
+
Jinja2==3.1.6
|
| 129 |
+
pycocotools==2.0.10
|
| 130 |
+
sniffio==1.3.1
|
| 131 |
+
hf-xet==1.1.9
|
| 132 |
+
typing-inspection==0.4.1
|
| 133 |
+
pandas==2.3.2
|
| 134 |
+
python-multipart==0.0.20
|
| 135 |
+
aiohttp==3.12.15
|
| 136 |
+
clip==1.0
|
| 137 |
+
pydub==0.25.1
|
| 138 |
+
easydict==1.13
|
| 139 |
+
pip==25.2
|
| 140 |
+
tokenizers==0.22.0
|
| 141 |
+
imageio==2.37.0
|
| 142 |
+
async-timeout==5.0.1
|
| 143 |
+
boto3==1.40.25
|
| 144 |
+
imageio-ffmpeg==0.6.0
|
| 145 |
+
CausVid==0.0.1
|
output/wandb/run-20250908_091215-tz5j30tc/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.10.18",
|
| 4 |
+
"startedAt": "2025-09-08T09:12:15.567002Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_path",
|
| 7 |
+
"configs/wan_causal_ode.yaml"
|
| 8 |
+
],
|
| 9 |
+
"program": "/home/yitongli/CausVid/causvid/train_ode.py",
|
| 10 |
+
"codePath": "causvid/train_ode.py",
|
| 11 |
+
"codePathLocal": "causvid/train_ode.py",
|
| 12 |
+
"git": {
|
| 13 |
+
"remote": "https://github.com/tianweiy/CausVid.git",
|
| 14 |
+
"commit": "adb6a5ecd07666b4d0290042915c8406e6d5ce22"
|
| 15 |
+
},
|
| 16 |
+
"email": "liyitong.thu@gmail.com",
|
| 17 |
+
"root": "./output",
|
| 18 |
+
"host": "ip-172-31-3-169",
|
| 19 |
+
"executable": "/home/yitongli/miniconda3/envs/causvid/bin/python3.10",
|
| 20 |
+
"cpu_count": 48,
|
| 21 |
+
"cpu_count_logical": 96,
|
| 22 |
+
"gpu": "NVIDIA A100-SXM4-80GB",
|
| 23 |
+
"gpu_count": 8,
|
| 24 |
+
"disk": {
|
| 25 |
+
"/": {
|
| 26 |
+
"total": "2079114358784",
|
| 27 |
+
"used": "1400048283648"
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"memory": {
|
| 31 |
+
"total": "1204521443328"
|
| 32 |
+
},
|
| 33 |
+
"gpu_nvidia": [
|
| 34 |
+
{
|
| 35 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 36 |
+
"memoryTotal": "85899345920",
|
| 37 |
+
"cudaCores": 6912,
|
| 38 |
+
"architecture": "Ampere",
|
| 39 |
+
"uuid": "GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949"
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 43 |
+
"memoryTotal": "85899345920",
|
| 44 |
+
"cudaCores": 6912,
|
| 45 |
+
"architecture": "Ampere",
|
| 46 |
+
"uuid": "GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42"
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 50 |
+
"memoryTotal": "85899345920",
|
| 51 |
+
"cudaCores": 6912,
|
| 52 |
+
"architecture": "Ampere",
|
| 53 |
+
"uuid": "GPU-1f47056a-9c2c-8af1-8232-636203622830"
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 57 |
+
"memoryTotal": "85899345920",
|
| 58 |
+
"cudaCores": 6912,
|
| 59 |
+
"architecture": "Ampere",
|
| 60 |
+
"uuid": "GPU-38de1dff-a680-ec66-625b-49f2a31e3b42"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 64 |
+
"memoryTotal": "85899345920",
|
| 65 |
+
"cudaCores": 6912,
|
| 66 |
+
"architecture": "Ampere",
|
| 67 |
+
"uuid": "GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204"
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 71 |
+
"memoryTotal": "85899345920",
|
| 72 |
+
"cudaCores": 6912,
|
| 73 |
+
"architecture": "Ampere",
|
| 74 |
+
"uuid": "GPU-194ed489-5bfa-defc-85b1-812953e7d84a"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 78 |
+
"memoryTotal": "85899345920",
|
| 79 |
+
"cudaCores": 6912,
|
| 80 |
+
"architecture": "Ampere",
|
| 81 |
+
"uuid": "GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 85 |
+
"memoryTotal": "85899345920",
|
| 86 |
+
"cudaCores": 6912,
|
| 87 |
+
"architecture": "Ampere",
|
| 88 |
+
"uuid": "GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057"
|
| 89 |
+
}
|
| 90 |
+
],
|
| 91 |
+
"cudaVersion": "12.8",
|
| 92 |
+
"writerId": "dzykhs2dp41ohf8z5ba4wgauzbq2dbq2"
|
| 93 |
+
}
|
output/wandb/run-20250908_091215-tz5j30tc/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_step":0,"_wandb":{"runtime":139},"_runtime":139.90914664,"generator_loss":0.31640625,"generator_grad_norm":5.5625,"loss_at_time_500":0.2705078125,"_timestamp":1.7573228593443248e+09}
|
output/wandb/run-20250908_091215-tz5j30tc/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-08T09:12:15.588566306Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpnban0o38/port-4018970.txt","pid":4018970,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-09-08T09:12:15.589212266Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":4018970}
|
| 3 |
+
{"time":"2025-09-08T09:12:15.589223222Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4018970-4019303-1328136160/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2025-09-08T09:12:15.77464832Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-09-08T09:12:15.785168473Z","level":"INFO","msg":"handleInformInit: received","streamId":"tz5j30tc","id":"1(@)"}
|
| 6 |
+
{"time":"2025-09-08T09:12:16.008129399Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"tz5j30tc","id":"1(@)"}
|
| 7 |
+
{"time":"2025-09-08T09:14:36.117068968Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
|
| 8 |
+
{"time":"2025-09-08T09:14:36.117157346Z","level":"INFO","msg":"server is shutting down"}
|
| 9 |
+
{"time":"2025-09-08T09:14:36.117145923Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
|
| 10 |
+
{"time":"2025-09-08T09:14:36.117277861Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-4018970-4019303-1328136160/socket","Net":"unix"}}
|
| 11 |
+
{"time":"2025-09-08T09:14:36.117345059Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
|
| 12 |
+
{"time":"2025-09-08T09:14:36.703976989Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
|
| 13 |
+
{"time":"2025-09-08T09:14:36.70401689Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
|
| 14 |
+
{"time":"2025-09-08T09:14:36.7040303Z","level":"INFO","msg":"server is closed"}
|
output/wandb/run-20250908_091215-tz5j30tc/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-08T09:12:15.785345022Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
|
| 2 |
+
{"time":"2025-09-08T09:12:16.008075812Z","level":"INFO","msg":"stream: created new stream","id":"tz5j30tc"}
|
| 3 |
+
{"time":"2025-09-08T09:12:16.008124642Z","level":"INFO","msg":"stream: started","id":"tz5j30tc"}
|
| 4 |
+
{"time":"2025-09-08T09:12:16.008135566Z","level":"INFO","msg":"handler: started","stream_id":"tz5j30tc"}
|
| 5 |
+
{"time":"2025-09-08T09:12:16.008145832Z","level":"INFO","msg":"sender: started","stream_id":"tz5j30tc"}
|
| 6 |
+
{"time":"2025-09-08T09:12:16.008160919Z","level":"INFO","msg":"writer: started","stream_id":"tz5j30tc"}
|
| 7 |
+
{"time":"2025-09-08T09:14:36.117135291Z","level":"INFO","msg":"stream: closing","id":"tz5j30tc"}
|
| 8 |
+
{"time":"2025-09-08T09:14:36.435658587Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 9 |
+
{"time":"2025-09-08T09:14:36.58265815Z","level":"INFO","msg":"handler: closed","stream_id":"tz5j30tc"}
|
| 10 |
+
{"time":"2025-09-08T09:14:36.582727241Z","level":"INFO","msg":"sender: closed","stream_id":"tz5j30tc"}
|
| 11 |
+
{"time":"2025-09-08T09:14:36.582737303Z","level":"INFO","msg":"stream: closed","id":"tz5j30tc"}
|
output/wandb/run-20250908_091215-tz5j30tc/logs/debug.log
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
|
| 2 |
+
2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_setup.py:_flush():81] Configure stats pid to 4018970
|
| 3 |
+
2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
|
| 4 |
+
2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
|
| 5 |
+
2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_setup.py:_flush():81] Loading settings from environment variables
|
| 6 |
+
2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250908_091215-tz5j30tc/logs/debug.log
|
| 7 |
+
2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250908_091215-tz5j30tc/logs/debug-internal.log
|
| 8 |
+
2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_init.py:init():813] calling init triggers
|
| 9 |
+
2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model_name': 'causal_wan', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 550819, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
|
| 11 |
+
2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_init.py:init():854] starting backend
|
| 12 |
+
2025-09-08 09:12:15,774 INFO MainThread:4018970 [wandb_init.py:init():857] sending inform_init request
|
| 13 |
+
2025-09-08 09:12:15,779 INFO MainThread:4018970 [wandb_init.py:init():865] backend started and connected
|
| 14 |
+
2025-09-08 09:12:15,783 INFO MainThread:4018970 [wandb_init.py:init():936] updated telemetry
|
| 15 |
+
2025-09-08 09:12:15,788 INFO MainThread:4018970 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-09-08 09:12:16,206 INFO MainThread:4018970 [wandb_init.py:init():1011] starting run threads in backend
|
| 17 |
+
2025-09-08 09:12:16,315 INFO MainThread:4018970 [wandb_run.py:_console_start():2494] atexit reg
|
| 18 |
+
2025-09-08 09:12:16,316 INFO MainThread:4018970 [wandb_run.py:_redirect():2342] redirect: wrap_raw
|
| 19 |
+
2025-09-08 09:12:16,316 INFO MainThread:4018970 [wandb_run.py:_redirect():2411] Wrapping output streams.
|
| 20 |
+
2025-09-08 09:12:16,316 INFO MainThread:4018970 [wandb_run.py:_redirect():2434] Redirects installed.
|
| 21 |
+
2025-09-08 09:12:16,318 INFO MainThread:4018970 [wandb_init.py:init():1057] run started, returning control to user process
|
| 22 |
+
2025-09-08 09:12:17,359 INFO MainThread:4018970 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
|
| 23 |
+
2025-09-08 09:14:36,117 INFO wandb-AsyncioManager-main:4018970 [service_client.py:_forward_responses():84] Reached EOF.
|
| 24 |
+
2025-09-08 09:14:36,117 INFO wandb-AsyncioManager-main:4018970 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
|
output/wandb/run-20250908_091215-tz5j30tc/run-tz5j30tc.wandb
ADDED
|
Binary file (41.8 kB). View file
|
|
|
output/wandb/run-20250908_091534-f394z0xa/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-08T09:15:34.947665584Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpb9cxfvx4/port-4032801.txt","pid":4032801,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-09-08T09:15:34.948269838Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":4032801}
|
| 3 |
+
{"time":"2025-09-08T09:15:34.948238593Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4032801-4033128-1592522043/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2025-09-08T09:15:35.132762156Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-09-08T09:15:35.13989349Z","level":"INFO","msg":"handleInformInit: received","streamId":"f394z0xa","id":"1(@)"}
|
| 6 |
+
{"time":"2025-09-08T09:15:35.350122319Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"f394z0xa","id":"1(@)"}
|
| 7 |
+
{"time":"2025-09-08T09:17:58.289894961Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
|
| 8 |
+
{"time":"2025-09-08T09:17:58.289957034Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
|
| 9 |
+
{"time":"2025-09-08T09:17:58.289973286Z","level":"INFO","msg":"server is shutting down"}
|
| 10 |
+
{"time":"2025-09-08T09:17:58.290055209Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
|
| 11 |
+
{"time":"2025-09-08T09:17:58.29008793Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-4032801-4033128-1592522043/socket","Net":"unix"}}
|
| 12 |
+
{"time":"2025-09-08T09:17:58.843012332Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
|
| 13 |
+
{"time":"2025-09-08T09:17:58.843047327Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
|
| 14 |
+
{"time":"2025-09-08T09:17:58.843056791Z","level":"INFO","msg":"server is closed"}
|
output/wandb/run-20250908_091953-n3vl9u22/logs/debug-core.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-08T09:19:53.577274617Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpyl_6c_ow/port-4049443.txt","pid":4049443,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-09-08T09:19:53.577824208Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":4049443}
|
| 3 |
+
{"time":"2025-09-08T09:19:53.577821212Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4049443-4049733-3298022966/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2025-09-08T09:19:53.762195815Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-09-08T09:19:53.77245292Z","level":"INFO","msg":"handleInformInit: received","streamId":"n3vl9u22","id":"1(@)"}
|
| 6 |
+
{"time":"2025-09-08T09:19:53.97809872Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"n3vl9u22","id":"1(@)"}
|
| 7 |
+
{"time":"2025-09-08T09:31:04.091714838Z","level":"INFO","msg":"server: parent process exited, terminating service process"}
|
output/wandb/run-20250909_031406-fvhxlznm/files/output.log
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run dir: ./output/wandb/run-20250909_031406-fvhxlznm/files
|
| 2 |
+
Loading pretrained generator from pretrained_ode.pt
|
| 3 |
+
KV inference with 3 frames per block
|
| 4 |
+
ODERegression initialized.
|
| 5 |
+
cache a block wise causal mask with block size of 3 frames
|
| 6 |
+
BlockMask(shape=(1, 1, 32768, 32768), sparsity=42.52%,
|
| 7 |
+
(0, 0)
|
| 8 |
+
████░░
|
| 9 |
+
████░░
|
| 10 |
+
████░░░░░░░░
|
| 11 |
+
██████████░░
|
| 12 |
+
██████████░░
|
| 13 |
+
██████████░░░░░░░░
|
| 14 |
+
████████████████░░
|
| 15 |
+
████████████████░░
|
| 16 |
+
████████████████░░░░░░░░
|
| 17 |
+
██████████████████████░░
|
| 18 |
+
██████████████████████░░
|
| 19 |
+
██████████████████████░░░░░░░░
|
| 20 |
+
████████████████████████████░░
|
| 21 |
+
████████████████████████████░░
|
| 22 |
+
████████████████████████████████░░
|
| 23 |
+
████████████████████████████████░░
|
| 24 |
+
████████████████████████████████░░░░░░░░
|
| 25 |
+
████████████████████████████████████████
|
| 26 |
+
████████████████████████████████████████
|
| 27 |
+
████████████████████████████████████████
|
| 28 |
+
)
|
| 29 |
+
Start gathering distributed model states...
|
| 30 |
+
/home/yitongli/miniconda3/envs/causvid/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:678: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
|
| 31 |
+
warnings.warn(
|
| 32 |
+
Model saved to ./output/2025-09-09-03-14-06.343483_seed6553852/checkpoint_model_000000/model.pt
|
| 33 |
+
training step 0...
|
| 34 |
+
Saving video: 100%|██████████████████████████████████████████████████████████| 81/81 [00:03<00:00, 24.90it/s]
|
| 35 |
+
in main process
|
| 36 |
+
log video_0
|
| 37 |
+
[34m[1mwandb[0m: [33mWARNING[0m `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
|
| 38 |
+
log video_1
|
| 39 |
+
[34m[1mwandb[0m: [33mWARNING[0m `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
|
| 40 |
+
log video_2
|
| 41 |
+
[34m[1mwandb[0m: [33mWARNING[0m `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
|
| 42 |
+
log video_3
|
| 43 |
+
[34m[1mwandb[0m: [33mWARNING[0m `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
|
| 44 |
+
log video_4
|
| 45 |
+
[34m[1mwandb[0m: [33mWARNING[0m `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
|
| 46 |
+
log video_5
|
| 47 |
+
[34m[1mwandb[0m: [33mWARNING[0m `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
|
| 48 |
+
log video_6
|
| 49 |
+
[34m[1mwandb[0m: [33mWARNING[0m `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
|
| 50 |
+
log video_7
|
| 51 |
+
[34m[1mwandb[0m: [33mWARNING[0m `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
|
| 52 |
+
training step 1...
|
| 53 |
+
training step 2...
|
| 54 |
+
training step 3...
|
| 55 |
+
training step 4...
|
| 56 |
+
training step 5...
|
| 57 |
+
training step 6...
|
| 58 |
+
training step 7...
|
| 59 |
+
training step 8...
|
| 60 |
+
training step 9...
|
| 61 |
+
training step 10...
|
| 62 |
+
training step 11...
|
| 63 |
+
training step 12...
|
| 64 |
+
training step 13...
|
| 65 |
+
training step 14...
|
| 66 |
+
training step 15...
|
| 67 |
+
training step 16...
|
| 68 |
+
training step 17...
|
| 69 |
+
training step 18...
|
| 70 |
+
training step 19...
|
| 71 |
+
training step 20...
|
| 72 |
+
training step 21...
|
| 73 |
+
training step 22...
|
| 74 |
+
training step 23...
|
| 75 |
+
training step 24...
|
| 76 |
+
training step 25...
|
| 77 |
+
training step 26...
|
| 78 |
+
training step 27...
|
| 79 |
+
training step 28...
|
| 80 |
+
training step 29...
|
| 81 |
+
training step 30...
|
| 82 |
+
training step 31...
|
| 83 |
+
training step 32...
|
| 84 |
+
training step 33...
|
| 85 |
+
training step 34...
|
| 86 |
+
training step 35...
|
| 87 |
+
training step 36...
|
| 88 |
+
training step 37...
|
| 89 |
+
training step 38...
|
| 90 |
+
training step 39...
|
| 91 |
+
training step 40...
|
| 92 |
+
training step 41...
|
| 93 |
+
training step 42...
|
| 94 |
+
training step 43...
|
| 95 |
+
training step 44...
|
| 96 |
+
training step 45...
|
| 97 |
+
training step 46...
|
| 98 |
+
training step 47...
|
| 99 |
+
training step 48...
|
| 100 |
+
training step 49...
|
| 101 |
+
training step 50...
|
| 102 |
+
training step 51...
|
| 103 |
+
training step 52...
|
| 104 |
+
training step 53...
|
| 105 |
+
training step 54...
|
| 106 |
+
training step 55...
|
| 107 |
+
training step 56...
|
| 108 |
+
training step 57...
|
| 109 |
+
training step 58...
|
| 110 |
+
training step 59...
|
| 111 |
+
training step 60...
|
| 112 |
+
training step 61...
|
| 113 |
+
training step 62...
|
output/wandb/run-20250909_031406-fvhxlznm/files/requirements.txt
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nvidia-cuda-nvrtc-cu12==12.8.93
|
| 2 |
+
filelock==3.19.1
|
| 3 |
+
aiofiles==24.1.0
|
| 4 |
+
PyYAML==6.0.2
|
| 5 |
+
httpx==0.28.1
|
| 6 |
+
multidict==6.6.4
|
| 7 |
+
tifffile==2025.5.10
|
| 8 |
+
tzdata==2025.2
|
| 9 |
+
urllib3==2.5.0
|
| 10 |
+
decord==0.6.0
|
| 11 |
+
certifi==2025.8.3
|
| 12 |
+
setuptools==78.1.1
|
| 13 |
+
websocket-client==1.8.0
|
| 14 |
+
gitdb==4.0.12
|
| 15 |
+
yarl==1.20.1
|
| 16 |
+
tqdm==4.67.1
|
| 17 |
+
open_clip_torch==3.1.0
|
| 18 |
+
pyparsing==3.2.3
|
| 19 |
+
fastapi==0.116.1
|
| 20 |
+
nvidia-curand-cu12==10.3.9.90
|
| 21 |
+
mdurl==0.1.2
|
| 22 |
+
torchvision==0.23.0
|
| 23 |
+
h11==0.16.0
|
| 24 |
+
pytz==2025.2
|
| 25 |
+
six==1.17.0
|
| 26 |
+
nvidia-cufile-cu12==1.13.1.3
|
| 27 |
+
aiohappyeyeballs==2.6.1
|
| 28 |
+
wandb==0.21.3
|
| 29 |
+
nvidia-nvjitlink-cu12==12.8.93
|
| 30 |
+
cycler==0.12.1
|
| 31 |
+
anyio==4.10.0
|
| 32 |
+
scikit-image==0.25.2
|
| 33 |
+
nvidia-cuda-runtime-cu12==12.8.90
|
| 34 |
+
sentencepiece==0.2.1
|
| 35 |
+
rich==14.1.0
|
| 36 |
+
pydantic==2.11.7
|
| 37 |
+
GitPython==3.1.45
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
accelerate==1.10.1
|
| 40 |
+
proglog==0.1.12
|
| 41 |
+
sentry-sdk==2.37.0
|
| 42 |
+
nvidia-cusparselt-cu12==0.7.1
|
| 43 |
+
dashscope==1.24.3
|
| 44 |
+
platformdirs==4.4.0
|
| 45 |
+
safehttpx==0.1.6
|
| 46 |
+
fsspec==2025.9.0
|
| 47 |
+
lazy_loader==0.4
|
| 48 |
+
typing_extensions==4.15.0
|
| 49 |
+
semantic-version==2.10.0
|
| 50 |
+
numpy==1.24.4
|
| 51 |
+
nvidia-cufft-cu12==11.3.3.83
|
| 52 |
+
ruff==0.12.12
|
| 53 |
+
nvidia-cudnn-cu12==9.10.2.21
|
| 54 |
+
annotated-types==0.7.0
|
| 55 |
+
decorator==4.4.2
|
| 56 |
+
antlr4-python3-runtime==4.9.3
|
| 57 |
+
psutil==7.0.0
|
| 58 |
+
Brotli==1.1.0
|
| 59 |
+
tomlkit==0.13.3
|
| 60 |
+
httpcore==1.0.9
|
| 61 |
+
kiwisolver==1.4.9
|
| 62 |
+
nvidia-cusparse-cu12==12.5.8.93
|
| 63 |
+
groovy==0.1.2
|
| 64 |
+
protobuf==6.32.0
|
| 65 |
+
orjson==3.11.3
|
| 66 |
+
scipy==1.15.3
|
| 67 |
+
regex==2025.9.1
|
| 68 |
+
MarkupSafe==3.0.2
|
| 69 |
+
av==13.1.0
|
| 70 |
+
timm==1.0.19
|
| 71 |
+
uvicorn==0.35.0
|
| 72 |
+
wheel==0.45.1
|
| 73 |
+
Pygments==2.19.2
|
| 74 |
+
websockets==15.0.1
|
| 75 |
+
lmdb==1.7.3
|
| 76 |
+
sympy==1.14.0
|
| 77 |
+
einops==0.8.1
|
| 78 |
+
idna==3.10
|
| 79 |
+
triton==3.4.0
|
| 80 |
+
torch==2.8.0
|
| 81 |
+
moviepy==1.0.3
|
| 82 |
+
nvidia-nvtx-cu12==12.8.90
|
| 83 |
+
matplotlib==3.10.6
|
| 84 |
+
pillow==11.3.0
|
| 85 |
+
charset-normalizer==3.4.3
|
| 86 |
+
attrs==25.3.0
|
| 87 |
+
aiosignal==1.4.0
|
| 88 |
+
markdown-it-py==4.0.0
|
| 89 |
+
requests==2.32.5
|
| 90 |
+
typer==0.17.4
|
| 91 |
+
huggingface-hub==0.34.4
|
| 92 |
+
nvidia-nccl-cu12==2.27.3
|
| 93 |
+
propcache==0.3.2
|
| 94 |
+
opencv-python==4.11.0.86
|
| 95 |
+
ffmpy==0.6.1
|
| 96 |
+
jmespath==1.0.1
|
| 97 |
+
botocore==1.40.25
|
| 98 |
+
pydantic_core==2.33.2
|
| 99 |
+
fonttools==4.59.2
|
| 100 |
+
omegaconf==2.3.0
|
| 101 |
+
pycparser==2.22
|
| 102 |
+
mpmath==1.3.0
|
| 103 |
+
flash_attn==2.8.3
|
| 104 |
+
smmap==5.0.2
|
| 105 |
+
gradio_client==1.12.1
|
| 106 |
+
exceptiongroup==1.3.0
|
| 107 |
+
cffi==1.17.1
|
| 108 |
+
diffusers==0.31.0
|
| 109 |
+
nvidia-cusolver-cu12==11.7.3.90
|
| 110 |
+
contourpy==1.3.2
|
| 111 |
+
cryptography==45.0.7
|
| 112 |
+
nvidia-cuda-cupti-cu12==12.8.90
|
| 113 |
+
wcwidth==0.2.13
|
| 114 |
+
zipp==3.23.0
|
| 115 |
+
safetensors==0.6.2
|
| 116 |
+
gradio==5.44.1
|
| 117 |
+
click==8.2.1
|
| 118 |
+
frozenlist==1.7.0
|
| 119 |
+
networkx==3.4.2
|
| 120 |
+
s3transfer==0.13.1
|
| 121 |
+
shellingham==1.5.4
|
| 122 |
+
starlette==0.47.3
|
| 123 |
+
packaging==25.0
|
| 124 |
+
ftfy==6.3.1
|
| 125 |
+
importlib_metadata==8.7.0
|
| 126 |
+
transformers==4.56.1
|
| 127 |
+
nvidia-cublas-cu12==12.8.4.1
|
| 128 |
+
Jinja2==3.1.6
|
| 129 |
+
pycocotools==2.0.10
|
| 130 |
+
sniffio==1.3.1
|
| 131 |
+
hf-xet==1.1.9
|
| 132 |
+
typing-inspection==0.4.1
|
| 133 |
+
pandas==2.3.2
|
| 134 |
+
python-multipart==0.0.20
|
| 135 |
+
aiohttp==3.12.15
|
| 136 |
+
clip==1.0
|
| 137 |
+
pydub==0.25.1
|
| 138 |
+
easydict==1.13
|
| 139 |
+
pip==25.2
|
| 140 |
+
tokenizers==0.22.0
|
| 141 |
+
imageio==2.37.0
|
| 142 |
+
async-timeout==5.0.1
|
| 143 |
+
boto3==1.40.25
|
| 144 |
+
imageio-ffmpeg==0.6.0
|
| 145 |
+
CausVid==0.0.1
|
output/wandb/run-20250909_031406-fvhxlznm/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.10.18",
|
| 4 |
+
"startedAt": "2025-09-09T03:14:06.607859Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_path",
|
| 7 |
+
"configs/wan_causal_ode.yaml"
|
| 8 |
+
],
|
| 9 |
+
"program": "/home/yitongli/CausVid/causvid/train_ode.py",
|
| 10 |
+
"codePath": "causvid/train_ode.py",
|
| 11 |
+
"codePathLocal": "causvid/train_ode.py",
|
| 12 |
+
"git": {
|
| 13 |
+
"remote": "https://github.com/tianweiy/CausVid.git",
|
| 14 |
+
"commit": "adb6a5ecd07666b4d0290042915c8406e6d5ce22"
|
| 15 |
+
},
|
| 16 |
+
"email": "liyitong.thu@gmail.com",
|
| 17 |
+
"root": "./output",
|
| 18 |
+
"host": "ip-172-31-3-169",
|
| 19 |
+
"executable": "/home/yitongli/miniconda3/envs/causvid/bin/python3.10",
|
| 20 |
+
"cpu_count": 48,
|
| 21 |
+
"cpu_count_logical": 96,
|
| 22 |
+
"gpu": "NVIDIA A100-SXM4-80GB",
|
| 23 |
+
"gpu_count": 8,
|
| 24 |
+
"disk": {
|
| 25 |
+
"/": {
|
| 26 |
+
"total": "2079114358784",
|
| 27 |
+
"used": "1450179067904"
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"memory": {
|
| 31 |
+
"total": "1204521443328"
|
| 32 |
+
},
|
| 33 |
+
"gpu_nvidia": [
|
| 34 |
+
{
|
| 35 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 36 |
+
"memoryTotal": "85899345920",
|
| 37 |
+
"cudaCores": 6912,
|
| 38 |
+
"architecture": "Ampere",
|
| 39 |
+
"uuid": "GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949"
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 43 |
+
"memoryTotal": "85899345920",
|
| 44 |
+
"cudaCores": 6912,
|
| 45 |
+
"architecture": "Ampere",
|
| 46 |
+
"uuid": "GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42"
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 50 |
+
"memoryTotal": "85899345920",
|
| 51 |
+
"cudaCores": 6912,
|
| 52 |
+
"architecture": "Ampere",
|
| 53 |
+
"uuid": "GPU-1f47056a-9c2c-8af1-8232-636203622830"
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 57 |
+
"memoryTotal": "85899345920",
|
| 58 |
+
"cudaCores": 6912,
|
| 59 |
+
"architecture": "Ampere",
|
| 60 |
+
"uuid": "GPU-38de1dff-a680-ec66-625b-49f2a31e3b42"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 64 |
+
"memoryTotal": "85899345920",
|
| 65 |
+
"cudaCores": 6912,
|
| 66 |
+
"architecture": "Ampere",
|
| 67 |
+
"uuid": "GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204"
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 71 |
+
"memoryTotal": "85899345920",
|
| 72 |
+
"cudaCores": 6912,
|
| 73 |
+
"architecture": "Ampere",
|
| 74 |
+
"uuid": "GPU-194ed489-5bfa-defc-85b1-812953e7d84a"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 78 |
+
"memoryTotal": "85899345920",
|
| 79 |
+
"cudaCores": 6912,
|
| 80 |
+
"architecture": "Ampere",
|
| 81 |
+
"uuid": "GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 85 |
+
"memoryTotal": "85899345920",
|
| 86 |
+
"cudaCores": 6912,
|
| 87 |
+
"architecture": "Ampere",
|
| 88 |
+
"uuid": "GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057"
|
| 89 |
+
}
|
| 90 |
+
],
|
| 91 |
+
"cudaVersion": "12.8",
|
| 92 |
+
"writerId": "fsrpwg7v4b4972ihmv15clw215fcozrh"
|
| 93 |
+
}
|
output/wandb/run-20250909_031406-fvhxlznm/logs/debug-core.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-09T03:14:06.646254908Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp527vay8l/port-1234529.txt","pid":1234529,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-09-09T03:14:06.646757368Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1234529}
|
| 3 |
+
{"time":"2025-09-09T03:14:06.646738738Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1234529-1235338-3412218744/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2025-09-09T03:14:06.816587266Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-09-09T03:14:06.827214814Z","level":"INFO","msg":"handleInformInit: received","streamId":"fvhxlznm","id":"1(@)"}
|
| 6 |
+
{"time":"2025-09-09T03:14:07.041711266Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"fvhxlznm","id":"1(@)"}
|
| 7 |
+
{"time":"2025-09-09T04:05:19.585076497Z","level":"INFO","msg":"server: parent process exited, terminating service process"}
|
output/wandb/run-20250909_031406-fvhxlznm/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-09-09T03:14:06.827346748Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
|
| 2 |
+
{"time":"2025-09-09T03:14:07.041670446Z","level":"INFO","msg":"stream: created new stream","id":"fvhxlznm"}
|
| 3 |
+
{"time":"2025-09-09T03:14:07.04170638Z","level":"INFO","msg":"stream: started","id":"fvhxlznm"}
|
| 4 |
+
{"time":"2025-09-09T03:14:07.04171651Z","level":"INFO","msg":"writer: started","stream_id":"fvhxlznm"}
|
| 5 |
+
{"time":"2025-09-09T03:14:07.041726675Z","level":"INFO","msg":"handler: started","stream_id":"fvhxlznm"}
|
| 6 |
+
{"time":"2025-09-09T03:14:07.041739753Z","level":"INFO","msg":"sender: started","stream_id":"fvhxlznm"}
|
output/wandb/run-20250909_031406-fvhxlznm/logs/debug.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
|
| 2 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Configure stats pid to 1234529
|
| 3 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
|
| 4 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
|
| 5 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from environment variables
|
| 6 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250909_031406-fvhxlznm/logs/debug.log
|
| 7 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250909_031406-fvhxlznm/logs/debug-internal.log
|
| 8 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():813] calling init triggers
|
| 9 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model_name': 'causal_wan', 'generator_ckpt': 'pretrained_ode.pt', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 6553852, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
|
| 11 |
+
2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():854] starting backend
|
| 12 |
+
2025-09-09 03:14:06,816 INFO MainThread:1234529 [wandb_init.py:init():857] sending inform_init request
|
| 13 |
+
2025-09-09 03:14:06,821 INFO MainThread:1234529 [wandb_init.py:init():865] backend started and connected
|
| 14 |
+
2025-09-09 03:14:06,825 INFO MainThread:1234529 [wandb_init.py:init():936] updated telemetry
|
| 15 |
+
2025-09-09 03:14:06,830 INFO MainThread:1234529 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-09-09 03:14:07,304 INFO MainThread:1234529 [wandb_init.py:init():1011] starting run threads in backend
|
| 17 |
+
2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_console_start():2494] atexit reg
|
| 18 |
+
2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2342] redirect: wrap_raw
|
| 19 |
+
2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2411] Wrapping output streams.
|
| 20 |
+
2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2434] Redirects installed.
|
| 21 |
+
2025-09-09 03:14:07,417 INFO MainThread:1234529 [wandb_init.py:init():1057] run started, returning control to user process
|
| 22 |
+
2025-09-09 03:14:07,902 INFO MainThread:1234529 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
|
train.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
video_processing.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-09-08 05:00:38,647 - INFO - Batch processing completed in 0.00 seconds
|
| 2 |
+
2025-09-08 05:01:15,712 - INFO - Batch processing completed in 0.00 seconds
|