lyttt committed on
Commit
5f5f46e
·
verified ·
1 Parent(s): aed98f0

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. distillation_data/compute_vae_latent.py +97 -0
  2. distillation_data/download_mixkit.py +58 -0
  3. distillation_data/ode_gen.sh +6 -0
  4. distillation_data/process_mixkit.py +161 -0
  5. kill_processes.sh +8 -0
  6. minimal_inference/bidirectional_inference.py +50 -0
  7. minimal_inference/longvideo_autoregressive_inference.py +88 -0
  8. output/wandb/debug-internal.log +6 -0
  9. output/wandb/debug.log +22 -0
  10. output/wandb/run-20250908_062833-1l2wnyo3/files/config.yaml +159 -0
  11. output/wandb/run-20250908_062833-1l2wnyo3/files/output.log +29 -0
  12. output/wandb/run-20250908_062833-1l2wnyo3/files/requirements.txt +145 -0
  13. output/wandb/run-20250908_062833-1l2wnyo3/files/wandb-metadata.json +93 -0
  14. output/wandb/run-20250908_062833-1l2wnyo3/files/wandb-summary.json +1 -0
  15. output/wandb/run-20250908_062833-1l2wnyo3/logs/debug-core.log +15 -0
  16. output/wandb/run-20250908_062833-1l2wnyo3/logs/debug-internal.log +11 -0
  17. output/wandb/run-20250908_062833-1l2wnyo3/logs/debug.log +24 -0
  18. output/wandb/run-20250908_062833-1l2wnyo3/run-1l2wnyo3.wandb +0 -0
  19. output/wandb/run-20250908_064634-xd44venm/files/output.log +26 -0
  20. output/wandb/run-20250908_064634-xd44venm/files/requirements.txt +145 -0
  21. output/wandb/run-20250908_064634-xd44venm/logs/debug-internal.log +6 -0
  22. output/wandb/run-20250908_064634-xd44venm/logs/debug.log +22 -0
  23. output/wandb/run-20250908_082236-gjh65qzq/files/config.yaml +159 -0
  24. output/wandb/run-20250908_082236-gjh65qzq/files/output.log +36 -0
  25. output/wandb/run-20250908_082236-gjh65qzq/files/requirements.txt +145 -0
  26. output/wandb/run-20250908_082236-gjh65qzq/files/wandb-metadata.json +93 -0
  27. output/wandb/run-20250908_082236-gjh65qzq/files/wandb-summary.json +1 -0
  28. output/wandb/run-20250908_082236-gjh65qzq/logs/debug-core.log +14 -0
  29. output/wandb/run-20250908_082236-gjh65qzq/logs/debug-internal.log +11 -0
  30. output/wandb/run-20250908_082236-gjh65qzq/logs/debug.log +24 -0
  31. output/wandb/run-20250908_082236-gjh65qzq/run-gjh65qzq.wandb +0 -0
  32. output/wandb/run-20250908_091215-tz5j30tc/files/config.yaml +160 -0
  33. output/wandb/run-20250908_091215-tz5j30tc/files/output.log +53 -0
  34. output/wandb/run-20250908_091215-tz5j30tc/files/requirements.txt +145 -0
  35. output/wandb/run-20250908_091215-tz5j30tc/files/wandb-metadata.json +93 -0
  36. output/wandb/run-20250908_091215-tz5j30tc/files/wandb-summary.json +1 -0
  37. output/wandb/run-20250908_091215-tz5j30tc/logs/debug-core.log +14 -0
  38. output/wandb/run-20250908_091215-tz5j30tc/logs/debug-internal.log +11 -0
  39. output/wandb/run-20250908_091215-tz5j30tc/logs/debug.log +24 -0
  40. output/wandb/run-20250908_091215-tz5j30tc/run-tz5j30tc.wandb +0 -0
  41. output/wandb/run-20250908_091534-f394z0xa/logs/debug-core.log +14 -0
  42. output/wandb/run-20250908_091953-n3vl9u22/logs/debug-core.log +7 -0
  43. output/wandb/run-20250909_031406-fvhxlznm/files/output.log +113 -0
  44. output/wandb/run-20250909_031406-fvhxlznm/files/requirements.txt +145 -0
  45. output/wandb/run-20250909_031406-fvhxlznm/files/wandb-metadata.json +93 -0
  46. output/wandb/run-20250909_031406-fvhxlznm/logs/debug-core.log +7 -0
  47. output/wandb/run-20250909_031406-fvhxlznm/logs/debug-internal.log +6 -0
  48. output/wandb/run-20250909_031406-fvhxlznm/logs/debug.log +22 -0
  49. train.log +0 -0
  50. video_processing.log +2 -0
distillation_data/compute_vae_latent.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from causvid.models.wan.wan_wrapper import WanVAEWrapper
2
+ from causvid.util import launch_distributed_job
3
+ import torch.distributed as dist
4
+ import imageio.v3 as iio
5
+ from tqdm import tqdm
6
+ import argparse
7
+ import torch
8
+ import json
9
+ import math
10
+ import os
11
+
12
+ torch.set_grad_enabled(False)
13
+
14
+
15
+ def video_to_numpy(video_path):
16
+ """
17
+ Reads a video file and returns a NumPy array containing all frames.
18
+
19
+ :param video_path: Path to the video file.
20
+ :return: NumPy array of shape (num_frames, height, width, channels)
21
+ """
22
+ return iio.imread(video_path, plugin="pyav") # Reads the entire video as a NumPy array
23
+
24
+
25
+ def encode(self, videos: torch.Tensor) -> torch.Tensor:
26
+ device, dtype = videos[0].device, videos[0].dtype
27
+ scale = [self.mean.to(device=device, dtype=dtype),
28
+ 1.0 / self.std.to(device=device, dtype=dtype)]
29
+ output = [
30
+ self.model.encode(u.unsqueeze(0), scale).float().squeeze(0)
31
+ for u in videos
32
+ ]
33
+
34
+ output = torch.stack(output, dim=0)
35
+ return output
36
+
37
+
38
+ def main():
39
+ parser = argparse.ArgumentParser()
40
+ parser.add_argument("--input_video_folder", type=str,
41
+ help="Path to the folder containing input videos.")
42
+ parser.add_argument("--output_latent_folder", type=str,
43
+ help="Path to the folder where output latents will be saved.")
44
+ parser.add_argument("--info_path", type=str,
45
+ help="Path to the info file containing video metadata.")
46
+
47
+ args = parser.parse_args()
48
+
49
+ # Step 1: Setup the environment
50
+ torch.backends.cuda.matmul.allow_tf32 = True
51
+ torch.backends.cudnn.allow_tf32 = True
52
+ torch.set_grad_enabled(False)
53
+
54
+ # Step 2: Create the generator
55
+ launch_distributed_job()
56
+ device = torch.cuda.current_device()
57
+
58
+ with open(args.info_path, "r") as f:
59
+ video_info = json.load(f)
60
+
61
+ model = WanVAEWrapper().to(device=device, dtype=torch.bfloat16)
62
+
63
+ video_paths = sorted(list(video_info.keys()))
64
+
65
+ os.makedirs(args.output_latent_folder, exist_ok=True)
66
+
67
+ for index in tqdm(range(int(math.ceil(len(video_paths) / dist.get_world_size()))), disable=dist.get_rank() != 0):
68
+ global_index = index * dist.get_world_size() + dist.get_rank()
69
+ if global_index >= len(video_paths):
70
+ break
71
+
72
+ video_path = video_paths[global_index]
73
+ prompt = video_info[video_path]
74
+
75
+ try:
76
+ array = video_to_numpy(os.path.join(
77
+ args.input_video_folder, video_path))
78
+ except:
79
+ print(f"Failed to read video: {video_path}")
80
+ continue
81
+
82
+ video_tensor = torch.tensor(array, dtype=torch.float32, device=device).unsqueeze(0).permute(
83
+ 0, 4, 1, 2, 3
84
+ ) / 255.0
85
+ video_tensor = video_tensor * 2 - 1
86
+ video_tensor = video_tensor.to(torch.bfloat16)
87
+ encoded_latents = encode(model, video_tensor).transpose(2, 1)
88
+
89
+ torch.save(
90
+ {prompt: encoded_latents.cpu().detach()},
91
+ os.path.join(args.output_latent_folder, f"{global_index:08d}.pt")
92
+ )
93
+ dist.barrier()
94
+
95
+
96
+ if __name__ == "__main__":
97
+ main()
distillation_data/download_mixkit.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import tarfile
4
+ from huggingface_hub import snapshot_download
5
+ from concurrent.futures import ThreadPoolExecutor
6
+
7
+
8
+ def extract_tar(tar_path, dest_dir):
9
+ """
10
+ Extracts a .tar file to the specified destination directory.
11
+ """
12
+ with tarfile.open(tar_path, 'r') as tar:
13
+ tar.extractall(path=dest_dir)
14
+
15
+
16
+ def main():
17
+ parser = argparse.ArgumentParser(
18
+ description="Download and extract dataset.")
19
+ parser.add_argument("--local_dir", type=str, default="/mnt/localssd/",
20
+ help="Local directory to save the dataset.")
21
+ parser.add_argument("--repo_id", type=str,
22
+ default="Languagebind/Open-Sora-Plan-v1.1.0", help="Hugging Face repository ID.")
23
+ parser.add_argument("--folder_name", type=str, default="all_mixkit",
24
+ help="Folder name of the huggingface repo.")
25
+
26
+ args = parser.parse_args()
27
+
28
+ allow_patterns = [f"{args.folder_name}/*.tar"]
29
+
30
+ snapshot_download(
31
+ repo_id=args.repo_id,
32
+ local_dir=args.local_dir,
33
+ revision="main", # or the branch/tag/commit you want
34
+ allow_patterns=allow_patterns,
35
+ repo_type="dataset"
36
+ )
37
+
38
+ # 4. Collect all .tar files recursively from the downloaded folder
39
+ tar_files = []
40
+ for root, dirs, files in os.walk(args.local_dir):
41
+ for file in files:
42
+ if file.endswith(".tar"):
43
+ tar_files.append(os.path.join(root, file))
44
+
45
+ # 5. Destination folder for extracted files
46
+ output_dir = os.path.join(args.local_dir, "videos")
47
+ os.makedirs(output_dir, exist_ok=True)
48
+
49
+ # 6. Extract each tar file in parallel
50
+ with ThreadPoolExecutor() as executor:
51
+ for tar_path in tar_files:
52
+ executor.submit(extract_tar, tar_path, output_dir)
53
+
54
+ print("All .tar files have been downloaded and extracted to:", output_dir)
55
+
56
+
57
+ if __name__ == "__main__":
58
+ main()
distillation_data/ode_gen.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ export LD_LIBRARY_PATH=/usr/local/lib/:/opt/nccl/build/lib:/usr/local/cuda/lib64:/opt/amazon/efa/lib:/opt/aws-ofi-nccl/lib:$LD_LIBRARY_PATH
2
+
3
+ torchrun --nnodes 8 --nproc_per_node=8 --rdzv_id=5235 \
4
+ --rdzv_backend=c10d \
5
+ --rdzv_endpoint $MASTER_ADDR causvid/models/wan/generate_ode_pairs.py \
6
+ --output_folder mixkit_ode --caption_path sample_dataset/mixkit_prompts.txt
distillation_data/process_mixkit.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # the following code is taken from FastVideo https://github.com/hao-ai-lab/FastVideo/tree/main
2
+ # Apache-2.0 License
3
+
4
+ import argparse
5
+ import logging
6
+ import time
7
+ from concurrent.futures import ProcessPoolExecutor, as_completed
8
+ from pathlib import Path
9
+
10
+ import numpy as np
11
+ from moviepy.editor import VideoFileClip
12
+ from skimage.transform import resize
13
+ from tqdm import tqdm
14
+ from concurrent.futures import ThreadPoolExecutor
15
+
16
+ # Configure logging
17
+ logging.basicConfig(level=logging.INFO,
18
+ format='%(asctime)s - %(levelname)s - %(message)s',
19
+ handlers=[logging.FileHandler('video_processing.log')])
20
+
21
+
22
+ def is_16_9_ratio(width: int, height: int, tolerance: float = 0.1) -> bool:
23
+ target_ratio = 16 / 9
24
+ actual_ratio = width / height
25
+ return abs(actual_ratio - target_ratio) <= (target_ratio * tolerance)
26
+
27
+
28
+ def resize_video(args_tuple):
29
+ """
30
+ Resize a single video file.
31
+ args_tuple: (input_file, output_dir, width, height, fps)
32
+ """
33
+ input_file, output_dir, width, height, fps = args_tuple
34
+ video = None
35
+ resized = None
36
+ output_file = output_dir / f"{input_file.name}"
37
+
38
+ if output_file.exists():
39
+ output_file.unlink()
40
+
41
+ video = VideoFileClip(str(input_file))
42
+
43
+ if not is_16_9_ratio(video.w, video.h):
44
+ return (input_file.name, "skipped", "Not 16:9")
45
+
46
+ def process_frame(frame):
47
+ frame_float = frame.astype(float) / 255.0
48
+ resized = resize(frame_float, (height, width, 3),
49
+ mode='reflect', anti_aliasing=True, preserve_range=True)
50
+ return (resized * 255).astype(np.uint8)
51
+
52
+ resized = video.fl_image(process_frame)
53
+
54
+ start_time = 0
55
+ end_time = (81 / 16)
56
+
57
+ # Crop the clip temporally using subclip
58
+ resized = resized.subclip(start_time, end_time)
59
+
60
+ # resized = resized.set_fps(fps)
61
+
62
+ resized.write_videofile(str(output_file),
63
+ codec='libx264',
64
+ audio_codec='aac',
65
+ temp_audiofile=f'temp-audio-{input_file.stem}.m4a',
66
+ remove_temp=True,
67
+ verbose=False,
68
+ logger=None,
69
+ fps=fps)
70
+
71
+ return (input_file.name, "success", None)
72
+
73
+
74
+ def process_folder(args):
75
+ input_path = Path(args.input_dir)
76
+ output_path = Path(args.output_dir)
77
+ output_path.mkdir(parents=True, exist_ok=True)
78
+
79
+ video_extensions = {'.mp4', '.avi', '.mov', '.mkv', '.webm'}
80
+ video_files = [f for f in input_path.iterdir() if f.is_file()
81
+ and f.suffix.lower() in video_extensions]
82
+
83
+ if not video_files:
84
+ print(f"No video files found in {args.input_dir}")
85
+ return
86
+
87
+ print(f"Found {len(video_files)} videos")
88
+ print(f"Target: {args.width}x{args.height} at {args.fps}fps")
89
+
90
+ # Prepare arguments for parallel processing
91
+ process_args = [(video_file, output_path, args.width,
92
+ args.height, args.fps) for video_file in video_files]
93
+
94
+ successful = 0
95
+ skipped = 0
96
+ failed = []
97
+
98
+ resize_video(process_args[0])
99
+
100
+ with tqdm(total=len(video_files), desc="Converting videos", dynamic_ncols=True) as pbar:
101
+ with ThreadPoolExecutor() as executor:
102
+ # Submit all tasks
103
+ future_to_file = {executor.submit(
104
+ resize_video, arg): arg[0] for arg in process_args}
105
+
106
+ # Process completed tasks
107
+ for future in as_completed(future_to_file):
108
+ filename, status, message = future.result()
109
+ if status == "success":
110
+ successful += 1
111
+ elif status == "skipped":
112
+ skipped += 1
113
+ else:
114
+ failed.append((filename, message))
115
+ pbar.update(1)
116
+
117
+ # Print final summary
118
+ print(
119
+ f"\nDone! Processed: {successful}, Skipped: {skipped}, Failed: {len(failed)}")
120
+ if failed:
121
+ print("Failed files:")
122
+ for fname, error in failed:
123
+ print(f"- {fname}: {error}")
124
+
125
+
126
+ def parse_args():
127
+ parser = argparse.ArgumentParser(
128
+ description='Batch resize videos to specified resolution and FPS (16:9 only)')
129
+ parser.add_argument('--input_dir', required=True,
130
+ help='Input directory containing video files')
131
+ parser.add_argument('--output_dir', required=True,
132
+ help='Output directory for processed videos')
133
+ parser.add_argument('--width', type=int, default=1280,
134
+ help='Target width in pixels (default: 848)')
135
+ parser.add_argument('--height', type=int, default=720,
136
+ help='Target height in pixels (default: 480)')
137
+ parser.add_argument('--fps', type=int, default=30,
138
+ help='Target frames per second (default: 30)')
139
+ parser.add_argument('--log-level',
140
+ choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
141
+ default='INFO',
142
+ help='Set the logging level (default: INFO)')
143
+ return parser.parse_args()
144
+
145
+
146
+ def main():
147
+ args = parse_args()
148
+ logging.getLogger().setLevel(getattr(logging, args.log_level))
149
+
150
+ if not Path(args.input_dir).exists():
151
+ logging.error(f"Input directory not found: {args.input_dir}")
152
+ return
153
+
154
+ start_time = time.time()
155
+ process_folder(args)
156
+ duration = time.time() - start_time
157
+ logging.info(f"Batch processing completed in {duration:.2f} seconds")
158
+
159
+
160
+ if __name__ == "__main__":
161
+ main()
kill_processes.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ PIDS=$(ps aux | grep python | grep -v grep | awk '{print $2}')
2
+
3
+ for PID in $PIDS; do
4
+ # echo "Killing Python process with PID: $PID"
5
+ kill -9 $PID
6
+ done
7
+
8
+ echo "All Python processes have been terminated."
minimal_inference/bidirectional_inference.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from causvid.models.wan.bidirectional_inference import BidirectionalInferencePipeline
2
+ from huggingface_hub import hf_hub_download
3
+ from diffusers.utils import export_to_video
4
+ from causvid.data import TextDataset
5
+ from omegaconf import OmegaConf
6
+ from tqdm import tqdm
7
+ import argparse
8
+ import torch
9
+ import os
10
+
11
+ parser = argparse.ArgumentParser()
12
+ parser.add_argument("--config_path", type=str)
13
+ parser.add_argument("--checkpoint_folder", type=str)
14
+ parser.add_argument("--output_folder", type=str)
15
+ parser.add_argument("--prompt_file_path", type=str)
16
+
17
+ args = parser.parse_args()
18
+
19
+ torch.backends.cuda.matmul.allow_tf32 = True
20
+ torch.backends.cudnn.allow_tf32 = True
21
+
22
+ torch.set_grad_enabled(False)
23
+
24
+ config = OmegaConf.load(args.config_path)
25
+
26
+ pipe = BidirectionalInferencePipeline(config, device="cuda")
27
+
28
+ state_dict = torch.load(os.path.join(args.checkpoint_folder, "model.pt"), map_location="cpu")[
29
+ 'generator']
30
+
31
+ pipe.generator.load_state_dict(state_dict)
32
+
33
+ pipe = pipe.to(device="cuda", dtype=torch.bfloat16)
34
+
35
+ dataset = TextDataset(args.prompt_file_path)
36
+
37
+ os.makedirs(args.output_folder, exist_ok=True)
38
+
39
+ for index in tqdm(range(len(dataset))):
40
+ prompt = dataset[index]
41
+ video = pipe.inference(
42
+ noise=torch.randn(
43
+ 1, 21, 16, 60, 104, generator=torch.Generator(device="cuda").manual_seed(42),
44
+ dtype=torch.bfloat16, device="cuda"
45
+ ),
46
+ text_prompts=[prompt]
47
+ )[0].permute(0, 2, 3, 1).cpu().numpy()
48
+
49
+ export_to_video(
50
+ video, os.path.join(args.output_folder, f"output_{index:03d}.mp4"), fps=16)
minimal_inference/longvideo_autoregressive_inference.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from causvid.models.wan.causal_inference import InferencePipeline
2
+ from diffusers.utils import export_to_video
3
+ from causvid.data import TextDataset
4
+ from omegaconf import OmegaConf
5
+ from tqdm import tqdm
6
+ import numpy as np
7
+ import argparse
8
+ import torch
9
+ import os
10
+
11
+ parser = argparse.ArgumentParser()
12
+ parser.add_argument("--config_path", type=str)
13
+ parser.add_argument("--checkpoint_folder", type=str)
14
+ parser.add_argument("--prompt_file_path", type=str)
15
+ parser.add_argument("--output_folder", type=str)
16
+ parser.add_argument("--num_rollout", type=int, default=3)
17
+ parser.add_argument("--num_overlap_frames", type=int, default=3)
18
+
19
+ args = parser.parse_args()
20
+
21
+ torch.set_grad_enabled(False)
22
+
23
+ config = OmegaConf.load(args.config_path)
24
+
25
+ pipeline = InferencePipeline(config, device="cuda")
26
+ pipeline.to(device="cuda", dtype=torch.bfloat16)
27
+ assert args.num_overlap_frames % pipeline.num_frame_per_block == 0, "num_overlap_frames must be divisible by num_frame_per_block"
28
+
29
+ state_dict = torch.load(os.path.join(args.checkpoint_folder, "model.pt"), map_location="cpu")[
30
+ 'generator']
31
+
32
+ pipeline.generator.load_state_dict(
33
+ state_dict, strict=True
34
+ )
35
+
36
+ dataset = TextDataset(args.prompt_file_path)
37
+
38
+ num_rollout = args.num_rollout
39
+
40
+ os.makedirs(args.output_folder, exist_ok=True)
41
+
42
+
43
+ def encode(self, videos: torch.Tensor) -> torch.Tensor:
44
+ device, dtype = videos[0].device, videos[0].dtype
45
+ scale = [self.mean.to(device=device, dtype=dtype),
46
+ 1.0 / self.std.to(device=device, dtype=dtype)]
47
+ output = [
48
+ self.model.encode(u.unsqueeze(0), scale).float().squeeze(0)
49
+ for u in videos
50
+ ]
51
+
52
+ output = torch.stack(output, dim=0)
53
+ return output
54
+
55
+
56
+ for prompt_index in tqdm(range(len(dataset))):
57
+ prompts = [dataset[prompt_index]]
58
+ start_latents = None
59
+ all_video = []
60
+
61
+ for rollout_index in range(num_rollout):
62
+ sampled_noise = torch.randn(
63
+ [1, 21, 16, 60, 104], device="cuda", dtype=torch.bfloat16
64
+ )
65
+
66
+ video, latents = pipeline.inference(
67
+ noise=sampled_noise,
68
+ text_prompts=prompts,
69
+ return_latents=True,
70
+ start_latents=start_latents
71
+ )
72
+
73
+ current_video = video[0].permute(0, 2, 3, 1).cpu().numpy()
74
+
75
+ start_frame = encode(pipeline.vae, (
76
+ video[:, -4 * (args.num_overlap_frames - 1) - 1:-4 * (args.num_overlap_frames - 1), :] * 2.0 - 1.0
77
+ ).transpose(2, 1).to(torch.bfloat16)).transpose(2, 1).to(torch.bfloat16)
78
+
79
+ start_latents = torch.cat(
80
+ [start_frame, latents[:, -(args.num_overlap_frames - 1):]], dim=1
81
+ )
82
+
83
+ all_video.append(current_video[:-(4 * (args.num_overlap_frames - 1) + 1)])
84
+
85
+ video = np.concatenate(all_video, axis=0)
86
+
87
+ export_to_video(
88
+ video, os.path.join(args.output_folder, f"long_video_output_{prompt_index:03d}.mp4"), fps=16)
output/wandb/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-09-09T03:14:06.827346748Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
2
+ {"time":"2025-09-09T03:14:07.041670446Z","level":"INFO","msg":"stream: created new stream","id":"fvhxlznm"}
3
+ {"time":"2025-09-09T03:14:07.04170638Z","level":"INFO","msg":"stream: started","id":"fvhxlznm"}
4
+ {"time":"2025-09-09T03:14:07.04171651Z","level":"INFO","msg":"writer: started","stream_id":"fvhxlznm"}
5
+ {"time":"2025-09-09T03:14:07.041726675Z","level":"INFO","msg":"handler: started","stream_id":"fvhxlznm"}
6
+ {"time":"2025-09-09T03:14:07.041739753Z","level":"INFO","msg":"sender: started","stream_id":"fvhxlznm"}
output/wandb/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
2
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Configure stats pid to 1234529
3
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
4
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
5
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250909_031406-fvhxlznm/logs/debug.log
7
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250909_031406-fvhxlznm/logs/debug-internal.log
8
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():813] calling init triggers
9
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
10
+ config: {'model_name': 'causal_wan', 'generator_ckpt': 'pretrained_ode.pt', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 6553852, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
11
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():854] starting backend
12
+ 2025-09-09 03:14:06,816 INFO MainThread:1234529 [wandb_init.py:init():857] sending inform_init request
13
+ 2025-09-09 03:14:06,821 INFO MainThread:1234529 [wandb_init.py:init():865] backend started and connected
14
+ 2025-09-09 03:14:06,825 INFO MainThread:1234529 [wandb_init.py:init():936] updated telemetry
15
+ 2025-09-09 03:14:06,830 INFO MainThread:1234529 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
16
+ 2025-09-09 03:14:07,304 INFO MainThread:1234529 [wandb_init.py:init():1011] starting run threads in backend
17
+ 2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_console_start():2494] atexit reg
18
+ 2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2342] redirect: wrap_raw
19
+ 2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2411] Wrapping output streams.
20
+ 2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2434] Redirects installed.
21
+ 2025-09-09 03:14:07,417 INFO MainThread:1234529 [wandb_init.py:init():1057] run started, returning control to user process
22
+ 2025-09-09 03:14:07,902 INFO MainThread:1234529 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
output/wandb/run-20250908_062833-1l2wnyo3/files/config.yaml ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.3
4
+ code_path: source-self-forcing-causvid_train_ode.py
5
+ e:
6
+ aqq0exsxdyb4vz63hwlotexs67e6hip5:
7
+ args:
8
+ - --config_path
9
+ - configs/wan_causal_ode.yaml
10
+ codePath: causvid/train_ode.py
11
+ codePathLocal: causvid/train_ode.py
12
+ cpu_count: 48
13
+ cpu_count_logical: 96
14
+ cudaVersion: "12.8"
15
+ disk:
16
+ /:
17
+ total: "2079114358784"
18
+ used: "1366588243968"
19
+ email: liyitong.thu@gmail.com
20
+ executable: /home/yitongli/miniconda3/envs/causvid/bin/python3.10
21
+ git:
22
+ commit: adb6a5ecd07666b4d0290042915c8406e6d5ce22
23
+ remote: https://github.com/tianweiy/CausVid.git
24
+ gpu: NVIDIA A100-SXM4-80GB
25
+ gpu_count: 8
26
+ gpu_nvidia:
27
+ - architecture: Ampere
28
+ cudaCores: 6912
29
+ memoryTotal: "85899345920"
30
+ name: NVIDIA A100-SXM4-80GB
31
+ uuid: GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949
32
+ - architecture: Ampere
33
+ cudaCores: 6912
34
+ memoryTotal: "85899345920"
35
+ name: NVIDIA A100-SXM4-80GB
36
+ uuid: GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42
37
+ - architecture: Ampere
38
+ cudaCores: 6912
39
+ memoryTotal: "85899345920"
40
+ name: NVIDIA A100-SXM4-80GB
41
+ uuid: GPU-1f47056a-9c2c-8af1-8232-636203622830
42
+ - architecture: Ampere
43
+ cudaCores: 6912
44
+ memoryTotal: "85899345920"
45
+ name: NVIDIA A100-SXM4-80GB
46
+ uuid: GPU-38de1dff-a680-ec66-625b-49f2a31e3b42
47
+ - architecture: Ampere
48
+ cudaCores: 6912
49
+ memoryTotal: "85899345920"
50
+ name: NVIDIA A100-SXM4-80GB
51
+ uuid: GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204
52
+ - architecture: Ampere
53
+ cudaCores: 6912
54
+ memoryTotal: "85899345920"
55
+ name: NVIDIA A100-SXM4-80GB
56
+ uuid: GPU-194ed489-5bfa-defc-85b1-812953e7d84a
57
+ - architecture: Ampere
58
+ cudaCores: 6912
59
+ memoryTotal: "85899345920"
60
+ name: NVIDIA A100-SXM4-80GB
61
+ uuid: GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4
62
+ - architecture: Ampere
63
+ cudaCores: 6912
64
+ memoryTotal: "85899345920"
65
+ name: NVIDIA A100-SXM4-80GB
66
+ uuid: GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057
67
+ host: ip-172-31-3-169
68
+ memory:
69
+ total: "1204521443328"
70
+ os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.39
71
+ program: /home/yitongli/CausVid/causvid/train_ode.py
72
+ python: CPython 3.10.18
73
+ root: ./output
74
+ startedAt: "2025-09-08T06:28:33.977989Z"
75
+ writerId: aqq0exsxdyb4vz63hwlotexs67e6hip5
76
+ m: []
77
+ python_version: 3.10.18
78
+ t:
79
+ "1":
80
+ - 1
81
+ - 11
82
+ - 41
83
+ - 49
84
+ - 71
85
+ - 83
86
+ - 105
87
+ "2":
88
+ - 1
89
+ - 11
90
+ - 41
91
+ - 49
92
+ - 71
93
+ - 83
94
+ - 105
95
+ "3":
96
+ - 16
97
+ - 17
98
+ "4": 3.10.18
99
+ "5": 0.21.3
100
+ "6": 4.56.1
101
+ "12": 0.21.3
102
+ "13": linux-x86_64
103
+ batch_size:
104
+ value: 2
105
+ beta1:
106
+ value: 0.9
107
+ beta2:
108
+ value: 0.999
109
+ data_path:
110
+ value: ../mixkit_ode_lmdb
111
+ denoising_step_list:
112
+ value:
113
+ - 1000
114
+ - 757
115
+ - 522
116
+ - 0
117
+ distillation_loss:
118
+ value: ode
119
+ generator_fsdp_wrap_strategy:
120
+ value: size
121
+ generator_grad:
122
+ value:
123
+ model: true
124
+ generator_task:
125
+ value: causal_video
126
+ gradient_checkpointing:
127
+ value: true
128
+ log_iters:
129
+ value: 200
130
+ lr:
131
+ value: 2e-06
132
+ mixed_precision:
133
+ value: true
134
+ model_name:
135
+ value: causal_wan
136
+ no_save:
137
+ value: false
138
+ num_frame_per_block:
139
+ value: 3
140
+ output_path:
141
+ value: ./output
142
+ seed:
143
+ value: 8706203
144
+ sharding_strategy:
145
+ value: hybrid_full
146
+ text_encoder_fsdp_wrap_strategy:
147
+ value: size
148
+ wandb_entity:
149
+ value: liyitong-Tsinghua University
150
+ wandb_host:
151
+ value: https://api.wandb.ai
152
+ wandb_key:
153
+ value: 5409d3b960b01b25cec0f6abb5361b4022f0cc41
154
+ wandb_name:
155
+ value: wan_causal_ode
156
+ wandb_project:
157
+ value: self-forcing
158
+ warp_denoising_step:
159
+ value: false
output/wandb/run-20250908_062833-1l2wnyo3/files/output.log ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run dir: ./output/wandb/run-20250908_062833-1l2wnyo3/files
2
+ Traceback (most recent call last):
3
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 211, in <module>
4
+ main()
5
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 204, in main
6
+ trainer = Trainer(config)
7
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 51, in __init__
8
+ self.distillation_model = ODERegression(config, device=self.device)
9
+ File "/home/yitongli/CausVid/causvid/ode_regression.py", line 39, in __init__
10
+ self.generator.enable_gradient_checkpointing()
11
+ File "/home/yitongli/CausVid/causvid/models/wan/wan_wrapper.py", line 113, in enable_gradient_checkpointing
12
+ self.model.enable_gradient_checkpointing()
13
+ File "/home/yitongli/miniconda3/envs/causvid/lib/python3.10/site-packages/diffusers/models/modeling_utils.py", line 310, in enable_gradient_checkpointing
14
+ self._set_gradient_checkpointing(enable=True, gradient_checkpointing_func=gradient_checkpointing_func)
15
+ TypeError: CausalWanModel._set_gradient_checkpointing() got an unexpected keyword argument 'enable'
16
+ [rank0]: Traceback (most recent call last):
17
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 211, in <module>
18
+ [rank0]: main()
19
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 204, in main
20
+ [rank0]: trainer = Trainer(config)
21
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 51, in __init__
22
+ [rank0]: self.distillation_model = ODERegression(config, device=self.device)
23
+ [rank0]: File "/home/yitongli/CausVid/causvid/ode_regression.py", line 39, in __init__
24
+ [rank0]: self.generator.enable_gradient_checkpointing()
25
+ [rank0]: File "/home/yitongli/CausVid/causvid/models/wan/wan_wrapper.py", line 113, in enable_gradient_checkpointing
26
+ [rank0]: self.model.enable_gradient_checkpointing()
27
+ [rank0]: File "/home/yitongli/miniconda3/envs/causvid/lib/python3.10/site-packages/diffusers/models/modeling_utils.py", line 310, in enable_gradient_checkpointing
28
+ [rank0]: self._set_gradient_checkpointing(enable=True, gradient_checkpointing_func=gradient_checkpointing_func)
29
+ [rank0]: TypeError: CausalWanModel._set_gradient_checkpointing() got an unexpected keyword argument 'enable'
output/wandb/run-20250908_062833-1l2wnyo3/files/requirements.txt ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nvidia-cuda-nvrtc-cu12==12.8.93
2
+ filelock==3.19.1
3
+ aiofiles==24.1.0
4
+ PyYAML==6.0.2
5
+ httpx==0.28.1
6
+ multidict==6.6.4
7
+ tifffile==2025.5.10
8
+ tzdata==2025.2
9
+ urllib3==2.5.0
10
+ decord==0.6.0
11
+ certifi==2025.8.3
12
+ setuptools==78.1.1
13
+ websocket-client==1.8.0
14
+ gitdb==4.0.12
15
+ yarl==1.20.1
16
+ tqdm==4.67.1
17
+ open_clip_torch==3.1.0
18
+ pyparsing==3.2.3
19
+ fastapi==0.116.1
20
+ nvidia-curand-cu12==10.3.9.90
21
+ mdurl==0.1.2
22
+ torchvision==0.23.0
23
+ h11==0.16.0
24
+ pytz==2025.2
25
+ six==1.17.0
26
+ nvidia-cufile-cu12==1.13.1.3
27
+ aiohappyeyeballs==2.6.1
28
+ wandb==0.21.3
29
+ nvidia-nvjitlink-cu12==12.8.93
30
+ cycler==0.12.1
31
+ anyio==4.10.0
32
+ scikit-image==0.25.2
33
+ nvidia-cuda-runtime-cu12==12.8.90
34
+ sentencepiece==0.2.1
35
+ rich==14.1.0
36
+ pydantic==2.11.7
37
+ GitPython==3.1.45
38
+ python-dateutil==2.9.0.post0
39
+ accelerate==1.10.1
40
+ proglog==0.1.12
41
+ sentry-sdk==2.37.0
42
+ nvidia-cusparselt-cu12==0.7.1
43
+ dashscope==1.24.3
44
+ platformdirs==4.4.0
45
+ safehttpx==0.1.6
46
+ fsspec==2025.9.0
47
+ lazy_loader==0.4
48
+ typing_extensions==4.15.0
49
+ semantic-version==2.10.0
50
+ numpy==1.24.4
51
+ diffusers==0.35.1
52
+ nvidia-cufft-cu12==11.3.3.83
53
+ ruff==0.12.12
54
+ nvidia-cudnn-cu12==9.10.2.21
55
+ annotated-types==0.7.0
56
+ decorator==4.4.2
57
+ antlr4-python3-runtime==4.9.3
58
+ psutil==7.0.0
59
+ Brotli==1.1.0
60
+ tomlkit==0.13.3
61
+ httpcore==1.0.9
62
+ kiwisolver==1.4.9
63
+ nvidia-cusparse-cu12==12.5.8.93
64
+ groovy==0.1.2
65
+ protobuf==6.32.0
66
+ orjson==3.11.3
67
+ scipy==1.15.3
68
+ regex==2025.9.1
69
+ MarkupSafe==3.0.2
70
+ av==13.1.0
71
+ timm==1.0.19
72
+ uvicorn==0.35.0
73
+ wheel==0.45.1
74
+ Pygments==2.19.2
75
+ websockets==15.0.1
76
+ lmdb==1.7.3
77
+ sympy==1.14.0
78
+ einops==0.8.1
79
+ idna==3.10
80
+ triton==3.4.0
81
+ torch==2.8.0
82
+ moviepy==1.0.3
83
+ nvidia-nvtx-cu12==12.8.90
84
+ matplotlib==3.10.6
85
+ pillow==11.3.0
86
+ charset-normalizer==3.4.3
87
+ attrs==25.3.0
88
+ aiosignal==1.4.0
89
+ markdown-it-py==4.0.0
90
+ requests==2.32.5
91
+ typer==0.17.4
92
+ huggingface-hub==0.34.4
93
+ nvidia-nccl-cu12==2.27.3
94
+ propcache==0.3.2
95
+ opencv-python==4.11.0.86
96
+ ffmpy==0.6.1
97
+ jmespath==1.0.1
98
+ botocore==1.40.25
99
+ pydantic_core==2.33.2
100
+ fonttools==4.59.2
101
+ omegaconf==2.3.0
102
+ pycparser==2.22
103
+ mpmath==1.3.0
104
+ flash_attn==2.8.3
105
+ smmap==5.0.2
106
+ gradio_client==1.12.1
107
+ exceptiongroup==1.3.0
108
+ cffi==1.17.1
109
+ nvidia-cusolver-cu12==11.7.3.90
110
+ contourpy==1.3.2
111
+ cryptography==45.0.7
112
+ nvidia-cuda-cupti-cu12==12.8.90
113
+ wcwidth==0.2.13
114
+ zipp==3.23.0
115
+ safetensors==0.6.2
116
+ gradio==5.44.1
117
+ click==8.2.1
118
+ frozenlist==1.7.0
119
+ networkx==3.4.2
120
+ s3transfer==0.13.1
121
+ shellingham==1.5.4
122
+ starlette==0.47.3
123
+ packaging==25.0
124
+ ftfy==6.3.1
125
+ importlib_metadata==8.7.0
126
+ transformers==4.56.1
127
+ nvidia-cublas-cu12==12.8.4.1
128
+ Jinja2==3.1.6
129
+ pycocotools==2.0.10
130
+ sniffio==1.3.1
131
+ hf-xet==1.1.9
132
+ typing-inspection==0.4.1
133
+ pandas==2.3.2
134
+ python-multipart==0.0.20
135
+ aiohttp==3.12.15
136
+ clip==1.0
137
+ pydub==0.25.1
138
+ easydict==1.13
139
+ pip==25.2
140
+ tokenizers==0.22.0
141
+ imageio==2.37.0
142
+ async-timeout==5.0.1
143
+ boto3==1.40.25
144
+ imageio-ffmpeg==0.6.0
145
+ CausVid==0.0.1
output/wandb/run-20250908_062833-1l2wnyo3/files/wandb-metadata.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-09-08T06:28:33.977989Z",
5
+ "args": [
6
+ "--config_path",
7
+ "configs/wan_causal_ode.yaml"
8
+ ],
9
+ "program": "/home/yitongli/CausVid/causvid/train_ode.py",
10
+ "codePath": "causvid/train_ode.py",
11
+ "codePathLocal": "causvid/train_ode.py",
12
+ "git": {
13
+ "remote": "https://github.com/tianweiy/CausVid.git",
14
+ "commit": "adb6a5ecd07666b4d0290042915c8406e6d5ce22"
15
+ },
16
+ "email": "liyitong.thu@gmail.com",
17
+ "root": "./output",
18
+ "host": "ip-172-31-3-169",
19
+ "executable": "/home/yitongli/miniconda3/envs/causvid/bin/python3.10",
20
+ "cpu_count": 48,
21
+ "cpu_count_logical": 96,
22
+ "gpu": "NVIDIA A100-SXM4-80GB",
23
+ "gpu_count": 8,
24
+ "disk": {
25
+ "/": {
26
+ "total": "2079114358784",
27
+ "used": "1366588243968"
28
+ }
29
+ },
30
+ "memory": {
31
+ "total": "1204521443328"
32
+ },
33
+ "gpu_nvidia": [
34
+ {
35
+ "name": "NVIDIA A100-SXM4-80GB",
36
+ "memoryTotal": "85899345920",
37
+ "cudaCores": 6912,
38
+ "architecture": "Ampere",
39
+ "uuid": "GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949"
40
+ },
41
+ {
42
+ "name": "NVIDIA A100-SXM4-80GB",
43
+ "memoryTotal": "85899345920",
44
+ "cudaCores": 6912,
45
+ "architecture": "Ampere",
46
+ "uuid": "GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42"
47
+ },
48
+ {
49
+ "name": "NVIDIA A100-SXM4-80GB",
50
+ "memoryTotal": "85899345920",
51
+ "cudaCores": 6912,
52
+ "architecture": "Ampere",
53
+ "uuid": "GPU-1f47056a-9c2c-8af1-8232-636203622830"
54
+ },
55
+ {
56
+ "name": "NVIDIA A100-SXM4-80GB",
57
+ "memoryTotal": "85899345920",
58
+ "cudaCores": 6912,
59
+ "architecture": "Ampere",
60
+ "uuid": "GPU-38de1dff-a680-ec66-625b-49f2a31e3b42"
61
+ },
62
+ {
63
+ "name": "NVIDIA A100-SXM4-80GB",
64
+ "memoryTotal": "85899345920",
65
+ "cudaCores": 6912,
66
+ "architecture": "Ampere",
67
+ "uuid": "GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204"
68
+ },
69
+ {
70
+ "name": "NVIDIA A100-SXM4-80GB",
71
+ "memoryTotal": "85899345920",
72
+ "cudaCores": 6912,
73
+ "architecture": "Ampere",
74
+ "uuid": "GPU-194ed489-5bfa-defc-85b1-812953e7d84a"
75
+ },
76
+ {
77
+ "name": "NVIDIA A100-SXM4-80GB",
78
+ "memoryTotal": "85899345920",
79
+ "cudaCores": 6912,
80
+ "architecture": "Ampere",
81
+ "uuid": "GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4"
82
+ },
83
+ {
84
+ "name": "NVIDIA A100-SXM4-80GB",
85
+ "memoryTotal": "85899345920",
86
+ "cudaCores": 6912,
87
+ "architecture": "Ampere",
88
+ "uuid": "GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057"
89
+ }
90
+ ],
91
+ "cudaVersion": "12.8",
92
+ "writerId": "aqq0exsxdyb4vz63hwlotexs67e6hip5"
93
+ }
output/wandb/run-20250908_062833-1l2wnyo3/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":0},"_runtime":0}
output/wandb/run-20250908_062833-1l2wnyo3/logs/debug-core.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-08T06:28:34.253598263Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqgyjn6i1/port-3409973.txt","pid":3409973,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-08T06:28:34.257091825Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3409973}
3
+ {"time":"2025-09-08T06:28:34.256253546Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3409973-3410209-1263294975/socket","Net":"unix"}}
4
+ {"time":"2025-09-08T06:28:34.391672548Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-08T06:28:34.406857393Z","level":"INFO","msg":"handleInformInit: received","streamId":"1l2wnyo3","id":"1(@)"}
6
+ {"time":"2025-09-08T06:28:34.736516792Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"1l2wnyo3","id":"1(@)"}
7
+ {"time":"2025-09-08T06:28:35.917810249Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-09-08T06:28:35.917878105Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-09-08T06:28:35.917865065Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2025-09-08T06:28:35.917988283Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-09-08T06:28:35.917976111Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3409973-3410209-1263294975/socket","Net":"unix"}}
12
+ {"time":"2025-09-08T06:28:36.754084432Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write unix /tmp/wandb-3409973-3410209-1263294975/socket->@: use of closed network connection","id":"1(@)"}
13
+ {"time":"2025-09-08T06:28:36.932139033Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
14
+ {"time":"2025-09-08T06:28:36.932153507Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
15
+ {"time":"2025-09-08T06:28:36.932164872Z","level":"INFO","msg":"server is closed"}
output/wandb/run-20250908_062833-1l2wnyo3/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-08T06:28:34.406945302Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
2
+ {"time":"2025-09-08T06:28:34.736482072Z","level":"INFO","msg":"stream: created new stream","id":"1l2wnyo3"}
3
+ {"time":"2025-09-08T06:28:34.736512607Z","level":"INFO","msg":"stream: started","id":"1l2wnyo3"}
4
+ {"time":"2025-09-08T06:28:34.736522488Z","level":"INFO","msg":"writer: started","stream_id":"1l2wnyo3"}
5
+ {"time":"2025-09-08T06:28:34.7365292Z","level":"INFO","msg":"handler: started","stream_id":"1l2wnyo3"}
6
+ {"time":"2025-09-08T06:28:34.736574661Z","level":"INFO","msg":"sender: started","stream_id":"1l2wnyo3"}
7
+ {"time":"2025-09-08T06:28:35.91787343Z","level":"INFO","msg":"stream: closing","id":"1l2wnyo3"}
8
+ {"time":"2025-09-08T06:28:36.804459652Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-09-08T06:28:36.9184071Z","level":"INFO","msg":"handler: closed","stream_id":"1l2wnyo3"}
10
+ {"time":"2025-09-08T06:28:36.918464588Z","level":"INFO","msg":"sender: closed","stream_id":"1l2wnyo3"}
11
+ {"time":"2025-09-08T06:28:36.918471349Z","level":"INFO","msg":"stream: closed","id":"1l2wnyo3"}
output/wandb/run-20250908_062833-1l2wnyo3/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
2
+ 2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_setup.py:_flush():81] Configure stats pid to 3409973
3
+ 2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
4
+ 2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
5
+ 2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250908_062833-1l2wnyo3/logs/debug.log
7
+ 2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250908_062833-1l2wnyo3/logs/debug-internal.log
8
+ 2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_init.py:init():813] calling init triggers
9
+ 2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
10
+ config: {'model_name': 'causal_wan', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 8706203, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
11
+ 2025-09-08 06:28:33,979 INFO MainThread:3409973 [wandb_init.py:init():854] starting backend
12
+ 2025-09-08 06:28:34,391 INFO MainThread:3409973 [wandb_init.py:init():857] sending inform_init request
13
+ 2025-09-08 06:28:34,398 INFO MainThread:3409973 [wandb_init.py:init():865] backend started and connected
14
+ 2025-09-08 06:28:34,400 INFO MainThread:3409973 [wandb_init.py:init():936] updated telemetry
15
+ 2025-09-08 06:28:34,405 INFO MainThread:3409973 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
16
+ 2025-09-08 06:28:35,106 INFO MainThread:3409973 [wandb_init.py:init():1011] starting run threads in backend
17
+ 2025-09-08 06:28:35,269 INFO MainThread:3409973 [wandb_run.py:_console_start():2494] atexit reg
18
+ 2025-09-08 06:28:35,270 INFO MainThread:3409973 [wandb_run.py:_redirect():2342] redirect: wrap_raw
19
+ 2025-09-08 06:28:35,270 INFO MainThread:3409973 [wandb_run.py:_redirect():2411] Wrapping output streams.
20
+ 2025-09-08 06:28:35,270 INFO MainThread:3409973 [wandb_run.py:_redirect():2434] Redirects installed.
21
+ 2025-09-08 06:28:35,273 INFO MainThread:3409973 [wandb_init.py:init():1057] run started, returning control to user process
22
+ 2025-09-08 06:28:35,672 INFO MainThread:3409973 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
23
+ 2025-09-08 06:28:35,917 INFO wandb-AsyncioManager-main:3409973 [service_client.py:_forward_responses():84] Reached EOF.
24
+ 2025-09-08 06:28:35,918 INFO wandb-AsyncioManager-main:3409973 [mailbox.py:close():137] Closing mailbox, abandoning 2 handles.
output/wandb/run-20250908_062833-1l2wnyo3/run-1l2wnyo3.wandb ADDED
Binary file (7.27 kB). View file
 
output/wandb/run-20250908_064634-xd44venm/files/output.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run dir: ./output/wandb/run-20250908_064634-xd44venm/files
2
+ Start training step 0...
3
+ cache a block wise causal mask with block size of 3 frames
4
+ BlockMask(shape=(1, 1, 32768, 32768), sparsity=42.52%,
5
+ (0, 0)
6
+ ████░░
7
+ ████░░
8
+ ████░░░░░░░░
9
+ ██████████░░
10
+ ██████████░░
11
+ ██████████░░░░░░░░
12
+ ████████████████░░
13
+ ████████████████░░
14
+ ████████████████░░░░░░░░
15
+ ██████████████████████░░
16
+ ██████████████████████░░
17
+ ██████████████████████░░░░░░░░
18
+ ████████████████████████████░░
19
+ ████████████████████████████░░
20
+ ████████████████████████████████░░
21
+ ████████████████████████████████░░
22
+ ████████████████████████████████░░░░░░░░
23
+ ████████████████████████████████████████
24
+ ████████████████████████████████████████
25
+ ████████████████████████████████████████
26
+ )
output/wandb/run-20250908_064634-xd44venm/files/requirements.txt ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nvidia-cuda-nvrtc-cu12==12.8.93
2
+ filelock==3.19.1
3
+ aiofiles==24.1.0
4
+ PyYAML==6.0.2
5
+ httpx==0.28.1
6
+ multidict==6.6.4
7
+ tifffile==2025.5.10
8
+ tzdata==2025.2
9
+ urllib3==2.5.0
10
+ decord==0.6.0
11
+ certifi==2025.8.3
12
+ setuptools==78.1.1
13
+ websocket-client==1.8.0
14
+ gitdb==4.0.12
15
+ yarl==1.20.1
16
+ tqdm==4.67.1
17
+ open_clip_torch==3.1.0
18
+ pyparsing==3.2.3
19
+ fastapi==0.116.1
20
+ nvidia-curand-cu12==10.3.9.90
21
+ mdurl==0.1.2
22
+ torchvision==0.23.0
23
+ h11==0.16.0
24
+ pytz==2025.2
25
+ six==1.17.0
26
+ nvidia-cufile-cu12==1.13.1.3
27
+ aiohappyeyeballs==2.6.1
28
+ wandb==0.21.3
29
+ nvidia-nvjitlink-cu12==12.8.93
30
+ cycler==0.12.1
31
+ anyio==4.10.0
32
+ scikit-image==0.25.2
33
+ nvidia-cuda-runtime-cu12==12.8.90
34
+ sentencepiece==0.2.1
35
+ rich==14.1.0
36
+ pydantic==2.11.7
37
+ GitPython==3.1.45
38
+ python-dateutil==2.9.0.post0
39
+ accelerate==1.10.1
40
+ proglog==0.1.12
41
+ sentry-sdk==2.37.0
42
+ nvidia-cusparselt-cu12==0.7.1
43
+ dashscope==1.24.3
44
+ platformdirs==4.4.0
45
+ safehttpx==0.1.6
46
+ fsspec==2025.9.0
47
+ lazy_loader==0.4
48
+ typing_extensions==4.15.0
49
+ semantic-version==2.10.0
50
+ numpy==1.24.4
51
+ nvidia-cufft-cu12==11.3.3.83
52
+ ruff==0.12.12
53
+ nvidia-cudnn-cu12==9.10.2.21
54
+ annotated-types==0.7.0
55
+ decorator==4.4.2
56
+ antlr4-python3-runtime==4.9.3
57
+ psutil==7.0.0
58
+ Brotli==1.1.0
59
+ tomlkit==0.13.3
60
+ httpcore==1.0.9
61
+ kiwisolver==1.4.9
62
+ nvidia-cusparse-cu12==12.5.8.93
63
+ groovy==0.1.2
64
+ protobuf==6.32.0
65
+ orjson==3.11.3
66
+ scipy==1.15.3
67
+ regex==2025.9.1
68
+ MarkupSafe==3.0.2
69
+ av==13.1.0
70
+ timm==1.0.19
71
+ uvicorn==0.35.0
72
+ wheel==0.45.1
73
+ Pygments==2.19.2
74
+ websockets==15.0.1
75
+ lmdb==1.7.3
76
+ sympy==1.14.0
77
+ einops==0.8.1
78
+ idna==3.10
79
+ triton==3.4.0
80
+ torch==2.8.0
81
+ moviepy==1.0.3
82
+ nvidia-nvtx-cu12==12.8.90
83
+ matplotlib==3.10.6
84
+ pillow==11.3.0
85
+ charset-normalizer==3.4.3
86
+ attrs==25.3.0
87
+ aiosignal==1.4.0
88
+ markdown-it-py==4.0.0
89
+ requests==2.32.5
90
+ typer==0.17.4
91
+ huggingface-hub==0.34.4
92
+ nvidia-nccl-cu12==2.27.3
93
+ propcache==0.3.2
94
+ opencv-python==4.11.0.86
95
+ ffmpy==0.6.1
96
+ jmespath==1.0.1
97
+ botocore==1.40.25
98
+ pydantic_core==2.33.2
99
+ fonttools==4.59.2
100
+ omegaconf==2.3.0
101
+ pycparser==2.22
102
+ mpmath==1.3.0
103
+ flash_attn==2.8.3
104
+ smmap==5.0.2
105
+ gradio_client==1.12.1
106
+ exceptiongroup==1.3.0
107
+ cffi==1.17.1
108
+ diffusers==0.31.0
109
+ nvidia-cusolver-cu12==11.7.3.90
110
+ contourpy==1.3.2
111
+ cryptography==45.0.7
112
+ nvidia-cuda-cupti-cu12==12.8.90
113
+ wcwidth==0.2.13
114
+ zipp==3.23.0
115
+ safetensors==0.6.2
116
+ gradio==5.44.1
117
+ click==8.2.1
118
+ frozenlist==1.7.0
119
+ networkx==3.4.2
120
+ s3transfer==0.13.1
121
+ shellingham==1.5.4
122
+ starlette==0.47.3
123
+ packaging==25.0
124
+ ftfy==6.3.1
125
+ importlib_metadata==8.7.0
126
+ transformers==4.56.1
127
+ nvidia-cublas-cu12==12.8.4.1
128
+ Jinja2==3.1.6
129
+ pycocotools==2.0.10
130
+ sniffio==1.3.1
131
+ hf-xet==1.1.9
132
+ typing-inspection==0.4.1
133
+ pandas==2.3.2
134
+ python-multipart==0.0.20
135
+ aiohttp==3.12.15
136
+ clip==1.0
137
+ pydub==0.25.1
138
+ easydict==1.13
139
+ pip==25.2
140
+ tokenizers==0.22.0
141
+ imageio==2.37.0
142
+ async-timeout==5.0.1
143
+ boto3==1.40.25
144
+ imageio-ffmpeg==0.6.0
145
+ CausVid==0.0.1
output/wandb/run-20250908_064634-xd44venm/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-09-08T06:46:35.017276807Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
2
+ {"time":"2025-09-08T06:46:35.219581537Z","level":"INFO","msg":"stream: created new stream","id":"xd44venm"}
3
+ {"time":"2025-09-08T06:46:35.219621685Z","level":"INFO","msg":"stream: started","id":"xd44venm"}
4
+ {"time":"2025-09-08T06:46:35.219634304Z","level":"INFO","msg":"sender: started","stream_id":"xd44venm"}
5
+ {"time":"2025-09-08T06:46:35.219635253Z","level":"INFO","msg":"handler: started","stream_id":"xd44venm"}
6
+ {"time":"2025-09-08T06:46:35.219656757Z","level":"INFO","msg":"writer: started","stream_id":"xd44venm"}
output/wandb/run-20250908_064634-xd44venm/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
2
+ 2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_setup.py:_flush():81] Configure stats pid to 3454108
3
+ 2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
4
+ 2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
5
+ 2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250908_064634-xd44venm/logs/debug.log
7
+ 2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250908_064634-xd44venm/logs/debug-internal.log
8
+ 2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_init.py:init():813] calling init triggers
9
+ 2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
10
+ config: {'model_name': 'causal_wan', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 6790074, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
11
+ 2025-09-08 06:46:34,798 INFO MainThread:3454108 [wandb_init.py:init():854] starting backend
12
+ 2025-09-08 06:46:35,006 INFO MainThread:3454108 [wandb_init.py:init():857] sending inform_init request
13
+ 2025-09-08 06:46:35,010 INFO MainThread:3454108 [wandb_init.py:init():865] backend started and connected
14
+ 2025-09-08 06:46:35,015 INFO MainThread:3454108 [wandb_init.py:init():936] updated telemetry
15
+ 2025-09-08 06:46:35,020 INFO MainThread:3454108 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
16
+ 2025-09-08 06:46:35,404 INFO MainThread:3454108 [wandb_init.py:init():1011] starting run threads in backend
17
+ 2025-09-08 06:46:35,515 INFO MainThread:3454108 [wandb_run.py:_console_start():2494] atexit reg
18
+ 2025-09-08 06:46:35,516 INFO MainThread:3454108 [wandb_run.py:_redirect():2342] redirect: wrap_raw
19
+ 2025-09-08 06:46:35,516 INFO MainThread:3454108 [wandb_run.py:_redirect():2411] Wrapping output streams.
20
+ 2025-09-08 06:46:35,516 INFO MainThread:3454108 [wandb_run.py:_redirect():2434] Redirects installed.
21
+ 2025-09-08 06:46:35,518 INFO MainThread:3454108 [wandb_init.py:init():1057] run started, returning control to user process
22
+ 2025-09-08 06:46:35,894 INFO MainThread:3454108 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
output/wandb/run-20250908_082236-gjh65qzq/files/config.yaml ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.3
4
+ code_path: source-self-forcing-causvid_train_ode.py
5
+ e:
6
+ ffoakmd0zn8f6sjv7nu2r58nmtyazqxg:
7
+ args:
8
+ - --config_path
9
+ - configs/wan_causal_ode.yaml
10
+ codePath: causvid/train_ode.py
11
+ codePathLocal: causvid/train_ode.py
12
+ cpu_count: 48
13
+ cpu_count_logical: 96
14
+ cudaVersion: "12.8"
15
+ disk:
16
+ /:
17
+ total: "2079114358784"
18
+ used: "1376086970368"
19
+ email: liyitong.thu@gmail.com
20
+ executable: /home/yitongli/miniconda3/envs/causvid/bin/python3.10
21
+ git:
22
+ commit: adb6a5ecd07666b4d0290042915c8406e6d5ce22
23
+ remote: https://github.com/tianweiy/CausVid.git
24
+ gpu: NVIDIA A100-SXM4-80GB
25
+ gpu_count: 8
26
+ gpu_nvidia:
27
+ - architecture: Ampere
28
+ cudaCores: 6912
29
+ memoryTotal: "85899345920"
30
+ name: NVIDIA A100-SXM4-80GB
31
+ uuid: GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949
32
+ - architecture: Ampere
33
+ cudaCores: 6912
34
+ memoryTotal: "85899345920"
35
+ name: NVIDIA A100-SXM4-80GB
36
+ uuid: GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42
37
+ - architecture: Ampere
38
+ cudaCores: 6912
39
+ memoryTotal: "85899345920"
40
+ name: NVIDIA A100-SXM4-80GB
41
+ uuid: GPU-1f47056a-9c2c-8af1-8232-636203622830
42
+ - architecture: Ampere
43
+ cudaCores: 6912
44
+ memoryTotal: "85899345920"
45
+ name: NVIDIA A100-SXM4-80GB
46
+ uuid: GPU-38de1dff-a680-ec66-625b-49f2a31e3b42
47
+ - architecture: Ampere
48
+ cudaCores: 6912
49
+ memoryTotal: "85899345920"
50
+ name: NVIDIA A100-SXM4-80GB
51
+ uuid: GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204
52
+ - architecture: Ampere
53
+ cudaCores: 6912
54
+ memoryTotal: "85899345920"
55
+ name: NVIDIA A100-SXM4-80GB
56
+ uuid: GPU-194ed489-5bfa-defc-85b1-812953e7d84a
57
+ - architecture: Ampere
58
+ cudaCores: 6912
59
+ memoryTotal: "85899345920"
60
+ name: NVIDIA A100-SXM4-80GB
61
+ uuid: GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4
62
+ - architecture: Ampere
63
+ cudaCores: 6912
64
+ memoryTotal: "85899345920"
65
+ name: NVIDIA A100-SXM4-80GB
66
+ uuid: GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057
67
+ host: ip-172-31-3-169
68
+ memory:
69
+ total: "1204521443328"
70
+ os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.39
71
+ program: /home/yitongli/CausVid/causvid/train_ode.py
72
+ python: CPython 3.10.18
73
+ root: ./output
74
+ startedAt: "2025-09-08T08:22:36.803576Z"
75
+ writerId: ffoakmd0zn8f6sjv7nu2r58nmtyazqxg
76
+ m: []
77
+ python_version: 3.10.18
78
+ t:
79
+ "1":
80
+ - 1
81
+ - 11
82
+ - 41
83
+ - 49
84
+ - 71
85
+ - 83
86
+ - 105
87
+ "2":
88
+ - 1
89
+ - 11
90
+ - 41
91
+ - 49
92
+ - 71
93
+ - 83
94
+ - 105
95
+ "3":
96
+ - 16
97
+ - 17
98
+ "4": 3.10.18
99
+ "5": 0.21.3
100
+ "6": 4.56.1
101
+ "12": 0.21.3
102
+ "13": linux-x86_64
103
+ batch_size:
104
+ value: 2
105
+ beta1:
106
+ value: 0.9
107
+ beta2:
108
+ value: 0.999
109
+ data_path:
110
+ value: ../mixkit_ode_lmdb
111
+ denoising_step_list:
112
+ value:
113
+ - 1000
114
+ - 757
115
+ - 522
116
+ - 0
117
+ distillation_loss:
118
+ value: ode
119
+ generator_fsdp_wrap_strategy:
120
+ value: size
121
+ generator_grad:
122
+ value:
123
+ model: true
124
+ generator_task:
125
+ value: causal_video
126
+ gradient_checkpointing:
127
+ value: true
128
+ log_iters:
129
+ value: 200
130
+ lr:
131
+ value: 2e-06
132
+ mixed_precision:
133
+ value: true
134
+ model_name:
135
+ value: causal_wan
136
+ no_save:
137
+ value: false
138
+ num_frame_per_block:
139
+ value: 3
140
+ output_path:
141
+ value: ./output
142
+ seed:
143
+ value: 7735925
144
+ sharding_strategy:
145
+ value: hybrid_full
146
+ text_encoder_fsdp_wrap_strategy:
147
+ value: size
148
+ wandb_entity:
149
+ value: liyitong-Tsinghua University
150
+ wandb_host:
151
+ value: https://api.wandb.ai
152
+ wandb_key:
153
+ value: 5409d3b960b01b25cec0f6abb5361b4022f0cc41
154
+ wandb_name:
155
+ value: wan_causal_ode
156
+ wandb_project:
157
+ value: self-forcing
158
+ warp_denoising_step:
159
+ value: false
output/wandb/run-20250908_082236-gjh65qzq/files/output.log ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run dir: ./output/wandb/run-20250908_082236-gjh65qzq/files
2
+ KV inference with 3 frames per block
3
+ ODERegression initialized.
4
+ ########### torch.Size([2, 21, 16, 60, 104])
5
+ Traceback (most recent call last):
6
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 229, in <module>
7
+ main()
8
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 223, in main
9
+ trainer.train()
10
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 187, in train
11
+ self.generate_video()
12
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 181, in generate_video
13
+ self.distillation_model.generate_video(ode_latent, conditional_dict)
14
+ File "/home/yitongli/CausVid/causvid/ode_regression.py", line 199, in generate_video
15
+ video = self.inference_pipeline.inference(
16
+ File "/home/yitongli/CausVid/causvid/models/wan/causal_inference.py", line 186, in inference
17
+ noisy_input = self.scheduler.add_noise(
18
+ File "/home/yitongli/CausVid/causvid/models/wan/flow_match.py", line 72, in add_noise
19
+ sample = (1 - sigma) * original_samples + sigma * noise
20
+ RuntimeError: The size of tensor a (2) must match the size of tensor b (6) at non-singleton dimension 0
21
+ [rank0]: Traceback (most recent call last):
22
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 229, in <module>
23
+ [rank0]: main()
24
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 223, in main
25
+ [rank0]: trainer.train()
26
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 187, in train
27
+ [rank0]: self.generate_video()
28
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 181, in generate_video
29
+ [rank0]: self.distillation_model.generate_video(ode_latent, conditional_dict)
30
+ [rank0]: File "/home/yitongli/CausVid/causvid/ode_regression.py", line 199, in generate_video
31
+ [rank0]: video = self.inference_pipeline.inference(
32
+ [rank0]: File "/home/yitongli/CausVid/causvid/models/wan/causal_inference.py", line 186, in inference
33
+ [rank0]: noisy_input = self.scheduler.add_noise(
34
+ [rank0]: File "/home/yitongli/CausVid/causvid/models/wan/flow_match.py", line 72, in add_noise
35
+ [rank0]: sample = (1 - sigma) * original_samples + sigma * noise
36
+ [rank0]: RuntimeError: The size of tensor a (2) must match the size of tensor b (6) at non-singleton dimension 0
output/wandb/run-20250908_082236-gjh65qzq/files/requirements.txt ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nvidia-cuda-nvrtc-cu12==12.8.93
2
+ filelock==3.19.1
3
+ aiofiles==24.1.0
4
+ PyYAML==6.0.2
5
+ httpx==0.28.1
6
+ multidict==6.6.4
7
+ tifffile==2025.5.10
8
+ tzdata==2025.2
9
+ urllib3==2.5.0
10
+ decord==0.6.0
11
+ certifi==2025.8.3
12
+ setuptools==78.1.1
13
+ websocket-client==1.8.0
14
+ gitdb==4.0.12
15
+ yarl==1.20.1
16
+ tqdm==4.67.1
17
+ open_clip_torch==3.1.0
18
+ pyparsing==3.2.3
19
+ fastapi==0.116.1
20
+ nvidia-curand-cu12==10.3.9.90
21
+ mdurl==0.1.2
22
+ torchvision==0.23.0
23
+ h11==0.16.0
24
+ pytz==2025.2
25
+ six==1.17.0
26
+ nvidia-cufile-cu12==1.13.1.3
27
+ aiohappyeyeballs==2.6.1
28
+ wandb==0.21.3
29
+ nvidia-nvjitlink-cu12==12.8.93
30
+ cycler==0.12.1
31
+ anyio==4.10.0
32
+ scikit-image==0.25.2
33
+ nvidia-cuda-runtime-cu12==12.8.90
34
+ sentencepiece==0.2.1
35
+ rich==14.1.0
36
+ pydantic==2.11.7
37
+ GitPython==3.1.45
38
+ python-dateutil==2.9.0.post0
39
+ accelerate==1.10.1
40
+ proglog==0.1.12
41
+ sentry-sdk==2.37.0
42
+ nvidia-cusparselt-cu12==0.7.1
43
+ dashscope==1.24.3
44
+ platformdirs==4.4.0
45
+ safehttpx==0.1.6
46
+ fsspec==2025.9.0
47
+ lazy_loader==0.4
48
+ typing_extensions==4.15.0
49
+ semantic-version==2.10.0
50
+ numpy==1.24.4
51
+ nvidia-cufft-cu12==11.3.3.83
52
+ ruff==0.12.12
53
+ nvidia-cudnn-cu12==9.10.2.21
54
+ annotated-types==0.7.0
55
+ decorator==4.4.2
56
+ antlr4-python3-runtime==4.9.3
57
+ psutil==7.0.0
58
+ Brotli==1.1.0
59
+ tomlkit==0.13.3
60
+ httpcore==1.0.9
61
+ kiwisolver==1.4.9
62
+ nvidia-cusparse-cu12==12.5.8.93
63
+ groovy==0.1.2
64
+ protobuf==6.32.0
65
+ orjson==3.11.3
66
+ scipy==1.15.3
67
+ regex==2025.9.1
68
+ MarkupSafe==3.0.2
69
+ av==13.1.0
70
+ timm==1.0.19
71
+ uvicorn==0.35.0
72
+ wheel==0.45.1
73
+ Pygments==2.19.2
74
+ websockets==15.0.1
75
+ lmdb==1.7.3
76
+ sympy==1.14.0
77
+ einops==0.8.1
78
+ idna==3.10
79
+ triton==3.4.0
80
+ torch==2.8.0
81
+ moviepy==1.0.3
82
+ nvidia-nvtx-cu12==12.8.90
83
+ matplotlib==3.10.6
84
+ pillow==11.3.0
85
+ charset-normalizer==3.4.3
86
+ attrs==25.3.0
87
+ aiosignal==1.4.0
88
+ markdown-it-py==4.0.0
89
+ requests==2.32.5
90
+ typer==0.17.4
91
+ huggingface-hub==0.34.4
92
+ nvidia-nccl-cu12==2.27.3
93
+ propcache==0.3.2
94
+ opencv-python==4.11.0.86
95
+ ffmpy==0.6.1
96
+ jmespath==1.0.1
97
+ botocore==1.40.25
98
+ pydantic_core==2.33.2
99
+ fonttools==4.59.2
100
+ omegaconf==2.3.0
101
+ pycparser==2.22
102
+ mpmath==1.3.0
103
+ flash_attn==2.8.3
104
+ smmap==5.0.2
105
+ gradio_client==1.12.1
106
+ exceptiongroup==1.3.0
107
+ cffi==1.17.1
108
+ diffusers==0.31.0
109
+ nvidia-cusolver-cu12==11.7.3.90
110
+ contourpy==1.3.2
111
+ cryptography==45.0.7
112
+ nvidia-cuda-cupti-cu12==12.8.90
113
+ wcwidth==0.2.13
114
+ zipp==3.23.0
115
+ safetensors==0.6.2
116
+ gradio==5.44.1
117
+ click==8.2.1
118
+ frozenlist==1.7.0
119
+ networkx==3.4.2
120
+ s3transfer==0.13.1
121
+ shellingham==1.5.4
122
+ starlette==0.47.3
123
+ packaging==25.0
124
+ ftfy==6.3.1
125
+ importlib_metadata==8.7.0
126
+ transformers==4.56.1
127
+ nvidia-cublas-cu12==12.8.4.1
128
+ Jinja2==3.1.6
129
+ pycocotools==2.0.10
130
+ sniffio==1.3.1
131
+ hf-xet==1.1.9
132
+ typing-inspection==0.4.1
133
+ pandas==2.3.2
134
+ python-multipart==0.0.20
135
+ aiohttp==3.12.15
136
+ clip==1.0
137
+ pydub==0.25.1
138
+ easydict==1.13
139
+ pip==25.2
140
+ tokenizers==0.22.0
141
+ imageio==2.37.0
142
+ async-timeout==5.0.1
143
+ boto3==1.40.25
144
+ imageio-ffmpeg==0.6.0
145
+ CausVid==0.0.1
output/wandb/run-20250908_082236-gjh65qzq/files/wandb-metadata.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-09-08T08:22:36.803576Z",
5
+ "args": [
6
+ "--config_path",
7
+ "configs/wan_causal_ode.yaml"
8
+ ],
9
+ "program": "/home/yitongli/CausVid/causvid/train_ode.py",
10
+ "codePath": "causvid/train_ode.py",
11
+ "codePathLocal": "causvid/train_ode.py",
12
+ "git": {
13
+ "remote": "https://github.com/tianweiy/CausVid.git",
14
+ "commit": "adb6a5ecd07666b4d0290042915c8406e6d5ce22"
15
+ },
16
+ "email": "liyitong.thu@gmail.com",
17
+ "root": "./output",
18
+ "host": "ip-172-31-3-169",
19
+ "executable": "/home/yitongli/miniconda3/envs/causvid/bin/python3.10",
20
+ "cpu_count": 48,
21
+ "cpu_count_logical": 96,
22
+ "gpu": "NVIDIA A100-SXM4-80GB",
23
+ "gpu_count": 8,
24
+ "disk": {
25
+ "/": {
26
+ "total": "2079114358784",
27
+ "used": "1376086970368"
28
+ }
29
+ },
30
+ "memory": {
31
+ "total": "1204521443328"
32
+ },
33
+ "gpu_nvidia": [
34
+ {
35
+ "name": "NVIDIA A100-SXM4-80GB",
36
+ "memoryTotal": "85899345920",
37
+ "cudaCores": 6912,
38
+ "architecture": "Ampere",
39
+ "uuid": "GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949"
40
+ },
41
+ {
42
+ "name": "NVIDIA A100-SXM4-80GB",
43
+ "memoryTotal": "85899345920",
44
+ "cudaCores": 6912,
45
+ "architecture": "Ampere",
46
+ "uuid": "GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42"
47
+ },
48
+ {
49
+ "name": "NVIDIA A100-SXM4-80GB",
50
+ "memoryTotal": "85899345920",
51
+ "cudaCores": 6912,
52
+ "architecture": "Ampere",
53
+ "uuid": "GPU-1f47056a-9c2c-8af1-8232-636203622830"
54
+ },
55
+ {
56
+ "name": "NVIDIA A100-SXM4-80GB",
57
+ "memoryTotal": "85899345920",
58
+ "cudaCores": 6912,
59
+ "architecture": "Ampere",
60
+ "uuid": "GPU-38de1dff-a680-ec66-625b-49f2a31e3b42"
61
+ },
62
+ {
63
+ "name": "NVIDIA A100-SXM4-80GB",
64
+ "memoryTotal": "85899345920",
65
+ "cudaCores": 6912,
66
+ "architecture": "Ampere",
67
+ "uuid": "GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204"
68
+ },
69
+ {
70
+ "name": "NVIDIA A100-SXM4-80GB",
71
+ "memoryTotal": "85899345920",
72
+ "cudaCores": 6912,
73
+ "architecture": "Ampere",
74
+ "uuid": "GPU-194ed489-5bfa-defc-85b1-812953e7d84a"
75
+ },
76
+ {
77
+ "name": "NVIDIA A100-SXM4-80GB",
78
+ "memoryTotal": "85899345920",
79
+ "cudaCores": 6912,
80
+ "architecture": "Ampere",
81
+ "uuid": "GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4"
82
+ },
83
+ {
84
+ "name": "NVIDIA A100-SXM4-80GB",
85
+ "memoryTotal": "85899345920",
86
+ "cudaCores": 6912,
87
+ "architecture": "Ampere",
88
+ "uuid": "GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057"
89
+ }
90
+ ],
91
+ "cudaVersion": "12.8",
92
+ "writerId": "ffoakmd0zn8f6sjv7nu2r58nmtyazqxg"
93
+ }
output/wandb/run-20250908_082236-gjh65qzq/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":98},"_runtime":98}
output/wandb/run-20250908_082236-gjh65qzq/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-08T08:22:36.825042468Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpk_l8zwe3/port-3836172.txt","pid":3836172,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-08T08:22:36.826033664Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3836172}
3
+ {"time":"2025-09-08T08:22:36.825970015Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3836172-3836464-2285782236/socket","Net":"unix"}}
4
+ {"time":"2025-09-08T08:22:37.011146283Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-08T08:22:37.01985984Z","level":"INFO","msg":"handleInformInit: received","streamId":"gjh65qzq","id":"1(@)"}
6
+ {"time":"2025-09-08T08:22:37.227697259Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"gjh65qzq","id":"1(@)"}
7
+ {"time":"2025-09-08T08:24:15.728157025Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-09-08T08:24:15.728207682Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-09-08T08:24:15.728220917Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-09-08T08:24:15.728290134Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-09-08T08:24:15.728328201Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3836172-3836464-2285782236/socket","Net":"unix"}}
12
+ {"time":"2025-09-08T08:24:16.267531359Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-09-08T08:24:16.267567371Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-09-08T08:24:16.26757774Z","level":"INFO","msg":"server is closed"}
output/wandb/run-20250908_082236-gjh65qzq/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-08T08:22:37.019990235Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
2
+ {"time":"2025-09-08T08:22:37.227508836Z","level":"INFO","msg":"stream: created new stream","id":"gjh65qzq"}
3
+ {"time":"2025-09-08T08:22:37.227691977Z","level":"INFO","msg":"stream: started","id":"gjh65qzq"}
4
+ {"time":"2025-09-08T08:22:37.227709815Z","level":"INFO","msg":"writer: started","stream_id":"gjh65qzq"}
5
+ {"time":"2025-09-08T08:22:37.227733552Z","level":"INFO","msg":"handler: started","stream_id":"gjh65qzq"}
6
+ {"time":"2025-09-08T08:22:37.227762439Z","level":"INFO","msg":"sender: started","stream_id":"gjh65qzq"}
7
+ {"time":"2025-09-08T08:24:15.728213817Z","level":"INFO","msg":"stream: closing","id":"gjh65qzq"}
8
+ {"time":"2025-09-08T08:24:16.029737674Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-09-08T08:24:16.167542431Z","level":"INFO","msg":"handler: closed","stream_id":"gjh65qzq"}
10
+ {"time":"2025-09-08T08:24:16.167636838Z","level":"INFO","msg":"sender: closed","stream_id":"gjh65qzq"}
11
+ {"time":"2025-09-08T08:24:16.167654281Z","level":"INFO","msg":"stream: closed","id":"gjh65qzq"}
output/wandb/run-20250908_082236-gjh65qzq/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-08 08:22:36,804 INFO MainThread:3836172 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
2
+ 2025-09-08 08:22:36,804 INFO MainThread:3836172 [wandb_setup.py:_flush():81] Configure stats pid to 3836172
3
+ 2025-09-08 08:22:36,804 INFO MainThread:3836172 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
4
+ 2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
5
+ 2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250908_082236-gjh65qzq/logs/debug.log
7
+ 2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250908_082236-gjh65qzq/logs/debug-internal.log
8
+ 2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_init.py:init():813] calling init triggers
9
+ 2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
10
+ config: {'model_name': 'causal_wan', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 7735925, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
11
+ 2025-09-08 08:22:36,805 INFO MainThread:3836172 [wandb_init.py:init():854] starting backend
12
+ 2025-09-08 08:22:37,011 INFO MainThread:3836172 [wandb_init.py:init():857] sending inform_init request
13
+ 2025-09-08 08:22:37,015 INFO MainThread:3836172 [wandb_init.py:init():865] backend started and connected
14
+ 2025-09-08 08:22:37,018 INFO MainThread:3836172 [wandb_init.py:init():936] updated telemetry
15
+ 2025-09-08 08:22:37,022 INFO MainThread:3836172 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
16
+ 2025-09-08 08:22:37,413 INFO MainThread:3836172 [wandb_init.py:init():1011] starting run threads in backend
17
+ 2025-09-08 08:22:37,526 INFO MainThread:3836172 [wandb_run.py:_console_start():2494] atexit reg
18
+ 2025-09-08 08:22:37,526 INFO MainThread:3836172 [wandb_run.py:_redirect():2342] redirect: wrap_raw
19
+ 2025-09-08 08:22:37,526 INFO MainThread:3836172 [wandb_run.py:_redirect():2411] Wrapping output streams.
20
+ 2025-09-08 08:22:37,526 INFO MainThread:3836172 [wandb_run.py:_redirect():2434] Redirects installed.
21
+ 2025-09-08 08:22:37,528 INFO MainThread:3836172 [wandb_init.py:init():1057] run started, returning control to user process
22
+ 2025-09-08 08:22:37,914 INFO MainThread:3836172 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
23
+ 2025-09-08 08:24:15,728 INFO wandb-AsyncioManager-main:3836172 [service_client.py:_forward_responses():84] Reached EOF.
24
+ 2025-09-08 08:24:15,729 INFO wandb-AsyncioManager-main:3836172 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
output/wandb/run-20250908_082236-gjh65qzq/run-gjh65qzq.wandb ADDED
Binary file (28.8 kB). View file
 
output/wandb/run-20250908_091215-tz5j30tc/files/config.yaml ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.3
4
+ code_path: source-self-forcing-causvid_train_ode.py
5
+ e:
6
+ dzykhs2dp41ohf8z5ba4wgauzbq2dbq2:
7
+ args:
8
+ - --config_path
9
+ - configs/wan_causal_ode.yaml
10
+ codePath: causvid/train_ode.py
11
+ codePathLocal: causvid/train_ode.py
12
+ cpu_count: 48
13
+ cpu_count_logical: 96
14
+ cudaVersion: "12.8"
15
+ disk:
16
+ /:
17
+ total: "2079114358784"
18
+ used: "1400048283648"
19
+ email: liyitong.thu@gmail.com
20
+ executable: /home/yitongli/miniconda3/envs/causvid/bin/python3.10
21
+ git:
22
+ commit: adb6a5ecd07666b4d0290042915c8406e6d5ce22
23
+ remote: https://github.com/tianweiy/CausVid.git
24
+ gpu: NVIDIA A100-SXM4-80GB
25
+ gpu_count: 8
26
+ gpu_nvidia:
27
+ - architecture: Ampere
28
+ cudaCores: 6912
29
+ memoryTotal: "85899345920"
30
+ name: NVIDIA A100-SXM4-80GB
31
+ uuid: GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949
32
+ - architecture: Ampere
33
+ cudaCores: 6912
34
+ memoryTotal: "85899345920"
35
+ name: NVIDIA A100-SXM4-80GB
36
+ uuid: GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42
37
+ - architecture: Ampere
38
+ cudaCores: 6912
39
+ memoryTotal: "85899345920"
40
+ name: NVIDIA A100-SXM4-80GB
41
+ uuid: GPU-1f47056a-9c2c-8af1-8232-636203622830
42
+ - architecture: Ampere
43
+ cudaCores: 6912
44
+ memoryTotal: "85899345920"
45
+ name: NVIDIA A100-SXM4-80GB
46
+ uuid: GPU-38de1dff-a680-ec66-625b-49f2a31e3b42
47
+ - architecture: Ampere
48
+ cudaCores: 6912
49
+ memoryTotal: "85899345920"
50
+ name: NVIDIA A100-SXM4-80GB
51
+ uuid: GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204
52
+ - architecture: Ampere
53
+ cudaCores: 6912
54
+ memoryTotal: "85899345920"
55
+ name: NVIDIA A100-SXM4-80GB
56
+ uuid: GPU-194ed489-5bfa-defc-85b1-812953e7d84a
57
+ - architecture: Ampere
58
+ cudaCores: 6912
59
+ memoryTotal: "85899345920"
60
+ name: NVIDIA A100-SXM4-80GB
61
+ uuid: GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4
62
+ - architecture: Ampere
63
+ cudaCores: 6912
64
+ memoryTotal: "85899345920"
65
+ name: NVIDIA A100-SXM4-80GB
66
+ uuid: GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057
67
+ host: ip-172-31-3-169
68
+ memory:
69
+ total: "1204521443328"
70
+ os: Linux-6.8.0-1028-aws-x86_64-with-glibc2.39
71
+ program: /home/yitongli/CausVid/causvid/train_ode.py
72
+ python: CPython 3.10.18
73
+ root: ./output
74
+ startedAt: "2025-09-08T09:12:15.567002Z"
75
+ writerId: dzykhs2dp41ohf8z5ba4wgauzbq2dbq2
76
+ m: []
77
+ python_version: 3.10.18
78
+ t:
79
+ "1":
80
+ - 1
81
+ - 11
82
+ - 41
83
+ - 49
84
+ - 71
85
+ - 83
86
+ - 105
87
+ "2":
88
+ - 1
89
+ - 11
90
+ - 41
91
+ - 49
92
+ - 71
93
+ - 83
94
+ - 105
95
+ "3":
96
+ - 16
97
+ - 17
98
+ - 61
99
+ "4": 3.10.18
100
+ "5": 0.21.3
101
+ "6": 4.56.1
102
+ "12": 0.21.3
103
+ "13": linux-x86_64
104
+ batch_size:
105
+ value: 2
106
+ beta1:
107
+ value: 0.9
108
+ beta2:
109
+ value: 0.999
110
+ data_path:
111
+ value: ../mixkit_ode_lmdb
112
+ denoising_step_list:
113
+ value:
114
+ - 1000
115
+ - 757
116
+ - 522
117
+ - 0
118
+ distillation_loss:
119
+ value: ode
120
+ generator_fsdp_wrap_strategy:
121
+ value: size
122
+ generator_grad:
123
+ value:
124
+ model: true
125
+ generator_task:
126
+ value: causal_video
127
+ gradient_checkpointing:
128
+ value: true
129
+ log_iters:
130
+ value: 200
131
+ lr:
132
+ value: 2e-06
133
+ mixed_precision:
134
+ value: true
135
+ model_name:
136
+ value: causal_wan
137
+ no_save:
138
+ value: false
139
+ num_frame_per_block:
140
+ value: 3
141
+ output_path:
142
+ value: ./output
143
+ seed:
144
+ value: 550819
145
+ sharding_strategy:
146
+ value: hybrid_full
147
+ text_encoder_fsdp_wrap_strategy:
148
+ value: size
149
+ wandb_entity:
150
+ value: liyitong-Tsinghua University
151
+ wandb_host:
152
+ value: https://api.wandb.ai
153
+ wandb_key:
154
+ value: 5409d3b960b01b25cec0f6abb5361b4022f0cc41
155
+ wandb_name:
156
+ value: wan_causal_ode
157
+ wandb_project:
158
+ value: self-forcing
159
+ warp_denoising_step:
160
+ value: false
output/wandb/run-20250908_091215-tz5j30tc/files/output.log ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run dir: ./output/wandb/run-20250908_091215-tz5j30tc/files
2
+ KV inference with 3 frames per block
3
+ ODERegression initialized.
4
+ cache a block wise causal mask with block size of 3 frames
5
+ BlockMask(shape=(1, 1, 32768, 32768), sparsity=42.52%,
6
+ (0, 0)
7
+ ████░░
8
+ ████░░
9
+ ████░░░░░░░░
10
+ ██████████░░
11
+ ██████████░░
12
+ ██████████░░░░░░░░
13
+ ████████████████░░
14
+ ████████████████░░
15
+ ████████████████░░░░░░░░
16
+ ██████████████████████░░
17
+ ██████████████████████░░
18
+ ██████████████████████░░░░░░░░
19
+ ████████████████████████████░░
20
+ ████████████████████████████░░
21
+ ████████████████████████████████░░
22
+ ████████████████████████████████░░
23
+ ████████████████████████████████░░░░░░░░
24
+ ████████████████████████████████████████
25
+ ████████████████████████████████████████
26
+ ████████████████████████████████████████
27
+ )
28
+ arrive
29
+ Start gathering distributed model states...
30
+ /home/yitongli/miniconda3/envs/causvid/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:678: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
31
+ warnings.warn(
32
+ Model saved to ./output/2025-09-08-09-12-15.318383_seed550819/checkpoint_model_000000/model.pt
33
+ training step 0...
34
+ Traceback (most recent call last):
35
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 283, in <module>
36
+ main()
37
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 277, in main
38
+ trainer.train()
39
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 241, in train
40
+ self.generate_video()
41
+ File "/home/yitongli/CausVid/causvid/train_ode.py", line 213, in generate_video
42
+ output_path = os.path.join("tmp", f"teacher_{self.step:06d}_{base_name}.mp4")
43
+ UnboundLocalError: local variable 'base_name' referenced before assignment
44
+ [rank0]: Traceback (most recent call last):
45
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 283, in <module>
46
+ [rank0]: main()
47
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 277, in main
48
+ [rank0]: trainer.train()
49
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 241, in train
50
+ [rank0]: self.generate_video()
51
+ [rank0]: File "/home/yitongli/CausVid/causvid/train_ode.py", line 213, in generate_video
52
+ [rank0]: output_path = os.path.join("tmp", f"teacher_{self.step:06d}_{base_name}.mp4")
53
+ [rank0]: UnboundLocalError: local variable 'base_name' referenced before assignment
output/wandb/run-20250908_091215-tz5j30tc/files/requirements.txt ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nvidia-cuda-nvrtc-cu12==12.8.93
2
+ filelock==3.19.1
3
+ aiofiles==24.1.0
4
+ PyYAML==6.0.2
5
+ httpx==0.28.1
6
+ multidict==6.6.4
7
+ tifffile==2025.5.10
8
+ tzdata==2025.2
9
+ urllib3==2.5.0
10
+ decord==0.6.0
11
+ certifi==2025.8.3
12
+ setuptools==78.1.1
13
+ websocket-client==1.8.0
14
+ gitdb==4.0.12
15
+ yarl==1.20.1
16
+ tqdm==4.67.1
17
+ open_clip_torch==3.1.0
18
+ pyparsing==3.2.3
19
+ fastapi==0.116.1
20
+ nvidia-curand-cu12==10.3.9.90
21
+ mdurl==0.1.2
22
+ torchvision==0.23.0
23
+ h11==0.16.0
24
+ pytz==2025.2
25
+ six==1.17.0
26
+ nvidia-cufile-cu12==1.13.1.3
27
+ aiohappyeyeballs==2.6.1
28
+ wandb==0.21.3
29
+ nvidia-nvjitlink-cu12==12.8.93
30
+ cycler==0.12.1
31
+ anyio==4.10.0
32
+ scikit-image==0.25.2
33
+ nvidia-cuda-runtime-cu12==12.8.90
34
+ sentencepiece==0.2.1
35
+ rich==14.1.0
36
+ pydantic==2.11.7
37
+ GitPython==3.1.45
38
+ python-dateutil==2.9.0.post0
39
+ accelerate==1.10.1
40
+ proglog==0.1.12
41
+ sentry-sdk==2.37.0
42
+ nvidia-cusparselt-cu12==0.7.1
43
+ dashscope==1.24.3
44
+ platformdirs==4.4.0
45
+ safehttpx==0.1.6
46
+ fsspec==2025.9.0
47
+ lazy_loader==0.4
48
+ typing_extensions==4.15.0
49
+ semantic-version==2.10.0
50
+ numpy==1.24.4
51
+ nvidia-cufft-cu12==11.3.3.83
52
+ ruff==0.12.12
53
+ nvidia-cudnn-cu12==9.10.2.21
54
+ annotated-types==0.7.0
55
+ decorator==4.4.2
56
+ antlr4-python3-runtime==4.9.3
57
+ psutil==7.0.0
58
+ Brotli==1.1.0
59
+ tomlkit==0.13.3
60
+ httpcore==1.0.9
61
+ kiwisolver==1.4.9
62
+ nvidia-cusparse-cu12==12.5.8.93
63
+ groovy==0.1.2
64
+ protobuf==6.32.0
65
+ orjson==3.11.3
66
+ scipy==1.15.3
67
+ regex==2025.9.1
68
+ MarkupSafe==3.0.2
69
+ av==13.1.0
70
+ timm==1.0.19
71
+ uvicorn==0.35.0
72
+ wheel==0.45.1
73
+ Pygments==2.19.2
74
+ websockets==15.0.1
75
+ lmdb==1.7.3
76
+ sympy==1.14.0
77
+ einops==0.8.1
78
+ idna==3.10
79
+ triton==3.4.0
80
+ torch==2.8.0
81
+ moviepy==1.0.3
82
+ nvidia-nvtx-cu12==12.8.90
83
+ matplotlib==3.10.6
84
+ pillow==11.3.0
85
+ charset-normalizer==3.4.3
86
+ attrs==25.3.0
87
+ aiosignal==1.4.0
88
+ markdown-it-py==4.0.0
89
+ requests==2.32.5
90
+ typer==0.17.4
91
+ huggingface-hub==0.34.4
92
+ nvidia-nccl-cu12==2.27.3
93
+ propcache==0.3.2
94
+ opencv-python==4.11.0.86
95
+ ffmpy==0.6.1
96
+ jmespath==1.0.1
97
+ botocore==1.40.25
98
+ pydantic_core==2.33.2
99
+ fonttools==4.59.2
100
+ omegaconf==2.3.0
101
+ pycparser==2.22
102
+ mpmath==1.3.0
103
+ flash_attn==2.8.3
104
+ smmap==5.0.2
105
+ gradio_client==1.12.1
106
+ exceptiongroup==1.3.0
107
+ cffi==1.17.1
108
+ diffusers==0.31.0
109
+ nvidia-cusolver-cu12==11.7.3.90
110
+ contourpy==1.3.2
111
+ cryptography==45.0.7
112
+ nvidia-cuda-cupti-cu12==12.8.90
113
+ wcwidth==0.2.13
114
+ zipp==3.23.0
115
+ safetensors==0.6.2
116
+ gradio==5.44.1
117
+ click==8.2.1
118
+ frozenlist==1.7.0
119
+ networkx==3.4.2
120
+ s3transfer==0.13.1
121
+ shellingham==1.5.4
122
+ starlette==0.47.3
123
+ packaging==25.0
124
+ ftfy==6.3.1
125
+ importlib_metadata==8.7.0
126
+ transformers==4.56.1
127
+ nvidia-cublas-cu12==12.8.4.1
128
+ Jinja2==3.1.6
129
+ pycocotools==2.0.10
130
+ sniffio==1.3.1
131
+ hf-xet==1.1.9
132
+ typing-inspection==0.4.1
133
+ pandas==2.3.2
134
+ python-multipart==0.0.20
135
+ aiohttp==3.12.15
136
+ clip==1.0
137
+ pydub==0.25.1
138
+ easydict==1.13
139
+ pip==25.2
140
+ tokenizers==0.22.0
141
+ imageio==2.37.0
142
+ async-timeout==5.0.1
143
+ boto3==1.40.25
144
+ imageio-ffmpeg==0.6.0
145
+ CausVid==0.0.1
output/wandb/run-20250908_091215-tz5j30tc/files/wandb-metadata.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-09-08T09:12:15.567002Z",
5
+ "args": [
6
+ "--config_path",
7
+ "configs/wan_causal_ode.yaml"
8
+ ],
9
+ "program": "/home/yitongli/CausVid/causvid/train_ode.py",
10
+ "codePath": "causvid/train_ode.py",
11
+ "codePathLocal": "causvid/train_ode.py",
12
+ "git": {
13
+ "remote": "https://github.com/tianweiy/CausVid.git",
14
+ "commit": "adb6a5ecd07666b4d0290042915c8406e6d5ce22"
15
+ },
16
+ "email": "liyitong.thu@gmail.com",
17
+ "root": "./output",
18
+ "host": "ip-172-31-3-169",
19
+ "executable": "/home/yitongli/miniconda3/envs/causvid/bin/python3.10",
20
+ "cpu_count": 48,
21
+ "cpu_count_logical": 96,
22
+ "gpu": "NVIDIA A100-SXM4-80GB",
23
+ "gpu_count": 8,
24
+ "disk": {
25
+ "/": {
26
+ "total": "2079114358784",
27
+ "used": "1400048283648"
28
+ }
29
+ },
30
+ "memory": {
31
+ "total": "1204521443328"
32
+ },
33
+ "gpu_nvidia": [
34
+ {
35
+ "name": "NVIDIA A100-SXM4-80GB",
36
+ "memoryTotal": "85899345920",
37
+ "cudaCores": 6912,
38
+ "architecture": "Ampere",
39
+ "uuid": "GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949"
40
+ },
41
+ {
42
+ "name": "NVIDIA A100-SXM4-80GB",
43
+ "memoryTotal": "85899345920",
44
+ "cudaCores": 6912,
45
+ "architecture": "Ampere",
46
+ "uuid": "GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42"
47
+ },
48
+ {
49
+ "name": "NVIDIA A100-SXM4-80GB",
50
+ "memoryTotal": "85899345920",
51
+ "cudaCores": 6912,
52
+ "architecture": "Ampere",
53
+ "uuid": "GPU-1f47056a-9c2c-8af1-8232-636203622830"
54
+ },
55
+ {
56
+ "name": "NVIDIA A100-SXM4-80GB",
57
+ "memoryTotal": "85899345920",
58
+ "cudaCores": 6912,
59
+ "architecture": "Ampere",
60
+ "uuid": "GPU-38de1dff-a680-ec66-625b-49f2a31e3b42"
61
+ },
62
+ {
63
+ "name": "NVIDIA A100-SXM4-80GB",
64
+ "memoryTotal": "85899345920",
65
+ "cudaCores": 6912,
66
+ "architecture": "Ampere",
67
+ "uuid": "GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204"
68
+ },
69
+ {
70
+ "name": "NVIDIA A100-SXM4-80GB",
71
+ "memoryTotal": "85899345920",
72
+ "cudaCores": 6912,
73
+ "architecture": "Ampere",
74
+ "uuid": "GPU-194ed489-5bfa-defc-85b1-812953e7d84a"
75
+ },
76
+ {
77
+ "name": "NVIDIA A100-SXM4-80GB",
78
+ "memoryTotal": "85899345920",
79
+ "cudaCores": 6912,
80
+ "architecture": "Ampere",
81
+ "uuid": "GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4"
82
+ },
83
+ {
84
+ "name": "NVIDIA A100-SXM4-80GB",
85
+ "memoryTotal": "85899345920",
86
+ "cudaCores": 6912,
87
+ "architecture": "Ampere",
88
+ "uuid": "GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057"
89
+ }
90
+ ],
91
+ "cudaVersion": "12.8",
92
+ "writerId": "dzykhs2dp41ohf8z5ba4wgauzbq2dbq2"
93
+ }
output/wandb/run-20250908_091215-tz5j30tc/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_step":0,"_wandb":{"runtime":139},"_runtime":139.90914664,"generator_loss":0.31640625,"generator_grad_norm":5.5625,"loss_at_time_500":0.2705078125,"_timestamp":1.7573228593443248e+09}
output/wandb/run-20250908_091215-tz5j30tc/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-08T09:12:15.588566306Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpnban0o38/port-4018970.txt","pid":4018970,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-08T09:12:15.589212266Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":4018970}
3
+ {"time":"2025-09-08T09:12:15.589223222Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4018970-4019303-1328136160/socket","Net":"unix"}}
4
+ {"time":"2025-09-08T09:12:15.77464832Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-08T09:12:15.785168473Z","level":"INFO","msg":"handleInformInit: received","streamId":"tz5j30tc","id":"1(@)"}
6
+ {"time":"2025-09-08T09:12:16.008129399Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"tz5j30tc","id":"1(@)"}
7
+ {"time":"2025-09-08T09:14:36.117068968Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-09-08T09:14:36.117157346Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-09-08T09:14:36.117145923Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2025-09-08T09:14:36.117277861Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-4018970-4019303-1328136160/socket","Net":"unix"}}
11
+ {"time":"2025-09-08T09:14:36.117345059Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
12
+ {"time":"2025-09-08T09:14:36.703976989Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-09-08T09:14:36.70401689Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-09-08T09:14:36.7040303Z","level":"INFO","msg":"server is closed"}
output/wandb/run-20250908_091215-tz5j30tc/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-08T09:12:15.785345022Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
2
+ {"time":"2025-09-08T09:12:16.008075812Z","level":"INFO","msg":"stream: created new stream","id":"tz5j30tc"}
3
+ {"time":"2025-09-08T09:12:16.008124642Z","level":"INFO","msg":"stream: started","id":"tz5j30tc"}
4
+ {"time":"2025-09-08T09:12:16.008135566Z","level":"INFO","msg":"handler: started","stream_id":"tz5j30tc"}
5
+ {"time":"2025-09-08T09:12:16.008145832Z","level":"INFO","msg":"sender: started","stream_id":"tz5j30tc"}
6
+ {"time":"2025-09-08T09:12:16.008160919Z","level":"INFO","msg":"writer: started","stream_id":"tz5j30tc"}
7
+ {"time":"2025-09-08T09:14:36.117135291Z","level":"INFO","msg":"stream: closing","id":"tz5j30tc"}
8
+ {"time":"2025-09-08T09:14:36.435658587Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-09-08T09:14:36.58265815Z","level":"INFO","msg":"handler: closed","stream_id":"tz5j30tc"}
10
+ {"time":"2025-09-08T09:14:36.582727241Z","level":"INFO","msg":"sender: closed","stream_id":"tz5j30tc"}
11
+ {"time":"2025-09-08T09:14:36.582737303Z","level":"INFO","msg":"stream: closed","id":"tz5j30tc"}
output/wandb/run-20250908_091215-tz5j30tc/logs/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
2
+ 2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_setup.py:_flush():81] Configure stats pid to 4018970
3
+ 2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
4
+ 2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
5
+ 2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250908_091215-tz5j30tc/logs/debug.log
7
+ 2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250908_091215-tz5j30tc/logs/debug-internal.log
8
+ 2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_init.py:init():813] calling init triggers
9
+ 2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
10
+ config: {'model_name': 'causal_wan', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 550819, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
11
+ 2025-09-08 09:12:15,568 INFO MainThread:4018970 [wandb_init.py:init():854] starting backend
12
+ 2025-09-08 09:12:15,774 INFO MainThread:4018970 [wandb_init.py:init():857] sending inform_init request
13
+ 2025-09-08 09:12:15,779 INFO MainThread:4018970 [wandb_init.py:init():865] backend started and connected
14
+ 2025-09-08 09:12:15,783 INFO MainThread:4018970 [wandb_init.py:init():936] updated telemetry
15
+ 2025-09-08 09:12:15,788 INFO MainThread:4018970 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
16
+ 2025-09-08 09:12:16,206 INFO MainThread:4018970 [wandb_init.py:init():1011] starting run threads in backend
17
+ 2025-09-08 09:12:16,315 INFO MainThread:4018970 [wandb_run.py:_console_start():2494] atexit reg
18
+ 2025-09-08 09:12:16,316 INFO MainThread:4018970 [wandb_run.py:_redirect():2342] redirect: wrap_raw
19
+ 2025-09-08 09:12:16,316 INFO MainThread:4018970 [wandb_run.py:_redirect():2411] Wrapping output streams.
20
+ 2025-09-08 09:12:16,316 INFO MainThread:4018970 [wandb_run.py:_redirect():2434] Redirects installed.
21
+ 2025-09-08 09:12:16,318 INFO MainThread:4018970 [wandb_init.py:init():1057] run started, returning control to user process
22
+ 2025-09-08 09:12:17,359 INFO MainThread:4018970 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
23
+ 2025-09-08 09:14:36,117 INFO wandb-AsyncioManager-main:4018970 [service_client.py:_forward_responses():84] Reached EOF.
24
+ 2025-09-08 09:14:36,117 INFO wandb-AsyncioManager-main:4018970 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
output/wandb/run-20250908_091215-tz5j30tc/run-tz5j30tc.wandb ADDED
Binary file (41.8 kB). View file
 
output/wandb/run-20250908_091534-f394z0xa/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-08T09:15:34.947665584Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpb9cxfvx4/port-4032801.txt","pid":4032801,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-08T09:15:34.948269838Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":4032801}
3
+ {"time":"2025-09-08T09:15:34.948238593Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4032801-4033128-1592522043/socket","Net":"unix"}}
4
+ {"time":"2025-09-08T09:15:35.132762156Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-08T09:15:35.13989349Z","level":"INFO","msg":"handleInformInit: received","streamId":"f394z0xa","id":"1(@)"}
6
+ {"time":"2025-09-08T09:15:35.350122319Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"f394z0xa","id":"1(@)"}
7
+ {"time":"2025-09-08T09:17:58.289894961Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-09-08T09:17:58.289957034Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-09-08T09:17:58.289973286Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-09-08T09:17:58.290055209Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-09-08T09:17:58.29008793Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-4032801-4033128-1592522043/socket","Net":"unix"}}
12
+ {"time":"2025-09-08T09:17:58.843012332Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-09-08T09:17:58.843047327Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-09-08T09:17:58.843056791Z","level":"INFO","msg":"server is closed"}
output/wandb/run-20250908_091953-n3vl9u22/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-08T09:19:53.577274617Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpyl_6c_ow/port-4049443.txt","pid":4049443,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-08T09:19:53.577824208Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":4049443}
3
+ {"time":"2025-09-08T09:19:53.577821212Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4049443-4049733-3298022966/socket","Net":"unix"}}
4
+ {"time":"2025-09-08T09:19:53.762195815Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-08T09:19:53.77245292Z","level":"INFO","msg":"handleInformInit: received","streamId":"n3vl9u22","id":"1(@)"}
6
+ {"time":"2025-09-08T09:19:53.97809872Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"n3vl9u22","id":"1(@)"}
7
+ {"time":"2025-09-08T09:31:04.091714838Z","level":"INFO","msg":"server: parent process exited, terminating service process"}
output/wandb/run-20250909_031406-fvhxlznm/files/output.log ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run dir: ./output/wandb/run-20250909_031406-fvhxlznm/files
2
+ Loading pretrained generator from pretrained_ode.pt
3
+ KV inference with 3 frames per block
4
+ ODERegression initialized.
5
+ cache a block wise causal mask with block size of 3 frames
6
+ BlockMask(shape=(1, 1, 32768, 32768), sparsity=42.52%,
7
+ (0, 0)
8
+ ████░░
9
+ ████░░
10
+ ████░░░░░░░░
11
+ ██████████░░
12
+ ██████████░░
13
+ ██████████░░░░░░░░
14
+ ████████████████░░
15
+ ████████████████░░
16
+ ████████████████░░░░░░░░
17
+ ██████████████████████░░
18
+ ██████████████████████░░
19
+ ██████████████████████░░░░░░░░
20
+ ████████████████████████████░░
21
+ ████████████████████████████░░
22
+ ████████████████████████████████░░
23
+ ████████████████████████████████░░
24
+ ████████████████████████████████░░░░░░░░
25
+ ████████████████████████████████████████
26
+ ████████████████████████████████████████
27
+ ████████████████████████████████████████
28
+ )
29
+ Start gathering distributed model states...
30
+ /home/yitongli/miniconda3/envs/causvid/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:678: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
31
+ warnings.warn(
32
+ Model saved to ./output/2025-09-09-03-14-06.343483_seed6553852/checkpoint_model_000000/model.pt
33
+ training step 0...
34
+ Saving video: 100%|██████████████████████████████████████████████████████████| 81/81 [00:03<00:00, 24.90it/s]
35
+ in main process
36
+ log video_0
37
+ wandb: WARNING `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
38
+ log video_1
39
+ wandb: WARNING `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
40
+ log video_2
41
+ wandb: WARNING `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
42
+ log video_3
43
+ wandb: WARNING `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
44
+ log video_4
45
+ wandb: WARNING `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
46
+ log video_5
47
+ wandb: WARNING `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
48
+ log video_6
49
+ wandb: WARNING `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
50
+ log video_7
51
+ wandb: WARNING `fps` argument does not affect the frame rate of the video when providing a file path or raw bytes.
52
+ training step 1...
53
+ training step 2...
54
+ training step 3...
55
+ training step 4...
56
+ training step 5...
57
+ training step 6...
58
+ training step 7...
59
+ training step 8...
60
+ training step 9...
61
+ training step 10...
62
+ training step 11...
63
+ training step 12...
64
+ training step 13...
65
+ training step 14...
66
+ training step 15...
67
+ training step 16...
68
+ training step 17...
69
+ training step 18...
70
+ training step 19...
71
+ training step 20...
72
+ training step 21...
73
+ training step 22...
74
+ training step 23...
75
+ training step 24...
76
+ training step 25...
77
+ training step 26...
78
+ training step 27...
79
+ training step 28...
80
+ training step 29...
81
+ training step 30...
82
+ training step 31...
83
+ training step 32...
84
+ training step 33...
85
+ training step 34...
86
+ training step 35...
87
+ training step 36...
88
+ training step 37...
89
+ training step 38...
90
+ training step 39...
91
+ training step 40...
92
+ training step 41...
93
+ training step 42...
94
+ training step 43...
95
+ training step 44...
96
+ training step 45...
97
+ training step 46...
98
+ training step 47...
99
+ training step 48...
100
+ training step 49...
101
+ training step 50...
102
+ training step 51...
103
+ training step 52...
104
+ training step 53...
105
+ training step 54...
106
+ training step 55...
107
+ training step 56...
108
+ training step 57...
109
+ training step 58...
110
+ training step 59...
111
+ training step 60...
112
+ training step 61...
113
+ training step 62...
output/wandb/run-20250909_031406-fvhxlznm/files/requirements.txt ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nvidia-cuda-nvrtc-cu12==12.8.93
2
+ filelock==3.19.1
3
+ aiofiles==24.1.0
4
+ PyYAML==6.0.2
5
+ httpx==0.28.1
6
+ multidict==6.6.4
7
+ tifffile==2025.5.10
8
+ tzdata==2025.2
9
+ urllib3==2.5.0
10
+ decord==0.6.0
11
+ certifi==2025.8.3
12
+ setuptools==78.1.1
13
+ websocket-client==1.8.0
14
+ gitdb==4.0.12
15
+ yarl==1.20.1
16
+ tqdm==4.67.1
17
+ open_clip_torch==3.1.0
18
+ pyparsing==3.2.3
19
+ fastapi==0.116.1
20
+ nvidia-curand-cu12==10.3.9.90
21
+ mdurl==0.1.2
22
+ torchvision==0.23.0
23
+ h11==0.16.0
24
+ pytz==2025.2
25
+ six==1.17.0
26
+ nvidia-cufile-cu12==1.13.1.3
27
+ aiohappyeyeballs==2.6.1
28
+ wandb==0.21.3
29
+ nvidia-nvjitlink-cu12==12.8.93
30
+ cycler==0.12.1
31
+ anyio==4.10.0
32
+ scikit-image==0.25.2
33
+ nvidia-cuda-runtime-cu12==12.8.90
34
+ sentencepiece==0.2.1
35
+ rich==14.1.0
36
+ pydantic==2.11.7
37
+ GitPython==3.1.45
38
+ python-dateutil==2.9.0.post0
39
+ accelerate==1.10.1
40
+ proglog==0.1.12
41
+ sentry-sdk==2.37.0
42
+ nvidia-cusparselt-cu12==0.7.1
43
+ dashscope==1.24.3
44
+ platformdirs==4.4.0
45
+ safehttpx==0.1.6
46
+ fsspec==2025.9.0
47
+ lazy_loader==0.4
48
+ typing_extensions==4.15.0
49
+ semantic-version==2.10.0
50
+ numpy==1.24.4
51
+ nvidia-cufft-cu12==11.3.3.83
52
+ ruff==0.12.12
53
+ nvidia-cudnn-cu12==9.10.2.21
54
+ annotated-types==0.7.0
55
+ decorator==4.4.2
56
+ antlr4-python3-runtime==4.9.3
57
+ psutil==7.0.0
58
+ Brotli==1.1.0
59
+ tomlkit==0.13.3
60
+ httpcore==1.0.9
61
+ kiwisolver==1.4.9
62
+ nvidia-cusparse-cu12==12.5.8.93
63
+ groovy==0.1.2
64
+ protobuf==6.32.0
65
+ orjson==3.11.3
66
+ scipy==1.15.3
67
+ regex==2025.9.1
68
+ MarkupSafe==3.0.2
69
+ av==13.1.0
70
+ timm==1.0.19
71
+ uvicorn==0.35.0
72
+ wheel==0.45.1
73
+ Pygments==2.19.2
74
+ websockets==15.0.1
75
+ lmdb==1.7.3
76
+ sympy==1.14.0
77
+ einops==0.8.1
78
+ idna==3.10
79
+ triton==3.4.0
80
+ torch==2.8.0
81
+ moviepy==1.0.3
82
+ nvidia-nvtx-cu12==12.8.90
83
+ matplotlib==3.10.6
84
+ pillow==11.3.0
85
+ charset-normalizer==3.4.3
86
+ attrs==25.3.0
87
+ aiosignal==1.4.0
88
+ markdown-it-py==4.0.0
89
+ requests==2.32.5
90
+ typer==0.17.4
91
+ huggingface-hub==0.34.4
92
+ nvidia-nccl-cu12==2.27.3
93
+ propcache==0.3.2
94
+ opencv-python==4.11.0.86
95
+ ffmpy==0.6.1
96
+ jmespath==1.0.1
97
+ botocore==1.40.25
98
+ pydantic_core==2.33.2
99
+ fonttools==4.59.2
100
+ omegaconf==2.3.0
101
+ pycparser==2.22
102
+ mpmath==1.3.0
103
+ flash_attn==2.8.3
104
+ smmap==5.0.2
105
+ gradio_client==1.12.1
106
+ exceptiongroup==1.3.0
107
+ cffi==1.17.1
108
+ diffusers==0.31.0
109
+ nvidia-cusolver-cu12==11.7.3.90
110
+ contourpy==1.3.2
111
+ cryptography==45.0.7
112
+ nvidia-cuda-cupti-cu12==12.8.90
113
+ wcwidth==0.2.13
114
+ zipp==3.23.0
115
+ safetensors==0.6.2
116
+ gradio==5.44.1
117
+ click==8.2.1
118
+ frozenlist==1.7.0
119
+ networkx==3.4.2
120
+ s3transfer==0.13.1
121
+ shellingham==1.5.4
122
+ starlette==0.47.3
123
+ packaging==25.0
124
+ ftfy==6.3.1
125
+ importlib_metadata==8.7.0
126
+ transformers==4.56.1
127
+ nvidia-cublas-cu12==12.8.4.1
128
+ Jinja2==3.1.6
129
+ pycocotools==2.0.10
130
+ sniffio==1.3.1
131
+ hf-xet==1.1.9
132
+ typing-inspection==0.4.1
133
+ pandas==2.3.2
134
+ python-multipart==0.0.20
135
+ aiohttp==3.12.15
136
+ clip==1.0
137
+ pydub==0.25.1
138
+ easydict==1.13
139
+ pip==25.2
140
+ tokenizers==0.22.0
141
+ imageio==2.37.0
142
+ async-timeout==5.0.1
143
+ boto3==1.40.25
144
+ imageio-ffmpeg==0.6.0
145
+ CausVid==0.0.1
output/wandb/run-20250909_031406-fvhxlznm/files/wandb-metadata.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-1028-aws-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-09-09T03:14:06.607859Z",
5
+ "args": [
6
+ "--config_path",
7
+ "configs/wan_causal_ode.yaml"
8
+ ],
9
+ "program": "/home/yitongli/CausVid/causvid/train_ode.py",
10
+ "codePath": "causvid/train_ode.py",
11
+ "codePathLocal": "causvid/train_ode.py",
12
+ "git": {
13
+ "remote": "https://github.com/tianweiy/CausVid.git",
14
+ "commit": "adb6a5ecd07666b4d0290042915c8406e6d5ce22"
15
+ },
16
+ "email": "liyitong.thu@gmail.com",
17
+ "root": "./output",
18
+ "host": "ip-172-31-3-169",
19
+ "executable": "/home/yitongli/miniconda3/envs/causvid/bin/python3.10",
20
+ "cpu_count": 48,
21
+ "cpu_count_logical": 96,
22
+ "gpu": "NVIDIA A100-SXM4-80GB",
23
+ "gpu_count": 8,
24
+ "disk": {
25
+ "/": {
26
+ "total": "2079114358784",
27
+ "used": "1450179067904"
28
+ }
29
+ },
30
+ "memory": {
31
+ "total": "1204521443328"
32
+ },
33
+ "gpu_nvidia": [
34
+ {
35
+ "name": "NVIDIA A100-SXM4-80GB",
36
+ "memoryTotal": "85899345920",
37
+ "cudaCores": 6912,
38
+ "architecture": "Ampere",
39
+ "uuid": "GPU-88ce35f2-86d5-a394-f90d-a52e2cd45949"
40
+ },
41
+ {
42
+ "name": "NVIDIA A100-SXM4-80GB",
43
+ "memoryTotal": "85899345920",
44
+ "cudaCores": 6912,
45
+ "architecture": "Ampere",
46
+ "uuid": "GPU-1039594d-55ae-3eb3-339e-6d4d62c81d42"
47
+ },
48
+ {
49
+ "name": "NVIDIA A100-SXM4-80GB",
50
+ "memoryTotal": "85899345920",
51
+ "cudaCores": 6912,
52
+ "architecture": "Ampere",
53
+ "uuid": "GPU-1f47056a-9c2c-8af1-8232-636203622830"
54
+ },
55
+ {
56
+ "name": "NVIDIA A100-SXM4-80GB",
57
+ "memoryTotal": "85899345920",
58
+ "cudaCores": 6912,
59
+ "architecture": "Ampere",
60
+ "uuid": "GPU-38de1dff-a680-ec66-625b-49f2a31e3b42"
61
+ },
62
+ {
63
+ "name": "NVIDIA A100-SXM4-80GB",
64
+ "memoryTotal": "85899345920",
65
+ "cudaCores": 6912,
66
+ "architecture": "Ampere",
67
+ "uuid": "GPU-712ffb29-5da2-92e5-99b7-9f148e8c1204"
68
+ },
69
+ {
70
+ "name": "NVIDIA A100-SXM4-80GB",
71
+ "memoryTotal": "85899345920",
72
+ "cudaCores": 6912,
73
+ "architecture": "Ampere",
74
+ "uuid": "GPU-194ed489-5bfa-defc-85b1-812953e7d84a"
75
+ },
76
+ {
77
+ "name": "NVIDIA A100-SXM4-80GB",
78
+ "memoryTotal": "85899345920",
79
+ "cudaCores": 6912,
80
+ "architecture": "Ampere",
81
+ "uuid": "GPU-21b1f0be-4c00-4af7-0649-9942ae8fd0e4"
82
+ },
83
+ {
84
+ "name": "NVIDIA A100-SXM4-80GB",
85
+ "memoryTotal": "85899345920",
86
+ "cudaCores": 6912,
87
+ "architecture": "Ampere",
88
+ "uuid": "GPU-195cca5b-c8ee-790b-4d30-ae4d7aa93057"
89
+ }
90
+ ],
91
+ "cudaVersion": "12.8",
92
+ "writerId": "fsrpwg7v4b4972ihmv15clw215fcozrh"
93
+ }
output/wandb/run-20250909_031406-fvhxlznm/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-09T03:14:06.646254908Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp527vay8l/port-1234529.txt","pid":1234529,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-09T03:14:06.646757368Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1234529}
3
+ {"time":"2025-09-09T03:14:06.646738738Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1234529-1235338-3412218744/socket","Net":"unix"}}
4
+ {"time":"2025-09-09T03:14:06.816587266Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-09T03:14:06.827214814Z","level":"INFO","msg":"handleInformInit: received","streamId":"fvhxlznm","id":"1(@)"}
6
+ {"time":"2025-09-09T03:14:07.041711266Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"fvhxlznm","id":"1(@)"}
7
+ {"time":"2025-09-09T04:05:19.585076497Z","level":"INFO","msg":"server: parent process exited, terminating service process"}
output/wandb/run-20250909_031406-fvhxlznm/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-09-09T03:14:06.827346748Z","level":"INFO","msg":"stream: starting","core version":"0.21.3"}
2
+ {"time":"2025-09-09T03:14:07.041670446Z","level":"INFO","msg":"stream: created new stream","id":"fvhxlznm"}
3
+ {"time":"2025-09-09T03:14:07.04170638Z","level":"INFO","msg":"stream: started","id":"fvhxlznm"}
4
+ {"time":"2025-09-09T03:14:07.04171651Z","level":"INFO","msg":"writer: started","stream_id":"fvhxlznm"}
5
+ {"time":"2025-09-09T03:14:07.041726675Z","level":"INFO","msg":"handler: started","stream_id":"fvhxlznm"}
6
+ {"time":"2025-09-09T03:14:07.041739753Z","level":"INFO","msg":"sender: started","stream_id":"fvhxlznm"}
output/wandb/run-20250909_031406-fvhxlznm/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Current SDK version is 0.21.3
2
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Configure stats pid to 1234529
3
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/.config/wandb/settings
4
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from /home/yitongli/CausVid/wandb/settings
5
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:setup_run_log_directory():686] Logging user logs to ./output/wandb/run-20250909_031406-fvhxlznm/logs/debug.log
7
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:setup_run_log_directory():687] Logging internal logs to ./output/wandb/run-20250909_031406-fvhxlznm/logs/debug-internal.log
8
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():813] calling init triggers
9
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():818] wandb.init called with sweep_config: {}
10
+ config: {'model_name': 'causal_wan', 'generator_ckpt': 'pretrained_ode.pt', 'generator_grad': {'model': True}, 'denoising_step_list': [1000, 757, 522, 0], 'generator_task': 'causal_video', 'generator_fsdp_wrap_strategy': 'size', 'text_encoder_fsdp_wrap_strategy': 'size', 'mixed_precision': True, 'seed': 6553852, 'wandb_host': 'https://api.wandb.ai', 'wandb_key': '5409d3b960b01b25cec0f6abb5361b4022f0cc41', 'wandb_entity': 'liyitong-Tsinghua University', 'wandb_project': 'self-forcing', 'wandb_name': 'wan_causal_ode', 'sharding_strategy': 'hybrid_full', 'lr': 2e-06, 'beta1': 0.9, 'beta2': 0.999, 'data_path': '../mixkit_ode_lmdb', 'batch_size': 2, 'log_iters': 200, 'output_path': './output', 'distillation_loss': 'ode', 'gradient_checkpointing': True, 'num_frame_per_block': 3, 'warp_denoising_step': False, 'no_save': False, '_wandb': {}}
11
+ 2025-09-09 03:14:06,609 INFO MainThread:1234529 [wandb_init.py:init():854] starting backend
12
+ 2025-09-09 03:14:06,816 INFO MainThread:1234529 [wandb_init.py:init():857] sending inform_init request
13
+ 2025-09-09 03:14:06,821 INFO MainThread:1234529 [wandb_init.py:init():865] backend started and connected
14
+ 2025-09-09 03:14:06,825 INFO MainThread:1234529 [wandb_init.py:init():936] updated telemetry
15
+ 2025-09-09 03:14:06,830 INFO MainThread:1234529 [wandb_init.py:init():960] communicating run to backend with 90.0 second timeout
16
+ 2025-09-09 03:14:07,304 INFO MainThread:1234529 [wandb_init.py:init():1011] starting run threads in backend
17
+ 2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_console_start():2494] atexit reg
18
+ 2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2342] redirect: wrap_raw
19
+ 2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2411] Wrapping output streams.
20
+ 2025-09-09 03:14:07,415 INFO MainThread:1234529 [wandb_run.py:_redirect():2434] Redirects installed.
21
+ 2025-09-09 03:14:07,417 INFO MainThread:1234529 [wandb_init.py:init():1057] run started, returning control to user process
22
+ 2025-09-09 03:14:07,902 INFO MainThread:1234529 [wandb_run.py:_config_callback():1380] config_cb None None {'_wandb': {'code_path': 'source-self-forcing-causvid_train_ode.py'}}
train.log ADDED
The diff for this file is too large to render. See raw diff
 
video_processing.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 2025-09-08 05:00:38,647 - INFO - Batch processing completed in 0.00 seconds
2
+ 2025-09-08 05:01:15,712 - INFO - Batch processing completed in 0.00 seconds