W0408 11:05:05.680000 100112 site-packages/torch/distributed/run.py:793]
W0408 11:05:05.680000 100112 site-packages/torch/distributed/run.py:793] *****************************************
W0408 11:05:05.680000 100112 site-packages/torch/distributed/run.py:793] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
W0408 11:05:05.680000 100112 site-packages/torch/distributed/run.py:793] *****************************************
Time grid: t_c=0.25, steps (1→t_c)=100, (t_c→0)=2
Total number of images that will be sampled: 5120
sampling: 0%| | 0/20 [00:00
Traceback (most recent call last):
    sys.exit(load_entry_point('torch==2.5.1', 'console_scripts', 'torchrun')())
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gemini/space/zhaozy/guzhenyu/envs/envs/SiT/lib/python3.12/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
    return f(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^
  File "/gemini/space/zhaozy/guzhenyu/envs/envs/SiT/lib/python3.12/site-packages/torch/distributed/run.py", line 919, in main
    run(args)
  File "/gemini/space/zhaozy/guzhenyu/envs/envs/SiT/lib/python3.12/site-packages/torch/distributed/run.py", line 910, in run
    elastic_launch(
  File "/gemini/space/zhaozy/guzhenyu/envs/envs/SiT/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 138, in __call__
    return launch_agent(self._config, self._entrypoint, list(args))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/gemini/space/zhaozy/guzhenyu/envs/envs/SiT/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 260, in launch_agent
    result = agent.run()
             ^^^^^^^^^^^
  File "/gemini/space/zhaozy/guzhenyu/envs/envs/SiT/lib/python3.12/site-packages/torch/distributed/elastic/metrics/api.py", line 137, in wrapper
    result = f(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^
  File "/gemini/space/zhaozy/guzhenyu/envs/envs/SiT/lib/python3.12/site-packages/torch/distributed/elastic/agent/server/api.py", line 696, in run
    result = self._invoke_run(role)
             ^^^^^^^^^^^^^^^^^^^^^^
  File "/gemini/space/zhaozy/guzhenyu/envs/envs/SiT/lib/python3.12/site-packages/torch/distributed/elastic/agent/server/api.py", line 855, in _invoke_run
    time.sleep(monitor_interval)
  File "/gemini/space/zhaozy/guzhenyu/envs/envs/SiT/lib/python3.12/site-packages/torch/distributed/elastic/multiprocessing/api.py", line 84, in _terminate_process_handler
    raise SignalException(f"Process {os.getpid()} got signal: {sigval}", sigval=sigval)
torch.distributed.elastic.multiprocessing.api.SignalException: Process 100112 got signal: 15
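
Regarding the OMP_NUM_THREADS warning at the top of the log: a minimal sketch of one way to choose and export the variable before launching torchrun, assuming a single-node run. The worker script name "sample.py" and the GPU count are placeholders, not taken from this log.

# Sketch only: "sample.py" and gpus_per_node are hypothetical placeholders.
# torchrun defaults OMP_NUM_THREADS to 1 per rank when the variable is unset;
# exporting an explicit value silences the warning and controls CPU threading.
import os
import subprocess

gpus_per_node = 8                                      # adjust to your machine
cpu_cores = os.cpu_count() or gpus_per_node
threads_per_rank = max(1, cpu_cores // gpus_per_node)  # split cores across ranks

env = dict(os.environ, OMP_NUM_THREADS=str(threads_per_rank))
subprocess.run(
    ["torchrun", f"--nproc_per_node={gpus_per_node}", "sample.py"],
    env=env,
    check=True,
)

The same effect can be had by setting OMP_NUM_THREADS in the shell before the torchrun command; torchrun only injects the default of 1 (and prints the warning above) when the variable is not already set in the environment.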