ibcplateformes Claude Opus 4.6 committed on
Commit
60ed88a
·
1 Parent(s): e11dc24

Add detailed logging to preprocessing to debug missing audio slices

Browse files
Files changed (1) hide show
  1. pipeline/training.py +37 -7
pipeline/training.py CHANGED
@@ -64,19 +64,49 @@ def preprocess(model_name: str, audio_path: str, sample_rate: int = 40000):
64
  ]
65
 
66
  logger.info(f"Running preprocessing for {model_name}...")
 
 
 
67
  result = subprocess.run(command, capture_output=True, text=True, cwd=APPLIO_DIR)
68
 
 
 
 
 
 
 
69
  if result.returncode != 0:
70
- logger.error(f"Preprocess stderr: {result.stderr}")
71
  raise RuntimeError(f"Preprocessing failed: {result.stderr[-500:]}")
72
 
 
 
 
 
 
 
 
 
 
 
 
73
  sliced_dir = os.path.join(exp_dir, "sliced_audios")
74
- if not os.path.exists(sliced_dir) or len(os.listdir(sliced_dir)) == 0:
75
- raise RuntimeError("Preprocessing produced no audio slices. Check your input audio.")
76
-
77
- n_slices = len(os.listdir(sliced_dir))
78
- logger.info(f"Preprocessing complete: {n_slices} slices created.")
79
- return n_slices
 
 
 
 
 
 
 
 
 
 
 
80
 
81
 
82
  @spaces.GPU(duration=120)
 
64
  ]
65
 
66
  logger.info(f"Running preprocessing for {model_name}...")
67
+ logger.info(f"Command: {' '.join(command)}")
68
+ logger.info(f"Dataset dir contents: {os.listdir(dataset_dir)}")
69
+
70
  result = subprocess.run(command, capture_output=True, text=True, cwd=APPLIO_DIR)
71
 
72
+ logger.info(f"Preprocess returncode: {result.returncode}")
73
+ if result.stdout:
74
+ logger.info(f"Preprocess stdout: {result.stdout[-1000:]}")
75
+ if result.stderr:
76
+ logger.info(f"Preprocess stderr: {result.stderr[-1000:]}")
77
+
78
  if result.returncode != 0:
 
79
  raise RuntimeError(f"Preprocessing failed: {result.stderr[-500:]}")
80
 
81
+ # Log what was created in exp_dir
82
+ logger.info(f"Contents of exp_dir ({exp_dir}):")
83
+ for item in os.listdir(exp_dir):
84
+ full = os.path.join(exp_dir, item)
85
+ if os.path.isdir(full):
86
+ contents = os.listdir(full)
87
+ logger.info(f" {item}/ ({len(contents)} files): {contents[:5]}")
88
+ else:
89
+ logger.info(f" {item} ({os.path.getsize(full)} bytes)")
90
+
91
+ # Check multiple possible output directories
92
  sliced_dir = os.path.join(exp_dir, "sliced_audios")
93
+ sliced_16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
94
+
95
+ if os.path.exists(sliced_dir) and len(os.listdir(sliced_dir)) > 0:
96
+ n_slices = len(os.listdir(sliced_dir))
97
+ logger.info(f"Preprocessing complete: {n_slices} slices created.")
98
+ return n_slices
99
+ elif os.path.exists(sliced_16k_dir) and len(os.listdir(sliced_16k_dir)) > 0:
100
+ n_slices = len(os.listdir(sliced_16k_dir))
101
+ logger.info(f"Preprocessing complete: {n_slices} 16k slices created (no native rate slices).")
102
+ return n_slices
103
+ else:
104
+ raise RuntimeError(
105
+ f"Preprocessing produced no audio slices. "
106
+ f"exp_dir contents: {os.listdir(exp_dir)}. "
107
+ f"stdout: {result.stdout[-300:] if result.stdout else 'empty'}. "
108
+ f"stderr: {result.stderr[-300:] if result.stderr else 'empty'}"
109
+ )
110
 
111
 
112
  @spaces.GPU(duration=120)