Spaces:
Sleeping
Sleeping
ibcplateformes Claude Opus 4.6 committed on
Commit ·
60ed88a
1
Parent(s): e11dc24
Add detailed logging to preprocessing to debug missing audio slices
Browse files - pipeline/training.py +37 -7
pipeline/training.py
CHANGED
|
@@ -64,19 +64,49 @@ def preprocess(model_name: str, audio_path: str, sample_rate: int = 40000):
|
|
| 64 |
]
|
| 65 |
|
| 66 |
logger.info(f"Running preprocessing for {model_name}...")
|
|
|
|
|
|
|
|
|
|
| 67 |
result = subprocess.run(command, capture_output=True, text=True, cwd=APPLIO_DIR)
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
if result.returncode != 0:
|
| 70 |
-
logger.error(f"Preprocess stderr: {result.stderr}")
|
| 71 |
raise RuntimeError(f"Preprocessing failed: {result.stderr[-500:]}")
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
sliced_dir = os.path.join(exp_dir, "sliced_audios")
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
|
| 82 |
@spaces.GPU(duration=120)
|
|
|
|
| 64 |
]
|
| 65 |
|
| 66 |
logger.info(f"Running preprocessing for {model_name}...")
|
| 67 |
+
logger.info(f"Command: {' '.join(command)}")
|
| 68 |
+
logger.info(f"Dataset dir contents: {os.listdir(dataset_dir)}")
|
| 69 |
+
|
| 70 |
result = subprocess.run(command, capture_output=True, text=True, cwd=APPLIO_DIR)
|
| 71 |
|
| 72 |
+
logger.info(f"Preprocess returncode: {result.returncode}")
|
| 73 |
+
if result.stdout:
|
| 74 |
+
logger.info(f"Preprocess stdout: {result.stdout[-1000:]}")
|
| 75 |
+
if result.stderr:
|
| 76 |
+
logger.info(f"Preprocess stderr: {result.stderr[-1000:]}")
|
| 77 |
+
|
| 78 |
if result.returncode != 0:
|
|
|
|
| 79 |
raise RuntimeError(f"Preprocessing failed: {result.stderr[-500:]}")
|
| 80 |
|
| 81 |
+
# Log what was created in exp_dir
|
| 82 |
+
logger.info(f"Contents of exp_dir ({exp_dir}):")
|
| 83 |
+
for item in os.listdir(exp_dir):
|
| 84 |
+
full = os.path.join(exp_dir, item)
|
| 85 |
+
if os.path.isdir(full):
|
| 86 |
+
contents = os.listdir(full)
|
| 87 |
+
logger.info(f" {item}/ ({len(contents)} files): {contents[:5]}")
|
| 88 |
+
else:
|
| 89 |
+
logger.info(f" {item} ({os.path.getsize(full)} bytes)")
|
| 90 |
+
|
| 91 |
+
# Check multiple possible output directories
|
| 92 |
sliced_dir = os.path.join(exp_dir, "sliced_audios")
|
| 93 |
+
sliced_16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
|
| 94 |
+
|
| 95 |
+
if os.path.exists(sliced_dir) and len(os.listdir(sliced_dir)) > 0:
|
| 96 |
+
n_slices = len(os.listdir(sliced_dir))
|
| 97 |
+
logger.info(f"Preprocessing complete: {n_slices} slices created.")
|
| 98 |
+
return n_slices
|
| 99 |
+
elif os.path.exists(sliced_16k_dir) and len(os.listdir(sliced_16k_dir)) > 0:
|
| 100 |
+
n_slices = len(os.listdir(sliced_16k_dir))
|
| 101 |
+
logger.info(f"Preprocessing complete: {n_slices} 16k slices created (no native rate slices).")
|
| 102 |
+
return n_slices
|
| 103 |
+
else:
|
| 104 |
+
raise RuntimeError(
|
| 105 |
+
f"Preprocessing produced no audio slices. "
|
| 106 |
+
f"exp_dir contents: {os.listdir(exp_dir)}. "
|
| 107 |
+
f"stdout: {result.stdout[-300:] if result.stdout else 'empty'}. "
|
| 108 |
+
f"stderr: {result.stderr[-300:] if result.stderr else 'empty'}"
|
| 109 |
+
)
|
| 110 |
|
| 111 |
|
| 112 |
@spaces.GPU(duration=120)
|