Spaces:
Sleeping
Sleeping
ibcplateformes Claude Opus 4.6 committed on
Commit ·
dcf6e3c
1
Parent(s): 60ed88a
Search for sliced audio recursively and show directory tree in errors
Browse files
Applio may store sliced audio in subdirectories within sliced_audios/.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- pipeline/training.py +39 -12
pipeline/training.py
CHANGED
|
@@ -88,24 +88,51 @@ def preprocess(model_name: str, audio_path: str, sample_rate: int = 40000):
|
|
| 88 |
else:
|
| 89 |
logger.info(f" {item} ({os.path.getsize(full)} bytes)")
|
| 90 |
|
| 91 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
sliced_dir = os.path.join(exp_dir, "sliced_audios")
|
| 93 |
sliced_16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
|
| 94 |
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
else:
|
| 104 |
raise RuntimeError(
|
| 105 |
f"Preprocessing produced no audio slices. "
|
| 106 |
-
f"
|
| 107 |
-
f"
|
| 108 |
-
f"
|
| 109 |
)
|
| 110 |
|
| 111 |
|
|
|
|
| 88 |
else:
|
| 89 |
logger.info(f" {item} ({os.path.getsize(full)} bytes)")
|
| 90 |
|
| 91 |
+
# Count sliced audio files (Applio may nest them in subdirectories)
|
| 92 |
+
def count_wav_files(directory):
    """Count WAV files under *directory*, including nested subdirectories.

    The extension is matched case-insensitively, so ``.wav`` and ``.WAV``
    are both counted.

    Args:
        directory: Path of the directory tree to scan.

    Returns:
        The number of WAV files found, or 0 when *directory* does not exist.
    """
    # os.walk yields nothing for a missing or unreadable path (its default
    # onerror handler ignores listing errors), so no existence check is needed.
    return sum(
        1
        for _root, _dirs, files in os.walk(directory)
        for name in files
        if name.lower().endswith(".wav")
    )
|
| 101 |
+
|
| 102 |
sliced_dir = os.path.join(exp_dir, "sliced_audios")
|
| 103 |
sliced_16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
|
| 104 |
|
| 105 |
+
n_slices = count_wav_files(sliced_dir)
|
| 106 |
+
n_slices_16k = count_wav_files(sliced_16k_dir)
|
| 107 |
+
|
| 108 |
+
# Debug: show exact directory structure
|
| 109 |
+
def dir_tree(path, depth=2):
    """Summarize the immediate contents of *path* for debug output.

    Each subdirectory is rendered as ``name/(N items: [first three names])``
    and every other entry as its bare name. Entries are sorted so the output
    is deterministic across runs and platforms.

    Args:
        path: Directory to describe.
        depth: When 0 or less, subdirectories are listed by name only and
            their contents are not inspected. Any positive value expands
            exactly one level; the listing is not recursive.

    Returns:
        A list of strings describing the entries, or an empty list when
        *path* is missing or is not a directory.
    """
    # isdir (rather than exists) keeps os.listdir from raising
    # NotADirectoryError on a regular file, which would otherwise mask the
    # error this helper is meant to help diagnose.
    if not os.path.isdir(path):
        return []

    items = []
    for item in sorted(os.listdir(path)):
        full = os.path.join(path, item)
        if os.path.isdir(full) and depth > 0:
            sub_items = sorted(os.listdir(full))
            items.append(f"{item}/({len(sub_items)} items: {sub_items[:3]})")
        else:
            items.append(item)
    return items
|
| 121 |
+
|
| 122 |
+
logger.info(f"sliced_audios tree: {dir_tree(sliced_dir)}")
|
| 123 |
+
logger.info(f"sliced_audios_16k tree: {dir_tree(sliced_16k_dir)}")
|
| 124 |
+
logger.info(f"WAV counts: sliced={n_slices}, 16k={n_slices_16k}")
|
| 125 |
+
|
| 126 |
+
if n_slices > 0 or n_slices_16k > 0:
|
| 127 |
+
total = max(n_slices, n_slices_16k)
|
| 128 |
+
logger.info(f"Preprocessing complete: {total} slices created.")
|
| 129 |
+
return total
|
| 130 |
else:
|
| 131 |
raise RuntimeError(
|
| 132 |
f"Preprocessing produced no audio slices. "
|
| 133 |
+
f"sliced_audios: {dir_tree(sliced_dir)}. "
|
| 134 |
+
f"sliced_audios_16k: {dir_tree(sliced_16k_dir)}. "
|
| 135 |
+
f"stdout: {result.stdout[-200:] if result.stdout else 'empty'}"
|
| 136 |
)
|
| 137 |
|
| 138 |
|