ibcplateformes Claude Opus 4.6 committed on
Commit
dcf6e3c
·
1 Parent(s): 60ed88a

Search for sliced audio recursively and show directory tree in errors

Browse files

Applio may store sliced audio in subdirectories within sliced_audios/.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. pipeline/training.py +39 -12
pipeline/training.py CHANGED
@@ -88,24 +88,51 @@ def preprocess(model_name: str, audio_path: str, sample_rate: int = 40000):
88
  else:
89
  logger.info(f" {item} ({os.path.getsize(full)} bytes)")
90
 
91
- # Check multiple possible output directories
 
 
 
 
 
 
 
 
 
 
92
  sliced_dir = os.path.join(exp_dir, "sliced_audios")
93
  sliced_16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
94
 
95
- if os.path.exists(sliced_dir) and len(os.listdir(sliced_dir)) > 0:
96
- n_slices = len(os.listdir(sliced_dir))
97
- logger.info(f"Preprocessing complete: {n_slices} slices created.")
98
- return n_slices
99
- elif os.path.exists(sliced_16k_dir) and len(os.listdir(sliced_16k_dir)) > 0:
100
- n_slices = len(os.listdir(sliced_16k_dir))
101
- logger.info(f"Preprocessing complete: {n_slices} 16k slices created (no native rate slices).")
102
- return n_slices
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  else:
104
  raise RuntimeError(
105
  f"Preprocessing produced no audio slices. "
106
- f"exp_dir contents: {os.listdir(exp_dir)}. "
107
- f"stdout: {result.stdout[-300:] if result.stdout else 'empty'}. "
108
- f"stderr: {result.stderr[-300:] if result.stderr else 'empty'}"
109
  )
110
 
111
 
 
88
  else:
89
  logger.info(f" {item} ({os.path.getsize(full)} bytes)")
90
 
91
+ # Count sliced audio files (Applio may nest them in subdirectories)
92
def count_wav_files(directory):
    """Return the number of WAV files found anywhere below *directory*.

    The search is recursive, so slices written into nested sub-directories
    are included. The extension check is case-insensitive, so ``.WAV`` files
    are counted as well as ``.wav``.

    Args:
        directory: Path of the directory to scan.

    Returns:
        The number of matching files as an ``int``; ``0`` when *directory*
        does not exist or contains no WAV files.
    """
    # os.walk() yields nothing for a missing path (errors are ignored when
    # no ``onerror`` callback is given), so no explicit existence check is
    # needed here.
    return sum(
        name.lower().endswith(".wav")
        for _root, _dirs, names in os.walk(directory)
        for name in names
    )
101
+
102
  sliced_dir = os.path.join(exp_dir, "sliced_audios")
103
  sliced_16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
104
 
105
+ n_slices = count_wav_files(sliced_dir)
106
+ n_slices_16k = count_wav_files(sliced_16k_dir)
107
+
108
+ # Debug: show exact directory structure
109
def dir_tree(path, depth=2):
    """Summarise the contents of *path* as a list of strings for debugging.

    Each entry of *path* produces one item. A sub-directory is rendered as
    ``"name/(N items: [...])"``, where ``N`` is its number of entries and
    the list previews at most the first three of them, themselves rendered
    with ``depth - 1``. Every other entry, and every directory once *depth*
    has reached ``0``, is rendered as its bare name. Entries are sorted by
    name so repeated runs produce identical log output.

    Args:
        path: Directory to describe.
        depth: How many directory levels below *path* to expand.

    Returns:
        A list of strings, one per entry of *path*. The list is empty when
        *path* is missing, is not a directory, or cannot be read.
    """
    try:
        names = sorted(os.listdir(path))
    except OSError:
        # Missing path, regular file, or permission problem. This helper is
        # only used to build log and error messages, so it must never raise
        # and hide the real failure.
        return []

    items = []
    for name in names:
        full = os.path.join(path, name)
        if depth > 0 and os.path.isdir(full):
            # Recurse with a reduced budget so ``depth`` really limits how
            # deep the summary goes. An unreadable sub-directory shows up
            # as "0 items".
            children = dir_tree(full, depth - 1)
            items.append(f"{name}/({len(children)} items: {children[:3]})")
        else:
            items.append(name)
    return items
121
+
122
+ logger.info(f"sliced_audios tree: {dir_tree(sliced_dir)}")
123
+ logger.info(f"sliced_audios_16k tree: {dir_tree(sliced_16k_dir)}")
124
+ logger.info(f"WAV counts: sliced={n_slices}, 16k={n_slices_16k}")
125
+
126
+ if n_slices > 0 or n_slices_16k > 0:
127
+ total = max(n_slices, n_slices_16k)
128
+ logger.info(f"Preprocessing complete: {total} slices created.")
129
+ return total
130
  else:
131
  raise RuntimeError(
132
  f"Preprocessing produced no audio slices. "
133
+ f"sliced_audios: {dir_tree(sliced_dir)}. "
134
+ f"sliced_audios_16k: {dir_tree(sliced_16k_dir)}. "
135
+ f"stdout: {result.stdout[-200:] if result.stdout else 'empty'}"
136
  )
137
 
138