| import csv |
| import wave |
| import os |
| from tqdm import tqdm |
def verify_wav_file(file_path, *, check_data=True):
    """Return True if *file_path* can be opened and read as a WAV file.

    The file is opened with the standard-library ``wave`` module, which
    parses the WAV header.  When *check_data* is true the audio frames are
    also read back and the number of bytes obtained is compared with the
    size the header announces, so a file with an intact header but
    cut-short sample data is reported as invalid instead of valid.

    Args:
        file_path: Path of the WAV file to check.
        check_data: When true (the default), read every frame to detect
            truncated audio data.  Pass ``False`` for a faster, header-only
            check.

    Returns:
        ``True`` when the file passes the checks, ``False`` otherwise.  On
        failure an ``Error processing ...`` line is printed; the error is
        not re-raised.
    """
    # Read in bounded chunks so long recordings are never loaded whole.
    frames_per_read = 65536
    try:
        with wave.open(file_path, 'rb') as wav_file:
            if check_data:
                expected_bytes = (
                    wav_file.getnframes()
                    * wav_file.getnchannels()
                    * wav_file.getsampwidth()
                )
                received_bytes = 0
                while True:
                    data = wav_file.readframes(frames_per_read)
                    if not data:
                        break
                    received_bytes += len(data)
                # Use "<" rather than "!=": a trailing partial frame in the
                # data chunk may legitimately yield a few extra bytes.
                if received_bytes < expected_bytes:
                    raise wave.Error(
                        f"truncated audio data: header announces "
                        f"{expected_bytes} bytes, only {received_bytes} readable"
                    )
        return True
    except Exception as e:
        # Deliberately broad: this function is the validity boundary, so any
        # failure to open or parse the file means "not a usable WAV file".
        print(f"Error processing {file_path}: {e}")
        return False
|
|
def main(
    csv_path="/home/austin/disk1/stts-zs_cleaning/data/filename.csv",
):
    """Check every WAV file listed in a pipe-delimited CSV and print a summary.

    The first field of each non-empty row is taken as the path of a WAV
    file.  Each path is counted, tested for existence and then passed to
    ``verify_wav_file``; a message is printed for every missing or invalid
    file, followed by overall totals.

    Args:
        csv_path: Path of the ``|``-delimited listing file.  Defaults to the
            location the script was originally written for.
    """
    total_files = 0
    valid_files = 0

    # newline='' leaves newline handling to the csv module, as its
    # documentation requires for file objects passed to csv.reader.
    with open(csv_path, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter='|')
        for row in tqdm(csv_reader, desc="Verifying files", unit="file"):
            if not row:
                # Blank line in the listing: nothing to check.
                continue

            wav_path = row[0]
            total_files += 1

            if not os.path.exists(wav_path):
                print(f"File does not exist: {wav_path}")
            elif verify_wav_file(wav_path):
                valid_files += 1
            else:
                print(f"File is corrupted or invalid: {wav_path}")

    print("\nVerification completed.")
    print(f"Total files checked: {total_files}")
    print(f"Valid files: {valid_files}")
    print(f"Invalid or missing files: {total_files - valid_files}")
|
|
# Run the verification only when executed as a script, not when imported.
if __name__ == "__main__":
    main()