| import os |
| import argparse |
| from tqdm import tqdm |
|
|
# Start offset constant; not referenced by the functions visible in this
# part of the file (presumably used by the command-line entry point).
START = 0

# Folder name constant (presumably the default output directory for chunks;
# its use is not visible in this part of the file).
FOLDER = "chunks"

# When true, slice_audio() skips any chunk whose output file already exists
# instead of running ffmpeg again for it.
DEBUG = True
|
|
def seconds_to_hms(seconds):
    """
    Convert seconds to hours, minutes, seconds format.

    Whole minutes and hours are computed arithmetically instead of by
    repeated subtraction, so the cost does not grow with the input value.

    Args:
        seconds (int | float): Total number of seconds to convert. Values
            below 60 (including negative values) are returned unchanged as
            the seconds component.

    Returns:
        tuple: A tuple containing (hours, minutes, seconds). Hours and
            minutes are always ints; the seconds component keeps the
            numeric type of the input.

    Example:
        >>> seconds_to_hms(3665)
        (1, 1, 5)
    """
    # The guard leaves negative and sub-minute inputs untouched; floor
    # division alone would "borrow" a minute from a negative value.
    total_minutes = int(seconds // 60) if seconds >= 60 else 0
    second = seconds - total_minutes * 60
    hour, minute = divmod(total_minutes, 60)
    return hour, minute, second
|
|
def hms_to_seconds(hour, minute, second):
    """
    Collapse an (hours, minutes, seconds) triple into a single second count.

    Args:
        hour (int): Hours component
        minute (int): Minutes component
        second (int): Seconds component

    Returns:
        int: hour * 3600 + minute * 60 + second

    Example:
        >>> hms_to_seconds(1, 1, 5)
        3665
    """
    seconds_from_hours = hour * 3600
    seconds_from_minutes = minute * 60
    return seconds_from_hours + seconds_from_minutes + second
|
|
def _probe_duration_seconds(input_audio_path):
    """Return the duration of input_audio_path in seconds, as reported by ffprobe."""
    import subprocess

    # Arguments are passed as a list (no shell), so paths containing spaces
    # or shell metacharacters reach ffprobe unchanged.
    probe = subprocess.run(
        [
            "ffprobe", "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            input_audio_path,
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        check=False,
    )
    try:
        return float(probe.stdout)
    except ValueError as err:
        # Empty or non-numeric output means ffprobe could not read the file.
        raise ValueError(
            f"Could not determine duration of {input_audio_path!r}: "
            f"{probe.stderr.strip() or 'ffprobe produced no duration'}"
        ) from err


def slice_audio(input_audio_path, output_folder, chunks_seconds, chunk_overlap_seconds):
    """
    Slice audio into chunks with specified duration and overlap.

    The input duration is read with ffprobe, then ffmpeg is run once per
    chunk. Consecutive chunks start (chunks_seconds - chunk_overlap_seconds)
    seconds apart. The last chunk is cut without a duration limit, so it runs
    to the end of the input. Input shorter than the overlap still yields a
    single chunk. When the module-level DEBUG flag is true, chunks whose
    output file already exists are not regenerated.

    Args:
        input_audio_path (str): Path to the input audio file
        output_folder (str): Directory where the chunks will be saved; it is
            created if it does not exist
        chunks_seconds (int): Duration of each chunk in seconds
        chunk_overlap_seconds (int): Amount of overlap between consecutive
            chunks in seconds

    Returns:
        None: Creates audio chunks in the specified output folder and writes
            "output_files.txt" there, listing one chunk path per line

    Raises:
        ValueError: If chunk_overlap_seconds is greater than or equal to
            chunks_seconds, or if ffprobe does not report a duration
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
            for a chunk

    Example:
        >>> slice_audio("input.mp3", "chunks", 30, 5)  # doctest: +SKIP
    """
    import subprocess

    # Validate before doing any external work.
    effective_chunk = chunks_seconds - chunk_overlap_seconds
    if effective_chunk <= 0:
        raise ValueError("Overlap duration must be less than chunk duration")

    name, extension = os.path.splitext(os.path.basename(input_audio_path))

    duration = _probe_duration_seconds(input_audio_path)
    hour, minute, second = seconds_to_hms(int(duration))
    print(f"\tDuration ({duration} seconds): {hour:02d}:{minute:02d}:{second:02d}")

    # Ceiling division via negated floor division. The max() guarantees at
    # least one chunk when the audio is shorter than the overlap.
    num_chunks = max(1, -(-int(duration - chunk_overlap_seconds) // effective_chunk))

    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    output_files = []
    # The context manager closes the progress bar even if ffmpeg fails.
    with tqdm(total=num_chunks, desc="Slice audio into chunks progress") as progress_bar:
        for chunk in range(num_chunks):
            start_time = chunk * effective_chunk
            hour_start, minute_start, second_start = seconds_to_hms(start_time)

            output = f"{output_folder}/{name}_chunk{chunk:03d}{extension}"

            # In DEBUG mode, reuse chunks already on disk instead of re-encoding.
            if DEBUG and os.path.exists(output):
                output_files.append(output)
                progress_bar.update(1)
                continue

            command = [
                "ffmpeg", "-y", "-i", input_audio_path,
                "-ss", f"{hour_start:02d}:{minute_start:02d}:{second_start:02d}",
            ]
            if chunk < num_chunks - 1:
                # Only non-final chunks get an explicit length; the final
                # chunk runs to the end of the input.
                end_time = min(start_time + chunks_seconds, duration)
                hour_duration, minute_duration, second_duration = seconds_to_hms(
                    end_time - start_time
                )
                command += [
                    "-t",
                    f"{hour_duration:02d}:{minute_duration:02d}:{second_duration:02d}",
                ]
            command += ["-loglevel", "error", output]

            # check=True stops a failed chunk from being listed as if it existed.
            subprocess.run(command, check=True)
            output_files.append(output)
            progress_bar.update(1)

    with open(f"{output_folder}/output_files.txt", "w", encoding="utf-8") as f:
        f.writelines(f"{output_file}\n" for output_file in output_files)
|
|