"""
Audio preprocessing for ASVspoof 2019 LA.
Given a .flac path, produces a list of fixed-length 4-second segments at 16kHz
mono, ready to feed into Wav2Vec 2.0.
Pipeline:
load .flac -> ensure mono -> ensure 16kHz -> window with 50% overlap
(short clips are zero-padded to one full window)
"""
from typing import List
import torch
import torchaudio
import torchaudio.functional as F
SAMPLE_RATE = 16000
WINDOW_SECONDS = 4.0
OVERLAP_RATIO = 0.5
WINDOW_SAMPLES = int(SAMPLE_RATE * WINDOW_SECONDS) # 64000
HOP_SAMPLES = int(WINDOW_SAMPLES * (1.0 - OVERLAP_RATIO)) # 32000
def load_audio(path: str, target_sr: int = SAMPLE_RATE) -> torch.Tensor:
    """Read an audio file and return a 1-D mono waveform at ``target_sr``.

    Multi-channel input is downmixed by averaging the channels, and the
    sample rate is converted only when it differs from ``target_sr``.
    """
    waveform, native_sr = torchaudio.load(path)
    # Downmix to mono first so resampling runs on a single channel.
    num_channels = waveform.shape[0]
    if num_channels > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    needs_resample = native_sr != target_sr
    if needs_resample:
        waveform = F.resample(waveform, orig_freq=native_sr, new_freq=target_sr)
    # Drop the channel dimension: (1, n) -> (n,).
    return waveform.squeeze(0)
def segment_waveform(
    waveform: torch.Tensor,
    window_samples: int = WINDOW_SAMPLES,
    hop_samples: int = HOP_SAMPLES,
) -> List[torch.Tensor]:
    """Split a 1-D waveform into fixed-length windows with overlap.

    Args:
        waveform: 1-D tensor of audio samples.
        window_samples: Length of each output window, in samples.
        hop_samples: Step between consecutive window starts, in samples.

    Returns:
        A list of tensors, each exactly ``window_samples`` long. A clip
        shorter than one window is zero-padded to a single full window;
        a partial tail window is zero-padded likewise. Padding tensors
        inherit the input's dtype and device.
    """
    n = waveform.shape[0]
    if n <= window_samples:
        # Short clip: zero-pad up to one full window. Match device as well
        # as dtype so CUDA inputs don't fail on the copy below.
        padded = torch.zeros(
            window_samples, dtype=waveform.dtype, device=waveform.device
        )
        padded[:n] = waveform
        return [padded]
    windows: List[torch.Tensor] = []
    start = 0
    while start < n:
        end = start + window_samples
        if end <= n:
            windows.append(waveform[start:end])
            if end == n:
                # Bug fix: when a full window ends exactly at the signal
                # boundary, the old code looped once more and emitted a
                # redundant zero-padded window whose samples were entirely
                # contained in this one. Stop here instead.
                break
        else:
            # Partial tail: pad the remaining samples out to a full window.
            tail = waveform[start:]
            padded = torch.zeros(
                window_samples, dtype=waveform.dtype, device=waveform.device
            )
            padded[: tail.shape[0]] = tail
            windows.append(padded)
            break
        start += hop_samples
    return windows
def preprocess(path: str) -> List[torch.Tensor]:
    """Run the whole pipeline on one file: load the audio, then window it."""
    return segment_waveform(load_audio(path))