# deepfake-audio-detector/src/data/protocols_2021.py
# Author: Saracasm
# Commit 3957e44 - Phase 5b: 2021 LA cross-dataset eval — 9.09% EER, matches strongest baseline
"""
ASVspoof 2021 LA protocol parser.

Format (8 whitespace-separated columns per row):

    speaker_id utterance_id codec channel attack_id label trim partition

Columns:
    speaker_id   : anonymized speaker
    utterance_id : filename without extension (e.g., "LA_E_9332881")
    codec        : audio codec applied (alaw, ulaw, g722, mp3, pcm, ...)
    channel      : transmission channel (ita_tx, sin_tx, loc_tx, ...)
    attack_id    : "-" for bonafide, "A07"-"A19" for spoof
    label        : "bonafide" or "spoof"
    trim         : "trim" or "notrim"
    partition    : "eval", "progress", or "hidden"
"""
from dataclasses import dataclass
from typing import List, Optional
import os
@dataclass
class Utterance2021:
    """A single trial (one row) of the ASVspoof 2021 LA cm protocol file.

    The string fields hold the protocol columns verbatim; ``label_int`` and
    ``flac_path`` are derived values filled in by the parser.
    """

    speaker_id: str    # anonymized speaker
    utterance_id: str  # filename without extension, e.g. "LA_E_9332881"
    codec: str         # audio codec applied (alaw, ulaw, g722, mp3, pcm, ...)
    channel: str       # transmission channel (ita_tx, sin_tx, loc_tx, ...)
    attack_id: str     # attack identifier column of the protocol row
    label: str         # "bonafide" or "spoof"
    label_int: int     # numeric label: 0 for bonafide, 1 for spoof
    trim: str          # "trim" or "notrim"
    partition: str     # "eval", "progress", or "hidden"
    flac_path: str     # path to the utterance's .flac file under the audio root
def parse_protocol_2021(
    protocol_path: str,
    audio_root: str,
    partition_filter: Optional[str] = "eval",
) -> List[Utterance2021]:
    """Parse the 2021 LA cm protocol with keys.

    Each usable row has exactly 8 whitespace-separated columns:
    ``speaker_id utterance_id codec channel attack_id label trim partition``.
    Lines with any other column count (blank lines included) are skipped.

    Args:
        protocol_path: full path to trial_metadata.txt
        audio_root: full path to the flac/ folder
        partition_filter: only return rows matching this partition.
            Valid: "eval", "progress", "hidden", or None for all.

    Returns:
        List of Utterance2021 objects, in file order. ``label_int`` is 0 for
        "bonafide" and 1 for "spoof"; ``flac_path`` is
        ``<audio_root>/<utterance_id>.flac`` (existence is not checked).

    Raises:
        ValueError: if a selected row's label is neither "bonafide" nor
            "spoof".
    """
    label_to_int = {"bonafide": 0, "spoof": 1}
    utterances: List[Utterance2021] = []

    # Explicit encoding so parsing does not depend on the platform locale.
    with open(protocol_path, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            parts = line.split()
            if len(parts) != 8:
                continue
            (
                speaker_id,
                utt_id,
                codec,
                channel,
                attack_id,
                label,
                trim,
                partition,
            ) = parts

            if partition_filter is not None and partition != partition_filter:
                continue

            # An unrecognized label must not be silently scored as "spoof":
            # that would quietly corrupt any metric computed from label_int.
            if label not in label_to_int:
                raise ValueError(
                    f"{protocol_path}:{line_no}: unknown label {label!r} "
                    "(expected 'bonafide' or 'spoof')"
                )

            utterances.append(Utterance2021(
                speaker_id=speaker_id,
                utterance_id=utt_id,
                codec=codec,
                channel=channel,
                attack_id=attack_id,
                label=label,
                label_int=label_to_int[label],
                trim=trim,
                partition=partition,
                flac_path=os.path.join(audio_root, f"{utt_id}.flac"),
            ))

    return utterances