File size: 3,284 Bytes
7b34e81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""
ASVspoof 2019 LA protocol parser.

Reads the official .txt protocol files and returns lists of structured
Utterance objects that downstream code (datasets, evaluation) can use.

Protocol file format (5 whitespace-separated columns):
    speaker_id  utterance_id  -  attack_id  label

    speaker_id   : anonymized speaker (e.g., "LA_0079")
    utterance_id : filename without extension (e.g., "LA_T_1138215")
    column 3     : unused, always "-"
    attack_id    : "-" for bonafide, "A01"-"A19" for spoof samples
    label        : "bonafide" or "spoof"
"""

from dataclasses import dataclass
from typing import List, Dict
import os


@dataclass
class Utterance:
    """A single entry of an ASVspoof 2019 LA protocol file.

    Attributes:
        speaker_id: anonymized speaker identifier.
        utterance_id: utterance name (audio filename without extension).
        attack_id: "-" for bonafide, "A01"-"A19" for spoof.
        label: "bonafide" or "spoof".
        label_int: integer form of the label (0 = bonafide, 1 = spoof).
        flac_path: path to the utterance's .flac file.
    """

    speaker_id: str
    utterance_id: str
    attack_id: str
    label: str
    label_int: int
    flac_path: str


def parse_protocol(protocol_path: str, audio_root: str) -> List[Utterance]:
    """Parse one ASVspoof 2019 LA cm protocol file.

    Each well-formed line has five whitespace-separated columns:
    ``speaker_id utterance_id - attack_id label``. Lines that do not split
    into exactly five fields (blank lines, for instance) are skipped.

    Args:
        protocol_path: full path to the .txt protocol file.
        audio_root: full path to the folder containing the .flac files.

    Returns:
        List of Utterance objects, one per valid line, in file order.

    Raises:
        ValueError: if a five-column line carries a label other than
            "bonafide" or "spoof".
        OSError: if the protocol file cannot be opened.
    """
    label_to_int = {"bonafide": 0, "spoof": 1}
    utterances: List[Utterance] = []
    # Explicit encoding so parsing does not depend on the platform's
    # default locale encoding.
    with open(protocol_path, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            parts = line.split()
            if len(parts) != 5:
                continue
            speaker_id, utt_id, _unused, attack_id, label = parts
            # Reject unknown labels instead of silently counting them as
            # spoof, which would corrupt the training/evaluation targets.
            if label not in label_to_int:
                raise ValueError(
                    f"{protocol_path}:{line_no}: unexpected label {label!r} "
                    f"(expected 'bonafide' or 'spoof')"
                )
            utterances.append(Utterance(
                speaker_id=speaker_id,
                utterance_id=utt_id,
                attack_id=attack_id,
                label=label,
                label_int=label_to_int[label],
                flac_path=os.path.join(audio_root, f"{utt_id}.flac"),
            ))
    return utterances


def parse_all_partitions(la_root: str) -> Dict[str, List[Utterance]]:
    """Load the train, dev, and eval cm protocols in a single call.

    Args:
        la_root: path to the LA folder, e.g.
                 ".../asvspoof_2019/LA"

    Returns:
        Dict with keys "train", "dev", "eval" (in that order), each mapped
        to the list of Utterances parsed from that partition's protocol.
    """
    protocol_dir = os.path.join(la_root, "ASVspoof2019_LA_cm_protocols")
    # One row per partition: (key, protocol file name, audio folder name).
    layout = (
        ("train", "ASVspoof2019.LA.cm.train.trn.txt", "ASVspoof2019_LA_train"),
        ("dev", "ASVspoof2019.LA.cm.dev.trl.txt", "ASVspoof2019_LA_dev"),
        ("eval", "ASVspoof2019.LA.cm.eval.trl.txt", "ASVspoof2019_LA_eval"),
    )
    parsed: Dict[str, List[Utterance]] = {}
    for partition, protocol_name, audio_folder in layout:
        protocol_file = os.path.join(protocol_dir, protocol_name)
        flac_dir = os.path.join(la_root, audio_folder, "flac")
        parsed[partition] = parse_protocol(protocol_file, flac_dir)
    return parsed


def class_counts(utterances: List[Utterance]) -> Dict[str, int]:
    """Tally utterances per label.

    Returns a dict of the form {'bonafide': N, 'spoof': M}; both keys are
    always present, with 0 when no utterance carries that label.
    """
    tally = dict.fromkeys(("bonafide", "spoof"), 0)
    for utt in utterances:
        tally[utt.label] = tally[utt.label] + 1
    return tally