File size: 758 Bytes
e078b1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from __future__ import annotations

import argparse

from src.data.prepare import prepare_dataset


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options for the dataset preparation script.

    Args:
        argv: Argument strings to parse, excluding the program name. When
            ``None`` (the default) argparse reads ``sys.argv[1:]``, which is
            the behavior existing zero-argument callers rely on.

    Returns:
        A namespace with two attributes: ``source`` (one of ``"us"``,
        ``"gcc"`` or ``"both"``; defaults to ``"both"``) and ``config``
        (defaults to ``"config.yaml"``).

    Raises:
        SystemExit: Raised by argparse on an unknown option or a ``--source``
            value outside the allowed choices.
    """
    parser = argparse.ArgumentParser(
        description="Prepare US and GCC traffic incident corpora.",
    )
    parser.add_argument(
        "--source",
        choices=["us", "gcc", "both"],
        default="both",
        help="Which corpus to prepare (default: %(default)s).",
    )
    parser.add_argument(
        "--config",
        default="config.yaml",
        help="Path to the configuration file (default: %(default)s).",
    )
    # Passing None makes argparse fall back to sys.argv[1:], so a plain
    # parse_args() call behaves exactly as before.
    return parser.parse_args(argv)


if __name__ == "__main__":
    # Script entry point: read the CLI options, run the preparation step and
    # print the row count of each of the three objects it returns.
    args = parse_args()
    cleaned_df, experiment_df, eval_df = prepare_dataset(
        source=args.source,
        config_path=args.config,
    )
    summary_lines = [
        "Preparation complete",
        f"  cleaned rows:    {len(cleaned_df):,}",
        f"  experiment rows: {len(experiment_df):,}",
        f"  eval rows:       {len(eval_df):,}",
    ]
    # One line per entry, each newline-terminated, as with separate prints.
    print(*summary_lines, sep="\n")