Spaces:
Running
Running
File size: 758 Bytes
e078b1d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | from __future__ import annotations
import argparse
from src.data.prepare import prepare_dataset
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options for the dataset preparation script.

    Args:
        argv: Argument strings to parse, excluding the program name. When
            ``None`` (the default), argparse reads ``sys.argv[1:]``, so
            existing ``parse_args()`` callers behave exactly as before.

    Returns:
        A namespace with two attributes:
        ``source`` -- one of ``"us"``, ``"gcc"`` or ``"both"`` (default
        ``"both"``); ``config`` -- the value of ``--config`` (default
        ``"config.yaml"``).

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``--help``.
    """
    parser = argparse.ArgumentParser(
        description="Prepare US and GCC traffic incident corpora.",
    )
    parser.add_argument(
        "--source",
        choices=["us", "gcc", "both"],
        default="both",
    )
    parser.add_argument("--config", default="config.yaml")
    # An explicit argv lets callers and tests drive the parser without
    # mutating sys.argv.
    return parser.parse_args(argv)
def main() -> None:
    """Run dataset preparation from the command line and print row counts.

    Reads ``--source`` and ``--config`` via ``parse_args``, forwards them to
    ``prepare_dataset``, and reports the length of each of the three objects
    it returns.
    """
    cli = parse_args()
    # prepare_dataset is expected to return exactly three sized objects
    # (presumably DataFrames, judging by the original names -- confirm
    # against src.data.prepare).
    cleaned, experiment, evaluation = prepare_dataset(
        source=cli.source,
        config_path=cli.config,
    )

    print("Preparation complete")
    print(f" cleaned rows: {len(cleaned):,}")
    print(f" experiment rows: {len(experiment):,}")
    print(f" eval rows: {len(evaluation):,}")


if __name__ == "__main__":
    main()
|