Spaces:
Running
Running
| from __future__ import annotations | |
| import argparse | |
| from src.data.prepare import prepare_dataset | |
| def parse_args() -> argparse.Namespace: | |
| parser = argparse.ArgumentParser(description="Prepare US and GCC traffic incident corpora.") | |
| parser.add_argument("--source", choices=["us", "gcc", "both"], default="both") | |
| parser.add_argument("--config", default="config.yaml") | |
| return parser.parse_args() | |
def main() -> None:
    """Run dataset preparation from the command line and print row counts.

    Parses the CLI arguments, forwards ``--source`` and ``--config`` to
    ``prepare_dataset`` (as ``source`` and ``config_path``), and prints the
    length of each of the three objects it returns.
    """
    args = parse_args()
    # prepare_dataset is expected to return exactly three sized objects;
    # the tuple unpacking fails loudly if that contract changes.
    cleaned_df, experiment_df, eval_df = prepare_dataset(source=args.source, config_path=args.config)
    print("Preparation complete")
    print(f" cleaned rows: {len(cleaned_df):,}")
    print(f" experiment rows: {len(experiment_df):,}")
    print(f" eval rows: {len(eval_df):,}")


if __name__ == "__main__":
    # Keep the guard trivial so importing this module has no side effects
    # and the entry point stays callable from other code.
    main()