# temporal-twins-code / scripts / generate_dataset.py
# Uploaded by temporal-twins-anon: "Add anonymous Temporal Twins code release"
# Commit a3682cf (verified), file size 1.24 kB.
import os
import sys
import pandas as pd
from src.core.config_loader import load_config
from src.generators.user_generator import generate_users
from src.generators.transaction_generator import generate_transactions
from src.fraud.fraud_engine import FraudEngine
from src.risk.risk_engine import apply_risk_engine
def main():
    """Run the dataset generation pipeline and write the results as CSV.

    Loads ``config/default.yaml``, generates users and transactions, passes
    the transactions through the risk engine and then the fraud engine,
    sorts them by ``timestamp`` and saves both tables under
    ``data/processed``. All paths are relative to the current working
    directory.

    The fraud difficulty is read from the first command-line argument and
    falls back to ``"medium"`` when none is given; the value is forwarded
    to ``FraudEngine`` unchanged.
    """
    # First positional CLI argument, if any, selects the difficulty.
    cli_args = sys.argv[1:]
    if cli_args:
        difficulty = cli_args[0]
    else:
        difficulty = "medium"

    config = load_config("config/default.yaml")

    print("Generating users...")
    users = generate_users(config)

    print("Generating transactions...")
    transactions = generate_transactions(users, config)

    print("Applying risk engine...")
    transactions = apply_risk_engine(transactions, users, config)

    print(f"Applying fraud engine (difficulty={difficulty})...")
    transactions = FraudEngine(difficulty=difficulty).apply(transactions)

    # Order rows by timestamp and discard the pre-sort index.
    transactions = transactions.sort_values("timestamp")
    transactions = transactions.reset_index(drop=True)

    os.makedirs("data/processed", exist_ok=True)

    print("Saving dataset...")
    outputs = (
        (transactions, "data/processed/transactions.csv"),
        (users, "data/processed/users.csv"),
    )
    for frame, target in outputs:
        frame.to_csv(target, index=False)

    print("Dataset generation complete")
    print(f"Transactions: {len(transactions)}")
    print(f"Users: {len(users)}")
# Run the pipeline only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()