""" data_factory/schemas.py ======================== SQLite CREATE TABLE statements for all four domains. Each schema is fully self-contained and has been verified to create without errors in SQLite 3.x. """ from __future__ import annotations import sqlite3 import random from datetime import date, timedelta from typing import Callable # ───────────────────────────────────────────────────────────────────────────── # SQL SCHEMAS # ───────────────────────────────────────────────────────────────────────────── ECOMMERCE_SCHEMA = """ CREATE TABLE IF NOT EXISTS categories ( id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE ); CREATE TABLE IF NOT EXISTS products ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, category_id INTEGER NOT NULL REFERENCES categories(id), price REAL NOT NULL CHECK(price >= 0), stock_quantity INTEGER NOT NULL DEFAULT 0 ); CREATE TABLE IF NOT EXISTS customers ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT NOT NULL UNIQUE, country TEXT NOT NULL, tier TEXT NOT NULL DEFAULT 'bronze' CHECK(tier IN ('bronze', 'silver', 'gold')), created_at TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS orders ( id INTEGER PRIMARY KEY, customer_id INTEGER NOT NULL REFERENCES customers(id), status TEXT NOT NULL DEFAULT 'pending' CHECK(status IN ('pending','processing','shipped','delivered','cancelled')), created_at TEXT NOT NULL, total_amount REAL NOT NULL CHECK(total_amount >= 0) ); CREATE TABLE IF NOT EXISTS order_items ( id INTEGER PRIMARY KEY, order_id INTEGER NOT NULL REFERENCES orders(id), product_id INTEGER NOT NULL REFERENCES products(id), quantity INTEGER NOT NULL CHECK(quantity > 0), unit_price REAL NOT NULL CHECK(unit_price >= 0) ); CREATE TABLE IF NOT EXISTS reviews ( id INTEGER PRIMARY KEY, product_id INTEGER NOT NULL REFERENCES products(id), customer_id INTEGER NOT NULL REFERENCES customers(id), rating INTEGER NOT NULL CHECK(rating BETWEEN 1 AND 5), created_at TEXT NOT NULL ); CREATE INDEX IF NOT EXISTS idx_products_category ON products(category_id); CREATE INDEX IF NOT EXISTS idx_orders_customer ON orders(customer_id); CREATE INDEX IF NOT EXISTS idx_orders_status ON orders(status); CREATE INDEX IF NOT EXISTS idx_orders_created ON orders(created_at); CREATE INDEX IF NOT EXISTS idx_order_items_order ON order_items(order_id); CREATE INDEX IF NOT EXISTS idx_order_items_product ON order_items(product_id); CREATE INDEX IF NOT EXISTS idx_reviews_product ON reviews(product_id); CREATE INDEX IF NOT EXISTS idx_customers_tier ON customers(tier); """ HEALTHCARE_SCHEMA = """ CREATE TABLE IF NOT EXISTS patients ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, date_of_birth TEXT NOT NULL, gender TEXT NOT NULL CHECK(gender IN ('M','F','Other')), blood_type TEXT NOT NULL, country TEXT NOT NULL, registered_at TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS doctors ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, specialization TEXT NOT NULL, department TEXT NOT NULL, experience_years INTEGER NOT NULL CHECK(experience_years >= 0), consultation_fee REAL NOT NULL CHECK(consultation_fee >= 0) ); CREATE TABLE IF NOT EXISTS appointments ( id INTEGER PRIMARY KEY, patient_id INTEGER NOT NULL REFERENCES patients(id), doctor_id INTEGER NOT NULL REFERENCES doctors(id), scheduled_at TEXT NOT NULL, status TEXT NOT NULL CHECK(status IN ('scheduled','completed','cancelled','no_show')), notes TEXT ); CREATE TABLE IF NOT EXISTS diagnoses ( id INTEGER PRIMARY KEY, appointment_id INTEGER NOT NULL REFERENCES appointments(id), icd_code TEXT NOT NULL, description TEXT NOT NULL, severity TEXT NOT NULL CHECK(severity IN ('mild','moderate','severe')) ); CREATE TABLE IF NOT EXISTS medications ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, category TEXT NOT NULL, unit_price REAL NOT NULL CHECK(unit_price >= 0) ); CREATE TABLE IF NOT EXISTS prescriptions ( id INTEGER PRIMARY KEY, appointment_id INTEGER NOT NULL REFERENCES appointments(id), medication_id INTEGER NOT NULL REFERENCES medications(id), dosage TEXT NOT NULL, duration_days INTEGER NOT NULL CHECK(duration_days > 0), quantity INTEGER NOT NULL CHECK(quantity > 0) ); CREATE INDEX IF NOT EXISTS idx_appt_patient ON appointments(patient_id); CREATE INDEX IF NOT EXISTS idx_appt_doctor ON appointments(doctor_id); CREATE INDEX IF NOT EXISTS idx_appt_status ON appointments(status); CREATE INDEX IF NOT EXISTS idx_diag_appt ON diagnoses(appointment_id); CREATE INDEX IF NOT EXISTS idx_presc_appt ON prescriptions(appointment_id); CREATE INDEX IF NOT EXISTS idx_presc_med ON prescriptions(medication_id); """ FINANCE_SCHEMA = """ CREATE TABLE IF NOT EXISTS fin_customers ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT NOT NULL UNIQUE, country TEXT NOT NULL, kyc_status TEXT NOT NULL CHECK(kyc_status IN ('pending','verified','rejected')), created_at TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS accounts ( id INTEGER PRIMARY KEY, customer_id INTEGER NOT NULL REFERENCES fin_customers(id), account_type TEXT NOT NULL CHECK(account_type IN ('savings','current','fixed_deposit','loan')), balance REAL NOT NULL DEFAULT 0, currency TEXT NOT NULL DEFAULT 'USD', status TEXT NOT NULL CHECK(status IN ('active','dormant','closed')), opened_at TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS transactions ( id INTEGER PRIMARY KEY, account_id INTEGER NOT NULL REFERENCES accounts(id), txn_type TEXT NOT NULL CHECK(txn_type IN ('credit','debit')), amount REAL NOT NULL CHECK(amount > 0), currency TEXT NOT NULL DEFAULT 'USD', merchant TEXT, created_at TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS loans ( id INTEGER PRIMARY KEY, customer_id INTEGER NOT NULL REFERENCES fin_customers(id), loan_type TEXT NOT NULL CHECK(loan_type IN ('personal','home','auto','business')), principal_amount REAL NOT NULL, interest_rate REAL NOT NULL, tenure_months INTEGER NOT NULL, status TEXT NOT NULL CHECK(status IN ('active','closed','defaulted')), disbursed_at TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS loan_payments ( id INTEGER PRIMARY KEY, loan_id INTEGER NOT NULL REFERENCES loans(id), amount_paid REAL NOT NULL CHECK(amount_paid > 0), payment_date TEXT NOT NULL, is_late INTEGER NOT NULL DEFAULT 0 CHECK(is_late IN (0,1)) ); CREATE INDEX IF NOT EXISTS idx_acct_customer ON accounts(customer_id); CREATE INDEX IF NOT EXISTS idx_txn_account ON transactions(account_id); CREATE INDEX IF NOT EXISTS idx_txn_type ON transactions(txn_type); CREATE INDEX IF NOT EXISTS idx_loan_customer ON loans(customer_id); CREATE INDEX IF NOT EXISTS idx_lp_loan ON loan_payments(loan_id); """ HR_SCHEMA = """ CREATE TABLE IF NOT EXISTS departments ( id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE, location TEXT NOT NULL, budget REAL NOT NULL CHECK(budget >= 0) ); CREATE TABLE IF NOT EXISTS employees ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT NOT NULL UNIQUE, department_id INTEGER NOT NULL REFERENCES departments(id), job_title TEXT NOT NULL, hire_date TEXT NOT NULL, salary REAL NOT NULL CHECK(salary >= 0), status TEXT NOT NULL CHECK(status IN ('active','resigned','terminated')) ); CREATE TABLE IF NOT EXISTS performance_reviews ( id INTEGER PRIMARY KEY, employee_id INTEGER NOT NULL REFERENCES employees(id), review_year INTEGER NOT NULL, rating INTEGER NOT NULL CHECK(rating BETWEEN 1 AND 5), reviewer_id INTEGER NOT NULL REFERENCES employees(id), comments TEXT ); CREATE TABLE IF NOT EXISTS projects ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, department_id INTEGER NOT NULL REFERENCES departments(id), start_date TEXT NOT NULL, end_date TEXT, budget REAL NOT NULL, status TEXT NOT NULL CHECK(status IN ('planned','active','completed','cancelled')) ); CREATE TABLE IF NOT EXISTS project_assignments ( id INTEGER PRIMARY KEY, employee_id INTEGER NOT NULL REFERENCES employees(id), project_id INTEGER NOT NULL REFERENCES projects(id), role TEXT NOT NULL, hours_allocated INTEGER NOT NULL CHECK(hours_allocated > 0) ); CREATE INDEX IF NOT EXISTS idx_emp_dept ON employees(department_id); CREATE INDEX IF NOT EXISTS idx_emp_status ON employees(status); CREATE INDEX IF NOT EXISTS idx_pr_employee ON performance_reviews(employee_id); CREATE INDEX IF NOT EXISTS idx_proj_dept ON projects(department_id); CREATE INDEX IF NOT EXISTS idx_pa_employee ON project_assignments(employee_id); CREATE INDEX IF NOT EXISTS idx_pa_project ON project_assignments(project_id); """ # ───────────────────────────────────────────────────────────────────────────── # SCHEMA REGISTRY # ───────────────────────────────────────────────────────────────────────────── SCHEMA_MAP: dict[str, str] = { "ecommerce": ECOMMERCE_SCHEMA, "healthcare": HEALTHCARE_SCHEMA, "finance": FINANCE_SCHEMA, "hr": HR_SCHEMA, } # ───────────────────────────────────────────────────────────────────────────── # COMPACT SCHEMA CONTEXT (injected into every training prompt) # ───────────────────────────────────────────────────────────────────────────── SCHEMA_CONTEXT: dict[str, str] = { "ecommerce": """\ Database: ecommerce (SQLite, read-only) TABLES ------ categories(id INTEGER PK, name TEXT) products(id INTEGER PK, name TEXT, category_id INTEGER FK→categories.id, price REAL, stock_quantity INTEGER) customers(id INTEGER PK, name TEXT, email TEXT, country TEXT, tier TEXT ∈ {bronze|silver|gold}, created_at TEXT ISO-8601) orders(id INTEGER PK, customer_id INTEGER FK→customers.id, status TEXT ∈ {pending|processing|shipped|delivered|cancelled}, created_at TEXT ISO-8601, total_amount REAL) order_items(id INTEGER PK, order_id INTEGER FK→orders.id, product_id INTEGER FK→products.id, quantity INTEGER, unit_price REAL) reviews(id INTEGER PK, product_id INTEGER FK→products.id, customer_id INTEGER FK→customers.id, rating INTEGER 1-5, created_at TEXT ISO-8601) NOTES ----- - Use created_at >= '2024-01-01' for date filtering (ISO text sort works) - SQLite window functions: RANK, DENSE_RANK, ROW_NUMBER, LAG, LEAD - strftime('%Y-%m', created_at) returns 'YYYY-MM' - All monetary values in USD """, "healthcare": """\ Database: healthcare (SQLite, read-only) TABLES ------ patients(id INTEGER PK, name TEXT, date_of_birth TEXT ISO-8601, gender TEXT ∈ {M|F|Other}, blood_type TEXT, country TEXT, registered_at TEXT ISO-8601) doctors(id INTEGER PK, name TEXT, specialization TEXT, department TEXT, experience_years INTEGER, consultation_fee REAL) appointments(id INTEGER PK, patient_id INTEGER FK→patients.id, doctor_id INTEGER FK→doctors.id, scheduled_at TEXT ISO-8601, status TEXT ∈ {scheduled|completed|cancelled|no_show}, notes TEXT nullable) diagnoses(id INTEGER PK, appointment_id INTEGER FK→appointments.id, icd_code TEXT, description TEXT, severity TEXT ∈ {mild|moderate|severe}) medications(id INTEGER PK, name TEXT, category TEXT, unit_price REAL) prescriptions(id INTEGER PK, appointment_id INTEGER FK→appointments.id, medication_id INTEGER FK→medications.id, dosage TEXT, duration_days INTEGER, quantity INTEGER) NOTES ----- - consultation_fee is in USD per visit - ICD codes follow WHO ICD-10 format (e.g. 'I10', 'E11') - SQLite window functions available """, "finance": """\ Database: finance (SQLite, read-only) TABLES ------ fin_customers(id INTEGER PK, name TEXT, email TEXT, country TEXT, kyc_status TEXT ∈ {pending|verified|rejected}, created_at TEXT ISO-8601) accounts(id INTEGER PK, customer_id INTEGER FK→fin_customers.id, account_type TEXT ∈ {savings|current|fixed_deposit|loan}, balance REAL, currency TEXT, status TEXT ∈ {active|dormant|closed}, opened_at TEXT ISO-8601) transactions(id INTEGER PK, account_id INTEGER FK→accounts.id, txn_type TEXT ∈ {credit|debit}, amount REAL, currency TEXT, merchant TEXT nullable, created_at TEXT ISO-8601) loans(id INTEGER PK, customer_id INTEGER FK→fin_customers.id, loan_type TEXT ∈ {personal|home|auto|business}, principal_amount REAL, interest_rate REAL, tenure_months INTEGER, status TEXT ∈ {active|closed|defaulted}, disbursed_at TEXT ISO-8601) loan_payments(id INTEGER PK, loan_id INTEGER FK→loans.id, amount_paid REAL, payment_date TEXT ISO-8601, is_late INTEGER ∈ {0|1}) NOTES ----- - All monetary values in USD unless currency column specifies otherwise - is_late = 1 means the payment was overdue - SQLite window functions available """, "hr": """\ Database: hr (SQLite, read-only) TABLES ------ departments(id INTEGER PK, name TEXT, location TEXT, budget REAL) employees(id INTEGER PK, name TEXT, email TEXT, department_id INTEGER FK→departments.id, job_title TEXT, hire_date TEXT ISO-8601, salary REAL, status TEXT ∈ {active|resigned|terminated}) performance_reviews(id INTEGER PK, employee_id INTEGER FK→employees.id, review_year INTEGER, rating INTEGER 1-5, reviewer_id INTEGER FK→employees.id, comments TEXT nullable) projects(id INTEGER PK, name TEXT, department_id INTEGER FK→departments.id, start_date TEXT ISO-8601, end_date TEXT nullable, budget REAL, status TEXT ∈ {planned|active|completed|cancelled}) project_assignments(id INTEGER PK, employee_id INTEGER FK→employees.id, project_id INTEGER FK→projects.id, role TEXT, hours_allocated INTEGER) NOTES ----- - salary is annual in USD - performance rating: 1 (lowest) to 5 (highest) - end_date is NULL for ongoing projects - SQLite window functions available """, } # ───────────────────────────────────────────────────────────────────────────── # SEED FUNCTIONS (deterministic, SEED=42) # ───────────────────────────────────────────────────────────────────────────── def _rdate(rng: random.Random, start: str = "2022-01-01", end: str = "2024-12-31") -> str: s = date.fromisoformat(start) e = date.fromisoformat(end) return (s + timedelta(days=rng.randint(0, (e - s).days))).isoformat() def seed_ecommerce(conn: sqlite3.Connection, seed: int = 42) -> None: rng = random.Random(seed) cats = ["Electronics", "Clothing", "Books", "Home & Garden", "Sports & Outdoors", "Toys & Games", "Beauty", "Automotive"] conn.executemany("INSERT INTO categories(id,name) VALUES(?,?)", enumerate(cats, 1)) products = [ (1,"Wireless Headphones",1,149.99,50),(2,"Laptop Stand",1,59.99,120), (3,"USB-C Hub",1,49.99,90),(4,"Webcam 4K",1,89.99,30), (5,"Cotton T-Shirt",2,19.99,200),(6,"Winter Jacket",2,129.99,60), (7,"Running Shorts",2,34.99,150),(8,"Clean Code",3,39.99,80), (9,"Deep Learning Book",3,59.99,45),(10,"Coffee Maker",4,89.99,40), (11,"Air Purifier",4,199.99,25),(12,"Yoga Mat",5,29.99,150), (13,"Resistance Bands",5,14.99,200),(14,"Lego City Set",6,79.99,60), (15,"Face Serum",7,34.99,100),(16,"Dash Cam",8,119.99,35), ] conn.executemany("INSERT INTO products VALUES(?,?,?,?,?)", products) countries = ["India","USA","Germany","UK","Canada","Australia","France","Brazil"] tiers = ["bronze","silver","gold"] customers = [] for i in range(1, 51): customers.append((i, f"Customer {i}", f"cust{i}@shop.com", rng.choice(countries), rng.choice(tiers), _rdate(rng))) conn.executemany("INSERT INTO customers VALUES(?,?,?,?,?,?)", customers) statuses = ["pending","processing","shipped","delivered","cancelled"] orders = [] for i in range(1, 201): orders.append((i, rng.randint(1, 50), rng.choice(statuses), _rdate(rng), round(rng.uniform(20, 800), 2))) conn.executemany("INSERT INTO orders VALUES(?,?,?,?,?)", orders) items = [] for i in range(1, 301): items.append((i, rng.randint(1, 200), rng.randint(1, 16), rng.randint(1, 5), round(rng.uniform(10, 200), 2))) conn.executemany("INSERT INTO order_items VALUES(?,?,?,?,?)", items) reviews = [] for i in range(1, 151): reviews.append((i, rng.randint(1, 16), rng.randint(1, 50), rng.randint(1, 5), _rdate(rng))) conn.executemany("INSERT INTO reviews VALUES(?,?,?,?,?)", reviews) conn.commit() def seed_healthcare(conn: sqlite3.Connection, seed: int = 42) -> None: rng = random.Random(seed) specs = [("Cardiology","Cardiology"), ("Neurology","Neurology"), ("Orthopedics","Orthopedics"), ("Dermatology","Dermatology"), ("Pediatrics","Pediatrics"), ("Oncology","Oncology"), ("Endocrinology","Endocrinology"), ("Gastroenterology","Gastroenterology")] for i, (spec, dept) in enumerate(specs, 1): conn.execute("INSERT INTO doctors VALUES(?,?,?,?,?,?)", (i, f"Dr. {['Smith','Patel','Kim','Müller','Okafor','Chen','Lopez','Roy'][i-1]}", spec, dept, rng.randint(2, 25), round(rng.uniform(50, 350), 2))) genders = ["M", "F", "Other"] blood_types = ["A+","A-","B+","B-","O+","O-","AB+","AB-"] countries = ["India","USA","Germany","UK","Canada","Australia"] for i in range(1, 101): conn.execute("INSERT INTO patients VALUES(?,?,?,?,?,?,?)", (i, f"Patient {i}", _rdate(rng, "1950-01-01", "2010-01-01"), rng.choice(genders), rng.choice(blood_types), rng.choice(countries), _rdate(rng, "2020-01-01", "2024-12-31"))) appt_statuses = ["scheduled", "completed", "cancelled", "no_show"] weights = [0.15, 0.60, 0.15, 0.10] for i in range(1, 301): conn.execute("INSERT INTO appointments VALUES(?,?,?,?,?,?)", (i, rng.randint(1, 100), rng.randint(1, 8), _rdate(rng, "2022-01-01", "2024-12-31"), rng.choices(appt_statuses, weights)[0], None)) icd_codes = ["I10","E11","J45","M54","K21","F32","G43","L30","N39","R05", "C50","Z87","I25","E78","J18"] descs = ["Hypertension","Type 2 Diabetes","Asthma","Back Pain","GERD", "Depression","Migraine","Dermatitis","UTI","Cough", "Breast Cancer","Family History","Coronary Artery Disease", "Hyperlipidemia","Pneumonia"] severities = ["mild","moderate","severe"] for i in range(1, 201): conn.execute("INSERT INTO diagnoses VALUES(?,?,?,?,?)", (i, rng.randint(1, 300), rng.choice(icd_codes), rng.choice(descs), rng.choice(severities))) meds = [("Metformin","Antidiabetic",0.15),("Lisinopril","Antihypertensive",0.20), ("Atorvastatin","Statin",0.25),("Amoxicillin","Antibiotic",0.30), ("Ibuprofen","NSAID",0.10),("Omeprazole","PPI",0.18), ("Sertraline","Antidepressant",0.35),("Cetirizine","Antihistamine",0.08), ("Paracetamol","Analgesic",0.05),("Aspirin","Antiplatelet",0.07)] for i, (name, cat, price) in enumerate(meds, 1): conn.execute("INSERT INTO medications VALUES(?,?,?,?)", (i, name, cat, price)) dosages = ["1x daily","2x daily","3x daily","once at night","as needed"] for i in range(1, 251): conn.execute("INSERT INTO prescriptions VALUES(?,?,?,?,?,?)", (i, rng.randint(1, 300), rng.randint(1, 10), rng.choice(dosages), rng.randint(5, 60), rng.randint(10, 90))) conn.commit() def seed_finance(conn: sqlite3.Connection, seed: int = 42) -> None: rng = random.Random(seed) countries = ["India","USA","Germany","UK","Singapore","UAE","Canada"] kyc = ["pending","verified","verified","verified","rejected"] for i in range(1, 51): conn.execute("INSERT INTO fin_customers VALUES(?,?,?,?,?,?)", (i, f"FinClient {i}", f"fincli{i}@bank.com", rng.choice(countries), rng.choice(kyc), _rdate(rng))) acct_types = ["savings","savings","current","fixed_deposit"] statuses = ["active","active","active","dormant","closed"] for i in range(1, 101): conn.execute("INSERT INTO accounts VALUES(?,?,?,?,?,?,?)", (i, rng.randint(1, 50), rng.choice(acct_types), round(rng.uniform(100, 100000), 2), "USD", rng.choice(statuses), _rdate(rng))) merchants = [None, "Amazon", "Walmart", "Netflix", "Uber", "Apple", "Google Pay", "Zomato", "Flipkart", "Airbnb"] for i in range(1, 501): conn.execute("INSERT INTO transactions VALUES(?,?,?,?,?,?,?)", (i, rng.randint(1, 100), rng.choice(["credit","debit"]), round(rng.uniform(5, 10000), 2), "USD", rng.choice(merchants), _rdate(rng))) loan_types = ["personal","home","auto","business"] loan_statuses = ["active","active","closed","defaulted"] for i in range(1, 51): conn.execute("INSERT INTO loans VALUES(?,?,?,?,?,?,?,?)", (i, rng.randint(1, 50), rng.choice(loan_types), round(rng.uniform(5000, 500000), 2), round(rng.uniform(5, 18), 2), rng.randint(12, 360), rng.choice(loan_statuses), _rdate(rng))) for i in range(1, 201): conn.execute("INSERT INTO loan_payments VALUES(?,?,?,?,?)", (i, rng.randint(1, 50), round(rng.uniform(500, 10000), 2), _rdate(rng), rng.randint(0, 1))) conn.commit() def seed_hr(conn: sqlite3.Connection, seed: int = 42) -> None: rng = random.Random(seed) depts = [("Engineering","Bangalore",8000000),("Marketing","Mumbai",3000000), ("Finance","Delhi",2000000),("HR","Chennai",1500000), ("Sales","Hyderabad",5000000),("Product","Pune",4000000), ("Legal","Delhi",1000000),("Operations","Kolkata",2500000)] for i, (name, loc, bud) in enumerate(depts, 1): conn.execute("INSERT INTO departments VALUES(?,?,?,?)", (i, name, loc, bud)) titles = ["Software Engineer","Senior Engineer","Staff Engineer","Principal Engineer", "Engineering Manager","Product Manager","Data Analyst","Data Scientist", "Marketing Specialist","Sales Executive","HR Specialist","Finance Analyst", "Director","VP","Legal Counsel"] statuses = ["active","active","active","active","resigned","terminated"] for i in range(1, 101): conn.execute("INSERT INTO employees VALUES(?,?,?,?,?,?,?,?)", (i, f"Employee {i}", f"emp{i}@corp.com", rng.randint(1, 8), rng.choice(titles), _rdate(rng, "2015-01-01", "2024-01-01"), round(rng.uniform(25000, 200000), 2), rng.choice(statuses))) for i in range(1, 201): conn.execute("INSERT INTO performance_reviews VALUES(?,?,?,?,?,?)", (i, rng.randint(1, 100), rng.randint(2019, 2024), rng.randint(1, 5), rng.randint(1, 100), rng.choice(["Excellent work","Good performance","Needs improvement", "Outstanding","Meeting expectations"]))) proj_statuses = ["planned","active","active","completed","cancelled"] for i in range(1, 51): sd = _rdate(rng, "2021-01-01", "2024-01-01") conn.execute("INSERT INTO projects VALUES(?,?,?,?,?,?,?)", (i, f"Project {i}", rng.randint(1, 8), sd, _rdate(rng, sd, "2025-06-01") if rng.random() > 0.25 else None, round(rng.uniform(50000, 2000000), 2), rng.choice(proj_statuses))) roles = ["Lead","Senior Developer","Developer","Tester","Analyst","DevOps"] for i in range(1, 251): conn.execute("INSERT INTO project_assignments VALUES(?,?,?,?,?)", (i, rng.randint(1, 100), rng.randint(1, 50), rng.choice(roles), rng.randint(20, 400))) conn.commit() # ───────────────────────────────────────────────────────────────────────────── # REGISTRY # ───────────────────────────────────────────────────────────────────────────── SEED_MAP: dict[str, Callable] = { "ecommerce": seed_ecommerce, "healthcare": seed_healthcare, "finance": seed_finance, "hr": seed_hr, } def build_connection(domain: str, seed: int = 42) -> sqlite3.Connection: """Return a seeded in-memory SQLite connection for the given domain.""" conn = sqlite3.connect(":memory:", check_same_thread=False) conn.row_factory = sqlite3.Row conn.execute("PRAGMA foreign_keys = ON") conn.executescript(SCHEMA_MAP[domain]) SEED_MAP[domain](conn, seed=seed) return conn