""" Generates a realistic in-memory SQLite database for TechMart, a fictional e-commerce company. The data contains deliberate patterns that support nine investigation tasks: 1. Orders drop after a major promotion ends 2. Product returns spike for a specific SKU in the West region 3. Customer churn concentrated in the Enterprise/Northeast segment 4. Shipping delays by QuickShip in the Midwest driving support tickets 5. Revenue up but profit down (multi-causal paradox) 6. Supplier quality crisis (AudioTech products 6 & 7) 7. Inventory stockout in West for Monitor 27-inch during promo 8. Coordinated fraud ring in Southeast with new accounts 9. Repeat purchase decline masked by new-customer acquisition spend """ import random import sqlite3 from datetime import datetime, timedelta PRODUCTS = [ (1, "Laptop Pro 15", "Electronics", 999.99, 650.00, "TechCorp"), (2, "Desktop Workstation", "Electronics", 1499.99, 950.00, "TechCorp"), (3, "Tablet Ultra", "Electronics", 599.99, 350.00, "TechCorp"), (4, "Monitor 27-inch", "Electronics", 449.99, 280.00, "DisplayMax"), (5, "Smart TV 55-inch", "Electronics", 699.99, 420.00, "DisplayMax"), (6, "Wireless Headphones Pro", "Accessories", 149.99, 45.00, "AudioTech"), (7, "Bluetooth Speaker", "Accessories", 79.99, 30.00, "AudioTech"), (8, "USB-C Hub", "Accessories", 49.99, 15.00, "ConnectPlus"), (9, "Laptop Bag Premium", "Accessories", 39.99, 12.00, "CarryAll"), (10, "Mouse Pad XL", "Accessories", 24.99, 8.00, "CarryAll"), (11, "Office Suite License", "Software", 199.99, 20.00, "SoftVault"), (12, "Antivirus Pro Annual", "Software", 49.99, 5.00, "SecureNet"), (13, "Cloud Backup 1TB", "Software", 99.99, 10.00, "CloudStore"), (14, "Design Studio Pro", "Software", 299.99, 30.00, "CreativeSoft"), (15, "DevTools Ultimate", "Software", 149.99, 15.00, "CodeForge"), (16, "Mechanical Keyboard RGB", "Peripherals", 129.99, 60.00, "KeyMaster"), (17, "Wireless Mouse Pro", "Peripherals", 59.99, 20.00, "ClickTech"), (18, "Webcam HD 1080p", "Peripherals", 89.99, 35.00, "VisionCam"), (19, "External SSD 1TB", "Peripherals", 109.99, 55.00, "StoragePro"), (20, "Laser Printer Pro", "Peripherals", 249.99, 130.00, "PrintMax"), ] _FIRST = [ "James","Mary","Robert","Patricia","John","Jennifer","Michael","Linda", "David","Elizabeth","William","Barbara","Richard","Susan","Joseph","Jessica", "Thomas","Sarah","Christopher","Karen","Charles","Lisa","Daniel","Nancy", "Matthew","Betty","Anthony","Margaret","Mark","Sandra","Donald","Ashley", "Steven","Dorothy","Andrew","Kimberly","Paul","Emily","Joshua","Donna", "Kenneth","Michelle","Kevin","Carol","Brian","Amanda","George","Melissa", "Timothy","Deborah", ] _LAST = [ "Smith","Johnson","Williams","Brown","Jones","Garcia","Miller","Davis", "Rodriguez","Martinez","Hernandez","Lopez","Gonzalez","Wilson","Anderson", "Thomas","Taylor","Moore","Jackson","Martin","Lee","Perez","Thompson", "White","Harris","Sanchez","Clark","Ramirez","Lewis","Robinson","Walker", "Young","Allen","King","Wright","Scott","Torres","Nguyen","Hill","Flores", "Green","Adams","Nelson","Baker","Hall","Rivera","Campbell","Mitchell", "Carter","Roberts", ] REGIONS = ["Northeast", "Southeast", "West", "Midwest"] PRICE_CHANGES = [ (1, 999.99, 1149.99, "2024-02-01", "Annual pricing adjustment"), (2, 1499.99, 1699.99, "2024-02-01", "Annual pricing adjustment"), (11, 199.99, 229.99, "2024-02-01", "Annual pricing adjustment"), (15, 149.99, 174.99, "2024-02-01", "Annual pricing adjustment"), (19, 109.99, 129.99, "2024-02-01", "Annual pricing adjustment"), ] PROMOTIONS = [ (1, "New Year Kickoff", "2024-01-01", "2024-01-15", 10.0, "All"), (2, "Valentine Tech Sale", "2024-02-10", "2024-02-14", 15.0, "Electronics"), (3, "Spring Mega Sale", "2024-02-15", "2024-03-01", 25.0, "All"), ] CARRIERS = ["QuickShip", "FastFreight", "ReliableLogistics"] TICKET_CATEGORIES = ["delivery_delay", "product_defect", "billing_issue", "general_inquiry"] MARKETING_CHANNELS = ["email", "social_media", "search_ads", "display_ads", "affiliate"] def _date_range(start: datetime, end: datetime): d = start while d <= end: yield d d += timedelta(days=1) def _effective_price(base_prices: dict, changes_by_pid: dict, pid: int, date_str: str): """Return the unit price for *pid* on *date_str*, considering price changes.""" price = base_prices[pid] for new_price, change_date in changes_by_pid.get(pid, []): if date_str >= change_date: price = new_price return price def create_database(seed: int = 42) -> sqlite3.Connection: rng = random.Random(seed) conn = sqlite3.connect(":memory:", check_same_thread=False) c = conn.cursor() c.executescript(""" CREATE TABLE customers ( customer_id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT NOT NULL, region TEXT NOT NULL, segment TEXT NOT NULL, signup_date TEXT NOT NULL ); CREATE TABLE products ( product_id INTEGER PRIMARY KEY, name TEXT NOT NULL, category TEXT NOT NULL, price REAL NOT NULL, cost REAL NOT NULL, supplier TEXT NOT NULL ); CREATE TABLE orders ( order_id INTEGER PRIMARY KEY AUTOINCREMENT, customer_id INTEGER NOT NULL, order_date TEXT NOT NULL, status TEXT NOT NULL DEFAULT 'completed', total_amount REAL NOT NULL DEFAULT 0, FOREIGN KEY (customer_id) REFERENCES customers(customer_id) ); CREATE TABLE order_items ( item_id INTEGER PRIMARY KEY AUTOINCREMENT, order_id INTEGER NOT NULL, product_id INTEGER NOT NULL, quantity INTEGER NOT NULL DEFAULT 1, unit_price REAL NOT NULL, FOREIGN KEY (order_id) REFERENCES orders(order_id), FOREIGN KEY (product_id) REFERENCES products(product_id) ); CREATE TABLE returns ( return_id INTEGER PRIMARY KEY AUTOINCREMENT, order_id INTEGER NOT NULL, product_id INTEGER NOT NULL, return_date TEXT NOT NULL, reason TEXT NOT NULL, refund_amount REAL NOT NULL, FOREIGN KEY (order_id) REFERENCES orders(order_id), FOREIGN KEY (product_id) REFERENCES products(product_id) ); CREATE TABLE promotions ( promo_id INTEGER PRIMARY KEY, name TEXT NOT NULL, start_date TEXT NOT NULL, end_date TEXT NOT NULL, discount_pct REAL NOT NULL, applicable_category TEXT ); CREATE TABLE price_changes ( change_id INTEGER PRIMARY KEY AUTOINCREMENT, product_id INTEGER NOT NULL, old_price REAL NOT NULL, new_price REAL NOT NULL, change_date TEXT NOT NULL, reason TEXT, FOREIGN KEY (product_id) REFERENCES products(product_id) ); CREATE TABLE shipping ( shipment_id INTEGER PRIMARY KEY AUTOINCREMENT, order_id INTEGER NOT NULL, carrier TEXT NOT NULL, ship_date TEXT NOT NULL, delivery_date TEXT NOT NULL, status TEXT NOT NULL DEFAULT 'delivered', FOREIGN KEY (order_id) REFERENCES orders(order_id) ); CREATE TABLE support_tickets ( ticket_id INTEGER PRIMARY KEY AUTOINCREMENT, customer_id INTEGER NOT NULL, product_id INTEGER, created_date TEXT NOT NULL, category TEXT NOT NULL, priority TEXT NOT NULL DEFAULT 'medium', resolution_status TEXT NOT NULL DEFAULT 'open', FOREIGN KEY (customer_id) REFERENCES customers(customer_id), FOREIGN KEY (product_id) REFERENCES products(product_id) ); CREATE TABLE inventory_log ( log_id INTEGER PRIMARY KEY AUTOINCREMENT, product_id INTEGER NOT NULL, log_date TEXT NOT NULL, units_in_stock INTEGER NOT NULL, units_ordered INTEGER NOT NULL DEFAULT 0, warehouse_region TEXT NOT NULL, FOREIGN KEY (product_id) REFERENCES products(product_id) ); CREATE TABLE marketing_spend ( spend_id INTEGER PRIMARY KEY AUTOINCREMENT, channel TEXT NOT NULL, campaign_name TEXT NOT NULL, region TEXT NOT NULL, spend_date TEXT NOT NULL, amount REAL NOT NULL ); """) c.executemany("INSERT INTO products VALUES (?,?,?,?,?,?)", PRODUCTS) base_prices = {p[0]: p[3] for p in PRODUCTS} segments_pool = ["Enterprise"] * 35 + ["SMB"] * 55 + ["Consumer"] * 60 rng.shuffle(segments_pool) customers = [] for i in range(150): first = rng.choice(_FIRST) last = rng.choice(_LAST) name = f"{first} {last}" email = f"{first.lower()}.{last.lower()}{i}@techmart.com" region = REGIONS[i % 4] segment = segments_pool[i] signup = (datetime(2023, 1, 1) + timedelta(days=rng.randint(0, 364))).strftime("%Y-%m-%d") c.execute("INSERT INTO customers VALUES (?,?,?,?,?,?)", (i + 1, name, email, region, segment, signup)) customers.append((i + 1, name, email, region, segment, signup)) ent_ne = [cu for cu in customers if cu[4] == "Enterprise" and cu[3] == "Northeast"] ent_other = [cu for cu in customers if cu[4] == "Enterprise" and cu[3] != "Northeast"] smb_all = [cu for cu in customers if cu[4] == "SMB"] con_all = [cu for cu in customers if cu[4] == "Consumer"] c.executemany("INSERT INTO promotions VALUES (?,?,?,?,?,?)", PROMOTIONS) for pid, old_p, new_p, dt, reason in PRICE_CHANGES: c.execute( "INSERT INTO price_changes (product_id,old_price,new_price,change_date,reason) VALUES (?,?,?,?,?)", (pid, old_p, new_p, dt, reason), ) changes_by_pid: dict[int, list] = {} for pid, _, new_p, dt, _ in PRICE_CHANGES: changes_by_pid.setdefault(pid, []).append((new_p, dt)) START = datetime(2024, 1, 1) END = datetime(2024, 3, 15) PROMO_S = datetime(2024, 2, 15) PROMO_E = datetime(2024, 3, 1) PRICE_INC = datetime(2024, 2, 1) product_weights_base = [1.0] * 20 product_weights_base[5] = 3.0 for day in _date_range(START, END): date_str = day.strftime("%Y-%m-%d") is_promo = PROMO_S <= day <= PROMO_E after_price_inc = day >= PRICE_INC daily_count = rng.randint(25, 35) if is_promo else rng.randint(12, 18) for _ in range(daily_count): roll = rng.random() if roll < 0.08: pool = ent_ne if after_price_inc and rng.random() < 0.85: continue elif roll < 0.22: pool = ent_other if after_price_inc and rng.random() < 0.50: continue elif roll < 0.55: pool = smb_all if after_price_inc and rng.random() < 0.20: continue else: pool = con_all cust = rng.choice(pool) cust_id, _, _, cust_region, _, _ = cust weights = list(product_weights_base) if cust_region == "West": weights[5] = 7.0 if is_promo: weights[3] = 0.1 num_items = rng.choices([1, 2, 3], weights=[0.6, 0.3, 0.1])[0] pids = list(set(rng.choices(range(1, 21), weights=weights, k=num_items))) c.execute( "INSERT INTO orders (customer_id, order_date, status, total_amount) VALUES (?,?,?,?)", (cust_id, date_str, "completed", 0), ) order_id = c.lastrowid total = 0.0 for pid in pids: qty = rng.choices([1, 2, 3], weights=[0.75, 0.20, 0.05])[0] price = _effective_price(base_prices, changes_by_pid, pid, date_str) if is_promo: price = round(price * 0.75, 2) total += price * qty c.execute( "INSERT INTO order_items (order_id, product_id, quantity, unit_price) VALUES (?,?,?,?)", (order_id, pid, qty, round(price, 2)), ) c.execute("UPDATE orders SET total_amount=? WHERE order_id=?", (round(total, 2), order_id)) c.execute(""" SELECT oi.item_id, oi.order_id, oi.product_id, oi.unit_price, oi.quantity, o.order_date, cu.region FROM order_items oi JOIN orders o ON oi.order_id = o.order_id JOIN customers cu ON o.customer_id = cu.customer_id """) items = c.fetchall() defect_reasons = ["defective_unit", "stopped_working", "poor_audio_quality", "battery_issue"] normal_reasons = ["changed_mind", "wrong_size", "found_cheaper", "not_as_expected"] speaker_defect_reasons = ["audio_distortion", "bluetooth_disconnect", "battery_issue", "stopped_working"] for _, order_id, product_id, unit_price, qty, order_date, region in items: if product_id == 6 and region == "West": prob = 0.38 reasons = defect_reasons elif product_id == 6: prob = 0.08 reasons = defect_reasons + normal_reasons elif product_id == 7: prob = 0.12 reasons = speaker_defect_reasons else: prob = 0.04 reasons = normal_reasons if rng.random() < prob: ret_date = (datetime.strptime(order_date, "%Y-%m-%d") + timedelta(days=rng.randint(3, 14))).strftime("%Y-%m-%d") c.execute( "INSERT INTO returns (order_id, product_id, return_date, reason, refund_amount) VALUES (?,?,?,?,?)", (order_id, product_id, ret_date, rng.choice(reasons), round(unit_price * qty, 2)), ) # -- Shipping records for every order ------------------------------------ QUICKSHIP_DELAY_START = datetime(2024, 2, 10) c.execute("SELECT order_id, order_date, customer_id FROM orders") all_orders = c.fetchall() cust_region_map = {cu[0]: cu[3] for cu in customers} for order_id, order_date_str, cust_id in all_orders: order_dt = datetime.strptime(order_date_str, "%Y-%m-%d") region = cust_region_map[cust_id] if region == "Midwest": carrier = rng.choices(CARRIERS, weights=[0.40, 0.35, 0.25])[0] else: carrier = rng.choices(CARRIERS, weights=[0.25, 0.40, 0.35])[0] ship_dt = order_dt + timedelta(days=rng.randint(0, 1)) base_transit = rng.randint(2, 4) if carrier == "QuickShip" and region == "Midwest" and order_dt >= QUICKSHIP_DELAY_START: extra_delay = rng.randint(5, 10) status = "delayed" elif carrier == "FastFreight": extra_delay = rng.randint(0, 2) status = "delivered" else: extra_delay = 0 status = "delivered" delivery_dt = ship_dt + timedelta(days=base_transit + extra_delay) if status == "delayed" and rng.random() < 0.7: status = "delivered" c.execute( "INSERT INTO shipping (order_id, carrier, ship_date, delivery_date, status) " "VALUES (?,?,?,?,?)", (order_id, carrier, ship_dt.strftime("%Y-%m-%d"), delivery_dt.strftime("%Y-%m-%d"), status), ) # -- Support tickets ----------------------------------------------------- ticket_priorities = ["low", "medium", "high", "critical"] ticket_resolutions = ["open", "resolved", "escalated"] for day in _date_range(START, END): date_str = day.strftime("%Y-%m-%d") after_qs_issues = day >= QUICKSHIP_DELAY_START for region_name in REGIONS: region_custs = [cu for cu in customers if cu[3] == region_name] # Delivery delay tickets: spike in Midwest after QuickShip issues if region_name == "Midwest" and after_qs_issues: n_delay = rng.randint(3, 6) else: n_delay = rng.randint(0, 1) for _ in range(n_delay): cu = rng.choice(region_custs) pri = rng.choices(ticket_priorities, weights=[0.1, 0.3, 0.4, 0.2])[0] res = rng.choices(ticket_resolutions, weights=[0.3, 0.5, 0.2])[0] c.execute( "INSERT INTO support_tickets " "(customer_id, product_id, created_date, category, priority, resolution_status) " "VALUES (?,?,?,?,?,?)", (cu[0], None, date_str, "delivery_delay", pri, res), ) # Product defect tickets: elevated for AudioTech products (6 in West, 7 everywhere) if region_name == "West": n_defect = rng.randint(1, 3) else: n_defect = 1 if rng.random() < 0.3 else 0 for _ in range(n_defect): cu = rng.choice(region_custs) pid = 6 if region_name == "West" or rng.random() < 0.4 else rng.randint(1, 20) pri = rng.choices(ticket_priorities, weights=[0.1, 0.3, 0.4, 0.2])[0] res = rng.choices(ticket_resolutions, weights=[0.4, 0.4, 0.2])[0] c.execute( "INSERT INTO support_tickets " "(customer_id, product_id, created_date, category, priority, resolution_status) " "VALUES (?,?,?,?,?,?)", (cu[0], pid, date_str, "product_defect", pri, res), ) # Product 7 (Bluetooth Speaker) defect tickets across all regions if rng.random() < 0.45: cu = rng.choice(region_custs) pri = rng.choices(ticket_priorities, weights=[0.1, 0.4, 0.35, 0.15])[0] res = rng.choices(ticket_resolutions, weights=[0.35, 0.45, 0.2])[0] c.execute( "INSERT INTO support_tickets " "(customer_id, product_id, created_date, category, priority, resolution_status) " "VALUES (?,?,?,?,?,?)", (cu[0], 7, date_str, "product_defect", pri, res), ) # Billing issue tickets: evenly spread (red herring / noise) if rng.random() < 0.25: cu = rng.choice(region_custs) c.execute( "INSERT INTO support_tickets " "(customer_id, product_id, created_date, category, priority, resolution_status) " "VALUES (?,?,?,?,?,?)", (cu[0], None, date_str, "billing_issue", rng.choice(ticket_priorities), rng.choice(ticket_resolutions)), ) # General inquiry: background noise if rng.random() < 0.35: cu = rng.choice(region_custs) c.execute( "INSERT INTO support_tickets " "(customer_id, product_id, created_date, category, priority, resolution_status) " "VALUES (?,?,?,?,?,?)", (cu[0], None, date_str, "general_inquiry", "low", rng.choice(["resolved", "open"])), ) # -- Inventory log ------------------------------------------------------- # Daily stock levels per product per warehouse region. # Product 4 (Monitor 27-inch) stocks out in West during promo. base_stock = {} for p in PRODUCTS: pid = p[0] if pid in (1, 2, 3, 4, 5): # electronics — higher stock base_stock[pid] = 200 elif pid <= 10: # accessories base_stock[pid] = 350 elif pid <= 15: # software (digital) base_stock[pid] = 9999 else: # peripherals base_stock[pid] = 250 for day in _date_range(START, END): date_str = day.strftime("%Y-%m-%d") is_promo = PROMO_S <= day <= PROMO_E for region_name in REGIONS: for p in PRODUCTS: pid = p[0] stock = base_stock[pid] daily_sold = rng.randint(2, 8) if is_promo: daily_sold = rng.randint(5, 15) # Product 4 stockout in West during promo if pid == 4 and region_name == "West" and is_promo: stock = rng.randint(0, 2) daily_sold = rng.randint(0, 1) else: stock = max(stock - daily_sold + rng.randint(1, 6), 10) # Product 6 fluctuates in West but never stocks out (red herring) if pid == 6 and region_name == "West": stock = rng.randint(30, 80) reorder = 0 if stock < 20 and pid <= 15: reorder = rng.randint(50, 100) c.execute( "INSERT INTO inventory_log " "(product_id, log_date, units_in_stock, units_ordered, warehouse_region) " "VALUES (?,?,?,?,?)", (pid, date_str, stock, reorder, region_name), ) # -- Fraudulent accounts --------------------------------------------------- # ~15 fake accounts in Southeast, Consumer, all signed up late Feb, # placing high-value Electronics orders (products 1 & 2). fraud_customers = [] for i in range(15): cid = 151 + i first = rng.choice(_FIRST) last = rng.choice(_LAST) name = f"{first} {last}" email = f"{first.lower()}.{last.lower()}{cid}@techmart.com" signup = (datetime(2024, 2, 20) + timedelta(days=rng.randint(0, 7))).strftime("%Y-%m-%d") c.execute("INSERT INTO customers VALUES (?,?,?,?,?,?)", (cid, name, email, "Southeast", "Consumer", signup)) fraud_customers.append(cid) customers.append((cid, name, email, "Southeast", "Consumer", signup)) cust_region_map.update({cid: "Southeast" for cid in fraud_customers}) FRAUD_ORDER_START = datetime(2024, 2, 25) FRAUD_ORDER_END = datetime(2024, 3, 10) for cid in fraud_customers: n_orders = rng.randint(3, 5) for _ in range(n_orders): order_day = FRAUD_ORDER_START + timedelta( days=rng.randint(0, (FRAUD_ORDER_END - FRAUD_ORDER_START).days)) date_str = order_day.strftime("%Y-%m-%d") fraud_pid = rng.choice([1, 2]) qty = rng.randint(1, 2) price = _effective_price(base_prices, changes_by_pid, fraud_pid, date_str) is_promo_day = PROMO_S <= order_day <= PROMO_E if is_promo_day: price = round(price * 0.75, 2) total = round(price * qty, 2) c.execute( "INSERT INTO orders (customer_id, order_date, status, total_amount) VALUES (?,?,?,?)", (cid, date_str, "completed", total), ) oid = c.lastrowid c.execute( "INSERT INTO order_items (order_id, product_id, quantity, unit_price) VALUES (?,?,?,?)", (oid, fraud_pid, qty, round(price, 2)), ) ship_dt = order_day + timedelta(days=rng.randint(0, 1)) delivery_dt = ship_dt + timedelta(days=rng.randint(2, 4)) c.execute( "INSERT INTO shipping (order_id, carrier, ship_date, delivery_date, status) " "VALUES (?,?,?,?,?)", (oid, "FastFreight", ship_dt.strftime("%Y-%m-%d"), delivery_dt.strftime("%Y-%m-%d"), "delivered"), ) # -- Marketing spend ------------------------------------------------------- # Heavy acquisition spend (search_ads, social_media) in Feb/Mar. # Email (retention) drops off after Jan. Southeast gets a big bump in Feb # (red herring for fraud task). acq_channels = ["search_ads", "social_media", "display_ads", "affiliate"] for day in _date_range(START, END): date_str = day.strftime("%Y-%m-%d") month = day.month for region_name in REGIONS: # Retention channel: email if month == 1: email_spend = round(rng.uniform(200, 400), 2) else: email_spend = round(rng.uniform(20, 60), 2) c.execute( "INSERT INTO marketing_spend (channel, campaign_name, region, spend_date, amount) " "VALUES (?,?,?,?,?)", ("email", "Customer Retention", region_name, date_str, email_spend), ) # Acquisition channels for ch in acq_channels: if month == 1: base_spend = rng.uniform(100, 200) else: base_spend = rng.uniform(300, 600) if region_name == "Southeast" and month >= 2: base_spend *= 1.5 c.execute( "INSERT INTO marketing_spend (channel, campaign_name, region, spend_date, amount) " "VALUES (?,?,?,?,?)", (ch, "New Customer Acquisition", region_name, date_str, round(base_spend, 2)), ) conn.commit() return conn def get_schema_info(conn: sqlite3.Connection) -> str: """Human-readable database schema for the LLM agent.""" c = conn.cursor() c.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name") tables = [r[0] for r in c.fetchall()] parts = ["DATABASE SCHEMA", "=" * 50, ""] for table in tables: c.execute(f"SELECT COUNT(*) FROM {table}") count = c.fetchone()[0] parts.append(f"Table: {table} ({count} rows)") c.execute(f"PRAGMA table_info({table})") for col in c.fetchall(): pk = " [PK]" if col[5] else "" parts.append(f" - {col[1]} {col[2]}{pk}") if table == "customers": c.execute("SELECT DISTINCT region FROM customers ORDER BY region") parts.append(f" Regions: {', '.join(r[0] for r in c.fetchall())}") c.execute("SELECT DISTINCT segment FROM customers ORDER BY segment") parts.append(f" Segments: {', '.join(r[0] for r in c.fetchall())}") elif table == "products": c.execute("SELECT DISTINCT category FROM products ORDER BY category") parts.append(f" Categories: {', '.join(r[0] for r in c.fetchall())}") c.execute("SELECT DISTINCT supplier FROM products ORDER BY supplier") parts.append(f" Suppliers: {', '.join(r[0] for r in c.fetchall())}") elif table == "shipping": c.execute("SELECT DISTINCT carrier FROM shipping ORDER BY carrier") parts.append(f" Carriers: {', '.join(r[0] for r in c.fetchall())}") c.execute("SELECT DISTINCT status FROM shipping ORDER BY status") parts.append(f" Statuses: {', '.join(r[0] for r in c.fetchall())}") elif table == "support_tickets": c.execute("SELECT DISTINCT category FROM support_tickets ORDER BY category") parts.append(f" Categories: {', '.join(r[0] for r in c.fetchall())}") c.execute("SELECT DISTINCT priority FROM support_tickets ORDER BY priority") parts.append(f" Priorities: {', '.join(r[0] for r in c.fetchall())}") elif table == "inventory_log": c.execute("SELECT DISTINCT warehouse_region FROM inventory_log ORDER BY warehouse_region") parts.append(f" Warehouse regions: {', '.join(r[0] for r in c.fetchall())}") elif table == "marketing_spend": c.execute("SELECT DISTINCT channel FROM marketing_spend ORDER BY channel") parts.append(f" Channels: {', '.join(r[0] for r in c.fetchall())}") c.execute("SELECT DISTINCT campaign_name FROM marketing_spend ORDER BY campaign_name") parts.append(f" Campaigns: {', '.join(r[0] for r in c.fetchall())}") parts.append("") parts += [ "=" * 50, "Data spans: 2024-01-01 to 2024-03-15", "All dates stored as YYYY-MM-DD text.", "Use standard SQLite functions (strftime, date, etc.).", ] return "\n".join(parts)