import json import re import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent)) from data_factory.templates import ALL_TEMPLATES # Define strict categorical swaps based on the exact schemas SWAP_RULES = { "ecommerce": [ (r"'gold'", r"gold", ["'silver'", "'bronze'"], ["silver", "bronze"]), (r"'delivered'", r"delivered", ["'pending'", "'processing'", "'shipped'", "'cancelled'"], ["pending", "processing", "shipped", "cancelled"]), (r"'India'", r"India", ["'USA'", "'Germany'", "'UK'", "'Canada'"], ["USA", "Germany", "UK", "Canada"]) ], "healthcare": [ (r"'severe'", r"severe", ["'mild'", "'moderate'"], ["mild", "moderate"]), (r"'completed'", r"completed", ["'scheduled'", "'cancelled'", "'no_show'"], ["scheduled", "cancelled", "no-show"]) ], "finance": [ (r"'active'", r"active", ["'dormant'", "'closed'"], ["dormant", "closed"]), (r"'credit'", r"credit", ["'debit'"], ["debit"]), (r"'verified'", r"verified", ["'pending'", "'rejected'"], ["pending", "rejected"]) ], "hr": [ (r"'active'", r"active", ["'resigned'", "'terminated'"], ["resigned", "terminated"]) ] } def generate_swaps(): expanded_templates = [] for template in ALL_TEMPLATES: expanded_templates.append(template) # Keep the original domain = template["domain"] if domain not in SWAP_RULES: continue for sql_target, nl_target, sql_replacements, nl_replacements in SWAP_RULES[domain]: if re.search(sql_target, template["sql"], re.IGNORECASE): for sql_repl, nl_repl in zip(sql_replacements, nl_replacements): new_template = template.copy() # Swap in SQL new_template["sql"] = re.sub(sql_target, sql_repl, template["sql"], flags=re.IGNORECASE) # Swap in NL and Description new_template["base_nl"] = re.sub(nl_target, nl_repl, template["base_nl"], flags=re.IGNORECASE) new_template["description"] = re.sub(nl_target, nl_repl, template["description"], flags=re.IGNORECASE) # Create a unique ID new_template["id"] = f"{template.get('id', 'temp')}_swap_{nl_repl.replace(' ', '_')}" expanded_templates.append(new_template) return expanded_templates if __name__ == "__main__": swapped = generate_swaps() print(f"Original Templates: {len(ALL_TEMPLATES)}") print(f"After Value Swapping: {len(swapped)}") with open("swapped_templates.json", "w") as f: json.dump(swapped, f, indent=2) print("Saved to swapped_templates.json")