Spaces:

SalwaM
/

TestForge2AI

Running

App Files Files Community

TestForge2AI / app.py

SalwaM

Update app.py

e2a6267 verified 24 days ago

raw

history blame contribute delete

24.5 kB

	import gradio as gr
	from datetime import datetime
	from groq import Groq
	import json
	import os
	import traceback
	import re

	# --- 1. API KEY ---
	# The Groq credential is read from the ``fristapi`` environment variable;
	# start-up is aborted when it is missing or empty.
	api_key_coder = os.getenv('fristapi')
	if api_key_coder is None or api_key_coder == "":
	    raise ValueError("Groq API key not found. Set fristapi environment variable.")

	# --- 2. LLM CLIENT ---
	class GroqLLM:
	    """Thin wrapper around the Groq chat-completions client.

	    Stores the client together with the model name and sampling temperature
	    that are sent with every request.
	    """

	    def __init__(self, api_key, model="llama-3.3-70b-versatile", temperature=0.7):
	        self.client = Groq(api_key=api_key)
	        self.model = model
	        self.temperature = temperature

	    def invoke(self, prompt):
	        """Send ``prompt`` as a single user message and return the reply text.

	        Any exception raised while building or sending the request is turned
	        into the string ``"LLM Error: <message>"`` instead of propagating.
	        """
	        try:
	            create = self.client.chat.completions.create
	            request = {
	                "model": self.model,
	                "messages": [{"role": "user", "content": prompt}],
	                "temperature": self.temperature,
	                "max_tokens": 4000,
	            }
	            completion = create(**request)
	            first_choice = completion.choices[0]
	            return first_choice.message.content
	        except Exception as exc:
	            return f"LLM Error: {str(exc)}"

	# Module-level LLM client built with the class defaults for model and
	# temperature; it is handed to SyntheticDataGenerator further down.
	llm = GroqLLM(api_key=api_key_coder)

	class SyntheticDataGenerator:
	    """Generates synthetic test data in multiple formats.

	    The class only assembles prompts; the data itself is produced by the
	    ``llm`` object, which must provide ``invoke(prompt)`` returning text.
	    """

	    # Maps each supported ``data_type`` key to the wording used in the prompt.
	    FORMAT_DESCRIPTIONS = {
	        "json": "JSON (array of objects)",
	        "csv": "CSV (comma-separated values with headers)",
	        "sql": "SQL INSERT statements",
	        "xml": "XML document",
	        "python_dict": "Python list of dictionaries",
	        "yaml": "YAML format",
	        "excel_style": "Table format (markdown)",
	    }

	    # Defaults used by ``generate_bulk_data`` when the caller supplies none.
	    DEFAULT_BULK_FORMATS = ("json", "csv", "sql")
	    DEFAULT_BULK_COUNTS = (3, 5, 10)

	    def __init__(self, llm):
	        self.llm = llm

	    def generate_data(self, schema_description, data_type="json", language="Python", record_count=5):
	        """Generate synthetic test data based on a schema description.

	        Args:
	            schema_description: Schema as text, or a dict/list that is
	                serialised to indented JSON before being embedded.
	            data_type: Key of ``FORMAT_DESCRIPTIONS``; unknown keys are
	                described to the model as plain "JSON".
	            language: Programming language mentioned in the prompt.
	            record_count: Number of records requested from the model.

	        Returns:
	            Whatever ``self.llm.invoke`` returns for the assembled prompt.
	        """
	        if isinstance(schema_description, (dict, list)):
	            schema_description_str = json.dumps(schema_description, indent=2)
	        else:
	            schema_description_str = schema_description

	        output_format = self.FORMAT_DESCRIPTIONS.get(data_type, "JSON")
	        # The markdown-table format is fenced as ``markdown``; every other
	        # format uses its own key as the fence language.
	        fence_language = "markdown" if data_type == "excel_style" else data_type

	        # The data fence is explicitly closed so that callers can reliably
	        # extract the generated block from the reply.
	        prompt = f"""
	You are a TEST DATA ENGINEER specializing in synthetic data generation.
	Generate realistic, diverse, and comprehensive test data.
	=====================
	CONFIGURATION
	=====================
	Data Schema: {schema_description_str}
	Output Format: {output_format}
	Number of Records: {record_count}
	Programming Context: {language}
	=====================
	REQUIREMENTS
	=====================
	1. Generate {record_count} unique, realistic records
	2. Include edge cases and boundary values
	3. Ensure data variety (different types, values)
	4. Make data look production-like
	5. Include at least one record with null/empty values (if applicable)
	6. Include at least one record with extreme values
	=====================
	OUTPUT STRUCTURE
	=====================
	Generate the response with:
	## 📊 Data Generation Summary
	- Schema: [brief description]
	- Format: {output_format}
	- Records: {record_count}
	- Language Context: {language}
	## 📝 Generated Test Data
	```{fence_language}
	[generated data here]
	```
	## 🔍 Data Quality Notes
	[Special cases included]
	[Edge values]
	[Validation notes]

	## 💡 Usage Example
	[How to use this data in {language} tests]
	"""
	        return self.llm.invoke(prompt)

	    # ============================================
	    # Synthetic Data Generator: Bulk Function
	    # ============================================

	    def generate_bulk_data(self, schema_description, formats=None, record_counts=None):
	        """Generate data in multiple formats at once.

	        Args:
	            schema_description: Passed through to ``generate_data``.
	            formats: Iterable of format keys; defaults to json, csv and sql.
	            record_counts: Record count per format, matched by position.
	                When fewer counts than formats are given, the last count is
	                reused for the remaining formats (so a single count applies
	                to every format). An empty list falls back to 5 records.

	        Returns:
	            Dict mapping each format key to the generated text.
	        """
	        if formats is None:
	            formats = list(self.DEFAULT_BULK_FORMATS)
	        if record_counts is None:
	            record_counts = list(self.DEFAULT_BULK_COUNTS)

	        results = {}
	        for position, format_type in enumerate(formats):
	            results[format_type] = self.generate_data(
	                schema_description=schema_description,
	                data_type=format_type,
	                record_count=self._count_for(record_counts, position),
	            )
	        return results

	    @staticmethod
	    def _count_for(record_counts, position):
	        """Pick the record count for the format at ``position`` without indexing past the end."""
	        if not record_counts:
	            return 5
	        if position < len(record_counts):
	            return record_counts[position]
	        return record_counts[-1]


	# Initialize the synthetic data generator. This single module-level instance
	# wraps the shared ``llm`` client and is the object that
	# generate_synthetic_data() calls for every request.
	synthetic_generator = SyntheticDataGenerator(llm)


	# ============================================
	# Template Functions
	# ============================================

	def load_schema_template(template_name):
	    """Look up the schema text registered under ``template_name``.

	    Names that are not registered resolve to the ``"custom"`` schema.
	    """
	    schemas = {
	        "custom": '''{
	"user_id": "uuid",
	"full_name": "name",
	"email": "email",
	"age": {"type": "int", "min": 18, "max": 99},
	"is_active": "boolean",
	"country": "country"
	}''',
	        "user_profile": '''{
	"user_id": "integer",
	"username": "string (3-20 chars)",
	"email": "email format",
	"age": "integer (18-80)",
	"country": "ISO country code",
	"is_premium": "boolean",
	"registration_date": "date (last 2 years)",
	"last_login": "datetime"
	}''',
	        "ecommerce_order": '''{
	"order_id": "string (format: ORD-XXXX)",
	"customer_name": "string",
	"email": "email",
	"items": [
	{
	"product_id": "string",
	"quantity": "integer (1-10)",
	"price": "decimal (10.99-999.99)"
	}
	],
	"total_amount": "decimal",
	"status": "enum(pending,shipped,delivered,cancelled)",
	"order_date": "datetime"
	}''',
	        "api_request_log": '''{
	"request_id": "uuid",
	"endpoint": "string",
	"method": "enum(GET,POST,PUT,DELETE)",
	"status_code": "integer (200,400,401,404,500)",
	"response_time_ms": "integer (50-5000)",
	"timestamp": "datetime",
	"user_agent": "string",
	"ip_address": "ipv4"
	}''',
	        "contact_info": '''{
	"contact_id": "integer",
	"first_name": "string",
	"last_name": "string",
	"email": "email",
	"phone": "phone number",
	"address": {
	"street": "string",
	"city": "string",
	"state": "string",
	"zip_code": "string",
	"country": "string"
	},
	"company": "string",
	"notes": "string (optional)"
	}''',
	        "financial_transaction": '''{
	"transaction_id": "uuid",
	"account_number": "string (format: ****1234)",
	"transaction_type": "enum(deposit,withdrawal,transfer,payment)",
	"amount": "decimal (0.01-10000.00)",
	"currency": "enum(USD,EUR,GBP,JPY)",
	"timestamp": "datetime",
	"status": "enum(pending,completed,failed,reversed)",
	"description": "string"
	}''',
	        "healthcare_patient": '''{
	"patient_id": "string (format: PAT-XXXX)",
	"first_name": "string",
	"last_name": "string",
	"date_of_birth": "date (1950-2020)",
	"gender": "enum(M,F,Other)",
	"blood_type": "enum(A+,A-,B+,B-,AB+,AB-,O+,O-)",
	"allergies": "array of strings",
	"medications": "array of strings",
	"last_visit": "datetime",
	"insurance_provider": "string"
	}'''
	    }
	    # Unknown names fall back to the generic "custom" schema.
	    resolved = template_name if template_name in schemas else "custom"
	    return schemas[resolved]

	def get_output_format_description(output_type):
	    """Give the one-line, human-readable blurb for an output format key.

	    Keys without a registered blurb yield a generic default sentence.
	    """
	    blurbs = {
	        "json": "📄 JSON format - Array of objects, ideal for API testing and data interchange",
	        "csv": "📊 CSV format - Comma-separated values, perfect for data import and spreadsheet analysis",
	        "sql": "🗄️ SQL format - INSERT statements with CREATE TABLE, ready for database seeding",
	        "xml": "📋 XML format - Structured document format for enterprise systems",
	        "python_dict": "🐍 Python format - List of dictionaries, ready to use in Python tests",
	        "yaml": "⚙️ YAML format - Human-readable configuration format",
	        "excel_style": "📑 Table format - Markdown table, easy to read and document",
	    }
	    if output_type in blurbs:
	        return blurbs[output_type]
	    return "📄 Standard format for test data"
	# ============================================
	# Gradio UI Functions
	# ============================================

	# Maximum number of characters of generated data embedded in the code template.
	_PREVIEW_CHARS = 500
	# Maximum number of characters of the raw reply shown when no data block is found.
	_MAX_RAW_CHARS = 2000


	def _clamp_record_count(record_count, default=5, low=1, high=100):
	    """Coerce ``record_count`` to an int within ``[low, high]``, or ``default`` if it is not a number."""
	    try:
	        count = int(record_count)
	    except (TypeError, ValueError, OverflowError):
	        return default
	    return max(low, min(high, count))


	def _extract_fenced_block(text, label):
	    """Return the body of a Markdown code fence found in ``text``, or ``None``.

	    A fence tagged with ``label`` is preferred; otherwise the first fence of
	    any kind is used. A fence that is never closed runs to the end of the text.
	    """
	    tail = r"[ \t]*\r?\n(.*?)(?:\r?\n)?[ \t]*(?:```|\Z)"
	    for opener in (rf"```{re.escape(label)}", r"```[^\n`]*"):
	        found = re.search(opener + tail, text, re.DOTALL | re.IGNORECASE)
	        if found:
	            return found.group(1).strip()
	    return None


	def _triple_quoted(text):
	    """Render ``text`` as a triple-quoted Python literal that survives quotes and backslashes."""
	    body = text.replace("\\", "\\\\").replace('"""', '\\"\\"\\"')
	    if body.endswith('"'):
	        # A trailing quote would otherwise merge with the closing delimiter.
	        body = body[:-1] + '\\"'
	    return '"""' + body + '"""'


	def _build_usage_example(language, data_format, generated_data):
	    """Build a short Python snippet showing how to load ``generated_data`` in a test."""
	    preview = generated_data[:_PREVIEW_CHARS]
	    literal = _triple_quoted(preview)

	    lines = [f"# Generated test data for {language}"]
	    if len(generated_data) > _PREVIEW_CHARS:
	        lines.append(
	            f"# NOTE: data is truncated to {_PREVIEW_CHARS} characters here; "
	            "paste the full output before running."
	        )

	    bodies = {
	        "json": [
	            "# Created with Synthetic Data Generator",
	            "import json",
	            "",
	            f"test_data = json.loads({literal})",
	            "",
	            "for record in test_data:",
	            "    # Your test logic here",
	            '    print(f"Processing: {record}")',
	        ],
	        "python_dict": [
	            "# Test data as Python dictionary/list",
	            f"test_data = {preview}",
	            "",
	            "for record in test_data:",
	            '    print(f"Testing with: {record}")',
	        ],
	        "csv": [
	            "import csv",
	            "from io import StringIO",
	            "",
	            f"csv_data = {literal}",
	            "",
	            "reader = csv.DictReader(StringIO(csv_data))",
	            "for row in reader:",
	            '    print(f"Processing row: {row}")',
	        ],
	        "sql": [
	            "import sqlite3",
	            "",
	            f"sql_statements = {literal}",
	            "",
	            "conn = sqlite3.connect(':memory:')",
	            "cursor = conn.cursor()",
	            "for statement in sql_statements.split(';'):",
	            "    if statement.strip():",
	            "        cursor.execute(statement)",
	            "conn.commit()",
	            'print("Database seeded successfully!")',
	        ],
	        "xml": [
	            "import xml.etree.ElementTree as ET",
	            "",
	            f"xml_data = {literal}",
	            "root = ET.fromstring(xml_data)",
	            "for record in root:",
	            '    print(f"Record: {record.tag}")',
	        ],
	        "yaml": [
	            "import yaml",
	            "",
	            f"yaml_data = {literal}",
	            "test_data = yaml.safe_load(yaml_data)",
	            "for record in test_data:",
	            '    print(f"Testing with: {record}")',
	        ],
	    }

	    body = bodies.get(data_format)
	    if body is None:
	        # Formats without a loader (e.g. the markdown table) are shown as comments.
	        body = ["# Generated Data:"]
	        body += ["# " + row for row in preview.splitlines()]
	        body.append("# Use this data in your tests as needed")

	    return "\n".join(lines + body) + "\n"


	def generate_synthetic_data(language, schema_json, record_count, data_format, template_name):
	    """Main generation function for Gradio.

	    Args:
	        language: Target language mentioned in the prompt and the code template.
	        schema_json: Schema text entered by the user.
	        record_count: Requested number of records; clamped to 1-100, and
	            replaced by 5 when it cannot be read as a number.
	        data_format: Output format key (``json``, ``csv``, ...).
	        template_name: Name of the selected schema template (shown in the stats).

	    Returns:
	        A 4-tuple ``(data, code_template, stats_markdown, schema_json)``. The
	        same shape is returned for missing input, LLM failures and unexpected
	        exceptions, with the messages describing the problem.
	    """
	    try:
	        # Validate schema
	        if not schema_json or not schema_json.strip():
	            return (
	                "### ⚠️ No Data Generated\n\nPlease provide a schema definition.",
	                "# No data generated\nPlease provide a schema definition.",
	                "### 📊 Generation Statistics\nNo data generated yet",
	                schema_json,
	            )

	        count = _clamp_record_count(record_count)

	        result = synthetic_generator.generate_data(
	            schema_description=schema_json,
	            data_type=data_format,
	            language=language,
	            record_count=count,
	        )

	        # GroqLLM.invoke reports failures as an "LLM Error: ..." string rather
	        # than raising, so that case must not be presented as generated data.
	        if not result or result.startswith("LLM Error:"):
	            reason = result or "The model returned an empty response."
	            return (
	                f"❌ {reason}",
	                f"# Error generating data\n\n{reason}",
	                f"### ❌ Generation Failed\n\nError: {reason}",
	                schema_json,
	            )

	        # The prompt asks for the data inside a fence tagged with the format
	        # name ("markdown" for the table format).
	        fence_label = "markdown" if data_format == "excel_style" else data_format
	        generated_data = _extract_fenced_block(result, fence_label)

	        if not generated_data:
	            # No data found in expected format, return raw result
	            shown = result
	            if len(result) > _MAX_RAW_CHARS:
	                shown = result[:_MAX_RAW_CHARS] + "\n\n... (truncated)"
	            return (
	                shown,
	                f"# Generated Output\n\n{result}",
	                "### ⚠️ Generation completed but format may need review\n\nThe data may not be in the expected format. Please check the output above.",
	                schema_json,
	            )

	        code_template = _build_usage_example(language, data_format, generated_data)

	        stats = "\n".join([
	            "### 📊 Generation Statistics",
	            "",
	            "| Metric | Value |",
	            "|---|---|",
	            f"| Records Generated | {count} |",
	            f"| Language | {language} |",
	            f"| Format | {data_format.upper()} |",
	            f"| Template | {template_name} |",
	            "| Status | ✅ Success |",
	            f"| Generated | {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} |",
	            "",
	            "Data Quality Notes:",
	            "",
	            "- ✅ Realistic test data created",
	            "- ✅ Includes edge cases and variations",
	            "- ✅ Ready for immediate use in tests",
	            "- ✅ Follows schema specifications",
	            "",
	            "Special Cases Included:",
	            "",
	            "- Boundary values",
	            "- Edge cases",
	            "- Null/empty values (where applicable)",
	            "- Extreme values",
	            "",
	        ])

	        return (generated_data, code_template, stats, schema_json)

	    except Exception as e:
	        # Top-level UI boundary: surface the failure in the output panels.
	        error_msg = f"❌ Error: {str(e)}\n\n{traceback.format_exc()}\n"
	        return (
	            error_msg,
	            f"# Error generating data\n\n{str(e)}",
	            f"### ❌ Generation Failed\n\nError: {str(e)}",
	            schema_json,
	        )


	def preview_schema(schema_json):
	    """Preview and validate a schema, returning Markdown for display.

	    Valid JSON is pretty-printed inside a fenced block (so Markdown keeps its
	    line breaks) together with a field count; anything else is shown as a
	    free-text description. Long previews are cut and explicitly marked as
	    truncated. Unexpected errors are reported in the returned string.
	    """

	    def clip(text, limit):
	        """Cut ``text`` to ``limit`` characters, marking the cut when one happens."""
	        if len(text) <= limit:
	            return text
	        return text[:limit] + "\n... (truncated)"

	    try:
	        if not schema_json or not schema_json.strip():
	            return "⚠️ No schema provided"

	        # Only the parse itself decides between the JSON and free-text views.
	        try:
	            parsed = json.loads(schema_json)
	        except json.JSONDecodeError:
	            return "\n".join([
	                "### 📝 Schema Description (Text Format)",
	                "",
	                "```text",
	                clip(schema_json, 300),
	                "```",
	                "",
	                "Note: The schema is in text format. The AI will interpret and generate appropriate test data.",
	                "",
	            ])

	        pretty = json.dumps(parsed, indent=2, ensure_ascii=False)
	        field_count = len(parsed) if isinstance(parsed, dict) else 'Multiple records'
	        return "\n".join([
	            "### ✅ Valid JSON Schema",
	            "Structure Preview:",
	            "",
	            "```json",
	            clip(pretty, 500),
	            "```",
	            "",
	            f"Fields Detected: {field_count}",
	            "",
	            "This schema will generate realistic test data based on the field definitions.",
	            "",
	        ])

	    except Exception as e:
	        return f"❌ Error previewing schema: {str(e)}"
	# ============================================
	# Gradio UI (Standalone with Home Tab)
	# ============================================

	with gr.Blocks(title="Synthetic Test Data Generator") as demo:
	    gr.Markdown("# 🎲 Synthetic Test Data Generator")
	    gr.Markdown("### Create Realistic Test Data for Your Applications with AI")

	    # An explicit Tabs container lets the Home button select the Generator tab
	    # by id instead of clicking a DOM node looked up from JavaScript, which
	    # fails with an error whenever the selector matches nothing.
	    with gr.Tabs() as main_tabs:
	        # ============================================
	        # Tab 0: Home
	        # ============================================
	        with gr.Tab("🏠 Home"):
	            gr.Markdown("""
	🎯 Welcome to Synthetic Test Data Generator
	... (المحتوى)
	""")

	            # Stats and Use Cases
	            with gr.Row():
	                with gr.Column():
	                    gr.Markdown("### 📊 Quick Stats")
	                    gr.Markdown("""
	- Formats Supported: 7
	- Max Records: 100 per generation
	- Templates: 7 pre-built
	- Languages: 4
	""")
	                with gr.Column():
	                    gr.Markdown("### 🎯 Use Cases")
	                    gr.Markdown("""
	- API Testing
	- Database Seeding
	- Load Testing
	- UI Testing
	- Integration Tests
	- Demo Data Creation
	""")

	            # Quick Action Button
	            gr.Markdown("---")
	            gr.Markdown("### 🔧 Quick Action")
	            quick_generate_btn = gr.Button("🚀 Go to Generator Tab", variant="primary")

	            # Switch to the tab registered below under id "generator".
	            quick_generate_btn.click(
	                fn=lambda: gr.update(selected="generator"),
	                inputs=[],
	                outputs=[main_tabs],
	            )

	        # ============================================
	        # Tab 1: Generator (Main) - must live inside the demo block
	        # ============================================
	        with gr.Tab("Generator", id="generator") as generator_tab:

	            with gr.Row():
	                with gr.Column(scale=1):
	                    gr.Markdown("### ⚙️ Configuration")

	                    # Language Selection
	                    data_language = gr.Dropdown(
	                        choices=["python", "javascript", "java", "csharp"],
	                        value="python",
	                        label="🎯 Target Language"
	                    )

	                    # Format Selection
	                    data_format = gr.Dropdown(
	                        choices=["json", "csv", "sql", "xml", "python_dict", "yaml", "excel_style"],
	                        value="json",
	                        label="📄 Output Format"
	                    )

	                    def describe_format(format_type):
	                        """Return the format blurb with the same prefix used for the initial value."""
	                        return "Format Info: " + get_output_format_description(format_type)

	                    # Format description
	                    format_desc = gr.Markdown(describe_format("json"))

	                    # Record count
	                    data_count = gr.Slider(
	                        minimum=1,
	                        maximum=100,
	                        value=10,
	                        step=1,
	                        label="📊 Number of Records"
	                    )

	                    gr.Markdown("### 📝 Schema Definition")

	                    # Template selection
	                    data_template = gr.Dropdown(
	                        choices=[
	                            "custom", "user_profile", "ecommerce_order", "api_request_log",
	                            "contact_info", "financial_transaction", "healthcare_patient"
	                        ],
	                        value="custom",
	                        label="📁 Schema Template"
	                    )

	                    # Schema input; starts with the same text the "custom"
	                    # template loads, so the two cannot drift apart.
	                    schema_input = gr.Textbox(
	                        value=load_schema_template("custom"),
	                        lines=15,
	                        label="Schema Definition"
	                    )

	                    # Schema preview
	                    preview_btn = gr.Button("🔍 Preview Schema", size="sm")
	                    schema_preview = gr.Markdown("")

	                    # Actions
	                    generate_btn = gr.Button("✨ Generate Test Data", variant="primary", size="lg")

	                with gr.Column(scale=1):
	                    gr.Markdown("### 📊 Generation Statistics")
	                    data_stats = gr.Markdown("No data generated yet")

	                    gr.Markdown("### 📝 Generated Data")
	                    data_output = gr.Code(label="Test Data", language="json", lines=15)

	                    gr.Markdown("### 💻 Ready-to-use Code Template")
	                    code_output = gr.Code(label="Code Template", language="python", lines=12)

	            # Helper functions
	            def update_code_language(format_type):
	                """Return an update that switches the data panel's highlighting language."""
	                lang_map = {
	                    "json": "json",
	                    "csv": "csv",
	                    "sql": "sql",
	                    "xml": "xml",
	                    "python_dict": "python",
	                    "yaml": "yaml",
	                    "excel_style": "markdown"
	                }
	                return gr.update(language=lang_map.get(format_type, "text"))

	            # Event Handlers
	            data_template.change(
	                fn=load_schema_template,
	                inputs=[data_template],
	                outputs=[schema_input]
	            )

	            preview_btn.click(
	                fn=preview_schema,
	                inputs=[schema_input],
	                outputs=[schema_preview]
	            )

	            generate_btn.click(
	                fn=generate_synthetic_data,
	                inputs=[data_language, schema_input, data_count, data_format, data_template],
	                outputs=[data_output, code_output, data_stats, schema_input]
	            )

	            data_format.change(
	                fn=describe_format,
	                inputs=[data_format],
	                outputs=[format_desc]
	            )

	            data_format.change(
	                fn=update_code_language,
	                inputs=[data_format],
	                outputs=[data_output]
	            )

	        # ============================================
	        # Tab 2: Templates Library
	        # ============================================
	        with gr.Tab("📚 Templates Library"):
	            gr.Markdown("# 📚 Schema Templates Library")
	            gr.Markdown("Browse and learn from these example schemas")

	            # Description shown next to each library template.
	            template_descriptions = {
	                "user_profile": "User Profile Schema\n\nPerfect for testing user management systems, authentication, and profile features.\n\nUse Cases:\n- User registration tests\n- Profile management\n- User search/filtering\n- Data export features",
	                "ecommerce_order": "E-commerce Order Schema\n\nIdeal for testing shopping carts, checkout flows, and order management systems.\n\nUse Cases:\n- Order creation tests\n- Cart calculations\n- Inventory management\n- Order history features",
	                "api_request_log": "API Request Log Schema\n\nGreat for testing logging systems, analytics, and monitoring tools.\n\nUse Cases:\n- API monitoring tests\n- Log analysis\n- Performance metrics\n- Error tracking",
	                "contact_info": "Contact Information Schema\n\nUseful for CRM systems, address books, and contact management features.\n\nUse Cases:\n- Contact import/export\n- Address validation\n- Phone number formatting\n- Data enrichment tests",
	                "financial_transaction": "Financial Transaction Schema\n\nEssential for banking, payment systems, and financial applications.\n\nUse Cases:\n- Payment processing tests\n- Transaction history\n- Fraud detection\n- Statement generation",
	                "healthcare_patient": "Healthcare Patient Schema\n\nDesigned for healthcare systems, patient management, and medical records.\n\nUse Cases:\n- Patient registration\n- Medical records\n- Appointment scheduling\n- Insurance verification",
	            }

	            def show_template_details(template_name):
	                """Return ``(schema_text, description)`` for a library template.

	                Unknown names resolve to ``user_profile`` for both values, so
	                the schema and its description always belong together.
	                """
	                if template_name not in template_descriptions:
	                    template_name = "user_profile"
	                return load_schema_template(template_name), template_descriptions[template_name]

	            # Load initial template
	            initial_schema, initial_desc = show_template_details("user_profile")

	            template_selector = gr.Dropdown(
	                choices=[
	                    "user_profile", "ecommerce_order", "api_request_log",
	                    "contact_info", "financial_transaction", "healthcare_patient"
	                ],
	                value="user_profile",
	                label="Select Template to View"
	            )

	            template_display = gr.Code(value=initial_schema, label="Template Schema", language="json", lines=20)
	            template_description = gr.Markdown(initial_desc)

	            template_selector.change(
	                fn=show_template_details,
	                inputs=[template_selector],
	                outputs=[template_display, template_description]
	            )

	            gr.Markdown("---")
	            gr.Markdown("### 💡 Tips for Creating Custom Schemas")
	            gr.Markdown("""
	- Use descriptive field names - The AI understands context (e.g., 'email' vs 'e')
	- Specify data types - Include hints like 'string', 'integer', 'decimal', 'date'
	- Add constraints - Use ranges like 'age: 18-80' or patterns like 'email format'
	- Include enums - For fields with limited options (e.g., 'status: pending,active,closed')
	- Structure nested data - Use JSON objects or arrays for complex relationships
	""")

	        # ============================================
	        # Tab 3: About & Help
	        # ============================================
	        with gr.Tab("ℹ️ About & Help"):
	            gr.Markdown("""
	ℹ️ About Synthetic Test Data Generator

	What is this tool?
	This tool uses AI (Groq's LLM) to generate realistic, production-like test data based on your schema definitions. It helps QA engineers and developers create comprehensive test datasets quickly and efficiently.

	... (باقي المحتوى)
	""")

	# ============================================
	# Launch Application
	# ============================================
	if __name__ == "__main__":
	    # Start-up banner: each message is framed by a 60-character rule.
	    rule = "=" * 60
	    banner = (
	        rule,
	        "🎲 Synthetic Test Data Generator",
	        rule,
	        "✅ All components loaded successfully!",
	        rule,
	        "🚀 Launching application...",
	        rule,
	    )
	    for banner_line in banner:
	        print(banner_line)

	    # Serve on all interfaces at port 7860, with a share link requested.
	    launch_options = {
	        "share": True,
	        "debug": False,
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "theme": gr.themes.Soft(),
	    }
	    demo.launch(**launch_options)