| import gradio as gr |
| import torch |
| from transformers import T5ForConditionalGeneration, T5Tokenizer |
|
|
| |
# Directory the tokenizer and model weights are loaded from.
model_path = "./"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the tokenizer and the seq2seq model once at import time so that every
# request served by the UI reuses the same instances.
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)
model = model.to(device)
model.eval()  # inference only
|
|
def _build_prompt(schema, user_query, date_info=""):
    """Return the full instruction prompt sent to the model.

    Args:
        schema: Database schema text, inserted verbatim near the top.
        user_query: Natural-language request, inserted in double quotes.
        date_info: Optional date or date range. When non-blank it is added
            as a ``Date Info: "..."`` line directly after the user query;
            when blank or ``None`` nothing is added.
    """
    provided_dates = (date_info or "").strip()
    # The requirements below tell the model to use only "the dates provided in
    # the input parameters", so those dates have to be part of the prompt.
    # They are placed right after the user query, near the start of the text.
    date_section = f'\nDate Info: "{provided_dates}"' if provided_dates else ""

    return f"""Given the following database schema and requirements, generate a PostgreSQL query:

{schema}

User Query: "{user_query}"{date_section}

IMPORTANT REQUIREMENTS:
- Always filter by user_id = $1 for security
- The current year is 2025. You are working in this year!
- CRITICAL: Use ONLY the dates provided in the input parameters. Do NOT infer or change dates on your own!
- If date range is provided, use DATE(created_at) BETWEEN 'startDate' AND 'endDate'
- If single date is provided, use DATE(created_at) = 'YYYY-MM-DD'
- NEVER use hardcoded years like 2024 - always use the provided dates exactly as given
- Use proper SQL syntax
- CRITICAL: Generate ONLY simple SQL statements - NO WITH clauses, NO CTEs, NO complex subqueries
- Use direct SELECT, INSERT, UPDATE, DELETE statements only
- Keep queries simple and straightforward
- For INSERT statements, use the RETURNING clause
- For INSERT statements with specific date:
* Use INSERT INTO expenses (user_id, amount, category, note, created_at) VALUES ($1, amount_value, 'category_name', 'note_text', 'YYYY-MM-DD HH:MM:SS') RETURNING *
* If date is provided, use that specific date for created_at instead of NOW()
* Format the date as 'YYYY-MM-DD 00:00:00' for the specific date
- For SELECT statements:
* Use SIMPLE SELECT statements - NO WITH clauses, NO CTEs, NO complex subqueries
* If category is provided: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name'
* If keywords are provided: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%'
* If single date is provided: SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = 'YYYY-MM-DD'
* If date range is provided: SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If both category and keywords: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%'
* If both category and single date: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND DATE(created_at) = 'YYYY-MM-DD'
* If both category and date range: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If both keywords and single date: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) = 'YYYY-MM-DD'
* If both keywords and date range: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If all three (category, keywords, date): SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) = 'YYYY-MM-DD'
* If all three (category, keywords, date range): SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If none provided: SELECT * FROM expenses WHERE user_id = $1
* Return all relevant columns
* CRITICAL: Use ONLY simple SELECT statements, NO WITH clauses, NO CTEs
* CRITICAL: For date filtering, use DATE(created_at) = 'YYYY-MM-DD' for single dates, DATE(created_at) BETWEEN 'startDate' AND 'endDate' for date ranges
* CRITICAL: Use the EXACT dates provided in the input parameters. Do NOT infer or override dates from the user query wording.
* EXAMPLE: If date range is '2025-01-13' to '2025-01-19', use DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19'
- For UPDATE statements:
* Use UPDATE expenses SET amount = new_amount WHERE user_id = $1 AND LOWER(note) LIKE '%keyword%'
* If category is provided, also add AND category = 'category_name'
* Use RETURNING clause to return the updated record
- For DELETE statements:
* If category is provided: DELETE FROM expenses WHERE user_id = $1 AND category = 'exact_category_name'
* If keywords are provided: DELETE FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%'
* If both category and keywords: DELETE FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%'
* If no category and no keywords (general delete): DELETE FROM expenses WHERE user_id = $1
* Use RETURNING clause to return the deleted records
- Handle the user_id parameter safely
- If the query mentions a specific month (like "June"), filter by that month using EXTRACT(MONTH FROM created_at)
- If the query mentions a specific year, filter by that year using EXTRACT(YEAR FROM created_at)
- CRITICAL: If a specific date is provided (like "26th june 2025"), use exact date filtering: DATE(created_at) = 'YYYY-MM-DD'
- For date filtering, use proper PostgreSQL date functions
- Use EXACT category names from the list above
- For keyword searches in UPDATE/DELETE, use LOWER(note) LIKE '%exact_keyword%' pattern
- For category searches, use category = 'exact_category_name'
- When both category and keywords are provided for SELECT, prioritize category filtering
- CRITICAL: If the intent is UPDATE, generate an UPDATE query, NOT a SELECT query
- CRITICAL: If the intent is DELETE, generate a DELETE query, NOT a SELECT query
- NEVER use placeholder text like '%keyword%' or 'category_name' - use the actual values provided

CORRECT SQL EXAMPLES:
- SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = '2025-06-26';
- SELECT SUM(amount) FROM expenses WHERE user_id = $1 AND DATE(created_at) = '2025-06-26';
- SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19';
- SELECT SUM(amount) FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19';
- SELECT * FROM expenses WHERE user_id = $1 AND category = 'Food & Dining';
- SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%lunch%';
- INSERT INTO expenses (user_id, amount, category, note, created_at) VALUES ($1, 500.00, 'Health & Fitness', 'Spent 500 rupees yesterday on medicines', '2025-01-19 00:00:00') RETURNING *;
- EXAMPLE: If date range is '2025-01-13' to '2025-01-19', use DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19'

INCORRECT SQL EXAMPLES (DO NOT USE):
- WITH filtered_date AS (SELECT '2025-06-24' AS target_date) SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = (SELECT target_date FROM filtered_date);
- WITH filtered_expenses AS (SELECT * FROM expenses WHERE user_id = $1) SELECT * FROM filtered_expenses;


SQL Query:"""


def generate_sql(schema, user_query, date_info=""):
    """Generate a SQL query for a natural-language request.

    Builds the instruction prompt from the schema, the user query and the
    optional date information, runs it through the module-level ``model``
    and decodes the first generated sequence.

    Args:
        schema: Database schema text (e.g. ``CREATE TABLE`` statements).
        user_query: The user's request in natural language.
        date_info: Optional date or date range supplied by the caller.
            Blank or ``None`` leaves the prompt without a date line.

    Returns:
        The decoded model output as a string, with special tokens removed.
    """
    prompt = _build_prompt(schema, user_query, date_info)

    # NOTE(review): truncation=True is used without max_length, so the
    # tokenizer presumably cuts the input at its configured maximum length;
    # the prompt is long, so its tail may never reach the model - confirm.
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, padding=True
    ).to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=256,
            # Decoding starts from the pad token, as in the original call.
            decoder_start_token_id=tokenizer.pad_token_id,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
| |
# Input widgets, listed in the positional order of generate_sql's parameters
# (schema, user_query, date_info).
schema_box = gr.Textbox(
    label="Database Schema",
    lines=12,
    placeholder="CREATE TABLE ...",
)
query_box = gr.Textbox(
    label="User Query",
    placeholder="How much did I spend on food last week?",
)
date_box = gr.Textbox(
    label="Date Info (optional)",
    placeholder="2025-06-12 or 2025-06-01 to 2025-06-07",
)

# Single text output showing the SQL returned by generate_sql.
sql_box = gr.Textbox(label="Generated SQL Query")

iface = gr.Interface(
    fn=generate_sql,
    inputs=[schema_box, query_box, date_box],
    outputs=sql_box,
    title="HISAB AI - Natural Language to SQL",
    description="Enter your schema, user query and date (optional). Model will output SQL query.",
)

# Start the web UI as soon as the script is run.
iface.launch()
|
|