# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM from transformers import AutoTokenizer, AutoModelForCausalLM class SQLModel: def __init__(self, model_name="google/flan-t5-base"): # self.tokenizer = AutoTokenizer.from_pretrained(model_name) # self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name) self.tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-it") self.model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it",) def generate_sql(self, natural_language_query): input_text = f"""You are a highly skilled SQL translator. Your task is to convert natural language descriptions of data queries into correct and optimized SQL statements. Here is the schema information for our database : Table: Employees - id (INT) - NAME (VARCHAR) - Department (VARCHAR) - Salary (INT) - Hire_Date (DATE) Table: Departments - ID (INT) - Name (VARCHAR) - Manager (VARCHAR) Here are a few examples: 1. **Input**: "Show me all employees in the Sales department." **Output**: SELECT * FROM Employees WHERE Department = 'Sales'; 2. **Input**: "Who is the manager of the Engineering department?" **Output**: SELECT Manager FROM Departments WHERE Name = 'Engineering'; 3. **Input**: "List all employees hired after 2021-01-01." **Output**: SELECT * FROM Employees WHERE Hire_Date > '2021-01-01'; 4. **Input**: "What is the total salary expense for the Marketing department?" **Output**: SELECT SUM(Salary) FROM Employees WHERE Department = 'Marketing'; 5. **Input**: "Find the average salary of employees in each department." **Output**: SELECT Department, AVG(Salary) AS average_salary FROM Employees GROUP BY Department; Please do not return additional text besides query. Please only answer queries which makes sense for the given schema. Else just return - "No information found" Now, translate the following natural language query into an syntactically correct SQL query: **Input**: {natural_language_query} **Output**: """ # input_text = f""" # Translate the following natural language query into a syntactically correct SQL query using the provided database schema. Output only the SQL query with no additional text or explanation. # Database Schema: # Table: Employees # - id (INT) # - NAME (VARCHAR) # - Department (VARCHAR) # - Salary (INT) # - Hire_Date (DATE) # Table: Departments # - ID (INT) # - Name (VARCHAR) # - Manager (VARCHAR) # Examples: # 1. Natural Language Query: "List all employees who were hired after '2020-01-01'." # Output: SELECT * FROM Employees WHERE Hire_Date > '2020-01-01'; # 2. Natural Language Query: "Retrieve the names and salaries of employees in the 'Sales' department." # Output: SELECT NAME, Salary FROM Employees WHERE Department = 'Sales'; # Now, translate the following query: # {natural_language_query} # """ # input_text = f"translate English to SQL: {natural_language_query}" # inputs = self.tokenizer(input_text, return_tensors="pt").input_ids # outputs = self.model.generate(inputs, max_new_tokens=100, do_sample=False) # sql_query = self.tokenizer.decode(outputs[0], skip_special_tokens=True) inputs = self.tokenizer(input_text, return_tensors="pt") outputs = self.model.generate(**inputs) sql_query = self.tokenizer.decode(outputs[0], skip_special_tokens=True) print(sql_query) return sql_query