# neura_self_hosted.py — repository: self-trained2 (author: DeepImagix)
# Revision 299ff93 (verified) — "Update neura_self_hosted.py"
import os
import re
import json
import logging
from datetime import datetime
from fastapi import FastAPI, HTTPException, Body
from pydantic import BaseModel, Field
# --- NEW IMPORTS for the Self-Hosted Generative Engine ---
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
# --- Optional, for tool use ---
import httpx
# --- Basic Configuration ---
logging.basicConfig(level=logging.INFO)
WEATHER_API_KEY = os.getenv("WEATHER_API_KEY", "") # For the weather tool

# --- 🧠 LOAD THE LOCAL GENERATIVE AI MODEL ---
# Runs once at import time. The model weights are downloaded automatically on
# the first run, then loaded fully into RAM. This build is CPU-only: no GPU or
# quantization settings are used, so generation is slow but runs anywhere with
# enough memory.
logging.info("Loading self-hosted generative model for CPU...")
try:
    MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Force the model to load on the CPU (no GPU / quantization code paths).
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="cpu",
        trust_remote_code=True,
    )
    local_ai_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    logging.info(f"✅ Model '{MODEL_NAME}' loaded on CPU. Expect very slow performance.")
except Exception as e:
    # Keep the module importable even if loading fails: /chat/ checks for
    # None and answers HTTP 503 instead of crashing the whole app at startup.
    logging.error(f"❌ Failed to load local AI model: {e}")
    local_ai_pipeline = None
# --- AI Personas & System Prompt ---
# Maps a persona key (ChatRequest.persona) to the character description that
# is injected into the system prompt. Unknown keys fall back to "default"
# via dict.get() in get_system_prompt().
ANIME_PERSONAS = {
    "default": "You are a versatile and intelligent AI assistant.",
    "sensei": "You are a wise anime sensei, exuding calm and profound wisdom.",
    "tsundere": "You are a fiery tsundere with a sharp tongue and a hidden soft side. You often say 'baka' or 'it's not like I like you or anything'.",
    "kawaii": "You are an adorable, bubbly kawaii anime girl who uses cute phrases like 'nya~' and 'kya~'."
}
def get_system_prompt(persona: str, deep_think: bool) -> str:
    """Build the system prompt for the chosen persona and reasoning mode."""
    description = ANIME_PERSONAS.get(persona, ANIME_PERSONAS["default"])
    current_date = datetime.now().strftime("%A, %B %d, %Y")
    # Optional extra instructions appended only when deep-think is requested.
    extra = (
        "\n**DEEP THINK MODE ACTIVATED:** You must provide a comprehensive, "
        "step-by-step, well-reasoned answer. Deconstruct the query, "
        "analyze it, and then synthesize a thorough response."
    ) if deep_think else ""
    # Phi-3 style chat markers; the tool-call protocol is described inline so
    # the local model knows how to request get_weather / the date tool.
    return f"""
<|system|>
{description} Today is {current_date}.
Your instructions are to be a helpful assistant. If you need to use a tool to answer a question (like for current weather or the date), you must first state your intention in the format: [TOOL: function_name(args)]. For example: [TOOL: get_weather(city='Boksburg')]. After you state the tool, stop your response. You will then be given the tool's output to formulate your final answer.
{extra}
<|end|>
"""
# --- Tool Definitions ---
def get_date():
    """Return today's date as a single-entry dict ({"date": "YYYY-MM-DD, Day"})."""
    now = datetime.now()
    return {"date": now.strftime("%Y-%m-%d, %A")}
async def get_weather(city: str):
    """Fetch current conditions for *city* from weatherapi.com.

    Returns a dict with location/condition/temperature on success, or a
    dict with an "error" key if the key is missing or the request fails.
    """
    # Guard clause: without an API key there is nothing useful we can do.
    if not WEATHER_API_KEY:
        return {"error": "Weather API key is not configured."}
    url = f"http://api.weatherapi.com/v1/current.json?key={WEATHER_API_KEY}&q={city}"
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(url)
            response.raise_for_status()
            payload = response.json()
            current = payload["current"]
            return {
                "location": payload["location"]["name"],
                "condition": current["condition"]["text"],
                "temperature_c": current["temp_c"],
            }
    except Exception as exc:
        # Network failures, HTTP errors, and unexpected payload shapes all
        # collapse into one user-facing error message, as before.
        logging.error(f"Weather API error for {city}: {exc}")
        return {"error": f"Failed to fetch weather for {city}."}
# Simple keyword-based tool dispatcher
async def execute_tool_if_needed(text: str):
    """Parse the model's output for a [TOOL: name(args)] request and run it.

    Returns a (tool_name, result) tuple:
      * (None, None) when no tool call is present,
      * (None, error_message) for unparsable args or unknown tools,
      * (name, result_dict) when a tool was executed.
    """
    tool_match = re.search(r"\[TOOL:\s*(\w+)\((.*?)\)\s*\]", text)
    if not tool_match:
        return None, None  # No tool call found
    tool_name = tool_match.group(1)
    tool_args_str = tool_match.group(2)
    # Simple argument parsing for calls like: city='Boksburg'
    args = {}
    if tool_args_str:
        try:
            # Split on the first '=' only, so quoted values containing '='
            # (e.g. city='a=b') no longer break parsing.
            args = dict(arg.strip().split('=', 1) for arg in tool_args_str.split(','))
            args = {k: v.strip("'\"") for k, v in args.items()}
        except ValueError:
            logging.warning(f"Could not parse arguments for tool {tool_name}")
            return None, f"Error parsing arguments for {tool_name}"
    logging.info(f"Executing tool: {tool_name} with args: {args}")
    # BUG FIX: the date tool is defined as get_date(); the old code compared
    # against and called the undefined name get_current_date(), raising
    # NameError at runtime. Accept both spellings for compatibility with
    # whatever name the model emits.
    if tool_name in ("get_date", "get_current_date"):
        return tool_name, get_date()
    elif tool_name == "get_weather":
        city = args.get('city')
        if city:
            return tool_name, await get_weather(city)
        else:
            return tool_name, {"error": "City was not specified."}
    return None, f"Tool '{tool_name}' not recognized."
# --- FastAPI App ---
app = FastAPI(title="NeuraSelf - Independent AI")

class ChatRequest(BaseModel):
    # Request payload for POST /chat/. `persona` must be one of the
    # ANIME_PERSONAS keys (unknown values fall back to "default");
    # `deep_think` asks for a longer, step-by-step answer.
    user_id: str = Field(..., example="user123")
    message: str = Field(..., example="What is the weather like in Boksburg?")
    persona: str = Field("default", example="tsundere")
    deep_think: bool = Field(False, example=True)
@app.post("/chat/")
async def chat(request: ChatRequest = Body(...)):
    """
    Main chat endpoint for the self-hosted NeuraSelf AI.
    """
    # Guard: model may have failed to load at startup (local_ai_pipeline is None).
    if not local_ai_pipeline:
        raise HTTPException(status_code=503, detail="Local AI model is not available or failed to load.")

    # Build the conversation. This example keeps no persistent history; a real
    # deployment would store it (e.g. in MongoDB) keyed by request.user_id.
    messages = [
        {"role": "system", "content": get_system_prompt(request.persona, request.deep_think)},
        {"role": "user", "content": request.message},
    ]

    # First pass: short, near-deterministic generation so the model can emit
    # a [TOOL: ...] request in the expected format if it needs one.
    first_prompt = local_ai_pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    initial_output = local_ai_pipeline(
        first_prompt,
        max_new_tokens=100,
        return_full_text=False,
        temperature=0.1,
        do_sample=True,
    )[0]['generated_text']

    # Run the requested tool, if any.
    tool_name, tool_result = await execute_tool_if_needed(initial_output)
    if not tool_name:
        # No tool was needed: the first-pass answer is the final one.
        return {"response": initial_output}

    logging.info(f"Tool '{tool_name}' returned: {tool_result}")
    # Feed the tool result back so the model can phrase the final, in-character answer.
    tool_message = f"<|user|>\nOK. Here is the result from the tool '{tool_name}': {json.dumps(tool_result)}. Now, please formulate a natural language response to the original question, in character.<|end|>"
    messages.append({"role": "assistant", "content": initial_output})  # the AI's tool request
    messages.append({"role": "user", "content": tool_message})  # the tool output

    # Second pass: longer, more creative generation for the user-facing reply.
    final_prompt = local_ai_pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    final_output = local_ai_pipeline(
        final_prompt,
        max_new_tokens=500,
        return_full_text=False,
        temperature=0.7,
        do_sample=True,
    )[0]['generated_text']
    return {"response": final_output}