# Source: Hugging Face Spaces app (status badge "Running" stripped from paste).
import os
import re
import json
import logging
from datetime import datetime

from fastapi import FastAPI, HTTPException, Body
from pydantic import BaseModel, Field

# --- Self-hosted generative engine ---
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# --- Optional, for tool use (weather API calls) ---
import httpx

# --- Basic Configuration ---
logging.basicConfig(level=logging.INFO)

# API key for the weather tool; an empty string makes get_weather()
# return a configuration error instead of calling the API.
WEATHER_API_KEY = os.getenv("WEATHER_API_KEY", "")
# --- 🧠 LOAD THE LOCAL GENERATIVE AI MODEL ---
# Runs once at app startup and loads the entire model into memory.
# CPU-only by design here; generation will be very slow without a GPU.
logging.info("Loading self-hosted generative model for CPU...")
try:
    MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Force the model onto the CPU; no GPU/quantization paths.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="cpu",
        trust_remote_code=True,
    )
    local_ai_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    logging.info(f"✅ Model '{MODEL_NAME}' loaded on CPU. Expect very slow performance.")
except Exception as e:
    logging.error(f"❌ Failed to load local AI model: {e}")
    # The /chat endpoint checks for None and answers 503 when loading failed.
    local_ai_pipeline = None
# --- AI Personas & System Prompt ---
# Maps a persona key to the personality text injected into the system
# prompt. "default" doubles as the fallback for unknown keys.
ANIME_PERSONAS = dict(
    default="You are a versatile and intelligent AI assistant.",
    sensei="You are a wise anime sensei, exuding calm and profound wisdom.",
    tsundere="You are a fiery tsundere with a sharp tongue and a hidden soft side. You often say 'baka' or 'it's not like I like you or anything'.",
    kawaii="You are an adorable, bubbly kawaii anime girl who uses cute phrases like 'nya~' and 'kya~'.",
)
def get_system_prompt(persona: str, deep_think: bool) -> str:
    """Build the system prompt for the given persona and deep-think mode.

    Unknown persona keys fall back to the "default" persona.
    """
    persona_desc = ANIME_PERSONAS.get(persona, ANIME_PERSONAS["default"])
    today = datetime.now().strftime("%A, %B %d, %Y")
    # Extra reasoning instructions are injected only when deep_think is on.
    deep_think_prompt = (
        "\n**DEEP THINK MODE ACTIVATED:** You must provide a comprehensive, "
        "step-by-step, well-reasoned answer. Deconstruct the query, "
        "analyze it, and then synthesize a thorough response."
    ) if deep_think else ""
    # Phi-3 chat markup; the [TOOL: ...] convention is what the
    # execute_tool_if_needed() dispatcher parses out of the model's reply.
    return f"""
<|system|>
{persona_desc} Today is {today}.
Your instructions are to be a helpful assistant. If you need to use a tool to answer a question (like for current weather or the date), you must first state your intention in the format: [TOOL: function_name(args)]. For example: [TOOL: get_weather(city='Boksburg')]. After you state the tool, stop your response. You will then be given the tool's output to formulate your final answer.
{deep_think_prompt}
<|end|>
"""
# --- Tool Definitions ---
def get_date():
    """Return today's date as a dict, e.g. {"date": "2024-01-01, Monday"}."""
    now = datetime.now()
    return {"date": now.strftime("%Y-%m-%d, %A")}
async def get_weather(city: str):
    """Fetch the current weather for *city* from weatherapi.com.

    Returns a dict with location/condition/temperature_c on success, or a
    dict with a single "error" key on any failure (missing API key, HTTP
    error, unexpected payload).
    """
    if not WEATHER_API_KEY:
        return {"error": "Weather API key is not configured."}
    url = "http://api.weatherapi.com/v1/current.json"
    try:
        async with httpx.AsyncClient() as client:
            # Pass query args via `params` so the city name is URL-encoded;
            # the previous f-string URL broke on spaces/special characters.
            res = await client.get(url, params={"key": WEATHER_API_KEY, "q": city})
            res.raise_for_status()
            data = res.json()
            return {
                "location": data["location"]["name"],
                "condition": data["current"]["condition"]["text"],
                "temperature_c": data["current"]["temp_c"]
            }
    except Exception as e:
        logging.error(f"Weather API error for {city}: {e}")
        return {"error": f"Failed to fetch weather for {city}."}
# Simple keyword-based tool dispatcher
async def execute_tool_if_needed(text: str):
    """Parse the model's output for a [TOOL: name(args)] request and run it.

    Returns:
        (tool_name, result) when a known tool was executed,
        (None, None) when no tool call is present,
        (None, error_message) for unparseable arguments or an unknown tool.
    """
    tool_match = re.search(r"\[TOOL:\s*(\w+)\((.*?)\)\s*\]", text)
    if not tool_match:
        return None, None  # No tool call found
    tool_name = tool_match.group(1)
    tool_args_str = tool_match.group(2)
    # Simple argument parsing for the form: city='Boksburg', key="value"
    args = {}
    if tool_args_str:
        try:
            # split('=', 1) so values containing '=' don't blow up the parse
            args = dict(arg.strip().split('=', 1) for arg in tool_args_str.split(','))
            args = {k: v.strip("'\"") for k, v in args.items()}
        except ValueError:
            logging.warning(f"Could not parse arguments for tool {tool_name}")
            return None, f"Error parsing arguments for {tool_name}"
    logging.info(f"Executing tool: {tool_name} with args: {args}")
    # Accept both names the model might emit. The original compared against
    # "get_current_date" and then called the undefined get_current_date(),
    # which raised NameError at runtime; the defined tool is get_date().
    if tool_name in ("get_date", "get_current_date"):
        return tool_name, get_date()
    elif tool_name == "get_weather":
        city = args.get('city')
        if city:
            return tool_name, await get_weather(city)
        else:
            return tool_name, {"error": "City was not specified."}
    return None, f"Tool '{tool_name}' not recognized."
# --- FastAPI App ---
app = FastAPI(title="NeuraSelf - Independent AI")


class ChatRequest(BaseModel):
    """Request payload for the chat endpoint."""

    user_id: str = Field(..., example="user123")
    message: str = Field(..., example="What is the weather like in Boksburg?")
    persona: str = Field(default="default", example="tsundere")
    deep_think: bool = Field(default=False, example=True)
@app.post("/chat")
async def chat(request: ChatRequest = Body(...)):
    """
    Main chat endpoint for the self-hosted NeuraSelf AI.

    Flow: build the system prompt, run a short first generation pass to
    detect a [TOOL: ...] request, execute the tool if one was asked for,
    then run a second pass so the model can phrase the tool result in
    character. (The @app.post decorator was missing, so the route was
    never registered with FastAPI.)
    """
    if not local_ai_pipeline:
        raise HTTPException(status_code=503, detail="Local AI model is not available or failed to load.")
    # 1. Create the system prompt
    system_prompt = get_system_prompt(request.persona, request.deep_think)
    # Self-contained example: stateless, single-turn history.
    # A real deployment would persist per-user history (e.g. in MongoDB).
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": request.message}
    ]
    # 2. Generate the initial AI response to see if it needs a tool
    prompt = local_ai_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    generation_args = {
        "max_new_tokens": 100,  # Keep the first pass short
        "return_full_text": False,
        "temperature": 0.1,  # Low temperature to reliably generate the tool format
        "do_sample": True,
    }
    # NOTE(review): the pipeline call is synchronous and blocks the event
    # loop for the whole generation; consider run_in_executor in production.
    initial_output = local_ai_pipeline(prompt, **generation_args)[0]['generated_text']
    # 3. Check for and execute tools
    tool_name, tool_result = await execute_tool_if_needed(initial_output)
    # 4. Generate the final response
    if tool_name:
        logging.info(f"Tool '{tool_name}' returned: {tool_result}")
        # Feed the tool result back to the model as a user turn so it can
        # produce the final in-character answer.
        tool_message = f"<|user|>\nOK. Here is the result from the tool '{tool_name}': {json.dumps(tool_result)}. Now, please formulate a natural language response to the original question, in character.<|end|>"
        messages.append({"role": "assistant", "content": initial_output})  # Add the AI's tool request
        messages.append({"role": "user", "content": tool_message})  # Add the tool result as a user message
        final_prompt = local_ai_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        final_generation_args = {
            "max_new_tokens": 500,
            "return_full_text": False,
            "temperature": 0.7,  # Higher temperature for a more natural final answer
            "do_sample": True,
        }
        final_output = local_ai_pipeline(final_prompt, **final_generation_args)[0]['generated_text']
        return {"response": final_output}
    else:
        # If no tool was needed, the initial response is the final one
        return {"response": initial_output}