# Source: Hugging Face Spaces app (status badge "Running" stripped from paste).
import os
import re
import json
import logging
from datetime import datetime

from fastapi import FastAPI, HTTPException, Body
from pydantic import BaseModel, Field

# --- Self-hosted generative engine ---
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# --- Optional, for tool use (weather API calls) ---
import httpx

# --- Basic Configuration ---
logging.basicConfig(level=logging.INFO)

# API key for the weather tool; an empty string makes get_weather()
# return a configuration error instead of calling the API.
WEATHER_API_KEY = os.getenv("WEATHER_API_KEY", "")
# --- 🧠 LOAD THE LOCAL GENERATIVE AI MODEL ---
# Runs once at app startup and loads the entire model into memory.
# CPU-only by design here; generation will be very slow without a GPU.
logging.info("Loading self-hosted generative model for CPU...")
try:
    MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Force the model onto the CPU; no GPU/quantization paths.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="cpu",
        trust_remote_code=True,
    )
    local_ai_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    logging.info(f"✅ Model '{MODEL_NAME}' loaded on CPU. Expect very slow performance.")
except Exception as e:
    logging.error(f"❌ Failed to load local AI model: {e}")
    # The /chat endpoint checks for None and answers 503 when loading failed.
    local_ai_pipeline = None
# --- AI Personas & System Prompt ---
# Maps a persona key to the personality text injected into the system
# prompt. "default" doubles as the fallback for unknown keys.
ANIME_PERSONAS = dict(
    default="You are a versatile and intelligent AI assistant.",
    sensei="You are a wise anime sensei, exuding calm and profound wisdom.",
    tsundere="You are a fiery tsundere with a sharp tongue and a hidden soft side. You often say 'baka' or 'it's not like I like you or anything'.",
    kawaii="You are an adorable, bubbly kawaii anime girl who uses cute phrases like 'nya~' and 'kya~'.",
)
def get_system_prompt(persona: str, deep_think: bool) -> str:
    """Build the system prompt for the given persona and deep-think mode.

    Unknown persona keys fall back to the "default" persona.
    """
    persona_desc = ANIME_PERSONAS.get(persona, ANIME_PERSONAS["default"])
    today = datetime.now().strftime("%A, %B %d, %Y")
    # Extra reasoning instructions are injected only when deep_think is on.
    deep_think_prompt = (
        "\n**DEEP THINK MODE ACTIVATED:** You must provide a comprehensive, "
        "step-by-step, well-reasoned answer. Deconstruct the query, "
        "analyze it, and then synthesize a thorough response."
    ) if deep_think else ""
    # Phi-3 chat markup; the [TOOL: ...] convention is what the
    # execute_tool_if_needed() dispatcher parses out of the model's reply.
    return f"""
<|system|>
{persona_desc} Today is {today}.
Your instructions are to be a helpful assistant. If you need to use a tool to answer a question (like for current weather or the date), you must first state your intention in the format: [TOOL: function_name(args)]. For example: [TOOL: get_weather(city='Boksburg')]. After you state the tool, stop your response. You will then be given the tool's output to formulate your final answer.
{deep_think_prompt}
<|end|>
"""
# --- Tool Definitions ---
def get_date():
    """Return today's date as a dict, e.g. {"date": "2024-01-01, Monday"}."""
    now = datetime.now()
    return {"date": now.strftime("%Y-%m-%d, %A")}
async def get_weather(city: str):
    """Fetch the current weather for *city* from weatherapi.com.

    Returns a dict with location/condition/temperature_c on success, or a
    dict with a single "error" key on any failure (missing API key, HTTP
    error, unexpected payload).
    """
    if not WEATHER_API_KEY:
        return {"error": "Weather API key is not configured."}
    url = "http://api.weatherapi.com/v1/current.json"
    try:
        async with httpx.AsyncClient() as client:
            # Pass query args via `params` so the city name is URL-encoded;
            # the previous f-string URL broke on spaces/special characters.
            res = await client.get(url, params={"key": WEATHER_API_KEY, "q": city})
            res.raise_for_status()
            data = res.json()
            return {
                "location": data["location"]["name"],
                "condition": data["current"]["condition"]["text"],
                "temperature_c": data["current"]["temp_c"]
            }
    except Exception as e:
        logging.error(f"Weather API error for {city}: {e}")
        return {"error": f"Failed to fetch weather for {city}."}
# Simple keyword-based tool dispatcher
async def execute_tool_if_needed(text: str):
    """Parse the model's output for a [TOOL: name(args)] request and run it.

    Returns:
        (tool_name, result) when a known tool was executed,
        (None, None) when no tool call is present,
        (None, error_message) for unparseable arguments or an unknown tool.
    """
    tool_match = re.search(r"\[TOOL:\s*(\w+)\((.*?)\)\s*\]", text)
    if not tool_match:
        return None, None  # No tool call found
    tool_name = tool_match.group(1)
    tool_args_str = tool_match.group(2)
    # Simple argument parsing for the form: city='Boksburg', key="value"
    args = {}
    if tool_args_str:
        try:
            # split('=', 1) so values containing '=' don't blow up the parse
            args = dict(arg.strip().split('=', 1) for arg in tool_args_str.split(','))
            args = {k: v.strip("'\"") for k, v in args.items()}
        except ValueError:
            logging.warning(f"Could not parse arguments for tool {tool_name}")
            return None, f"Error parsing arguments for {tool_name}"
    logging.info(f"Executing tool: {tool_name} with args: {args}")
    # Accept both names the model might emit. The original compared against
    # "get_current_date" and then called the undefined get_current_date(),
    # which raised NameError at runtime; the defined tool is get_date().
    if tool_name in ("get_date", "get_current_date"):
        return tool_name, get_date()
    elif tool_name == "get_weather":
        city = args.get('city')
        if city:
            return tool_name, await get_weather(city)
        else:
            return tool_name, {"error": "City was not specified."}
    return None, f"Tool '{tool_name}' not recognized."
# --- FastAPI App ---
app = FastAPI(title="NeuraSelf - Independent AI")


class ChatRequest(BaseModel):
    """Request payload for the chat endpoint."""

    user_id: str = Field(..., example="user123")
    message: str = Field(..., example="What is the weather like in Boksburg?")
    persona: str = Field(default="default", example="tsundere")
    deep_think: bool = Field(default=False, example=True)
@app.post("/chat")
async def chat(request: ChatRequest = Body(...)):
    """
    Main chat endpoint for the self-hosted NeuraSelf AI.

    Flow: build the system prompt, run a short first generation pass to
    detect a [TOOL: ...] request, execute the tool if one was asked for,
    then run a second pass so the model can phrase the tool result in
    character. (The @app.post decorator was missing, so the route was
    never registered with FastAPI.)
    """
    if not local_ai_pipeline:
        raise HTTPException(status_code=503, detail="Local AI model is not available or failed to load.")
    # 1. Create the system prompt
    system_prompt = get_system_prompt(request.persona, request.deep_think)
    # Self-contained example: stateless, single-turn history.
    # A real deployment would persist per-user history (e.g. in MongoDB).
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": request.message}
    ]
    # 2. Generate the initial AI response to see if it needs a tool
    prompt = local_ai_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    generation_args = {
        "max_new_tokens": 100,  # Keep the first pass short
        "return_full_text": False,
        "temperature": 0.1,  # Low temperature to reliably generate the tool format
        "do_sample": True,
    }
    # NOTE(review): the pipeline call is synchronous and blocks the event
    # loop for the whole generation; consider run_in_executor in production.
    initial_output = local_ai_pipeline(prompt, **generation_args)[0]['generated_text']
    # 3. Check for and execute tools
    tool_name, tool_result = await execute_tool_if_needed(initial_output)
    # 4. Generate the final response
    if tool_name:
        logging.info(f"Tool '{tool_name}' returned: {tool_result}")
        # Feed the tool result back to the model as a user turn so it can
        # produce the final in-character answer.
        tool_message = f"<|user|>\nOK. Here is the result from the tool '{tool_name}': {json.dumps(tool_result)}. Now, please formulate a natural language response to the original question, in character.<|end|>"
        messages.append({"role": "assistant", "content": initial_output})  # Add the AI's tool request
        messages.append({"role": "user", "content": tool_message})  # Add the tool result as a user message
        final_prompt = local_ai_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        final_generation_args = {
            "max_new_tokens": 500,
            "return_full_text": False,
            "temperature": 0.7,  # Higher temperature for a more natural final answer
            "do_sample": True,
        }
        final_output = local_ai_pipeline(final_prompt, **final_generation_args)[0]['generated_text']
        return {"response": final_output}
    else:
        # If no tool was needed, the initial response is the final one
        return {"response": initial_output}