Spaces:

MINZO4546
/

minzo-api

Build error

File size: 1,877 Bytes

b0ae281
d76d7d1
4ce55c5
 
83957eb
7d813f4
4ce55c5
d76d7d1
7d813f4
 
 
 
 
4ce55c5
 
 
9154c39
249aa04
4ce55c5
249aa04
7d813f4
 
 
 
 
 
 
 
5e1de17
6219a3a
 
 
7d813f4
 
 
 
 
7ae0e9a
7d813f4
 
 
 
 
 
 
550f38c
7d813f4
4ce55c5
 
 
 
 
deadea5
6219a3a
4ce55c5
6219a3a
deadea5
6219a3a

import os
from fastapi import FastAPI
from llama_cpp import Llama
import requests

# 🔱 CPU Core Management: සර්වර් එකේ තියෙන Cores ගණනට Threads සීමා කිරීම
threads = int(os.cpu_count() or 2)

# 🔱 Load Model: CPU එකට ගැලපෙන Gemma 3 GGUF මොඩල් එක
# HF Space එකේදී හරි path එකක් ලබා දීම හෝ Repo ID එක පාවිච්චි කරන්න
llm = Llama.from_pretrained(
    repo_id="google/gemma-3-1b-it-GGUF", 
    filename="*q4_k_m.gguf", # 4-bit Quantized version for best performance
    n_ctx=2048,
    n_threads=threads,
    verbose=False
)

main = FastAPI()

def web_search(query):
    try:
        url = f"https://api.duckduckgo.com/?q={query}&format=json"
        response = requests.get(url, timeout=5).json()
        return response.get("AbstractText", "No data.")
    except:
        return "Search failed."

@main.post("/v1/chat")
async def chat(data: dict):
    user_query = data.get("message", "")
    
    # 🔱 Inachi AI Identity
    system_instr = (
        "You are Inachi AI, developed by the Inachi Team. "
        "You are an expert system architect."
    )
    
    # Simple search context logic
    search_context = ""
    if "search" in user_query.lower():
        search_context = f"\nContext: {web_search(user_query)}"

    # Prompt Template
    prompt = f"<bos><start_of_turn>system\n{system_instr}{search_context}<end_of_turn>\n<start_of_turn>user\n{user_query}<end_of_turn>\n<start_of_turn>model\n"
    
    # Generation
    output = llm(
        prompt,
        max_tokens=512,
        stop=["<end_of_turn>"],
        echo=False
    )
    
    reply = output['choices'][0]['text'].strip()
    return {"reply": reply}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(main, host="0.0.0.0", port=7860)