# Source: Hugging Face upload by minzo456 — "Create app.py" (commit 99d98a7, verified).
from fastapi import FastAPI, Request
from llama_cpp import Llama
import uvicorn
app = FastAPI()
# Model එක Load කිරීම (මෙයට විනාඩි කිහිපයක් ගතවේ)
llm = Llama(
model_path="model.gguf",
n_ctx=2048,
n_threads=4 # CPU cores ගණන
)
@app.post("/v1/chat/completions")
async def chat(request: Request):
data = await request.json()
messages = data.get("messages", [])
# Prompt එක සකසා ගැනීම
full_prompt = ""
for m in messages:
full_prompt += f"{m['role']}: {m['content']}\n"
full_prompt += "assistant: "
# Model එකෙන් පිළිතුර ලබා ගැනීම
output = llm(
full_prompt,
max_tokens=512,
stop=["user:", "\n"],
echo=False
)
response_text = output["choices"][0]["text"]
return {
"choices": [{
"message": {
"role": "assistant",
"content": response_text.strip()
}
}]
}
@app.get("/")
def home():
return {"status": "MINZO-CORE v1.0 is Live"}