# tiny/app.py
# Origin: Hugging Face Space "tiny" (uploaded by vish85521; file renamed
# from app.py.txt to app.py, commit 623fed2). Web-page chrome from the
# hub UI ("raw / history / blame", size) removed.
from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
app = FastAPI()
# Download the quantized model
model_path = hf_hub_download(
repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
)
# Initialize LLM for 2 vCPUs
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
# Define the request body format
class PromptRequest(BaseModel):
prompt: str
@app.post("/generate")
def generate_text(request: PromptRequest):
output = llm(
f"<|system|>\nYou are a helpful API.\n<|user|>\n{request.prompt}\n<|assistant|>\n",
max_tokens=256,
stop=["<|user|>"],
echo=False
)
return {"response": output['choices'][0]['text']}