import os
import threading

from fastapi import FastAPI, HTTPException
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from pydantic import BaseModel
|
|
app = FastAPI(title="Llama 3.2 1B API")


# Quantized (Q4_K_M) GGUF build of Llama 3.2 1B Instruct, hosted on the
# Hugging Face Hub by bartowski.
REPO_ID = "bartowski/Llama-3.2-1B-Instruct-GGUF"
FILENAME = "Llama-3.2-1B-Instruct-Q4_K_M.gguf"
# The model file is downloaded to / looked up in the current working directory.
MODEL_PATH = os.path.join(os.getcwd(), FILENAME)
|
|
def ensure_model_exists():
    """Download the GGUF model into the working directory if it is absent.

    No-op (besides a log line) when MODEL_PATH already exists.

    Raises:
        RuntimeError: if the download from the Hugging Face Hub fails,
            with the original exception chained as the cause.
    """
    # Guard clause: nothing to do when the file is already on disk.
    if os.path.exists(MODEL_PATH):
        print(f"Model found at {MODEL_PATH}")
        return

    print(f"Downloading model {FILENAME} from {REPO_ID}...")
    try:
        hf_hub_download(
            repo_id=REPO_ID,
            filename=FILENAME,
            local_dir=os.getcwd(),
            # NOTE(review): deprecated/ignored in recent huggingface_hub
            # releases (local_dir downloads no longer use symlinks); kept
            # for compatibility with older pinned versions — confirm the
            # project's huggingface_hub version before removing.
            local_dir_use_symlinks=False
        )
    except Exception as e:
        # Chain the cause so the underlying HTTP/auth error stays debuggable.
        raise RuntimeError(f"Failed to download model: {e}") from e
    print("Download complete.")
|
|
| |
# NOTE(review): importing this module triggers a potentially large download
# and a full model load; consider moving this into a FastAPI lifespan/startup
# hook so import stays side-effect free.
ensure_model_exists()


# Load the model once at import time and share the instance across requests.
llm = Llama(
    model_path=MODEL_PATH,
    n_threads=4,   # CPU threads used for inference
    n_ctx=2048,    # context window size in tokens
    verbose=False  # suppress llama.cpp's stderr logging
)
|
|
class ChatRequest(BaseModel):
    """Request body for POST /v1/chat/completions."""

    # Raw user message; the endpoint wraps it in the Llama 3 chat template.
    prompt: str
    # Generation cap and sampling knobs, passed straight through to llama_cpp.
    max_tokens: int = 512
    temperature: float = 0.7
    top_p: float = 0.9
|
|
@app.get("/")
async def root():
    """Health-check endpoint: confirms the server is up and names the model file."""
    status = {
        "message": "Llama 3.2 1B FastAPI server is running",
        "model": FILENAME,
    }
    return status
|
|
# llama_cpp's Llama instance is not thread-safe; serialize inference calls.
_llm_lock = threading.Lock()


@app.post("/v1/chat/completions")
def chat_completion(request: ChatRequest):
    """Run a single-turn chat completion against the local Llama model.

    Declared as a plain ``def`` on purpose: llama_cpp inference is blocking
    CPU-bound work, and FastAPI executes sync endpoints in a worker
    threadpool instead of stalling the event loop (the original ``async def``
    froze every other request for the duration of generation).

    Args:
        request: prompt plus sampling parameters (see ChatRequest).

    Returns:
        The raw llama_cpp completion dict (OpenAI-style schema).

    Raises:
        HTTPException: 500 with the underlying error message if inference fails.
    """
    # Llama 3 instruct chat template for a single user turn.
    formatted_prompt = (
        "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
        f"{request.prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    )

    try:
        # The lock serializes threadpool workers around the shared model.
        with _llm_lock:
            output = llm(
                formatted_prompt,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=request.top_p,
                stop=["<|eot_id|>"]
            )
    except Exception as e:
        # Boundary handler: surface inference failures as HTTP 500,
        # chaining the cause for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return output
|
|
if __name__ == "__main__":
    # Dev entry point: serve on all interfaces, port 8000.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
|