vish85521 committed on
Commit
c68a24d
·
verified ·
1 Parent(s): 58f1e6f

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile.txt +18 -0
  2. app.py.txt +29 -0
  3. requirements.txt.txt +5 -0
Dockerfile.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for a FastAPI service running a local GGUF model,
# deployed on Hugging Face Spaces (Docker runtime, port 7860).
FROM python:3.9-slim

WORKDIR /code

# Copy only the requirements first so this layer is cached when
# application code changes but dependencies do not.
COPY ./requirements.txt /code/requirements.txt

# Compiling llama-cpp-python requires build-essential.
# Install, then remove the apt lists in the SAME layer so the package
# index cache does not bloat the final image; skip recommended packages.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the API code
COPY . .

# Expose the port Hugging Face uses
EXPOSE 7860

# Run the FastAPI server
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = FastAPI()

# Fetch the quantized TinyLlama GGUF weights from the Hugging Face Hub.
# hf_hub_download caches locally, so repeated startups reuse the file.
model_path = hf_hub_download(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
)

# Load the model once at startup.
# n_threads=2 matches the 2-vCPU host; n_ctx=2048 is the context window.
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
17
class PromptRequest(BaseModel):
    """Request body for /generate: a single user prompt string."""

    prompt: str
20
+
21
+ @app.post("/generate")
22
+ def generate_text(request: PromptRequest):
23
+ output = llm(
24
+ f"<|system|>\nYou are a helpful API.\n<|user|>\n{request.prompt}\n<|assistant|>\n",
25
+ max_tokens=256,
26
+ stop=["<|user|>"],
27
+ echo=False
28
+ )
29
+ return {"response": output['choices'][0]['text']}
requirements.txt.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pydantic
4
+ llama-cpp-python
5
+ huggingface_hub