vish85521 committed on
Commit
c68a24d
·
verified ·
1 Parent(s): 58f1e6f

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile.txt +18 -0
  2. app.py.txt +29 -0
  3. requirements.txt.txt +5 -0
Dockerfile.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for a FastAPI service running a local GGUF model,
# deployed on Hugging Face Spaces (Docker runtime, port 7860).
FROM python:3.9-slim

WORKDIR /code

# Copy only the requirements first so this layer is cached when
# application code changes but dependencies do not.
COPY ./requirements.txt /code/requirements.txt

# Compiling llama-cpp-python requires build-essential.
# Install, then remove the apt lists in the SAME layer so the package
# index cache does not bloat the final image; skip recommended packages.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the API code
COPY . .

# Expose the port Hugging Face uses
EXPOSE 7860

# Run the FastAPI server
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = FastAPI()

# Fetch the quantized TinyLlama GGUF weights from the Hugging Face Hub.
# hf_hub_download caches locally, so repeated startups reuse the file.
model_path = hf_hub_download(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
)

# Load the model once at startup.
# n_threads=2 matches the 2-vCPU host; n_ctx=2048 is the context window.
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
17
class PromptRequest(BaseModel):
    """Request body for /generate: a single user prompt string."""

    prompt: str
20
+
21
+ @app.post("/generate")
22
+ def generate_text(request: PromptRequest):
23
+ output = llm(
24
+ f"<|system|>\nYou are a helpful API.\n<|user|>\n{request.prompt}\n<|assistant|>\n",
25
+ max_tokens=256,
26
+ stop=["<|user|>"],
27
+ echo=False
28
+ )
29
+ return {"response": output['choices'][0]['text']}
requirements.txt.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pydantic
4
+ llama-cpp-python
5
+ huggingface_hub