| from llama_cpp import Llama |
| from huggingface_hub import hf_hub_download |
| from flask import Flask, request, jsonify |
|
|
app = Flask(__name__)


# Module-level model bootstrap: download the quantized Phi-2 GGUF weights into
# the working directory, then load them. n_gpu_layers=999 requests that every
# layer be offloaded to the GPU (llama.cpp clamps to the model's layer count);
# n_ctx=2048 sets the context window. NOTE(review): this runs at import time,
# so importing this module blocks on the (possibly large) download.
hf_hub_download("TheBloke/phi-2-GGUF", "phi-2.Q8_0.gguf", local_dir="./")
phi = Llama(model_path="./phi-2.Q8_0.gguf", n_ctx=2048, n_gpu_layers=999)
|
|
# BUG FIX: the original line read `app.route(...)` without the leading "@",
# which merely evaluates the decorator factory and discards it — the GET
# route was never registered and `index` was unreachable via Flask.
@app.route("/spaces/MrOvkill/phastfi/", methods=["GET"])
def index():
    """Landing page: directs callers to the POST completion endpoint."""
    return "<html><body><h1>Use API</h1><p>Use /completion as POST with a prompt in a JSON query.</p></body></html>"
|
|
@app.route("/spaces/MrOvkill/phastfi/generate", methods=["POST"])
def completion():
    """Generate two independent completions for a JSON-supplied prompt.

    Expects a POST body like ``{"prompt": "..."}`` and returns
    ``{"responses": [text_1, text_2]}``. Responds 400 on a missing or
    malformed body instead of raising an unhandled KeyError (500).
    """
    data = request.get_json(silent=True)
    if not data or "prompt" not in data:
        return jsonify({"error": "JSON body with a 'prompt' key is required"}), 400
    prompt = data["prompt"]

    # BUG FIX: llama-cpp-python's create_completion/__call__ has no
    # "num_completions" parameter — the original call raised TypeError, and
    # even a single successful call returns one choice, so the original
    # res["choices"][1] would raise IndexError. Sample twice instead.
    responses = []
    for _ in range(2):
        res = phi(
            prompt,
            temperature=0.33,
            top_p=0.95,
            top_k=42,
            max_tokens=1024,
        )
        responses.append(res["choices"][0]["text"])

    return jsonify({"responses": responses})
|
|
if __name__ == "__main__":
    # Serve on all interfaces, port 7860 (the port Hugging Face Spaces
    # expects a web app to bind — TODO confirm against deployment config).
    app.run(host="0.0.0.0", port=7860)