Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
| 1 |
import os
|
| 2 |
-
import json
|
| 3 |
from flask import Flask, request, jsonify
|
| 4 |
from huggingface_hub import InferenceClient
|
| 5 |
|
| 6 |
app = Flask(__name__)
|
| 7 |
-
|
| 8 |
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 9 |
-
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "Qwen/Qwen2.5-
|
| 10 |
client = InferenceClient(token=HF_TOKEN)
|
| 11 |
|
| 12 |
@app.route("/", methods=["GET"])
|
|
@@ -18,7 +16,9 @@ def models():
|
|
| 18 |
return jsonify({
|
| 19 |
"object": "list",
|
| 20 |
"data": [
|
|
|
|
| 21 |
{"id": "Qwen/Qwen2.5-7B-Instruct", "object": "model"},
|
|
|
|
| 22 |
{"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model"},
|
| 23 |
]
|
| 24 |
})
|
|
@@ -26,17 +26,35 @@ def models():
|
|
| 26 |
@app.route("/v1/chat/completions", methods=["POST"])
|
| 27 |
def chat():
|
| 28 |
data = request.json
|
|
|
|
|
|
|
|
|
|
| 29 |
messages = data.get("messages", [])
|
| 30 |
model = data.get("model", DEFAULT_MODEL)
|
| 31 |
-
max_tokens = data.get("max_tokens",
|
| 32 |
-
temperature = data.get("temperature", 0.7)
|
|
|
|
|
|
|
| 33 |
try:
|
| 34 |
response = client.chat_completion(
|
| 35 |
model=model,
|
| 36 |
messages=messages,
|
| 37 |
max_tokens=max_tokens,
|
| 38 |
temperature=temperature,
|
|
|
|
| 39 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
return jsonify({
|
| 41 |
"id": "chatcmpl-hf",
|
| 42 |
"object": "chat.completion",
|
|
@@ -45,13 +63,26 @@ def chat():
|
|
| 45 |
"index": 0,
|
| 46 |
"message": {
|
| 47 |
"role": "assistant",
|
| 48 |
-
"content":
|
| 49 |
},
|
| 50 |
"finish_reason": "stop"
|
| 51 |
-
}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
})
|
|
|
|
| 53 |
except Exception as e:
|
| 54 |
-
return jsonify({
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
if __name__ == "__main__":
|
| 57 |
-
app.run(host="0.0.0.0", port=7860)
|
|
|
|
import os
from flask import Flask, request, jsonify
from huggingface_hub import InferenceClient

app = Flask(__name__)
# Hugging Face API token read from the environment; defaults to the empty
# string when unset (the client will then make unauthenticated calls).
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Model used by request handlers when the request body names no model;
# overridable via the DEFAULT_MODEL environment variable.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "Qwen/Qwen2.5-72B-Instruct")
# Single shared Inference API client, authenticated with HF_TOKEN.
client = InferenceClient(token=HF_TOKEN)
|
| 9 |
|
| 10 |
@app.route("/", methods=["GET"])
|
|
|
|
| 16 |
return jsonify({
|
| 17 |
"object": "list",
|
| 18 |
"data": [
|
| 19 |
+
{"id": "Qwen/Qwen2.5-72B-Instruct", "object": "model"},
|
| 20 |
{"id": "Qwen/Qwen2.5-7B-Instruct", "object": "model"},
|
| 21 |
+
{"id": "meta-llama/Meta-Llama-3.1-8B-Instruct", "object": "model"},
|
| 22 |
{"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model"},
|
| 23 |
]
|
| 24 |
})
|
|
|
|
| 26 |
@app.route("/v1/chat/completions", methods=["POST"])
|
| 27 |
def chat():
|
| 28 |
data = request.json
|
| 29 |
+
if not data:
|
| 30 |
+
return jsonify({"error": "No JSON body"}), 400
|
| 31 |
+
|
| 32 |
messages = data.get("messages", [])
|
| 33 |
model = data.get("model", DEFAULT_MODEL)
|
| 34 |
+
max_tokens = int(data.get("max_tokens", 2048))
|
| 35 |
+
temperature = float(data.get("temperature", 0.7))
|
| 36 |
+
stream = data.get("stream", False)
|
| 37 |
+
|
| 38 |
try:
|
| 39 |
response = client.chat_completion(
|
| 40 |
model=model,
|
| 41 |
messages=messages,
|
| 42 |
max_tokens=max_tokens,
|
| 43 |
temperature=temperature,
|
| 44 |
+
stream=False,
|
| 45 |
)
|
| 46 |
+
|
| 47 |
+
content = ""
|
| 48 |
+
if response.choices and len(response.choices) > 0:
|
| 49 |
+
choice = response.choices[0]
|
| 50 |
+
if hasattr(choice, "message") and choice.message:
|
| 51 |
+
content = choice.message.content or ""
|
| 52 |
+
elif hasattr(choice, "text"):
|
| 53 |
+
content = choice.text or ""
|
| 54 |
+
|
| 55 |
+
if not content:
|
| 56 |
+
return jsonify({"error": "Empty response from model"}), 500
|
| 57 |
+
|
| 58 |
return jsonify({
|
| 59 |
"id": "chatcmpl-hf",
|
| 60 |
"object": "chat.completion",
|
|
|
|
| 63 |
"index": 0,
|
| 64 |
"message": {
|
| 65 |
"role": "assistant",
|
| 66 |
+
"content": content
|
| 67 |
},
|
| 68 |
"finish_reason": "stop"
|
| 69 |
+
}],
|
| 70 |
+
"usage": {
|
| 71 |
+
"prompt_tokens": 0,
|
| 72 |
+
"completion_tokens": 0,
|
| 73 |
+
"total_tokens": 0
|
| 74 |
+
}
|
| 75 |
})
|
| 76 |
+
|
| 77 |
except Exception as e:
|
| 78 |
+
return jsonify({
|
| 79 |
+
"error": str(e),
|
| 80 |
+
"choices": [{
|
| 81 |
+
"index": 0,
|
| 82 |
+
"message": {"role": "assistant", "content": f"Error: {str(e)}"},
|
| 83 |
+
"finish_reason": "stop"
|
| 84 |
+
}]
|
| 85 |
+
}), 500
|
| 86 |
|
| 87 |
if __name__ == "__main__":
    # Direct-launch entry point (not used under a WSGI server): listen on
    # every interface at port 7860 — the standard Hugging Face Spaces port.
    app.run(debug=False, host="0.0.0.0", port=7860)
|