infinityonline committed on
Commit
ee1b98b
·
verified ·
1 Parent(s): 6e16b45

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from flask import Flask, request, jsonify
4
+ from huggingface_hub import InferenceClient
5
+
6
# Flask application instance serving the InfinityLLM HTTP API.
app = Flask(__name__)

# Hugging Face API token, read from the environment (empty string if unset).
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Model used when a chat request does not specify one.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
# Shared Hugging Face inference client, authenticated with HF_TOKEN.
client = InferenceClient(token=HF_TOKEN)
11
+
12
@app.route("/", methods=["GET"])
def index():
    """Health-check endpoint confirming the API is up."""
    payload = {"status": "ok", "message": "InfinityLLM API is running"}
    return jsonify(payload)
15
+
16
@app.route("/v1/models", methods=["GET"])
def models():
    """List the models this API exposes, in OpenAI-style list format."""
    available = [
        "Qwen/Qwen2.5-7B-Instruct",
        "mistralai/Mistral-7B-Instruct-v0.3",
    ]
    listing = {
        "object": "list",
        "data": [{"id": model_id, "object": "model"} for model_id in available],
    }
    return jsonify(listing)
25
+
26
@app.route("/v1/chat/completions", methods=["POST"])
def chat():
    """OpenAI-compatible chat-completion endpoint.

    Expects a JSON body with:
        messages    -- list of {"role", "content"} dicts (required, non-empty)
        model       -- model id (optional, defaults to DEFAULT_MODEL)
        max_tokens  -- generation limit (optional, default 1024)
        temperature -- sampling temperature (optional, default 0.7)

    Proxies the request to the Hugging Face Inference API and returns an
    OpenAI-style chat.completion response. Returns 400 on a bad request
    body and 500 if the upstream inference call fails.
    """
    # request.json is None (or raises) when the body is missing or not JSON;
    # get_json(silent=True) lets us return a clear 400 instead of a 500.
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({"error": "Request body must be JSON"}), 400

    messages = data.get("messages", [])
    if not messages:
        # Fail fast rather than forwarding an empty conversation upstream.
        return jsonify({"error": "'messages' must be a non-empty list"}), 400

    model = data.get("model", DEFAULT_MODEL)
    max_tokens = data.get("max_tokens", 1024)
    temperature = data.get("temperature", 0.7)

    # Keep the try body minimal: only the upstream call can raise here.
    try:
        response = client.chat_completion(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
    except Exception as e:
        # Boundary handler: surface upstream failures (auth, model load,
        # network) to the caller as a 500 with the error text.
        return jsonify({"error": str(e)}), 500

    return jsonify({
        "id": "chatcmpl-hf",
        "object": "chat.completion",
        "model": model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": response.choices[0].message.content,
            },
            "finish_reason": "stop",
        }],
    })
55
+
56
# Script entry point: run the Flask development server on all interfaces.
# NOTE(review): port 7860 is presumably chosen for a Hugging Face Spaces
# deployment — confirm the intended hosting target.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)