infinityonline committed on
Commit
2d5c9d2
·
verified ·
1 Parent(s): 3c90b1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -9
app.py CHANGED
@@ -1,12 +1,10 @@
1
  import os
2
- import json
3
  from flask import Flask, request, jsonify
4
  from huggingface_hub import InferenceClient
5
 
6
  app = Flask(__name__)
7
-
8
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
9
- DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
10
  client = InferenceClient(token=HF_TOKEN)
11
 
12
  @app.route("/", methods=["GET"])
@@ -18,7 +16,9 @@ def models():
18
  return jsonify({
19
  "object": "list",
20
  "data": [
 
21
  {"id": "Qwen/Qwen2.5-7B-Instruct", "object": "model"},
 
22
  {"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model"},
23
  ]
24
  })
@@ -26,17 +26,35 @@ def models():
26
  @app.route("/v1/chat/completions", methods=["POST"])
27
  def chat():
28
  data = request.json
 
 
 
29
  messages = data.get("messages", [])
30
  model = data.get("model", DEFAULT_MODEL)
31
- max_tokens = data.get("max_tokens", 1024)
32
- temperature = data.get("temperature", 0.7)
 
 
33
  try:
34
  response = client.chat_completion(
35
  model=model,
36
  messages=messages,
37
  max_tokens=max_tokens,
38
  temperature=temperature,
 
39
  )
 
 
 
 
 
 
 
 
 
 
 
 
40
  return jsonify({
41
  "id": "chatcmpl-hf",
42
  "object": "chat.completion",
@@ -45,13 +63,26 @@ def chat():
45
  "index": 0,
46
  "message": {
47
  "role": "assistant",
48
- "content": response.choices[0].message.content
49
  },
50
  "finish_reason": "stop"
51
- }]
 
 
 
 
 
52
  })
 
53
  except Exception as e:
54
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
55
 
56
  if __name__ == "__main__":
57
- app.run(host="0.0.0.0", port=7860)
 
1
  import os
 
2
  from flask import Flask, request, jsonify
3
  from huggingface_hub import InferenceClient
4
 
5
app = Flask(__name__)

# Hugging Face Inference API token; an empty string means unauthenticated
# (rate-limited) access rather than a startup failure.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Model used when a request body does not name one explicitly.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "Qwen/Qwen2.5-72B-Instruct")
# Single shared client reused across all requests.
client = InferenceClient(token=HF_TOKEN)
9
 
10
  @app.route("/", methods=["GET"])
 
16
  return jsonify({
17
  "object": "list",
18
  "data": [
19
+ {"id": "Qwen/Qwen2.5-72B-Instruct", "object": "model"},
20
  {"id": "Qwen/Qwen2.5-7B-Instruct", "object": "model"},
21
+ {"id": "meta-llama/Meta-Llama-3.1-8B-Instruct", "object": "model"},
22
  {"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model"},
23
  ]
24
  })
 
26
@app.route("/v1/chat/completions", methods=["POST"])
def chat():
    """OpenAI-compatible, non-streaming chat completions endpoint.

    Expects a JSON body with ``messages`` and, optionally, ``model``,
    ``max_tokens`` and ``temperature``. The request is proxied to the
    Hugging Face Inference API and reshaped into an OpenAI-style
    ``chat.completion`` payload.

    Returns:
        200 with the completion on success,
        400 when the request carries no JSON body,
        500 when the model returns nothing or the upstream call fails.
    """
    data = request.json
    if not data:
        return jsonify({"error": "No JSON body"}), 400

    messages = data.get("messages", [])
    model = data.get("model", DEFAULT_MODEL)
    max_tokens = int(data.get("max_tokens", 2048))
    temperature = float(data.get("temperature", 0.7))

    try:
        # Streaming is not supported: jsonify needs the whole completion in
        # hand, so any client-supplied "stream" flag is deliberately ignored
        # (the previous dead `stream = data.get("stream", False)` is removed).
        response = client.chat_completion(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=False,
        )

        # Tolerate both chat-style (choice.message.content) and text-style
        # (choice.text) payload shapes from the hub client.
        content = ""
        if response.choices and len(response.choices) > 0:
            choice = response.choices[0]
            if hasattr(choice, "message") and choice.message:
                content = choice.message.content or ""
            elif hasattr(choice, "text"):
                content = choice.text or ""

        if not content:
            return jsonify({"error": "Empty response from model"}), 500

        # Report real token counts when the backend provides them instead of
        # hardcoded zeros; fall back to 0 so the response schema stays stable.
        usage = getattr(response, "usage", None)
        prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
        completion_tokens = getattr(usage, "completion_tokens", 0) or 0

        # NOTE(review): two response-dict lines here were elided in the diff
        # view; "model"/"choices" reconstructed — confirm against the full file.
        return jsonify({
            "id": "chatcmpl-hf",
            "object": "chat.completion",
            "model": model,
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": content
                },
                "finish_reason": "stop"
            }],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens
            }
        })

    except Exception as e:
        # Broad catch is acceptable at this route boundary; the error is
        # surfaced both as an "error" field and as an assistant message so
        # OpenAI-client callers that only read `choices` still see it.
        return jsonify({
            "error": str(e),
            "choices": [{
                "index": 0,
                "message": {"role": "assistant", "content": f"Error: {str(e)}"},
                "finish_reason": "stop"
            }]
        }), 500
86
 
87
if __name__ == "__main__":
    # Bind all interfaces on 7860 — the conventional Hugging Face Spaces port.
    app.run(host="0.0.0.0", port=7860, debug=False)