Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
| 1 |
import os
|
| 2 |
-
import json
|
| 3 |
from flask import Flask, request, jsonify
|
| 4 |
from huggingface_hub import InferenceClient
|
| 5 |
|
| 6 |
app = Flask(__name__)
|
| 7 |
-
|
| 8 |
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 9 |
-
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "Qwen/Qwen2.5-
|
| 10 |
client = InferenceClient(token=HF_TOKEN)
|
| 11 |
|
| 12 |
@app.route("/", methods=["GET"])
|
|
@@ -18,7 +16,9 @@ def models():
|
|
| 18 |
return jsonify({
|
| 19 |
"object": "list",
|
| 20 |
"data": [
|
|
|
|
| 21 |
{"id": "Qwen/Qwen2.5-7B-Instruct", "object": "model"},
|
|
|
|
| 22 |
{"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model"},
|
| 23 |
]
|
| 24 |
})
|
|
@@ -26,17 +26,35 @@ def models():
|
|
| 26 |
@app.route("/v1/chat/completions", methods=["POST"])
|
| 27 |
def chat():
|
| 28 |
data = request.json
|
|
|
|
|
|
|
|
|
|
| 29 |
messages = data.get("messages", [])
|
| 30 |
model = data.get("model", DEFAULT_MODEL)
|
| 31 |
-
max_tokens = data.get("max_tokens",
|
| 32 |
-
temperature = data.get("temperature", 0.7)
|
|
|
|
|
|
|
| 33 |
try:
|
| 34 |
response = client.chat_completion(
|
| 35 |
model=model,
|
| 36 |
messages=messages,
|
| 37 |
max_tokens=max_tokens,
|
| 38 |
temperature=temperature,
|
|
|
|
| 39 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
return jsonify({
|
| 41 |
"id": "chatcmpl-hf",
|
| 42 |
"object": "chat.completion",
|
|
@@ -45,13 +63,26 @@ def chat():
|
|
| 45 |
"index": 0,
|
| 46 |
"message": {
|
| 47 |
"role": "assistant",
|
| 48 |
-
"content":
|
| 49 |
},
|
| 50 |
"finish_reason": "stop"
|
| 51 |
-
}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
})
|
|
|
|
| 53 |
except Exception as e:
|
| 54 |
-
return jsonify({
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
if __name__ == "__main__":
|
| 57 |
-
app.run(host="0.0.0.0", port=7860)
|
|
|
|
import os
from flask import Flask, request, jsonify
from huggingface_hub import InferenceClient

app = Flask(__name__)
# Hugging Face API token read from the environment; defaults to the empty
# string when unset (the client will then make unauthenticated calls).
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Model used by request handlers when the request body names no model;
# overridable via the DEFAULT_MODEL environment variable.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "Qwen/Qwen2.5-72B-Instruct")
# Single shared Inference API client, authenticated with HF_TOKEN.
client = InferenceClient(token=HF_TOKEN)
|
| 9 |
|
| 10 |
@app.route("/", methods=["GET"])
|
|
|
|
| 16 |
return jsonify({
|
| 17 |
"object": "list",
|
| 18 |
"data": [
|
| 19 |
+
{"id": "Qwen/Qwen2.5-72B-Instruct", "object": "model"},
|
| 20 |
{"id": "Qwen/Qwen2.5-7B-Instruct", "object": "model"},
|
| 21 |
+
{"id": "meta-llama/Meta-Llama-3.1-8B-Instruct", "object": "model"},
|
| 22 |
{"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model"},
|
| 23 |
]
|
| 24 |
})
|
|
|
|
| 26 |
@app.route("/v1/chat/completions", methods=["POST"])
|
| 27 |
def chat():
|
| 28 |
data = request.json
|
| 29 |
+
if not data:
|
| 30 |
+
return jsonify({"error": "No JSON body"}), 400
|
| 31 |
+
|
| 32 |
messages = data.get("messages", [])
|
| 33 |
model = data.get("model", DEFAULT_MODEL)
|
| 34 |
+
max_tokens = int(data.get("max_tokens", 2048))
|
| 35 |
+
temperature = float(data.get("temperature", 0.7))
|
| 36 |
+
stream = data.get("stream", False)
|
| 37 |
+
|
| 38 |
try:
|
| 39 |
response = client.chat_completion(
|
| 40 |
model=model,
|
| 41 |
messages=messages,
|
| 42 |
max_tokens=max_tokens,
|
| 43 |
temperature=temperature,
|
| 44 |
+
stream=False,
|
| 45 |
)
|
| 46 |
+
|
| 47 |
+
content = ""
|
| 48 |
+
if response.choices and len(response.choices) > 0:
|
| 49 |
+
choice = response.choices[0]
|
| 50 |
+
if hasattr(choice, "message") and choice.message:
|
| 51 |
+
content = choice.message.content or ""
|
| 52 |
+
elif hasattr(choice, "text"):
|
| 53 |
+
content = choice.text or ""
|
| 54 |
+
|
| 55 |
+
if not content:
|
| 56 |
+
return jsonify({"error": "Empty response from model"}), 500
|
| 57 |
+
|
| 58 |
return jsonify({
|
| 59 |
"id": "chatcmpl-hf",
|
| 60 |
"object": "chat.completion",
|
|
|
|
| 63 |
"index": 0,
|
| 64 |
"message": {
|
| 65 |
"role": "assistant",
|
| 66 |
+
"content": content
|
| 67 |
},
|
| 68 |
"finish_reason": "stop"
|
| 69 |
+
}],
|
| 70 |
+
"usage": {
|
| 71 |
+
"prompt_tokens": 0,
|
| 72 |
+
"completion_tokens": 0,
|
| 73 |
+
"total_tokens": 0
|
| 74 |
+
}
|
| 75 |
})
|
| 76 |
+
|
| 77 |
except Exception as e:
|
| 78 |
+
return jsonify({
|
| 79 |
+
"error": str(e),
|
| 80 |
+
"choices": [{
|
| 81 |
+
"index": 0,
|
| 82 |
+
"message": {"role": "assistant", "content": f"Error: {str(e)}"},
|
| 83 |
+
"finish_reason": "stop"
|
| 84 |
+
}]
|
| 85 |
+
}), 500
|
| 86 |
|
| 87 |
if __name__ == "__main__":
    # Direct-launch entry point (not used under a WSGI server): listen on
    # every interface at port 7860 — the standard Hugging Face Spaces port.
    app.run(debug=False, host="0.0.0.0", port=7860)
|