infinityonline committed on
Commit
ee1b98b
·
verified ·
1 Parent(s): 6e16b45

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from flask import Flask, request, jsonify
4
+ from huggingface_hub import InferenceClient
5
+
6
# Flask application instance serving the InfinityLLM HTTP API.
app = Flask(__name__)

# Hugging Face API token, read from the environment (empty string if unset).
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Model used when a chat request does not specify one.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
# Shared Hugging Face inference client, authenticated with HF_TOKEN.
client = InferenceClient(token=HF_TOKEN)
11
+
12
@app.route("/", methods=["GET"])
def index():
    """Health-check endpoint confirming the API is up."""
    payload = {"status": "ok", "message": "InfinityLLM API is running"}
    return jsonify(payload)
15
+
16
@app.route("/v1/models", methods=["GET"])
def models():
    """List the models this API exposes, in OpenAI-style list format."""
    available = [
        "Qwen/Qwen2.5-7B-Instruct",
        "mistralai/Mistral-7B-Instruct-v0.3",
    ]
    listing = {
        "object": "list",
        "data": [{"id": model_id, "object": "model"} for model_id in available],
    }
    return jsonify(listing)
25
+
26
@app.route("/v1/chat/completions", methods=["POST"])
def chat():
    """OpenAI-compatible chat-completion endpoint.

    Expects a JSON body with:
        messages    -- list of {"role", "content"} dicts (required, non-empty)
        model       -- model id (optional, defaults to DEFAULT_MODEL)
        max_tokens  -- generation limit (optional, default 1024)
        temperature -- sampling temperature (optional, default 0.7)

    Proxies the request to the Hugging Face Inference API and returns an
    OpenAI-style chat.completion response. Returns 400 on a bad request
    body and 500 if the upstream inference call fails.
    """
    # request.json is None (or raises) when the body is missing or not JSON;
    # get_json(silent=True) lets us return a clear 400 instead of a 500.
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({"error": "Request body must be JSON"}), 400

    messages = data.get("messages", [])
    if not messages:
        # Fail fast rather than forwarding an empty conversation upstream.
        return jsonify({"error": "'messages' must be a non-empty list"}), 400

    model = data.get("model", DEFAULT_MODEL)
    max_tokens = data.get("max_tokens", 1024)
    temperature = data.get("temperature", 0.7)

    # Keep the try body minimal: only the upstream call can raise here.
    try:
        response = client.chat_completion(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
    except Exception as e:
        # Boundary handler: surface upstream failures (auth, model load,
        # network) to the caller as a 500 with the error text.
        return jsonify({"error": str(e)}), 500

    return jsonify({
        "id": "chatcmpl-hf",
        "object": "chat.completion",
        "model": model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": response.choices[0].message.content,
            },
            "finish_reason": "stop",
        }],
    })
55
+
56
# Script entry point: run the Flask development server on all interfaces.
# NOTE(review): port 7860 is presumably chosen for a Hugging Face Spaces
# deployment — confirm the intended hosting target.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)