Infinityecs1 committed on
Commit 78904ef · verified · 1 Parent(s): 8e703d5

Upload 4 files

Files changed (4):
  1. Dockerfile +15 -0
  2. README.md +5 -4
  3. app.py +139 -0
  4. requirements.txt +2 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
+ FROM python:3.11-slim
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY --chown=user . /app
+
+ EXPOSE 7860
+ CMD ["python", "app.py"]
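
Once this image is built and running (e.g. `docker build -t infinityllm .` then `docker run -p 7860:7860 -e HF_TOKEN=... infinityllm` — the `infinityllm` tag is illustrative, not part of the commit), the `/` route in app.py doubles as a health check. A minimal stdlib-only probe, assuming the container is listening on localhost:7860:

```python
import json
import urllib.request

# Assumes the container built from the Dockerfile above is up on localhost:7860.
with urllib.request.urlopen("http://localhost:7860/") as resp:
    print(json.load(resp))
# expected: {'status': 'ok', 'message': 'InfinityLLM API is running'}
```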
README.md CHANGED
@@ -1,11 +1,12 @@
  ---
- title: Infinityllm
- emoji: 🏢
- colorFrom: gray
- colorTo: pink
+ title: Infinity
+ emoji: 👁
+ colorFrom: green
+ colorTo: yellow
  sdk: docker
  pinned: false
  license: mit
+ short_description: infinityLLM
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,139 @@
+ import os
+ import json
+ from flask import Flask, request, jsonify, Response, stream_with_context
+ from huggingface_hub import InferenceClient
+
+ app = Flask(__name__)
+ HF_TOKEN = os.environ.get("HF_TOKEN", "")
+ DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "Qwen/Qwen2.5-72B-Instruct")
+ client = InferenceClient(token=HF_TOKEN)  # Hugging Face Inference API client
+
+ @app.route("/", methods=["GET"])
+ def index():
+     return jsonify({"status": "ok", "message": "InfinityLLM API is running"})
+
+ @app.route("/v1/models", methods=["GET"])
+ def models():
+     # Static model list in the OpenAI /v1/models response shape.
+     return jsonify({
+         "object": "list",
+         "data": [
+             {"id": "Qwen/Qwen2.5-72B-Instruct", "object": "model"},
+             {"id": "Qwen/Qwen2.5-7B-Instruct", "object": "model"},
+             {"id": "meta-llama/Meta-Llama-3.1-8B-Instruct", "object": "model"},
+             {"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model"},
+         ]
+     })
+
+ @app.route("/v1/chat/completions", methods=["POST"])
+ def chat():
+     data = request.get_json(silent=True)  # None (not an exception) on a missing/non-JSON body
+     if not data:
+         return jsonify({"error": "No JSON body"}), 400
+
+     messages = data.get("messages", [])
+     model = data.get("model", DEFAULT_MODEL)
+     max_tokens = int(data.get("max_tokens", 2048))
+     temperature = float(data.get("temperature", 0.7))
+     stream = data.get("stream", False)
+
+     try:
+         if stream:
+             def generate():
+                 full_content = ""
+                 try:
+                     for chunk in client.chat_completion(
+                         model=model,
+                         messages=messages,
+                         max_tokens=max_tokens,
+                         temperature=temperature,
+                         stream=True,
+                     ):
+                         delta_content = ""
+                         if chunk.choices and chunk.choices[0].delta:
+                             delta_content = chunk.choices[0].delta.content or ""
+                         full_content += delta_content
+                         chunk_data = {
+                             "id": "chatcmpl-hf",
+                             "object": "chat.completion.chunk",
+                             "model": model,
+                             "choices": [{
+                                 "index": 0,
+                                 "delta": {"role": "assistant", "content": delta_content},
+                                 "finish_reason": None
+                             }]
+                         }
+                         yield f"data: {json.dumps(chunk_data)}\n\n"
+
+                     final = {
+                         "id": "chatcmpl-hf",
+                         "object": "chat.completion.chunk",
+                         "model": model,
+                         "choices": [{
+                             "index": 0,
+                             "delta": {},
+                             "finish_reason": "stop"
+                         }]
+                     }
+                     yield f"data: {json.dumps(final)}\n\n"
+                     yield "data: [DONE]\n\n"  # OpenAI-style stream terminator
+                 except Exception as e:
+                     yield f"data: {json.dumps({'error': str(e)})}\n\n"
+
+             return Response(
+                 stream_with_context(generate()),
+                 mimetype="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "X-Accel-Buffering": "no"  # keep proxies from buffering the event stream
+                 }
+             )
+
+         else:
+             response = client.chat_completion(
+                 model=model,
+                 messages=messages,
+                 max_tokens=max_tokens,
+                 temperature=temperature,
+                 stream=False,
+             )
+
+             content = ""
+             if response.choices and len(response.choices) > 0:
+                 choice = response.choices[0]
+                 if hasattr(choice, "message") and choice.message:
+                     content = choice.message.content or ""
+
+             if not content:
+                 return jsonify({"error": "Empty response from model"}), 500
+
+             return jsonify({
+                 "id": "chatcmpl-hf",
+                 "object": "chat.completion",
+                 "model": model,
+                 "choices": [{
+                     "index": 0,
+                     "message": {
+                         "role": "assistant",
+                         "content": content
+                     },
+                     "finish_reason": "stop"
+                 }],
+                 "usage": {  # token counts are not tracked; zeros keep the OpenAI shape
+                     "prompt_tokens": 0,
+                     "completion_tokens": 0,
+                     "total_tokens": 0
+                 }
+             })
+
+     except Exception as e:
+         return jsonify({
+             "error": str(e),
+             "choices": [{
+                 "index": 0,
+                 "message": {"role": "assistant", "content": f"Error: {str(e)}"},
+                 "finish_reason": "stop"
+             }]
+         }), 500
+
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=7860, debug=False)
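
Since the routes mirror the OpenAI chat-completions wire format, any plain HTTP client can exercise them. A sketch using only the standard library, assuming the server above is reachable on localhost:7860 (the URL, prompts, and the `post_json` helper are illustrative, not part of the commit):

```python
import json
import urllib.request

BASE = "http://localhost:7860"  # assumption: the Flask app above, running locally

def post_json(path, payload):
    # Content-Type must be application/json for request.get_json() to parse the body.
    req = urllib.request.Request(
        BASE + path,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    return urllib.request.urlopen(req)

# Non-streaming: one JSON document carrying the whole reply.
with post_json("/v1/chat/completions", {
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "max_tokens": 64,
}) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])

# Streaming: server-sent events, one "data: {...}" line per delta,
# terminated by "data: [DONE]".
with post_json("/v1/chat/completions", {
    "messages": [{"role": "user", "content": "Count to five."}],
    "stream": True,
}) as resp:
    for raw_line in resp:
        line = raw_line.decode("utf-8").strip()
        if not line.startswith("data: "):
            continue
        payload = line[len("data: "):]
        if payload == "[DONE]":
            break
        for choice in json.loads(payload).get("choices", []):
            print(choice["delta"].get("content") or "", end="", flush=True)
    print()
```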
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ flask
+ huggingface_hub