infinityonline committed on
Commit
a3e88ad
·
verified ·
1 Parent(s): b2f1d3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -35
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
- from flask import Flask, request, jsonify
 
3
  from huggingface_hub import InferenceClient
4
 
5
  app = Flask(__name__)
@@ -33,45 +34,96 @@ def chat():
33
  model = data.get("model", DEFAULT_MODEL)
34
  max_tokens = int(data.get("max_tokens", 2048))
35
  temperature = float(data.get("temperature", 0.7))
 
36
 
37
  try:
38
- response = client.chat_completion(
39
- model=model,
40
- messages=messages,
41
- max_tokens=max_tokens,
42
- temperature=temperature,
43
- stream=False,
44
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- content = ""
47
- if response.choices and len(response.choices) > 0:
48
- choice = response.choices[0]
49
- if hasattr(choice, "message") and choice.message:
50
- content = choice.message.content or ""
51
- elif hasattr(choice, "text"):
52
- content = choice.text or ""
 
53
 
54
- if not content:
55
- return jsonify({"error": "Empty response from model"}), 500
 
 
 
 
 
 
56
 
57
- return jsonify({
58
- "id": "chatcmpl-hf",
59
- "object": "chat.completion",
60
- "model": model,
61
- "choices": [{
62
- "index": 0,
63
- "message": {
64
- "role": "assistant",
65
- "content": content
66
- },
67
- "finish_reason": "stop"
68
- }],
69
- "usage": {
70
- "prompt_tokens": 0,
71
- "completion_tokens": 0,
72
- "total_tokens": 0
73
- }
74
- })
 
 
 
 
 
 
 
 
 
75
 
76
  except Exception as e:
77
  return jsonify({
 
1
  import os
2
+ import json
3
+ from flask import Flask, request, jsonify, Response, stream_with_context
4
  from huggingface_hub import InferenceClient
5
 
6
  app = Flask(__name__)
 
34
  model = data.get("model", DEFAULT_MODEL)
35
  max_tokens = int(data.get("max_tokens", 2048))
36
  temperature = float(data.get("temperature", 0.7))
37
+ stream = data.get("stream", False)
38
 
39
  try:
40
+ if stream:
41
+ def generate():
42
+ full_content = ""
43
+ try:
44
+ for chunk in client.chat_completion(
45
+ model=model,
46
+ messages=messages,
47
+ max_tokens=max_tokens,
48
+ temperature=temperature,
49
+ stream=True,
50
+ ):
51
+ delta_content = ""
52
+ if chunk.choices and chunk.choices[0].delta:
53
+ delta_content = chunk.choices[0].delta.content or ""
54
+ full_content += delta_content
55
+ chunk_data = {
56
+ "id": "chatcmpl-hf",
57
+ "object": "chat.completion.chunk",
58
+ "model": model,
59
+ "choices": [{
60
+ "index": 0,
61
+ "delta": {"role": "assistant", "content": delta_content},
62
+ "finish_reason": None
63
+ }]
64
+ }
65
+ yield f"data: {json.dumps(chunk_data)}\n\n"
66
+
67
+ final = {
68
+ "id": "chatcmpl-hf",
69
+ "object": "chat.completion.chunk",
70
+ "model": model,
71
+ "choices": [{
72
+ "index": 0,
73
+ "delta": {},
74
+ "finish_reason": "stop"
75
+ }]
76
+ }
77
+ yield f"data: {json.dumps(final)}\n\n"
78
+ yield "data: [DONE]\n\n"
79
+ except Exception as e:
80
+ yield f"data: {json.dumps({'error': str(e)})}\n\n"
81
 
82
+ return Response(
83
+ stream_with_context(generate()),
84
+ mimetype="text/event-stream",
85
+ headers={
86
+ "Cache-Control": "no-cache",
87
+ "X-Accel-Buffering": "no"
88
+ }
89
+ )
90
 
91
+ else:
92
+ response = client.chat_completion(
93
+ model=model,
94
+ messages=messages,
95
+ max_tokens=max_tokens,
96
+ temperature=temperature,
97
+ stream=False,
98
+ )
99
 
100
+ content = ""
101
+ if response.choices and len(response.choices) > 0:
102
+ choice = response.choices[0]
103
+ if hasattr(choice, "message") and choice.message:
104
+ content = choice.message.content or ""
105
+
106
+ if not content:
107
+ return jsonify({"error": "Empty response from model"}), 500
108
+
109
+ return jsonify({
110
+ "id": "chatcmpl-hf",
111
+ "object": "chat.completion",
112
+ "model": model,
113
+ "choices": [{
114
+ "index": 0,
115
+ "message": {
116
+ "role": "assistant",
117
+ "content": content
118
+ },
119
+ "finish_reason": "stop"
120
+ }],
121
+ "usage": {
122
+ "prompt_tokens": 0,
123
+ "completion_tokens": 0,
124
+ "total_tokens": 0
125
+ }
126
+ })
127
 
128
  except Exception as e:
129
  return jsonify({