Raju2024 committed on
Commit
2d8f511
·
verified ·
1 Parent(s): 4819b01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -57
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import json
3
  import requests
4
  from fastapi import FastAPI, Header, HTTPException
5
- from fastapi.responses import StreamingResponse, PlainTextResponse, JSONResponse
6
  from pydantic import BaseModel
7
  from typing import List, Optional, Union
8
  from dotenv import load_dotenv
@@ -13,11 +13,9 @@ app = FastAPI()
13
 
14
  GEMMA_API_KEY = os.getenv("GEMMA_API_KEY")
15
  APP_API_KEY = os.getenv("APP_API_KEY")
16
-
17
  GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
18
 
19
 
20
- # -------- Models --------
21
  class Message(BaseModel):
22
  role: str
23
  content: Union[str, List[dict]]
@@ -30,7 +28,17 @@ class ChatRequest(BaseModel):
30
  plain: Optional[bool] = False
31
 
32
 
33
- # -------- Helpers --------
 
 
 
 
 
 
 
 
 
 
34
  def extract_text(messages):
35
  text = ""
36
 
@@ -42,7 +50,6 @@ def extract_text(messages):
42
  if item.get("type") == "text":
43
  text += item.get("text", "") + "\n"
44
  else:
45
- # ignore images and any unknown part types safely
46
  continue
47
  elif isinstance(content, str):
48
  text += content + "\n"
@@ -50,47 +57,49 @@ def extract_text(messages):
50
  return text.strip()
51
 
52
 
53
def build_payload(prompt: str):
    """Wrap a flat prompt string in the Gemini generateContent request schema."""
    text_part = {"text": prompt}
    return {"contents": [{"parts": [text_part]}]}
61
 
 
 
 
 
 
 
62
 
63
def get_stream_url(model_name: str) -> str:
    """Build the SSE streaming endpoint URL for *model_name* (key embedded)."""
    endpoint = f"{GEMINI_BASE_URL}/models/{model_name}:streamGenerateContent"
    return f"{endpoint}?alt=sse&key={GEMMA_API_KEY}"
65
 
66
 
67
def get_generate_url(model_name: str) -> str:
    """Build the non-streaming generateContent endpoint URL for *model_name*."""
    endpoint = f"{GEMINI_BASE_URL}/models/{model_name}:generateContent"
    return f"{endpoint}?key={GEMMA_API_KEY}"
69
 
70
 
71
def parse_gemini_text(chunk_json: dict) -> str:
    """Pull candidates[0].content.parts[0].text out of a Gemini stream chunk.

    Returns "" for any chunk that lacks the expected structure.
    """
    try:
        candidates = chunk_json.get("candidates", [])
        if not candidates:
            return ""
        parts = candidates[0].get("content", {}).get("parts", [])
        return (parts[0].get("text", "") or "") if parts else ""
    except Exception:
        # Malformed chunks are silently treated as empty; the caller skips them.
        return ""
87
 
 
 
88
 
89
- # -------- Endpoint --------
90
  @app.post("/v1/chat/completions")
91
  def chat_completions(
92
  request: ChatRequest,
93
- authorization: Optional[str] = Header(None)
94
  ):
95
  if not authorization:
96
  raise HTTPException(status_code=401, detail="Missing Authorization header")
@@ -104,22 +113,30 @@ def chat_completions(
104
 
105
  model_name = request.model or "gemma-3-27b-it"
106
  prompt = extract_text(request.messages)
107
- payload = build_payload(prompt)
 
 
 
 
 
 
 
108
 
109
  # -------- STREAM MODE --------
110
  if request.stream:
111
  def generate():
112
  try:
113
- url = get_stream_url(model_name)
114
 
115
  with requests.post(
116
  url,
117
  json=payload,
118
  stream=True,
119
  timeout=120,
120
- headers={"Content-Type": "application/json"}
121
  ) as res:
122
  res.raise_for_status()
 
123
 
124
  sent_role = False
125
 
@@ -132,22 +149,20 @@ def chat_completions(
132
  if line.startswith("data:"):
133
  line = line[5:].strip()
134
 
135
- if not line:
136
  continue
137
 
138
- # Some SSE implementations may send end markers
139
- if line == "[DONE]":
140
- break
141
-
142
  try:
143
  chunk_json = json.loads(line)
144
  except json.JSONDecodeError:
145
  continue
146
 
147
- text = parse_gemini_text(chunk_json)
148
  if not text:
149
  continue
150
 
 
 
151
  delta = {"content": text}
152
  if not sent_role:
153
  delta["role"] = "assistant"
@@ -160,9 +175,9 @@ def chat_completions(
160
  {
161
  "index": 0,
162
  "delta": delta,
163
- "finish_reason": None
164
  }
165
- ]
166
  }
167
 
168
  yield f"data: {json.dumps(openai_chunk, ensure_ascii=False)}\n\n"
@@ -170,37 +185,44 @@ def chat_completions(
170
  yield "data: [DONE]\n\n"
171
 
172
  except Exception as e:
173
- error_chunk = {
174
- "error": str(e)
175
- }
176
  yield f"data: {json.dumps(error_chunk, ensure_ascii=False)}\n\n"
177
  yield "data: [DONE]\n\n"
178
 
179
- return StreamingResponse(generate(), media_type="text/event-stream")
 
 
 
 
180
 
181
  # -------- NON-STREAM --------
182
  try:
183
- url = get_generate_url(model_name)
184
  res = requests.post(
185
  url,
186
  json=payload,
187
  timeout=120,
188
- headers={"Content-Type": "application/json"}
189
  )
190
  res.raise_for_status()
191
- data = res.json()
192
 
193
- output = data["candidates"][0]["content"]["parts"][0]["text"]
 
 
194
 
195
  except Exception as e:
196
  raise HTTPException(status_code=500, detail=str(e))
197
 
198
  # -------- PLAIN TEXT --------
199
  if request.plain:
200
- return PlainTextResponse(output)
 
 
 
201
 
202
  # -------- OPENAI JSON --------
203
- return JSONResponse({
204
  "id": "chatcmpl-gemma",
205
  "object": "chat.completion",
206
  "choices": [
@@ -208,9 +230,9 @@ def chat_completions(
208
  "index": 0,
209
  "message": {
210
  "role": "assistant",
211
- "content": output
212
  },
213
- "finish_reason": "stop"
214
  }
215
- ]
216
  })
 
2
  import json
3
  import requests
4
  from fastapi import FastAPI, Header, HTTPException
5
+ from fastapi.responses import StreamingResponse, PlainTextResponse, Response
6
  from pydantic import BaseModel
7
  from typing import List, Optional, Union
8
  from dotenv import load_dotenv
 
13
 
14
  GEMMA_API_KEY = os.getenv("GEMMA_API_KEY")
15
  APP_API_KEY = os.getenv("APP_API_KEY")
 
16
  GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
17
 
18
 
 
19
  class Message(BaseModel):
20
  role: str
21
  content: Union[str, List[dict]]
 
28
  plain: Optional[bool] = False
29
 
30
 
31
class UTF8JSONResponse(Response):
    """JSON response that emits raw UTF-8 instead of \\uXXXX ASCII escapes."""

    media_type = "application/json; charset=utf-8"

    def render(self, content) -> bytes:
        # Compact separators plus ensure_ascii=False keep non-Latin text readable.
        body = json.dumps(content, ensure_ascii=False, separators=(",", ":"))
        return body.encode("utf-8")
40
+
41
+
42
  def extract_text(messages):
43
  text = ""
44
 
 
50
  if item.get("type") == "text":
51
  text += item.get("text", "") + "\n"
52
  else:
 
53
  continue
54
  elif isinstance(content, str):
55
  text += content + "\n"
 
57
  return text.strip()
58
 
59
 
60
def fix_mojibake(text: str) -> str:
    """Repair UTF-8 text that was mis-decoded as CP-1252/Latin-1 ("mojibake").

    Example: the UTF-8 bytes of a curly apostrophe read back as CP-1252
    render as "\u00e2\u20ac\u2122"; re-encoding with CP-1252 and decoding
    as UTF-8 restores the original character.

    Args:
        text: Possibly-garbled model output. Non-``str`` values pass through.

    Returns:
        The repaired string when a round-trip succeeds, otherwise ``text``
        unchanged.
    """
    if not isinstance(text, str):
        return text

    # "\u00e2\u20ac" prefixes curly-quote/dash mojibake; "\u00c3" marks
    # garbled accented Latin letters (e.g. "\u00c3\u00a9" for "\u00e9").
    suspicious = ("\u00e2\u20ac", "\u00c3")
    if not any(marker in text for marker in suspicious):
        return text

    # Try CP-1252 before Latin-1: bytes 0x80-0x9F map to printable characters
    # ("\u20ac", "\u2122", curly quotes) only in CP-1252, and strict Latin-1
    # cannot encode them at all — encoding with latin1 alone raised
    # UnicodeError on exactly the punctuation cases this function targets.
    for encoding in ("cp1252", "latin1"):
        try:
            return text.encode(encoding).decode("utf-8")
        except UnicodeError:
            continue

    return text
 
76
 
77
 
78
def gemini_generate_url(model_name: str) -> str:
    """Build the non-streaming generateContent endpoint URL for *model_name*."""
    endpoint = f"{GEMINI_BASE_URL}/models/{model_name}:generateContent"
    return f"{endpoint}?key={GEMMA_API_KEY}"
80
 
81
 
82
def gemini_stream_url(model_name: str) -> str:
    """Build the SSE streaming endpoint URL for *model_name* (key embedded)."""
    endpoint = f"{GEMINI_BASE_URL}/models/{model_name}:streamGenerateContent"
    return f"{endpoint}?alt=sse&key={GEMMA_API_KEY}"
84
+
85
+
86
def extract_gemini_text(payload: dict) -> str:
    """Return the text of the first candidate part in a Gemini response.

    Falls back to "" whenever the expected structure
    (candidates[0].content.parts[0].text) is missing or empty.
    """
    found = payload.get("candidates") or []
    if not found:
        return ""

    first_content = found[0].get("content") or {}
    text_parts = first_content.get("parts") or []
    if not text_parts:
        return ""

    return text_parts[0].get("text", "") or ""
97
+
98
 
 
99
  @app.post("/v1/chat/completions")
100
  def chat_completions(
101
  request: ChatRequest,
102
+ authorization: Optional[str] = Header(None),
103
  ):
104
  if not authorization:
105
  raise HTTPException(status_code=401, detail="Missing Authorization header")
 
113
 
114
  model_name = request.model or "gemma-3-27b-it"
115
  prompt = extract_text(request.messages)
116
+
117
+ payload = {
118
+ "contents": [
119
+ {
120
+ "parts": [{"text": prompt}]
121
+ }
122
+ ]
123
+ }
124
 
125
  # -------- STREAM MODE --------
126
  if request.stream:
127
  def generate():
128
  try:
129
+ url = gemini_stream_url(model_name)
130
 
131
  with requests.post(
132
  url,
133
  json=payload,
134
  stream=True,
135
  timeout=120,
136
+ headers={"Content-Type": "application/json"},
137
  ) as res:
138
  res.raise_for_status()
139
+ res.encoding = "utf-8"
140
 
141
  sent_role = False
142
 
 
149
  if line.startswith("data:"):
150
  line = line[5:].strip()
151
 
152
+ if not line or line == "[DONE]":
153
  continue
154
 
 
 
 
 
155
  try:
156
  chunk_json = json.loads(line)
157
  except json.JSONDecodeError:
158
  continue
159
 
160
+ text = extract_gemini_text(chunk_json)
161
  if not text:
162
  continue
163
 
164
+ text = fix_mojibake(text)
165
+
166
  delta = {"content": text}
167
  if not sent_role:
168
  delta["role"] = "assistant"
 
175
  {
176
  "index": 0,
177
  "delta": delta,
178
+ "finish_reason": None,
179
  }
180
+ ],
181
  }
182
 
183
  yield f"data: {json.dumps(openai_chunk, ensure_ascii=False)}\n\n"
 
185
  yield "data: [DONE]\n\n"
186
 
187
  except Exception as e:
188
+ error_chunk = {"error": str(e)}
 
 
189
  yield f"data: {json.dumps(error_chunk, ensure_ascii=False)}\n\n"
190
  yield "data: [DONE]\n\n"
191
 
192
+ return StreamingResponse(
193
+ generate(),
194
+ media_type="text/event-stream; charset=utf-8",
195
+ headers={"Cache-Control": "no-cache"},
196
+ )
197
 
198
  # -------- NON-STREAM --------
199
  try:
200
+ url = gemini_generate_url(model_name)
201
  res = requests.post(
202
  url,
203
  json=payload,
204
  timeout=120,
205
+ headers={"Content-Type": "application/json"},
206
  )
207
  res.raise_for_status()
208
+ res.encoding = "utf-8"
209
 
210
+ data = res.json()
211
+ output = extract_gemini_text(data)
212
+ output = fix_mojibake(output)
213
 
214
  except Exception as e:
215
  raise HTTPException(status_code=500, detail=str(e))
216
 
217
  # -------- PLAIN TEXT --------
218
  if request.plain:
219
+ return PlainTextResponse(
220
+ output,
221
+ media_type="text/plain; charset=utf-8",
222
+ )
223
 
224
  # -------- OPENAI JSON --------
225
+ return UTF8JSONResponse({
226
  "id": "chatcmpl-gemma",
227
  "object": "chat.completion",
228
  "choices": [
 
230
  "index": 0,
231
  "message": {
232
  "role": "assistant",
233
+ "content": output,
234
  },
235
+ "finish_reason": "stop",
236
  }
237
+ ],
238
  })