ricebug committed on
Commit
7b6656c
·
verified ·
1 Parent(s): f6b14d2

Create main.ts

Browse files
Files changed (1) hide show
  1. main.ts +789 -0
main.ts ADDED
@@ -0,0 +1,789 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { serve } from "https://deno.land/std@0.208.0/http/server.ts";
2
+ import { decode } from "https://deno.land/std@0.208.0/encoding/base64.ts";
3
+
4
+ // --- 常量定义 ---
5
+ const MAX_DOCUMENT_SIZE_MB = 20; // 设置最大文档大小限制(单位:MB)
6
+ const MAX_DOCUMENT_SIZE_BYTES = MAX_DOCUMENT_SIZE_MB * 1024 * 1024;
7
+ const MODELS_CACHE_DURATION = 60000; // 1分钟模型缓存
8
+
9
+ interface OpenAIMessage {
10
+ role: "system" | "user" | "assistant";
11
+ content: string | Array<{
12
+ type: string;
13
+ text?: string;
14
+ image_url?: { url: string };
15
+ document?: { url: string; type: string }; // 支持多种文档类型
16
+ }>;
17
+ }
18
+
19
+ interface OpenAIRequest {
20
+ model: string;
21
+ messages: OpenAIMessage[];
22
+ max_tokens?: number;
23
+ temperature?: number;
24
+ stream?: boolean;
25
+ }
26
+
27
+ interface OpenAITTSRequest {
28
+ model: string;
29
+ input: string;
30
+ voice: 'Zephyr' | 'Puck' | 'Charon' | 'Kore' | 'Fenrir' | 'Leda' | string;
31
+ }
32
+
33
+ class GoogleAIService {
34
+ public apiKeys: string[];
35
+ public currentKeyIndex = 0;
36
+ public cachedModels: any[] = [];
37
+ public modelsLastFetch = 0;
38
+
39
+ constructor() {
40
+ this.apiKeys = [];
41
+ this.apiKeys = Deno.env.get(`GOOGLE_AI_KEYS`).split(',').map(s => s.trim());
42
+ if (this.apiKeys.length === 0) {
43
+ throw new Error("No Google AI API keys found in environment variables (e.g., GOOGLE_AI_KEYS)");
44
+ }
45
+ }
46
+
47
+ private getNextApiKey(): string {
48
+ const key = this.apiKeys[this.currentKeyIndex];
49
+ console.log(key)
50
+ this.currentKeyIndex = (this.currentKeyIndex + 1) % this.apiKeys.length;
51
+ return key;
52
+ }
53
+
54
+ async fetchOfficialModels(): Promise<any[]> {
55
+ const now = Date.now();
56
+ if (this.cachedModels.length > 0 && (now - this.modelsLastFetch) < MODELS_CACHE_DURATION) {
57
+ return this.cachedModels;
58
+ }
59
+
60
+ const apiKey = this.getNextApiKey();
61
+ try {
62
+ const response = await fetch(
63
+ `https://generativelanguage.googleapis.com/v1beta/models?key=${apiKey}`,
64
+ { method: "GET", headers: { "Content-Type": "application/json" } }
65
+ );
66
+
67
+ if (!response.ok) {
68
+ console.warn(`Failed to fetch models from Google AI: ${response.status}. Using fallback models.`);
69
+ return this.getFallbackModels();
70
+ }
71
+
72
+ const data = await response.json();
73
+ if (data.models && Array.isArray(data.models)) {
74
+ this.cachedModels = data.models.filter((model: any) =>
75
+ model.supportedGenerationMethods?.includes('generateContent')
76
+ );
77
+ this.modelsLastFetch = now;
78
+ this.cachedModels.push({
79
+ "id": "gemini-2.0-flash-search",
80
+ "name": "gemini-2.0-flash-search",
81
+ "object": "model",
82
+ "created": now,
83
+ "owned_by": "google",
84
+ "description": "Gemini 2.0 Flash with GoogleSearch",
85
+ "maxTokens": 1048576
86
+ })
87
+ this.cachedModels.push({
88
+ "id": "gemini-2.5-flash-search",
89
+ "name": "gemini-2.5-flash-search",
90
+ "object": "model",
91
+ "created": now,
92
+ "owned_by": "google",
93
+ "description": "Gemini 2.5 Flash with GoogleSearch",
94
+ "maxTokens": 1048576
95
+ })
96
+ this.cachedModels.push({
97
+ "id": "gemini-2.5-pro-search",
98
+ "name": "gemini-2.5-pro-search",
99
+ "object": "model",
100
+ "created": now,
101
+ "owned_by": "google",
102
+ "description": "Gemini 2.5 Pro with GoogleSearch",
103
+ "maxTokens": 1048576
104
+ })
105
+ console.log(`Fetched ${this.cachedModels.length} models from Google AI`);
106
+ return this.cachedModels;
107
+ }
108
+ return this.getFallbackModels();
109
+ } catch (error) {
110
+ console.warn("Error fetching models from Google AI:", error.message, ". Using fallback models.");
111
+ return this.getFallbackModels();
112
+ }
113
+ }
114
+
115
+ private getFallbackModels(): any[] {
116
+ return [
117
+ { name: "models/gemini-1.5-pro", displayName: "Gemini 1.5 Pro", description: "Mid-size multimodal model that supports up to 1 million tokens, images, and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
118
+ { name: "models/gemini-1.5-flash", displayName: "Gemini 1.5 Flash", description: "Fast and versatile multimodal model for diverse tasks, supports images and documents (PDF, TXT, MD)", supportedGenerationMethods: ["generateContent"], maxTokens: 1000000, supportsDocuments: true },
119
+ { name: "models/gemini-2.0-flash-preview-image-generation", displayName: "Gemini 2.0 Flash Image Generation", description: "Advanced model for generating and editing high-quality images with text and image outputs", supportedGenerationMethods: ["generateContent"], maxTokens: 100000, capabilities: ["text", "image_generation", "image_editing"] },
120
+ { name: "models/gemini-2.5-flash-preview-tts", displayName: "Gemini 2.5 Flash TTS", description: "Advanced model for generating high-quality speech from text.", supportedGenerationMethods: ["generateContent"] },
121
+ ];
122
+ }
123
+
124
+ public isVisionModel = (modelName: string): boolean => modelName.toLowerCase().includes('vision') || modelName.toLowerCase().includes('pro');
125
+ public isImageGenerationModel = (modelName: string): boolean => modelName.includes('image') || modelName === 'gemini-2.0-flash-preview-image-generation' || modelName === 'gemini-2.5-flash-image-preview';
126
+ public isImageEditingModel = (modelName: string): boolean => modelName.includes('image') || modelName === 'gemini-2.0-flash-preview-image-generation' || modelName === 'gemini-2.5-flash-image-preview';
127
+ public isDocumentModel = (modelName: string): boolean => modelName.toLowerCase().includes('gemini-1.5') || modelName.toLowerCase().includes('pro') || modelName.toLowerCase().includes('flash');
128
+ public isTTSModel = (modelName: string): boolean => modelName.toLowerCase().includes('tts');
129
+
130
+ async generateSpeech(text: string, modelName: string, voiceName: string): Promise<string> {
131
+ const apiKey = this.getNextApiKey();
132
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
133
+
134
+ console.log(`Generating speech with model: ${fullModelName}, voice: ${voiceName}`);
135
+
136
+ const requestBody = {
137
+ contents: [{
138
+ parts: [{ "text": text }]
139
+ }],
140
+ generationConfig: {
141
+ responseModalities: ["AUDIO"],
142
+ speechConfig: {
143
+ voiceConfig: {
144
+ prebuiltVoiceConfig: {
145
+ voiceName: voiceName
146
+ }
147
+ }
148
+ }
149
+ },
150
+ model: fullModelName,
151
+ };
152
+
153
+ const response = await fetch(
154
+ `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
155
+ {
156
+ method: "POST",
157
+ headers: { "Content-Type": "application/json" },
158
+ body: JSON.stringify(requestBody),
159
+ }
160
+ );
161
+
162
+ if (!response.ok) {
163
+ const errorBody = await response.json().catch(() => response.text());
164
+ const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
165
+ console.error(`Google TTS API Error: ${response.status} - ${errorMessage}`);
166
+ throw new Error(`Google TTS API request failed with status ${response.status}: ${errorMessage}`);
167
+ }
168
+
169
+ const data = await response.json();
170
+ const audioData = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
171
+
172
+ if (!audioData) {
173
+ console.error("Invalid TTS response from Google AI:", JSON.stringify(data));
174
+ throw new Error("No audio data received from Google AI TTS service.");
175
+ }
176
+
177
+ return audioData;
178
+ }
179
+
180
+ private getDocumentType(url: string): string {
181
+ const lowerUrl = url.toLowerCase();
182
+ if (lowerUrl.startsWith('data:application/pdf') || lowerUrl.includes('.pdf')) return 'pdf';
183
+ if (lowerUrl.startsWith('data:text/plain') || lowerUrl.includes('.txt')) return 'txt';
184
+ if (lowerUrl.startsWith('data:text/markdown') || lowerUrl.includes('.md')) return 'md';
185
+ if (lowerUrl.startsWith('data:application/msword') || lowerUrl.includes('.doc')) return 'doc';
186
+ if (lowerUrl.startsWith('data:application/vnd.openxmlformats-officedocument.wordprocessingml.document') || lowerUrl.includes('.docx')) return 'docx';
187
+ return 'unknown';
188
+ }
189
+
190
+ /**
191
+ * [关键改进] 提取并验证文档数据,增加大小检查和更稳健的解析
192
+ */
193
+ private extractDocumentData(documentUrl: string): { mimeType: string; data: string; text?: string; docType: string } {
194
+ const docType = this.getDocumentType(documentUrl);
195
+
196
+ if (!documentUrl.startsWith("data:")) {
197
+ if (documentUrl.startsWith("http")) {
198
+ throw new Error("Document URL downloads are not supported. Please provide base64 encoded data URLs.");
199
+ }
200
+ // 如果不是data url或http url,则假定为纯base64数据,但这是一种不推荐的格式
201
+ // 为了健壮性,我们强制要求使用标准的 data URL
202
+ throw new Error("Document must be provided as a standard base64 data URL (e.g., 'data:application/pdf;base64,...').");
203
+ }
204
+
205
+ const parts = documentUrl.split(",");
206
+ if (parts.length !== 2) {
207
+ throw new Error("Invalid data URL format for document. Expected 'data:[mime];base64,[data]'.");
208
+ }
209
+ const [mimeInfo, base64Data] = parts;
210
+
211
+ // **改进1: 检查文件大小**
212
+ // Base64 字符串的长度约是原始数据的 4/3。
213
+ const approxSizeInBytes = base64Data.length * 0.75;
214
+ if (approxSizeInBytes > MAX_DOCUMENT_SIZE_BYTES) {
215
+ throw new Error(`Document size (${(approxSizeInBytes / 1024 / 1024).toFixed(2)}MB) exceeds the ${MAX_DOCUMENT_SIZE_MB}MB limit.`);
216
+ }
217
+
218
+ const mimeType = mimeInfo.split(":")[1]?.split(";")[0] || 'application/octet-stream';
219
+
220
+ if (docType === 'txt' || docType === 'md') {
221
+ try {
222
+ const textContent = atob(base64Data);
223
+ return { mimeType, data: base64Data, text: textContent, docType };
224
+ } catch (error) {
225
+ console.error(`Failed to decode base64 content for ${docType}:`, error);
226
+ throw new Error(`Invalid base64 encoding for ${docType} document.`);
227
+ }
228
+ }
229
+
230
+ // 自动识别PDF的MIME类型
231
+ const finalMimeType = docType === 'pdf' ? 'application/pdf' : mimeType;
232
+ return { mimeType: finalMimeType, data: base64Data, docType };
233
+ }
234
+
235
+ private extractImageData(imageUrl: string): { mimeType: string; data: string } {
236
+ if (imageUrl.startsWith("data:image/")) {
237
+ const [mimeInfo, base64Data] = imageUrl.split(",");
238
+ const mimeType = mimeInfo.split(":")[1].split(";")[0];
239
+ return { mimeType, data: base64Data };
240
+ } else if (imageUrl.startsWith("http")) {
241
+ throw new Error("URL images are not supported yet. Please provide base64 encoded images.");
242
+ } else {
243
+ return { mimeType: "image/jpeg", data: imageUrl };
244
+ }
245
+ }
246
+
247
+ async generateContentWithDocument(messages: OpenAIMessage[], modelName: string, maxTokens?: number): Promise<string> {
248
+ const apiKey = this.getNextApiKey();
249
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
250
+ const documentModel = this.isDocumentModel(fullModelName) ? fullModelName : 'models/gemini-1.5-pro-latest';
251
+
252
+ console.log(`Processing document with model: ${documentModel}`);
253
+
254
+ let contents;
255
+ try {
256
+ contents = messages.map(msg => {
257
+ if (typeof msg.content === "string") {
258
+ return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
259
+ }
260
+
261
+ const messageParts = msg.content.map(part => {
262
+ if (part.type === "text") return { text: part.text };
263
+
264
+ if (part.type === "image_url" && part.image_url) {
265
+ const { mimeType, data } = this.extractImageData(part.image_url.url);
266
+ return { inlineData: { mimeType, data } };
267
+ }
268
+
269
+ if (part.type === "document" && part.document) {
270
+ const docData = this.extractDocumentData(part.document.url);
271
+ console.log(`Processing document: ${docData.docType}, mime: ${docData.mimeType}, size: ${(docData.data.length * 0.75 / 1024).toFixed(2)} KB`);
272
+
273
+ if (docData.docType === 'txt' || docData.docType === 'md') {
274
+ const prefix = docData.docType === 'md' ? 'Markdown document content:\n' : 'Text document content:\n';
275
+ return { text: `${prefix}${docData.text}` };
276
+ }
277
+ if (docData.docType === 'pdf') {
278
+ return { inlineData: { mimeType: docData.mimeType, data: docData.data } };
279
+ }
280
+ return { text: `[Document type '${docData.docType}' is not supported for direct processing. Please convert to PDF, TXT, or MD.]` };
281
+ }
282
+ return { text: "" };
283
+ });
284
+ return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts.filter(p => p.text || p.inlineData) };
285
+ });
286
+ } catch (error) {
287
+ throw error;
288
+ }
289
+
290
+ const requestBody = {
291
+ contents,
292
+ generationConfig: { temperature: 0.7, maxOutputTokens: maxTokens || 8192 }
293
+ };
294
+
295
+ const response = await fetch(
296
+ `https://generativelanguage.googleapis.com/v1beta/${documentModel}:generateContent?key=${apiKey}`,
297
+ {
298
+ method: "POST",
299
+ headers: { "Content-Type": "application/json" },
300
+ body: JSON.stringify(requestBody),
301
+ }
302
+ );
303
+
304
+ if (!response.ok) {
305
+ const errorBody = await response.json().catch(() => response.text());
306
+ const errorMessage = errorBody?.error?.message || JSON.stringify(errorBody);
307
+ console.error(`Google API Error: ${response.status} - ${errorMessage}`);
308
+ throw new Error(`Google API request failed with status ${response.status}: ${errorMessage}`);
309
+ }
310
+
311
+ const data = await response.json();
312
+ const promptFeedback = data.promptFeedback;
313
+ if (promptFeedback && promptFeedback.blockReason) {
314
+ const reason = promptFeedback.blockReason;
315
+ const safetyRatings = promptFeedback.safetyRatings?.map((r: any) => `${r.category}: ${r.probability}`).join(', ') || 'N/A';
316
+ throw new Error(`Request blocked by Google API. Reason: ${reason}. Safety Ratings: [${safetyRatings}]`);
317
+ }
318
+
319
+ if (!data.candidates || data.candidates.length === 0) {
320
+ throw new Error("No response generated for document content. The content might be empty or unreadable.");
321
+ }
322
+
323
+ const candidate = data.candidates[0];
324
+ if (candidate.finishReason === "SAFETY") {
325
+ throw new Error("Response blocked due to safety filters. Check content for sensitive topics.");
326
+ }
327
+ if (candidate.finishReason === "RECITATION") {
328
+ throw new Error("Response blocked due to recitation policy. The model's output was too similar to a copyrighted source.");
329
+ }
330
+
331
+ return candidate.content?.parts[0]?.text || "Document processed, but no text response was generated.";
332
+ }
333
+
334
+ // The rest of the original methods from the user's code
335
+ async generateContent(messages: OpenAIMessage[], modelName: string, maxTokens?: number, enableSearch: boolean = false): Promise<string> {
336
+ const hasDocument = messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "document"));
337
+ if (hasDocument) {
338
+ return await this.generateContentWithDocument(messages, modelName, maxTokens);
339
+ }
340
+
341
+ const apiKey = this.getNextApiKey();
342
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
343
+
344
+ const contents = messages.map(msg => {
345
+ if (typeof msg.content === "string") {
346
+ return { role: msg.role === "assistant" ? "model" : "user", parts: [{ text: msg.content }] };
347
+ } else {
348
+ const messageParts = msg.content.map(part => {
349
+ if (part.type === "text") {
350
+ return { text: part.text };
351
+ } else if (part.type === "image_url" && part.image_url) {
352
+ const imageData = part.image_url.url;
353
+ if (imageData.startsWith("data:image/")) {
354
+ const { mimeType, data } = this.extractImageData(imageData);
355
+ return { inlineData: { mimeType, data } };
356
+ } else {
357
+ return { fileData: { mimeType: "image/jpeg", fileUri: imageData } };
358
+ }
359
+ }
360
+ return { text: "" };
361
+ });
362
+ return { role: msg.role === "assistant" ? "model" : "user", parts: messageParts };
363
+ }
364
+ });
365
+
366
+ const requestBody: any = {
367
+ contents,
368
+ generationConfig: { temperature: 0.7, maxOutputTokens: maxTokens || 8192 }
369
+ };
370
+ if (enableSearch) {
371
+ requestBody.tools = [{ googleSearchRetrieval: {} }];
372
+ }
373
+
374
+ const response = await fetch(
375
+ `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
376
+ { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) }
377
+ );
378
+
379
+ if (!response.ok) {
380
+ const errorText = await response.text();
381
+ throw new Error(`Google AI API error: ${response.status} - ${errorText}`);
382
+ }
383
+ const data = await response.json();
384
+ if (!data.candidates || data.candidates.length === 0) {
385
+ throw new Error("No response generated from Google AI");
386
+ }
387
+ const candidate = data.candidates[0];
388
+ if (candidate.finishReason === "SAFETY") {
389
+ throw new Error("Response blocked due to safety filters");
390
+ }
391
+ return candidate.content?.parts[0]?.text || "No response generated";
392
+ }
393
+
394
+ async generateOrEditImageWithGemini(prompt: string, modelName: string = "gemini-2.0-flash-preview-image-generation", inputImage?: { mimeType: string; data: string }): Promise<{ text?: string; imageBase64?: string; imageUrl?: string }> {
395
+ const apiKey = this.getNextApiKey();
396
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
397
+ const requestParts: any[] = [{ text: prompt }];
398
+
399
+ if (inputImage) {
400
+ requestParts.push({ inline_data: { mime_type: inputImage.mimeType, data: inputImage.data } });
401
+ console.log(`Editing image with model: ${fullModelName}`);
402
+ } else {
403
+ console.log(`Generating image with model: ${fullModelName}`);
404
+ }
405
+
406
+ const requestBody = {
407
+ contents: [{ parts: requestParts }],
408
+ generationConfig: { responseModalities: ["TEXT", "IMAGE"], temperature: 0.7 }
409
+ };
410
+
411
+ const response = await fetch(
412
+ `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
413
+ { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) }
414
+ );
415
+
416
+ if (!response.ok) {
417
+ const errorText = await response.text();
418
+ throw new Error(`Image ${inputImage ? 'editing' : 'generation'} failed: ${response.status} - ${errorText}`);
419
+ }
420
+ const data = await response.json();
421
+ if (!data.candidates || data.candidates.length === 0) {
422
+ throw new Error(`No ${inputImage ? 'edited' : 'generated'} image returned`);
423
+ }
424
+
425
+ const candidate = data.candidates[0];
426
+ if (candidate.finishReason === "SAFETY") {
427
+ throw new Error(`Image ${inputImage ? 'editing' : 'generation'} blocked due to safety filters`);
428
+ }
429
+
430
+ const responseParts = candidate.content?.parts || [];
431
+ let textResponse = "";
432
+ let imageBase64 = "";
433
+
434
+ for (const part of responseParts) {
435
+ if (part.text) textResponse += part.text;
436
+ if (part.inlineData?.data) imageBase64 = part.inlineData.data;
437
+ if (part.inline_data?.data) imageBase64 = part.inline_data.data;
438
+ }
439
+
440
+ const result: { text?: string; imageBase64?: string; imageUrl?: string } = {};
441
+ if (textResponse) result.text = textResponse;
442
+ if (imageBase64) {
443
+ result.imageBase64 = imageBase64;
444
+ result.imageUrl = `data:image/png;base64,${imageBase64}`;
445
+ }
446
+ return result;
447
+ }
448
+
449
+ async generateContentWithGrounding(messages: OpenAIMessage[], modelName: string, maxTokens?: number): Promise<string> {
450
+ const apiKey = this.getNextApiKey();
451
+ const fullModelName = modelName.startsWith('models/') ? modelName : `models/${modelName}`;
452
+ const contents = messages.map(msg => ({ role: msg.role === 'assistant' ? 'model' : 'user', parts: [{ text: typeof msg.content === 'string' ? msg.content : '' }] }));
453
+
454
+ const requestBody = {
455
+ contents,
456
+ tools: [{ googleSearch: {} }],
457
+ generationConfig: { temperature: 0.7, maxOutputTokens: maxTokens || 8192 }
458
+ };
459
+ const response = await fetch(
460
+ `https://generativelanguage.googleapis.com/v1beta/${fullModelName}:generateContent?key=${apiKey}`,
461
+ { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody) }
462
+ );
463
+ if (!response.ok) {
464
+ console.warn(`Google Search API failed: ${response.status}, trying alternative.`);
465
+ return await this.generateContentWithSearchPrompt(messages, modelName, maxTokens);
466
+ }
467
+ const data = await response.json();
468
+ if (!data.candidates || data.candidates.length === 0) {
469
+ return await this.generateContentWithSearchPrompt(messages, modelName, maxTokens);
470
+ }
471
+
472
+ const candidate = data.candidates[0];
473
+ if (candidate.finishReason === "SAFETY") {
474
+ throw new Error("Response blocked due to safety filters");
475
+ }
476
+ return candidate.content?.parts[0]?.text || "No response generated";
477
+ }
478
+
479
+ async generateContentWithSearchPrompt(messages: OpenAIMessage[], modelName: string, maxTokens?: number): Promise<string> {
480
+ const enhancedMessages = [...messages];
481
+ const lastMessage = enhancedMessages[enhancedMessages.length - 1];
482
+ if (typeof lastMessage.content === "string") {
483
+ lastMessage.content = `Please provide the most current and accurate information available about: ${lastMessage.content}.`;
484
+ }
485
+ return await this.generateContent(enhancedMessages, modelName, maxTokens, false);
486
+ }
487
+
488
+ async generateOrEditImage(prompt: string, modelName: string, inputImages?: any[]): Promise<string> {
489
+ if (this.isImageGenerationModel(modelName)) {
490
+ try {
491
+ let inputImage: { mimeType: string; data: string } | undefined;
492
+ if (inputImages && inputImages.length > 0) {
493
+ inputImage = this.extractImageData(inputImages[0].url);
494
+ }
495
+ const result = await this.generateOrEditImageWithGemini(prompt, modelName, inputImage);
496
+ let response = "";
497
+ if (result.text) response += result.text + "\\\\n\\\\n";
498
+ if (result.imageUrl) response += `![image](${result.imageUrl})`;
499
+ return response || `Image processing complete.`;
500
+ } catch (error) {
501
+ return `Image processing failed: ${error.message}`;
502
+ }
503
+ }
504
+ return `Model ${modelName} does not support image generation. Use a model like gemini-2.0-flash-preview-image-generation.`;
505
+ }
506
+ }
507
+
508
+ class OpenAICompatibleServer {
509
+ public googleAI: GoogleAIService;
510
+ private authKey: string;
511
+
512
+ constructor() {
513
+ this.googleAI = new GoogleAIService();
514
+ this.authKey = Deno.env.get("AUTH_KEY") || "";
515
+ }
516
+
517
+ private _writeString(view: DataView, offset: number, str: string) {
518
+ for (let i = 0; i < str.length; i++) {
519
+ view.setUint8(offset + i, str.charCodeAt(i));
520
+ }
521
+ }
522
+
523
+ private _createWavFile(pcmData: Uint8Array): Uint8Array {
524
+ const numChannels = 1;
525
+ const sampleRate = 24000;
526
+ const bitsPerSample = 16;
527
+ const dataSize = pcmData.length;
528
+ const headerSize = 44;
529
+ const buffer = new ArrayBuffer(headerSize + dataSize);
530
+ const view = new DataView(buffer);
531
+
532
+ this._writeString(view, 0, "RIFF");
533
+ view.setUint32(4, 36 + dataSize, true);
534
+ this._writeString(view, 8, "WAVE");
535
+ this._writeString(view, 12, "fmt ");
536
+ view.setUint32(16, 16, true);
537
+ view.setUint16(20, 1, true);
538
+ view.setUint16(22, numChannels, true);
539
+ view.setUint32(24, sampleRate, true);
540
+ view.setUint32(28, sampleRate * numChannels * (bitsPerSample / 8), true);
541
+ view.setUint16(32, numChannels * (bitsPerSample / 8), true);
542
+ view.setUint16(34, bitsPerSample, true);
543
+ this._writeString(view, 36, "data");
544
+ view.setUint32(40, dataSize, true);
545
+
546
+ const wavBytes = new Uint8Array(buffer);
547
+ wavBytes.set(pcmData, headerSize);
548
+ return wavBytes;
549
+ }
550
+
551
+ private authenticate(request: Request): boolean {
552
+ if (!this.authKey) return true;
553
+ const authHeader = request.headers.get("Authorization");
554
+ return authHeader ? authHeader.replace("Bearer ", "") === this.authKey : false;
555
+ }
556
+
557
+ private async handleAudioSpeech(request: Request): Promise<Response> {
558
+ try {
559
+ const body: OpenAITTSRequest = await request.json();
560
+ const modelMap: { [key: string]: string } = { 'tts-1': 'gemini-2.5-flash-preview-tts', 'tts-1-hd': 'gemini-2.5-flash-preview-tts' };
561
+ const geminiModel = modelMap[body.model] || (this.googleAI.isTTSModel(body.model) ? body.model : 'gemini-2.5-flash-preview-tts');
562
+ const voiceMap: { [key: string]: string } = { 'alloy': 'Krew', 'echo': 'Kore', 'fable': 'Chiron', 'onyx': 'Calypso', 'nova': 'Cria', 'shimmer': 'Estrella' };
563
+ const geminiVoice = voiceMap[body.voice] || 'Kore';
564
+
565
+ if (!body.input) throw new Error("The 'input' field is required for TTS requests.");
566
+
567
+ const audioBase64 = await this.googleAI.generateSpeech(body.input, geminiModel, geminiVoice);
568
+ const pcmBytes = decode(audioBase64);
569
+ const wavBytes = this._createWavFile(pcmBytes);
570
+
571
+ return new Response(wavBytes, { headers: { "Content-Type": "audio/wav" } });
572
+ } catch (error) {
573
+ console.error("Error in audio speech generation:", error.message);
574
+ const status = error.message.includes("required") ? 400 : 500;
575
+ return new Response(JSON.stringify({ error: { message: error.message, type: status === 400 ? "invalid_request_error" : "api_error", code: "tts_failed" } }), { status, headers: { "Content-Type": "application/json" } });
576
+ }
577
+ }
578
+
579
+ private isDocumentContent(url?: string): boolean {
580
+ if (!url) return false;
581
+ const lowerUrl = url.toLowerCase();
582
+ return lowerUrl.includes('.pdf') || lowerUrl.startsWith('data:application/pdf') ||
583
+ lowerUrl.includes('.txt') || lowerUrl.startsWith('data:text/plain') ||
584
+ lowerUrl.includes('.md') || lowerUrl.startsWith('data:text/markdown');
585
+ }
586
+
587
+ private async handleChatCompletions(request: Request): Promise<Response> {
588
+ try {
589
+ const body: OpenAIRequest = await request.json();
590
+ const requestedModel = body.model || "gemini-1.5-pro";
591
+ const stream = body.stream || false;
592
+ const maxTokens = body.max_tokens || 1048576;
593
+ console.log(`Request for model: ${requestedModel}, stream: ${stream}, max_tokens: ${maxTokens}`);
594
+ const lastMessage = body.messages[body.messages.length - 1];
595
+ const content = typeof lastMessage.content === "string"
596
+ ? lastMessage.content
597
+ : (Array.isArray(lastMessage.content) ? lastMessage.content.map(p => p.text || "").join(" ") : "");
598
+ if (content == 'ping'){
599
+ const responsePayload = {
600
+ id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: requestedModel,
601
+ choices: [{ index: 0, message: { role: "assistant", content: "pong" }, finish_reason: "stop" }],
602
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
603
+ };
604
+ return new Response(JSON.stringify(responsePayload), { headers: { "Content-Type": "application/json" } });
605
+ }
606
+ const hasDocument = body.messages.some(msg =>
607
+ Array.isArray(msg.content) &&
608
+ msg.content.some(part => part.type === "document" || this.isDocumentContent(part.document?.url))
609
+ );
610
+ const hasImages = body.messages.some(msg => Array.isArray(msg.content) && msg.content.some(part => part.type === "image_url"));
611
+
612
+ let inputImages: any[] = [];
613
+ if (hasImages) {
614
+ body.messages.forEach(msg => {
615
+ if (Array.isArray(msg.content)) {
616
+ msg.content.forEach(part => {
617
+ if (part.type === "image_url" && part.image_url) inputImages.push({ url: part.image_url.url });
618
+ });
619
+ }
620
+ });
621
+ }
622
+ let responseText: string;
623
+
624
+ // Routing logic based on keywords and content types
625
+ if (hasDocument) {
626
+ responseText = await this.googleAI.generateContentWithDocument(body.messages, requestedModel, maxTokens);
627
+ } else if (this.googleAI.isImageEditingModel(requestedModel) && hasImages) {
628
+ responseText = await this.googleAI.generateOrEditImage(content, requestedModel, inputImages);
629
+ } else if (this.googleAI.isImageGenerationModel(requestedModel)) {
630
+ responseText = await this.googleAI.generateOrEditImage(content, requestedModel);
631
+ } else if (requestedModel.endsWith("-search")) {
632
+ const searchMessages = [{ ...lastMessage, content: content }];
633
+ responseText = await this.googleAI.generateContentWithGrounding(searchMessages, requestedModel.slice(0, -"-search".length), maxTokens);
634
+ } else {
635
+ responseText = await this.googleAI.generateContent(body.messages, requestedModel, maxTokens, false);
636
+ }
637
+
638
+ if (stream) {
639
+ const streamResponse = await this.streamStringAsOpenAIResponse(responseText, requestedModel);
640
+ return new Response(streamResponse, {
641
+ headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive", "Access-Control-Allow-Origin": "*" }
642
+ });
643
+ } else {
644
+ const responsePayload = {
645
+ id: `chatcmpl-${Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: requestedModel,
646
+ choices: [{ index: 0, message: { role: "assistant", content: responseText }, finish_reason: "stop" }],
647
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
648
+ };
649
+ return new Response(JSON.stringify(responsePayload), { headers: { "Content-Type": "application/json" } });
650
+ }
651
+ } catch (error) {
652
+ console.error("Error in chat completions:", error.message);
653
+ const status = error.message.includes("exceeds the limit") || error.message.includes("Invalid") ? 400 : 500;
654
+ return new Response(
655
+ JSON.stringify({
656
+ error: {
657
+ message: error.message,
658
+ type: status === 400 ? "invalid_request_error" : "api_error",
659
+ code: null
660
+ }
661
+ }),
662
+ { status, headers: { "Content-Type": "application/json" } }
663
+ );
664
+ }
665
+ }
666
+
667
+ private async streamStringAsOpenAIResponse(content: string, modelName: string): Promise<ReadableStream<Uint8Array>> {
668
+ const encoder = new TextEncoder();
669
+ const streamId = `chatcmpl-${Date.now()}`;
670
+ const creationTime = Math.floor(Date.now() / 1000);
671
+ const chunkSize = 256; // 设置块大小为256个字符
672
+ let position = 0;
673
+
674
+ return new ReadableStream({
675
+ start(controller) {
676
+ const initialChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }] };
677
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(initialChunk)}\n\n`));
678
+ },
679
+ pull(controller) {
680
+ if (position >= content.length) {
681
+ const finalChunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
682
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalChunk)}\n\n`));
683
+ controller.enqueue(encoder.encode('data: [DONE]\n\n'));
684
+ controller.close();
685
+ return;
686
+ }
687
+
688
+ const chunkContent = content.substring(position, position + chunkSize);
689
+ position += chunkSize;
690
+
691
+ const chunk = { id: streamId, object: 'chat.completion.chunk', created: creationTime, model: modelName, choices: [{ index: 0, delta: { content: chunkContent }, finish_reason: null }] };
692
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
693
+ }
694
+ });
695
+ }
696
+
697
+ private async handleModels(): Promise<Response> {
698
+ try {
699
+ const googleModels = await this.googleAI.fetchOfficialModels();
700
+ const models = {
701
+ object: "list",
702
+ data: googleModels.map(model => {
703
+ const modelId = model.name.replace('models/', '');
704
+ return {
705
+ id: modelId, object: "model", created: Math.floor(Date.now() / 1000), owned_by: "google",
706
+ description: model.description || model.displayName, maxTokens: model.inputTokenLimit || model.maxTokens
707
+ };
708
+ })
709
+ };
710
+ return new Response(JSON.stringify(models), { headers: { "Content-Type": "application/json" } });
711
+ } catch (error) {
712
+ console.error("Error fetching models:", error);
713
+ return new Response(JSON.stringify({ error: { message: "Failed to fetch models." } }), { status: 500 });
714
+ }
715
+ }
716
+
717
+ private async handleStatus(): Promise<Response> {
718
+ const status = {
719
+ status: "healthy", timestamp: new Date().toISOString(), version: "2.5.0",
720
+ api_keys_loaded: this.googleAI.apiKeys.length,
721
+ models_in_cache: this.googleAI.cachedModels.length,
722
+ models_last_fetched: this.googleAI.modelsLastFetch > 0 ? new Date(this.googleAI.modelsLastFetch).toISOString() : "never"
723
+ };
724
+ return new Response(JSON.stringify(status), { headers: { "Content-Type": "application/json" } });
725
+ }
726
+
727
+ async handleRequest(request: Request): Promise<Response> {
728
+ const corsHeaders = {
729
+ "Access-Control-Allow-Origin": "*",
730
+ "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
731
+ "Access-Control-Allow-Headers": "Content-Type, Authorization",
732
+ };
733
+
734
+ if (request.method === "OPTIONS") {
735
+ return new Response(null, { headers: corsHeaders });
736
+ }
737
+
738
+ const url = new URL(request.url);
739
+ let response: Response;
740
+
741
+ // Handle routes
742
+ if (url.pathname === "/health" || url.pathname === "/status") {
743
+ response = await this.handleStatus();
744
+ } else if (!this.authenticate(request)) {
745
+ response = new Response(JSON.stringify({ error: { message: "Unauthorized" } }), { status: 401 });
746
+ } else if (url.pathname === "/v1/audio/speech" && request.method === "POST") {
747
+ response = await this.handleAudioSpeech(request);
748
+ } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
749
+ response = await this.handleChatCompletions(request);
750
+ } else if (url.pathname === "/v1/models" && request.method === "GET") {
751
+ response = await this.handleModels();
752
+ } else {
753
+ response = new Response("Not Found", { status: 404 });
754
+ }
755
+
756
+ // Add CORS headers to all responses
757
+ const finalHeaders = new Headers(response.headers);
758
+ for (const [key, value] of Object.entries(corsHeaders)) {
759
+ finalHeaders.set(key, value);
760
+ }
761
+
762
+ return new Response(response.body, { status: response.status, headers: finalHeaders });
763
+ }
764
+ }
765
+
766
+ // --- 服务器启动 ---
767
+ const server = new OpenAICompatibleServer();
768
+
769
+ console.log("🚀 OpenAI Compatible Server with Google AI starting on port 8000...");
770
+ console.log(`✅ Loaded ${server.googleAI.apiKeys.length} API key(s).`);
771
+ console.log(`📄 Max document size set to ${MAX_DOCUMENT_SIZE_MB}MB.`);
772
+
773
+ // Pre-fetch models at startup
774
+ server.googleAI.fetchOfficialModels().then(models => {
775
+ console.log(`✅ Successfully fetched ${models.length} models from Google AI.`);
776
+ }).catch(error => {
777
+ console.warn(`⚠️ Could not pre-fetch models: ${error.message}. Will use fallbacks or fetch on first request.`);
778
+ });
779
+
780
+ console.log("\n🔗 Endpoints:");
781
+ console.log(" POST /v1/chat/completions");
782
+ console.log(" POST /v1/audio/speech");
783
+ console.log(" GET /v1/models");
784
+ console.log(" GET /status");
785
+
786
+ await serve(
787
+ (request: Request) => server.handleRequest(request),
788
+ { port: 7860 }
789
+ );