| import { HttpError } from "../utils/httpError.js"; |
| import { imageMimeType } from "../utils/mediaTypes.js"; |
| import { isDataUrl, isLikelyBase64, parseDataUrl, stripDataUrl } from "../utils/dataUrl.js"; |
|
|
/**
 * Audio formats accepted for inline (base64) audio input.
 * Membership is tested against the lower-cased `format` field of the payload.
 */
const SUPPORTED_AUDIO_FORMATS = new Set([
  "mp3",
  "wav",
  "m4a",
]);
|
|
/**
 * Reports whether `value` is a string that starts with an `http://` or
 * `https://` prefix. The comparison is case-sensitive and no further URL
 * validation is performed.
 *
 * @param {unknown} value - Candidate value; anything that is not a string yields `false`.
 * @returns {boolean} `true` only for strings carrying one of the two prefixes.
 */
function isHttpUrl(value) {
  if (typeof value !== "string") {
    return false;
  }

  const acceptedPrefixes = ["http://", "https://"];
  return acceptedPrefixes.some((prefix) => value.startsWith(prefix));
}
|
|
/**
 * Normalizes an image reference into a value usable as an image URL.
 *
 * - http(s) URLs and data URLs are returned unchanged.
 * - A string that `isLikelyBase64` accepts is wrapped in a base64 data URL.
 * - Anything else is rejected.
 *
 * @param {unknown} source - http(s) URL, data URL, or raw base64 payload.
 * @param {unknown} [mimeType] - MIME type used when wrapping raw base64.
 *   Values that are not a non-blank string are treated as absent, in which
 *   case the result of `imageMimeType("png")` is used (presumably the PNG
 *   MIME type — confirm against ../utils/mediaTypes.js).
 * @returns {string} The original URL, or a `data:<mime>;base64,<payload>` URL.
 * @throws {HttpError} 400 when `source` is not a non-empty string, or is none
 *   of the accepted forms.
 */
function normalizeImageSource(source, mimeType) {
  if (typeof source !== "string" || source.length === 0) {
    throw new HttpError(400, "Image input must include a non-empty URL or base64 payload.");
  }

  // Reuse the file's shared http(s) check instead of repeating the prefix test.
  if (isHttpUrl(source) || isDataUrl(source)) {
    return source;
  }

  if (isLikelyBase64(source)) {
    // An empty or non-string MIME type would otherwise be interpolated as-is
    // (e.g. `data:;base64,...`), which no longer labels the payload as an image.
    const hasUsableMimeType = typeof mimeType === "string" && mimeType.trim().length > 0;
    const resolvedMimeType = hasUsableMimeType ? mimeType : imageMimeType("png");
    return `data:${resolvedMimeType};base64,${source}`;
  }

  throw new HttpError(400, "Image input must be an http(s) URL, data URL, or raw base64 string.");
}
|
|
/**
 * Converts an image content part into the `{ type: "image_url", image_url }`
 * shape.
 *
 * The image source is taken from the first of these that is not
 * null/undefined: `image_url.url` (or `image_url` itself when it is a string),
 * `input_image.url`, `input_image.data`. The MIME type hint comes from
 * `part.mime_type`, falling back to `input_image.mime_type`.
 *
 * @param {object} part - Content part carrying `image_url` and/or `input_image`.
 * @returns {{type: "image_url", image_url: object}} Part whose `image_url`
 *   keeps any extra fields of the original `image_url` object and carries the
 *   normalized `url`.
 * @throws Whatever `normalizeImageSource` throws for an unusable source.
 */
function normalizeImagePart(part) {
  const {
    image_url: rawImageUrl,
    input_image: inputImage,
    mime_type: partMimeType,
  } = part;

  // A bare string is shorthand for `{ url: <string> }`.
  let imageFields = rawImageUrl;
  if (typeof rawImageUrl === "string") {
    imageFields = { url: rawImageUrl };
  }

  const source = imageFields?.url ?? inputImage?.url ?? inputImage?.data;
  const mimeType = partMimeType ?? inputImage?.mime_type;
  const url = normalizeImageSource(source, mimeType);

  return {
    type: "image_url",
    image_url: { ...imageFields, url },
  };
}
|
|
/**
 * Validates an inline audio payload and reduces it to `{ data, format }`.
 *
 * `data` may be raw base64 or a data URL; it is passed through `stripDataUrl`.
 * When `parseDataUrl` yields a MIME type, `inferAudioFormat` may override the
 * declared format with the one implied by that MIME type.
 *
 * @param {{format?: unknown, data?: unknown}} audio - Audio payload from the request.
 * @returns {{data: string, format: string}} Result of `stripDataUrl(audio.data)`
 *   plus the resolved format.
 * @throws {HttpError} 400 when `format` is not one of mp3/wav/m4a
 *   (case-insensitive) or when `data` is not a non-empty string.
 */
function normalizeAudioBase64(audio) {
  // Only a string can name a format. Anything else (number, array, object) is
  // treated as missing so the caller gets the 400 below rather than a
  // TypeError from calling `.toLowerCase()` on a non-string.
  const format = typeof audio.format === "string" ? audio.format.toLowerCase() : undefined;
  if (!SUPPORTED_AUDIO_FORMATS.has(format)) {
    throw new HttpError(400, "Audio input format must be mp3, wav, or m4a.");
  }

  if (typeof audio.data !== "string" || audio.data.length === 0) {
    throw new HttpError(400, "Audio input must include base64 data.");
  }

  const parsed = parseDataUrl(audio.data);
  return {
    data: stripDataUrl(audio.data),
    format: inferAudioFormat(parsed?.mimeType, format),
  };
}
|
|
/**
 * Maps audio MIME types (lower-case, parameters removed) to the short format
 * names used by this module. A `Map` is used so that arbitrary input strings
 * such as "constructor" can never hit an inherited property.
 */
const AUDIO_FORMAT_BY_MIME_TYPE = new Map([
  ["audio/wav", "wav"],
  ["audio/x-wav", "wav"],
  // Other MIME types in common use for WAVE audio.
  ["audio/wave", "wav"],
  ["audio/vnd.wave", "wav"],
  ["audio/x-pn-wav", "wav"],
  ["audio/mp4", "m4a"],
  ["audio/x-m4a", "m4a"],
  ["audio/mpeg", "mp3"],
  ["audio/mp3", "mp3"],
]);

/**
 * Derives the audio format from a MIME type, falling back to a caller-supplied
 * format when the MIME type is missing or not recognised.
 *
 * Parameters after `;` are ignored, and matching is case-insensitive and
 * whitespace-tolerant.
 *
 * @param {unknown} mimeType - MIME type such as `audio/wav; codecs=1`; may be empty/undefined.
 * @param {*} fallbackFormat - Value returned as-is when no mapping applies.
 * @returns {*} `"wav"`, `"m4a"`, `"mp3"`, or `fallbackFormat`.
 */
function inferAudioFormat(mimeType, fallbackFormat) {
  const normalizedMimeType = String(mimeType || "").split(";")[0].trim().toLowerCase();
  return AUDIO_FORMAT_BY_MIME_TYPE.get(normalizedMimeType) ?? fallbackFormat;
}
|
|
/**
 * Creates the service that normalizes an incoming request body before it is
 * forwarded.
 *
 * @param {object} deps
 * @param {object} deps.audioConversionService - Must provide
 *   `downloadAndConvertToMp3Base64(url)` and `normalizeBase64Audio(audio)`;
 *   both results are awaited and used as the `input_audio` value.
 * @returns {{normalize: (body: object) => Promise<{normalizedBody: object, responseContext: {audioFormat: string, exposeMediaUrls: boolean}}>}}
 */
export function createRequestNormalizationService({ audioConversionService }) {
  /** Resolves one `input_audio` part to `{ type, input_audio }` using the conversion service. */
  async function normalizeAudioPart(part) {
    const audio = part.input_audio ?? {};
    // A remote source may arrive either in `url` or as an http(s) string in `data`.
    // NOTE(review): `audio.url` is forwarded to the downloader without the
    // http(s) scheme check that `audio.data` gets — confirm the conversion
    // service validates it.
    const audioUrl = audio.url || (isHttpUrl(audio.data) ? audio.data : null);

    const inputAudio = audioUrl
      ? await audioConversionService.downloadAndConvertToMp3Base64(audioUrl)
      : await audioConversionService.normalizeBase64Audio(normalizeAudioBase64(audio));

    return { type: "input_audio", input_audio: inputAudio };
  }

  /** Dispatches a content part by `type`; unrecognised parts are returned untouched. */
  async function normalizePart(part) {
    // `?.` lets null/undefined entries fall through to the pass-through branch
    // instead of throwing a TypeError on `part.type`.
    switch (part?.type) {
      case "image_url":
      case "input_image":
        return normalizeImagePart(part);
      case "input_audio":
        return normalizeAudioPart(part);
      default:
        return part;
    }
  }

  return {
    /**
     * Returns a normalized deep copy of `body` plus context for shaping the
     * response. The caller's `body` is not modified.
     *
     * - The `proxy` key is removed from the copy; `proxy.expose_media_urls`
     *   feeds `responseContext.exposeMediaUrls` (true unless it is exactly `false`).
     * - For every message whose `content` is an array, image and audio parts
     *   are normalized in order, one at a time; all other parts are kept as-is.
     * - `responseContext.audioFormat` is `body.audio.format`, defaulting to `"mp3"`.
     *
     * @param {object} body - Request body; must contain a `messages` array.
     * @returns {Promise<{normalizedBody: object, responseContext: {audioFormat: string, exposeMediaUrls: boolean}}>}
     * @throws {HttpError} 400 when `body` is falsy or `body.messages` is not an array;
     *   also propagates errors from part normalization and the conversion service.
     */
    async normalize(body) {
      if (!body || !Array.isArray(body.messages)) {
        throw new HttpError(400, "Request body must include a messages array.");
      }

      const normalized = structuredClone(body);
      const proxyOptions = normalized.proxy ?? {};
      delete normalized.proxy;

      for (const message of normalized.messages) {
        // Messages that are null/non-objects or have non-array content are left alone.
        if (!Array.isArray(message?.content)) {
          continue;
        }

        const nextParts = [];
        for (const part of message.content) {
          // Sequential on purpose: conversions run in the order the parts appear.
          nextParts.push(await normalizePart(part));
        }
        message.content = nextParts;
      }

      return {
        normalizedBody: normalized,
        responseContext: {
          audioFormat: normalized.audio?.format ?? "mp3",
          exposeMediaUrls: proxyOptions.expose_media_urls !== false,
        },
      };
    },
  };
}
|
|