| const { |
| Time, |
| CacheKeys, |
| SEPARATORS, |
| parseTextParts, |
| findLastSeparatorIndex, |
| } = require('librechat-data-provider'); |
| const { getMessage } = require('~/models/Message'); |
| const { getLogStores } = require('~/cache'); |
|
|
| |
| |
| |
| |
/**
 * Picks one voice ID uniformly at random from the provided list.
 *
 * @param {string[]} voiceIds - Candidate voice identifiers (assumed non-empty).
 * @returns {string} A randomly selected voice ID.
 */
function getRandomVoiceId(voiceIds) {
  return voiceIds[Math.floor(Math.random() * voiceIds.length)];
}
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
/** Max consecutive polls that may miss the message before polling is aborted. */
const MAX_NOT_FOUND_COUNT = 6;
/** Max consecutive polls with unchanged text before polling is aborted. */
const MAX_NO_CHANGE_COUNT = 10;
|
|
| |
| |
| |
| |
| |
/**
 * Creates a poll-based chunk processor for a streaming message's text.
 *
 * The returned async function, when called repeatedly, reads the latest
 * snapshot of the message (message cache first, falling back to the database)
 * and returns the newly available text as `{ text, isFinished }` chunk
 * objects. After `MAX_NOT_FOUND_COUNT` consecutive misses, or
 * `MAX_NO_CHANGE_COUNT` polls without new text, it returns a descriptive
 * string instead of an array so the caller can stop polling.
 *
 * @param {string} user - User ID used to scope the database lookup.
 * @param {string} messageId - ID of the message to poll.
 * @throws {Error} If `messageId` is missing.
 * @returns {() => Promise<{ text: string, isFinished: boolean }[] | string>}
 */
function createChunkProcessor(user, messageId) {
  let notFoundCount = 0;
  let noChangeCount = 0;
  let processedText = '';
  if (!messageId) {
    throw new Error('Message ID is required');
  }

  const messageCache = getLogStores(CacheKeys.MESSAGES);

  async function processChunks() {
    if (notFoundCount >= MAX_NOT_FOUND_COUNT) {
      return `Message not found after ${MAX_NOT_FOUND_COUNT} attempts`;
    }

    if (noChangeCount >= MAX_NO_CHANGE_COUNT) {
      return `No change in message after ${MAX_NO_CHANGE_COUNT} attempts`;
    }

    /** @type {string | { text: string; complete: boolean } | null | undefined} */
    let message = await messageCache.get(messageId);
    if (!message) {
      const dbMessage = await getMessage({ user, messageId });
      if (dbMessage) {
        // A database hit means generation has finished. Normalize content
        // parts into plain text, and cache the completed snapshot so later
        // polls don't hit the database again.
        // NOTE: the cache set must happen ONLY on a DB fetch — previously it
        // ran on cache hits too, overwriting an in-flight raw-string entry
        // with `{ text: undefined, complete: true }` and poisoning the poll.
        const text =
          dbMessage.content?.length > 0 ? parseTextParts(dbMessage.content) : dbMessage.text;
        message = { text, complete: true };
        messageCache.set(messageId, message, Time.FIVE_MINUTES);
      }
    }

    if (!message) {
      notFoundCount++;
      return [];
    }

    // Cached entries may be a raw string (stream still in flight) or a
    // `{ text, complete }` object (finished snapshot).
    const text = typeof message === 'string' ? message : message.text;
    const complete = typeof message === 'string' ? false : (message.complete ?? true);

    if (text === processedText) {
      noChangeCount++;
    }

    const remainingText = text.slice(processedText.length);
    const chunks = [];

    if (!complete && remainingText.length >= 20) {
      // Stream in progress: prefer to cut at the last separator so chunks end
      // on natural boundaries; otherwise flush everything accumulated so far.
      const separatorIndex = findLastSeparatorIndex(remainingText);
      if (separatorIndex !== -1) {
        const chunkText = remainingText.slice(0, separatorIndex + 1);
        chunks.push({ text: chunkText, isFinished: false });
        processedText += chunkText;
      } else {
        chunks.push({ text: remainingText, isFinished: false });
        processedText = text;
      }
    } else if (complete && remainingText.trim().length > 0) {
      chunks.push({ text: remainingText.trim(), isFinished: true });
      processedText = text;
    }

    return chunks;
  }

  return processChunks;
}
|
|
| |
| |
| |
| |
| |
/**
 * Splits `text` into chunks of at most `chunkSize` characters, preferring to
 * break at the last separator within each window. Each chunk is trimmed and
 * tagged with `isFinished: true` only on the final chunk.
 *
 * @param {string} text - The input text to split.
 * @param {number} [chunkSize=4000] - Maximum characters per chunk.
 * @throws {Error} If `text` is empty or missing.
 * @returns {{ text: string, isFinished: boolean }[]} Ordered chunk objects.
 */
function splitTextIntoChunks(text, chunkSize = 4000) {
  if (!text) {
    throw new Error('Text is required');
  }

  const result = [];
  const total = text.length;
  let cursor = 0;

  while (cursor < total) {
    let sliceEnd = Math.min(cursor + chunkSize, total);
    let piece = text.slice(cursor, sliceEnd);

    if (sliceEnd < total) {
      // Prefer to break at the last separator inside this window.
      let breakAt = -1;
      for (const sep of SEPARATORS) {
        breakAt = Math.max(breakAt, piece.lastIndexOf(sep));
      }

      if (breakAt !== -1) {
        sliceEnd = cursor + breakAt + 1;
        piece = text.slice(cursor, sliceEnd);
      } else {
        // No separator found: extend through any whitespace so the next
        // chunk starts at a non-whitespace character.
        const skip = text.slice(sliceEnd).search(/\S/);
        if (skip !== -1) {
          sliceEnd += skip;
          piece = text.slice(cursor, sliceEnd);
        }
      }
    }

    const trimmed = piece.trim();
    if (trimmed) {
      result.push({ text: trimmed, isFinished: sliceEnd >= total });
    } else if (result.length > 0) {
      // Whitespace-only tail: the previous chunk was actually the last one.
      result[result.length - 1].isFinished = true;
    }

    // Advance past the chunk and any whitespace that follows it.
    cursor = sliceEnd;
    while (cursor < total && text[cursor].trim() === '') {
      cursor += 1;
    }
  }

  return result;
}
|
|
| |
| |
| |
| |
/**
 * Adapts an OpenAI-style chat completion stream into a plain async string
 * generator, yielding only non-empty delta content fragments.
 *
 * @param {AsyncIterable<object>} llmStream - Stream of completion chunks.
 * @yields {string} Each non-empty `choices[0].delta.content` value.
 */
async function* llmMessageSource(llmStream) {
  for await (const part of llmStream) {
    const delta = part.choices[0].delta.content;
    if (delta) {
      yield delta;
    }
  }
}
|
|
// Public API: text chunking and streaming helpers for the audio/TTS pipeline.
// `findLastSeparatorIndex` is re-exported from librechat-data-provider.
module.exports = {
  findLastSeparatorIndex,
  createChunkProcessor,
  splitTextIntoChunks,
  llmMessageSource,
  getRandomVoiceId,
};
|
|