muthuk1's picture
Convert OpenMAIC from Next.js to React (Vite)
f56a29b verified
/**
* Scene Outlines Streaming API (SSE)
*
* Streams outline generation via Server-Sent Events.
* Emits individual outline objects as they're parsed from the LLM response,
* so the frontend can display them incrementally.
*
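* SSE events:
* { type: 'languageDirective', data: string }
* { type: 'outline', data: SceneOutline, index: number }
* { type: 'retry', attempt: number, maxAttempts: number }
* { type: 'done', outlines: SceneOutline[], languageDirective: string }
* { type: 'error', error: string }
*
* A `:heartbeat` SSE comment is also sent every 15 seconds to keep the connection alive.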
*/
import { NextRequest } from 'next/server';
import { streamLLM } from '@/lib/ai/llm';
import { buildPrompt, PROMPT_IDS } from '@/lib/prompts';
import {
formatImageDescription,
formatImagePlaceholder,
buildVisionUserContent,
uniquifyMediaElementIds,
formatTeacherPersonaForPrompt,
} from '@/lib/generation/generation-pipeline';
import type { AgentInfo } from '@/lib/generation/generation-pipeline';
import { DEFAULT_LANGUAGE_DIRECTIVE } from '@/lib/generation/outline-generator';
import { MAX_PDF_CONTENT_CHARS, MAX_VISION_IMAGES } from '@/lib/constants/generation';
import { nanoid } from 'nanoid';
import type {
UserRequirements,
PdfImage,
SceneOutline,
ImageMapping,
} from '@/lib/types/generation';
import { apiError } from '@/lib/server/api-response';
import { createLogger } from '@/lib/logger';
import { resolveModelFromRequest } from '@/lib/server/resolve-model';
// Module-scoped logger; 'Outlines Stream' is the label handed to createLogger.
const log = createLogger('Outlines Stream');
// NOTE(review): presumably the Next.js route segment config for the maximum
// handler duration, in seconds — confirm against the deployment target.
export const maxDuration = 300;
/**
 * Pull the `languageDirective` string out of a (possibly incomplete) JSON
 * response such as `{"languageDirective":"用中文授课...","outlines":[...`.
 *
 * The value is only recognised once its closing quote has arrived.
 *
 * @param buffer - Text accumulated from the model so far.
 * @returns The decoded directive, the raw captured text if its escape
 *   sequences cannot be decoded as a JSON string, or `null` when no complete
 *   `"languageDirective":"..."` pair is present yet.
 */
function extractLanguageDirective(buffer: string): string | null {
  const directivePattern = /"languageDirective"\s*:\s*"((?:[^"\\]|\\.)*)"/;
  const found = directivePattern.exec(buffer);
  if (found === null) {
    return null;
  }

  const rawValue = found[1];
  try {
    // Re-wrap in quotes so JSON.parse resolves escapes like \" and \n.
    return JSON.parse('"' + rawValue + '"');
  } catch {
    // Escapes were not valid JSON; fall back to the text exactly as captured.
    return rawValue;
  }
}
/**
 * Incremental parser for a partially-streamed JSON array of outline objects.
 *
 * Accepts either a flat array `[{...},{...}]` or a wrapper object
 * `{"languageDirective":"...","outlines":[{...},{...}]}`. Any text before the
 * first `[` or `{` (for example a markdown code fence) is ignored.
 *
 * Scanning stops at the `]` that closes the outlines array, so sibling
 * properties that follow it in a wrapper object are never mistaken for
 * outlines.
 *
 * @param buffer - All text received from the model so far.
 * @param alreadyParsed - Number of leading complete objects to skip. Objects
 *   that are brace-balanced but fail `JSON.parse` still count toward it.
 * @returns The complete, parseable objects found after the first
 *   `alreadyParsed` ones, in source order. Empty when the array has not
 *   started yet or no further object is complete.
 */
function extractNewOutlines(buffer: string, alreadyParsed: number): SceneOutline[] {
  const results: SceneOutline[] = [];

  // Drop any preamble (markdown fencing, chatter) before the first [ or {.
  const stripped = buffer.replace(/^[\s\S]*?(?=[\[{])/, '');

  // Locate the outlines array: nested under "outlines" in the wrapper format,
  // otherwise the first array in the text.
  const outlinesKeyIdx = stripped.indexOf('"outlines"');
  const arrayStart =
    outlinesKeyIdx >= 0 ? stripped.indexOf('[', outlinesKeyIdx) : stripped.indexOf('[');
  if (arrayStart === -1) return results;

  let depth = 0; // object nesting depth relative to the outlines array
  let nestedArrays = 0; // arrays opened directly inside the outlines array
  let objectStart = -1;
  let inString = false;
  let escaped = false;
  let objectCount = 0;

  for (let i = arrayStart + 1; i < stripped.length; i++) {
    const char = stripped[i];

    // The character after a backslash inside a string is always literal.
    if (escaped) {
      escaped = false;
      continue;
    }
    if (char === '\\' && inString) {
      escaped = true;
      continue;
    }
    if (char === '"') {
      inString = !inString;
      continue;
    }
    // Braces and brackets inside string values are not structural.
    if (inString) continue;

    if (char === '{') {
      if (depth === 0) objectStart = i;
      depth++;
    } else if (char === '}') {
      depth--;
      if (depth === 0 && objectStart >= 0) {
        objectCount++;
        if (objectCount > alreadyParsed) {
          try {
            const obj = JSON.parse(stripped.substring(objectStart, i + 1));
            results.push(obj);
          } catch {
            // Brace-balanced but not valid JSON — skip it.
          }
        }
        objectStart = -1;
      }
    } else if (depth === 0) {
      // Brackets inside an object (depth > 0) belong to that object's own
      // fields and are ignored; only array-level brackets are tracked here.
      if (char === '[') {
        nestedArrays++;
      } else if (char === ']') {
        // The bracket that closes the outlines array ends the scan.
        if (nestedArrays === 0) break;
        nestedArrays--;
      }
    }
  }

  return results;
}
/**
 * POST handler: generates scene outlines with an LLM and streams them to the
 * client as Server-Sent Events.
 *
 * Flow:
 * 1. Parse the JSON body, resolve the model, and require `body.requirements`
 *    (otherwise a 400 `MISSING_REQUIRED_FIELD` is returned).
 * 2. Build the prompt from the requirements, optional PDF text/images,
 *    research context, teacher persona, and the media-generation headers.
 * 3. Stream the LLM output, emitting `languageDirective` and `outline` events
 *    as soon as they can be parsed from the accumulated text.
 * 4. If an attempt yields no outlines or throws, emit a `retry` event and try
 *    again, up to `MAX_STREAM_RETRIES` extra attempts.
 * 5. Finish with a `done` event (all outlines, media element IDs uniquified)
 *    or an `error` event.
 *
 * If the final attempt throws after some outlines were already parsed, those
 * partial outlines are still delivered in the `done` event.
 *
 * Once the client disconnects (the stream is cancelled or a write fails), the
 * handler stops reading from the LLM stream and sends nothing further.
 *
 * @param req - Incoming request; JSON body plus the
 *   `x-image-generation-enabled` / `x-video-generation-enabled` headers.
 * @returns An SSE `Response`, or the result of `apiError` when setup fails.
 */
export async function POST(req: NextRequest) {
  let requirementSnippet: string | undefined;
  let resolvedModelString: string | undefined;
  try {
    const body = await req.json();

    // Get API configuration from request headers/body
    const {
      model: languageModel,
      modelInfo,
      modelString,
      thinkingConfig,
    } = await resolveModelFromRequest(req, body);
    resolvedModelString = modelString;

    if (!body.requirements) {
      return apiError('MISSING_REQUIRED_FIELD', 400, 'Requirements are required');
    }

    const { requirements, pdfText, pdfImages, imageMapping, researchContext, agents } = body as {
      requirements: UserRequirements;
      pdfText?: string;
      pdfImages?: PdfImage[];
      imageMapping?: ImageMapping;
      researchContext?: string;
      agents?: AgentInfo[];
    };
    // Kept for the outer catch so failures can be logged with some context.
    requirementSnippet = requirements?.requirement?.substring(0, 60);

    // Build user profile string for language inference context
    const userProfileText =
      requirements.userNickname || requirements.userBio
        ? `## Student Profile\n\nStudent: ${requirements.userNickname || 'Unknown'}${requirements.userBio ? ` — ${requirements.userBio}` : ''}\n\nConsider this student's background when designing the course. Adapt difficulty, examples, and teaching approach accordingly.\n\n---`
        : '';

    // Detect vision capability
    const hasVision = !!modelInfo?.capabilities?.vision;

    // Build prompt (same logic as generateSceneOutlinesFromRequirements)
    let availableImagesText = 'No images available';
    let visionImages: Array<{ id: string; src: string }> | undefined;
    if (pdfImages && pdfImages.length > 0) {
      if (hasVision && imageMapping) {
        // Vision mode: split into vision images (first N) and text-only (rest)
        const allWithSrc = pdfImages.filter((img) => imageMapping[img.id]);
        const visionSlice = allWithSrc.slice(0, MAX_VISION_IMAGES);
        const textOnlySlice = allWithSrc.slice(MAX_VISION_IMAGES);
        const noSrcImages = pdfImages.filter((img) => !imageMapping[img.id]);
        const visionDescriptions = visionSlice.map((img) => formatImagePlaceholder(img));
        const textDescriptions = [...textOnlySlice, ...noSrcImages].map((img) =>
          formatImageDescription(img),
        );
        availableImagesText = [...visionDescriptions, ...textDescriptions].join('\n');
        visionImages = visionSlice.map((img) => ({
          id: img.id,
          src: imageMapping[img.id],
          width: img.width,
          height: img.height,
        }));
      } else {
        // Text-only mode: full descriptions
        availableImagesText = pdfImages.map((img) => formatImageDescription(img)).join('\n');
      }
    }

    // Build media snippet conditions based on enabled flags.
    const imageGenerationEnabled = req.headers.get('x-image-generation-enabled') === 'true';
    const videoGenerationEnabled = req.headers.get('x-video-generation-enabled') === 'true';
    const mediaGenerationEnabled = imageGenerationEnabled || videoGenerationEnabled;
    const hasSourceImages = (pdfImages?.length ?? 0) > 0;

    // Build teacher context from agents (if available)
    const teacherContext = formatTeacherPersonaForPrompt(agents);

    // Check if Interactive Mode is enabled
    const interactiveMode = requirements.interactiveMode ?? false;
    const promptId = interactiveMode
      ? PROMPT_IDS.INTERACTIVE_OUTLINES
      : PROMPT_IDS.REQUIREMENTS_TO_OUTLINES;

    const prompts = buildPrompt(promptId, {
      requirement: requirements.requirement,
      pdfContent: pdfText ? pdfText.substring(0, MAX_PDF_CONTENT_CHARS) : 'None',
      availableImages: availableImagesText,
      researchContext: researchContext || 'None',
      hasSourceImages,
      imageEnabled: imageGenerationEnabled,
      videoEnabled: videoGenerationEnabled,
      mediaEnabled: mediaGenerationEnabled,
      teacherContext,
      userProfile: userProfileText,
    });
    if (!prompts) {
      return apiError('INTERNAL_ERROR', 500, 'Prompt template not found');
    }

    log.info(
      `Generating outlines: "${requirements.requirement.substring(0, 50)}" [model=${modelString}]`,
    );

    // Create SSE stream with heartbeat to prevent connection timeout
    const encoder = new TextEncoder();
    const HEARTBEAT_INTERVAL_MS = 15_000;
    const MAX_STREAM_RETRIES = 2;

    // State shared by start() and cancel() of the underlying source.
    let heartbeatTimer: ReturnType<typeof setInterval> | null = null;
    // Set once the consumer cancels or a write to the controller fails.
    let clientGone = false;
    const stopHeartbeat = () => {
      if (heartbeatTimer) {
        clearInterval(heartbeatTimer);
        heartbeatTimer = null;
      }
    };

    const stream = new ReadableStream({
      async start(controller) {
        // Single guarded write path: enqueue throws once the stream is no
        // longer readable, which is treated as "client went away" rather
        // than as an LLM failure.
        const enqueueRaw = (text: string): boolean => {
          if (clientGone) return false;
          try {
            controller.enqueue(encoder.encode(text));
            return true;
          } catch {
            clientGone = true;
            stopHeartbeat();
            return false;
          }
        };
        // Serialize one SSE `data:` frame.
        const sendEvent = (payload: unknown): boolean =>
          enqueueRaw(`data: ${JSON.stringify(payload)}\n\n`);
        // Heartbeat: periodically send SSE comments to keep the connection alive.
        const startHeartbeat = () => {
          stopHeartbeat();
          heartbeatTimer = setInterval(() => {
            enqueueRaw(`:heartbeat\n\n`);
          }, HEARTBEAT_INTERVAL_MS);
        };

        try {
          startHeartbeat();

          // Vision-capable requests send the images as message content;
          // otherwise a plain text prompt is used.
          const streamParams = visionImages?.length
            ? {
                model: languageModel,
                system: prompts.system,
                messages: [
                  {
                    role: 'user' as const,
                    content: buildVisionUserContent(prompts.user, visionImages),
                  },
                ],
                maxOutputTokens: modelInfo?.outputWindow,
              }
            : {
                model: languageModel,
                system: prompts.system,
                prompt: prompts.user,
                maxOutputTokens: modelInfo?.outputWindow,
              };

          let parsedOutlines: SceneOutline[] = [];
          let languageDirective: string | null = null;
          let lastError: string | undefined;

          for (let attempt = 1; attempt <= MAX_STREAM_RETRIES + 1; attempt++) {
            if (clientGone) break;
            try {
              const result = streamLLM(streamParams, 'scene-outlines-stream', thinkingConfig);
              let fullText = '';
              // Each attempt starts from scratch; the client is told via the
              // `retry` event that earlier partial results are superseded.
              parsedOutlines = [];
              languageDirective = null;

              for await (const chunk of result.textStream) {
                // Stop consuming model output nobody will receive.
                if (clientGone) break;
                fullText += chunk;

                // Try to extract language directive early
                if (!languageDirective) {
                  languageDirective = extractLanguageDirective(fullText);
                  if (languageDirective) {
                    sendEvent({ type: 'languageDirective', data: languageDirective });
                  }
                }

                // Try to extract new outlines from the accumulated text
                const newOutlines = extractNewOutlines(fullText, parsedOutlines.length);
                for (const outline of newOutlines) {
                  // Ensure ID and order
                  const enriched = {
                    ...outline,
                    id: outline.id || nanoid(),
                    order: parsedOutlines.length + 1,
                  };
                  parsedOutlines.push(enriched);
                  sendEvent({
                    type: 'outline',
                    data: enriched,
                    index: parsedOutlines.length - 1,
                  });
                }
              }

              if (clientGone) break;

              // Validate: got outlines?
              if (parsedOutlines.length > 0) break;

              // Empty result — retry if we have attempts left
              lastError = fullText.trim()
                ? 'LLM response could not be parsed into outlines'
                : 'LLM returned empty response';
              if (attempt <= MAX_STREAM_RETRIES) {
                log.warn(
                  `Empty outlines (attempt ${attempt}/${MAX_STREAM_RETRIES + 1}), retrying...`,
                );
                // Notify client a retry is happening
                sendEvent({
                  type: 'retry',
                  attempt,
                  maxAttempts: MAX_STREAM_RETRIES + 1,
                });
              }
            } catch (error) {
              if (clientGone) break;
              lastError = error instanceof Error ? error.message : String(error);
              if (attempt <= MAX_STREAM_RETRIES) {
                log.warn(
                  `Stream error (attempt ${attempt}/${MAX_STREAM_RETRIES + 1}), retrying...`,
                  error,
                );
                sendEvent({
                  type: 'retry',
                  attempt,
                  maxAttempts: MAX_STREAM_RETRIES + 1,
                });
                continue;
              }
            }
          }

          if (clientGone) {
            log.warn('Client disconnected before outline generation finished');
          } else if (parsedOutlines.length > 0) {
            // Replace sequential gen_img_N/gen_vid_N with globally unique IDs
            const uniquifiedOutlines = uniquifyMediaElementIds(parsedOutlines);
            // Send done event with all outlines
            sendEvent({
              type: 'done',
              outlines: uniquifiedOutlines,
              languageDirective: languageDirective || DEFAULT_LANGUAGE_DIRECTIVE,
            });
          } else {
            // All retries exhausted, no outlines produced
            log.error(
              `Outline generation failed after ${MAX_STREAM_RETRIES + 1} attempts: ${lastError}`,
            );
            sendEvent({
              type: 'error',
              error: lastError || 'Failed to generate outlines',
            });
          }
        } catch (error) {
          sendEvent({
            type: 'error',
            error: error instanceof Error ? error.message : String(error),
          });
        } finally {
          stopHeartbeat();
          if (!clientGone) {
            try {
              controller.close();
            } catch {
              // Stream was already closed or errored; nothing left to do.
            }
          }
        }
      },
      // Invoked when the consumer cancels the stream (e.g. client disconnect).
      cancel() {
        clientGone = true;
        stopHeartbeat();
      },
    });

    return new Response(stream, {
      headers: {
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        Connection: 'keep-alive',
      },
    });
  } catch (error) {
    log.error(
      `Outline streaming failed [requirement="${requirementSnippet ?? 'unknown'}...", model=${resolvedModelString ?? 'unknown'}]:`,
      error,
    );
    return apiError('INTERNAL_ERROR', 500, error instanceof Error ? error.message : String(error));
  }
}