muthuk1's picture
Convert OpenMAIC from Next.js to React (Vite)
f56a29b verified
/**
* Scene Actions Generation API
*
* Generates actions for a scene given its outline and content,
* then assembles the complete Scene object.
* This is the second half of the two-step scene generation pipeline.
*/
import { NextRequest } from 'next/server';
import { callLLM } from '@/lib/ai/llm';
import {
generateSceneActions,
buildCompleteScene,
buildVisionUserContent,
type SceneGenerationContext,
type AgentInfo,
} from '@/lib/generation/generation-pipeline';
import type { SceneOutline } from '@/lib/types/generation';
import type {
GeneratedSlideContent,
GeneratedQuizContent,
GeneratedInteractiveContent,
GeneratedPBLContent,
} from '@/lib/types/generation';
import type { SpeechAction } from '@/lib/types/action';
import { createLogger } from '@/lib/logger';
import { apiError, apiSuccess } from '@/lib/server/api-response';
import { resolveModelFromRequest } from '@/lib/server/resolve-model';
const log = createLogger('Scene Actions API');
export const maxDuration = 60;
export async function POST(req: NextRequest) {
let outlineTitle: string | undefined;
let resolvedModelString: string | undefined;
try {
const body = await req.json();
const {
outline,
allOutlines,
content,
stageId,
agents,
previousSpeeches: incomingPreviousSpeeches,
userProfile,
languageDirective,
} = body as {
outline: SceneOutline;
allOutlines: SceneOutline[];
content:
| GeneratedSlideContent
| GeneratedQuizContent
| GeneratedInteractiveContent
| GeneratedPBLContent;
stageId: string;
agents?: AgentInfo[];
previousSpeeches?: string[];
userProfile?: string;
languageDirective?: string;
};
// Validate required fields
if (!outline) {
return apiError('MISSING_REQUIRED_FIELD', 400, 'outline is required');
}
if (!allOutlines || allOutlines.length === 0) {
return apiError(
'MISSING_REQUIRED_FIELD',
400,
'allOutlines is required and must not be empty',
);
}
if (!content) {
return apiError('MISSING_REQUIRED_FIELD', 400, 'content is required');
}
if (!stageId) {
return apiError('MISSING_REQUIRED_FIELD', 400, 'stageId is required');
}
// ── Model resolution from request headers/body ──
const {
model: languageModel,
modelInfo,
modelString,
thinkingConfig,
} = await resolveModelFromRequest(req, body);
outlineTitle = outline?.title;
resolvedModelString = modelString;
// Detect vision capability
const hasVision = !!modelInfo?.capabilities?.vision;
// AI call function (actions typically don't use vision, but kept for consistency)
const aiCall = async (
systemPrompt: string,
userPrompt: string,
images?: Array<{ id: string; src: string }>,
): Promise<string> => {
if (images?.length && hasVision) {
const result = await callLLM(
{
model: languageModel,
system: systemPrompt,
messages: [
{
role: 'user' as const,
content: buildVisionUserContent(userPrompt, images),
},
],
maxOutputTokens: modelInfo?.outputWindow,
},
'scene-actions',
undefined,
thinkingConfig,
);
return result.text;
}
const result = await callLLM(
{
model: languageModel,
system: systemPrompt,
prompt: userPrompt,
maxOutputTokens: modelInfo?.outputWindow,
},
'scene-actions',
undefined,
thinkingConfig,
);
return result.text;
};
// ── Build cross-scene context ──
const allTitles = allOutlines.map((o) => o.title);
const pageIndex = allOutlines.findIndex((o) => o.id === outline.id);
const ctx: SceneGenerationContext = {
pageIndex: (pageIndex >= 0 ? pageIndex : 0) + 1,
totalPages: allOutlines.length,
allTitles,
previousSpeeches: incomingPreviousSpeeches ?? [],
};
// ── Generate actions ──
log.info(`Generating actions: "${outline.title}" (${outline.type}) [model=${modelString}]`);
const actions = await generateSceneActions(outline, content, aiCall, {
ctx,
agents,
userProfile,
languageDirective,
});
log.info(`Generated ${actions.length} actions for: "${outline.title}"`);
// ── Build complete scene ──
const scene = buildCompleteScene(outline, content, actions, stageId);
if (!scene) {
log.error(`Failed to build scene: "${outline.title}"`);
return apiError('GENERATION_FAILED', 500, `Failed to build scene: ${outline.title}`);
}
// ── Extract speeches for cross-scene coherence ──
const outputPreviousSpeeches = (scene.actions || [])
.filter((a): a is SpeechAction => a.type === 'speech')
.map((a) => a.text);
log.info(
`Scene assembled successfully: "${outline.title}" β€” ${scene.actions?.length ?? 0} actions`,
);
return apiSuccess({ scene, previousSpeeches: outputPreviousSpeeches });
} catch (error) {
log.error(
`Scene actions generation failed [scene="${outlineTitle ?? 'unknown'}", model=${resolvedModelString ?? 'unknown'}]:`,
error,
);
return apiError('INTERNAL_ERROR', 500, error instanceof Error ? error.message : String(error));
}
}