import { Play, StopCircle, Upload } from 'lucide-react' import { useState } from 'react' import { useNavigate } from 'react-router-dom' import type React from 'react' import BackendRejectedBanner from '../components/BackendRejectedBanner' import { useTrackedGenerate } from '../hooks/useTrackedGenerate' import type { GenerateSettings } from '../api/types' const ACCEPTED_MIME = /^(image\/.+|application\/pdf)$/ const defaultSettings: GenerateSettings = { zoom: 2.1, overlap: 20, viewport_width: 1920, viewport_height: 1080, max_screenshots: 50, } export default function ImageToVideo() { const [file, setFile] = useState(null) const [instructions, setInstructions] = useState('') const [systemPrompt, setSystemPrompt] = useState('') const [settings, setSettings] = useState(defaultSettings) const [dragActive, setDragActive] = useState(false) const [dropError, setDropError] = useState(null) const { state, generateFromImage, cancel } = useTrackedGenerate('image-to-video') const running = state.status === 'running' const nav = useNavigate() // Drag-and-drop — accepts the first dropped file that matches an image // or a PDF. Mirrors the accept="image/*,application/pdf" rule on the // file input. We deliberately ignore drops of multiple files; the backend // only processes a single source per run. const onDragOver = (e: React.DragEvent) => { if (running) return e.preventDefault() setDragActive(true) } const onDragLeave = (e: React.DragEvent) => { e.preventDefault() setDragActive(false) } const onDrop = (e: React.DragEvent) => { if (running) return e.preventDefault() setDragActive(false) setDropError(null) const dropped = e.dataTransfer?.files?.[0] if (!dropped) return if (!ACCEPTED_MIME.test(dropped.type) && !dropped.name.match(/\.(png|jpe?g|gif|webp|bmp|pdf)$/i)) { setDropError(`Unsupported file type: ${dropped.type || dropped.name}. 
Drop an image or PDF.`) return } setFile(dropped) } const onSubmit = async (e: React.FormEvent) => { e.preventDefault() if (!file) return const fd = new FormData() fd.append('image', file) fd.append('instructions', instructions) if (systemPrompt) fd.append('system_prompt', systemPrompt) fd.append('zoom', String(settings.zoom ?? 2.1)) fd.append('overlap', String(settings.overlap ?? 20)) fd.append('viewport_width', String(settings.viewport_width ?? 1920)) fd.append('viewport_height', String(settings.viewport_height ?? 1080)) fd.append('max_screenshots', String(settings.max_screenshots ?? 50)) const { queueId } = generateFromImage(fd, { files: [file], settings }) nav(`/processes?queue=${encodeURIComponent(queueId)}`) } return (
Tool · Image → Video

Image / PDF to Video

Upload a screenshot, photo, or PDF. Vision AI extracts text, formats it as HTML, and captures screenshots.

{dropError && (

{dropError}

)}