File size: 9,946 Bytes
5f3e9f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
import { Play, StopCircle, Upload } from 'lucide-react'
import { useState } from 'react'
import { useNavigate } from 'react-router-dom'
import type React from 'react'
import BackendRejectedBanner from '../components/BackendRejectedBanner'
import { useTrackedGenerate } from '../hooks/useTrackedGenerate'
import type { GenerateSettings } from '../api/types'

// MIME types accepted for the source file: any `image/<subtype>` with a
// non-empty subtype, or exactly `application/pdf`.
const ACCEPTED_MIME = /^(?:image\/.+|application\/pdf)$/

// Initial values for the capture-settings form; used to seed the `settings`
// state of the ImageToVideo component.
const defaultSettings: GenerateSettings = {
  zoom: 2.1,
  // Shown in the form as "Overlap (px)".
  overlap: 20,
  viewport_width: 1920,
  viewport_height: 1080,
  max_screenshots: 50,
}

/** File extensions accepted as a fallback when the MIME type does not match. */
const ACCEPTED_EXTENSION = /\.(png|jpe?g|gif|webp|bmp|pdf)$/i

/** Capture settings that are edited through the numeric inputs of the form. */
type NumericSettingKey =
  | 'zoom'
  | 'overlap'
  | 'viewport_width'
  | 'viewport_height'
  | 'max_screenshots'

/** True when a dropped file is an image or a PDF, judged by MIME type or by file extension. */
function isAcceptedFile(candidate: File): boolean {
  return ACCEPTED_MIME.test(candidate.type) || ACCEPTED_EXTENSION.test(candidate.name)
}

/**
 * "Image / PDF to Video" form page.
 *
 * Lets the user pick (or drag-and-drop) a single image or PDF, optionally
 * enter extraction instructions and a custom system prompt, and tune the
 * capture settings. On submit the values are packed into a `FormData`,
 * handed to `generateFromImage` from `useTrackedGenerate`, and the user is
 * navigated to `/processes?queue=<queueId>`.
 *
 * While a run is in progress (`state.status === 'running'`) all inputs are
 * disabled and the submit button is replaced by a Cancel button.
 */
export default function ImageToVideo() {
  const [file, setFile] = useState<File | null>(null)
  const [instructions, setInstructions] = useState('')
  const [systemPrompt, setSystemPrompt] = useState('')
  const [settings, setSettings] = useState<GenerateSettings>(defaultSettings)
  const [dragActive, setDragActive] = useState(false)
  const [dropError, setDropError] = useState<string | null>(null)
  const { state, generateFromImage, cancel } = useTrackedGenerate('image-to-video')
  const running = state.status === 'running'
  const nav = useNavigate()

  // Drag-and-drop — accepts the first dropped file that matches an image
  // or a PDF. Mirrors the accept="image/*,application/pdf" rule on the
  // <input>. We deliberately ignore drops of multiple files; the backend
  // only processes a single source per run.
  const onDragOver = (e: React.DragEvent<HTMLLabelElement>) => {
    // Always cancel the event: if it is left uncancelled the browser applies
    // its default file-drop behaviour (opening the file in this tab), which
    // would throw away the page while a run is in progress.
    e.preventDefault()
    if (running) {
      // Signal "not droppable" instead of highlighting the drop zone.
      if (e.dataTransfer) e.dataTransfer.dropEffect = 'none'
      return
    }
    setDragActive(true)
  }
  const onDragLeave = (e: React.DragEvent<HTMLLabelElement>) => {
    e.preventDefault()
    // dragleave also fires when the pointer moves onto a child of the label
    // (icon, caption). Keep the highlight in that case to avoid flicker.
    const enteredNode = e.relatedTarget
    if (enteredNode instanceof Node && e.currentTarget.contains(enteredNode)) return
    setDragActive(false)
  }
  const onDrop = (e: React.DragEvent<HTMLLabelElement>) => {
    // Cancel first so a drop is never handled by the browser itself.
    e.preventDefault()
    setDragActive(false)
    if (running) return
    setDropError(null)
    const dropped = e.dataTransfer?.files?.[0]
    if (!dropped) return
    if (!isAcceptedFile(dropped)) {
      setDropError(`Unsupported file type: ${dropped.type || dropped.name}. Drop an image or PDF.`)
      return
    }
    setFile(dropped)
  }

  // Handles a selection made through the native file picker.
  const onFilePicked = (e: React.ChangeEvent<HTMLInputElement>) => {
    const picked = e.target.files?.[0] ?? null
    // Reset the native value so that re-selecting the same file later (for
    // example after a different file was dropped in between) still fires a
    // change event. The File object read above stays valid after the reset,
    // and the form is submitted from React state, not from this input.
    e.target.value = ''
    setFile(picked)
    setDropError(null)
  }

  // Builds a change handler that stores the numeric value of an input under
  // `key`. Uses the functional updater so it never spreads a stale `settings`.
  const onNumberChange =
    (key: NumericSettingKey) => (e: React.ChangeEvent<HTMLInputElement>) => {
      const next = Number(e.target.value)
      setSettings((prev) => ({ ...prev, [key]: next }))
    }

  // Packs the form into multipart fields, starts the tracked run and jumps to
  // the process list filtered to the new queue entry. Synchronous on purpose:
  // `generateFromImage` returns the queue id directly.
  const onSubmit = (e: React.FormEvent) => {
    e.preventDefault()
    if (!file) return
    const fd = new FormData()
    fd.append('image', file)
    fd.append('instructions', instructions)
    // The system prompt is optional; omit the field entirely when empty.
    if (systemPrompt) fd.append('system_prompt', systemPrompt)
    fd.append('zoom', String(settings.zoom ?? 2.1))
    fd.append('overlap', String(settings.overlap ?? 20))
    fd.append('viewport_width', String(settings.viewport_width ?? 1920))
    fd.append('viewport_height', String(settings.viewport_height ?? 1080))
    fd.append('max_screenshots', String(settings.max_screenshots ?? 50))
    const { queueId } = generateFromImage(fd, { files: [file], settings })
    nav(`/processes?queue=${encodeURIComponent(queueId)}`)
  }

  return (
    <div className="container-form space-y-6">
      <div>
        <div className="eyebrow">
          <span className="h-1 w-1 rounded-full bg-brand-500" />
          Tool · Image → Video
        </div>
        <h1 className="h-page mt-2">Image / PDF to Video</h1>
        <p className="mt-2 text-sm text-muted">
          Upload a screenshot, photo, or PDF. Vision AI extracts text, formats it as HTML, and
          captures screenshots.
        </p>
      </div>

      <form onSubmit={onSubmit} className="space-y-4">
        <div className="card">
          <label className="label" htmlFor="image-file-input">Source file</label>
          <label
            htmlFor="image-file-input"
            onDragOver={onDragOver}
            onDragEnter={onDragOver}
            onDragLeave={onDragLeave}
            onDrop={onDrop}
            className={
              'flex cursor-pointer flex-col items-center justify-center rounded-lg border-2 border-dashed p-10 text-center transition-colors ' +
              (dragActive
                ? 'border-brand-500 bg-brand-50 dark:border-brand-400 dark:bg-brand-900/30'
                : 'border-slate-200 bg-slate-50 hover:border-brand-400 hover:bg-brand-50 dark:border-white/10 dark:bg-white/[0.03] dark:hover:border-brand-500/60 dark:hover:bg-brand-900/20')
            }
          >
            <Upload size={28} className={dragActive ? 'text-brand-500' : 'text-slate-400'} />
            <div className="mt-2 text-sm text-slate-600 dark:text-slate-300">
              {file ? (
                <span className="font-medium">{file.name}</span>
              ) : dragActive ? (
                <span className="font-medium text-brand-700 dark:text-brand-200">Drop to upload</span>
              ) : (
                <>
                  <span className="font-medium">Click to upload</span> or drag-and-drop · PNG, JPG, or PDF
                </>
              )}
            </div>
            <input
              id="image-file-input"
              type="file"
              className="hidden"
              accept="image/*,application/pdf"
              onChange={onFilePicked}
              disabled={running}
            />
          </label>
          {dropError && (
            <p className="mt-2 text-xs text-rose-600 dark:text-rose-300">{dropError}</p>
          )}
        </div>

        <div className="card space-y-4">
          <div>
            <label className="label" htmlFor="instructions">
              Extraction instructions (optional)
            </label>
            <textarea
              id="instructions"
              className="textarea h-20 resize-y"
              placeholder="e.g., Extract only the code blocks, preserve order…"
              value={instructions}
              onChange={(e) => setInstructions(e.target.value)}
              disabled={running}
            />
          </div>
          <div>
            <label className="label" htmlFor="system-prompt">
              Custom HTML system prompt (optional)
            </label>
            <textarea
              id="system-prompt"
              className="textarea h-20 resize-y"
              placeholder="Override the default HTML formatting prompt…"
              value={systemPrompt}
              onChange={(e) => setSystemPrompt(e.target.value)}
              disabled={running}
            />
          </div>
        </div>

        <div className="card space-y-4">
          <div className="text-sm font-medium text-slate-900 dark:text-slate-100">
            Capture settings
          </div>
          <div className="grid grid-cols-1 gap-4 sm:grid-cols-3">
            <Field label="Zoom" htmlFor="img-zoom">
              <input
                id="img-zoom"
                type="number"
                step="0.1"
                className="input"
                value={settings.zoom ?? 2.1}
                onChange={onNumberChange('zoom')}
                disabled={running}
              />
            </Field>
            <Field label="Overlap (px)" htmlFor="img-overlap">
              <input
                id="img-overlap"
                type="number"
                className="input"
                value={settings.overlap ?? 20}
                onChange={onNumberChange('overlap')}
                disabled={running}
              />
            </Field>
            <Field label="Max screenshots" htmlFor="img-max">
              <input
                id="img-max"
                type="number"
                className="input"
                value={settings.max_screenshots ?? 50}
                onChange={onNumberChange('max_screenshots')}
                disabled={running}
              />
            </Field>
          </div>
          <div className="grid grid-cols-1 gap-4 sm:grid-cols-2">
            <Field label="Viewport width" htmlFor="img-vw">
              <input
                id="img-vw"
                type="number"
                className="input"
                value={settings.viewport_width ?? 1920}
                onChange={onNumberChange('viewport_width')}
                disabled={running}
              />
            </Field>
            <Field label="Viewport height" htmlFor="img-vh">
              <input
                id="img-vh"
                type="number"
                className="input"
                value={settings.viewport_height ?? 1080}
                onChange={onNumberChange('viewport_height')}
                disabled={running}
              />
            </Field>
          </div>
        </div>

        <div className="flex flex-wrap items-center gap-3">
          {!running ? (
            <button type="submit" className="btn-primary" disabled={!file}>
              <Play size={16} /> Generate
            </button>
          ) : (
            <button type="button" className="btn-danger" onClick={() => cancel()}>
              <StopCircle size={16} /> Cancel
            </button>
          )}
          {/* Plain errors are shown inline; backend rejections get the banner below. */}
          {state.status === 'error' && !state.rejectedReason && (
            <span className="text-sm text-red-600 dark:text-red-400">{state.error}</span>
          )}
        </div>
        {state.status === 'error' && state.rejectedReason && (
          <BackendRejectedBanner
            reason={state.rejectedReason}
            message={state.error ?? 'Backend rejected the run.'}
          />
        )}
      </form>

    </div>
  )
}

/** Props accepted by {@link Field}. */
type FieldProps = {
  /** Caption rendered inside the `<label>`. */
  label: string
  /** Id of the control the label points at; optional. */
  htmlFor?: string
  /** The form control (or any other content) rendered under the label. */
  children: React.ReactNode
}

/**
 * Labelled form field: renders a `<label className="label">` followed by the
 * given children, both wrapped in a plain `<div>`.
 */
function Field(props: FieldProps) {
  const { label: caption, htmlFor: controlId, children: control } = props
  return (
    <div>
      <label htmlFor={controlId} className="label">
        {caption}
      </label>
      {control}
    </div>
  )
}