Spaces:
Running
Running
Add multi-reference image support + fix missing python-dotenv
Browse files
- Add python-dotenv to requirements.txt (was missing, causing import error)
- Add multi-reference image support for img2img (character + pose reference)
- Add pose/style drop zone to UI with side-by-side layout
- Add multi-ref models: SeeDream Sequential, Kling O1, Qwen Multi-Angle
- Update wavespeed_provider to handle multiple image uploads
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
requirements.txt
CHANGED
|
@@ -12,4 +12,5 @@ apscheduler>=3.10.0
|
|
| 12 |
httpx>=0.26.0
|
| 13 |
pyyaml>=6.0
|
| 14 |
python-multipart>=0.0.6
|
|
|
|
| 15 |
higgsfield-client>=0.1.0
|
|
|
|
| 12 |
httpx>=0.26.0
|
| 13 |
pyyaml>=6.0
|
| 14 |
python-multipart>=0.0.6
|
| 15 |
+
python-dotenv>=1.0.0
|
| 16 |
higgsfield-client>=0.1.0
|
src/content_engine/api/routes_generation.py
CHANGED
|
@@ -289,6 +289,7 @@ async def cancel_job(job_id: str):
|
|
| 289 |
@router.post("/generate/img2img", response_model=GenerationResponse)
|
| 290 |
async def generate_img2img(
|
| 291 |
image: UploadFile = File(...),
|
|
|
|
| 292 |
positive_prompt: str = Form(""),
|
| 293 |
negative_prompt: str = Form(""),
|
| 294 |
character_id: str | None = Form(None),
|
|
@@ -309,12 +310,19 @@ async def generate_img2img(
|
|
| 309 |
Supports both local (ComfyUI) and cloud (WaveSpeed edit) backends.
|
| 310 |
- Local: denoise-based img2img via ComfyUI
|
| 311 |
- Cloud: prompt-guided editing via SeeDream/NanoBanana Edit APIs
|
|
|
|
|
|
|
| 312 |
"""
|
| 313 |
import json as json_module
|
| 314 |
|
| 315 |
job_id = str(uuid.uuid4())
|
| 316 |
image_bytes = await image.read()
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
# Parse template variables
|
| 319 |
try:
|
| 320 |
variables = json_module.loads(variables_json) if variables_json else {}
|
|
@@ -330,6 +338,7 @@ async def generate_img2img(
|
|
| 330 |
_run_cloud_img2img(
|
| 331 |
job_id=job_id,
|
| 332 |
image_bytes=image_bytes,
|
|
|
|
| 333 |
positive_prompt=positive_prompt,
|
| 334 |
model=checkpoint,
|
| 335 |
content_rating=content_rating,
|
|
@@ -491,6 +500,7 @@ async def _run_cloud_img2img(
|
|
| 491 |
*,
|
| 492 |
job_id: str,
|
| 493 |
image_bytes: bytes,
|
|
|
|
| 494 |
positive_prompt: str,
|
| 495 |
model: str | None,
|
| 496 |
content_rating: str,
|
|
@@ -545,6 +555,7 @@ async def _run_cloud_img2img(
|
|
| 545 |
result = await _wavespeed_provider.edit_image(
|
| 546 |
prompt=final_prompt,
|
| 547 |
image_bytes=image_bytes,
|
|
|
|
| 548 |
model=model,
|
| 549 |
size=size,
|
| 550 |
)
|
|
|
|
| 289 |
@router.post("/generate/img2img", response_model=GenerationResponse)
|
| 290 |
async def generate_img2img(
|
| 291 |
image: UploadFile = File(...),
|
| 292 |
+
image2: UploadFile | None = File(default=None),
|
| 293 |
positive_prompt: str = Form(""),
|
| 294 |
negative_prompt: str = Form(""),
|
| 295 |
character_id: str | None = Form(None),
|
|
|
|
| 310 |
Supports both local (ComfyUI) and cloud (WaveSpeed edit) backends.
|
| 311 |
- Local: denoise-based img2img via ComfyUI
|
| 312 |
- Cloud: prompt-guided editing via SeeDream/NanoBanana Edit APIs
|
| 313 |
+
|
| 314 |
+
Multi-reference: Pass a second image (pose/style reference) for models that support it.
|
| 315 |
"""
|
| 316 |
import json as json_module
|
| 317 |
|
| 318 |
job_id = str(uuid.uuid4())
|
| 319 |
image_bytes = await image.read()
|
| 320 |
|
| 321 |
+
# Read second reference image if provided (for multi-ref models)
|
| 322 |
+
image_bytes_2 = None
|
| 323 |
+
if image2 is not None:
|
| 324 |
+
image_bytes_2 = await image2.read()
|
| 325 |
+
|
| 326 |
# Parse template variables
|
| 327 |
try:
|
| 328 |
variables = json_module.loads(variables_json) if variables_json else {}
|
|
|
|
| 338 |
_run_cloud_img2img(
|
| 339 |
job_id=job_id,
|
| 340 |
image_bytes=image_bytes,
|
| 341 |
+
image_bytes_2=image_bytes_2,
|
| 342 |
positive_prompt=positive_prompt,
|
| 343 |
model=checkpoint,
|
| 344 |
content_rating=content_rating,
|
|
|
|
| 500 |
*,
|
| 501 |
job_id: str,
|
| 502 |
image_bytes: bytes,
|
| 503 |
+
image_bytes_2: bytes | None,
|
| 504 |
positive_prompt: str,
|
| 505 |
model: str | None,
|
| 506 |
content_rating: str,
|
|
|
|
| 555 |
result = await _wavespeed_provider.edit_image(
|
| 556 |
prompt=final_prompt,
|
| 557 |
image_bytes=image_bytes,
|
| 558 |
+
image_bytes_2=image_bytes_2,
|
| 559 |
model=model,
|
| 560 |
size=size,
|
| 561 |
)
|
src/content_engine/api/ui.html
CHANGED
|
@@ -909,6 +909,12 @@ select { cursor: pointer; }
|
|
| 909 |
<option value="higgsfield-soul">Higgsfield Soul (Faces)</option>
|
| 910 |
<option value="gpt-image-1.5-edit">GPT Image 1.5 Edit</option>
|
| 911 |
</optgroup>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 912 |
<optgroup label="NSFW Friendly">
|
| 913 |
<option value="seedream-4-edit">SeeDream v4 Edit</option>
|
| 914 |
<option value="wan-2.6-edit">WAN 2.6 Edit</option>
|
|
@@ -927,7 +933,7 @@ select { cursor: pointer; }
|
|
| 927 |
</optgroup>
|
| 928 |
</select>
|
| 929 |
<div style="font-size:11px;color:var(--text-secondary);margin-top:4px">
|
| 930 |
-
|
| 931 |
</div>
|
| 932 |
</div>
|
| 933 |
|
|
@@ -1005,14 +1011,30 @@ select { cursor: pointer; }
|
|
| 1005 |
|
| 1006 |
<!-- Reference image upload for img2img -->
|
| 1007 |
<div id="img2img-section" style="display:none">
|
| 1008 |
-
<div
|
| 1009 |
-
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1013 |
</div>
|
| 1014 |
-
<
|
| 1015 |
-
|
|
|
|
|
|
|
| 1016 |
<div class="slider-row">
|
| 1017 |
<input type="range" id="gen-denoise" min="0" max="1" step="0.05" value="0.65" oninput="this.nextElementSibling.textContent=this.value">
|
| 1018 |
<span class="value">0.65</span>
|
|
@@ -1511,6 +1533,7 @@ let currentBatchId = null;
|
|
| 1511 |
let batchPollInterval = null;
|
| 1512 |
let trainingPollInterval = null;
|
| 1513 |
let refImageFile = null;
|
|
|
|
| 1514 |
let videoImageFile = null;
|
| 1515 |
let trainImageFiles = [];
|
| 1516 |
let trainCaptions = {}; // filename -> caption text
|
|
@@ -1663,7 +1686,7 @@ document.addEventListener('DOMContentLoaded', async () => {
|
|
| 1663 |
|
| 1664 |
// --- Drop zone setup ---
|
| 1665 |
function setupDropZones() {
|
| 1666 |
-
['ref-drop-zone', 'train-drop-zone', 'video-drop-zone'].forEach(id => {
|
| 1667 |
const zone = document.getElementById(id);
|
| 1668 |
if (!zone) return;
|
| 1669 |
zone.addEventListener('dragover', e => { e.preventDefault(); zone.classList.add('dragover'); });
|
|
@@ -1676,6 +1699,9 @@ function setupDropZones() {
|
|
| 1676 |
if (id === 'ref-drop-zone') {
|
| 1677 |
refImageFile = file;
|
| 1678 |
showRefPreview(file);
|
|
|
|
|
|
|
|
|
|
| 1679 |
} else if (id === 'video-drop-zone') {
|
| 1680 |
videoImageFile = file;
|
| 1681 |
showVideoPreview(file);
|
|
@@ -1700,9 +1726,9 @@ function showRefPreview(file) {
|
|
| 1700 |
const reader = new FileReader();
|
| 1701 |
reader.onload = e => {
|
| 1702 |
zone.innerHTML = `
|
| 1703 |
-
<img src="${e.target.result}">
|
| 1704 |
-
<div style="margin-top:
|
| 1705 |
-
<button class="btn btn-secondary btn-small" onclick="event.stopPropagation();clearRefImage()" style="margin-top:8px">Remove</button>
|
| 1706 |
`;
|
| 1707 |
};
|
| 1708 |
reader.readAsDataURL(file);
|
|
@@ -1713,13 +1739,46 @@ function clearRefImage() {
|
|
| 1713 |
const zone = document.getElementById('ref-drop-zone');
|
| 1714 |
zone.classList.remove('has-file');
|
| 1715 |
zone.innerHTML = `
|
| 1716 |
-
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" style="width:
|
| 1717 |
-
<div>Drop
|
| 1718 |
-
<div style="font-size:
|
| 1719 |
`;
|
| 1720 |
document.getElementById('ref-file-input').value = '';
|
| 1721 |
}
|
| 1722 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1723 |
function handleVideoImage(input) {
|
| 1724 |
if (input.files[0]) {
|
| 1725 |
videoImageFile = input.files[0];
|
|
@@ -2158,6 +2217,10 @@ async function doGenerate() {
|
|
| 2158 |
}
|
| 2159 |
const formData = new FormData();
|
| 2160 |
formData.append('image', refImageFile);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2161 |
formData.append('positive_prompt', document.getElementById('gen-positive').value || '');
|
| 2162 |
formData.append('negative_prompt', document.getElementById('gen-negative').value || '');
|
| 2163 |
formData.append('content_rating', selectedRating);
|
|
|
|
| 909 |
<option value="higgsfield-soul">Higgsfield Soul (Faces)</option>
|
| 910 |
<option value="gpt-image-1.5-edit">GPT Image 1.5 Edit</option>
|
| 911 |
</optgroup>
|
| 912 |
+
<optgroup label="Multi-Reference (2+ images)">
|
| 913 |
+
<option value="seedream-4.5-multi">SeeDream v4.5 Sequential (up to 3)</option>
|
| 914 |
+
<option value="seedream-4-multi">SeeDream v4 Sequential (up to 3)</option>
|
| 915 |
+
<option value="kling-o1-multi">Kling O1 (up to 10 refs)</option>
|
| 916 |
+
<option value="qwen-multi-angle">Qwen Multi-Angle</option>
|
| 917 |
+
</optgroup>
|
| 918 |
<optgroup label="NSFW Friendly">
|
| 919 |
<option value="seedream-4-edit">SeeDream v4 Edit</option>
|
| 920 |
<option value="wan-2.6-edit">WAN 2.6 Edit</option>
|
|
|
|
| 933 |
</optgroup>
|
| 934 |
</select>
|
| 935 |
<div style="font-size:11px;color:var(--text-secondary);margin-top:4px">
|
| 936 |
+
Single-ref models use character image. Multi-ref models combine both images for consistency.
|
| 937 |
</div>
|
| 938 |
</div>
|
| 939 |
|
|
|
|
| 1011 |
|
| 1012 |
<!-- Reference image upload for img2img -->
|
| 1013 |
<div id="img2img-section" style="display:none">
|
| 1014 |
+
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px">
|
| 1015 |
+
<div>
|
| 1016 |
+
<div class="section-title">Character Reference</div>
|
| 1017 |
+
<div class="drop-zone" id="ref-drop-zone" onclick="document.getElementById('ref-file-input').click()" style="min-height:140px">
|
| 1018 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" style="width:28px;height:28px;opacity:0.5;margin-bottom:6px"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>
|
| 1019 |
+
<div style="font-size:12px">Drop or click</div>
|
| 1020 |
+
<div style="font-size:10px;margin-top:2px;color:var(--text-secondary)">Main subject</div>
|
| 1021 |
+
</div>
|
| 1022 |
+
<input type="file" id="ref-file-input" accept="image/*" style="display:none" onchange="handleRefImage(this)">
|
| 1023 |
+
</div>
|
| 1024 |
+
<div>
|
| 1025 |
+
<div class="section-title">Pose/Style Reference <span style="font-weight:400;font-size:10px;color:var(--text-secondary)">(optional)</span></div>
|
| 1026 |
+
<div class="drop-zone" id="pose-drop-zone" onclick="document.getElementById('pose-file-input').click()" style="min-height:140px">
|
| 1027 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" style="width:28px;height:28px;opacity:0.5;margin-bottom:6px"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>
|
| 1028 |
+
<div style="font-size:12px">Drop or click</div>
|
| 1029 |
+
<div style="font-size:10px;margin-top:2px;color:var(--text-secondary)">Pose or style</div>
|
| 1030 |
+
</div>
|
| 1031 |
+
<input type="file" id="pose-file-input" accept="image/*" style="display:none" onchange="handlePoseImage(this)">
|
| 1032 |
+
</div>
|
| 1033 |
</div>
|
| 1034 |
+
<div style="font-size:11px;color:var(--text-secondary);margin-top:8px">
|
| 1035 |
+
Multi-ref models (SeeDream Sequential, Kling O1) use both images for character consistency.
|
| 1036 |
+
</div>
|
| 1037 |
+
<label style="margin-top:12px">Denoise Strength (0 = keep original, 1 = ignore reference)</label>
|
| 1038 |
<div class="slider-row">
|
| 1039 |
<input type="range" id="gen-denoise" min="0" max="1" step="0.05" value="0.65" oninput="this.nextElementSibling.textContent=this.value">
|
| 1040 |
<span class="value">0.65</span>
|
|
|
|
| 1533 |
let batchPollInterval = null;
|
| 1534 |
let trainingPollInterval = null;
|
| 1535 |
let refImageFile = null;
|
| 1536 |
+
let poseImageFile = null;
|
| 1537 |
let videoImageFile = null;
|
| 1538 |
let trainImageFiles = [];
|
| 1539 |
let trainCaptions = {}; // filename -> caption text
|
|
|
|
| 1686 |
|
| 1687 |
// --- Drop zone setup ---
|
| 1688 |
function setupDropZones() {
|
| 1689 |
+
['ref-drop-zone', 'pose-drop-zone', 'train-drop-zone', 'video-drop-zone'].forEach(id => {
|
| 1690 |
const zone = document.getElementById(id);
|
| 1691 |
if (!zone) return;
|
| 1692 |
zone.addEventListener('dragover', e => { e.preventDefault(); zone.classList.add('dragover'); });
|
|
|
|
| 1699 |
if (id === 'ref-drop-zone') {
|
| 1700 |
refImageFile = file;
|
| 1701 |
showRefPreview(file);
|
| 1702 |
+
} else if (id === 'pose-drop-zone') {
|
| 1703 |
+
poseImageFile = file;
|
| 1704 |
+
showPosePreview(file);
|
| 1705 |
} else if (id === 'video-drop-zone') {
|
| 1706 |
videoImageFile = file;
|
| 1707 |
showVideoPreview(file);
|
|
|
|
| 1726 |
const reader = new FileReader();
|
| 1727 |
reader.onload = e => {
|
| 1728 |
zone.innerHTML = `
|
| 1729 |
+
<img src="${e.target.result}" style="max-height:100px;max-width:100%;border-radius:4px">
|
| 1730 |
+
<div style="margin-top:4px;font-size:11px">${file.name.substring(0,15)}${file.name.length > 15 ? '...' : ''}</div>
|
| 1731 |
+
<button class="btn btn-secondary btn-small" onclick="event.stopPropagation();clearRefImage()" style="margin-top:4px;padding:2px 8px;font-size:10px">Remove</button>
|
| 1732 |
`;
|
| 1733 |
};
|
| 1734 |
reader.readAsDataURL(file);
|
|
|
|
| 1739 |
const zone = document.getElementById('ref-drop-zone');
|
| 1740 |
zone.classList.remove('has-file');
|
| 1741 |
zone.innerHTML = `
|
| 1742 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" style="width:28px;height:28px;opacity:0.5;margin-bottom:6px"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>
|
| 1743 |
+
<div style="font-size:12px">Drop or click</div>
|
| 1744 |
+
<div style="font-size:10px;margin-top:2px;color:var(--text-secondary)">Main subject</div>
|
| 1745 |
`;
|
| 1746 |
document.getElementById('ref-file-input').value = '';
|
| 1747 |
}
|
| 1748 |
|
| 1749 |
+
function handlePoseImage(input) {
|
| 1750 |
+
if (input.files[0]) {
|
| 1751 |
+
poseImageFile = input.files[0];
|
| 1752 |
+
showPosePreview(poseImageFile);
|
| 1753 |
+
}
|
| 1754 |
+
}
|
| 1755 |
+
|
| 1756 |
+
function showPosePreview(file) {
|
| 1757 |
+
const zone = document.getElementById('pose-drop-zone');
|
| 1758 |
+
zone.classList.add('has-file');
|
| 1759 |
+
const reader = new FileReader();
|
| 1760 |
+
reader.onload = e => {
|
| 1761 |
+
zone.innerHTML = `
|
| 1762 |
+
<img src="${e.target.result}" style="max-height:100px;max-width:100%;border-radius:4px">
|
| 1763 |
+
<div style="margin-top:4px;font-size:11px">${file.name.substring(0,15)}${file.name.length > 15 ? '...' : ''}</div>
|
| 1764 |
+
<button class="btn btn-secondary btn-small" onclick="event.stopPropagation();clearPoseImage()" style="margin-top:4px;padding:2px 8px;font-size:10px">Remove</button>
|
| 1765 |
+
`;
|
| 1766 |
+
};
|
| 1767 |
+
reader.readAsDataURL(file);
|
| 1768 |
+
}
|
| 1769 |
+
|
| 1770 |
+
function clearPoseImage() {
|
| 1771 |
+
poseImageFile = null;
|
| 1772 |
+
const zone = document.getElementById('pose-drop-zone');
|
| 1773 |
+
zone.classList.remove('has-file');
|
| 1774 |
+
zone.innerHTML = `
|
| 1775 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" style="width:28px;height:28px;opacity:0.5;margin-bottom:6px"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>
|
| 1776 |
+
<div style="font-size:12px">Drop or click</div>
|
| 1777 |
+
<div style="font-size:10px;margin-top:2px;color:var(--text-secondary)">Pose or style</div>
|
| 1778 |
+
`;
|
| 1779 |
+
document.getElementById('pose-file-input').value = '';
|
| 1780 |
+
}
|
| 1781 |
+
|
| 1782 |
function handleVideoImage(input) {
|
| 1783 |
if (input.files[0]) {
|
| 1784 |
videoImageFile = input.files[0];
|
|
|
|
| 2217 |
}
|
| 2218 |
const formData = new FormData();
|
| 2219 |
formData.append('image', refImageFile);
|
| 2220 |
+
// Add pose/style reference image if provided (for multi-ref models)
|
| 2221 |
+
if (poseImageFile) {
|
| 2222 |
+
formData.append('image2', poseImageFile);
|
| 2223 |
+
}
|
| 2224 |
formData.append('positive_prompt', document.getElementById('gen-positive').value || '');
|
| 2225 |
formData.append('negative_prompt', document.getElementById('gen-negative').value || '');
|
| 2226 |
formData.append('content_rating', selectedRating);
|
src/content_engine/services/cloud_providers/wavespeed_provider.py
CHANGED
|
@@ -103,6 +103,9 @@ EDIT_MODEL_MAP = {
|
|
| 103 |
# SeeDream Edit (ByteDance) - NSFW OK
|
| 104 |
"seedream-4.5-edit": "bytedance/seedream-v4.5/edit",
|
| 105 |
"seedream-4-edit": "bytedance/seedream-v4/edit",
|
|
|
|
|
|
|
|
|
|
| 106 |
# WAN Edit (Alibaba)
|
| 107 |
"wan-2.6-edit": "alibaba/wan-2.6/image-edit",
|
| 108 |
"wan-2.5-edit": "alibaba/wan-2.5/image-edit",
|
|
@@ -126,6 +129,27 @@ EDIT_MODEL_MAP = {
|
|
| 126 |
"default": "bytedance/seedream-v4.5/edit",
|
| 127 |
}
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
WAVESPEED_API_BASE = "https://api.wavespeed.ai/api/v3"
|
| 130 |
|
| 131 |
|
|
@@ -153,6 +177,9 @@ class WaveSpeedProvider(CloudProvider):
|
|
| 153 |
"""Resolve a friendly name to a WaveSpeed edit model API path."""
|
| 154 |
if model_name and model_name in EDIT_MODEL_MAP:
|
| 155 |
return EDIT_MODEL_MAP[model_name]
|
|
|
|
|
|
|
|
|
|
| 156 |
if model_name:
|
| 157 |
return model_name
|
| 158 |
return EDIT_MODEL_MAP["default"]
|
|
@@ -430,23 +457,38 @@ class WaveSpeedProvider(CloudProvider):
|
|
| 430 |
*,
|
| 431 |
prompt: str,
|
| 432 |
image_bytes: bytes,
|
|
|
|
| 433 |
model: str | None = None,
|
| 434 |
size: str | None = None,
|
| 435 |
) -> CloudGenerationResult:
|
| 436 |
-
"""Full edit flow: upload image to temp host, call edit API, download result.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
start = time.time()
|
| 438 |
|
| 439 |
# WaveSpeed edit APIs require minimum image size (3686400 pixels = ~1920x1920)
|
| 440 |
# Auto-upscale small images to meet the requirement
|
| 441 |
image_bytes = self._ensure_min_image_size(image_bytes, min_pixels=3686400)
|
| 442 |
|
| 443 |
-
# Upload reference image to
|
| 444 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
|
| 446 |
# Submit edit job
|
| 447 |
job_id = await self.submit_edit(
|
| 448 |
prompt=prompt,
|
| 449 |
-
image_urls=
|
| 450 |
model=model,
|
| 451 |
size=size,
|
| 452 |
)
|
|
|
|
| 103 |
# SeeDream Edit (ByteDance) - NSFW OK
|
| 104 |
"seedream-4.5-edit": "bytedance/seedream-v4.5/edit",
|
| 105 |
"seedream-4-edit": "bytedance/seedream-v4/edit",
|
| 106 |
+
# SeeDream Multi-Image (Character Consistency across images)
|
| 107 |
+
"seedream-4.5-multi": "bytedance/seedream-v4.5/edit-sequential",
|
| 108 |
+
"seedream-4-multi": "bytedance/seedream-v4/edit-sequential",
|
| 109 |
# WAN Edit (Alibaba)
|
| 110 |
"wan-2.6-edit": "alibaba/wan-2.6/image-edit",
|
| 111 |
"wan-2.5-edit": "alibaba/wan-2.5/image-edit",
|
|
|
|
| 129 |
"default": "bytedance/seedream-v4.5/edit",
|
| 130 |
}
|
| 131 |
|
| 132 |
+
# Models that support multiple reference images
|
| 133 |
+
MULTI_REF_MODELS = {
|
| 134 |
+
# SeeDream Sequential (up to 3 images for character consistency)
|
| 135 |
+
"seedream-4.5-multi": "bytedance/seedream-v4.5/edit-sequential",
|
| 136 |
+
"seedream-4-multi": "bytedance/seedream-v4/edit-sequential",
|
| 137 |
+
# Kling O1 (up to 10 reference images)
|
| 138 |
+
"kling-o1-multi": "kwaivgi/kling-o1/image-to-image",
|
| 139 |
+
# Qwen Multi-Angle (multiple angles of same subject)
|
| 140 |
+
"qwen-multi-angle": "wavespeed-ai/qwen-image/edit-multiple-angles",
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
# Reference-to-Video models (character + pose reference)
|
| 144 |
+
REF_TO_VIDEO_MAP = {
|
| 145 |
+
# WAN 2.6 Reference-to-Video (multi-view identity consistency)
|
| 146 |
+
"wan-2.6-ref": "alibaba/wan-2.6/reference-to-video",
|
| 147 |
+
"wan-2.6-ref-flash": "alibaba/wan-2.6/reference-to-video-flash",
|
| 148 |
+
# Kling O3 Reference-to-Video
|
| 149 |
+
"kling-o3-ref": "kwaivgi/kling-video-o3-pro/reference-to-video",
|
| 150 |
+
"kling-o3-std-ref": "kwaivgi/kling-video-o3-std/reference-to-video",
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
WAVESPEED_API_BASE = "https://api.wavespeed.ai/api/v3"
|
| 154 |
|
| 155 |
|
|
|
|
| 177 |
"""Resolve a friendly name to a WaveSpeed edit model API path."""
|
| 178 |
if model_name and model_name in EDIT_MODEL_MAP:
|
| 179 |
return EDIT_MODEL_MAP[model_name]
|
| 180 |
+
# Check multi-reference models
|
| 181 |
+
if model_name and model_name in MULTI_REF_MODELS:
|
| 182 |
+
return MULTI_REF_MODELS[model_name]
|
| 183 |
if model_name:
|
| 184 |
return model_name
|
| 185 |
return EDIT_MODEL_MAP["default"]
|
|
|
|
| 457 |
*,
|
| 458 |
prompt: str,
|
| 459 |
image_bytes: bytes,
|
| 460 |
+
image_bytes_2: bytes | None = None,
|
| 461 |
model: str | None = None,
|
| 462 |
size: str | None = None,
|
| 463 |
) -> CloudGenerationResult:
|
| 464 |
+
"""Full edit flow: upload image(s) to temp host, call edit API, download result.
|
| 465 |
+
|
| 466 |
+
Args:
|
| 467 |
+
prompt: The edit prompt
|
| 468 |
+
image_bytes: Primary reference image (character/subject)
|
| 469 |
+
image_bytes_2: Optional second reference image (pose/style reference)
|
| 470 |
+
model: Model name (some models support multiple references)
|
| 471 |
+
size: Output size (widthxheight)
|
| 472 |
+
"""
|
| 473 |
start = time.time()
|
| 474 |
|
| 475 |
# WaveSpeed edit APIs require minimum image size (3686400 pixels = ~1920x1920)
|
| 476 |
# Auto-upscale small images to meet the requirement
|
| 477 |
image_bytes = self._ensure_min_image_size(image_bytes, min_pixels=3686400)
|
| 478 |
|
| 479 |
+
# Upload reference image(s) to public URLs
|
| 480 |
+
image_urls = [await self._upload_temp_image(image_bytes)]
|
| 481 |
+
|
| 482 |
+
# Upload second reference if provided (for multi-ref models)
|
| 483 |
+
if image_bytes_2:
|
| 484 |
+
image_bytes_2 = self._ensure_min_image_size(image_bytes_2, min_pixels=3686400)
|
| 485 |
+
image_urls.append(await self._upload_temp_image(image_bytes_2))
|
| 486 |
+
logger.info("Multi-reference edit: uploading 2 images for model=%s", model)
|
| 487 |
|
| 488 |
# Submit edit job
|
| 489 |
job_id = await self.submit_edit(
|
| 490 |
prompt=prompt,
|
| 491 |
+
image_urls=image_urls,
|
| 492 |
model=model,
|
| 493 |
size=size,
|
| 494 |
)
|