tori29umai0123
Match CameraRotation slider steps to spec
0fada4f
from __future__ import annotations
import math
from dataclasses import dataclass
from typing import Any
import numpy as np
from PIL import Image
# Name of the camera-rotation preset; is_multiangle() compares a preset name
# against this exact string.
PRESET_NAME = "CameraRotation"
# Horizontal (azimuth) presets as (label, angle in degrees) pairs, per UI
# language. Every language shares the same eight 45-degree stops; only the
# display labels differ, so the table is assembled from the two parts.
_H_PRESET_ANGLES = (0, 45, 90, 135, 180, 225, 270, 315)
_H_PRESET_LABELS = {
    "en": (
        "front view (0°)",
        "front-right quarter view (45°)",
        "right side view (90°)",
        "back-right quarter view (135°)",
        "back view (180°)",
        "back-left quarter view (225°)",
        "left side view (270°)",
        "front-left quarter view (315°)",
    ),
    "ja": (
        "正面 (0°)",
        "右前方クォーター (45°)",
        "右側面 (90°)",
        "右後方クォーター (135°)",
        "背面 (180°)",
        "左後方クォーター (225°)",
        "左側面 (270°)",
        "左前方クォーター (315°)",
    ),
}
H_PRESETS_BY_LANG: dict[str, list[tuple[str, int]]] = {
    language: list(zip(labels, _H_PRESET_ANGLES))
    for language, labels in _H_PRESET_LABELS.items()
}
# Vertical (elevation) presets as (label, angle in degrees) pairs, per UI
# language. The four stops are shared; only the display labels are localized.
_V_PRESET_ANGLES = (-30, 0, 30, 60)
_V_PRESET_LABELS = {
    "en": (
        "low-angle shot (-30°)",
        "eye-level shot (0°)",
        "elevated shot (30°)",
        "high-angle shot (60°)",
    ),
    "ja": (
        "ローアングル (-30°)",
        "アイレベル (0°)",
        "ややハイアングル (30°)",
        "ハイアングル (60°)",
    ),
}
V_PRESETS_BY_LANG: dict[str, list[tuple[str, int]]] = {
    language: list(zip(labels, _V_PRESET_ANGLES))
    for language, labels in _V_PRESET_LABELS.items()
}
# Distance ("zoom") presets as (label, value) pairs, per UI language.
# The values are the stops _nearest_distance_token() snaps to: 0.6 = close-up,
# 1.0 = medium shot, 1.8 = wide shot. Every language must use the same values
# in the same order; only the display labels are localized. (The Japanese
# entries previously carried 1.0 / 5.0 / 8.0, which the token mapping resolves
# to "medium shot" / "wide shot" / "wide shot" instead of what the labels say.)
Z_PRESETS_BY_LANG: dict[str, list[tuple[str, float]]] = {
    "en": [
        ("close-up (0.6)", 0.6),
        ("medium shot (1.0)", 1.0),
        ("wide shot (1.8)", 1.8),
    ],
    "ja": [
        ("クローズアップ (0.6)", 0.6),
        ("ミディアムショット (1.0)", 1.0),
        ("ワイドショット (1.8)", 1.8),
    ],
}
def get_h_presets(language: str) -> list[tuple[str, int]]:
    """Return the horizontal presets for ``language``.

    Falls back to the English list when ``language`` has no entry in
    ``H_PRESETS_BY_LANG``.
    """
    english = H_PRESETS_BY_LANG["en"]
    return H_PRESETS_BY_LANG.get(language, english)
def get_v_presets(language: str) -> list[tuple[str, int]]:
    """Return the vertical presets for ``language``.

    Falls back to the English list when ``language`` has no entry in
    ``V_PRESETS_BY_LANG``.
    """
    english = V_PRESETS_BY_LANG["en"]
    return V_PRESETS_BY_LANG.get(language, english)
def get_z_presets(language: str) -> list[tuple[str, float]]:
    """Return the distance presets for ``language``.

    Falls back to the English list when ``language`` has no entry in
    ``Z_PRESETS_BY_LANG``.
    """
    english = Z_PRESETS_BY_LANG["en"]
    return Z_PRESETS_BY_LANG.get(language, english)
# English preset lists exposed under language-agnostic names. These are the
# same list objects stored in the *_BY_LANG tables, not copies.
H_PRESETS = H_PRESETS_BY_LANG["en"]
V_PRESETS = V_PRESETS_BY_LANG["en"]
Z_PRESETS = Z_PRESETS_BY_LANG["en"]
@dataclass
class AngleTokens:
    """The three prompt tokens that describe one camera placement."""

    # Horizontal view token, e.g. "front view".
    horizontal: str
    # Vertical view token, e.g. "eye-level shot".
    vertical: str
    # Distance token, e.g. "medium shot".
    distance: str

    def to_prompt(self) -> str:
        """Return the tokens as one prompt string prefixed with ``<sks>``."""
        h_token, v_token, d_token = self.horizontal, self.vertical, self.distance
        return f"<sks> {h_token} {v_token} {d_token}"
def _nearest_horizontal_token(horizontal_angle: float) -> str:
    """Return the horizontal view token whose stop is closest to the angle.

    The angle is wrapped into [0, 360) and compared against the eight
    45-degree stops using circular distance, so 350 is 10 degrees from the
    0-degree stop. The comparison is done on the float value: truncating to
    an integer first would snap fractional angles just past a midpoint
    (e.g. 22.6) to the wrong neighbour.

    Args:
        horizontal_angle: Azimuth in degrees; any finite number (or value
            convertible with ``float``). Negative and >360 values wrap.

    Returns:
        The token text of the nearest stop, e.g. ``"front view"``.

    Raises:
        ValueError: If the angle is NaN or infinite.
    """
    angle = float(horizontal_angle)
    if not math.isfinite(angle):
        raise ValueError(f"horizontal_angle must be finite, got {horizontal_angle!r}")
    angle %= 360.0

    options = (
        (0, "front view"),
        (45, "front-right quarter view"),
        (90, "right side view"),
        (135, "back-right quarter view"),
        (180, "back view"),
        (225, "back-left quarter view"),
        (270, "left side view"),
        (315, "front-left quarter view"),
    )

    def circular_gap(option: tuple[int, str]) -> float:
        # Shorter of the two ways around the circle between angle and stop.
        stop = option[0]
        return min((angle - stop) % 360.0, (stop - angle) % 360.0)

    return min(options, key=circular_gap)[1]
def _nearest_vertical_token(vertical_angle: float) -> str:
    """Return the vertical view token whose stop is closest to the angle.

    The comparison is done on the float value: truncating to an integer
    first would snap fractional angles just past a midpoint (e.g. 15.5) to
    the wrong neighbour. Exact ties resolve to the lower stop, because
    ``min`` keeps the first minimal entry.

    Args:
        vertical_angle: Elevation in degrees; any finite number (or value
            convertible with ``float``). Values outside [-30, 60] resolve to
            the nearest end stop.

    Returns:
        The token text of the nearest stop, e.g. ``"eye-level shot"``.

    Raises:
        ValueError: If the angle is NaN or infinite.
    """
    angle = float(vertical_angle)
    if not math.isfinite(angle):
        raise ValueError(f"vertical_angle must be finite, got {vertical_angle!r}")

    options = (
        (-30, "low-angle shot"),
        (0, "eye-level shot"),
        (30, "elevated shot"),
        (60, "high-angle shot"),
    )
    return min(options, key=lambda option: abs(angle - option[0]))[1]
def _nearest_distance_token(zoom: float) -> str:
    """Return the distance token whose stop value is closest to ``zoom``.

    Stops are 0.6 (close-up), 1.0 (medium shot) and 1.8 (wide shot). On an
    exact tie the earlier stop wins.
    """
    target = float(zoom)
    stops = (
        (0.6, "close-up"),
        (1.0, "medium shot"),
        (1.8, "wide shot"),
    )
    best_label = None
    best_gap = None
    for stop_value, label in stops:
        gap = abs(target - stop_value)
        # Strict "<" keeps the first stop on ties, matching min() semantics.
        if best_gap is None or gap < best_gap:
            best_gap = gap
            best_label = label
    return best_label
def angles_to_tokens(horizontal_angle: int, vertical_angle: int, zoom: float) -> AngleTokens:
    """Snap a camera placement to its nearest preset tokens.

    Each of the three values is resolved independently by the matching
    ``_nearest_*_token`` helper and the results are bundled in an
    ``AngleTokens``.
    """
    h_token = _nearest_horizontal_token(horizontal_angle)
    v_token = _nearest_vertical_token(vertical_angle)
    d_token = _nearest_distance_token(zoom)
    return AngleTokens(horizontal=h_token, vertical=v_token, distance=d_token)
def build_prompt(horizontal_angle: int, vertical_angle: int, zoom: float, extra: str | None) -> str:
    """Build the camera prompt, optionally followed by extra user text.

    The camera part comes from ``angles_to_tokens(...).to_prompt()``. When
    ``extra`` contains non-whitespace text, it is stripped and appended after
    a comma and a space; otherwise only the camera part is returned.
    """
    base_prompt = angles_to_tokens(horizontal_angle, vertical_angle, zoom).to_prompt()
    trimmed = extra.strip() if extra else ""
    if not trimmed:
        return base_prompt
    return f"{base_prompt}, {trimmed}"
# Default edge length, in pixels, of the square canvas render_preview() draws on.
PREVIEW_CANVAS_SIZE = 320
# Default cap on the long edge of the thumbnail that _thumbnail_for_preview() builds.
PREVIEW_MAX_INPUT_EDGE = 256
# RGBA fill for the preview canvas; render_preview() uses the first three
# components when it returns a plain RGB background.
_BACKGROUND_COLOR = (32, 32, 40, 255)
# Thumbnails produced by _thumbnail_for_preview(), keyed by id() of the source image.
_THUMBNAIL_CACHE: dict[int, Image.Image] = {}
# Maximum number of cached thumbnails; once reached, the oldest-inserted entry
# is dropped before a new one is stored.
_THUMBNAIL_CACHE_LIMIT = 4
def _solve_perspective_coeffs(src: list[tuple[float, float]], dst: list[tuple[float, float]]) -> tuple[float, ...]:
    """Solve for the eight coefficients of a perspective transform.

    The coefficients ``(a, b, c, d, e, f, g, h)`` map each ``dst`` point
    ``(x, y)`` to its ``src`` point ``(u, v)`` via::

        u = (a*x + b*y + c) / (g*x + h*y + 1)
        v = (d*x + e*y + f) / (g*x + h*y + 1)

    Each of the four point pairs contributes two rows to an 8x8 linear
    system.

    Raises:
        numpy.linalg.LinAlgError: If the system is singular.
    """
    system = np.empty((8, 8), dtype=np.float64)
    targets = np.empty(8, dtype=np.float64)
    for pair_index, (src_point, dst_point) in enumerate(zip(src, dst)):
        u, v = src_point
        x, y = dst_point
        u_row = 2 * pair_index
        v_row = u_row + 1
        system[u_row] = (x, y, 1, 0, 0, 0, -u * x, -u * y)
        system[v_row] = (0, 0, 0, x, y, 1, -v * x, -v * y)
        targets[u_row] = u
        targets[v_row] = v
    solution = np.linalg.solve(system, targets)
    return tuple(solution.tolist())
def _thumbnail_for_preview(image: Image.Image, max_edge: int = PREVIEW_MAX_INPUT_EDGE) -> Image.Image:
    """Return an RGBA thumbnail of ``image`` whose long edge is at most ``max_edge``.

    Images already within the limit are only converted to RGBA (or returned
    as-is when they are RGBA already); larger ones are downscaled with
    bilinear resampling, preserving aspect ratio.

    Results for the default ``max_edge`` are memoized in ``_THUMBNAIL_CACHE``
    under ``id(image)``. Because ``id()`` values can be reused once an object
    has been garbage collected, a finalizer removes the entry when the source
    image dies, so a new image that happens to receive the same id can never
    be served another image's thumbnail. Calls with a non-default
    ``max_edge`` bypass the cache, since the key does not record the size.

    Args:
        image: Source image.
        max_edge: Maximum length, in pixels, of the thumbnail's long edge.

    Returns:
        The RGBA thumbnail (possibly ``image`` itself).
    """
    import weakref  # local import: only this helper needs it

    cache_key = id(image)
    use_cache = max_edge == PREVIEW_MAX_INPUT_EDGE
    if use_cache:
        cached = _THUMBNAIL_CACHE.get(cache_key)
        if cached is not None:
            return cached

    iw, ih = image.size
    long_edge = max(iw, ih)
    if long_edge <= max_edge:
        thumb = image if image.mode == "RGBA" else image.convert("RGBA")
    else:
        scale = max_edge / float(long_edge)
        new_size = (max(1, int(iw * scale)), max(1, int(ih * scale)))
        resized = image.resize(new_size, Image.BILINEAR)
        thumb = resized if resized.mode == "RGBA" else resized.convert("RGBA")

    if not use_cache:
        return thumb

    try:
        # Drop the entry as soon as the source image is collected; after that
        # point its id may be handed to an unrelated object.
        finalizer = weakref.finalize(image, _THUMBNAIL_CACHE.pop, cache_key, None)
    except TypeError:
        # The object cannot be weakly referenced, so its lifetime cannot be
        # tracked; skip caching rather than risk a stale hit.
        return thumb
    # Nothing needs cleaning up at interpreter shutdown.
    finalizer.atexit = False

    if len(_THUMBNAIL_CACHE) >= _THUMBNAIL_CACHE_LIMIT:
        # Dicts iterate in insertion order, so this evicts the oldest entry.
        _THUMBNAIL_CACHE.pop(next(iter(_THUMBNAIL_CACHE)))
    _THUMBNAIL_CACHE[cache_key] = thumb
    return thumb
def render_preview(
    image: Image.Image | None,
    horizontal_angle: int,
    vertical_angle: int,
    zoom: float,
    canvas_size: int = PREVIEW_CANVAS_SIZE,
) -> Image.Image:
    """Render a quick perspective preview of ``image`` seen from an orbiting camera.

    The image is treated as a flat card, 1.6 units tall, centred at the
    origin in the z = 0 plane. A pinhole camera orbits the origin at the
    given azimuth and elevation, always looking at the origin, and the card
    is warped onto a square canvas accordingly.

    Args:
        image: Source image, or ``None`` for an empty preview.
        horizontal_angle: Camera azimuth in degrees (0 = in front of the card).
        vertical_angle: Camera elevation in degrees (positive = above).
        zoom: Distance multiplier on the same scale ``_nearest_distance_token``
            uses (0.6 close-up, 1.0 medium, 1.8 wide); larger values move the
            camera further away.
        canvas_size: Edge length of the square output, in pixels.

    Returns:
        An RGB image of ``canvas_size`` x ``canvas_size``. A plain background
        is returned when ``image`` is ``None``, when any card corner is not
        safely in front of the camera, when the card is seen edge-on (its
        projection has no area), or when the perspective system is singular.
    """

    def blank_canvas() -> Image.Image:
        return Image.new("RGB", (canvas_size, canvas_size), _BACKGROUND_COLOR[:3])

    def dot(a: tuple[float, float, float], b: tuple[float, float, float]) -> float:
        return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]

    if image is None:
        return blank_canvas()

    thumb = _thumbnail_for_preview(image)
    iw, ih = thumb.size
    card_h = 1.6
    card_w = card_h * (iw / max(1, ih))
    half_w = card_w / 2.0
    half_h = card_h / 2.0
    # Corner order (top-left, top-right, bottom-right, bottom-left) must match
    # src_corners below.
    corners_3d = (
        (-half_w, half_h, 0.0),
        (half_w, half_h, 0.0),
        (half_w, -half_h, 0.0),
        (-half_w, -half_h, 0.0),
    )

    az = math.radians(float(horizontal_angle))
    el = math.radians(float(vertical_angle))
    # zoom is a distance multiplier (0.6 close-up .. 1.8 wide), so the camera
    # distance must grow with it. 3.75 is the distance the previous 0-10
    # slider formula produced at its midpoint (6.0 - 0.5 * 4.5), which keeps
    # the medium-shot framing unchanged.
    medium_shot_distance = 3.75
    distance = max(0.5, medium_shot_distance * float(zoom))
    cam = (
        distance * math.sin(az) * math.cos(el),
        distance * math.sin(el),
        distance * math.cos(az) * math.cos(el),
    )

    # Forward axis: unit vector from the camera towards the origin.
    fwd_len = math.sqrt(dot(cam, cam)) + 1e-9
    fwd = (-cam[0] / fwd_len, -cam[1] / fwd_len, -cam[2] / fwd_len)

    # Right axis: forward x world-up (0, 1, 0).
    right = (-fwd[2], 0.0, fwd[0])
    r_len = math.sqrt(dot(right, right))
    if r_len < 1e-6:
        # Looking straight up or down: the cross product vanishes.
        right = (1.0, 0.0, 0.0)
    else:
        right = (right[0] / r_len, right[1] / r_len, right[2] / r_len)

    # Up axis: right x forward.
    up = (
        right[1] * fwd[2] - right[2] * fwd[1],
        right[2] * fwd[0] - right[0] * fwd[2],
        right[0] * fwd[1] - right[1] * fwd[0],
    )

    focal = canvas_size * 0.9
    cx = canvas_size / 2.0
    cy = canvas_size / 2.0
    projected: list[tuple[float, float]] = []
    for corner in corners_3d:
        rel = (corner[0] - cam[0], corner[1] - cam[1], corner[2] - cam[2])
        depth = dot(rel, fwd)
        if depth <= 0.05:
            # Corner is behind (or too close to) the camera; nothing sensible
            # can be drawn.
            return blank_canvas()
        projected.append(
            (cx + (dot(rel, right) / depth) * focal, cy - (dot(rel, up) / depth) * focal)
        )

    # Shoelace formula: twice the signed area of the projected quad. At the
    # 90/270 degree stops the card is edge-on and the quad collapses to a
    # line, which would make the perspective system singular or wildly
    # ill-conditioned.
    twice_area = 0.0
    for (x0, y0), (x1, y1) in zip(projected, projected[1:] + projected[:1]):
        twice_area += x0 * y1 - x1 * y0
    if abs(twice_area) < 2.0:  # less than one square pixel
        return blank_canvas()

    src_corners = [(0.0, 0.0), (float(iw), 0.0), (float(iw), float(ih)), (0.0, float(ih))]
    try:
        coeffs = _solve_perspective_coeffs(src_corners, projected)
    except np.linalg.LinAlgError:
        return blank_canvas()

    warped = thumb.transform(
        (canvas_size, canvas_size),
        Image.PERSPECTIVE,
        coeffs,
        Image.BILINEAR,
    )
    canvas = Image.new("RGBA", (canvas_size, canvas_size), _BACKGROUND_COLOR)
    canvas.alpha_composite(warped)
    return canvas.convert("RGB")
def normalize_for_render(image_value: Any) -> Image.Image | None:
    """Coerce an image-like value into a PIL image, or ``None`` if impossible.

    Handled inputs, checked in this order:

    * ``None`` -> ``None``.
    * PIL image -> returned unchanged.
    * NumPy array -> converted with ``Image.fromarray``; non-uint8 data is
      first clipped to [0, 255] and cast to uint8.
    * dict -> the ``"composite"`` entry, then ``"background"``, then the
      first element of ``"layers"``, each normalized recursively; the first
      usable result wins.

    Anything else yields ``None``.
    """
    if image_value is None:
        return None
    if isinstance(image_value, Image.Image):
        return image_value
    if isinstance(image_value, np.ndarray):
        if image_value.dtype == np.uint8:
            pixels = image_value
        else:
            pixels = np.clip(image_value, 0, 255).astype(np.uint8)
        return Image.fromarray(pixels)
    if not isinstance(image_value, dict):
        return None

    for key in ("composite", "background"):
        candidate = image_value.get(key)
        if candidate is None:
            continue
        resolved = normalize_for_render(candidate)
        if resolved is not None:
            return resolved

    layers = image_value.get("layers")
    if isinstance(layers, list) and layers:
        return normalize_for_render(layers[0])
    return None
def is_multiangle(preset_name: str | None) -> bool:
    """Return whether ``preset_name`` equals ``PRESET_NAME``."""
    matches = preset_name == PRESET_NAME
    return matches