# Hugging Face Spaces page header (scrape residue): "Running on Zero".
| from __future__ import annotations | |
| import math | |
| from dataclasses import dataclass | |
| from typing import Any | |
| import numpy as np | |
| from PIL import Image | |
# Name of the preset that is_multiangle() compares against.
PRESET_NAME = "CameraRotation"

# Horizontal (azimuth) anchors in degrees, shared by every language.
_H_ANGLES: tuple[int, ...] = (0, 45, 90, 135, 180, 225, 270, 315)

# Per-language label stems, listed in the same order as _H_ANGLES.
_H_LABELS: dict[str, tuple[str, ...]] = {
    "en": (
        "front view",
        "front-right quarter view",
        "right side view",
        "back-right quarter view",
        "back view",
        "back-left quarter view",
        "left side view",
        "front-left quarter view",
    ),
    "ja": (
        "正面",
        "右前方クォーター",
        "右側面",
        "右後方クォーター",
        "背面",
        "左後方クォーター",
        "左側面",
        "左前方クォーター",
    ),
}

# language code -> [(display label, angle in degrees), ...]; each label is the
# stem followed by the angle in parentheses, e.g. "front view (0°)".
H_PRESETS_BY_LANG: dict[str, list[tuple[str, int]]] = {
    lang: [(f"{label} ({angle}°)", angle) for label, angle in zip(labels, _H_ANGLES)]
    for lang, labels in _H_LABELS.items()
}
# Vertical (elevation) anchors in degrees, shared by every language.
_V_ANGLES: tuple[int, ...] = (-30, 0, 30, 60)

# Per-language label stems, listed in the same order as _V_ANGLES.
_V_LABELS: dict[str, tuple[str, ...]] = {
    "en": ("low-angle shot", "eye-level shot", "elevated shot", "high-angle shot"),
    "ja": ("ローアングル", "アイレベル", "ややハイアングル", "ハイアングル"),
}

# language code -> [(display label, angle in degrees), ...]; each label is the
# stem followed by the angle in parentheses, e.g. "low-angle shot (-30°)".
V_PRESETS_BY_LANG: dict[str, list[tuple[str, int]]] = {
    lang: [(f"{label} ({angle}°)", angle) for label, angle in zip(labels, _V_ANGLES)]
    for lang, labels in _V_LABELS.items()
}
# language code -> [(display label, zoom value), ...].
#
# The zoom values must sit on the scale that _nearest_distance_token() snaps
# to (0.6 = close-up, 1.0 = medium shot, 1.8 = wide shot). The Japanese table
# previously used 1.0 / 5.0 / 8.0, so its "close-up" preset produced the
# "wide shot" token and its "wide shot" preset produced "medium shot". Both
# languages now expose the same values in the same order.
Z_PRESETS_BY_LANG: dict[str, list[tuple[str, float]]] = {
    "en": [
        ("close-up (0.6)", 0.6),
        ("medium shot (1.0)", 1.0),
        ("wide shot (1.8)", 1.8),
    ],
    "ja": [
        ("クローズアップ (0.6)", 0.6),
        ("ミディアムショット (1.0)", 1.0),
        ("ワイドショット (1.8)", 1.8),
    ],
}
def get_h_presets(language: str) -> list[tuple[str, int]]:
    """Return the horizontal-angle presets for *language*.

    Falls back to the English table when *language* has no entry.
    """
    if language in H_PRESETS_BY_LANG:
        return H_PRESETS_BY_LANG[language]
    return H_PRESETS_BY_LANG["en"]
def get_v_presets(language: str) -> list[tuple[str, int]]:
    """Return the vertical-angle presets for *language*.

    Falls back to the English table when *language* has no entry.
    """
    if language in V_PRESETS_BY_LANG:
        return V_PRESETS_BY_LANG[language]
    return V_PRESETS_BY_LANG["en"]
def get_z_presets(language: str) -> list[tuple[str, float]]:
    """Return the zoom/distance presets for *language*.

    Falls back to the English table when *language* has no entry.
    """
    if language in Z_PRESETS_BY_LANG:
        return Z_PRESETS_BY_LANG[language]
    return Z_PRESETS_BY_LANG["en"]
# Default (English) preset tables. These are aliases of the per-language
# lists, not copies, so mutating one mutates the other.
H_PRESETS = H_PRESETS_BY_LANG["en"]
V_PRESETS = V_PRESETS_BY_LANG["en"]
Z_PRESETS = Z_PRESETS_BY_LANG["en"]
@dataclass
class AngleTokens:
    """The three prompt tokens that describe a camera pose.

    The class must be a dataclass: it is constructed with keyword arguments
    (``AngleTokens(horizontal=..., vertical=..., distance=...)``), which a
    plain class with bare annotations would reject with ``TypeError``.
    """

    # Horizontal viewpoint token, e.g. "front view".
    horizontal: str
    # Vertical viewpoint token, e.g. "eye-level shot".
    vertical: str
    # Camera distance token, e.g. "medium shot".
    distance: str

    def to_prompt(self) -> str:
        """Return the tokens joined by spaces behind the ``<sks>`` prefix."""
        return f"<sks> {self.horizontal} {self.vertical} {self.distance}"
| def _nearest_horizontal_token(horizontal_angle: int) -> str: | |
| h_angle = int(horizontal_angle) % 360 | |
| options = [ | |
| (0, "front view"), | |
| (45, "front-right quarter view"), | |
| (90, "right side view"), | |
| (135, "back-right quarter view"), | |
| (180, "back view"), | |
| (225, "back-left quarter view"), | |
| (270, "left side view"), | |
| (315, "front-left quarter view"), | |
| ] | |
| return min(options, key=lambda item: min((h_angle - item[0]) % 360, (item[0] - h_angle) % 360))[1] | |
def _nearest_vertical_token(vertical_angle: int) -> str:
    """Return the vertical shot token whose anchor is closest to the angle.

    The angle is truncated to an int. On a tie the anchor listed first (the
    lower angle) wins.
    """
    pitch = int(vertical_angle)
    anchors = (
        (-30, "low-angle shot"),
        (0, "eye-level shot"),
        (30, "elevated shot"),
        (60, "high-angle shot"),
    )
    best_gap = abs(pitch - anchors[0][0])
    best_label = anchors[0][1]
    for anchor_angle, label in anchors[1:]:
        gap = abs(pitch - anchor_angle)
        # Strict comparison keeps the earliest anchor on ties.
        if gap < best_gap:
            best_gap = gap
            best_label = label
    return best_label
def _nearest_distance_token(zoom: float) -> str:
    """Return the distance token whose anchor value is closest to *zoom*.

    On a tie the anchor listed first (the smaller value) wins.
    """
    target = float(zoom)
    anchor_values = (0.6, 1.0, 1.8)
    anchor_labels = ("close-up", "medium shot", "wide shot")
    # min() returns the first index with the smallest gap.
    nearest = min(
        range(len(anchor_values)),
        key=lambda index: abs(target - anchor_values[index]),
    )
    return anchor_labels[nearest]
def angles_to_tokens(horizontal_angle: int, vertical_angle: int, zoom: float) -> AngleTokens:
    """Snap a camera pose to its nearest preset tokens.

    Each of the three inputs is mapped independently by the matching
    ``_nearest_*_token`` helper and the results are bundled in an AngleTokens.
    """
    horizontal = _nearest_horizontal_token(horizontal_angle)
    vertical = _nearest_vertical_token(vertical_angle)
    distance = _nearest_distance_token(zoom)
    return AngleTokens(horizontal=horizontal, vertical=vertical, distance=distance)
def build_prompt(horizontal_angle: int, vertical_angle: int, zoom: float, extra: str | None) -> str:
    """Build the prompt string for a camera pose.

    The pose is converted to tokens with angles_to_tokens(). When *extra*
    contains non-whitespace text, it is stripped and appended after a comma
    and a space; otherwise the token prompt is returned alone.
    """
    base_prompt = angles_to_tokens(horizontal_angle, vertical_angle, zoom).to_prompt()
    suffix = extra.strip() if extra else ""
    if not suffix:
        return base_prompt
    return f"{base_prompt}, {suffix}"
# Default edge length, in pixels, of the square preview canvas.
PREVIEW_CANVAS_SIZE = 320

# Input images whose longest edge exceeds this are downscaled before warping.
PREVIEW_MAX_INPUT_EDGE = 256

# RGBA fill for the preview canvas; the first three channels are reused for
# the plain RGB placeholder images.
_BACKGROUND_COLOR = (32, 32, 40, 255)

# id(source image) -> RGBA thumbnail, capped at _THUMBNAIL_CACHE_LIMIT entries.
_THUMBNAIL_CACHE: dict[int, Image.Image] = {}
_THUMBNAIL_CACHE_LIMIT = 4
def _solve_perspective_coeffs(src: list[tuple[float, float]], dst: list[tuple[float, float]]) -> tuple[float, ...]:
    """Solve for the eight perspective coefficients linking *dst* to *src*.

    Each (src, dst) point pair contributes two linear equations, so that
    ``sx = (a*dx + b*dy + c) / (g*dx + h*dy + 1)`` and
    ``sy = (d*dx + e*dy + f) / (g*dx + h*dy + 1)``.
    The coefficients are returned as ``(a, b, c, d, e, f, g, h)``.

    Raises:
        numpy.linalg.LinAlgError: if the 8x8 system is singular.
    """
    matrix = np.empty((8, 8), dtype=np.float64)
    rhs = np.empty(8, dtype=np.float64)
    for index, (source_pt, target_pt) in enumerate(zip(src, dst)):
        sx, sy = source_pt
        dx, dy = target_pt
        x_row = 2 * index
        y_row = x_row + 1
        matrix[x_row] = (dx, dy, 1, 0, 0, 0, -sx * dx, -sx * dy)
        matrix[y_row] = (0, 0, 0, dx, dy, 1, -sy * dx, -sy * dy)
        rhs[x_row] = sx
        rhs[y_row] = sy
    solution = np.linalg.solve(matrix, rhs)
    return tuple(solution.tolist())
# Weak references to the source image behind each _THUMBNAIL_CACHE entry,
# stored under the same id(image) key. id() values can be reused once an
# object is garbage-collected, so a cache hit is only trusted when the weak
# reference still resolves to the very image being requested.
_THUMBNAIL_SOURCE_REFS: dict[int, Any] = {}


def _thumbnail_for_preview(image: Image.Image, max_edge: int = PREVIEW_MAX_INPUT_EDGE) -> Image.Image:
    """Return an RGBA thumbnail of *image* whose longest edge is at most *max_edge*.

    Images already within the limit are only converted to RGBA (or returned
    unchanged when they already are RGBA); larger ones are downscaled with
    bilinear resampling, preserving aspect ratio. Results are memoised per
    source image object in a small first-in-first-out cache.

    The cache key does not include *max_edge*, so a cached thumbnail is
    returned as-is even if a later call passes a different limit.
    """
    import weakref

    cache_key = id(image)
    cached = _THUMBNAIL_CACHE.get(cache_key)
    if cached is not None:
        source_ref = _THUMBNAIL_SOURCE_REFS.get(cache_key)
        if source_ref is not None and source_ref() is image:
            return cached
        # The entry belongs to a different (since collected) image that
        # happened to have the same id; drop it rather than return it.
        _THUMBNAIL_CACHE.pop(cache_key, None)
        _THUMBNAIL_SOURCE_REFS.pop(cache_key, None)

    iw, ih = image.size
    long_edge = max(iw, ih)
    if long_edge <= max_edge:
        thumb = image.convert("RGBA") if image.mode != "RGBA" else image
    else:
        scale = max_edge / float(long_edge)
        new_size = (max(1, int(iw * scale)), max(1, int(ih * scale)))
        resized = image.resize(new_size, Image.BILINEAR)
        thumb = resized.convert("RGBA") if resized.mode != "RGBA" else resized

    if len(_THUMBNAIL_CACHE) >= _THUMBNAIL_CACHE_LIMIT:
        # Dicts preserve insertion order, so the first key is the oldest entry.
        oldest_key = next(iter(_THUMBNAIL_CACHE))
        _THUMBNAIL_CACHE.pop(oldest_key)
        _THUMBNAIL_SOURCE_REFS.pop(oldest_key, None)
    _THUMBNAIL_CACHE[cache_key] = thumb
    _THUMBNAIL_SOURCE_REFS[cache_key] = weakref.ref(image)
    return thumb
def _dot3(a: tuple[float, float, float], b: tuple[float, float, float]) -> float:
    """Return the dot product of two 3-vectors."""
    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]


def _cross3(a: tuple[float, float, float], b: tuple[float, float, float]) -> tuple[float, float, float]:
    """Return the cross product ``a x b`` of two 3-vectors."""
    return (
        a[1] * b[2] - a[2] * b[1],
        a[2] * b[0] - a[0] * b[2],
        a[0] * b[1] - a[1] * b[0],
    )


def render_preview(
    image: Image.Image | None,
    horizontal_angle: int,
    vertical_angle: int,
    zoom: float,
    canvas_size: int = PREVIEW_CANVAS_SIZE,
) -> Image.Image:
    """Render a square RGB preview of *image* seen from an orbiting camera.

    The image is treated as a flat card of height 1.6 centred at the origin in
    the z = 0 plane. A camera positioned from the horizontal angle, vertical
    angle and zoom looks at the origin; the card's four corners are projected
    with a pinhole model and the thumbnail is warped onto the canvas.

    A plain background image is returned when *image* is None, when any corner
    is at depth 0.05 or less in camera space, or when the perspective system
    is singular.

    NOTE(review): the distance formula divides *zoom* by 10 and moves the
    camera closer as zoom grows, whereas _nearest_distance_token() treats
    0.6 / 1.0 / 1.8 as close-up / medium / wide. The two scales look
    inconsistent; confirm which one callers pass.
    """

    def blank_canvas() -> Image.Image:
        return Image.new("RGB", (canvas_size, canvas_size), _BACKGROUND_COLOR[:3])

    if image is None:
        return blank_canvas()

    thumb = _thumbnail_for_preview(image)
    iw, ih = thumb.size

    # Card geometry: fixed height, width taken from the thumbnail aspect ratio.
    card_h = 1.6
    card_w = card_h * (iw / max(1, ih))
    half_w = card_w / 2.0
    half_h = card_h / 2.0
    # Corner order: top-left, top-right, bottom-right, bottom-left.
    card_corners = (
        (-half_w, half_h, 0.0),
        (half_w, half_h, 0.0),
        (half_w, -half_h, 0.0),
        (-half_w, -half_h, 0.0),
    )

    # Camera position on a sphere around the origin.
    azimuth = math.radians(float(horizontal_angle))
    elevation = math.radians(float(vertical_angle))
    radius = max(0.5, 6.0 - (float(zoom) / 10.0) * 4.5)
    eye = (
        radius * math.sin(azimuth) * math.cos(elevation),
        radius * math.sin(elevation),
        radius * math.cos(azimuth) * math.cos(elevation),
    )

    # Orthonormal camera basis; forward points from the camera to the origin.
    toward_origin = (-eye[0], -eye[1], -eye[2])
    toward_len = math.sqrt(_dot3(toward_origin, toward_origin)) + 1e-9
    forward = (
        toward_origin[0] / toward_len,
        toward_origin[1] / toward_len,
        toward_origin[2] / toward_len,
    )
    right = _cross3(forward, (0.0, 1.0, 0.0))
    right_len = math.sqrt(_dot3(right, right))
    if right_len < 1e-6:
        # Looking straight up or down: forward is parallel to world up, so
        # fall back to a fixed right vector.
        right = (1.0, 0.0, 0.0)
    else:
        right = (right[0] / right_len, right[1] / right_len, right[2] / right_len)
    up = _cross3(right, forward)

    # Pinhole projection of each corner onto the canvas.
    focal = canvas_size * 0.9
    cx = canvas_size / 2.0
    cy = canvas_size / 2.0
    projected: list[tuple[float, float]] = []
    for corner in card_corners:
        rel = (corner[0] - eye[0], corner[1] - eye[1], corner[2] - eye[2])
        depth = _dot3(rel, forward)
        if depth <= 0.05:
            # A corner at or behind the near limit makes the warp meaningless.
            return blank_canvas()
        screen_x = cx + (_dot3(rel, right) / depth) * focal
        screen_y = cy - (_dot3(rel, up) / depth) * focal
        projected.append((screen_x, screen_y))

    src_corners = [(0.0, 0.0), (float(iw), 0.0), (float(iw), float(ih)), (0.0, float(ih))]
    try:
        coeffs = _solve_perspective_coeffs(src_corners, projected)
    except np.linalg.LinAlgError:
        return blank_canvas()

    warped = thumb.transform(
        (canvas_size, canvas_size),
        Image.PERSPECTIVE,
        coeffs,
        Image.BILINEAR,
    )
    canvas = Image.new("RGBA", (canvas_size, canvas_size), _BACKGROUND_COLOR)
    canvas.alpha_composite(warped)
    return canvas.convert("RGB")
def normalize_for_render(image_value: Any) -> Image.Image | None:
    """Coerce a loosely typed image value into a PIL image, or None.

    Accepted inputs:
      * None or a PIL image: returned unchanged.
      * A NumPy array: non-uint8 data is clipped to [0, 255] and cast to
        uint8 before conversion.
      * A dict: the "composite" and then "background" entries are tried in
        order, followed by the first element of a non-empty "layers" list.

    Anything else yields None.
    """
    if image_value is None or isinstance(image_value, Image.Image):
        return image_value

    if isinstance(image_value, np.ndarray):
        if image_value.dtype == np.uint8:
            pixels = image_value
        else:
            pixels = np.clip(image_value, 0, 255).astype(np.uint8)
        return Image.fromarray(pixels)

    if not isinstance(image_value, dict):
        return None

    for key in ("composite", "background"):
        candidate = image_value.get(key)
        if candidate is None:
            continue
        normalized = normalize_for_render(candidate)
        if normalized is not None:
            return normalized

    layers = image_value.get("layers")
    if isinstance(layers, list) and layers:
        return normalize_for_render(layers[0])
    return None
def is_multiangle(preset_name: str | None) -> bool:
    """Return True when *preset_name* equals PRESET_NAME."""
    matches_preset = preset_name == PRESET_NAME
    return matches_preset