| import asyncio |
| import httpx |
| import uuid |
| from datetime import datetime |
| from typing import Optional, List, Literal |
| from fastapi import FastAPI, HTTPException, BackgroundTasks |
| from fastapi.responses import StreamingResponse |
| from pydantic import BaseModel, Field |
| import logging |
| import os |
|
|
| |
# Module-scoped logger used throughout this file; records at INFO and above
# are emitted through the root logger's default configuration.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
|
|
# The ASGI application object; every route in this file is registered on it.
app = FastAPI(
    version="1.0.0",
    title="OpenAI Compatible API - Images & TTS",
    description=(
        "OpenAI-compatible API for image generation and text-to-speech "
        "using Captions backend"
    ),
)
|
|
| |
# Upstream Captions endpoints that this service proxies to.
CAPTIONS_BASE_URL = "https://core.captions-web-api.xyz/proxy/v1/gen-ai/image"
CAPTIONS_TTS_BASE_URL = "https://core.captions-web-api.xyz/proxy/v1/voiceover/tts"

# The bearer token is read from the environment only. A credential must never
# be embedded in source as a fallback value: it ends up in version control and
# in every copy of the file. When the variable is unset the token is empty and
# upstream calls are expected to be rejected, so say so loudly at start-up.
BEARER_TOKEN = os.getenv("CAPTIONS_BEARER_TOKEN", "")
if not BEARER_TOKEN:
    logging.getLogger(__name__).warning(
        "CAPTIONS_BEARER_TOKEN is not set; requests to the Captions API will be unauthenticated"
    )
|
|
| |
# Client-facing model names grouped by the Captions model ID they resolve to.
# Group order and alias order are significant: they define the iteration
# order of MODEL_MAPPINGS.
_ALIASES_BY_CAPTIONS_MODEL = (
    ("openai-dalle-3", ("dall-e-3", "dall-e-2")),
    ("openai-gpt-4o-image", ("gpt-4o",)),
    ("google-imagen-3", ("google-imagen-3", "imagen-3")),
    ("luma-photon", ("luma-photon", "photon")),
    ("bfl-flux-1-1-pro", ("flux-1-1-pro", "flux")),
    ("ideogram-v1", ("ideogram-v1", "ideogram")),
    ("recraft-v3", ("recraft-v3", "recraft")),
    ("stable-diffusion-3-5-large", ("stable-diffusion-3-5", "sd-3-5", "stable-diffusion")),
)

# Flat lookup table: client-facing model name -> Captions model ID.
MODEL_MAPPINGS = {
    alias: captions_model
    for captions_model, aliases in _ALIASES_BY_CAPTIONS_MODEL
    for alias in aliases
}
|
|
| |
# Lower-case voice name -> Captions voice ID.
# The first six entries are the OpenAI voice names; the remainder are voice
# names keyed in lower case. "alloy" and "brandon" share the same voice ID.
VOICE_MAPPINGS = dict(
    alloy="0s0tckZNA4EDjsNWIGpn",
    echo="VfJEoIjcuedwbnVocfwS",
    fable="aIJGQIEdPBlV4bWoLgiC",
    onyx="NkxXZNRZuGVagP3gLTlk",
    nova="dEcutGbESImg8uIOJOb3",
    shimmer="OsLeLksKZUcYFR6Rj3AV",
    brandon="0s0tckZNA4EDjsNWIGpn",
    nicole="2OMmjuvizlUUkgCLYrEU",
    jamal="4VCohb9n7kc8qQAMbC9T",
    xavier="6LVJ04FKnALQY4vuI3xi",
    emma="7pjl1PlCtijY5E7k9nex",
    alexandra="8OwpkBz4OXvyOgg6uSVM",
    josh="9H5PLh8sHyc4NiQba2sO",
    vincent="A6YwaBVPdqMuPU5guI31",
    bella="DVkGI1gOEQwhI9D98kgV",
    sophia="Dw4Y69nCUd0lijzanffn",
    ethan="FNrD9UXPRmnlfELyZfOH",
    greg="GFvARbVuizGj4jkdG1iN",
    isabella="GNliQ6gOp8Y96hz0uPSY",
    mason="Jc5LFEs9ONmW3vilHdpg",
    justin="LWoskltOczE5nVUCPFCl",
    bradford="Lvu57Tdi6WU0LrCkf3W0",
    ally="NJSANg1RFfytiL3apSc0",
    maddy="NX9RZUSep3h9RzDoipkJ",
    george="NmypOAkKcWovPSbjMJPk",
    brian="Pt04qYLGmK9HateRrrdh",
    taylor="QQ0vIwK2AgVtbHZk3wYq",
    samara="QyFFVFY5hzA5T7sVv9JI",
    linda="RzrSQgnXwblMgDyOeOuy",
    liam="SveSw38zJT860NRIeiVk",
    hope="UfOKaDAlzOMjZnyEhPH1",
    william="VesROIDY8lJS6zz8xTRb",
    dwight="W76fVeloaQcuN71bIQF6",
    lisa="ZbuIjlIzHpIc8oO17kWW",
    arial="aCWKe1NzicFCAkohj7TY",
    elliot="arGkfQC5Z0yNlNrYLlE8",
    rhea="blo9kiIBaFNr0UCI2gpA",
    leo="bqvJyFf80waIYPYiv6zX",
    eve="cQ0q3hcj9Bm4IccGDY9C",
    serena="e3zFWWHHfNk6vOh5kbBX",
    domi="eSojoW8lMv5whHRCJugk",
    alex="eXjri1H442qcs35pWaTr",
    blondie="fHmK4z2cR0VXxvQmd7ei",
    nathan="gO0Do5f1lCvLoIvbl6dx",
    daniel="grqhFog58KWjgcO6t4ya",
    tara="iBsjG6Kk8tmO0ldX7Aho",
    maya="iWBJcyi2qdFpXYRGt42f",
    ashley="j51tO8Upz9wEVIUkynCJ",
    matthew="lJQLBnDNpkkc4RIgqhIZ",
    andrew="lQS5Hszd1P0W2m18M4ME",
    olivia="ltYBSrCwVJp0I99DmLfq",
    adam="m1t6JeyI9DXRhnCg8kuX",
    mark="okc8JAt7Vb3u20k4soKB",
    micah="r0ZdS6QBWDxmcRN7HxWq",
    elli="r4gww888sYU82aKZSUHy",
    sylvia="rJmVxgRa6YI9bALBqvtC",
    noah="rgqCbvqWKIaxYs54d7xS",
    kayla="s1YBw3dmanbLNCq7MXI8",
    carla="sUXCiUMyEVHBC7sRlPZY",
    owen="tijk10imWq7nGRawDD62",
    lila="wjOnivHr3V1ZGNuCMZJI",
    sam="xpkvvHUyS37s3f84MObW",
    antoni="y5nGwtfzvQ2OhrBXZnj5",
    ava="zYqKDc8tFTIsAhJFpTaC",
)
|
|
| |
# Column names for the metadata stored per voice.
_VOICE_FIELDS = ("name", "gender", "accent", "provider")

# One row per Captions voice: (voice ID, name, gender, accent, provider).
# Row order defines the iteration order of AVAILABLE_VOICES.
_VOICE_ROWS = (
    ("0s0tckZNA4EDjsNWIGpn", "Brandon", "male", "american", "OpenAI"),
    ("2OMmjuvizlUUkgCLYrEU", "Nicole", "female", "australian", "Cartesia"),
    ("4VCohb9n7kc8qQAMbC9T", "Jamal", "male", "american", "ElevenLabs"),
    ("6LVJ04FKnALQY4vuI3xi", "Xavier", "male", "american", "PlayHT"),
    ("7pjl1PlCtijY5E7k9nex", "Emma", "female", "american", "Google"),
    ("8OwpkBz4OXvyOgg6uSVM", "Alexandra", "female", "american", "ElevenLabs"),
    ("9H5PLh8sHyc4NiQba2sO", "Josh", "male", "american", "ElevenLabs"),
    ("A6YwaBVPdqMuPU5guI31", "Vincent", "male", "american", "PlayHT"),
    ("DVkGI1gOEQwhI9D98kgV", "Bella", "female", "american", "ElevenLabs"),
    ("Dw4Y69nCUd0lijzanffn", "Sophia", "female", "american", "ElevenLabs"),
    ("FNrD9UXPRmnlfELyZfOH", "Ethan", "male", "american", "ElevenLabs"),
    ("GFvARbVuizGj4jkdG1iN", "Greg", "male", "american", "ElevenLabs"),
    ("GNliQ6gOp8Y96hz0uPSY", "Isabella", "female", "american", "Google"),
    ("Jc5LFEs9ONmW3vilHdpg", "Mason", "male", "american", "Google"),
    ("LWoskltOczE5nVUCPFCl", "Justin", "male", "american", "Cartesia"),
    ("Lvu57Tdi6WU0LrCkf3W0", "Bradford", "male", "british", "ElevenLabs"),
    ("NJSANg1RFfytiL3apSc0", "Ally", "female", "american", "PlayHT"),
    ("NX9RZUSep3h9RzDoipkJ", "Maddy", "female", "american", "PlayHT"),
    ("NkxXZNRZuGVagP3gLTlk", "James", "male", "british", "OpenAI"),
    ("NmypOAkKcWovPSbjMJPk", "George", "male", "british", "Cartesia"),
    ("OsLeLksKZUcYFR6Rj3AV", "Lea", "female", "american", "OpenAI"),
    ("Pt04qYLGmK9HateRrrdh", "Brian", "male", "american", "Cartesia"),
    ("QQ0vIwK2AgVtbHZk3wYq", "Taylor", "female", "british", "ElevenLabs"),
    ("QyFFVFY5hzA5T7sVv9JI", "Samara", "female", "british", "ElevenLabs"),
    ("RzrSQgnXwblMgDyOeOuy", "Linda", "female", "british", "PlayHT"),
    ("SveSw38zJT860NRIeiVk", "Liam", "male", "american", "Google"),
    ("UfOKaDAlzOMjZnyEhPH1", "Hope", "female", "american", "ElevenLabs"),
    ("VesROIDY8lJS6zz8xTRb", "William", "male", "american", "Google"),
    ("VfJEoIjcuedwbnVocfwS", "John", "male", "american", "OpenAI"),
    ("W76fVeloaQcuN71bIQF6", "Dwight", "male", "american", "ElevenLabs"),
    ("ZbuIjlIzHpIc8oO17kWW", "Lisa", "female", "american", "PlayHT"),
    ("aCWKe1NzicFCAkohj7TY", "Arial", "female", "american", "Cartesia"),
    ("aIJGQIEdPBlV4bWoLgiC", "Jordan", "male", "american", "OpenAI"),
    ("arGkfQC5Z0yNlNrYLlE8", "Elliot", "male", "american", "ElevenLabs"),
    ("blo9kiIBaFNr0UCI2gpA", "Rhea", "female", "australian", "PlayHT"),
    ("bqvJyFf80waIYPYiv6zX", "Leo", "male", "american", "ElevenLabs"),
    ("cQ0q3hcj9Bm4IccGDY9C", "Eve", "female", "american", "ElevenLabs"),
    ("dEcutGbESImg8uIOJOb3", "Julie", "female", "american", "OpenAI"),
    ("e3zFWWHHfNk6vOh5kbBX", "Serena", "female", "american", "ElevenLabs"),
    ("eSojoW8lMv5whHRCJugk", "Domi", "female", "american", "ElevenLabs"),
    ("eXjri1H442qcs35pWaTr", "Alex", "female", "american", "ElevenLabs"),
    ("fHmK4z2cR0VXxvQmd7ei", "Blondie", "female", "british", "ElevenLabs"),
    ("gO0Do5f1lCvLoIvbl6dx", "Nathan", "male", "british", "PlayHT"),
    ("grqhFog58KWjgcO6t4ya", "Daniel", "male", "american", "PlayHT"),
    ("iBsjG6Kk8tmO0ldX7Aho", "Tara", "female", "american", "Cartesia"),
    ("iWBJcyi2qdFpXYRGt42f", "Maya", "female", "american", "Cartesia"),
    ("j51tO8Upz9wEVIUkynCJ", "Ashley", "female", "american", "OpenAI"),
    ("lJQLBnDNpkkc4RIgqhIZ", "Matthew", "male", "australian", "Cartesia"),
    ("lQS5Hszd1P0W2m18M4ME", "Andrew", "male", "american", "Cartesia"),
    ("ltYBSrCwVJp0I99DmLfq", "Olivia", "female", "american", "Google"),
    ("m1t6JeyI9DXRhnCg8kuX", "Adam", "male", "american", "ElevenLabs"),
    ("okc8JAt7Vb3u20k4soKB", "Mark", "male", "american", "ElevenLabs"),
    ("r0ZdS6QBWDxmcRN7HxWq", "Micah", "male", "british", "ElevenLabs"),
    ("r4gww888sYU82aKZSUHy", "Elli", "female", "american", "ElevenLabs"),
    ("rJmVxgRa6YI9bALBqvtC", "Sylvia", "female", "american", "OpenAI"),
    ("rgqCbvqWKIaxYs54d7xS", "Noah", "male", "australian", "ElevenLabs"),
    ("s1YBw3dmanbLNCq7MXI8", "Kayla", "female", "american", "OpenAI"),
    ("sUXCiUMyEVHBC7sRlPZY", "Carla", "female", "american", "Cartesia"),
    ("tijk10imWq7nGRawDD62", "Owen", "male", "american", "Google"),
    ("wjOnivHr3V1ZGNuCMZJI", "Lila", "female", "american", "ElevenLabs"),
    ("xpkvvHUyS37s3f84MObW", "Sam", "male", "american", "ElevenLabs"),
    ("y5nGwtfzvQ2OhrBXZnj5", "Antoni", "male", "american", "ElevenLabs"),
    ("zYqKDc8tFTIsAhJFpTaC", "Ava", "female", "american", "Google"),
)

# Captions voice ID -> {"name", "gender", "accent", "provider"}.
AVAILABLE_VOICES = {
    row[0]: dict(zip(_VOICE_FIELDS, row[1:]))
    for row in _VOICE_ROWS
}
|
|
| |
# One row per Captions image model: (model ID, display name, provider).
# Row order defines the iteration order of AVAILABLE_MODELS.
_MODEL_ROWS = (
    ("google-imagen-3", "Imagen 3", "Google"),
    ("openai-gpt-4o-image", "GPT-4o", "OpenAI"),
    ("luma-photon", "Photon", "Luma AI"),
    ("bfl-flux-1-1-pro", "Flux 1.1 Pro", "Black Forest Labs"),
    ("ideogram-v1", "Ideogram V1", "Ideogram"),
    ("openai-dalle-3", "DALL-E 3 HD", "OpenAI"),
    ("recraft-v3", "Recraft V3", "Recraft"),
    ("stable-diffusion-3-5-large", "SD 3.5", "Stability AI"),
)

# Captions model ID -> {"name", "provider"}.
AVAILABLE_MODELS = {
    model_id: {"name": display_name, "provider": provider}
    for model_id, display_name, provider in _MODEL_ROWS
}
|
|
| |
class ImageGenerationRequest(BaseModel):
    """Request body accepted by the image generation endpoints.

    Only ``prompt`` is required; every other field has a default.
    """

    prompt: str = Field(
        ...,
        description="A text description of the desired image(s)",
    )
    model: Optional[str] = Field(
        default="dall-e-3",
        description="The model to use for image generation",
    )
    n: Optional[int] = Field(
        default=1,
        ge=1,
        le=10,
        description="Number of images to generate",
    )
    quality: Optional[Literal["standard", "hd"]] = Field(
        default="standard",
        description="Quality of the image",
    )
    response_format: Optional[Literal["url", "b64_json"]] = Field(
        default="url",
        description="Response format",
    )
    size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] = Field(
        default="1024x1024",
        description="Size of the generated images",
    )
    style: Optional[Literal["vivid", "natural"]] = Field(
        default="vivid",
        description="Style of the generated images",
    )
    user: Optional[str] = Field(
        default=None,
        description="A unique identifier representing your end-user",
    )
|
|
| |
class TTSRequest(BaseModel):
    """Request body accepted by the text-to-speech endpoints.

    Only ``input`` is required; every other field has a default.
    """

    model: str = Field("tts-1", description="The TTS model to use")
    input: str = Field(..., description="The text to generate audio for")
    voice: str = Field("alloy", description="The voice to use for generation")
    # The description previously read "The format to audio in", which is
    # what API consumers saw in the generated schema.
    response_format: Optional[Literal["mp3", "opus", "aac", "flac"]] = Field(
        "mp3", description="The format to return audio in"
    )
    speed: Optional[float] = Field(1.0, ge=0.25, le=4.0, description="The speed of the generated audio")
|
|
| |
class ImageData(BaseModel):
    """A single generated image as returned to the client.

    Every field is optional and defaults to ``None``.
    """

    # Location of the generated image.
    url: Optional[str] = None
    # Base64 payload slot; declared for response-shape compatibility.
    b64_json: Optional[str] = None
    # Prompt text reported back alongside the image.
    revised_prompt: Optional[str] = None
|
|
class ImageGenerationResponse(BaseModel):
    """Response body of the synchronous image generation endpoint."""

    # Integer timestamp recorded when the response was built.
    created: int
    # Generated images, one ImageData per image.
    data: List[ImageData]
|
|
| |
class CaptionsSubmitRequest(BaseModel):
    """Shape of the JSON body for the Captions image ``generate/submit`` call.

    ``prompt`` and ``optimisticProjectId`` are required; the other fields
    carry defaults.
    """

    modelId: str = "openai-gpt-4o-image"
    prompt: str
    # Integer aspect-ratio code (see get_aspect_ratio_from_size for the
    # values this file sends).
    aspectRatio: int = 2
    magicPrompt: bool = False
    optimisticProjectId: str
|
|
class CaptionsStatusRequest(BaseModel):
    """Shape of the JSON body for the Captions image ``generate/status`` call."""

    # Identifier returned by the submit call.
    operationId: str
|
|
| |
class CaptionsTTSSubmitRequest(BaseModel):
    """JSON body sent to the Captions TTS ``generate/submit`` endpoint.

    ``text`` and ``optimisticProjectId`` are required; ``voiceId`` and
    ``modelId`` carry defaults.
    """

    # Text to synthesise.
    text: str
    # Captions voice ID used when the caller supplies none.
    voiceId: str = "4VCohb9n7kc8qQAMbC9T"
    # Captions TTS model ID used when the caller supplies none.
    modelId: str = "QHwZJt6xARgiV04YqEFY"
    optimisticProjectId: str
|
|
class CaptionsTTSStatusRequest(BaseModel):
    """Shape of the JSON body for the Captions TTS ``generate/status`` call."""

    # Identifier returned by the TTS submit call.
    operationId: str
|
|
| |
# In-memory registry of submitted operations, keyed by operation ID.
# It lives only in this process and nothing in this file removes entries.
operations_store = dict()
|
|
def get_captions_model_id(openai_model: str) -> str:
    """Resolve a client-supplied model name to a Captions model ID.

    Resolution order:
      1. a name listed in ``MODEL_MAPPINGS`` maps to its Captions ID;
      2. a name that is already a Captions ID in ``AVAILABLE_MODELS`` is
         returned unchanged (these IDs are advertised by ``/v1/models``, and
         several of them, e.g. ``openai-gpt-4o-image``, have no entry in
         ``MODEL_MAPPINGS`` and used to resolve to DALL-E 3 silently);
      3. anything else, including ``None``, falls back to ``"openai-dalle-3"``.

    Args:
        openai_model: Model name as sent by the client.

    Returns:
        The Captions model ID to send upstream.
    """
    if openai_model in MODEL_MAPPINGS:
        return MODEL_MAPPINGS[openai_model]
    if openai_model in AVAILABLE_MODELS:
        return openai_model
    return "openai-dalle-3"
|
|
def get_aspect_ratio_from_size(size: str) -> int:
    """Translate an OpenAI ``WIDTHxHEIGHT`` size string into a Captions aspect-ratio code.

    Square sizes map to 1, ``1792x1024`` to 2 and ``1024x1792`` to 3.
    Any size not in the table maps to 1.
    """
    ratio_codes = dict.fromkeys(("256x256", "512x512", "1024x1024"), 1)
    ratio_codes["1792x1024"] = 2
    ratio_codes["1024x1792"] = 3

    try:
        return ratio_codes[size]
    except KeyError:
        # Unrecognised sizes use the same code as the square sizes.
        return 1
|
|
def get_captions_voice_id(openai_voice: str) -> str:
    """Resolve a client-supplied voice to a Captions voice ID.

    Resolution order:
      1. a voice name (matched case-insensitively, surrounding whitespace
         ignored) listed in ``VOICE_MAPPINGS`` maps to its voice ID;
      2. a value that is already a voice ID in ``AVAILABLE_VOICES`` is
         returned unchanged, so the IDs listed by ``/v1/voices`` can be
         used directly;
      3. anything else, including ``None`` or an empty string, falls back to
         ``"0s0tckZNA4EDjsNWIGpn"`` (the ID mapped to ``"alloy"``).

    Args:
        openai_voice: Voice name or Captions voice ID sent by the client.

    Returns:
        The Captions voice ID to send upstream.
    """
    requested = (openai_voice or "").strip()

    mapped = VOICE_MAPPINGS.get(requested.lower())
    if mapped is not None:
        return mapped

    # Voice IDs are case-sensitive, so match them without lower-casing.
    if requested in AVAILABLE_VOICES:
        return requested

    return "0s0tckZNA4EDjsNWIGpn"
|
|
async def submit_image_generation(prompt: str, model: str = "dall-e-3", size: str = "1024x1024") -> str:
    """Submit an image generation job to the Captions API.

    Args:
        prompt: Text description of the image.
        model: Client-facing model name; translated with
            ``get_captions_model_id``.
        size: OpenAI-style size string; translated with
            ``get_aspect_ratio_from_size``.

    Returns:
        The ``operationId`` reported by the submit endpoint.

    Raises:
        HTTPException: 500 with detail
            - "Failed to submit image generation" when the response body does
              not report success,
            - "Failed to connect to image generation service" on an httpx
              request error,
            - "Internal server error" for any other failure (non-2xx status,
              malformed body, ...).
    """
    headers = {
        "accept": "application/json, text/plain, */*",
        "authorization": f"Bearer {BEARER_TOKEN}",
        "content-type": "application/json",
        "origin": "https://desktop.captions.ai",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "x-app-version": "1.0.0",
        "x-captions-user-timezone": "UTC",
        # A fresh random device ID (32 hex characters, no dashes) per call.
        "x-device-id": uuid.uuid4().hex,
    }

    payload = {
        "modelId": get_captions_model_id(model),
        "prompt": prompt,
        "aspectRatio": get_aspect_ratio_from_size(size),
        "magicPrompt": False,
        "optimisticProjectId": f"API-{uuid.uuid4()}",
    }

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(
                f"{CAPTIONS_BASE_URL}/generate/submit",
                headers=headers,
                json=payload,
                timeout=30.0,
            )
            response.raise_for_status()
            result = response.json()

            if not result.get("success"):
                raise HTTPException(status_code=500, detail="Failed to submit image generation")

            operation_id = result["data"]["operationId"]
            logger.info(f"Image generation submitted with operation ID: {operation_id}")
            return operation_id

        except HTTPException:
            # HTTPException is an Exception subclass; without this clause the
            # specific detail raised above would be caught by the generic
            # handler below and replaced with "Internal server error".
            raise
        except httpx.RequestError as e:
            logger.error(f"Request error: {e}")
            raise HTTPException(status_code=500, detail="Failed to connect to image generation service") from e
        except Exception as e:
            logger.exception(f"Unexpected error: {e}")
            raise HTTPException(status_code=500, detail="Internal server error") from e
|
|
async def check_generation_status(operation_id: str) -> dict:
    """Fetch the current status of an image generation job from the Captions API.

    Args:
        operation_id: Identifier returned by the submit call.

    Returns:
        The ``data`` member of the status response.

    Raises:
        HTTPException: 500 with detail
            - "Failed to check generation status" when the response body does
              not report success,
            - "Failed to connect to status service" on an httpx request error,
            - "Internal server error" for any other failure (non-2xx status,
              malformed body, ...).
    """
    headers = {
        "accept": "application/json, text/plain, */*",
        "authorization": f"Bearer {BEARER_TOKEN}",
        "content-type": "application/json",
        "origin": "https://desktop.captions.ai",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "x-app-version": "1.0.0",
        "x-captions-user-timezone": "UTC",
        # A fresh random device ID (32 hex characters, no dashes) per call.
        "x-device-id": uuid.uuid4().hex,
    }

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(
                f"{CAPTIONS_BASE_URL}/generate/status",
                headers=headers,
                json={"operationId": operation_id},
                timeout=30.0,
            )
            response.raise_for_status()
            result = response.json()

            if not result.get("success"):
                raise HTTPException(status_code=500, detail="Failed to check generation status")

            return result["data"]

        except HTTPException:
            # Keep the specific detail raised above instead of letting the
            # generic handler below rewrite it to "Internal server error".
            raise
        except httpx.RequestError as e:
            logger.error(f"Request error: {e}")
            raise HTTPException(status_code=500, detail="Failed to connect to status service") from e
        except Exception as e:
            logger.exception(f"Unexpected error: {e}")
            raise HTTPException(status_code=500, detail="Internal server error") from e
|
|
async def wait_for_completion(operation_id: str, max_wait_time: int = 300) -> dict:
    """Poll the status endpoint until an image generation job finishes.

    A failed status check is retried with exponential backoff; three
    consecutive failures abort the wait. Previously the retry branch could
    never run: ``check_generation_status`` reports every failure as an
    ``HTTPException``, and those were re-raised before the retry logic.

    Args:
        operation_id: Identifier returned by the submit call.
        max_wait_time: Maximum number of seconds to keep polling.

    Returns:
        The ``complete`` member of the status data once ``state`` is 2.

    Raises:
        HTTPException: 500 when the job reports state 3 (failed), when it
            completes without result data, or when the status check fails
            three times in a row; 408 when ``max_wait_time`` is exceeded.
    """
    # The event loop clock is monotonic, so the deadline is not affected by
    # wall-clock adjustments.
    loop = asyncio.get_running_loop()
    started_at = loop.time()
    retry_count = 0
    max_retries = 3

    while True:
        try:
            status_data = await check_generation_status(operation_id)
        except Exception as e:
            retry_count += 1
            if retry_count >= max_retries:
                logger.error(f"Max retries exceeded for operation {operation_id}: {e}")
                raise HTTPException(
                    status_code=500,
                    detail="Failed to check generation status after multiple retries",
                ) from e

            logger.warning(f"Retry {retry_count}/{max_retries} for operation {operation_id}: {e}")
            await asyncio.sleep(2 ** retry_count)
            continue

        # A successful check resets the consecutive-failure counter.
        retry_count = 0
        state = status_data.get("state") if isinstance(status_data, dict) else None

        if state == 2:
            if "complete" in status_data:
                return status_data["complete"]
            raise HTTPException(status_code=500, detail="Generation completed but no result data")

        if state == 3:
            raise HTTPException(status_code=500, detail="Image generation failed")

        elapsed = loop.time() - started_at
        if elapsed > max_wait_time:
            raise HTTPException(status_code=408, detail="Image generation timeout")

        if state == 1:
            logger.info(f"Operation {operation_id} still processing...")

        # Poll interval grows from 2 s towards a 5 s cap as time passes.
        await asyncio.sleep(min(5, 2 + (elapsed / 60)))
|
|
@app.get("/v1/models")
async def list_models():
    """List the available models in an OpenAI-style ``list`` envelope.

    For every entry of ``AVAILABLE_MODELS`` the response contains the Captions
    model ID itself, followed by each alias from ``MODEL_MAPPINGS`` that
    resolves to it and has not been listed yet.
    """
    # Group aliases by target once, instead of scanning the whole mapping
    # (and rebuilding the list of emitted IDs) for every model.
    aliases_by_model = {}
    for alias, captions_id in MODEL_MAPPINGS.items():
        aliases_by_model.setdefault(captions_id, []).append(alias)

    def describe(identifier, info):
        return {
            "id": identifier,
            "object": "model",
            "created": 1234567890,  # fixed placeholder timestamp
            "owned_by": info["provider"].lower().replace(" ", "-"),
            "name": info["name"],
            "provider": info["provider"],
        }

    models = []
    listed_ids = set()
    for model_id, info in AVAILABLE_MODELS.items():
        models.append(describe(model_id, info))
        listed_ids.add(model_id)

        for alias in aliases_by_model.get(model_id, ()):
            if alias not in listed_ids:
                models.append(describe(alias, info))
                listed_ids.add(alias)

    return {"object": "list", "data": models}
|
|
@app.post("/v1/images/generations", response_model=ImageGenerationResponse)
async def create_image(request: ImageGenerationRequest):
    """Create an image from a text prompt and wait for the result.

    Compatible with the request/response shape of OpenAI's image generation
    API. Exactly one image is generated per call; the ``n``, ``quality``,
    ``style``, ``response_format`` and ``user`` fields are accepted but not
    read by this handler.

    Raises:
        HTTPException: 400 for an unsupported model, an empty prompt, or a
            prompt longer than 1000 characters; 500/408 for upstream
            failures and timeouts.
    """
    try:
        logger.info(f"Received image generation request: prompt='{request.prompt[:100]}...', model='{request.model}', size='{request.size}'")

        # An explicit null model falls back to the field's default.
        model = request.model or "dall-e-3"

        # Validate the requested name itself. Checking the *resolved* ID can
        # never fail, because the resolver falls back to a valid default for
        # unknown names, which made the "not supported" error unreachable.
        if model not in MODEL_MAPPINGS and model not in AVAILABLE_MODELS:
            raise HTTPException(status_code=400, detail=f"Model '{request.model}' is not supported")

        if not request.prompt or not request.prompt.strip():
            raise HTTPException(status_code=400, detail="Prompt cannot be empty")

        if len(request.prompt) > 1000:
            raise HTTPException(status_code=400, detail="Prompt exceeds maximum length of 1000 characters")

        operation_id = await submit_image_generation(request.prompt, model, request.size)
        logger.info(f"Image generation submitted with operation ID: {operation_id}")

        completion_data = await wait_for_completion(operation_id)

        image_url = completion_data.get("assetResolvedUrl")
        if not image_url:
            raise HTTPException(status_code=500, detail="Generation completed but no image URL received")

        response = ImageGenerationResponse(
            created=int(datetime.now().timestamp()),
            # The prompt is echoed back unchanged as revised_prompt.
            data=[ImageData(url=image_url, revised_prompt=request.prompt)],
        )

        logger.info(f"Image generation completed successfully for operation: {operation_id}")
        return response

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Unexpected error in image generation: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
@app.post("/v1/images/generations/async")
async def create_image_async(request: ImageGenerationRequest):
    """Start an image generation job and return its operation ID immediately.

    Non-blocking counterpart of the synchronous generation endpoint. The
    submitted operation is recorded in ``operations_store`` so that its
    status can be polled at the returned ``status_url``.

    Raises:
        HTTPException: 400 for an unsupported model, an empty prompt, or a
            prompt longer than 1000 characters; 500 for upstream failures.
    """
    try:
        logger.info(f"Received async image generation request: prompt='{request.prompt[:100]}...', model='{request.model}', size='{request.size}'")

        # An explicit null model falls back to the field's default.
        model = request.model or "dall-e-3"

        # Validate the requested name itself. Checking the *resolved* ID can
        # never fail, because the resolver falls back to a valid default for
        # unknown names, which made the "not supported" error unreachable.
        if model not in MODEL_MAPPINGS and model not in AVAILABLE_MODELS:
            raise HTTPException(status_code=400, detail=f"Model '{request.model}' is not supported")

        if not request.prompt or not request.prompt.strip():
            raise HTTPException(status_code=400, detail="Prompt cannot be empty")

        if len(request.prompt) > 1000:
            raise HTTPException(status_code=400, detail="Prompt exceeds maximum length of 1000 characters")

        operation_id = await submit_image_generation(request.prompt, model, request.size)
        created = int(datetime.now().timestamp())

        operations_store[operation_id] = {
            # Tag the record so the TTS endpoints, which share this store
            # and inspect "type", can tell image operations apart.
            "type": "image",
            "created": created,
            "prompt": request.prompt,
            "model": request.model,
            "size": request.size,
            "status": "processing",
        }

        return {
            "operation_id": operation_id,
            "status": "submitted",
            "created": created,
            "status_url": f"/v1/images/generations/status/{operation_id}",
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Unexpected error in async image generation: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
@app.get("/v1/images/generations/status/{operation_id}")
async def get_generation_status(operation_id: str):
    """Report the status of an image generation operation.

    The upstream ``state`` value is translated as: 1 -> "processing",
    2 -> "completed" (with the image data), 3 -> "failed", anything else ->
    "unknown".

    Raises:
        HTTPException: 404 when the operation ID is not in
            ``operations_store``; 400 when the ID belongs to a non-image
            operation; 500 when the status check fails or a completed
            operation carries no image URL.
    """
    try:
        operation_info = operations_store.get(operation_id)
        if operation_info is None:
            raise HTTPException(status_code=404, detail="Operation ID not found")

        # TTS operations share the store but have a different record shape
        # (no "created"/"prompt"); reject them up front instead of failing
        # with a KeyError later. Records without a "type" are image records.
        if operation_info.get("type", "image") != "image":
            raise HTTPException(status_code=400, detail="Invalid operation type")

        status_data = await check_generation_status(operation_id)
        state = status_data.get("state")
        created = operation_info.get("created")

        if state == 1:
            return {
                "operation_id": operation_id,
                "status": "processing",
                "created": created,
                "estimated_completion": None,
            }

        if state == 2:
            image_url = (status_data.get("complete") or {}).get("assetResolvedUrl")
            if not image_url:
                raise HTTPException(status_code=500, detail="Generation completed but no image URL received")

            operation_info["status"] = "completed"
            image_data = ImageData(url=image_url, revised_prompt=operation_info.get("prompt"))
            return {
                "operation_id": operation_id,
                "status": "completed",
                "created": created,
                "data": [image_data.dict()],
            }

        if state == 3:
            operation_info["status"] = "failed"
            return {
                "operation_id": operation_id,
                "status": "failed",
                "created": created,
                "error": "Image generation failed",
            }

        return {
            "operation_id": operation_id,
            "status": "unknown",
            "created": created,
            "error": "Unknown status",
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error checking generation status: {e}")
        raise HTTPException(status_code=500, detail="Failed to check generation status")
|
|
| |
@app.post("/v1/audio/speech")
async def create_speech(request: TTSRequest):
    """Generate speech from text and return the audio in the response body.

    Submits the text to the Captions TTS endpoint, polls the status endpoint
    once per second for up to 60 attempts, then downloads the finished audio
    and passes its bytes through unchanged. The response media type is always
    ``audio/mpeg``; ``response_format`` only affects the suggested filename.
    ``model`` and ``speed`` are accepted but not read by this handler.

    Raises:
        HTTPException: 400 for empty input; the upstream status code when the
            submit call is rejected; 500 when generation or the audio
            download fails; 408 when polling runs out of attempts.
    """
    try:
        if not request.input or not request.input.strip():
            raise HTTPException(status_code=400, detail="Input text cannot be empty")

        voice_id = get_captions_voice_id(request.voice)
        # An explicit null format would otherwise produce "speech.None".
        audio_format = request.response_format or "mp3"

        captions_request = CaptionsTTSSubmitRequest(
            text=request.input,
            voiceId=voice_id,
            modelId="QHwZJt6xARgiV04YqEFY",
            optimisticProjectId=f"tts-{uuid.uuid4().hex[:8]}",
        )
        upstream_headers = {
            "Authorization": f"Bearer {BEARER_TOKEN}",
            "Content-Type": "application/json",
            "x-app-version": "1.0.0",
            "x-device-id": "api-client",
        }

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{CAPTIONS_TTS_BASE_URL}/generate/submit",
                json=captions_request.dict(),
                headers=upstream_headers,
                timeout=30.0,
            )

            if response.status_code != 200:
                logger.error(f"TTS submit failed: {response.text}")
                raise HTTPException(status_code=response.status_code, detail="TTS generation failed")

            operation_id = response.json()["data"]["operationId"]

            # This synchronous path does not register the operation in
            # operations_store: the ID is never returned to the client, so
            # such an entry could not be looked up and would only accumulate
            # (together with the full input text) for the process lifetime.

            max_polls = 60
            for _ in range(max_polls):
                status_response = await client.post(
                    f"{CAPTIONS_TTS_BASE_URL}/generate/status",
                    json={"operationId": operation_id},
                    headers=upstream_headers,
                    timeout=30.0,
                )

                # A non-200 status reply counts as one attempt and is retried.
                if status_response.status_code == 200:
                    status_payload = status_response.json()["data"]
                    state = status_payload["state"]

                    if state == "COMPLETE":
                        audio_response = await client.get(status_payload["url"])
                        if audio_response.status_code != 200:
                            raise HTTPException(status_code=500, detail="Failed to fetch generated audio")

                        return StreamingResponse(
                            iter([audio_response.content]),
                            media_type="audio/mpeg",
                            headers={
                                "Content-Disposition": f"attachment; filename=speech.{audio_format}"
                            },
                        )

                    if state == "FAILED":
                        raise HTTPException(status_code=500, detail="TTS generation failed")

                await asyncio.sleep(1)

        raise HTTPException(status_code=408, detail="TTS generation timed out")

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in TTS generation: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
@app.post("/v1/audio/speech/async")
async def create_speech_async(request: TTSRequest, background_tasks: BackgroundTasks):
    """Start a TTS generation job and return its operation ID immediately.

    The operation is recorded in ``operations_store`` (type "tts") so the TTS
    status and download endpoints can find it. ``background_tasks`` is part
    of the route signature but is not used by this handler.

    Raises:
        HTTPException: 400 for empty input; the upstream status code when the
            submit call is rejected; 500 for any other failure.
    """
    try:
        # Reject blank text up front, mirroring the prompt check performed by
        # the image endpoints.
        if not request.input or not request.input.strip():
            raise HTTPException(status_code=400, detail="Input text cannot be empty")

        voice_id = get_captions_voice_id(request.voice)

        captions_request = CaptionsTTSSubmitRequest(
            text=request.input,
            voiceId=voice_id,
            modelId="QHwZJt6xARgiV04YqEFY",
            optimisticProjectId=f"tts-{uuid.uuid4().hex[:8]}",
        )

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{CAPTIONS_TTS_BASE_URL}/generate/submit",
                json=captions_request.dict(),
                headers={
                    "Authorization": f"Bearer {BEARER_TOKEN}",
                    "Content-Type": "application/json",
                    "x-app-version": "1.0.0",
                    "x-device-id": "api-client",
                },
                timeout=30.0,
            )

        if response.status_code != 200:
            logger.error(f"TTS submit failed: {response.text}")
            raise HTTPException(status_code=response.status_code, detail="TTS generation failed")

        operation_id = response.json()["data"]["operationId"]

        operations_store[operation_id] = {
            "type": "tts",
            "voice_id": voice_id,
            "text": request.input,
            "format": request.response_format,
            "created_at": datetime.now(),
            "status": "processing",
        }

        return {"operation_id": operation_id, "status": "processing"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in async TTS generation: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
@app.get("/v1/audio/speech/status/{operation_id}")
async def get_tts_status(operation_id: str):
    """Report the status of a TTS generation operation.

    The upstream ``state`` is translated as: "COMPLETE" -> "completed" (the
    audio URL is stored on the operation and returned), "FAILED" -> "failed",
    anything else -> "processing". A non-200 upstream reply yields
    ``{"status": "error", ...}`` in a normal response.

    Raises:
        HTTPException: 404 when the operation ID is unknown; 400 when it is
            not a TTS operation; 500 when the status check raises.
    """
    operation = operations_store.get(operation_id)
    if operation is None:
        raise HTTPException(status_code=404, detail="Operation not found")

    # Image operations live in the same store and may have no "type" key;
    # use .get so they produce the intended 400 rather than a KeyError.
    if operation.get("type") != "tts":
        raise HTTPException(status_code=400, detail="Invalid operation type")

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{CAPTIONS_TTS_BASE_URL}/generate/status",
                json={"operationId": operation_id},
                headers={
                    "Authorization": f"Bearer {BEARER_TOKEN}",
                    "Content-Type": "application/json",
                    "x-app-version": "1.0.0",
                    "x-device-id": "api-client",
                },
                timeout=30.0,
            )

        if response.status_code != 200:
            return {"status": "error", "error": "Failed to check status"}

        status_payload = response.json()["data"]
        state = status_payload["state"]

        if state == "COMPLETE":
            audio_url = status_payload["url"]
            operation["status"] = "completed"
            # Remembered so the download endpoint can fetch the audio.
            operation["url"] = audio_url
            return {
                "status": "completed",
                "url": audio_url,
                "operation_id": operation_id,
            }

        if state == "FAILED":
            operation["status"] = "failed"
            return {"status": "failed", "operation_id": operation_id}

        operation["status"] = "processing"
        return {"status": "processing", "operation_id": operation_id}

    except Exception as e:
        logger.error(f"Error checking TTS status: {e}")
        raise HTTPException(status_code=500, detail="Failed to check TTS status")
|
|
@app.get("/v1/audio/speech/download/{operation_id}")
async def download_tts_audio(operation_id: str):
    """Download the audio produced by a completed TTS operation.

    The operation must already be marked "completed" with a stored URL, which
    happens when the TTS status endpoint observes completion. The upstream
    bytes are passed through unchanged with media type ``audio/mpeg``; the
    stored format only affects the suggested filename.

    Raises:
        HTTPException: 404 when the operation or its audio URL is unknown;
            400 when the operation is not a TTS operation or is not finished;
            500 when the audio cannot be fetched.
    """
    operation = operations_store.get(operation_id)
    if operation is None:
        raise HTTPException(status_code=404, detail="Operation not found")

    # Image operations live in the same store and may have no "type" key;
    # use .get so they produce the intended 400 rather than a KeyError.
    if operation.get("type") != "tts":
        raise HTTPException(status_code=400, detail="Invalid operation type")

    if operation.get("status") != "completed":
        raise HTTPException(status_code=400, detail="Audio not ready yet")

    audio_url = operation.get("url")
    if not audio_url:
        raise HTTPException(status_code=404, detail="Audio URL not found")

    try:
        async with httpx.AsyncClient() as client:
            audio_response = await client.get(audio_url)

        if audio_response.status_code != 200:
            raise HTTPException(status_code=500, detail="Failed to fetch generated audio")

        # The stored format may be None; fall back to "mp3" so the filename
        # never becomes "speech.None".
        format_type = operation.get("format") or "mp3"
        return StreamingResponse(
            iter([audio_response.content]),
            media_type="audio/mpeg",
            headers={
                "Content-Disposition": f"attachment; filename=speech.{format_type}"
            },
        )

    except HTTPException:
        # Keep the specific detail raised above instead of letting the
        # generic handler below rewrite it.
        raise
    except Exception as e:
        logger.error(f"Error downloading TTS audio: {e}")
        raise HTTPException(status_code=500, detail="Failed to download audio")
|
|
@app.get("/v1/voices")
async def list_voices():
    """List every voice in ``AVAILABLE_VOICES`` with its metadata.

    ``openai_name`` is the first name in ``VOICE_MAPPINGS`` that maps to the
    voice ID, or ``None`` when no name maps to it.
    """
    # Build the reverse lookup once instead of scanning VOICE_MAPPINGS for
    # every voice; setdefault keeps the first name seen for each ID.
    first_name_by_id = {}
    for name, mapped_id in VOICE_MAPPINGS.items():
        first_name_by_id.setdefault(mapped_id, name)

    voices = [
        {
            "id": voice_id,
            "name": voice_info["name"],
            "openai_name": first_name_by_id.get(voice_id),
            "gender": voice_info["gender"],
            "accent": voice_info["accent"],
            "provider": voice_info["provider"],
        }
        for voice_id, voice_info in AVAILABLE_VOICES.items()
    ]

    return {
        "voices": voices,
        "openai_compatible": ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
    }
|
|
@app.get("/health")
async def health_check():
    """Liveness probe: report a fixed "healthy" status with the current local time."""
    checked_at = datetime.now()
    return dict(status="healthy", timestamp=checked_at.isoformat())
|
|
@app.get("/")
async def root():
    """Describe the API: supported models and voices, endpoints, example calls."""
    # Iterate the fixed tuple rather than building a set: set iteration order
    # for strings varies between interpreter runs, which made the order of
    # "openai_voice_aliases" unstable.
    openai_voice_names = ("alloy", "echo", "fable", "onyx", "nova", "shimmer")

    return {
        "message": "OpenAI Compatible Image Generation & TTS API",
        "version": "1.0.0",
        "supported_models": list(AVAILABLE_MODELS.keys()),
        "openai_aliases": list(MODEL_MAPPINGS.keys()),
        "supported_voices": len(AVAILABLE_VOICES),
        "openai_voice_aliases": [name for name in openai_voice_names if name in VOICE_MAPPINGS],
        "endpoints": {
            "models": "/v1/models",
            "voices": "/v1/voices",
            "image_generation": "/v1/images/generations",
            "async_generation": "/v1/images/generations/async",
            "status_check": "/v1/images/generations/status/{operation_id}",
            "tts": "/v1/audio/speech",
            "tts_async": "/v1/audio/speech/async",
            "tts_status": "/v1/audio/speech/status/{operation_id}",
            "tts_download": "/v1/audio/speech/download/{operation_id}",
            "health": "/health",
            "docs": "/docs",
        },
        "example_curl": {
            "generate_image": "curl -X POST 'http://localhost:8000/v1/images/generations' -H 'Content-Type: application/json' -d '{\"prompt\": \"a cat\", \"model\": \"dall-e-3\", \"size\": \"1024x1024\"}'",
            "list_models": "curl -X GET 'http://localhost:8000/v1/models'",
            "generate_speech": "curl -X POST 'http://localhost:8000/v1/audio/speech' -H 'Content-Type: application/json' -d '{\"model\": \"tts-1\", \"input\": \"Hello world\", \"voice\": \"alloy\"}' --output speech.mp3",
            "list_voices": "curl -X GET 'http://localhost:8000/v1/voices'",
        },
    }
|
|
# Script entry point: serve the app on all interfaces, port 8000.
if __name__ == "__main__":
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)