| |
| |
|
|
| import os |
| import sys |
| import torch |
| import json |
| import argparse |
| from datetime import datetime |
| from diffusers import StableDiffusionPipeline |
| from PIL import Image |
|
|
| |
# Add the directory two levels above this script to the import search path
# so modules that live there can be imported when the script is run directly.
sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
|
|
| |
|
|
def setup_args():
    """Build the command-line parser and parse the process arguments.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()`` with the
        attributes ``prompt`` (list of words), ``style``, ``mood``,
        ``variations``, ``quality``, ``negative``, ``interactive``,
        ``list_styles`` and ``list_moods``.
    """
    parser = argparse.ArgumentParser(description="CompI Phase 1.B: Advanced Style Conditioning")

    # (names, keyword options) in the order they should be registered;
    # registration order is what argparse uses for the generated help text.
    argument_specs = [
        (("prompt",), {"nargs": "*", "help": "Main scene/subject description"}),
        (("--style", "-s"), {"help": "Art style (or number from list)"}),
        (("--mood", "-m"), {"help": "Mood/atmosphere (or number from list)"}),
        (("--variations", "-v"), {"type": int, "default": 1, "help": "Number of variations"}),
        (
            ("--quality", "-q"),
            {"choices": ["draft", "standard", "high"], "default": "standard", "help": "Quality preset"},
        ),
        (("--negative", "-n"), {"help": "Negative prompt"}),
        (("--interactive", "-i"), {"action": "store_true", "help": "Interactive mode"}),
        (("--list-styles",), {"action": "store_true", "help": "List available styles and exit"}),
        (("--list-moods",), {"action": "store_true", "help": "List available moods and exit"}),
    ]
    for names, options in argument_specs:
        parser.add_argument(*names, **options)

    return parser.parse_args()
|
|
| |
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")


# Generated files are written to an "outputs" directory located two levels
# above this script; it is created at import time if it does not exist yet.
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), '..', '..', "outputs")
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
|
def log(msg):
    """Print *msg* to stdout, prefixed with the current local time in brackets."""
    stamp = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
    print("{} {}".format(stamp, msg))
|
|
| |
|
|
# Art-style presets, keyed by the name the user selects.
# "prompt" is the text appended to the user's prompt when the style is chosen;
# "negative" is the text contributed to the negative prompt.
# Insertion order matters: styles are listed and selected by number in this order.
STYLES = {
    "digital art": dict(
        prompt="digital art, highly detailed",
        negative="blurry, pixelated, low resolution",
    ),
    "oil painting": dict(
        prompt="oil painting, classical art, brushstrokes, canvas texture",
        negative="digital, pixelated, modern",
    ),
    "watercolor": dict(
        prompt="watercolor painting, soft colors, flowing paint",
        negative="harsh lines, digital, photographic",
    ),
    "cyberpunk": dict(
        prompt="cyberpunk style, neon lights, futuristic, sci-fi",
        negative="natural, organic, pastoral",
    ),
    "impressionist": dict(
        prompt="impressionist painting, soft brushstrokes, light and color",
        negative="sharp details, photorealistic, digital",
    ),
    "concept art": dict(
        prompt="concept art, professional illustration, detailed",
        negative="amateur, sketch, unfinished",
    ),
    "anime": dict(
        prompt="anime style, manga, Japanese animation",
        negative="realistic, western cartoon, photographic",
    ),
    "photorealistic": dict(
        prompt="photorealistic, high detail, professional photography",
        negative="cartoon, painting, stylized",
    ),
    "minimalist": dict(
        prompt="minimalist art, clean lines, simple composition",
        negative="cluttered, complex, detailed",
    ),
    "surrealism": dict(
        prompt="surrealist art, dreamlike, impossible, Salvador Dali style",
        negative="realistic, logical, mundane",
    ),
    "pixel art": dict(
        prompt="pixel art, 8-bit style, retro gaming",
        negative="smooth, high resolution, photorealistic",
    ),
    "steampunk": dict(
        prompt="steampunk style, Victorian era, brass and copper, gears",
        negative="modern, digital, futuristic",
    ),
    "3d render": dict(
        prompt="3D render, CGI, computer graphics, ray tracing",
        negative="2D, flat, hand-drawn",
    ),
}
|
|
# Mood/atmosphere presets, keyed by the name the user selects.
# "prompt" is the text appended to the user's prompt when the mood is chosen;
# "negative" is the text contributed to the negative prompt.
# Insertion order matters: moods are listed and selected by number in this order.
MOODS = {
    "dreamy": dict(
        prompt="dreamy atmosphere, soft lighting, ethereal",
        negative="harsh, stark, realistic",
    ),
    "dark": dict(
        prompt="dark and moody, dramatic shadows, mysterious",
        negative="bright, cheerful, light",
    ),
    "peaceful": dict(
        prompt="peaceful, serene, calm, tranquil",
        negative="chaotic, violent, disturbing",
    ),
    "vibrant": dict(
        prompt="vibrant and energetic, bright colors, dynamic",
        negative="dull, muted, lifeless",
    ),
    "melancholic": dict(
        prompt="melancholic, sad, nostalgic, wistful",
        negative="happy, joyful, upbeat",
    ),
    "mysterious": dict(
        prompt="mysterious, enigmatic, hidden secrets",
        negative="obvious, clear, straightforward",
    ),
    "whimsical": dict(
        prompt="whimsical, playful, fantastical, magical",
        negative="serious, realistic, mundane",
    ),
    "dramatic": dict(
        prompt="dramatic lighting, high contrast, cinematic",
        negative="flat lighting, low contrast, amateur",
    ),
    "retro": dict(
        prompt="retro style, vintage, nostalgic, classic",
        negative="modern, contemporary, futuristic",
    ),
}
|
|
# Generation settings per quality level: number of inference steps,
# guidance scale, and output size as (width, height) in pixels.
QUALITY_PRESETS = {
    "draft": dict(steps=20, guidance=6.0, size=(512, 512)),
    "standard": dict(steps=30, guidance=7.5, size=(512, 512)),
    "high": dict(steps=50, guidance=8.5, size=(768, 768)),
}
|
|
| |
|
|
def load_model():
    """Load the Stable Diffusion pipeline and move it to the selected device.

    The pipeline is created with a pass-through safety checker, moved to the
    module-level ``device`` and has attention slicing enabled.

    Returns:
        The ready-to-use pipeline object.

    Exits:
        Calls ``sys.exit(1)`` after logging the error if anything raised
        while the pipeline was being loaded or prepared.
    """
    model_id = "runwayml/stable-diffusion-v1-5"
    log(f"Loading model: {model_id}")

    def passthrough_checker(images, **kwargs):
        # Hand every image back unchanged and report each one as not flagged.
        return images, [False] * len(images)

    # Half precision is only requested on CUDA; the CPU path uses float32.
    if device == "cuda":
        weights_dtype = torch.float16
    else:
        weights_dtype = torch.float32

    try:
        pipeline = StableDiffusionPipeline.from_pretrained(
            model_id,
            torch_dtype=weights_dtype,
            safety_checker=passthrough_checker,
        )
        pipeline = pipeline.to(device)
        pipeline.enable_attention_slicing()
        log("Model loaded successfully")
        return pipeline
    except Exception as exc:
        # Top-level boundary: report the failure and stop the script.
        log(f"Error loading model: {exc}")
        sys.exit(1)
|
|
| |
|
|
def list_options(options_dict, title):
    """Print a numbered list of the presets in *options_dict* under *title*.

    Args:
        options_dict: Mapping of preset name to a dict with a "prompt" entry.
        title: Heading printed above the list.

    Each line shows the 1-based position, the preset name and the preset's
    prompt text, cut to 50 characters plus "..." when it is longer than that.
    """
    print(f"\n{title}:")
    for number, name in enumerate(options_dict, start=1):
        text = options_dict[name]["prompt"]
        if len(text) > 50:
            preview = text[:50] + "..."
        else:
            preview = text
        print(f" {number:2d}. {name:15s} - {preview}")
|
|
def get_user_choice(options_dict, prompt_text, allow_custom=True):
    """Ask the user to pick one of the presets in *options_dict*.

    Args:
        options_dict: Mapping whose keys are the selectable preset names.
        prompt_text: Text shown to the user (a ": " suffix is added).
        allow_custom: When true, non-empty input that matches no preset is
            returned as-is so it can be used as free-form text.

    Returns:
        The chosen preset key, the raw input when it is accepted as custom
        text, or ``None`` when nothing usable was entered.

    The answer is interpreted in this order: a 1-based position in the
    listing, an exact preset name, a preset name ignoring letter case,
    then custom text.
    """
    choice = input(f"{prompt_text}: ").strip()
    keys = list(options_dict)

    # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
    # so input such as a superscript "2" cannot raise ValueError here.
    if choice.isdecimal():
        idx = int(choice) - 1
        if 0 <= idx < len(keys):
            return keys[idx]

    if choice in options_dict:
        return choice

    # Accept "Anime" for "anime" so the preset's prompt/negative text is used
    # instead of silently treating the input as a custom string.
    lowered = choice.lower()
    for key in keys:
        if key.lower() == lowered:
            return key

    if allow_custom and choice:
        return choice

    return None
|
|
def interactive_mode(pipe):
    """Collect generation settings from the user on stdin and run generation.

    Args:
        pipe: The loaded pipeline, passed through to
            ``generate_styled_images``.

    Prompts, in order, for the main subject, a style, a mood, the number of
    variations, the quality preset and an optional negative prompt. Returns
    without generating anything when the main subject is left empty.
    """
    log("Starting interactive style conditioning mode")

    main_prompt = input("\nEnter your main scene/subject: ").strip()
    if not main_prompt:
        log("No prompt provided")
        return

    list_options(STYLES, "Available Styles")
    style_key = get_user_choice(STYLES, "Choose style (number/name/custom)")

    list_options(MOODS, "Available Moods")
    mood_key = get_user_choice(MOODS, "Choose mood (number/name/custom/blank)", allow_custom=True)

    raw_variations = input("Number of variations (default 1): ").strip()
    # isdecimal() only accepts characters int() can parse; isdigit() also
    # accepts e.g. superscript digits, which would make int() raise.
    variations = int(raw_variations) if raw_variations.isdecimal() else 1
    if variations < 1:
        # "0" would silently generate nothing; use the advertised default.
        variations = 1

    quality = input("Quality [draft/standard/high] (default standard): ").strip()
    if quality not in QUALITY_PRESETS:
        quality = "standard"

    negative = input("Negative prompt (optional): ").strip()

    generate_styled_images(pipe, main_prompt, style_key, mood_key, variations, quality, negative)
|
|
| |
|
|
def _filename_slug(text, max_length):
    """Return *text* limited to file-name-safe characters and *max_length*."""
    # Anything other than letters, digits, "-" and "_" (path separators,
    # colons, dots, ...) is replaced so user text cannot break the file name
    # or point outside the output directory.
    safe = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in text)
    return safe[:max_length]


def generate_styled_images(pipe, main_prompt, style_key, mood_key, variations, quality, custom_negative=""):
    """Generate images for a prompt combined with a style and a mood.

    Args:
        pipe: Callable pipeline; invoked with the prompt and generation
            settings, its result must expose ``images``.
        main_prompt: The user's scene/subject description.
        style_key: Key in ``STYLES``, free-form style text, or a falsy value
            for no style.
        mood_key: Key in ``MOODS``, free-form mood text, or a falsy value for
            no mood.
        variations: Number of images to generate.
        quality: Key in ``QUALITY_PRESETS``.
        custom_negative: Extra negative-prompt text supplied by the user.

    Raises:
        KeyError: If *quality* is not a key of ``QUALITY_PRESETS``.

    For every variation a PNG and a ``*_metadata.json`` file describing the
    prompt, seed and settings are written to ``OUTPUT_DIR``.
    """
    full_prompt = main_prompt
    preset_negatives = []

    # Style first, then mood: a known key contributes its preset prompt and
    # negative text, any other non-empty value is appended verbatim.
    for key, presets in ((style_key, STYLES), (mood_key, MOODS)):
        if not key:
            continue
        if key in presets:
            full_prompt += f", {presets[key]['prompt']}"
            preset_negatives.append(presets[key]['negative'])
        else:
            full_prompt += f", {key}"

    negative_parts = [part for part in [*preset_negatives, custom_negative] if part]
    full_negative = ", ".join(negative_parts) if negative_parts else None

    quality_settings = QUALITY_PRESETS[quality]

    log(f"Full prompt: {full_prompt}")
    log(f"Negative prompt: {full_negative or '[none]'}")
    log(f"Quality: {quality} ({quality_settings['steps']} steps)")
    log(f"Generating {variations} variation(s)")

    # The slugs do not change between variations, so build them once.
    prompt_slug = _filename_slug("_".join(main_prompt.lower().split()[:4]), 20)
    style_slug = _filename_slug((style_key or "nostyle").replace(" ", ""), 10)
    mood_slug = _filename_slug((mood_key or "nomood").replace(" ", ""), 10)

    for i in range(variations):
        # A fresh seed per variation; it is recorded in the file name and
        # metadata so the image can be traced back to it.
        seed = torch.seed()
        generator = torch.manual_seed(seed) if device == "cpu" else torch.Generator(device).manual_seed(seed)

        with torch.autocast(device) if device == "cuda" else torch.no_grad():
            result = pipe(
                full_prompt,
                negative_prompt=full_negative,
                height=quality_settings["size"][1],
                width=quality_settings["size"][0],
                num_inference_steps=quality_settings["steps"],
                guidance_scale=quality_settings["guidance"],
                generator=generator,
            )

        img = result.images[0]

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{prompt_slug}_{style_slug}_{mood_slug}_{quality}_{timestamp}_seed{seed}_v{i+1}.png"
        filepath = os.path.join(OUTPUT_DIR, filename)

        img.save(filepath)

        metadata = {
            "main_prompt": main_prompt,
            "style": style_key,
            "mood": mood_key,
            "full_prompt": full_prompt,
            "negative_prompt": full_negative,
            "quality": quality,
            "seed": seed,
            "variation": i + 1,
            "timestamp": datetime.now().isoformat(),
            "settings": quality_settings
        }

        # Swap only the extension; str.replace('.png', ...) would also
        # rewrite any ".png" that appears earlier in the path.
        metadata_file = os.path.splitext(filepath)[0] + "_metadata.json"
        with open(metadata_file, 'w', encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

        log(f"Generated variation {i+1}: {filepath}")

    log(f"Phase 1.B complete - {variations} styled images generated")
|
|
| |
|
|
def _resolve_option(value, options_dict):
    """Map a command-line style/mood value onto a preset key when possible.

    A 1-based number selects the preset at that position in *options_dict*
    and a name is matched ignoring letter case. Anything else (including
    ``None`` and out-of-range numbers) is returned unchanged so it can be
    used as free-form text.
    """
    if not value:
        return value

    text = value.strip()
    keys = list(options_dict)

    # isdecimal() only accepts characters that int() can parse.
    if text.isdecimal():
        position = int(text) - 1
        if 0 <= position < len(keys):
            return keys[position]

    lowered = text.lower()
    for key in keys:
        if key.lower() == lowered:
            return key

    return value


def main():
    """Entry point: parse arguments, then list presets or generate images.

    ``--list-styles`` / ``--list-moods`` print the presets and return before
    the model is loaded. ``--interactive`` hands over to
    ``interactive_mode``. Otherwise the prompt comes from the positional
    arguments (or from stdin when none were given) and images are generated
    with the options from the command line.
    """
    args = setup_args()

    if args.list_styles:
        list_options(STYLES, "Available Styles")
        return

    if args.list_moods:
        list_options(MOODS, "Available Moods")
        return

    pipe = load_model()

    if args.interactive:
        interactive_mode(pipe)
        return

    main_prompt = " ".join(args.prompt) if args.prompt else input("Enter main prompt: ").strip()
    if not main_prompt:
        log("No prompt provided")
        return

    # The --style/--mood help text promises "number from list"; translate
    # numbers (and differently-cased names) to the actual preset keys so they
    # are not appended to the prompt as literal text.
    style_key = _resolve_option(args.style, STYLES)
    mood_key = _resolve_option(args.mood, MOODS)

    generate_styled_images(
        pipe, main_prompt, style_key, mood_key,
        args.variations, args.quality, args.negative or ""
    )


if __name__ == "__main__":
    main()
|
|