| import gradio as gr |
| from transformers import pipeline |
| import torch |
| from diffusers import StableDiffusionPipeline |
| import soundfile as sf |
| import speech_recognition as sr |
| import numpy as np |
| import os |
|
|
| |
| |
# Text-generation pipeline used to expand short descriptions into prompts.
llm_pipe = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")


# Stable Diffusion pipeline used for the actual image generation.
# Half precision is only selected when a CUDA device is available: a
# float16 pipeline moved to CPU is not expected to run correctly, so the CPU
# fallback loads the weights in float32 instead.
_sd_device = "cuda" if torch.cuda.is_available() else "cpu"
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if _sd_device == "cuda" else torch.float32,
).to(_sd_device)


# Shared speech recognizer used by process_audio.
recognizer = sr.Recognizer()
|
|
def enhance_prompt(basic_prompt, style, detail_level, artist_style):
    """Expand a short description into a detailed Stable Diffusion prompt.

    The description and the style controls are interpolated into an
    instruction template that is sent to the module-level ``llm_pipe``
    text-generation pipeline.

    Args:
        basic_prompt: Short description of the desired image.
        style: Style label inserted into the template.
        detail_level: Detail level inserted into the template.
        artist_style: Artist style label inserted into the template.

    Returns:
        The model's continuation only (the instruction template is not
        included), with surrounding whitespace stripped.
    """
    prompt_template = f"""
    根据以下简短描述创建一个详细的Stable Diffusion提示:
    原始描述: {basic_prompt}
    风格: {style}
    细节级别: {detail_level}
    艺术家风格: {artist_style}

    请生成一个包含以下元素的详细提示:
    - 主体描述
    - 环境/背景
    - 光照条件
    - 色彩风格
    - 艺术媒介(如数字绘画、油画等)
    - 质量描述(如4K、超详细等)

    生成的提示:
    """

    outputs = llm_pipe(
        prompt_template,
        # `max_length` counts the prompt tokens as well, and this template
        # alone may approach or exceed 200 tokens; budget only the newly
        # generated tokens instead.
        max_new_tokens=200,
        num_return_sequences=1,
        # Temperature only has an effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        # Ask the pipeline for the continuation only, rather than removing
        # the echoed prompt afterwards with a string replace.
        return_full_text=False,
    )

    return outputs[0]['generated_text'].strip()
|
|
def generate_image(enhanced_prompt, steps, guidance_scale, seed):
    """Generate one image with the module-level ``sd_pipe`` pipeline.

    Args:
        enhanced_prompt: Prompt text passed to the pipeline.
        steps: Number of inference steps; coerced to ``int``.
        guidance_scale: Guidance scale passed through to the pipeline.
        seed: Seed for the random generator. ``-1`` or ``None`` selects a
            random seed. Float values such as ``42.0`` are accepted and
            truncated to ``int``.

    Returns:
        A ``(image, seed)`` tuple: the first image returned by the pipeline
        and the integer seed that was actually used.
    """
    if seed is None or seed == -1:
        seed = torch.randint(0, 2**32, (1,)).item()
    # Numeric UI fields can deliver floats (e.g. 42.0), which
    # torch.Generator.manual_seed does not accept.
    seed = int(seed)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    generator = torch.Generator(device=device).manual_seed(seed)

    result = sd_pipe(
        enhanced_prompt,
        num_inference_steps=int(steps),
        guidance_scale=guidance_scale,
        generator=generator,
    )

    return result.images[0], seed
|
|
def process_audio(audio, language="en-US"):
    """Transcribe a recorded clip with Google's speech recognition.

    Args:
        audio: ``(sample_rate, samples)`` pair; ``samples`` is array-like
            PCM data (presumably what ``gr.Audio`` delivers in its numpy
            mode -- confirm against the UI configuration).
        language: Language tag passed to ``recognize_google``. Defaults to
            ``"en-US"``, the value that was previously hard-coded.

    Returns:
        The recognized text, or a string starting with ``"语音识别错误: "``
        when recognition fails.
    """
    import io  # local import keeps this function self-contained

    # Do not name the sample rate `sr`: that would shadow the
    # speech_recognition module alias needed for `sr.AudioFile` below.
    sample_rate, samples = audio
    samples = np.asarray(samples)

    if np.issubdtype(samples.dtype, np.floating):
        # Float PCM is interpreted on a [-1, 1] scale when written as
        # 16-bit; clip so out-of-range samples cannot overflow.
        samples = np.clip(samples, -1.0, 1.0).astype(np.float32)
    elif samples.dtype not in (np.int16, np.int32):
        # Keep integer PCM as integers. Casting it to float32 without
        # rescaling would produce a badly out-of-range signal.
        samples = samples.astype(np.int16)

    # An in-memory WAV avoids a fixed temp-file name that concurrent
    # requests would overwrite, and leaves nothing to clean up.
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, int(sample_rate), format="WAV", subtype="PCM_16")
    wav_buffer.seek(0)

    with sr.AudioFile(wav_buffer) as source:
        recording = recognizer.record(source)

    try:
        return recognizer.recognize_google(recording, language=language)
    except Exception as exc:
        # Recognition failures are reported as text, as before.
        return f"语音识别错误: {str(exc)}"
|
|
def full_process(basic_prompt, style, detail_level, artist_style, steps, guidance_scale, seed, use_audio, audio_input):
    """Run the full flow: optional transcription, prompt enhancement, image.

    Args:
        basic_prompt: Short text description. Replaced by the transcription
            when ``use_audio`` is set and ``audio_input`` is not ``None``.
        style: Forwarded to ``enhance_prompt``.
        detail_level: Forwarded to ``enhance_prompt``.
        artist_style: Forwarded to ``enhance_prompt``.
        steps: Forwarded to ``generate_image``.
        guidance_scale: Forwarded to ``generate_image``.
        seed: Forwarded to ``generate_image``.
        use_audio: Whether to take the description from ``audio_input``.
        audio_input: Recorded audio passed to ``process_audio``, or ``None``.

    Returns:
        ``(enhanced_prompt, image, used_seed)``.

    Raises:
        gr.Error: If speech recognition reported a failure, or if no
            description is available.
    """
    if use_audio and audio_input is not None:
        basic_prompt = process_audio(audio_input)
        # process_audio reports failures as text with this prefix. Surface
        # the failure to the user instead of using it as the image prompt.
        if basic_prompt.startswith("语音识别错误"):
            raise gr.Error(basic_prompt)

    # Stop before running two expensive models on an empty description.
    if not basic_prompt or not str(basic_prompt).strip():
        raise gr.Error("请先输入简短描述或录制语音")

    enhanced_prompt = enhance_prompt(basic_prompt, style, detail_level, artist_style)

    image, used_seed = generate_image(enhanced_prompt, steps, guidance_scale, seed)

    return enhanced_prompt, image, used_seed
|
|
| |
# Gradio UI: inputs in the left column, results in the right column.
with gr.Blocks(title="魔法树屋图像生成器") as demo:
    gr.Markdown("# 🎨 魔法树屋图像生成器")
    gr.Markdown("输入简短描述或使用语音输入,生成精美图像!")

    with gr.Row():
        with gr.Column():
            # Input mode: the checkbox swaps the textbox for the recorder.
            use_audio = gr.Checkbox(label="使用语音输入")
            audio_input = gr.Audio(label="录音", visible=False)

            basic_prompt = gr.Textbox(
                label="简短描述",
                placeholder="例如: 天空中的魔法树屋",
                visible=True,
            )

            def toggle_input(audio_enabled):
                """Show the recorder and hide the textbox, or the reverse."""
                return {
                    basic_prompt: gr.update(visible=not audio_enabled),
                    audio_input: gr.update(visible=audio_enabled),
                }

            use_audio.change(
                toggle_input,
                inputs=use_audio,
                outputs=[basic_prompt, audio_input],
            )

            # Style controls interpolated into the LLM instruction template.
            style = gr.Dropdown(
                label="风格",
                choices=["现实主义", "幻想艺术", "赛博朋克", "水墨画", "卡通", "极简主义"],
                value="幻想艺术",
            )

            detail_level = gr.Slider(
                label="细节级别",
                minimum=1,
                maximum=5,
                step=1,
                value=3,
            )

            artist_style = gr.Dropdown(
                label="艺术家风格",
                choices=["无", "梵高", "毕加索", "莫奈", "达利", "宫崎骏"],
                value="无",
            )

            # Sampling parameters forwarded to generate_image.
            with gr.Accordion("高级选项", open=False):
                steps = gr.Slider(
                    label="生成步数",
                    minimum=20,
                    maximum=100,
                    step=5,
                    value=50,
                )

                guidance_scale = gr.Slider(
                    label="引导尺度",
                    minimum=1.0,
                    maximum=20.0,
                    step=0.5,
                    value=7.5,
                )

                # precision=0 makes the component deliver an int; the seed
                # ends up in torch.Generator.manual_seed, which rejects
                # floats.
                seed = gr.Number(
                    label="随机种子 (-1 表示随机)",
                    value=-1,
                    precision=0,
                )

            submit_btn = gr.Button("生成图像", variant="primary")

        with gr.Column():
            # Outputs, in the order returned by full_process.
            enhanced_prompt = gr.Textbox(
                label="生成的提示",
                interactive=False,
            )

            image_output = gr.Image(
                label="生成的图像",
                height=512,
            )

            used_seed = gr.Number(
                label="使用的种子",
                interactive=False,
                precision=0,
            )

    # The input order must match the parameter order of full_process.
    submit_btn.click(
        fn=full_process,
        inputs=[
            basic_prompt, style, detail_level, artist_style,
            steps, guidance_scale, seed, use_audio, audio_input,
        ],
        outputs=[enhanced_prompt, image_output, used_seed],
    )


# Enable the request queue before launching.
demo.queue()


if __name__ == "__main__":
    demo.launch()