import gradio as gr
import numpy as np
import torch
from diffusers import DiffusionPipeline, EulerDiscreteScheduler
from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
|
|
| |
# Text2text pipeline used to expand a short description into an image prompt.
# NOTE(review): facebook/bart-large-cnn is published as an English news
# summarization checkpoint -- confirm it is the intended model for the Chinese
# instruction that generate_prompt sends to it.
prompt_generator = pipeline(
    task="text2text-generation",
    model="facebook/bart-large-cnn",
)
|
|
def generate_prompt(description: str) -> str:
    """Expand a short description into a longer image-generation prompt.

    Args:
        description: Free-form text describing the desired image.

    Returns:
        The ``generated_text`` of the first result returned by
        ``prompt_generator`` for the instruction-wrapped description.

    Raises:
        ValueError: If ``description`` is ``None``, empty, or only whitespace.
    """
    cleaned = (description or "").strip()
    if not cleaned:
        raise ValueError("description must be a non-empty string")

    instruction = f"将这个描述扩展为一个详细的图像生成提示:{cleaned}"
    # truncation=True keeps over-long inputs inside the model's context window
    # instead of letting them fail inside the model.
    outputs = prompt_generator(instruction, max_length=150, truncation=True)
    return outputs[0]["generated_text"]
|
|
| |
# ByteDance/SDXL-Lightning publishes distilled SDXL weights (UNet / LoRA
# checkpoints) rather than a complete diffusers pipeline, so the SDXL base
# pipeline is loaded first and the Lightning LoRA is fused into it.
# NOTE(review): recent diffusers releases need the `peft` package installed
# for LoRA loading -- confirm it is in the deployment requirements.
_SDXL_BASE_REPO = "stabilityai/stable-diffusion-xl-base-1.0"
_LIGHTNING_REPO = "ByteDance/SDXL-Lightning"
_LIGHTNING_STEPS = 4  # must match the step count of the checkpoint below
_LIGHTNING_LORA = f"sdxl_lightning_{_LIGHTNING_STEPS}step_lora.safetensors"

_device = "cuda" if torch.cuda.is_available() else "cpu"
if _device == "cuda":
    # Half precision weights only make sense on the GPU.
    pipe = DiffusionPipeline.from_pretrained(
        _SDXL_BASE_REPO, torch_dtype=torch.float16, variant="fp16"
    )
else:
    pipe = DiffusionPipeline.from_pretrained(_SDXL_BASE_REPO)
pipe.load_lora_weights(_LIGHTNING_REPO, weight_name=_LIGHTNING_LORA)
pipe.fuse_lora()
# The Lightning checkpoints expect "trailing" timestep spacing.
pipe.scheduler = EulerDiscreteScheduler.from_config(
    pipe.scheduler.config, timestep_spacing="trailing"
)
pipe = pipe.to(_device)


def generate_image(prompt: str):
    """Render one image for ``prompt`` with the SDXL-Lightning pipeline.

    Args:
        prompt: Text prompt passed to the diffusion pipeline.

    Returns:
        The first image of the pipeline output (``.images[0]``).
    """
    # Lightning checkpoints are distilled for a fixed number of steps and are
    # meant to be sampled without classifier-free guidance.
    result = pipe(
        prompt,
        num_inference_steps=_LIGHTNING_STEPS,
        guidance_scale=0,
    )
    return result.images[0]
|
|
| |
# Whisper processor and model used for speech-to-text; both are loaded from
# the same checkpoint.
_WHISPER_CHECKPOINT = "openai/whisper-large"
processor = WhisperProcessor.from_pretrained(_WHISPER_CHECKPOINT)
model = WhisperForConditionalGeneration.from_pretrained(_WHISPER_CHECKPOINT)
|
|
def transcribe_audio(audio):
    """Transcribe a Gradio ``type="numpy"`` recording with Whisper.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by
            ``gr.Audio(type="numpy")``. ``samples`` may be integer PCM or
            float, shaped ``(n,)`` for mono or ``(n, channels)`` otherwise.

    Returns:
        The decoded transcription with special tokens removed.

    Raises:
        ValueError: If ``audio`` is ``None`` or contains no samples.
    """
    if audio is None:
        raise ValueError("no audio was provided")

    sample_rate, samples = audio
    samples = np.asarray(samples)
    if samples.size == 0:
        raise ValueError("audio recording is empty")

    # Whisper's feature extractor expects float samples in [-1, 1]; scale
    # integer PCM by the maximum value of its dtype.
    if np.issubdtype(samples.dtype, np.integer):
        waveform = samples.astype(np.float32) / float(np.iinfo(samples.dtype).max)
    else:
        waveform = samples.astype(np.float32)

    # Down-mix multi-channel recordings to mono.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    # Whisper models operate on 16 kHz audio; browsers usually record at a
    # higher rate, so resample when needed.
    target_rate = 16000
    sample_rate = int(sample_rate)
    if sample_rate != target_rate:
        from math import gcd

        # Imported lazily so the rest of the app works without SciPy.
        from scipy.signal import resample_poly

        divisor = gcd(sample_rate, target_rate)
        waveform = resample_poly(
            waveform, target_rate // divisor, sample_rate // divisor
        ).astype(np.float32)

    features = processor(
        waveform, sampling_rate=target_rate, return_tensors="pt"
    ).input_features
    # Keep the inputs on the same device / dtype as the model weights.
    features = features.to(model.device, dtype=model.dtype)
    predicted_ids = model.generate(features)
    return processor.decode(predicted_ids[0], skip_special_tokens=True)
|
|
| |
def process_input(description: str, creativity: float, include_background: bool):
    """Gradio handler for the text tab: turn a description into a prompt and image.

    Args:
        description: Short text typed by the user.
        creativity: Slider value from the UI. It is accepted for interface
            compatibility but is not consumed by prompt or image generation.
        include_background: When true, a sentence asking for a detailed,
            vivid background is appended to the generated prompt.

    Returns:
        A ``(prompt, image)`` tuple matching the interface's two outputs.

    Raises:
        gr.Error: If ``description`` is empty or only whitespace.
    """
    if not description or not description.strip():
        # Surface a readable message in the UI instead of running both models
        # on an empty request.
        raise gr.Error("请输入描述")

    # TODO(review): `creativity` is collected from the slider but never
    # forwarded; wiring it in requires generate_prompt / generate_image to
    # accept a sampling parameter.
    prompt = generate_prompt(description)
    if include_background:
        prompt += " 添加详细的生动背景。"
    return prompt, generate_image(prompt)
|
|
| |
def process_audio_input(audio):
    """Gradio handler for the audio tab: speech -> description -> prompt -> image.

    Args:
        audio: Value delivered by the audio component, or ``None`` when the
            user submitted without recording or uploading anything.

    Returns:
        A ``(prompt, image)`` tuple matching the interface's two outputs.

    Raises:
        gr.Error: If no audio was supplied or nothing could be transcribed.
    """
    if audio is None:
        raise gr.Error("请先录制或上传一段语音")

    description = transcribe_audio(audio)
    if not description or not description.strip():
        # Silence or unintelligible audio yields an empty transcription; stop
        # here rather than generating an image from nothing.
        raise gr.Error("未能识别到语音内容,请重试")

    prompt = generate_prompt(description)
    return prompt, generate_image(prompt)
|
|
| |
# Input widgets for the text interface (description, slider, checkbox) and
# for the audio interface (recording delivered as numpy data).
text_input = gr.Textbox(
    label="请输入描述",
    placeholder="例如:天空中的魔法树屋",
)
creativity_slider = gr.Slider(
    minimum=0,
    maximum=1,
    step=0.1,
    value=0.7,
    label="创意程度 (0 到 1)",
)
background_checkbox = gr.Checkbox(value=True, label="是否添加背景")
audio_input = gr.Audio(label="用语音描述图像", type="numpy")
|
|
| |
# Text tab: description + options in, generated prompt + image out.
# The former theme="huggingface" argument was dropped: it is not one of
# Gradio's built-in theme names, so it only produced a warning and a fallback
# to the default theme.
interface = gr.Interface(
    fn=process_input,
    inputs=[text_input, creativity_slider, background_checkbox],
    outputs=[gr.Textbox(label="生成的提示"), gr.Image(label="生成的图像")],
    title="魔法图像生成器",
    description="请输入简短描述或使用语音生成魔法图像!调整创意程度和背景选项。",
)
|
|
| |
# Audio tab: a spoken description in, generated prompt + image out.
interface_with_audio = gr.Interface(
    title="语音输入魔法图像生成器",
    description="说出一个简短的描述,生成魔法图像!",
    fn=process_audio_input,
    inputs=[audio_input],
    outputs=[
        gr.Textbox(label="生成的提示"),
        gr.Image(label="生成的图像"),
    ],
)
|
|
| |
# Combine both interfaces into one app. Explicit tab names replace the default
# "Tab 0" / "Tab 1" labels.
demo = gr.TabbedInterface(
    [interface, interface_with_audio],
    tab_names=["文字描述", "语音描述"],
)

# Only start the server when run as a script, so importing this module does
# not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()
|
|
|
|