Spaces:
Sleeping
Sleeping
| import torch | |
| from diffusers import StableDiffusionPipeline | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor | |
| import os, random | |
| from moviepy.editor import ImageSequenceClip, AudioFileClip | |
# ---------- STORY GENERATION ----------
def generate_story(prompt, duration):
    """Generate a narrated story about `prompt` sized for roughly `duration` minutes.

    Loads Mistral-7B-Instruct on every call (heavy; consider caching the model
    at module level if this runs more than once per process).

    Args:
        prompt: Topic/description the story should be about.
        duration: Target length in minutes; only used inside the instruction text.

    Returns:
        The decoded story text (includes the prompt echo, since the full
        generated sequence is decoded).
    """
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2",
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
    # BUG FIX: with device_map="auto" the model may live on GPU; forcing the
    # inputs .to("cpu") caused a device-mismatch error in model.generate.
    # Move the inputs to the model's own device instead.
    inputs = tokenizer(
        f"Write a detailed story of {duration} minutes about: {prompt}",
        return_tensors="pt",
    ).to(model.device)
    output = model.generate(**inputs, max_new_tokens=900)
    story = tokenizer.decode(output[0], skip_special_tokens=True)
    return story
# ---------- VOICE ----------
def generate_voice(text):
    """Synthesize `text` into speech with Bark and write it to a WAV file.

    Returns:
        Path of the written audio file ("story_audio.wav").
    """
    from bark import generate_audio, preload_models
    import soundfile as sf

    # Downloads / loads the Bark weights on first use.
    preload_models()
    samples = generate_audio(text)
    out_path = "story_audio.wav"
    # Bark emits audio at a 24 kHz sample rate.
    sf.write(out_path, samples, 24000)
    return out_path
# ---------- IMAGE GENERATION ----------
def generate_images(story):
    """Render up to 10 Stable Diffusion images, one per sentence of `story`.

    Args:
        story: Text whose period-separated sentences become image prompts.

    Returns:
        List of saved PNG file paths ("img_0.png", "img_1.png", ...).
    """
    # BUG FIX: float16 is not supported by CPU inference kernels
    # ("...not implemented for 'Half'"); pick dtype/device from availability
    # instead of hard-coding float16 weights onto the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32
    sd = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=dtype,
    ).to(device)
    # BUG FIX: a plain split(".") yields empty/whitespace fragments (e.g. after
    # the final period), which would waste a full diffusion run on a blank
    # prompt. Keep only non-empty, stripped sentences; 10 images max.
    scenes = [s.strip() for s in story.split(".") if s.strip()][:10]
    paths = []
    for i, scene in enumerate(scenes):
        image = sd(scene).images[0]
        path = f"img_{i}.png"
        image.save(path)
        paths.append(path)
    return paths
# ---------- VIDEO ----------
def make_video(images, audio_file):
    """Assemble `images` into an MP4 whose length matches the narration audio.

    Args:
        images: Ordered list of image file paths.
        audio_file: Path to the narration audio track.

    Returns:
        Path of the written video file ("final_story_video.mp4").
    """
    audio_clip = AudioFileClip(audio_file)
    # BUG FIX: the hard-coded fps=1 made the video exactly len(images) seconds
    # long regardless of how long the narration runs, truncating or outlasting
    # the audio. Spread the images evenly across the audio's duration instead.
    if audio_clip.duration:
        fps = len(images) / audio_clip.duration
    else:
        fps = 1  # fall back to the original 1 image/second
    video_clip = ImageSequenceClip(images, fps=fps)
    final = video_clip.set_audio(audio_clip)
    out = "final_story_video.mp4"
    final.write_videofile(out)
    return out