import torch
import soundfile as sf
from diffusers import StableDiffusionPipeline
from transformers import AutoModelForCausalLM, AutoTokenizer
from moviepy.editor import ImageSequenceClip, AudioFileClip

# Pick the best available device once; float16 is only safe on GPU
# (many float16 ops are unimplemented on CPU).
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32


# ---------- STORY GENERATION ----------
def generate_story(prompt, duration):
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2",
        torch_dtype=DTYPE,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
    # Inputs must live on the same device as the model, not be
    # hard-coded to "cpu" while the model is placed automatically.
    inputs = tokenizer(
        f"Write a detailed story of {duration} minutes about: {prompt}",
        return_tensors="pt",
    ).to(model.device)
    output = model.generate(**inputs, max_new_tokens=900)
    # Slice off the echoed prompt so only the generated story remains
    # (otherwise the prompt text would also be fed to the image stage).
    story = tokenizer.decode(
        output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    return story


# ---------- VOICE ----------
def generate_voice(text):
    from bark import SAMPLE_RATE, generate_audio, preload_models

    preload_models()
    # NOTE: Bark generates roughly 13 s of audio per call; long stories
    # should be chunked (e.g. sentence by sentence) and concatenated.
    audio = generate_audio(text)
    path = "story_audio.wav"
    sf.write(path, audio, SAMPLE_RATE)  # Bark outputs 24 kHz audio
    return path


# ---------- IMAGE GENERATION ----------
def generate_images(story):
    sd = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=DTYPE,
    ).to(DEVICE)
    # One image per sentence, ten images max; drop the empty fragments
    # left behind by the naive split on ".".
    scenes = [s.strip() for s in story.split(".") if s.strip()][:10]
    paths = []
    for i, scene in enumerate(scenes):
        img = sd(scene).images[0]
        path = f"img_{i}.png"
        img.save(path)
        paths.append(path)
    return paths


# ---------- VIDEO ----------
def make_video(images, audio_file):
    audio_clip = AudioFileClip(audio_file)
    # Stretch the slideshow to cover the full narration instead of a
    # fixed 1 fps, which would end the video after len(images) seconds
    # and cut the audio off with it.
    seconds_per_image = audio_clip.duration / len(images)
    video_clip = ImageSequenceClip(images, durations=[seconds_per_image] * len(images))
    final = video_clip.set_audio(audio_clip)
    out = "final_story_video.mp4"
    final.write_videofile(out, fps=24)
    return out
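
# ---------- PIPELINE DRIVER (sketch) ----------
# A minimal sketch showing how the four stages above compose end to end.
# This driver is illustrative, not part of the original script: the
# prompt string and duration below are placeholder values, and no error
# handling or GPU-memory management is attempted.
if __name__ == "__main__":
    story = generate_story("a lighthouse keeper who befriends a whale", duration=2)
    audio_path = generate_voice(story)
    image_paths = generate_images(story)
    video_path = make_video(image_paths, audio_path)
    print(f"Wrote {video_path}")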