# Scraped from a Hugging Face Space (status: Sleeping; file size: 1,836 bytes)
import torch
from diffusers import StableDiffusionPipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor
import os, random
from moviepy.editor import ImageSequenceClip, AudioFileClip
# ---------- STORY GENERATION ----------
def generate_story(prompt, duration):
    """Generate a story of roughly *duration* minutes about *prompt*.

    Loads Mistral-7B-Instruct-v0.2 in fp16 (placed by ``device_map="auto"``)
    and decodes up to 900 new tokens.

    Args:
        prompt: Topic of the story, interpolated into the instruction.
        duration: Target length in minutes, interpolated into the instruction.

    Returns:
        The decoded text (note: includes the instruction echo, since the
        full output sequence is decoded).
    """
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2", torch_dtype=torch.float16, device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
    text = tokenizer(
        f"Write a detailed story of {duration} minutes about: {prompt}",
        return_tensors="pt"
    ).to(model.device)  # bug fix: was hard-coded .to("cpu") — inputs must sit on the model's device or generate() raises a device-mismatch error when device_map places the model on GPU
    output = model.generate(**text, max_new_tokens=900)
    story = tokenizer.decode(output[0], skip_special_tokens=True)
    return story
# ---------- VOICE ----------
def generate_voice(text):
    """Synthesize *text* with Bark and save it as ``story_audio.wav``.

    Returns the path of the written 24 kHz WAV file. Imports are local so
    Bark is only pulled in when narration is actually requested.
    """
    from bark import generate_audio, preload_models

    preload_models()
    waveform = generate_audio(text)

    import soundfile as sf

    output_path = "story_audio.wav"
    sf.write(output_path, waveform, 24000)
    return output_path
# ---------- IMAGE GENERATION ----------
def generate_images(story):
    """Render up to 10 scene images, one per sentence of *story*, with SD v1.5.

    Args:
        story: Narrative text; it is split on "." to obtain scene prompts.

    Returns:
        List of saved image paths (``img_0.png``, ``img_1.png``, ...).
    """
    # bug fix: the original pinned torch.float16 weights to CPU — half-precision
    # inference is unsupported/broken on CPU. Use fp16 only when a GPU exists.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32
    sd = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=dtype
    ).to(device)
    # bug fix: split(".") produces empty/whitespace-only fragments (e.g. after
    # the final period) — drop them before prompting the pipeline.
    scenes = [s.strip() for s in story.split(".") if s.strip()][:10]  # create 10 images max
    paths = []
    for i, scene in enumerate(scenes):
        img = sd(scene).images[0]
        path = f"img_{i}.png"
        img.save(path)
        paths.append(path)
    return paths
# ---------- VIDEO ----------
def make_video(images, audio_file):
    """Assemble *images* into an MP4 synced to the narration in *audio_file*.

    Args:
        images: Ordered list of image file paths.
        audio_file: Path to the narration audio track.

    Returns:
        Path of the written video, ``final_story_video.mp4``.
    """
    audio_clip = AudioFileClip(audio_file)
    # bug fix: the original hard-coded fps=1, so the slideshow lasted only
    # len(images) seconds and set_audio() cut off any longer narration.
    # Spread the images evenly across the full audio duration instead.
    if audio_clip.duration and len(images):
        fps = len(images) / audio_clip.duration
    else:
        fps = 1  # fall back to the old 1 image/second behavior
    video_clip = ImageSequenceClip(images, fps=fps)
    final = video_clip.set_audio(audio_clip)
    out = "final_story_video.mp4"
    try:
        final.write_videofile(out)
    finally:
        # release ffmpeg reader processes/file handles even if encoding fails
        audio_clip.close()
        video_clip.close()
    return out
# (end of file)