Rajan18 committed on
Commit
3991698
·
verified ·
1 Parent(s): 5d5ebcf

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +55 -19
utils.py CHANGED
@@ -1,26 +1,62 @@
1
- import gradio as gr
 
 
 
 
2
 
3
- def create_story_video(prompt, duration):
4
- story = generate_story(prompt, duration)
5
- audio_path = generate_voice(story)
6
- image_paths = generate_images(story)
7
- video_path = make_video(image_paths, audio_path)
 
8
 
9
- return story, audio_path, video_path
 
 
 
10
 
11
- with gr.Blocks() as app:
12
- gr.Markdown("# 🎬 AI Story Video Generator")
13
- gr.Markdown("Generate 5–10 minute story videos with AI images + AI voice.")
14
 
15
- prompt = gr.Textbox(label="Story Topic", placeholder="ex: A lonely robot on Mars finds a friend...")
16
- duration = gr.Slider(1, 10, value=5, step=1, label="Story duration (minutes)")
17
- btn = gr.Button("Generate Story Video")
18
 
19
- story_output = gr.Textbox(label="Generated Story")
20
- audio_output = gr.Audio(label="AI Narration")
21
- video_output = gr.Video(label="Final AI Story Video")
 
 
 
 
 
 
22
 
23
- btn.click(create_story_video, inputs=[prompt, duration],
24
- outputs=[story_output, audio_output, video_output])
25
 
26
- app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from diffusers import StableDiffusionPipeline
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor
4
+ import os, random
5
+ from moviepy.editor import ImageSequenceClip, AudioFileClip
6
 
7
# ---------- STORY GENERATION ----------
_STORY_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
_story_model = None
_story_tokenizer = None


def _load_story_model():
    """Load the story LLM and its tokenizer once and cache them.

    The original reloaded the full 7B checkpoint on every request, which
    is prohibitively slow; module-level caching keeps the first-call cost
    only.
    """
    global _story_model, _story_tokenizer
    if _story_model is None:
        _story_model = AutoModelForCausalLM.from_pretrained(
            _STORY_MODEL_ID, torch_dtype=torch.float16, device_map="auto"
        )
        _story_tokenizer = AutoTokenizer.from_pretrained(_STORY_MODEL_ID)
    return _story_model, _story_tokenizer


def generate_story(prompt, duration):
    """Generate a story of roughly ``duration`` minutes about ``prompt``.

    Args:
        prompt: Topic/premise for the story.
        duration: Target length in minutes (only used to phrase the
            instruction; output length is bounded by ``max_new_tokens``).

    Returns:
        The decoded story text (includes the instruction prefix, as the
        full sequence is decoded — same as the original behavior).
    """
    model, tokenizer = _load_story_model()
    inputs = tokenizer(
        f"Write a detailed story of {duration} minutes about: {prompt}",
        return_tensors="pt",
    ).to(model.device)  # follow device_map placement, not a hard-coded "cuda"

    # Inference only — no autograd graph needed.
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=900)
    return tokenizer.decode(output[0], skip_special_tokens=True)
22
 
 
 
 
23
 
24
# ---------- VOICE ----------
def generate_voice(text):
    """Narrate ``text`` with Bark and write it to ``story_audio.wav``.

    Bark only synthesizes ~13-14 seconds of audio per ``generate_audio``
    call, so a multi-minute story passed in one call would be silently
    truncated. We therefore synthesize sentence by sentence and
    concatenate the chunks.

    Args:
        text: The story text to narrate.

    Returns:
        Path to the written WAV file ("story_audio.wav").
    """
    from bark import generate_audio, preload_models
    import numpy as np
    import soundfile as sf

    preload_models()

    # Split on sentence boundaries; drop blank fragments (trailing ".").
    sentences = [s.strip() for s in text.split(".") if s.strip()]
    if not sentences:
        sentences = [text]  # no periods at all — narrate as-is

    chunks = [generate_audio(sentence + ".") for sentence in sentences]
    audio = np.concatenate(chunks)

    path = "story_audio.wav"
    sf.write(path, audio, 24000)  # Bark's native sample rate is 24 kHz
    return path
33
 
 
 
34
 
35
# ---------- IMAGE GENERATION ----------
def generate_images(story):
    """Render up to 10 scene images, one per sentence of ``story``.

    Args:
        story: Story text; each "."-terminated sentence becomes one
            image prompt.

    Returns:
        List of file paths ("img_0.png", "img_1.png", ...) in scene order.
    """
    sd = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16,
    ).to("cuda")

    # Strip and drop blank fragments — a plain split(".") produces an
    # empty trailing element (and whitespace-only pieces) that would be
    # sent to the pipeline as empty prompts.
    scenes = [s.strip() for s in story.split(".") if s.strip()][:10]

    paths = []
    for i, scene in enumerate(scenes):
        img = sd(scene).images[0]
        path = f"img_{i}.png"
        img.save(path)
        paths.append(path)

    return paths
52
+
53
+
54
# ---------- VIDEO ----------
def make_video(images, audio_file):
    """Combine still images and narration audio into the final MP4.

    Each image is shown for an equal share of the narration, so the video
    length matches the audio length. (The original fixed ``fps=1`` made
    the video only ``len(images)`` seconds long, cutting off a
    minutes-long narration.)

    Args:
        images: Ordered list of image file paths.
        audio_file: Path to the narration audio file.

    Returns:
        Path to the written video ("final_story_video.mp4").

    Raises:
        ValueError: If ``images`` is empty.
    """
    if not images:
        raise ValueError("make_video needs at least one image")

    audio_clip = AudioFileClip(audio_file)

    # Spread the images evenly across the whole narration.
    per_image = audio_clip.duration / len(images)
    video_clip = ImageSequenceClip(images, durations=[per_image] * len(images))
    final = video_clip.set_audio(audio_clip)

    out = "final_story_video.mp4"
    # Explicit output fps: the clip's own frame rate is fractional here.
    final.write_videofile(out, fps=24)
    return out