diff --git a/storybook_generation/README.md b/storybook_generation/README.md new file mode 100644 index 0000000..742e2eb --- /dev/null +++ b/storybook_generation/README.md @@ -0,0 +1,21 @@ +# Storybook Generation +This workflow generates a video story from a paragraph of text. It uses `StableDiffusionWalker` on pairwise sentences to generate video clips, captions them with each sentence, and stitches them together into a video. + +## Examples +Here's a good starter prompt: `Once upon a time, there was a small bird named Poppy. Poppy was a curious bird who loved to explore the world around her. One day, as she was flying over the fields, she noticed a beautiful flower in the distance. Poppy flew closer to the flower and was amazed by its vibrant colors and sweet fragrance. She landed on the flower and started to sip the nectar from its center.` + +## Deploying +Follow our [getting started guide](https://www.sievedata.com/dashboard/welcome) to get your Sieve API key and install the Sieve Python client. + +1. Export API keys & install Python client +``` +export SIEVE_API_KEY={YOUR_API_KEY} +pip install https://mango.sievedata.com/v1/client_package/sievedata-0.0.1.1.2-py3-none-any.whl +``` + +2. 
# ---------------------------------------------------------------------------
# README.md (continued) -- step 2: Deploy a workflow to Sieve
#
#     git clone git@github.com:sieve-community/examples.git
#     cd examples/storybook_generation
#     sieve deploy
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# storybook_generation/caption_combine.py
# ---------------------------------------------------------------------------
import sieve


@sieve.function(
    name="video-captioner-combiner",
    gpu=False,
    python_packages=[
        "moviepy==1.0.3",
        "opencv-python==4.6.0.66",
        # NOTE(review): `uuid` ships with the standard library; this pin looks
        # unnecessary -- confirm and drop.
        "uuid==1.30",
    ],
    python_version="3.8",
    iterator_input=True,
    persist_output=True,
)
def caption_and_combine(videos, prompt_pairs) -> sieve.Video:
    """Caption every clip with its sentence and stitch the clips into one video.

    Args:
        videos: Iterable of video objects. Each one is read through its
            ``path`` attribute, and the clips are ordered by their
            ``video_number`` attribute before captioning.
        prompt_pairs: Iterable of ``(sentence, next_sentence)`` pairs. The
            first sentence of each pair is drawn, centred, on every frame of
            the clip it is zipped with.

    Returns:
        A ``sieve.Video`` whose ``path`` is a freshly written
        ``<uuid4>.mp4`` (relative path) containing all captioned frames.

    Raises:
        ValueError: If no frame could be read from any of the input clips.
    """
    # Imports are local, matching the rest of this project's Sieve functions.
    from moviepy.editor import ImageClip, concatenate_videoclips
    import cv2
    import textwrap
    import uuid

    # Caption styling is constant for the whole run, so define it once.
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_size = 1
    font_thickness = 2
    line_spacing = 10
    # Frames are converted to RGB before drawing, so this tuple is RGB yellow
    # (the same colour the captions had in the final video before the fix).
    caption_color = (255, 255, 0)

    # Sort videos by global ID
    videos = sorted(videos, key=lambda video: video.video_number)

    # Add captions
    images = []
    for v, prompt in zip(videos, prompt_pairs):
        print("Creating video with caption: ", prompt[0])

        # Wrapping and text metrics depend only on the caption, not on the
        # frame, so compute them once per clip instead of once per frame.
        wrapped_text = textwrap.wrap(prompt[0], width=30)
        line_sizes = [
            cv2.getTextSize(line, font, font_size, font_thickness)[0]
            for line in wrapped_text
        ]

        cap = cv2.VideoCapture(v.path)
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes frames as BGR while MoviePy treats arrays as
                # RGB; without this conversion red and blue are swapped in the
                # combined video.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame_height, frame_width = frame.shape[:2]

                for i, (line, (text_w, text_h)) in enumerate(
                    zip(wrapped_text, line_sizes)
                ):
                    # Centre each line horizontally; the first line sits at
                    # the vertical centre and later lines stack below it.
                    x = int((frame_width - text_w) / 2)
                    y = int((frame_height + text_h) / 2) + i * (text_h + line_spacing)
                    cv2.putText(
                        frame,
                        line,
                        (x, y),
                        font,
                        font_size,
                        caption_color,
                        font_thickness,
                        lineType=cv2.LINE_AA,
                    )

                # Add the frame to the list of images
                images.append(frame)
        finally:
            # Always free the decoder handle, even if a frame fails to process.
            cap.release()

    if not images:
        raise ValueError("No frames could be read from the input videos.")

    # Combine the images into a video
    print("Combining all frames into video...")
    clips = [ImageClip(m).set_duration(0.25) for m in images]
    video = concatenate_videoclips(clips)
    video_path = f"{uuid.uuid4()}.mp4"
    video.write_videofile(video_path, fps=30)
    return sieve.Video(path=video_path)


# ---------------------------------------------------------------------------
# storybook_generation/walker.py
# ---------------------------------------------------------------------------
import sieve


@sieve.Model(
    name="run_stable_diff_walk",
    python_packages=[
        "torch==1.13.1",
        "stable_diffusion_videos==0.8.1",
        "accelerate==0.16.0",
    ],
    system_packages=[
        "libgl1-mesa-glx",
        "libglib2.0-0",
        "ffmpeg",
        "libavcodec58",
        "libsndfile1",
        "git-lfs",
    ],
    gpu=True,
    machine_type="a100",
    run_commands=[
        "mkdir -p /root/.cache/models/stable-diffusion-v1-4",
        "git lfs install",
        "git clone https://huggingface.co/CompVis/stable-diffusion-v1-4 /root/.cache/models/stable-diffusion-v1-4",
    ],
    persist_output=True,
)
class StableDiffusionVideo:
    """Sieve model that renders a short clip interpolating between two prompts."""

    def __setup__(self):
        """Load the Stable Diffusion walk pipeline and reset the clip counter."""
        import torch
        from stable_diffusion_videos import StableDiffusionWalkPipeline

        # Load stable diffusion model from the local clone made in run_commands
        self.pipeline = StableDiffusionWalkPipeline.from_pretrained(
            "/root/.cache/models/stable-diffusion-v1-4",
            torch_dtype=torch.float16,
            revision="fp16",
        ).to("cuda")

        # Running counter attached to each output so clips can be sorted later.
        # NOTE(review): presumably assumes a single instance handles the pairs
        # in order -- confirm against how Sieve schedules this model.
        self.video_number = 0

    def __predict__(self, prompt_pair: tuple) -> sieve.Video:
        """Generate one clip walking from the first prompt to the second.

        Args:
            prompt_pair: Two-item sequence ``(prompt1, prompt2)``.

        Yields:
            A single ``sieve.Video`` carrying the generated file path and a
            1-based ``video_number`` (the counter is incremented before use).
        """
        # The pipeline is already built in __setup__; nothing from torch or
        # stable_diffusion_videos is needed here, so the unused re-imports
        # were removed.
        prompt1, prompt2 = prompt_pair[0], prompt_pair[1]

        # Generate and store video output
        print("Generating video with prompts: " + prompt1 + " | " + prompt2)
        video_path = self.pipeline.walk(
            [prompt1, prompt2],
            [42, 1337],
            fps=5,
            num_interpolation_steps=15,
            height=512,
            width=768,
        )

        # Increment global id
        self.video_number += 1

        # Emit the video (this method is a generator)
        yield sieve.Video(path=video_path, video_number=self.video_number)


# ---------------------------------------------------------------------------
# storybook_generation/workflow.py
# ---------------------------------------------------------------------------
'''
Sieve workflow to generate a storybook video from a piece of writing.
'''

import sieve
from walker import StableDiffusionVideo
from caption_combine import caption_and_combine


# Creates a cleaned up list of sentences from a piece of writing
@sieve.function(name="prompt-to-script")
def prompt_to_script(prompt: str) -> list:
    """Split a piece of writing into a list of trimmed sentences.

    Sentences end at runs of ".", "!" or "?". A sentence whose terminator
    contains "!" or "?" keeps that terminator; every other sentence is
    normalised to end with a single ".". Segments with no text are dropped.
    For text containing only periods this matches the previous
    ``split(".")``-based behaviour exactly.

    Note that periods inside a sentence (e.g. "3.14", "Mr. Smith") are still
    treated as sentence boundaries.

    Args:
        prompt: The raw text to split.

    Returns:
        The sentences in their original order; empty when ``prompt`` has no
        text.
    """
    import re

    script = []
    # Each match is (text up to the next terminator, the terminator run).
    for text, terminator in re.findall(r"([^.!?]+)([.!?]*)", prompt):
        sentence = text.strip()
        if not sentence:
            continue
        if "!" in terminator or "?" in terminator:
            # Keep question/exclamation marks instead of gluing the sentence
            # onto the next one and appending a stray ".".
            script.append(sentence + terminator)
        else:
            script.append(sentence + ".")
    return script


# Generates pairs of sentences from a list of sentences
@sieve.function(name="create-prompt-pairs")
def create_prompt_pairs(script: list) -> tuple:
    """Yield each sentence together with the sentence that follows it.

    Args:
        script: List of sentences.

    Yields:
        ``(script[i], script[i + 1])`` tuples; nothing when ``script`` has
        fewer than two sentences.
    """
    yield from zip(script, script[1:])


@sieve.workflow(name="storybook_generation")
def storybook_generation(prompt: str) -> sieve.Video:
    """Turn a piece of writing into a captioned storybook video.

    Args:
        prompt: The story text.

    Returns:
        The output of ``caption_and_combine`` for the generated clips.
    """
    # Create a script (list of sentences) and pair them up
    print("Generating script and prompt pairs...")
    script = prompt_to_script(prompt)
    prompt_pairs = create_prompt_pairs(script)

    # Generate videos with StableDiffusionWalker
    print("Generating videos...")
    videos = StableDiffusionVideo()(prompt_pairs)

    # Return a captioned and concatenated video
    print("Generating storybook...")
    combined_video = caption_and_combine(videos, prompt_pairs)
    return combined_video