| import os |
| import requests |
| from PIL import Image |
| import torch |
| from torchvision import transforms |
| from transformers import ( |
| VisionEncoderDecoderModel, |
| ViTImageProcessor, |
| AutoTokenizer, |
| BlipProcessor, |
| BlipForConditionalGeneration, |
| ) |
| from diffusers import ( |
| DiffusionPipeline, |
| StableDiffusionPipeline, |
| StableDiffusionImageVariationPipeline, |
| ) |
|
|
def generate_image_caption(image_path, output_path="img1.jpg"):
    """Generate a Stable Diffusion *image variation* of ``image_path``.

    NOTE(review): despite its name, this function does not produce a text
    caption -- it runs the lambdalabs image-variation pipeline on the input
    image and saves a newly generated image. The name is kept unchanged for
    backward compatibility with existing callers.

    Parameters
    ----------
    image_path : str
        Path to the source image. Any mode PIL can open is accepted; the
        image is converted to RGB before preprocessing.
    output_path : str, optional
        Where the generated variation is written (default ``"img1.jpg"``,
        matching the previously hard-coded path).
    """
    device = torch.device("cpu")
    # Removed: os.environ["CUDA_LAUNCH_BLOCKING"] = "1" -- that flag only
    # affects CUDA kernel launches and is a no-op on a CPU-only run.

    # Only the image-variation pipeline is needed. The original code also
    # loaded a second, never-used DiffusionPipeline from the same repo,
    # doubling the download and memory cost for no effect.
    sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained(
        "lambdalabs/sd-image-variations-diffusers", revision="v2.0"
    )
    sd_pipe = sd_pipe.to(device)

    # Preprocessing per the lambdalabs model card: ToTensor, then a 224x224
    # bicubic resize WITHOUT antialiasing (the encoder was trained that
    # way), then normalization to [-1, 1].
    img_transforms = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Resize(
                (224, 224),
                interpolation=transforms.InterpolationMode.BICUBIC,
                antialias=False,
            ),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ]
    )

    with Image.open(image_path) as img:
        # Bug fix: convert to RGB so the 3-channel Normalize stats also
        # work for grayscale / RGBA / palette inputs.
        img_tensor = img_transforms(img.convert("RGB")).to(device).unsqueeze(0)

    out = sd_pipe(img_tensor, guidance_scale=3)
    # Pipeline outputs expose generated images via the `.images` attribute.
    out.images[0].save(output_path)
|
|
| |
|
|
|
|
|
|
| generate_image_caption("C:\Master\First.jpg") |
|
|
|
|
|
|