Spaces:
Runtime error
Runtime error
import gradio as gr
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor
# Load the CLIP processor and model once at import time so every request
# reuses the same objects; both come from the same pretrained checkpoint.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch16"
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
def image_similarity(image: Image.Image, positive_prompt: str, negative_prompt: str):
    """Check whether the positive prompt matches the image better than the negative one.

    The image and both prompts are run through the module-level CLIP
    ``processor`` and ``model``; the image-to-text logits are turned into
    probabilities with a softmax over the two prompts.

    Args:
        image: Image to classify.
        positive_prompt: Text describing the action/state to detect.
        negative_prompt: Contrasting text the positive prompt is compared with.

    Returns:
        A ``(is_positive, summary)`` tuple: ``is_positive`` is ``True`` when
        the positive prompt's probability is strictly greater than the
        negative prompt's, and ``summary`` is a string reporting both
        probabilities to four decimal places.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Submitting the form without an image would otherwise fail deep
        # inside the processor with an opaque error.
        raise gr.Error("Please upload an image before submitting.")

    inputs = processor(
        text=[positive_prompt, negative_prompt],
        images=image,
        return_tensors="pt",
        padding=True,
        # CLIP's text encoder has a fixed context length; overly long prompts
        # must be cut to fit instead of crashing the forward pass.
        truncation=True,
    )

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    # Single image, two prompts: row 0 holds one score per prompt. Softmax
    # over the prompt axis turns the scores into probabilities.
    positive_prob, negative_prob = outputs.logits_per_image.softmax(dim=1)[0].tolist()

    is_positive = positive_prob > negative_prob
    summary = f"Probabilities: Positive {positive_prob:.4f}, Negative {negative_prob:.4f}"
    return is_positive, summary
# Input widgets, in the same order as image_similarity's parameters.
input_components = [
    gr.components.Image(type="pil"),
    gr.components.Text(label="Enter positive prompt e.g. 'a smiling face'"),
    gr.components.Text(label="Enter negative prompt e.g. 'a sad face'"),
]

# Output widgets, in the same order as image_similarity's return tuple.
output_components = [
    gr.components.Textbox(label="Result"),
    gr.components.Textbox(label="Probabilities"),
]

interface = gr.Interface(
    fn=image_similarity,
    inputs=input_components,
    outputs=output_components,
    title="Engagify's Image Action Detection",
    description="[Author: Ibrahim Hasani] This Method uses CLIP-VIT [Version: BASE-PATCH-16] to determine if an action is being performed in an image or not. (Binary Classifier). It contrasts an Action against a negative label. Ensure the prompts accurately describe the desired detection.",
    live=False,  # run only on explicit submit, not on every input change
    theme=gr.themes.Monochrome(),
)

# Start the web app; this script is the application's entry point.
interface.launch()