| import base64 |
| from openai import OpenAI |
|
|
class GPT4Vision:
    """
    Small wrapper that sends an image plus a text prompt to an OpenAI chat
    model and returns the model's text reply.
    """

    def __init__(self, client=None):
        """
        Create the wrapper.

        :param client: Optional client object exposing
            ``chat.completions.create``. When omitted, ``OpenAI()`` is
            constructed with no arguments (the original behaviour).
        """
        self.client = OpenAI() if client is None else client

    def encode_image(self, image_file) -> str:
        """
        Encode the image to base64 format.

        :param image_file: Binary file object of the image. It is read from
            its current position to the end.
        :return: Base64 encoded string of the image.
        :raises TypeError: If ``image_file.read()`` returns text instead of
            bytes (e.g. the file was opened without ``"b"`` in its mode).
        """
        raw = image_file.read()
        if isinstance(raw, str):
            # b64encode would raise a TypeError here anyway; raise the same
            # exception type with a message that points at the real cause.
            raise TypeError(
                "image_file must be opened in binary mode; read() returned str"
            )
        return base64.b64encode(raw).decode("utf-8")

    def describe(
        self,
        image_file,
        user_message,
        *,
        mime_type: str = "image/png",
        model: str = "gpt-4o",
        max_tokens: int = 3000,
    ):
        """
        Ask the chat model about an image.

        The image is sent inline as a base64 ``data:`` URL in the same user
        message as ``user_message``.

        :param image_file: Binary file object of the image.
        :param user_message: Custom text message to send as user input.
        :param mime_type: MIME type declared in the data URL. Defaults to
            ``"image/png"``; pass e.g. ``"image/jpeg"`` for other formats so
            the declared type matches the actual bytes.
        :param model: Name of the model to query. Defaults to ``"gpt-4o"``.
        :param max_tokens: Upper bound on the number of tokens generated for
            the reply. Defaults to ``3000``.
        :return: The ``content`` of the first choice's message in the API
            response.
        """
        base64_image = self.encode_image(image_file)

        text_part = {"type": "text", "text": user_message}
        image_part = {
            "type": "image_url",
            "image_url": {
                "url": f"data:{mime_type};base64,{base64_image}"
            },
        }

        response = self.client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [text_part, image_part],
                }
            ],
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
|
|
| |
| |
| |
| |
| |
| |
| |
|
|