| import os |
| import time |
| from typing import List, Tuple, Optional |
| import google.generativeai as genai |
| import gradio as gr |
| from PIL import Image |
| import tempfile |
| import os |
|
|
# API key read from the environment; when it is absent the UI shows a key
# textbox and `bot` uses the value typed there instead.
GOOGLE_API_KEY = os.environ.get("GEMINI_API_KEY")

# Width in pixels that uploaded images are resized to before being sent to
# the model (the height is scaled to keep the aspect ratio).
IMAGE_WIDTH = 512

# System prompt attached to every request made through `model`.
system_instruction_analysis = (
    "You are an expert of the given topic. "
    "Analyze the provided text with a focus on the topic, identifying recent issues, "
    "recent insights, or improvements relevant to academic standards and effectiveness. "
    "Offer actionable advice for enhancing knowledge and suggest real-life examples."
)
model_name = 'gemini-2.5-flash'
# NOTE: the model object is created at import time, before any API key has
# been configured; `bot` calls `genai.configure` right before each request.
model = genai.GenerativeModel(model_name, system_instruction=system_instruction_analysis)
| |
|
|
| |
def preprocess_stop_sequences(stop_sequences: str) -> Optional[List[str]]:
    """Parse a comma-separated string into a list of stop sequences.

    Each entry is stripped of surrounding whitespace. Entries that are empty
    after stripping (from input such as ``"STOP,"`` or ``" , "``) are dropped
    so that no empty stop sequence is handed to the generation config.

    Args:
        stop_sequences: Comma-separated stop sequences; may be empty or None.

    Returns:
        The stripped, non-empty sequences in their original order, or None
        when the input yields no usable sequence.
    """
    if not stop_sequences:
        return None
    stripped = (seq.strip() for seq in stop_sequences.split(","))
    sequences = [seq for seq in stripped if seq]
    # An all-blank input behaves like no input at all.
    return sequences or None
|
|
def preprocess_image(image: Image.Image) -> Image.Image:
    """Resize *image* to ``IMAGE_WIDTH`` pixels wide, keeping its aspect ratio.

    The target height is the original height scaled by the same factor as the
    width, truncated to an integer and clamped to a minimum of 1 pixel.

    Args:
        image: The image to resize.

    Returns:
        The result of ``image.resize`` with the computed target size.
    """
    scaled_height = int(image.height * IMAGE_WIDTH / image.width)
    # Extremely wide images truncate to a height of 0, which is not a usable
    # target size (NOTE(review): Pillow is expected to reject it - confirm
    # against the installed version), so keep at least one row of pixels.
    image_height = max(1, scaled_height)
    return image.resize((IMAGE_WIDTH, image_height))
|
|
def user(text_prompt: str, chatbot):
    """Append the user's message and an empty assistant placeholder to the chat.

    Args:
        text_prompt: The text the user submitted.
        chatbot: The current message history, or None when there is none yet.

    Returns:
        A tuple of an empty string (used to clear the prompt textbox) and a
        new history list ending with the user turn followed by an assistant
        turn whose content is empty.
    """
    history = [] if chatbot is None else chatbot
    new_turns = [
        {"role": "user", "content": text_prompt},
        {"role": "assistant", "content": ""},
    ]
    # Concatenation builds a new list; the incoming history is not mutated.
    return "", history + new_turns
|
|
def _extract_text_prompt(chatbot: List[dict]) -> Optional[str]:
    """Return the stripped text of the pending user turn, or None if absent."""
    if len(chatbot) < 2:
        return None
    raw_content = chatbot[-2]["content"]
    if isinstance(raw_content, list):
        # List-form content: prefer an item's "text" field when it is a dict
        # carrying one (presumably a text content block - verify against the
        # Gradio version in use); otherwise fall back to its string form.
        raw_content = " ".join(
            str(item["text"]) if isinstance(item, dict) and "text" in item else str(item)
            for item in raw_content
        )
    if not isinstance(raw_content, str):
        # Non-text content (or None) carries no usable prompt text.
        return None
    return raw_content.strip() or None


def bot(
    google_key: str,
    image_prompt: Optional[Image.Image],
    temperature: float,
    max_output_tokens: int,
    stop_sequences: str,
    top_k: int,
    top_p: float,
    chatbot: List[dict]
):
    """Generate the assistant reply for the latest user turn and stream it.

    The user's text is read from the second-to-last history entry and the
    reply is written into the last entry, which is expected to be the
    assistant placeholder appended by `user`.

    Args:
        google_key: API key typed into the UI; falls back to GOOGLE_API_KEY.
        image_prompt: Optional image to send along with the text.
        temperature: Sampling temperature for the generation config.
        max_output_tokens: Maximum number of tokens in the response.
        stop_sequences: Comma-separated stop sequences (may be empty).
        top_k: Top-K sampling parameter.
        top_p: Top-P sampling parameter.
        chatbot: Message history as a list of ``{"role", "content"}`` dicts.

    Yields:
        The history list each time the assistant entry's content changes.

    Raises:
        ValueError: If no API key is available.
    """
    google_key = google_key or GOOGLE_API_KEY
    if not google_key:
        raise ValueError("GOOGLE_API_KEY is not set. Please set it up.")

    if not chatbot:
        # Guarantee there is an assistant entry to write the reply into.
        chatbot = [{"role": "assistant", "content": ""}]

    text_prompt = _extract_text_prompt(chatbot)
    has_image = image_prompt is not None

    if not text_prompt and not has_image:
        chatbot[-1]["content"] = "Prompt cannot be empty. Please provide input text or an image."
        yield chatbot
        return
    if has_image:
        if text_prompt:
            text_prompt = f"{text_prompt}. Also, analyze the provided image."
        else:
            text_prompt = "Describe the image"

    genai.configure(api_key=google_key)
    generation_config = genai.types.GenerationConfig(
        temperature=temperature,
        max_output_tokens=max_output_tokens,
        stop_sequences=preprocess_stop_sequences(stop_sequences),
        top_k=top_k,
        top_p=top_p,
    )

    inputs = [text_prompt, preprocess_image(image_prompt)] if has_image else [text_prompt]

    chatbot[-1]["content"] = ""
    try:
        # Iterate the stream directly instead of resolving it first, so text
        # is shown as it arrives rather than after the whole response is done.
        response = model.generate_content(inputs, stream=True, generation_config=generation_config)
        for chunk in response:
            try:
                chunk_text = chunk.text
            except ValueError:
                # A chunk without a text part (e.g. blocked or metadata-only)
                # has nothing to display; skip it instead of aborting.
                continue
            # Emit the chunk in small slices for a typing effect.
            for start in range(0, len(chunk_text), 10):
                chatbot[-1]["content"] += chunk_text[start:start + 10]
                time.sleep(0.01)
                yield chatbot
    except Exception as e:
        # Boundary handler: surface request and stream failures in the chat
        # instead of crashing the event, keeping any text already shown.
        partial = chatbot[-1]["content"]
        error_message = f"Error occurred: {str(e)}"
        chatbot[-1]["content"] = f"{partial}\n\n{error_message}" if partial else error_message
        yield chatbot
        return

    if not chatbot[-1]["content"]:
        # Nothing displayable came back; say so rather than leaving a blank turn.
        chatbot[-1]["content"] = "No text was returned. The response may have been blocked or cut short."
        yield chatbot
| |
# --- UI components -------------------------------------------------------
# Components are created here, outside the Blocks context, and placed into
# the layout later through their .render() methods.

# Only shown when no API key was found in the environment.
google_key_component = gr.Textbox(
    label="Google API Key",
    placeholder="Enter your Google API Key",
    type="password",
    visible=(GOOGLE_API_KEY is None),
)

image_prompt_component = gr.Image(
    label="Input Image (Optional: Figure/Graph)",
    type="pil",
)
chatbot_component = gr.Chatbot(label="Chatbot")
text_prompt_component = gr.Textbox(
    label="Ask",
    placeholder="Type your question here...",
    lines=3,
)
run_button_component = gr.Button("Submit")

# Generation settings, rendered inside the "Customize" accordion.
temperature_component = gr.Slider(
    label="Creativity (Temperature)",
    info="Controls the randomness of the response. Higher values result in more creative answers.",
    minimum=0,
    maximum=1.0,
    step=0.05,
    value=0.4,
)
max_output_tokens_component = gr.Slider(
    label="Response Length (Token Limit)",
    info="Sets the maximum number of tokens in the output response.",
    minimum=1,
    maximum=2048,
    step=1,
    value=1024,
)
stop_sequences_component = gr.Textbox(
    label="Stop Sequences (Optional)",
    info="Specify sequences to stop the generation.",
    placeholder="Enter stop sequences, e.g., STOP, END",
)
top_k_component = gr.Slider(
    label="Top-K Sampling",
    info="Limits token selection to the top K most probable tokens. Lower values produce conservative outputs.",
    minimum=1,
    maximum=40,
    step=1,
    value=32,
)
top_p_component = gr.Slider(
    label="Top-P Sampling",
    info="Limits token selection to tokens with a cumulative probability up to P. Lower values produce conservative outputs.",
    minimum=0,
    maximum=1,
    step=0.01,
    value=1,
)

# Sample prompts offered in the "Example Text" accordion.
example_scenarios = [
    "Describe Multimodal AI",
    "What are the difference between muliagent llm and multiagent system",
    "Why it's difficult to intgrate multimodality in prompt",
]
# One row per example; each row holds the path for the image input.
example_images = [
    ["ex1.png"],
    ["ex2.png"],
]
|
|
|
|
| |
# Inputs for `user`: the prompt textbox and the current chat history.
user_inputs = [
    text_prompt_component,
    chatbot_component,
]

# Inputs for `bot`, listed in the same order as its parameters.
bot_inputs = [
    google_key_component,         # google_key
    image_prompt_component,       # image_prompt
    temperature_component,        # temperature
    max_output_tokens_component,  # max_output_tokens
    stop_sequences_component,     # stop_sequences
    top_k_component,              # top_k
    top_p_component,              # top_p
    chatbot_component,            # chatbot
]
|
|
|
|
# --- Layout and event wiring ---------------------------------------------
with gr.Blocks(theme="earneleh/paris") as demo:
    # Title matches the configured model ('gemini-2.5-flash').
    gr.Markdown("<h1 style='font-size: 36px; font-weight: bold; font-family: Arial;'>Gemini 2.5 Multimodal Chatbot</h1>")
    with gr.Row():
        google_key_component.render()
    with gr.Row():
        chatbot_component.render()
    with gr.Row():
        # `scale` is a relative weight and should be an integer; three equal
        # integer weights keep the three columns at equal width.
        with gr.Column(scale=1):
            text_prompt_component.render()
        with gr.Column(scale=1):
            image_prompt_component.render()
        with gr.Column(scale=1):
            run_button_component.render()
    with gr.Accordion("🧪Example Text 💬", open=False):
        example_radio = gr.Radio(
            choices=example_scenarios,
            label="Example Queries",
            info="Select an example query.")

        # Copy the selected example into the prompt textbox.
        example_radio.change(
            fn=lambda query: query if query else "No query selected.",
            inputs=[example_radio],
            outputs=[text_prompt_component])

    with gr.Accordion("🧪Example Image 🩻", open=False):
        gr.Examples(
            examples=example_images,
            inputs=[image_prompt_component],
            label="Example Figures",
        )
    with gr.Accordion("🛠️Customize", open=False):
        temperature_component.render()
        max_output_tokens_component.render()
        stop_sequences_component.render()
        top_k_component.render()
        top_p_component.render()

    # Two-step submit: `user` records the turn and clears the textbox, then
    # `bot` streams the reply into the chatbot.
    run_button_component.click(
        fn=user, inputs=user_inputs, outputs=[text_prompt_component, chatbot_component]
    ).then(
        fn=bot, inputs=bot_inputs, outputs=[chatbot_component]
    )
demo.launch()