# -*- coding: utf-8 -*-
"""Chaty AI original.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1V1aACjqfyQoyVUSHPpKT3vEYS49J7l-8
"""

# FIX: a bare `pip install ...` line is a SyntaxError in a .py file; use the
# IPython shell-escape API (the same mechanism the notebook's `!pip` magics
# compile down to, and already used later in this file for bcrypt).
get_ipython().system('pip install diffusers transformers torch accelerate cohere langchain gradio')

import os
from google.colab import userdata  # Enables notebook access to Colab secrets.

# Retrieve the COHERE_API_KEY from Colab's secret manager and also expose it
# via the environment for libraries that read COHERE_API_KEY themselves.
COHERE_API_KEY = userdata.get('COHERE_API_KEY')
os.environ["COHERE_API_KEY"] = COHERE_API_KEY
print("COHERE_API_KEY loaded successfully.")

import torch
from diffusers import StableDiffusionPipeline

# 1. Specify the model ID for Stable Diffusion
model_id = "runwayml/stable-diffusion-v1-5"

# 2. Load the pre-trained Stable Diffusion model.
# Check if CUDA is available and set the device accordingly.
device = "cuda" if torch.cuda.is_available() else "cpu"
# FIX: the original always requested torch.float16 and then moved the pipeline
# to `device`; half precision is not supported for most CPU ops, so only
# request fp16 when a GPU is actually available.
dtype = torch.float16 if device == "cuda" else torch.float32
pipeline = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype)
pipeline.to(device)
print(f"Stable Diffusion model '{model_id}' loaded successfully on {device}.")

# 3. Define a sample text prompt for image generation
prompt = "a photo of an astronaut riding a horse on mars"

# 4. Generate an image using the loaded pipeline and the prompt
print(f"Generating image for prompt: '{prompt}'...")
image = pipeline(prompt).images[0]

# 5. Display the generated image (a bare expression renders it in a notebook cell;
# it is a harmless no-op when run as a plain script).
print("Image generated successfully.")
image

get_ipython().system('pip install langchain-cohere')
print("langchain-cohere installed successfully.")

from langchain_cohere import ChatCohere
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.messages import HumanMessage, AIMessage

print("Cohere and Langchain components imported successfully.")

import os
from google.colab import userdata

# Retrieve the COHERE_API_KEY from Colab secrets again — repeated on purpose so
# this notebook cell can run standalone after a runtime restart.
COHERE_API_KEY = userdata.get('COHERE_API_KEY')
os.environ["COHERE_API_KEY"] = COHERE_API_KEY

from langchain_cohere import ChatCohere

llm = ChatCohere(cohere_api_key=COHERE_API_KEY)
print("Cohere LLM initialized successfully.")

# First version of the system prompt: no machine-readable image marker yet.
system_message_template = SystemMessagePromptTemplate.from_template(
    "You are a helpful AI assistant that can generate creative images based on user descriptions. "
    "You can also answer questions and engage in conversation. "
    "If a user asks for an image, extract the key elements for a Stable Diffusion prompt."
)
human_message_template = HumanMessagePromptTemplate.from_template("User: {user_input}")
chat_prompt_template = ChatPromptTemplate.from_messages([
    system_message_template,
    human_message_template
])
print("ChatPromptTemplate created successfully.")

sample_question = "What kind of images can you generate?"
# Create a list of messages using the chat_prompt_template
formatted_messages = chat_prompt_template.format_messages(user_input=sample_question)
# Invoke the LLM with the formatted messages
response = llm.invoke(formatted_messages)
print(f"AI Response: {response.content}")

# Second version of the system prompt: instructs the model to flag image
# requests with the 'IMAGE_PROMPT: ' prefix so code can detect them reliably.
system_message_template = SystemMessagePromptTemplate.from_template(
    "You are a helpful AI assistant that can generate creative images based on user descriptions. "
    "You can also answer questions and engage in conversation. "
    "If a user asks for an image, extract the key elements for a Stable Diffusion prompt and output it in the format 'IMAGE_PROMPT: '. "
    "Otherwise, provide a normal conversational response."
)
human_message_template = HumanMessagePromptTemplate.from_template("User: {user_input}")
chat_prompt_template = ChatPromptTemplate.from_messages([
    system_message_template,
    human_message_template
])
print("ChatPromptTemplate updated successfully with image generation instruction.")


def handle_user_input(user_message: str):
    """Route one user message: answer conversationally or generate an image.

    Returns a tuple ``(full_response, generated_image)`` where
    ``generated_image`` is a PIL image when the LLM flagged an image request,
    otherwise ``None``.
    """
    # 2. Format the user_message using the chat_prompt_template
    formatted_messages = chat_prompt_template.format_messages(user_input=user_message)
    # 3. Invoke the LLM with the formatted messages
    ai_response = llm.invoke(formatted_messages)
    response_content = ai_response.content

    generated_image = None

    # 4. The system prompt asks the model to prefix image requests with this marker.
    IMAGE_PROMPT_PREFIX = "IMAGE_PROMPT: "
    if response_content.startswith(IMAGE_PROMPT_PREFIX):
        # 5. Extract the actual image description
        image_description = response_content[len(IMAGE_PROMPT_PREFIX):].strip()
        print(f"AI detected an image request. Generating image for: '{image_description}'")
        try:
            # 6. Use the pipeline to generate an image
            generated_image = pipeline(image_description).images[0]
            full_response = f"Image generated successfully based on your request: '{image_description}'"
        except Exception as e:
            full_response = f"Could not generate image for '{image_description}'. Error: {e}"
            generated_image = None
    else:
        # Otherwise, it's a normal conversational response
        full_response = response_content

    # 7. Return both the AI's full textual response and the generated image object
    return full_response, generated_image


# 8. Test the handle_user_input function with a sample conversational input
conversational_input = "Tell me a fun fact about AI."
print(f"\nUser: {conversational_input}")
ai_text_response, generated_image_output = handle_user_input(conversational_input)
print(f"AI: {ai_text_response}")
# BUG FIX: the original tested `if generated_image_output:` and therefore
# printed "No image expected..." only when an image HAD been generated.
# The condition is inverted so the message matches reality.
if generated_image_output is None:
    print("No image expected for this conversational input.")

import torch
from diffusers import StableDiffusionPipeline

# 1. Specify the model ID for Stable Diffusion
model_id = "runwayml/stable-diffusion-v1-5"

# 2. Re-load the pipeline — repeated on purpose so this cell runs standalone
# after a runtime restart.
device = "cuda" if torch.cuda.is_available() else "cpu"
# FIX: request fp16 only on GPU; CPU inference needs float32.
dtype = torch.float16 if device == "cuda" else torch.float32
pipeline = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype)
pipeline.to(device)
print(f"Stable Diffusion model '{model_id}' loaded successfully on {device}.")

image_request_input = "Generate an image of a futuristic city at sunset with flying cars."
print(f"\nUser: {image_request_input}")
ai_text_response, generated_image_output = handle_user_input(image_request_input)
print(f"AI: {ai_text_response}")
if generated_image_output:
    print("Displaying generated image:")
    display(generated_image_output)  # IPython rich display
else:
    print("No image was generated for this request.")

import gradio as gr

print("Gradio library imported successfully.")


def gradio_interface_fn(user_query: str):
    """Gradio adapter (v1): forward one query and return (text, image)."""
    ai_text_response, generated_image_output = handle_user_input(user_query)
    print(f"DEBUG: Type of generated_image_output: {type(generated_image_output)}")
    print(f"DEBUG: Is generated_image_output None?: {generated_image_output is None}")
    if generated_image_output is not None:
        print(f"DEBUG: Generated image mode: {generated_image_output.mode}, size: {generated_image_output.size}")
    return ai_text_response, generated_image_output


print("Gradio interface function 'gradio_interface_fn' defined.")

import base64
import os

image_filename = "Chaty (1).png"
image_path_in_colab = image_filename
base64_image_tag = ""
favicon_data_uri = ""  # New variable to store the base64 data URI for the favicon

if os.path.exists(image_path_in_colab):
    try:
        with open(image_path_in_colab, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
        # NOTE(review): this literal contains no placeholder — the original
        # HTML <img> markup appears to have been stripped by the notebook
        # export. Kept byte-identical; confirm against the source notebook.
        base64_image_tag = f"Chaty Logo"
        favicon_data_uri = f"data:image/png;base64,{encoded_string}"  # Populate the favicon data URI
        print("Image encoded to Base64 successfully and data URI for favicon set.")
    except Exception as e:
        base64_image_tag = ""
        favicon_data_uri = ""
        print(f"An error occurred while encoding the image: {e}")
else:
    base64_image_tag = ""
    favicon_data_uri = ""
    print(f"Warning: Image file '{image_filename}' not found. Please upload this file to your Colab environment if you wish to display the logo and favicon.")

# FIX: `!pip` is IPython-only syntax; use the shell-escape API so the file parses.
get_ipython().system('pip install gTTS')
print("gTTS installed successfully.")

chat_history = []
print("Empty chat history list 'chat_history' initialized.")

from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from gtts import gTTS
import os


def handle_user_input(user_message: str, chat_history: list):
    """Handle one turn: answer or generate an image, speak the reply, update history.

    Redefines the v1 helper with two additions: the LLM call now includes the
    running ``chat_history`` (mutated in place and also returned), and the
    textual reply is rendered to MP3 with gTTS.

    Returns ``(full_response, generated_image, audio_output_path, chat_history)``.
    """
    # Create the current human message
    current_human_message = HumanMessage(content=user_message)

    # Get the system message content from the existing template
    system_message_content = system_message_template.format().content
    system_message = SystemMessage(content=system_message_content)

    # Combine system message, chat history, and current human message for the LLM call
    messages_for_llm = [system_message] + chat_history + [current_human_message]

    # Invoke the LLM with the combined messages
    ai_response = llm.invoke(messages_for_llm)
    response_content = ai_response.content

    generated_image = None
    full_response = ""       # Initialize full_response
    audio_output_path = None  # Initialize audio output path

    # Implement logic to determine if an image generation is implied
    IMAGE_PROMPT_PREFIX = "IMAGE_PROMPT: "
    if response_content.startswith(IMAGE_PROMPT_PREFIX):
        # Extract the actual image description
        image_description = response_content[len(IMAGE_PROMPT_PREFIX):].strip()
        print(f"AI detected an image request. Generating image for: '{image_description}'")
        try:
            # Use the pipeline to generate an image
            generated_image = pipeline(image_description).images[0]
            full_response = f"Image generated successfully based on your request: '{image_description}'"
        except Exception as e:
            full_response = f"Could not generate image for '{image_description}'. Error: {e}"
            generated_image = None
    else:
        # Otherwise, it's a normal conversational response
        full_response = response_content

    # Generate speech for the full_response; audio is best-effort, so failures
    # only disable the voice output rather than aborting the turn.
    try:
        tts = gTTS(text=full_response, lang='es')  # Assuming Spanish for the voice output
        audio_file_name = "ai_response.mp3"
        tts.save(audio_file_name)
        audio_output_path = audio_file_name
        print(f"Audio generated and saved to {audio_output_path}")
    except Exception as e:
        print(f"Error generating audio: {e}")
        audio_output_path = None

    # Update chat history with the current user message and the AI's raw response
    chat_history.append(current_human_message)
    chat_history.append(AIMessage(content=ai_response.content))

    # Return the AI's full textual response, the generated image object,
    # the audio path, and the updated chat history
    return full_response, generated_image, audio_output_path, chat_history


print("handle_user_input function updated to include chat history and speech generation.")

from langchain_core.prompts import MessagesPlaceholder

# Re-define the system message template (it was already defined but this ensures it's current)
system_message_template = SystemMessagePromptTemplate.from_template(
    "You are a helpful AI assistant that can generate creative images based on user descriptions. "
    "You can also answer questions and engage in conversation. "
    "If a user asks for an image, extract the key elements for a Stable Diffusion prompt and output it in the format 'IMAGE_PROMPT: '. "
    "Otherwise, provide a normal conversational response."
)
# Re-define the human message template
human_message_template = HumanMessagePromptTemplate.from_template("User: {user_input}")
# Re-define the chat_prompt_template to include MessagesPlaceholder for chat history
chat_prompt_template = ChatPromptTemplate.from_messages([
    system_message_template,
    MessagesPlaceholder(variable_name="chat_history"),
    human_message_template
])
print("ChatPromptTemplate updated successfully for history management.")


def gradio_interface_fn(user_query: str, history: list):
    """Gradio adapter (v2): forward a query plus history, return a 4-tuple."""
    ai_text_response, generated_image_output, audio_output_path, updated_chat_history = handle_user_input(user_query, history)
    print(f"DEBUG: Type of generated_image_output: {type(generated_image_output)}")
    print(f"DEBUG: Is generated_image_output None?: {generated_image_output is None}")
    if generated_image_output is not None:
        print(f"DEBUG: Generated image mode: {generated_image_output.mode}, size: {generated_image_output.size}")
    return ai_text_response, generated_image_output, audio_output_path, updated_chat_history


print("Gradio interface function 'gradio_interface_fn' updated to manage history and audio.")

get_ipython().system('pip install bcrypt')
print("bcrypt library installed successfully.")

# Commented out IPython magic to ensure Python compatibility.
# %%writefile app.py
# import gradio as gr
# import torch
# from diffusers import StableDiffusionPipeline
# from langchain_cohere import ChatCohere
# from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate, MessagesPlaceholder
# from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
# from gtts import gTTS
# import os
# import base64
# import json
# import bcrypt  # Import bcrypt
#
# # --- Configuration and Initialization (from your notebook) ---
# COHERE_API_KEY = os.environ.get('COHERE_API_KEY')
#
# # --- User Management Functions (from previous steps) ---
# USERS_FILE = 'users.json'
# CHAT_HISTORY_FILE_PATTERN = 'chat_history_{}.json'  # Pattern for user-specific chat history files
#
# def load_users():
#     """Loads user data from the USERS_FILE. Returns an empty dictionary if the file doesn't exist."""
#     if os.path.exists(USERS_FILE):
#         with open(USERS_FILE, 'r') as f:
#             return json.load(f)
#     return {}
#
# def save_users(users):
#     """Saves user data to the USERS_FILE."""
#     with open(USERS_FILE, 'w') as f:
#         json.dump(users, f, indent=4)
#
# def hash_password(password):
#     """
#     Hashes a password using bcrypt.
#     The password is encoded to bytes, a salt is generated, and then the password is hashed.
#     Returns the hashed password as a UTF-8 decoded string.
#     """
#     hashed = bcrypt.hashpw(password.encode('utf-8'), bcrypt.gensalt())
#     return hashed.decode('utf-8')
#
# def register_user(username, password):
#     """
#     Registers a new user with the given username and password.
#     Hashes the password and saves it to users.json.
#     Handles cases where the username already exists.
#     """
#     users = load_users()
#     if username in users:
#         return "Error: Username already exists."
#
#     hashed_password = hash_password(password)
#     users[username] = hashed_password
#     save_users(users)
#     # Initialize an empty chat history for the new user
#     save_chat_history(username, [])
#     return f"User '{username}' registered successfully."
#
# def authenticate_user(username, password):
#     """
#     Authenticates a user against the stored hashed passwords in users.json.
#     Returns True if authentication is successful, False otherwise.
#     """
#     users = load_users()
#     stored_hashed_password = users.get(username)
#
#     if stored_hashed_password:
#         try:
#             return bcrypt.checkpw(password.encode('utf-8'), stored_hashed_password.encode('utf-8'))
#         except ValueError:
#             return False
#     return False
#
# def load_chat_history(username):
#     """
#     Loads a user's chat history from their specific JSON file.
#     Converts stored dicts back into Langchain HumanMessage/AIMessage objects.
#     """
#     history_file = CHAT_HISTORY_FILE_PATTERN.format(username)
#     if os.path.exists(history_file):
#         with open(history_file, 'r') as f:
#             raw_history = json.load(f)
#         chat_history = []
#         for msg in raw_history:
#             if msg['type'] == 'human':
#                 chat_history.append(HumanMessage(content=msg['content']))
#             elif msg['type'] == 'ai':
#                 chat_history.append(AIMessage(content=msg['content']))
#             elif msg['type'] == 'system':
#                 chat_history.append(SystemMessage(content=msg['content']))
#         return chat_history
#     return []
#
# def save_chat_history(username, chat_history):
#     """
#     Saves a user's chat history (Langchain Message objects) to their specific JSON file.
#     Converts Langchain Message objects to dictionaries for JSON serialization.
#     """
#     history_file = CHAT_HISTORY_FILE_PATTERN.format(username)
#     raw_history = []
#     for msg in chat_history:
#         raw_history.append({'type': msg.type, 'content': msg.content})
#     with open(history_file, 'w') as f:
#         json.dump(raw_history, f, indent=4)
#
# def convert_langchain_to_gradio_chatbot_history(langchain_history):
#     """
#     Converts a list of Langchain Message objects to Gradio Chatbot format.
#     Gradio Chatbot expects a list of lists: [[user_message, ai_message], ...]
#     """
#     gradio_history = []
#     # Ensure we iterate in pairs (human, AI)
#     for i in range(0, len(langchain_history), 2):
#         human_msg = langchain_history[i].content if i < len(langchain_history) else ""
#         # Check if there's a corresponding AI message
#         ai_msg = langchain_history[i+1].content if i+1 < len(langchain_history) else ""
#         gradio_history.append([human_msg, ai_msg])
#     return gradio_history
#
# # Initialize users.json if it doesn't exist
# if not os.path.exists(USERS_FILE):
#     save_users({})  # Save an empty dictionary as a valid JSON
#
#
# # --- Stable Diffusion Model Initialization ---
# model_id = "runwayml/stable-diffusion-v1-5"
# device = "cuda" if torch.cuda.is_available() else "cpu"
# pipeline = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
# pipeline.to(device)
#
# # --- Cohere LLM Initialization ---
# llm = ChatCohere(cohere_api_key=COHERE_API_KEY)
#
# # --- Prompt Templates ---
# system_message_template = SystemMessagePromptTemplate.from_template(
#     "You are a helpful AI assistant that can generate creative images based on user descriptions. "
#     "You can also answer questions and engage in conversation. "
#     "If a user asks for an image, extract the key elements for a Stable Diffusion prompt and output it in the format 'IMAGE_PROMPT: '. "
#     "Otherwise, provide a normal conversational response."
# )
# human_message_template = HumanMessagePromptTemplate.from_template("User: {user_input}")
# chat_prompt_template = ChatPromptTemplate.from_messages([
#     system_message_template,
#     MessagesPlaceholder(variable_name="chat_history"),
#     human_message_template
# ])
#
# # --- Core Logic Functions ---
# def handle_user_input(user_message: str, chat_history: list, username: str):
#     current_human_message = HumanMessage(content=user_message)
#     system_message_content = system_message_template.format().content
#     system_message = SystemMessage(content=system_message_content)
#     messages_for_llm = [system_message] + chat_history + [current_human_message]
#
#     ai_response = llm.invoke(messages_for_llm)
#     response_content = ai_response.content
#
#     generated_image = None
#     full_response = ""
#     audio_output_path = None
#
#     IMAGE_PROMPT_PREFIX = "IMAGE_PROMPT: "
#     if response_content.startswith(IMAGE_PROMPT_PREFIX):
#         image_description = response_content[len(IMAGE_PROMPT_PREFIX):].strip()
#         try:
#             generated_image = pipeline(image_description).images[0]
#             full_response = f"Image generated successfully based on your request: '{image_description}'"
#         except Exception as e:
#             full_response = f"Could not generate image for '{image_description}'. Error: {e}"
#             generated_image = None
#     else:
#         full_response = response_content
#
#     try:
#         tts = gTTS(text=full_response, lang='es')
#         audio_file_name = f"ai_response_{username}.mp3" if username else "ai_response_guest.mp3"
#         tts.save(audio_file_name)
#         audio_output_path = audio_file_name
#     except Exception as e:
#         audio_output_path = None
#
#     chat_history.append(current_human_message)
#     chat_history.append(AIMessage(content=ai_response.content))
#
#     if username:
#         save_chat_history(username, chat_history)
#
#     return full_response, generated_image, audio_output_path, chat_history
#
#
# def gradio_interface_fn(user_query: str, history: list, username: str):
#     if not user_query:  # Handle empty input case
#         return "Por favor, introduce un mensaje.", None, None, history, convert_langchain_to_gradio_chatbot_history(history)
#
#     ai_text_response, generated_image_output, audio_output_path, updated_chat_history_langchain = handle_user_input(user_query, history, username)
#     gradio_chat_history_display = convert_langchain_to_gradio_chatbot_history(updated_chat_history_langchain)
#     return ai_text_response, generated_image_output, audio_output_path, updated_chat_history_langchain, gradio_chat_history_display
#
# # --- Gradio Registration Function ---
# def register_gradio_fn(username, password):
#     return register_user(username, password)
#
# # --- Gradio Login Function ---
# def login_gradio_fn(username, password):
#     if authenticate_user(username, password):
#         user_history_langchain = load_chat_history(username)
#         gradio_chat_history_display = convert_langchain_to_gradio_chatbot_history(user_history_langchain)
#         return (
#             f"¡Bienvenido, {username}! Has iniciado sesión con éxito.",
#             username,
#             user_history_langchain,
#             gradio_chat_history_display
#         )
#     else:
#         return (
#             "Error: Nombre de usuario o contraseña incorrectos.",
#             '',
#             [],
#             []
#         )
#
# # --- Gradio Logout Function ---
# def logout_gradio_fn():
#     return (
#         "Has cerrado sesión.",
#         '',
#         [],
#         []
#     )
#
# # --- Gradio UI ---
# image_filename = "Chaty (1).png"
# base64_image_tag = ""
# favicon_data_uri = ""
#
# if os.path.exists(image_filename):
#     try:
#         with open(image_filename, "rb") as image_file:
#             encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
#         # NOTE(review): the next literal lost its HTML markup in the notebook
#         # export — confirm against the source notebook.
#         base64_image_tag = f"Chaty Logo"
#         favicon_data_uri = f"data:image/png;base64,{encoded_string}"
#     except Exception as e:
#         pass  # Handle error silently in UI context
#
# head_content = ""
# if favicon_data_uri:
#     head_content = f""
#
# with gr.Blocks(
#     title='Chaty',
#     theme=gr.themes.Soft(),
#     head=head_content
# ) as demo:
#     logged_in_user = gr.State('')  # Initialize logged_in_user state
#     chatbot_history_state = gr.State([])  # State for Langchain messages
#
#     gr.Markdown(f"{base64_image_tag} Un asistente de IA interactivo que utiliza Stable Diffusion para la generaci\u00f3n de im\u00e1genes y Cohere/Langchain para la comprensi\u00f3n del lenguaje y la generaci\u00f3n de respuestas. Si pides una imagen, la generar\u00e1; de lo contrario, responder\u00e1 conversacionalmente. **\u00a1Ahora con respuestas de voz!**")
#
#     # Define chatbot UI elements at a higher scope
#     chatbot_ui_elements_column = gr.Column(visible=True)
#     with chatbot_ui_elements_column:
#         chatbot_display_chatbot = gr.Chatbot(label='Chaty', value=[], elem_id='chatbot', height=400)
#         chatbot_input_textbox = gr.Textbox(lines=2, label='Escribe tu mensaje o petición de imagen aquí...', interactive=True)
#         chatbot_submit_button = gr.Button("Enviar", interactive=True)
#         ai_response_textbox = gr.Textbox(lines=5, label='Respuesta de la IA', interactive=False)
#         image_output = gr.Image(label='Imagen Generada')
#         audio_output = gr.Audio(label='Respuesta de Voz', autoplay=True)
#
#     with gr.Tab("Inicio de Sesión"):
#         gr.Markdown("## Iniciar Sesión")
#         with gr.Column():
#             login_username = gr.Textbox(label="Nombre de Usuario")
#             login_password = gr.Textbox(label="Contraseña", type="password")
#             login_button = gr.Button("Iniciar Sesión")
#             login_output = gr.Textbox(label="Mensaje de Inicio de Sesión", interactive=False)
#             current_logged_in_user_display = gr.Textbox(label="Usuario Actual", interactive=False, value="No has iniciado sesión.")
#             logout_button = gr.Button("Cerrar Sesión")
#
#         login_button.click(
#             fn=login_gradio_fn,
#             inputs=[login_username, login_password],
#             outputs=[
#                 login_output,
#                 logged_in_user,
#                 chatbot_history_state,
#                 chatbot_display_chatbot
#             ]
#         )
#
#         logout_button.click(
#             fn=logout_gradio_fn,
#             inputs=[],
#             outputs=[
#                 login_output,
#                 logged_in_user,
#                 chatbot_history_state,
#                 chatbot_display_chatbot
#             ]
#         )
#
#         logged_in_user.change(
#             lambda user: f"Has iniciado sesión como: {user}" if user else "No has iniciado sesión.",
#             inputs=logged_in_user,
#             outputs=current_logged_in_user_display
#         )
#
#     with gr.Tab("Chatbot"):
#         chatbot_submit_button.click(
#             fn=gradio_interface_fn,
#             inputs=[chatbot_input_textbox, chatbot_history_state, logged_in_user],  # Removed audio_input
#             outputs=[
#                 ai_response_textbox,
#                 image_output,
#                 audio_output,
#                 chatbot_history_state,
#                 chatbot_display_chatbot
#             ]
#         ).then(
#             lambda: "",  # Clear the input textbox after submission
#             inputs=None,
#             outputs=chatbot_input_textbox
#         )
#
#     with gr.Tab("Registro de Usuario"):
#         gr.Markdown("## Registrar Nuevo Usuario")
#         with gr.Column():
#             register_username = gr.Textbox(label="Nombre de Usuario")
#             register_password = gr.Textbox(label="Contraseña", type="password")
#             register_button = gr.Button("Registrar")
#             register_output = gr.Textbox(label="Mensaje de Registro", interactive=False)
#
#         register_button.click(
#             fn=register_gradio_fn,
#             inputs=[register_username, register_password],
#             outputs=register_output
#         )
#
# demo.launch(debug=True, share=True)

# FIX: `!pip ...` is IPython-only syntax and a SyntaxError in a .py file; use
# the shell-escape API, consistent with the other install calls in this file.
get_ipython().system('pip install langchain-cohere')
get_ipython().system('python app.py')