Spaces:

Arcpolar
/

Chatbot

Paused

App Files Community

Arcpolar committed on Oct 23, 2023

Commit

98cc856

1 Parent(s): b128f46

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -75

app.py CHANGED Viewed

@@ -1,104 +1,78 @@
 from transformers import AutoTokenizer
-import transformers
-import torch
 from transformers import pipeline
 import gradio as gr
-model = "arcpolar/Ubuntu_Llama_Chat_7B" # test
-tokenizer = AutoTokenizer.from_pretrained(model)#, use_auth_token=True)
-llama_pipeline = pipeline(
-    "text-generation",  # LLM task
-    model=model,
-    torch_dtype=torch.float16,
-    device_map="auto",
 )
-def get_response(prompt: str) -> None:
-    """
-    Generate a response from the Llama model.
-    Parameters:
-        prompt (str): The user's input/question for the model.
-    Returns:
-        None: Prints the model's response.
-    """
-    sequences = llama_pipeline(
-        prompt,
-        do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
-        max_length=256,
-    )
-    print("Chatbot:", sequences[0]['generated_text'])
 SYSTEM_PROMPT = """<s>[INST] <<SYS>>
 You are a helpful bot. Your answers are clear and concise.
 <</SYS>>
 """
 # Formatting function for message and history
-def format_message(message: str, history: list, memory_limit: int = 3) -> str:
-    """
-    Formats the message and history for the Llama model.
-    Parameters:
-        message (str): Current message to send.
-        history (list): Past conversation history.
-        memory_limit (int): Limit on how many past interactions to consider.
-    Returns:
-        str: Formatted message string
-    """
-    # always keep len(history) <= memory_limit
     if len(history) > memory_limit:
         history = history[-memory_limit:]
     if len(history) == 0:
         return SYSTEM_PROMPT + f"{message} [/INST]"
     formatted_message = SYSTEM_PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"
-    # Handle conversation history
     for user_msg, model_answer in history[1:]:
         formatted_message += f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"
-    # Handle the current message
     formatted_message += f"<s>[INST] {message} [/INST]"
     return formatted_message
-# Generate a response from the Llama model
-def get_llama_response(message: str, history: list) -> str:
-    """
-    Generates a conversational response from the Llama model.
-    Parameters:
-        message (str): User's input message.
-        history (list): Past conversation history.
-    Returns:
-        str: Generated response from the Llama model.
-    """
     query = format_message(message, history)
-    response = ""
-    sequences = llama_pipeline(
         query,
-        do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
-        max_length=1024,
     )
-    generated_text = sequences[0]['generated_text']
-    response = generated_text[len(query):]  # Remove the prompt from the output
-    print("Chatbot:", response.strip())
-    return response.strip()
-gr.ChatInterface(get_llama_response).launch()

+# Import the AutoTokenizer class from the transformers library
 from transformers import AutoTokenizer
+# Import the pipeline function from the transformers library
 from transformers import pipeline
+# Import Gradio
 import gradio as gr
+# Import the transformers package
+import transformers
+# Import PyTorch
+import torch
+# Define Model Name
+model = "arcpolar/Ubuntu_Llama_Chat_7B"
+# Setup Tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model)
+# Llama pipeline adapted from Ograbek, K.'s YouTube video and Colab notebook
+# Code from https://colab.research.google.com/drive/1SSv6lzX3Byu50PooYogmiwHqf5PQN68E
+# Initialize a text-generation pipeline using Ubuntu_Llama_Chat_7B
+Ubuntu_Llama_Chat_pipeline = pipeline(
+    "text-generation",  # Specify the task as text-generation
+    model=model, # Use Ubuntu_Llama_Chat_7B for the task
+    torch_dtype=torch.float16, # Set data type for PyTorch tensors to float16
+    device_map="auto", # Automatically choose the computation device
 )
+# Message formatting and system prompt adapted from Ograbek, K.'s YouTube video and Colab notebook
+# Code from https://colab.research.google.com/drive/1SSv6lzX3Byu50PooYogmiwHqf5PQN68E
+# Define the initial prompt for the Llama 2 model
 SYSTEM_PROMPT = """<s>[INST] <<SYS>>
 You are a helpful bot. Your answers are clear and concise.
 <</SYS>>
 """
 # Formatting function for message and history
+def format_message(message: str, history: list, memory_limit: int = 5) -> str:
+    # If history length exceeds memory_limit, keep only the most recent interactions
     if len(history) > memory_limit:
         history = history[-memory_limit:]
+    # If there's no history, return the SYSTEM_PROMPT and current message
     if len(history) == 0:
         return SYSTEM_PROMPT + f"{message} [/INST]"
+    # Start the formatted message with the SYSTEM_PROMPT and the oldest history item
     formatted_message = SYSTEM_PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"
+    # Iterate over remaining history items and format them accordingly
     for user_msg, model_answer in history[1:]:
         formatted_message += f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"
+    # Append the current user message to the formatted string
     formatted_message += f"<s>[INST] {message} [/INST]"
+    # Return the fully formatted message string
     return formatted_message
+# Response generation adapted from Ograbek, K.'s YouTube video and Colab notebook
+# Code from https://colab.research.google.com/drive/1SSv6lzX3Byu50PooYogmiwHqf5PQN68E
+def get_response(message: str, history: list) -> str:
+    # Format the user's message and history for input to the Llama model
     query = format_message(message, history)
+    # Get a response from the Llama model using the configured parameters
+    sequences = Ubuntu_Llama_Chat_pipeline(
         query,
+        do_sample=True,             # Enable sampling for response generation
+        top_k=10,                   # Limit sampling to top 10 tokens
+        num_return_sequences=1,     # Request a single response sequence
+        eos_token_id=tokenizer.eos_token_id,  # Specify the end-of-sequence token
+        max_length=1024             # Cap prompt plus generated tokens at 1024 in total
     )
+    # Extract the model's response, excluding the original query
+    response = sequences[0]['generated_text'][len(query):].strip()
+    # Display the response
+    print("Chatbot:", response)
+    return response
+# Launch a chat interface using the `get_response` function
+gr.ChatInterface(get_response).launch()