Spaces:

IoannisKat1
/

AILA_Workspace_v2

Sleeping

App Files Files Community

IoannisKat1 committed on Feb 8, 2025

Commit

36fb184

verified ·

1 Parent(s): 95cf083

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -6

app.py CHANGED Viewed

@@ -1,13 +1,25 @@
-from unsloth import FastLanguageModel
-import torch
 import gradio as gr
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-model,tokenizer = FastLanguageModel.from_pretrained('./unified_model')
-client = FastLanguageModel.for_inference(model)
 def generate_response(instruction,chat_history):
     """Generates a response using your fine-tuned model."""
     # FastLanguageModel.for_inference(model) # Enable native 2x faster inference within the function
@@ -34,6 +46,11 @@ def generate_response(instruction,chat_history):
     response = response.split("### Answer:")[-1]
     return response
 def update_chat_history(chat_history, user_message, bot_message):
     """Update chat history to maintain relevance and avoid excessive growth."""
     chat_history['user'].append(user_message)
@@ -54,7 +71,8 @@ def chatbot(input_text,chat_history):
     messages["user"].append(user_msg)
     messages["bot"].append(bot_msg)
-  bot_response = generate_response(input_text,messages)
   chat_history.append(("User: " + input_text, bot_response))
   messages = update_chat_history(messages, input_text, bot_response)
   return "", chat_history

+# from unsloth import FastLanguageModel
+# import torch
+import os
+from llama_index.core.storage.storage_context import StorageContext
+from llama_index.core.indices.loading import load_index_from_storage
+from openai import OpenAI
 import gradio as gr
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
+# model,tokenizer = FastLanguageModel.from_pretrained('./unified_model')
+# client = FastLanguageModel.for_inference(model)
+dir = 'aila_indices_legislation'  # directory passed as persist_dir below; NOTE(review): this name shadows the built-in `dir`
+# Create the OpenAI client with default settings (no arguments are passed here)
+client = OpenAI()
+storage_context = StorageContext.from_defaults(persist_dir=dir)  # storage context pointing at the persisted index directory
+index = load_index_from_storage(storage_context)  # load the index from that storage context at import time
+query_engine = index.as_query_engine(similarity_top_k=8)  # used by generate_response_rag_index; presumably retrieves the 8 most similar chunks per query — confirm
 def generate_response(instruction,chat_history):
     """Generates a response using your fine-tuned model."""
     # FastLanguageModel.for_inference(model) # Enable native 2x faster inference within the function
     response = response.split("### Answer:")[-1]
     return response
+def generate_response_rag_index(instruction,chat_history):
+    """Answer `instruction` by querying the module-level `query_engine`.
+
+    Args:
+        instruction: The user's question; passed unchanged to `query_engine.query`.
+        chat_history: Accepted for signature parity with `generate_response`;
+            it is not used by this function.
+
+    Returns:
+        The query result converted to `str`, so it can be placed in the chat
+        history next to the "User: ..." string just like the text returned by
+        `generate_response`.
+    """
+    response = query_engine.query(instruction)
+    # The query engine returns a response object rather than plain text;
+    # convert it here so callers always receive a string.
+    return str(response)
 def update_chat_history(chat_history, user_message, bot_message):
     """Update chat history to maintain relevance and avoid excessive growth."""
     chat_history['user'].append(user_message)
     messages["user"].append(user_msg)
     messages["bot"].append(bot_msg)
+  # bot_response = generate_response(input_text,messages)
+  bot_response = generate_response_rag_index(input_text,messages)
   chat_history.append(("User: " + input_text, bot_response))
   messages = update_chat_history(messages, input_text, bot_response)
   return "", chat_history