IoannisKat1 committed on
Commit
36fb184
·
verified ·
1 Parent(s): 95cf083

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -6
app.py CHANGED
@@ -1,13 +1,25 @@
1
- from unsloth import FastLanguageModel
2
- import torch
 
 
 
 
3
  import gradio as gr
4
 
5
  """
6
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
  """
8
- model,tokenizer = FastLanguageModel.from_pretrained('./unified_model')
9
- client = FastLanguageModel.for_inference(model)
10
-
 
 
 
 
 
 
 
 
11
  def generate_response(instruction,chat_history):
12
  """Generates a response using your fine-tuned model."""
13
  # FastLanguageModel.for_inference(model) # Enable native 2x faster inference within the function
@@ -34,6 +46,11 @@ def generate_response(instruction,chat_history):
34
  response = response.split("### Answer:")[-1]
35
  return response
36
 
 
 
 
 
 
37
  def update_chat_history(chat_history, user_message, bot_message):
38
  """Update chat history to maintain relevance and avoid excessive growth."""
39
  chat_history['user'].append(user_message)
@@ -54,7 +71,8 @@ def chatbot(input_text,chat_history):
54
  messages["user"].append(user_msg)
55
  messages["bot"].append(bot_msg)
56
 
57
- bot_response = generate_response(input_text,messages)
 
58
  chat_history.append(("User: " + input_text, bot_response))
59
  messages = update_chat_history(messages, input_text, bot_response)
60
  return "", chat_history
 
1
+ # from unsloth import FastLanguageModel
2
+ # import torch
3
+ import os
4
+ from llama_index.core.storage.storage_context import StorageContext
5
+ from llama_index.core.indices.loading import load_index_from_storage
6
+ from openai import OpenAI
7
  import gradio as gr
8
 
9
  """
10
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
11
  """
12
+ # model,tokenizer = FastLanguageModel.from_pretrained('./unified_model')
13
+ # client = FastLanguageModel.for_inference(model)
14
+
15
+ dir = 'aila_indices_legislation'
16
+
17
+ # Initialize OpenAI client
18
+ client = OpenAI()
19
+ storage_context = StorageContext.from_defaults(persist_dir=dir)
20
+ index = load_index_from_storage(storage_context)
21
+ query_engine = index.as_query_engine(similarity_top_k=8)
22
+
23
  def generate_response(instruction,chat_history):
24
  """Generates a response using your fine-tuned model."""
25
  # FastLanguageModel.for_inference(model) # Enable native 2x faster inference within the function
 
46
  response = response.split("### Answer:")[-1]
47
  return response
48
 
49
def generate_response_rag_index(instruction, chat_history):
    """Answer ``instruction`` by querying the module-level RAG query engine.

    Args:
        instruction: The user's question; passed unchanged to
            ``query_engine.query``.
        chat_history: Accepted so the signature matches
            ``generate_response``; it is not used by this function.

    Returns:
        The query result converted to ``str``, so the caller can put it
        directly into the chat history that is displayed to the user.
    """
    response = query_engine.query(instruction)
    # The query engine hands back a response object rather than plain text
    # (per the LlamaIndex docs); the chat UI needs a string message, so
    # convert here instead of leaking the object to the caller.
    return str(response)
52
+
53
+
54
  def update_chat_history(chat_history, user_message, bot_message):
55
  """Update chat history to maintain relevance and avoid excessive growth."""
56
  chat_history['user'].append(user_message)
 
71
  messages["user"].append(user_msg)
72
  messages["bot"].append(bot_msg)
73
 
74
+ # bot_response = generate_response(input_text,messages)
75
+ bot_response = generate_response_rag_index(input_text,messages)
76
  chat_history.append(("User: " + input_text, bot_response))
77
  messages = update_chat_history(messages, input_text, bot_response)
78
  return "", chat_history