# FinQuery/helper.py
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pypdf import PdfReader
import requests


def extract_text_from_pdf(pdf_path):
    """Read a PDF and return its concatenated page text."""
    reader = PdfReader(pdf_path)
    text = ""
    for page in reader.pages:
        # extract_text() can return None for image-only pages
        text += (page.extract_text() or "") + "\n"
    return text.strip()


def chunk_text(text, chunk_size=500, chunk_overlap=100):
    """Split text into overlapping chunks, preferring logical boundaries."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,  # Overlap to preserve context
        separators=["\n\n", "\n", " ", ""],  # Prioritize logical breaks
    )
    return splitter.split_text(text)


# Shared embedding model, used for both documents and queries
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def embedding_function(texts):
    """Embed a list of texts and return plain Python lists of floats."""
    return embedding_model.encode(texts, convert_to_numpy=True).tolist()
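

# --- Added sketch (not in the original module) ---------------------------
# query_llm_with_context below expects `context` to be a
# (sorted_docs, sorted_scores) pair, but no function in this file builds
# one. A minimal cosine-similarity ranker that produces that pair might
# look like this; the name rank_documents and the in-memory ranking are
# assumptions for illustration, not the original retrieval pipeline.
import numpy as np


def rank_documents(query, docs):
    """Rank chunks by cosine similarity to the query, most similar first."""
    doc_vecs = np.array(embedding_function(docs))
    query_vec = np.array(embedding_function([query])[0])
    # Cosine similarity between the query and every document chunk
    scores = doc_vecs @ query_vec / (
        np.linalg.norm(doc_vecs, axis=1) * np.linalg.norm(query_vec)
    )
    order = scores.argsort()[::-1]
    return [docs[i] for i in order], scores[order].tolist()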


def generate_hypothetical_answer(query):
    """Generate a plausible (HyDE-style) answer to the query via Ollama."""
    # Ollama API endpoint (default is localhost:11434)
    ollama_url = "http://localhost:11434/api/generate"

    # Prepare the prompt
    prompt = f"Generate a plausible answer to the question:\n\n{query}\n\nAnswer:"

    # Prepare the request payload
    payload = {
        "model": "llama2",  # or any other model you have pulled in Ollama
        "prompt": prompt,
        "stream": False,
    }
    try:
        # Make the API request to Ollama; the timeout keeps a stalled
        # local server from hanging the call indefinitely
        response = requests.post(ollama_url, json=payload, timeout=120)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Extract the generated text from the JSON response
        result = response.json()
        generated_text = result.get("response", "")
        return generated_text.strip()
    except Exception as e:
        print(f"Error generating hypothetical answer: {e}")
        return "Failed to generate a hypothetical answer."


def query_llm_with_context(query, context, top_n=3):
    """Answer a query using only the top-N retrieved documents as context."""
    # Unpack documents already sorted by similarity (most relevant first)
    sorted_docs, sorted_scores = context

    # Use only the top N documents
    top_docs = sorted_docs[:top_n]

    # Create a context string by joining the top documents
    context_str = "\n\n===Document Boundary===\n\n".join(top_docs)

    # Create a prompt with the context and query
    prompt = f"""
Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the following query:
Query: {query}
"""

    # Call the Ollama API (instead of OpenAI)
    ollama_url = "http://localhost:11434/api/generate"

    # Prepare the request payload
    payload = {
        "model": "llama2",  # or any other model you have pulled in Ollama
        "prompt": prompt,
        "stream": False,
    }
    try:
        # Make the API request to Ollama; the timeout keeps a stalled
        # local server from hanging the call indefinitely
        response = requests.post(ollama_url, json=payload, timeout=120)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Extract the generated text from the JSON response
        result = response.json()
        generated_text = result.get("response", "")
        return generated_text.strip()
    except Exception as e:
        print(f"Error querying LLM with context: {e}")
        return "Failed to generate an answer with the provided context."