# FinQuery/helper.py
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pypdf import PdfReader
import requests


def extract_text_from_pdf(pdf_path):
    """Read a PDF and return its concatenated page text."""
    reader = PdfReader(pdf_path)
    text = ""
    for page in reader.pages:
        # extract_text() can return None for image-only pages
        text += (page.extract_text() or "") + "\n"
    return text.strip()


def chunk_text(text, chunk_size=500, chunk_overlap=100):
    """Split text into overlapping chunks, preferring logical boundaries."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,  # Overlap to preserve context
        separators=["\n\n", "\n", " ", ""],  # Prioritize logical breaks
    )
    return splitter.split_text(text)


# Shared embedding model, used for both documents and queries
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def embedding_function(texts):
    """Embed a list of texts and return plain Python lists of floats."""
    return embedding_model.encode(texts, convert_to_numpy=True).tolist()
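

# --- Added sketch (not in the original module) ---------------------------
# query_llm_with_context below expects `context` to be a
# (sorted_docs, sorted_scores) pair, but no function in this file builds
# one. A minimal cosine-similarity ranker that produces that pair might
# look like this; the name rank_documents and the in-memory ranking are
# assumptions for illustration, not the original retrieval pipeline.
import numpy as np


def rank_documents(query, docs):
    """Rank chunks by cosine similarity to the query, most similar first."""
    doc_vecs = np.array(embedding_function(docs))
    query_vec = np.array(embedding_function([query])[0])
    # Cosine similarity between the query and every document chunk
    scores = doc_vecs @ query_vec / (
        np.linalg.norm(doc_vecs, axis=1) * np.linalg.norm(query_vec)
    )
    order = scores.argsort()[::-1]
    return [docs[i] for i in order], scores[order].tolist()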


def generate_hypothetical_answer(query):
    """Generate a plausible (HyDE-style) answer to the query via Ollama."""
    # Ollama API endpoint (default is localhost:11434)
    ollama_url = "http://localhost:11434/api/generate"

    # Prepare the prompt
    prompt = f"Generate a plausible answer to the question:\n\n{query}\n\nAnswer:"

    # Prepare the request payload
    payload = {
        "model": "llama2",  # or any other model you have pulled in Ollama
        "prompt": prompt,
        "stream": False,
    }
    try:
        # Make the API request to Ollama; the timeout keeps a stalled
        # local server from hanging the call indefinitely
        response = requests.post(ollama_url, json=payload, timeout=120)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Extract the generated text from the JSON response
        result = response.json()
        generated_text = result.get("response", "")
        return generated_text.strip()
    except Exception as e:
        print(f"Error generating hypothetical answer: {e}")
        return "Failed to generate a hypothetical answer."


def query_llm_with_context(query, context, top_n=3):
    """Answer a query using only the top-N retrieved documents as context."""
    # Unpack documents already sorted by similarity (most relevant first)
    sorted_docs, sorted_scores = context

    # Use only the top N documents
    top_docs = sorted_docs[:top_n]

    # Create a context string by joining the top documents
    context_str = "\n\n===Document Boundary===\n\n".join(top_docs)

    # Create a prompt with the context and query
    prompt = f"""
Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the following query:
Query: {query}
"""

    # Call the Ollama API (instead of OpenAI)
    ollama_url = "http://localhost:11434/api/generate"

    # Prepare the request payload
    payload = {
        "model": "llama2",  # or any other model you have pulled in Ollama
        "prompt": prompt,
        "stream": False,
    }
    try:
        # Make the API request to Ollama; the timeout keeps a stalled
        # local server from hanging the call indefinitely
        response = requests.post(ollama_url, json=payload, timeout=120)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Extract the generated text from the JSON response
        result = response.json()
        generated_text = result.get("response", "")
        return generated_text.strip()
    except Exception as e:
        print(f"Error querying LLM with context: {e}")
        return "Failed to generate an answer with the provided context."