# NOTE: recovered from a Hugging Face Spaces page whose build had failed
# ("Build error"); the code below was reconstructed from the page's table markup.
| from sentence_transformers import SentenceTransformer | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from pypdf import PdfReader | |
| import requests | |
| import json | |
def extract_text_from_pdf(pdf_path):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_path: Path to the PDF file to read.

    Returns:
        All page texts joined with newlines, with surrounding whitespace
        stripped.
    """
    reader = PdfReader(pdf_path)
    # pypdf's extract_text() may return None for pages with no extractable
    # text (e.g. scanned images); `or ""` prevents a TypeError on join.
    pages = (page.extract_text() or "" for page in reader.pages)
    # join() avoids the quadratic cost of repeated `text +=` concatenation.
    return "\n".join(pages).strip()
def chunk_text(text, chunk_size=500, chunk_overlap=100):
    """Split *text* into overlapping chunks suitable for embedding.

    The overlap preserves context across chunk boundaries, and the
    separator list makes the splitter prefer paragraph and line breaks
    before falling back to word and character splits.
    """
    separators = ["\n\n", "\n", " ", ""]
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=separators,
    )
    return text_splitter.split_text(text)
# Sentence-embedding model shared by all lookups (loaded once at import time).
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def embedding_function(texts):
    """Embed *texts* and return the vectors as plain Python lists of floats."""
    vectors = embedding_model.encode(texts, convert_to_numpy=True)
    return vectors.tolist()
def generate_hypothetical_answer(query):
    """Ask a local Ollama model to draft a plausible answer to *query*.

    Intended for HyDE-style retrieval, where a hypothetical answer is
    embedded and used as the search vector instead of the raw question.

    Args:
        query: The user's question.

    Returns:
        The model's generated answer with surrounding whitespace stripped,
        or a fallback message string if the request fails.
    """
    # Ollama API endpoint (default is localhost:11434).
    # NOTE: the redundant function-local `import requests` / `import json`
    # were removed — `requests` is already imported at module level and
    # `json` was never used.
    ollama_url = "http://localhost:11434/api/generate"
    prompt = f"Generate a plausible answer to the question:\n\n{query}\n\nAnswer:"
    payload = {
        "model": "llama2",  # or any other model you have pulled in Ollama
        "prompt": prompt,
        "stream": False,  # return the whole completion as one JSON object
    }
    try:
        # A timeout prevents hanging forever when the Ollama server is down.
        response = requests.post(ollama_url, json=payload, timeout=120)
        response.raise_for_status()  # raise on HTTP error statuses
        result = response.json()
        return result.get("response", "").strip()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout/HTTP errors;
        # ValueError covers a non-JSON response body from .json().
        print(f"Error generating hypothetical answer: {e}")
        return "Failed to generate a hypothetical answer."
def query_llm_with_context(query, context, top_n=3):
    """Answer *query* with a local Ollama model, grounded in retrieved docs.

    Args:
        query: The user's question.
        context: Tuple of (sorted_docs, sorted_scores), documents ordered by
            descending similarity; the scores are not used here.
        top_n: Number of top-ranked documents to include in the prompt.

    Returns:
        The model's answer with surrounding whitespace stripped, or a
        fallback message string if the request fails.
    """
    # Scores are unpacked but unused; the underscore prefix documents that.
    sorted_docs, _sorted_scores = context
    # Keep only the most relevant documents to stay within the context window.
    top_docs = sorted_docs[:top_n]
    # Use a distinct name instead of rebinding the `context` parameter, and
    # mark document boundaries explicitly so the model can tell sources apart.
    context_text = "\n\n===Document Boundary===\n\n".join(top_docs)
    prompt = f"""
Context information is below.
---------------------
{context_text}
---------------------
Given the context information and not prior knowledge, answer the following query:
Query: {query}
"""
    # Ollama API endpoint (default is localhost:11434).
    ollama_url = "http://localhost:11434/api/generate"
    payload = {
        "model": "llama2",  # or any other model you have pulled in Ollama
        "prompt": prompt,
        "stream": False,  # return the whole completion as one JSON object
    }
    try:
        # A timeout prevents hanging forever when the Ollama server is down.
        response = requests.post(ollama_url, json=payload, timeout=120)
        response.raise_for_status()  # raise on HTTP error statuses
        result = response.json()
        return result.get("response", "").strip()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout/HTTP errors;
        # ValueError covers a non-JSON response body from .json().
        print(f"Error querying LLM with context: {e}")
        return "Failed to generate an answer with the provided context."