from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pypdf import PdfReader
import requests


def extract_text_from_pdf(pdf_path):
    reader = PdfReader(pdf_path)
    text = ""
    for page in reader.pages:
        # extract_text() can return None for pages without a text layer
        text += (page.extract_text() or "") + "\n"
    return text.strip()


def chunk_text(text, chunk_size=500, chunk_overlap=100):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,  # Overlap to preserve context
        separators=["\n\n", "\n", " ", ""],  # Prioritize logical breaks
    )
    return splitter.split_text(text)


embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def embedding_function(texts):
    return embedding_model.encode(texts, convert_to_numpy=True).tolist()


def generate_hypothetical_answer(query):
    # Ollama API endpoint (default is localhost:11434)
    ollama_url = "http://localhost:11434/api/generate"

    # Prepare the prompt
    prompt = f"Generate a plausible answer to the question:\n\n{query}\n\nAnswer:"

    # Prepare the request payload
    payload = {
        "model": "llama2",  # or any other model you have pulled in Ollama
        "prompt": prompt,
        "stream": False,
    }

    try:
        # Make the API request to Ollama
        response = requests.post(ollama_url, json=payload)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse the response and extract the generated text
        result = response.json()
        generated_text = result.get("response", "")
        return generated_text.strip()
    except Exception as e:
        print(f"Error generating hypothetical answer: {e}")
        return "Failed to generate a hypothetical answer."


def query_llm_with_context(query, context, top_n=3):
    # Unpack documents already sorted by similarity (scores are not used here)
    sorted_docs, sorted_scores = context

    # Use only the top N documents
    top_docs = sorted_docs[:top_n]

    # Create a context string by joining the top documents
    context_str = "\n\n===Document Boundary===\n\n".join(top_docs)

    # Create a prompt with the context and query
    prompt = f"""
Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the following query:
Query: {query}
"""

    # Call the Ollama API
    ollama_url = "http://localhost:11434/api/generate"

    # Prepare the request payload
    payload = {
        "model": "llama2",  # or any other model you have pulled in Ollama
        "prompt": prompt,
        "stream": False,
    }

    try:
        # Make the API request to Ollama
        response = requests.post(ollama_url, json=payload)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse the response and extract the generated text
        result = response.json()
        generated_text = result.get("response", "")
        return generated_text.strip()
    except Exception as e:
        print(f"Error querying LLM with context: {e}")
        return "Failed to generate an answer with the provided context."
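

# ---------------------------------------------------------------------------
# Usage sketch tying the pieces together. Assumptions (not fixed anywhere
# above): a local "document.pdf" exists, Ollama is running with the llama2
# model pulled, and chunks are ranked by cosine similarity against the
# embedding of the hypothetical answer (HyDE-style retrieval).
if __name__ == "__main__":
    import numpy as np

    # 1. Ingest and chunk the source document ("document.pdf" is a
    #    hypothetical path for illustration).
    text = extract_text_from_pdf("document.pdf")
    chunks = chunk_text(text)

    # 2. Embed all chunks up front.
    chunk_embeddings = np.array(embedding_function(chunks))

    # 3. Embed a hypothetical answer instead of the raw query, then rank
    #    chunks by cosine similarity to it.
    query = "What is the main topic of the document?"  # example query
    hypothetical = generate_hypothetical_answer(query)
    query_embedding = np.array(embedding_function([hypothetical])[0])

    scores = chunk_embeddings @ query_embedding / (
        np.linalg.norm(chunk_embeddings, axis=1) * np.linalg.norm(query_embedding)
    )
    order = np.argsort(scores)[::-1]  # highest similarity first
    sorted_docs = [chunks[i] for i in order]
    sorted_scores = [float(scores[i]) for i in order]

    # 4. Answer the original query using the top-ranked chunks as context.
    answer = query_llm_with_context(query, (sorted_docs, sorted_scores))
    print(answer)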