Spaces:

m-ahmad-official
/

backend

Running

App Files Files Community

m-ahmad-official committed on Feb 18

Commit

5f90488

1 Parent(s): 812f6e6

update

Browse files

Files changed (4) hide show

agent.py +6 -21
config.py +37 -0
logging_config.py +30 -0
retrieve.py +92 -0

agent.py CHANGED Viewed

@@ -35,27 +35,13 @@ third_party_model = OpenAIChatCompletionsModel(
 # Make backend package importable
 current_dir = os.path.dirname(os.path.abspath(__file__))
-backend_parent = os.path.dirname(current_dir)
-if backend_parent not in sys.path:
-    sys.path.insert(0, backend_parent)
-# Import backend modules (support both module and script execution)
-try:
-    from config import get_config
-    from retrieve import search as retrieve_search
-    from logging_config import setup_logging
-except ImportError as e:
-    try:
-        from .config import get_config
-        from .retrieve import search as retrieve_search
-        from .logging_config import setup_logging
-    except ImportError as e2:
-        try:
-            from backend.config import get_config
-            from backend.retrieve import search as retrieve_search
-            from backend.logging_config import setup_logging
-        except ImportError as e3:
-            raise ImportError(f"Failed to import backend modules: {e3}")
 # Import OpenAI Agents SDK (must be installed separately)
 try:
@@ -212,7 +198,6 @@ def get_agent():
 def check_qdrant_health() -> str:
     try:
-        from backend.config import get_config
         from qdrant_client import QdrantClient
         cfg = get_config()

 # Make backend package importable
 current_dir = os.path.dirname(os.path.abspath(__file__))
+if current_dir not in sys.path:
+    sys.path.insert(0, current_dir)
+# Import backend modules
+from config import get_config
+from retrieve import search as retrieve_search
+from logging_config import setup_logging
 # Import OpenAI Agents SDK (must be installed separately)
 try:
 def check_qdrant_health() -> str:
     try:
         from qdrant_client import QdrantClient
         cfg = get_config()

config.py ADDED Viewed

	@@ -0,0 +1,37 @@

+"""
+Configuration module for RAG Book Chatbot.
+Loads configuration from environment variables.
+"""
+import os
+from typing import Dict, Any
+def get_config() -> Dict[str, Any]:
+    """
+    Build the application configuration from environment variables.
+
+    Reads OPENAI_API_KEY, COHERE_API_KEY, QDRANT_URL, QDRANT_API_KEY and
+    QDRANT_COLLECTION (which falls back to "book-chunks" when unset).
+
+    Returns:
+        Dictionary keyed by the lower-case setting name
+        ("openai_api_key", "cohere_api_key", "qdrant_url",
+        "qdrant_api_key", "qdrant_collection").
+    Raises:
+        ValueError: If OPENAI_API_KEY, QDRANT_URL or QDRANT_API_KEY is
+            unset or empty. The message lists the environment variable
+            names that must be set.
+    """
+    config = {
+        "openai_api_key": os.getenv("OPENAI_API_KEY"),
+        "cohere_api_key": os.getenv("COHERE_API_KEY"),
+        "qdrant_url": os.getenv("QDRANT_URL"),
+        "qdrant_api_key": os.getenv("QDRANT_API_KEY"),
+        "qdrant_collection": os.getenv("QDRANT_COLLECTION", "book-chunks"),
+    }
+    # Validate required keys. "cohere_api_key" is not in the required set,
+    # so it may be None in the returned dictionary.
+    required_keys = ["openai_api_key", "qdrant_url", "qdrant_api_key"]
+    # Each config key is the lower-cased name of its environment variable,
+    # so upper-casing yields the name the operator actually has to export.
+    missing_vars = [key.upper() for key in required_keys if not config[key]]
+    if missing_vars:
+        raise ValueError(
+            f"Missing required environment variables: {', '.join(missing_vars)}"
+        )
+    return config

logging_config.py ADDED Viewed

	@@ -0,0 +1,30 @@

+"""
+Logging configuration module.
+"""
+import logging
+import sys
+def setup_logging(name: str) -> logging.Logger:
+    """
+    Return the logger called ``name``, set to INFO and writing to stdout.
+
+    Args:
+        name: Logger name passed to ``logging.getLogger``
+    Returns:
+        The logger instance; a stdout handler is attached only when the
+        logger has no handlers yet
+    """
+    log = logging.getLogger(name)
+    log.setLevel(logging.INFO)
+    # A logger that already owns a handler is returned as-is, so calling
+    # this function repeatedly with the same name does not stack handlers.
+    if log.handlers:
+        return log
+    stdout_handler = logging.StreamHandler(sys.stdout)
+    stdout_handler.setFormatter(
+        logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+    )
+    log.addHandler(stdout_handler)
+    return log

retrieve.py ADDED Viewed

	@@ -0,0 +1,92 @@

+"""
+Retrieval module for RAG Book Chatbot.
+Handles vector search using Qdrant and Cohere embeddings.
+"""
+import logging
+from typing import List, Dict, Any, Optional
+logger = logging.getLogger(__name__)
+def search(
+    query_text: str,
+    cohere_client: Any,
+    qdrant_client: Any,
+    collection_name: str,
+    top_k: int = 5,
+) -> List[Dict[str, Any]]:
+    """
+    Embed a query with Cohere and look up the nearest chunks in Qdrant.
+
+    Args:
+        query_text: User's question or search query
+        cohere_client: Object exposing ``embed(texts=..., model=...,
+            input_type=...)`` whose response has an ``embeddings`` sequence
+        qdrant_client: Object exposing ``search(collection_name=...,
+            query_vector=..., limit=...)``
+        collection_name: Name of the Qdrant collection
+        top_k: Number of results to request (default: 5)
+    Returns:
+        One dict per hit with the keys "id", "score" and "payload", in the
+        order the Qdrant client returned them
+    Raises:
+        Exception: Any error raised by either client is logged with its
+            traceback and then re-raised unchanged
+    """
+    try:
+        # Generate embedding for the query. Only the first 100 characters
+        # are logged; %-style arguments defer formatting to the logger.
+        logger.info("Generating embedding for query: %s...", query_text[:100])
+        embedding_response = cohere_client.embed(
+            texts=[query_text],
+            model="embed-english-v3.0",
+            input_type="search_query",
+        )
+        query_embedding = embedding_response.embeddings[0]
+        logger.debug("Generated embedding dimension: %s", len(query_embedding))
+        # Search in Qdrant
+        # NOTE(review): newer qdrant-client releases appear to deprecate
+        # `search` in favour of `query_points` - confirm against the
+        # installed client version before upgrading.
+        logger.info("Searching Qdrant collection: %s", collection_name)
+        search_results = qdrant_client.search(
+            collection_name=collection_name,
+            query_vector=query_embedding,
+            limit=top_k,
+        )
+        logger.info("Found %s results", len(search_results))
+        # Format results into plain dicts so callers do not depend on the
+        # client's hit objects.
+        return [
+            {
+                "id": hit.id,
+                "score": hit.score,
+                "payload": hit.payload,
+            }
+            for hit in search_results
+        ]
+    except Exception as e:
+        # logger.exception logs at ERROR level and attaches the traceback.
+        logger.exception("Search failed: %s", e)
+        raise
+def validate_results(results: List[Dict[str, Any]]) -> float:
+    """
+    Measure how many results carry the complete chunk metadata.
+
+    A result counts as complete when its "payload" contains all of the
+    keys "url", "chunk_index" and "text". A result whose payload is missing
+    or None counts as incomplete.
+
+    Args:
+        results: List of search results (dicts with a "payload" entry)
+    Returns:
+        Fraction of results with complete metadata, between 0.0 and 1.0;
+        an empty list yields 1.0 because no result is incomplete
+    """
+    if not results:
+        return 1.0
+    required_fields = {"url", "chunk_index", "text"}
+    valid_count = 0
+    for result in results:
+        # `or {}` also covers a payload that is present but None, which
+        # would otherwise make the membership test raise TypeError.
+        payload = result.get("payload") or {}
+        if all(field in payload for field in required_fields):
+            valid_count += 1
+    return valid_count / len(results)