Spaces:

1337XCode
/

personabot-api

Running

App Files Files Community

GitHub Actions committed on Feb 28

Commit

0da0699

1 Parent(s): 4ef165a

Deploy 27439fc

Browse files

Files changed (11) hide show

app/api/chat.py +2 -0
app/core/portfolio_context.py +120 -0
app/models/pipeline.py +6 -0
app/pipeline/graph.py +64 -21
app/pipeline/nodes/enumerate_query.py +236 -0
app/pipeline/nodes/generate.py +91 -11
app/pipeline/nodes/log_eval.py +9 -3
app/services/gemini_client.py +58 -0
app/services/vector_store.py +43 -0
tests/conftest.py +4 -0
tests/test_enumerate_query.py +213 -0

app/api/chat.py CHANGED Viewed

@@ -194,6 +194,8 @@ async def chat_endpoint(
         "critic_completeness": None,
         "critic_specificity": None,
         "critic_quality": None,
     }
     async def sse_generator():

         "critic_completeness": None,
         "critic_specificity": None,
         "critic_quality": None,
+        # Fix 1: enumeration classifier — populated by enumerate_query node
+        "is_enumeration_query": False,
     }
     async def sse_generator():

app/core/portfolio_context.py ADDED Viewed

	@@ -0,0 +1,120 @@

+"""
+backend/app/core/portfolio_context.py
+Known portfolio entities extracted from the TOON context file.
+Two purposes:
+  1. Fix 2 Rule 1 — CRAG routing: detect whether a failed query is asking
+     about something genuinely in the portfolio.  When the first CRAG retry
+     also fails, a second retry is allowed for queries that mention known
+     entities.  This prevents the not-found response from firing on queries
+     that should have findings (e.g. "how does textops work?").
+  2. Fix 2 Rule 2 — Not-found specific suggestion: the generate node passes
+     the TOON entity list to Gemini so it can produce a specific redirect like
+     "Try asking about his TextOps Kubernetes setup" rather than the generic
+     "ask about his projects".
+Entity list is manually maintained from the TOON context file and must be
+updated whenever refresh_gemini_context.py adds new content.
+Deliberate duplication: the TOON file is runtime state (may be absent in tests);
+this module is compile-time — no file I/O, no latency, no failure mode.
+"""
+from __future__ import annotations
+# ---------------------------------------------------------------------------
+# Known project names (as they appear in the TOON file and corpus)
+# ---------------------------------------------------------------------------
+KNOWN_PROJECTS: frozenset[str] = frozenset({
+    "textops", "text ops",
+    "echo-echo", "echo echo",
+    "localhost",
+    "donut-asm", "donut asm", "donut.c", "donut",
+    "save-the-planet", "save the planet",
+    "sorting-demo", "sorting demo",
+    "student-management-system", "student management system",
+    "sysphus",
+    "personabot", "persona bot",
+})
+# ---------------------------------------------------------------------------
+# Known technologies (canonical forms + common abbreviations)
+# ---------------------------------------------------------------------------
+KNOWN_TECHNOLOGIES: frozenset[str] = frozenset({
+    # Languages
+    "python", "go", "golang", "java", "javascript", "typescript",
+    "assembly", "x86", "sql", "html", "css",
+    # Frameworks / libraries
+    "fastapi", "react", "node.js", "nodejs", "express", "ejs",
+    "langgraph", "langchain", "pydantic",
+    # Infra / cloud
+    "docker", "kubernetes", "aws", "gcp", "terraform", "ci/cd", "gitlab",
+    "github actions", "nginx",
+    # ML / AI
+    "yolo", "yolov8", "ncnn", "onnx",
+    "rag", "llm", "llms", "groq", "gemini", "qdrant",
+    "sentence-transformers", "bge", "cross-encoder", "bm25",
+    # Networking / P2P
+    "webrtc", "kademlia", "tor", "dht", "p2p",
+    # Database
+    "sqlite", "postgres", "postgresql", "mysql", "mongodb", "orm",
+    # Testing
+    "junit", "pytest",
+    "jwt", "owasp",
+    # Monitoring
+    "prometheus", "mlflow", "dagshub",
+    # Misc
+    "microservices", "serverless", "e2ee",
+})
+# ---------------------------------------------------------------------------
+# Known companies / educational institutions
+# ---------------------------------------------------------------------------
+KNOWN_ORGS: frozenset[str] = frozenset({
+    # Employment (update from TOON / resume as new roles are indexed)
+    "vk live", "vklive",
+    # Education
+    "university",
+    # Platforms / services
+    "github", "groq", "huggingface", "vercel", "cloudflare", "qdrant cloud",
+})
+# ---------------------------------------------------------------------------
+# All known portfolio nouns in one flat set for O(1) membership checks
+# ---------------------------------------------------------------------------
+ALL_PORTFOLIO_NOUNS: frozenset[str] = KNOWN_PROJECTS | KNOWN_TECHNOLOGIES | KNOWN_ORGS
+# Compact context block passed to Gemini when generating a specific not-found
+# suggestion.  One sentence per major entity class — tight token budget.
+SUGGESTION_HINT: str = (
+    "Darshan's portfolio includes: "
+    "projects (TextOps, Echo-Echo, Localhost, Donut-ASM, Sysphus, Save the Planet, Sorting Demo, "
+    "Student Management System, PersonaBot); "
+    "skills and technologies (Python, Go, FastAPI, LangGraph, RAG, Qdrant, Groq, Docker, Kubernetes, "
+    "AWS, WebRTC, Kademlia DHT, YOLOv8, Assembly x86, Java, React, Node.js); "
+    "blog posts (60 FPS Object Detection on Android, Prompt Engineering Jailbreaks); "
+    "work experience and education (ask about his resume/CV for employer details)."
+)
+def _normalise_entity(text: str) -> str:
+    """Lowercase *text* and join its alphanumeric runs with single spaces."""
+    import re
+    return " ".join(re.findall(r"[a-z0-9]+", text.lower()))
+# Entity names normalised exactly the way queries are tokenised, so names that
+# contain punctuation ("node.js", "ci/cd", "cross-encoder") remain reachable
+# after the query has been split on non-alphanumeric characters.
+_NORMALISED_NOUNS: frozenset[str] = frozenset(
+    normalised
+    for normalised in map(_normalise_entity, ALL_PORTFOLIO_NOUNS)
+    if normalised
+)
+# Longest entity measured in tokens; bounds the n-gram window used below.
+_MAX_NOUN_TOKENS: int = max(
+    (noun.count(" ") + 1 for noun in _NORMALISED_NOUNS),
+    default=1,
+)
+def is_portfolio_relevant(query: str) -> bool:
+    """
+    Return True when the query mentions at least one known portfolio entity.
+    Used by graph routing (Fix 2 Rule 1) to decide whether a second CRAG
+    retry is warranted after the first retry also found nothing.
+    The query is lowercased and split into alphanumeric tokens; every window
+    of 1.._MAX_NOUN_TOKENS consecutive tokens is then looked up in the
+    normalised entity set.  This covers single words ("textops"), bigrams
+    ("vk live") and longer names ("student management system"), as well as
+    punctuated spellings such as "node.js" or "save-the-planet".
+    Args:
+        query: Raw visitor query text.
+    Returns:
+        True if any token window equals a normalised entity name, else False.
+    """
+    import re
+    tokens = re.findall(r"[a-z0-9]+", query.lower())
+    for size in range(1, _MAX_NOUN_TOKENS + 1):
+        # range() is empty when the query has fewer than `size` tokens.
+        for start in range(len(tokens) - size + 1):
+            if " ".join(tokens[start:start + size]) in _NORMALISED_NOUNS:
+                return True
+    return False

app/models/pipeline.py CHANGED Viewed

@@ -82,3 +82,9 @@ class PipelineState(TypedDict):
     critic_completeness: Optional[int]  # answer uses all relevant available chunks
     critic_specificity: Optional[int]   # answer contains specific names/numbers
     critic_quality: Optional[str]       # "high" | "medium" | "low"

     critic_completeness: Optional[int]  # answer uses all relevant available chunks
     critic_specificity: Optional[int]   # answer contains specific names/numbers
     critic_quality: Optional[str]       # "high" | "medium" | "low"
+    # Fix 1: Enumeration query classifier.
+    # True when the query has enumeration intent ("list all projects", "how many blogs").
+    # The enumerate_query node skips semantic retrieval and does a Qdrant payload-filter
+    # scroll instead, returning a complete deduplicated title list.
+    # Logged to SQLite so enumeration turns can be monitored separately from RAG turns.
+    is_enumeration_query: bool

app/pipeline/graph.py CHANGED Viewed

@@ -4,11 +4,13 @@ from langgraph.graph.state import CompiledStateGraph
 from app.models.pipeline import PipelineState
 from app.pipeline.nodes.guard import make_guard_node
 from app.pipeline.nodes.cache import make_cache_node
 from app.pipeline.nodes.gemini_fast import make_gemini_fast_node
 from app.pipeline.nodes.retrieve import make_retrieve_node
 from app.pipeline.nodes.rewrite_query import make_rewrite_query_node, _has_meaningful_token
 from app.pipeline.nodes.generate import make_generate_node
 from app.pipeline.nodes.log_eval import make_log_eval_node
 # Relevance gate threshold — matches retrieve.py constant.
 _MIN_TOP_SCORE: float = -3.5
@@ -28,6 +30,20 @@ def route_guard(state: PipelineState) -> str:
     return "block"
 def route_cache(state: PipelineState) -> str:
     if state.get("cached", False):
         return "hit"
@@ -48,44 +64,70 @@ def route_gemini(state: PipelineState) -> str:
 def route_retrieve_result(state: PipelineState) -> str:
     """
     CRAG routing: trigger a query rewrite when retrieval was weak or empty.
-    Exactly one retry is permitted; retrieval_attempts tracks this.
-    Rewrite conditions (first attempt only, meaningful query tokens required):
-      1. reranked_chunks is empty (nothing above the -3.5 threshold).
-      2. reranked_chunks is non-empty but the top cross-encoder score is below
-         _CRAG_LOW_CONFIDENCE_SCORE (-1.5), indicating borderline retrieval where
-         a different query phrasing would likely produce much better matches.
     """
     attempts = state.get("retrieval_attempts", 1)
     reranked = state.get("reranked_chunks", [])
-    if attempts == 1 and _has_meaningful_token(state.get("query", "")):
         if not reranked:
             return "rewrite"
         top_score = state.get("top_rerank_score")
         if top_score is not None and top_score < _CRAG_LOW_CONFIDENCE_SCORE:
             return "rewrite"
     return "generate"
 def build_pipeline(services: dict) -> CompiledStateGraph:
     graph = StateGraph(PipelineState)
-    graph.add_node("guard",         make_guard_node(services["classifier"]))
-    graph.add_node("cache",         make_cache_node(services["cache"], services["embedder"]))
-    graph.add_node("gemini_fast",   make_gemini_fast_node(services["gemini"]))
-    graph.add_node("retrieve",      make_retrieve_node(
-                                    services["vector_store"],
-                                    services["embedder"],
-                                    services["reranker"]))
-    # CRAG: one query rewrite on failed retrieval — then retrieve runs a second time.
-    graph.add_node("rewrite_query", make_rewrite_query_node(services["gemini"]))
-    graph.add_node("generate",      make_generate_node(services["llm"], services["gemini"]))
-    graph.add_node("log_eval",      make_log_eval_node(services["db_path"], services.get("github_log")))
     graph.set_entry_point("guard")
     graph.add_conditional_edges("guard", route_guard,
-        {"pass": "cache", "block": "log_eval"})
     graph.add_conditional_edges("cache", route_cache,
         {"hit": "log_eval", "miss": "gemini_fast"})
@@ -93,11 +135,12 @@ def build_pipeline(services: dict) -> CompiledStateGraph:
     graph.add_conditional_edges("gemini_fast", route_gemini,
         {"answered": "log_eval", "research": "retrieve"})
-    # After retrieve: either run CRAG rewrite (one retry) or proceed to generate.
     graph.add_conditional_edges("retrieve", route_retrieve_result,
         {"rewrite": "rewrite_query", "generate": "generate"})
-    # After rewrite: go straight back to retrieve for the second attempt.
     # The cycle terminates because route_retrieve_result checks retrieval_attempts.
     graph.add_edge("rewrite_query", "retrieve")

 from app.models.pipeline import PipelineState
 from app.pipeline.nodes.guard import make_guard_node
 from app.pipeline.nodes.cache import make_cache_node
+from app.pipeline.nodes.enumerate_query import make_enumerate_query_node
 from app.pipeline.nodes.gemini_fast import make_gemini_fast_node
 from app.pipeline.nodes.retrieve import make_retrieve_node
 from app.pipeline.nodes.rewrite_query import make_rewrite_query_node, _has_meaningful_token
 from app.pipeline.nodes.generate import make_generate_node
 from app.pipeline.nodes.log_eval import make_log_eval_node
+from app.core.portfolio_context import is_portfolio_relevant
 # Relevance gate threshold — matches retrieve.py constant.
 _MIN_TOP_SCORE: float = -3.5
     return "block"
+def route_enumerate(state: PipelineState) -> str:
+    """
+    Fix 1: pick the edge to follow once the enumerate_query node has run.
+    Returns:
+      "skip_to_generate" — state["is_enumeration_query"] is truthy; the
+                           retrieval stages are bypassed and generate runs next.
+      "continue"         — the flag is missing or falsy; the normal
+                           cache → gemini_fast → retrieve pipeline runs.
+    """
+    wants_enumeration = state.get("is_enumeration_query", False)
+    return "skip_to_generate" if wants_enumeration else "continue"
 def route_cache(state: PipelineState) -> str:
     if state.get("cached", False):
         return "hit"
 def route_retrieve_result(state: PipelineState) -> str:
     """
     CRAG routing: trigger a query rewrite when retrieval was weak or empty.
+    Fix 2 Rule 1: portfolio-noun queries are allowed a SECOND CRAG retry after
+    the first retry also finds nothing.  This prevents the not-found response from
+    firing on queries where the corpus genuinely should have results (e.g. a typo
+    in a project name or a synonym mismatch that's specific to portfolio content).
+    Attempt tracking (via retrieval_attempts):
+      First retrieve  → retrieval_attempts = 1
+      First rewrite   → retrieval_attempts = 2 (rewrite_query increments by +1)
+      Second retrieve → retrieval_attempts = 3
+      Second rewrite  → retrieval_attempts = 4 (portfolio queries only)
+      Third retrieve  → retrieval_attempts = 5
+    Any attempt ≥ 5 (or ≥ 3 for non-portfolio queries) goes to generate.
+    Routing terminates because retrieval_attempts grows monotonically.
     """
     attempts = state.get("retrieval_attempts", 1)
     reranked = state.get("reranked_chunks", [])
+    query = state.get("query", "")
+    # First CRAG attempt — applies to all queries with meaningful tokens.
+    if attempts == 1 and _has_meaningful_token(query):
         if not reranked:
             return "rewrite"
         top_score = state.get("top_rerank_score")
         if top_score is not None and top_score < _CRAG_LOW_CONFIDENCE_SCORE:
             return "rewrite"
+    # Fix 2 Rule 1: second CRAG attempt for portfolio-noun queries only.
+    # attempts==3 means: first retrieve failed → rewrite fired → second retrieve
+    # also failed (still empty after the first CRAG rewrite).  When the query
+    # mentions a known portfolio entity, attempt one more vocabulary-shifted rewrite
+    # before admitting the not-found path.
+    # NOTE(review): this branch is reachable only if the retrieve node also
+    # increments retrieval_attempts (as the table in the docstring states);
+    # that node is not visible here — confirm, otherwise attempts never equals 3.
+    if attempts == 3 and not reranked and is_portfolio_relevant(query):
+        return "rewrite"
     return "generate"
 def build_pipeline(services: dict) -> CompiledStateGraph:
     graph = StateGraph(PipelineState)
+    graph.add_node("guard",          make_guard_node(services["classifier"]))
+    graph.add_node("enumerate_query", make_enumerate_query_node(services["vector_store"]))
+    graph.add_node("cache",          make_cache_node(services["cache"], services["embedder"]))
+    graph.add_node("gemini_fast",    make_gemini_fast_node(services["gemini"]))
+    graph.add_node("retrieve",       make_retrieve_node(
+                                     services["vector_store"],
+                                     services["embedder"],
+                                     services["reranker"]))
+    # CRAG: query rewrite on failed retrieval — runs up to twice for portfolio queries.
+    graph.add_node("rewrite_query",  make_rewrite_query_node(services["gemini"]))
+    graph.add_node("generate",       make_generate_node(services["llm"], services["gemini"]))
+    graph.add_node("log_eval",       make_log_eval_node(services["db_path"], services.get("github_log")))
     graph.set_entry_point("guard")
     graph.add_conditional_edges("guard", route_guard,
+        {"pass": "enumerate_query", "block": "log_eval"})
+    # Fix 1: enumerate_query either skips straight to generate (full list fetched)
+    # or falls through to the normal cache → gemini_fast → retrieve pipeline.
+    graph.add_conditional_edges("enumerate_query", route_enumerate,
+        {"skip_to_generate": "generate", "continue": "cache"})
     graph.add_conditional_edges("cache", route_cache,
         {"hit": "log_eval", "miss": "gemini_fast"})
     graph.add_conditional_edges("gemini_fast", route_gemini,
         {"answered": "log_eval", "research": "retrieve"})
+    # After retrieve: either run CRAG rewrite (up to twice for portfolio queries)
+    # or proceed to generate.
     graph.add_conditional_edges("retrieve", route_retrieve_result,
         {"rewrite": "rewrite_query", "generate": "generate"})
+    # After rewrite: go straight back to retrieve for the next attempt.
     # The cycle terminates because route_retrieve_result checks retrieval_attempts.
     graph.add_edge("rewrite_query", "retrieve")

app/pipeline/nodes/enumerate_query.py ADDED Viewed

	@@ -0,0 +1,236 @@

+"""
+backend/app/pipeline/nodes/enumerate_query.py
+Fix 1 — Enumeration Query Classifier and Metadata Retrieval.
+Inserted immediately after the Guard node (before Cache / Gemini fast-path).
+When enumeration intent is detected, this node queries Qdrant using a
+payload filter on metadata.source_type — no vector embedding, no reranker.
+It then deduplicates by source_title, sorts alphabetically, and populates
+reranked_chunks so the Generate node receives the complete, accurate list.
+Why a database filter beats similarity search for enumeration:
+  Semantic retrieval cannot guarantee completeness — it finds the top-K
+  most similar chunks, not ALL matching chunks.  "List all my projects"
+  with top_k=20 and 8 projects in the corpus would return the 8 most
+  similar to the query vector, but which 8 depends on the embedding.
+  A payload filter returns every matching point, regardless of embedding
+  position.  Completeness is guaranteed; the cosine metric is irrelevant.
+Cost: 0 embedding calls, 0 reranker calls, 1 Qdrant scroll.
+"""
+from __future__ import annotations
+import logging
+import re
+from typing import Callable
+from langgraph.config import get_stream_writer
+from app.models.pipeline import PipelineState, Chunk
+from app.services.vector_store import VectorStore
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Enumeration intent patterns
+# ---------------------------------------------------------------------------
+# Each pattern is checked against the lowercased, whitespace-normalised query.
+# Any single match is sufficient, so the order of the entries does not affect the result.
+_ENUM_PREFIXES: tuple[str, ...] = (
+    "list all",
+    "list the",
+    "list every",
+    "list your",
+    "list his",
+    "list ",
+    "show all",
+    "show me all",
+    "show every",
+    "give me all",
+    "give me a list",
+    "what are all",
+    "what are your",
+    "what are his",
+    "how many",
+    "count ",
+    "count of",
+    "enumerate",
+    "name all",
+    "name every",
+)
+# Trailing pattern: "what [are|were|is] all the <noun>?"
+_ENUM_TRAILING_RE = re.compile(
+    r"(?:what|which)\s+(?:are|were|is|were)\s+all\s+(?:the\s+)?",
+    re.IGNORECASE,
+)
+def _has_enumeration_intent(query: str) -> bool:
+    """
+    Decide whether *query* asks for a list or a count of items.
+    The query is lowercased and its whitespace collapsed.  It matches when any
+    entry of _ENUM_PREFIXES occurs at the very start of the query or directly
+    after a space, or when _ENUM_TRAILING_RE matches anywhere in it.
+    Pure string operations — no LLM call, no embedding.
+    """
+    normalised = " ".join(query.lower().split())
+    # Prepending one space lets a single substring test cover both cases:
+    # "query starts with the prefix" and "prefix follows a space".
+    padded = f" {normalised}"
+    if any(f" {prefix}" in padded for prefix in _ENUM_PREFIXES):
+        return True
+    return _ENUM_TRAILING_RE.search(normalised) is not None
+# ---------------------------------------------------------------------------
+# Entity-type extractor
+# ---------------------------------------------------------------------------
+# Maps query tokens → Qdrant source_type values.
+# "all source types" is represented as an empty list (caller scrolls without filter).
+_TYPE_MAP: dict[str, list[str]] = {
+    "project":      ["project"],
+    "projects":     ["project"],
+    "blog":         ["blog"],
+    "blogs":        ["blog"],
+    "post":         ["blog"],
+    "posts":        ["blog"],
+    "article":      ["blog"],
+    "articles":     ["blog"],
+    "writing":      ["blog"],
+    "writings":     ["blog"],
+    "experience":   ["cv", "bio"],
+    "experiences":  ["cv", "bio"],
+    "work":         ["cv", "bio"],
+    "jobs":         ["cv", "bio"],
+    "job":          ["cv", "bio"],
+    "role":         ["cv", "bio"],
+    "roles":        ["cv", "bio"],
+    "company":      ["cv", "bio"],
+    "companies":    ["cv", "bio"],
+    "skills":       ["cv", "project", "blog"],
+    "skill":        ["cv", "project", "blog"],
+    "technologies": ["cv", "project", "blog"],
+    "technology":   ["cv", "project", "blog"],
+    "tech":         ["cv", "project", "blog"],
+    "tools":        ["cv", "project", "blog"],
+    "readme":       ["github"],
+    "repositories": ["github"],
+    "repos":        ["github"],
+}
+def _extract_source_types(query: str) -> list[str]:
+    """
+    Translate the words of *query* into Qdrant source_type values via _TYPE_MAP.
+    The result keeps first-seen order and contains no repeats.  An empty list
+    means no word mapped to a type, which callers treat as "all types".
+    """
+    words = re.findall(r"[a-z]+", query.lower())
+    # dict.fromkeys preserves insertion order while discarding duplicates.
+    ordered_types = dict.fromkeys(
+        source_type
+        for word in words
+        for source_type in _TYPE_MAP.get(word, ())
+    )
+    return list(ordered_types)
+# ---------------------------------------------------------------------------
+# Source type display label (used in status event)
+# ---------------------------------------------------------------------------
+_TYPE_LABEL: dict[str, str] = {
+    "project": "projects",
+    "blog": "blog posts",
+    "cv": "CV/experience",
+    "bio": "background",
+    "github": "GitHub repos",
+}
+def _label_for_types(source_types: list[str]) -> str:
+    """
+    Build the human-readable label shown in the status event.
+    Only the first two source types are named; a type without an entry in
+    _TYPE_LABEL is shown as-is.  An empty list yields "all portfolio content".
+    """
+    if source_types:
+        shown = [_TYPE_LABEL.get(source_type, source_type) for source_type in source_types[:2]]
+        return " and ".join(shown)
+    return "all portfolio content"
+# ---------------------------------------------------------------------------
+# Node factory
+# ---------------------------------------------------------------------------
+def make_enumerate_query_node(vector_store: VectorStore) -> Callable[[PipelineState], dict]:
+    """
+    Returns a LangGraph node that:
+      1. Classifies whether the query has enumeration intent.
+      2. If yes: scrolls Qdrant by source_type, deduplicates by title,
+         populates reranked_chunks, sets is_enumeration_query=True.
+      3. If no: passes through with is_enumeration_query=False so the
+         rest of the pipeline (cache → gemini_fast → retrieve) runs normally.
+    The node also falls back to is_enumeration_query=False when the scroll
+    returns nothing, or when none of the returned chunks carries a usable
+    source_title.  Without that second fallback the graph would skip retrieval
+    and hand generate an empty chunk list, producing a not-found answer without
+    the RAG path ever being tried.
+    The vector store is not called unless enumeration intent is detected.
+    """
+    def enumerate_query_node(state: PipelineState) -> dict:
+        writer = get_stream_writer()
+        query = state["query"]
+        if not _has_enumeration_intent(query):
+            return {"is_enumeration_query": False}
+        # Enumeration intent confirmed.
+        source_types = _extract_source_types(query)
+        label = _label_for_types(source_types)
+        writer({"type": "status", "label": f"Fetching complete list of {label}..."})
+        # Scroll Qdrant — payload filter, no vector.  An empty source_types list
+        # means "no specific type requested", so every known type is scrolled.
+        all_chunks = vector_store.scroll_by_source_type(
+            source_types=source_types or ["project", "blog", "cv", "bio", "github"],
+        )
+        if not all_chunks:
+            # Nothing in the corpus yet — let the normal pipeline handle it.
+            logger.info("Enumeration scroll returned 0 results; falling back to RAG path.")
+            return {"is_enumeration_query": False}
+        # Deduplicate by source_title (many chunks per document; we want title-level list).
+        # Chunks with a missing or blank title cannot be listed and are dropped.
+        seen_titles: set[str] = set()
+        unique_by_title: list[Chunk] = []
+        for chunk in all_chunks:
+            title = chunk["metadata"].get("source_title", "").strip()
+            if title and title not in seen_titles:
+                seen_titles.add(title)
+                unique_by_title.append(chunk)
+        if not unique_by_title:
+            # Every chunk lacked a title.  Returning True here would route
+            # straight to generate with no chunks; use the RAG path instead.
+            logger.info(
+                "Enumeration scroll returned %d chunks but no usable titles; falling back to RAG path.",
+                len(all_chunks),
+            )
+            return {"is_enumeration_query": False}
+        # Sort alphabetically by title for stable output.
+        unique_by_title.sort(key=lambda c: c["metadata"].get("source_title", "").lower())
+        logger.info(
+            "Enumeration: query=%r source_types=%r → %d unique titles",
+            query, source_types, len(unique_by_title),
+        )
+        # Emit one "reading" event per unique source so the frontend's source card
+        # row is populated (mirrors the retrieve node's contract).
+        seen_urls: set[str] = set()
+        for chunk in unique_by_title:
+            meta = chunk["metadata"]
+            url = meta.get("source_url") or ""
+            # Fall back to doc_id so sources without a URL still get one event.
+            dedup_key = url or meta.get("doc_id", "")
+            if dedup_key and dedup_key not in seen_urls:
+                seen_urls.add(dedup_key)
+                writer({
+                    "type": "reading",
+                    "title": meta.get("source_title", ""),
+                    "url": url or None,
+                    "source_type": meta.get("source_type", ""),
+                })
+        writer({"type": "status", "label": f"Found {len(unique_by_title)} items — composing list..."})
+        return {
+            "is_enumeration_query": True,
+            "reranked_chunks": unique_by_title,
+            # Mark path early so log_eval tags enumeration turns separately.
+            "path": "enumeration",
+        }
+    return enumerate_query_node

app/pipeline/nodes/generate.py CHANGED Viewed

@@ -8,6 +8,7 @@ from app.models.chat import SourceRef
 from app.models.pipeline import PipelineState
 from app.services.llm_client import LLMClient
 from app.core.quality import is_low_trust
 logger = logging.getLogger(__name__)
@@ -84,16 +85,36 @@ CRITICAL SAFETY RULES — override everything above:
 # context here, so anything specific it says would be fabricated.
 _NOT_FOUND_SYSTEM = """\
 You are the assistant on Darshan Chheda's portfolio website.
-The knowledge base search returned no relevant results for this question.
-Respond in 1-2 natural sentences. Use fresh wording each time — do not start with
-"I don't have information about". Acknowledge that specific information isn't indexed
-right now, then invite the visitor to ask about {topics}.
-CRITICAL: Do NOT name any specific project, technology, company, blog post, or skill.
-You have NO retrieved facts — any specific name you produce is fabricated.
-No apologies, no padding, vary your phrasing.
-""".format(topics=_TOPIC_SUGGESTIONS)
 def _format_history(state: "PipelineState") -> str:
@@ -135,12 +156,71 @@ def make_generate_node(llm_client: LLMClient, gemini_client=None) -> Callable[[P
         complexity = state.get("query_complexity", "simple")
         reranked_chunks = state.get("reranked_chunks", [])
-        # ── Not-found path ─────────────────────────────────────────────────
         if not reranked_chunks:
             writer({"type": "status", "label": "Could not find specific information, responding carefully..."})
             history_prefix = _format_history(state)
             stream = llm_client.complete_with_complexity(
-                prompt=f"{history_prefix}Visitor question: {query}",
                 system=_NOT_FOUND_SYSTEM,
                 stream=True,
                 complexity="simple",

 from app.models.pipeline import PipelineState
 from app.services.llm_client import LLMClient
 from app.core.quality import is_low_trust
+from app.core.portfolio_context import SUGGESTION_HINT
 logger = logging.getLogger(__name__)
 # context here, so anything specific it says would be fabricated.
 _NOT_FOUND_SYSTEM = """\
 You are the assistant on Darshan Chheda's portfolio website.
+The knowledge base search returned no relevant results for this question even after a retry.
+You will be given one specific suggestion to offer (generated from the real portfolio index).
+Respond in 1-2 natural sentences:
+  1. Acknowledge that specific information isn't indexed right now.
+  2. End with the specific suggestion provided after SUGGESTION:.
+Rules:
+- Use fresh wording each time — do not start with "I don't have information about".
+- Do NOT name any specific project, technology, company, or skill UNLESS it appears in the
+  SUGGESTION line provided to you. You have NO retrieved facts.
+- No apologies, no padding, vary your phrasing.
+"""
+# Enumeration path: Groq formats the pre-fetched, deduplicated title list.
+# The generate node builds a numbered list in the prompt; Groq adds citations.
+_ENUM_SYSTEM_PROMPT = """\
+You are the assistant on Darshan Chheda's portfolio website.
+You have been given a complete, database-fetched list of items matching the visitor's request.
+Your job is to format this list as a clean numbered list and add one citation per item.
+FORMATTING RULES:
+1. Output a numbered list. Each line: "N. [Title](URL) — one-sentence description from the passage."
+2. Cite each item with [N] immediately after its title. Example: "1. TextOps [1] — ..."
+3. Only use the titles, URLs, and text provided in the passages. Do not invent items.
+4. If a URL is missing for an item, omit the link but keep the title.
+5. Do not add a preamble like "Here is a list of..." — start directly with "1.".
+6. After the list, add one sentence summarising the count: "That's N items in total."
+7. No apologies, no padding.
+"""
 def _format_history(state: "PipelineState") -> str:
         complexity = state.get("query_complexity", "simple")
         reranked_chunks = state.get("reranked_chunks", [])
+        # ── Enumeration path (Fix 1) ──────────────────────────────────────────────
+        # enumerate_query node already set is_enumeration_query=True and populated
+        # reranked_chunks with deduplicated, alphabetically-sorted title chunks.
+        # We format the pre-fetched list with a special prompt — no extra LLM reasoning
+        # needed, just reliable numbered-list formatting with one citation per item.
+        if state.get("is_enumeration_query") and reranked_chunks:
+            writer({"type": "status", "label": "Formatting complete list..."})
+            context_parts: list[str] = []
+            source_refs: list[SourceRef] = []
+            for i, chunk in enumerate(reranked_chunks, start=1):
+                meta = chunk["metadata"]
+                header = f"[{i}] {meta.get('source_title', 'Item')}"
+                if meta.get("source_url"):
+                    header += f" ({meta['source_url']})"
+                context_parts.append(f"{header}\n{chunk['text'][:300]}")
+                source_refs.append(
+                    SourceRef(
+                        title=meta.get("source_title", ""),
+                        url=meta.get("source_url", ""),
+                        section=meta.get("section", ""),
+                    )
+                )
+            context_block_enum = "\n\n".join(context_parts)
+            prompt_enum = f"Items fetched from database:\n{context_block_enum}\n\nVisitor request: {query}"
+            stream = llm_client.complete_with_complexity(
+                prompt=prompt_enum,
+                system=_ENUM_SYSTEM_PROMPT,
+                stream=True,
+                complexity="simple",
+            )
+            full_answer = ""
+            async for token in stream:
+                full_answer += token
+                writer({"type": "token", "text": token})
+            return {"answer": full_answer, "sources": source_refs, "path": "enumeration"}
+        # ── Not-found path ────────────────────────────────────────────────────────────
         if not reranked_chunks:
             writer({"type": "status", "label": "Could not find specific information, responding carefully..."})
+            # Fix 2 Rule 2: generate a specific, topical redirect suggestion using
+            # Gemini with the TOON portfolio entity list.  Fires here (after all CRAG
+            # retries have been exhausted) so the visitor always gets a meaningful
+            # alternative rather than a generic catch-all footer.
+            query_topic = state.get("query_topic") or "that topic"
+            specific_suggestion = (
+                f"Try rephrasing about {query_topic} — I may know it under a different term."
+            )
+            if gemini_client is not None and gemini_client.is_configured:
+                try:
+                    specific_suggestion = await gemini_client.generate_specific_suggestion(
+                        query=query,
+                        query_topic=query_topic,
+                        suggestion_hint=SUGGESTION_HINT,
+                    )
+                except Exception as exc:
+                    logger.debug("Specific suggestion generation failed: %s", exc)
             history_prefix = _format_history(state)
+            prompt_not_found = (
+                f"{history_prefix}Visitor question: {query}\n\n"
+                f"SUGGESTION: {specific_suggestion}"
+            )
             stream = llm_client.complete_with_complexity(
+                prompt=prompt_not_found,
                 system=_NOT_FOUND_SYSTEM,
                 stream=True,
                 complexity="simple",

app/pipeline/nodes/log_eval.py CHANGED Viewed

@@ -65,7 +65,8 @@ def make_log_eval_node(db_path: str, github_log=None) -> Callable[[PipelineState
                     critic_groundedness  INTEGER,
                     critic_completeness  INTEGER,
                     critic_specificity   INTEGER,
-                    critic_quality       TEXT
                 )
                 """
             )
@@ -81,6 +82,8 @@ def make_log_eval_node(db_path: str, github_log=None) -> Callable[[PipelineState
                 ("critic_completeness", "INTEGER"),
                 ("critic_specificity", "INTEGER"),
                 ("critic_quality", "TEXT"),
             ]:
                 try:
                     conn.execute(f"ALTER TABLE interactions ADD COLUMN {col} {definition}")
@@ -92,8 +95,9 @@ def make_log_eval_node(db_path: str, github_log=None) -> Callable[[PipelineState
                 INSERT INTO interactions
                     (timestamp, session_id, query, answer, chunks_used, rerank_scores,
                      reranked_chunks_json, latency_ms, cached, path,
-                     critic_groundedness, critic_completeness, critic_specificity, critic_quality)
-                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                 """,
                 (
                     datetime.now(tz=timezone.utc).isoformat(),
@@ -110,6 +114,7 @@ def make_log_eval_node(db_path: str, github_log=None) -> Callable[[PipelineState
                     state.get("critic_completeness"),
                     state.get("critic_specificity"),
                     state.get("critic_quality"),
                 ),
             )
             return cursor.lastrowid  # type: ignore[return-value]
@@ -145,6 +150,7 @@ def make_log_eval_node(db_path: str, github_log=None) -> Callable[[PipelineState
                     "critic_completeness": state.get("critic_completeness"),
                     "critic_specificity": state.get("critic_specificity"),
                     "critic_quality": state.get("critic_quality"),
                 }
                 github_log.append(record)

                     critic_groundedness  INTEGER,
                     critic_completeness  INTEGER,
                     critic_specificity   INTEGER,
+                    critic_quality       TEXT,
+                    is_enumeration_query BOOLEAN DEFAULT 0
                 )
                 """
             )
                 ("critic_completeness", "INTEGER"),
                 ("critic_specificity", "INTEGER"),
                 ("critic_quality", "TEXT"),
+                # Fix 1: enumeration classifier flag
+                ("is_enumeration_query", "BOOLEAN DEFAULT 0"),
             ]:
                 try:
                     conn.execute(f"ALTER TABLE interactions ADD COLUMN {col} {definition}")
                 INSERT INTO interactions
                     (timestamp, session_id, query, answer, chunks_used, rerank_scores,
                      reranked_chunks_json, latency_ms, cached, path,
+                     critic_groundedness, critic_completeness, critic_specificity, critic_quality,
+                     is_enumeration_query)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                 """,
                 (
                     datetime.now(tz=timezone.utc).isoformat(),
                     state.get("critic_completeness"),
                     state.get("critic_specificity"),
                     state.get("critic_quality"),
+                    state.get("is_enumeration_query", False),
                 ),
             )
             return cursor.lastrowid  # type: ignore[return-value]
                     "critic_completeness": state.get("critic_completeness"),
                     "critic_specificity": state.get("critic_specificity"),
                     "critic_quality": state.get("critic_quality"),
+                    "is_enumeration_query": state.get("is_enumeration_query", False),
                 }
                 github_log.append(record)

app/services/gemini_client.py CHANGED Viewed

@@ -447,3 +447,61 @@ class GeminiClient:
             # Non-fatal: log and fall back to RAG so users always get a response.
             logger.warning("Gemini fast path error (%s); routing to RAG.", exc)
             return None, query

             # Non-fatal: log and fall back to RAG so users always get a response.
             logger.warning("Gemini fast path error (%s); routing to RAG.", exc)
             return None, query
+    async def generate_specific_suggestion(
+        self,
+        query: str,
+        query_topic: str,
+        suggestion_hint: str,
+    ) -> str:
+        """
+        Fix 2 Rule 2 — generate a specific not-found redirect suggestion.
+        When the RAG pipeline finds nothing (after CRAG retry), instead of
+        the generic "ask about his projects", this method uses the TOON portfolio
+        context to produce a specific, topical suggestion grounded in real content.
+        Args:
+          query: the visitor's question; inserted verbatim into the prompt.
+          query_topic: detected topic label; used both in the prompt and in the
+            fallback sentence.
+          suggestion_hint: portfolio content text placed at the top of the prompt
+            so the model can reference a real item from it.
+        Returns:
+          The model's suggestion with surrounding whitespace and double quotes
+          stripped, or the fallback sentence when no client is set, the call
+          raises, or the model returns empty text.
+        Examples:
+          query_topic="kubernetes" →
+            "Ask about how Darshan deployed TextOps on Kubernetes with custom Helm charts."
+          query_topic="work experience" →
+            "Try asking about his role at VK Live or his responsibilities there."
+        Falls back to a topic-specific hardcoded suggestion if Gemini is unavailable.
+        The fallback itself uses ``query_topic`` so it is always more specific than
+        the generic "ask about his projects" footer.
+        """
+        if not self._client:
+            # Graceful fallback: still more specific than the old generic text.
+            return (
+                f"Try rephrasing your question about {query_topic} "
+                "— I may know it under a different term."
+            )
+        # The prompt constrains the output to a single short sentence so it can be
+        # returned to the caller without further post-processing.
+        prompt = (
+            f"Portfolio content available:\n{suggestion_hint}\n\n"
+            f"Visitor asked: {query}\n"
+            f"Topic detected: {query_topic}\n\n"
+            "The search returned no results. Write ONE specific suggestion the visitor "
+            "should try instead, referencing a real item from the portfolio content above "
+            "that is most related to their query topic. "
+            "Format: 'Try asking about [specific item/aspect].' "
+            "Maximum 20 words. Output ONLY the suggestion sentence."
+        )
+        try:
+            # The import sits inside the try, so an ImportError is handled like any
+            # other failure and ends in the fallback sentence.
+            from google.genai import types  # noqa: PLC0415
+            response = await self._client.aio.models.generate_content(  # type: ignore[attr-defined]
+                model=self._model,
+                contents=prompt,
+                config=types.GenerateContentConfig(temperature=0.3, max_output_tokens=60),
+            )
+            # Only the first part of the first candidate is read; any lookup error
+            # on a missing candidate or part is caught by the handler below.
+            text = (response.candidates[0].content.parts[0].text or "").strip().strip('"')
+            if text:
+                logger.debug("Specific suggestion generated: %r", text[:80])
+                return text
+        except Exception as exc:
+            logger.warning("generate_specific_suggestion failed (%s); using fallback.", exc)
+        # Reached when the call failed or the model returned empty text.
+        return (
+            f"Try rephrasing your question about {query_topic} "
+            "— I may know it under a different term."
+        )

app/services/vector_store.py CHANGED Viewed

@@ -274,3 +274,46 @@ class VectorStore:
                 "search_by_raptor_level(level=%d) failed: %s — skipping RAPTOR results.", level, exc
             )
             return []

                 "search_by_raptor_level(level=%d) failed: %s — skipping RAPTOR results.", level, exc
             )
             return []
+    def scroll_by_source_type(
+        self,
+        source_types: list[str],
+        limit: int = 500,
+    ) -> list[Chunk]:
+        """
+        Retrieve all chunks matching any of the given source_types via payload
+        filter — no vector search involved.
+        Used by the enumerate_query node (Fix 1) to answer "list all projects /
+        blogs / skills" queries with zero embedding or reranker calls.  This
+        method does no deduplication or sorting itself; that is left to the caller.
+        source_types: list of metadata.source_type values to include.
+          e.g. ["project"]  or  ["blog"]  or  ["cv", "project", "blog"]
+          An empty list returns [] without querying the store.
+        limit: upper bound on total points fetched (safety cap; default 500 covers
+          any realistic personal portfolio without unbounded scrolling).
+        Returns a list of Chunk objects built from each record's payload; records
+        with an empty payload are skipped.  Any exception is logged as a warning
+        and an empty list is returned instead.
+        """
+        if not source_types:
+            return []
+        try:
+            # OR filter across all requested source types.
+            should_conditions = [
+                FieldCondition(
+                    key="metadata.source_type",
+                    match=MatchValue(value=st),
+                )
+                for st in source_types
+            ]
+            qdrant_filter = Filter(should=should_conditions)
+            # A single scroll call is made and the second element of the returned
+            # tuple is discarded, so at most `limit` records are ever read.
+            records, _ = self.client.scroll(
+                collection_name=self.collection,
+                scroll_filter=qdrant_filter,
+                limit=limit,
+                with_payload=True,
+                with_vectors=False,
+            )
+            # Payload keys are passed straight through as Chunk keyword arguments.
+            return [Chunk(**rec.payload) for rec in records if rec.payload]
+        except Exception as exc:
+            logger.warning("scroll_by_source_type(%r) failed: %s", source_types, exc)
+            return []

tests/conftest.py CHANGED Viewed

@@ -62,6 +62,9 @@ def app_client():
         if isinstance(stream_mode, list):
             yield ("custom", {"type": "status", "label": "Checking your question"})
             yield ("updates", {"guard": {"guard_passed": True}})
             yield ("updates", {"cache": {"cached": False}})
             yield ("custom", {"type": "status", "label": "Thinking about your question directly..."})
             yield ("custom", {"type": "token", "text": "I built TextOps."})
@@ -69,6 +72,7 @@ def app_client():
         else:
             # Fallback for any code that still calls astream without stream_mode.
             yield {"guard": {"guard_passed": True}}
             yield {"cache": {"cached": False}}
             yield {"generate": {"answer": "I built TextOps.", "sources": []}}

         if isinstance(stream_mode, list):
             yield ("custom", {"type": "status", "label": "Checking your question"})
             yield ("updates", {"guard": {"guard_passed": True}})
+            # Fix 1: enumerate_query node runs after guard on every request.
+            # Non-enumeration queries set is_enumeration_query=False and pass through.
+            yield ("updates", {"enumerate_query": {"is_enumeration_query": False}})
             yield ("updates", {"cache": {"cached": False}})
             yield ("custom", {"type": "status", "label": "Thinking about your question directly..."})
             yield ("custom", {"type": "token", "text": "I built TextOps."})
         else:
             # Fallback for any code that still calls astream without stream_mode.
             yield {"guard": {"guard_passed": True}}
+            yield {"enumerate_query": {"is_enumeration_query": False}}
             yield {"cache": {"cached": False}}
             yield {"generate": {"answer": "I built TextOps.", "sources": []}}

tests/test_enumerate_query.py ADDED Viewed

	@@ -0,0 +1,213 @@

+# backend/tests/test_enumerate_query.py
+# Unit tests for the enumeration query classifier (Fix 1) and
+# the portfolio-relevance helper (Fix 2 Rule 1).
+#
+# All tests are pure-Python; no network calls, no Qdrant, no embedder.
+import pytest
+from unittest.mock import AsyncMock, MagicMock, patch
+from app.pipeline.nodes.enumerate_query import (
+    _has_enumeration_intent,
+    _extract_source_types,
+    make_enumerate_query_node,
+)
+from app.core.portfolio_context import is_portfolio_relevant
+# Patch target for LangGraph's stream writer, which requires a runnable context
+# that doesn't exist in unit tests.
+_WRITER_PATCH = "app.pipeline.nodes.enumerate_query.get_stream_writer"
+# ---------------------------------------------------------------------------
+# _has_enumeration_intent
+# ---------------------------------------------------------------------------
+class TestHasEnumerationIntent:
+    """Checks that _has_enumeration_intent returns True for list/show/count/enumerate
+    phrasings and False for explanatory questions and the empty string."""
+    # Positive cases: explicit listing or counting verbs.
+    def test_list_all_projects(self):
+        assert _has_enumeration_intent("list all projects") is True
+    def test_list_projects_no_all(self):
+        assert _has_enumeration_intent("list projects") is True
+    def test_show_all_blogs(self):
+        assert _has_enumeration_intent("show all blog posts") is True
+    def test_how_many_blogs(self):
+        assert _has_enumeration_intent("how many blog posts do you have") is True
+    def test_count_projects(self):
+        assert _has_enumeration_intent("count projects") is True
+    def test_enumerate_skills(self):
+        assert _has_enumeration_intent("enumerate all skills") is True
+    def test_give_me_a_list_of(self):
+        assert _has_enumeration_intent("give me a list of your projects") is True
+    def test_what_are_all_the_projects(self):
+        # trailing-regex pattern: "what are all the X"
+        assert _has_enumeration_intent("what are all the projects") is True
+    def test_which_are_all_the_blogs(self):
+        # Requires "all" keyword — the trailing regex gate prevents over-triggering.
+        assert _has_enumeration_intent("which are all the blog posts") is True
+    # Negative cases: how/explain/what-is/tell-me-about questions and empty input.
+    def test_regular_how_query_no_intent(self):
+        assert _has_enumeration_intent("how does TextOps work") is False
+    def test_explain_query_no_intent(self):
+        assert _has_enumeration_intent("explain the architecture of PersonaBot") is False
+    def test_what_is_query_no_intent(self):
+        assert _has_enumeration_intent("what is echo-echo") is False
+    def test_tell_me_about_no_intent(self):
+        assert _has_enumeration_intent("tell me about your background") is False
+    def test_empty_string(self):
+        assert _has_enumeration_intent("") is False
+# ---------------------------------------------------------------------------
+# _extract_source_types
+# ---------------------------------------------------------------------------
+class TestExtractSourceTypes:
+    """Checks the source_type values _extract_source_types derives from a query:
+    "project", "blog", "cv" and "github" for matching wording, [] when no type is named."""
+    def test_projects(self):
+        types = _extract_source_types("list all projects")
+        assert "project" in types
+    def test_blogs(self):
+        types = _extract_source_types("show all blog posts")
+        assert "blog" in types
+    def test_skills_cv(self):
+        # Skills are expected to map to the "cv" source type.
+        types = _extract_source_types("list all your skills")
+        assert "cv" in types
+    def test_generic_returns_empty(self):
+        # "everything" or "all" without a type token → [] meaning scroll all types
+        types = _extract_source_types("list everything")
+        assert types == []
+    def test_github_repos(self):
+        types = _extract_source_types("show all github repos")
+        assert "github" in types
+    def test_work_experience(self):
+        # Work experience is expected to map to the "cv" source type as well.
+        types = _extract_source_types("list all work experience")
+        assert "cv" in types
+# ---------------------------------------------------------------------------
+# make_enumerate_query_node
+# ---------------------------------------------------------------------------
+@pytest.mark.asyncio
+async def test_non_enumeration_query_passes_through():
+    """A regular query must exit the node with is_enumeration_query=False."""
+    mock_vs = MagicMock()
+    mock_vs.scroll_by_source_type = MagicMock(return_value=[])
+    node = make_enumerate_query_node(mock_vs)
+    state = {"query": "how does TextOps work", "retrieval_attempts": 0}
+    # The stream writer is patched out; it needs a LangGraph runnable context.
+    # NOTE(review): node(state) is called without await although the test is
+    # async — presumably the node is a plain function; confirm.
+    with patch(_WRITER_PATCH, return_value=MagicMock()):
+        result = node(state)
+    assert result["is_enumeration_query"] is False
+    # Vector store must NOT be called for normal queries (zero cost guarantee).
+    mock_vs.scroll_by_source_type.assert_not_called()
+@pytest.mark.asyncio
+async def test_enumeration_query_sets_flag_and_populates_chunks():
+    """An enumeration query must call scroll and set is_enumeration_query=True."""
+    # Two chunks with distinct source_title values, so both are expected to be kept.
+    chunk_a = {
+        "text": "TextOps is a CLI toolkit.",
+        "metadata": {"source_title": "TextOps", "source_type": "project", "doc_id": "textops-1"},
+    }
+    chunk_b = {
+        "text": "Echo-Echo is a WebRTC demo.",
+        "metadata": {"source_title": "Echo-Echo", "source_type": "project", "doc_id": "echo-1"},
+    }
+    mock_vs = MagicMock()
+    mock_vs.scroll_by_source_type = MagicMock(return_value=[chunk_a, chunk_b])
+    node = make_enumerate_query_node(mock_vs)
+    state = {"query": "list all projects", "retrieval_attempts": 0}
+    # The stream writer is patched out; it needs a LangGraph runnable context.
+    with patch(_WRITER_PATCH, return_value=MagicMock()):
+        result = node(state)
+    assert result["is_enumeration_query"] is True
+    assert len(result["reranked_chunks"]) == 2
+    mock_vs.scroll_by_source_type.assert_called_once()
+@pytest.mark.asyncio
+async def test_enumeration_deduplicates_by_source_title():
+    """Duplicate source_title chunks must be collapsed to one representative."""
+    # Both chunks share source_title "TextOps" but carry different doc_id values.
+    chunk_a = {
+        "text": "TextOps chunk 1",
+        "metadata": {"source_title": "TextOps", "source_type": "project", "doc_id": "textops-1"},
+    }
+    chunk_b = {
+        "text": "TextOps chunk 2",
+        "metadata": {"source_title": "TextOps", "source_type": "project", "doc_id": "textops-2"},
+    }
+    mock_vs = MagicMock()
+    mock_vs.scroll_by_source_type = MagicMock(return_value=[chunk_a, chunk_b])
+    node = make_enumerate_query_node(mock_vs)
+    state = {"query": "list all projects", "retrieval_attempts": 0}
+    # The stream writer is patched out; it needs a LangGraph runnable context.
+    with patch(_WRITER_PATCH, return_value=MagicMock()):
+        result = node(state)
+    assert result["is_enumeration_query"] is True
+    assert len(result["reranked_chunks"]) == 1
+@pytest.mark.asyncio
+async def test_enumeration_empty_scroll_returns_not_found():
+    """When the vector store returns no chunks, is_enumeration_query stays False.
+    Despite the test name, only the flag is asserted here; no not-found response
+    is checked."""
+    mock_vs = MagicMock()
+    mock_vs.scroll_by_source_type = MagicMock(return_value=[])
+    node = make_enumerate_query_node(mock_vs)
+    state = {"query": "list all projects", "retrieval_attempts": 0}
+    # The stream writer is patched out; it needs a LangGraph runnable context.
+    with patch(_WRITER_PATCH, return_value=MagicMock()):
+        result = node(state)
+    # With no chunks, the node does not commit to enumeration path; falls to RAG.
+    assert result["is_enumeration_query"] is False
+# ---------------------------------------------------------------------------
+# is_portfolio_relevant (Fix 2 Rule 1)
+# ---------------------------------------------------------------------------
+class TestIsPortfolioRelevant:
+    """Checks that is_portfolio_relevant returns True for queries naming a known
+    project, technology or organisation, and False for unrelated or empty queries."""
+    def test_known_project_name(self):
+        assert is_portfolio_relevant("how does textops work") is True
+    def test_known_project_variant(self):
+        assert is_portfolio_relevant("tell me about echo echo") is True
+    def test_known_technology(self):
+        assert is_portfolio_relevant("explain the use of langchain in your stack") is True
+    def test_known_organisation(self):
+        assert is_portfolio_relevant("what did you do at vk live") is True
+    def test_unrelated_query(self):
+        assert is_portfolio_relevant("what is the weather in london") is False
+    def test_generic_question(self):
+        assert is_portfolio_relevant("tell me a joke") is False
+    def test_empty_string(self):
+        assert is_portfolio_relevant("") is False