Spaces:
Running
Running
Update pageindex/core/tree_index.py
Browse files- pageindex/core/tree_index.py +68 -0
pageindex/core/tree_index.py
CHANGED
|
@@ -148,6 +148,74 @@ class TreeIndex:
|
|
| 148 |
if node.get('nodes'):
|
| 149 |
self._add_node_ids(node['nodes'], prefix=f"{node_id}.")
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
def reasoning_search(self, query: str, llm_client: Any, model: str = "gpt-4-turbo") -> str:
|
| 152 |
"""
|
| 153 |
Performs a tree search to find relevant nodes for the query.
|
|
|
|
| 148 |
if node.get('nodes'):
|
| 149 |
self._add_node_ids(node['nodes'], prefix=f"{node_id}.")
|
| 150 |
|
| 151 |
+
def reasoning_search_stream(self, query: str, llm_client: Any, model: str = "gpt-4-turbo"):
    """Streamed version of ``reasoning_search``.

    Yields ``<<<STATUS: ...>>>`` progress markers while searching, and
    finally yields the payload itself: the assembled context string, or a
    human-readable message when nothing was found / an error occurred.
    The payload is the only yielded item NOT wrapped in a STATUS marker,
    so consumers can distinguish progress from the result.

    NOTE(review): the original implementation ``return``-ed the context
    from inside the generator, so iterating callers never received it
    (a generator's return value is only reachable via
    ``StopIteration.value``). It is now yielded, matching the docstring.

    Args:
        query: Natural-language question to locate in the document tree.
        llm_client: OpenAI-compatible client exposing
            ``chat.completions.create``.
        model: Chat model used for the tree-search reasoning step.

    Yields:
        str: STATUS markers, then the final context / message.
    """
    if not self.tree:
        yield "<<<STATUS: Tree not built. Upload document first.>>>"
        return

    yield "<<<STATUS: Analyzing document structure...>>>"
    tree_summary = self._get_tree_structure_summary(self.tree)

    prompt = f"""
You are given a query and the tree structure of a document.
You need to find all nodes that are likely to contain the answer.

Query: {query}

Document tree structure:
{json.dumps(tree_summary, indent=2)}

Reply in the following JSON format:
{{
    "thinking": <your reasoning about which nodes are relevant>,
    "node_list": ["node_id1", "node_id2", ...]
}}
"""

    try:
        yield "<<<STATUS: Querying LLM for relevant sections...>>>"
        response = llm_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1
        )

        content = response.choices[0].message.content
        # Basic JSON extraction: strip an optional markdown code fence.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0].strip()
        elif "```" in content:
            content = content.split("```")[1].split("```")[0].strip()

        result = json.loads(content)
        thinking = result.get("thinking", "No reasoning provided.")
        yield f"<<<STATUS: LLM Reasoning: {thinking}>>>"

        relevant_node_ids = result.get("node_list", [])
        yield f"<<<STATUS: Identifying {len(relevant_node_ids)} relevant sections...>>>"

        # Retrieve text for each node the LLM selected; unknown ids are
        # silently skipped (``_find_node_by_id`` returning a falsy value).
        context_parts = []
        for node_id in relevant_node_ids:
            node = self._find_node_by_id(self.tree, node_id)
            if node:
                yield f"<<<STATUS: Reading section: {node['title']}...>>>"
                context_parts.append(f"--- Section: {node['title']} (ID: {node_id}) ---\n{node['text']}\n")

        full_context = "\n".join(context_parts)
        if not full_context:
            yield "<<<STATUS: No relevant content found.>>>"
            yield "No relevant context found."
            return

        # Final payload — the only non-STATUS item on the happy path.
        yield full_context

    except Exception as e:
        # Broad catch is deliberate at this streaming boundary: the
        # consumer gets the failure as a STATUS marker plus a payload
        # instead of a raised exception mid-stream.
        yield f"<<<STATUS: Error: {str(e)}>>>"
        yield f"Error: {str(e)}"
| 219 |
def reasoning_search(self, query: str, llm_client: Any, model: str = "gpt-4-turbo") -> str:
|
| 220 |
"""
|
| 221 |
Performs a tree search to find relevant nodes for the query.
|