gauthamnairy committed on
Commit
b39e782
·
verified ·
1 Parent(s): 48ed553

Update pageindex/core/tree_index.py

Browse files
Files changed (1) hide show
  1. pageindex/core/tree_index.py +68 -0
pageindex/core/tree_index.py CHANGED
@@ -148,6 +148,74 @@ class TreeIndex:
148
  if node.get('nodes'):
149
  self._add_node_ids(node['nodes'], prefix=f"{node_id}.")
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  def reasoning_search(self, query: str, llm_client: Any, model: str = "gpt-4-turbo") -> str:
152
  """
153
  Performs a tree search to find relevant nodes for the query.
 
148
  if node.get('nodes'):
149
  self._add_node_ids(node['nodes'], prefix=f"{node_id}.")
150
 
151
def reasoning_search_stream(self, query: str, llm_client: Any, model: str = "gpt-4-turbo"):
    """
    Streamed version of reasoning_search.

    This is a generator. Every value it *yields* is a progress message
    wrapped as ``<<<STATUS: ...>>>``. The final result (the concatenated
    text of the selected sections, or a short error / "not found" message)
    is delivered as the generator's *return value*, i.e. it is available
    through ``result = yield from index.reasoning_search_stream(...)`` or
    ``StopIteration.value``; a plain ``for`` loop only sees the statuses.

    Args:
        query: The user question used to select relevant tree nodes.
        llm_client: Client object exposing
            ``chat.completions.create(model=..., messages=..., temperature=...)``
            and returning an object with ``choices[0].message.content``.
        model: Model name forwarded to the client.

    Yields:
        Status strings of the form ``<<<STATUS: ...>>>``.

    Returns:
        The assembled context string, ``"No relevant context found."`` when
        no listed node could be resolved, ``"Tree index not built."`` when
        ``self.tree`` is empty, or ``"Error: <message>"`` when anything
        fails while querying or parsing the LLM response.
    """
    if not self.tree:
        yield "<<<STATUS: Tree not built. Upload document first.>>>"
        return "Tree index not built."

    yield "<<<STATUS: Analyzing document structure...>>>"
    tree_summary = self._get_tree_structure_summary(self.tree)

    prompt = f"""
    You are given a query and the tree structure of a document.
    You need to find all nodes that are likely to contain the answer.

    Query: {query}

    Document tree structure:
    {json.dumps(tree_summary, indent=2)}

    Reply in the following JSON format:
    {{
        "thinking": <your reasoning about which nodes are relevant>,
        "node_list": ["node_id1", "node_id2", ...]
    }}
    """

    # This is the boundary of the stream: failures are reported to the
    # consumer as a status message and an "Error: ..." result instead of
    # propagating out of the generator.
    try:
        yield "<<<STATUS: Querying LLM for relevant sections...>>>"
        response = llm_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1
        )

        # The content may be None (e.g. an empty completion); treat it as
        # an empty string so the failure surfaces as a JSON error below.
        content = response.choices[0].message.content or ""

        # Basic JSON extraction: strip a Markdown code fence if present.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0].strip()
        elif "```" in content:
            content = content.split("```")[1].strip()

        try:
            result = json.loads(content)
        except json.JSONDecodeError:
            # The model sometimes wraps the object in prose without a
            # fence; retry on the outermost {...} span before giving up.
            start, end = content.find("{"), content.rfind("}")
            if start == -1 or end <= start:
                raise
            result = json.loads(content[start:end + 1])

        if not isinstance(result, dict):
            raise ValueError("LLM response is not a JSON object")

        thinking = result.get("thinking", "No reasoning provided.")
        yield f"<<<STATUS: LLM Reasoning: {thinking}>>>"

        raw_ids = result.get("node_list") or []
        # A bare scalar would otherwise be iterated character by character
        # (str) or fail outright (number); treat it as a single id.
        if isinstance(raw_ids, (str, int, float)):
            raw_ids = [raw_ids]
        # Node ids are strings (built with f-string prefixes), so coerce
        # numeric ids, and drop duplicates while keeping the LLM's order.
        relevant_node_ids = list(dict.fromkeys(str(raw_id) for raw_id in raw_ids))
        yield f"<<<STATUS: Identifying {len(relevant_node_ids)} relevant sections...>>>"

        # Retrieve text
        context_parts = []
        for node_id in relevant_node_ids:
            node = self._find_node_by_id(self.tree, node_id)
            if node:
                # Tolerate nodes without a title/text instead of aborting
                # the whole search with a KeyError.
                title = node.get('title', node_id)
                text = node.get('text', '')
                yield f"<<<STATUS: Reading section: {title}...>>>"
                context_parts.append(f"--- Section: {title} (ID: {node_id}) ---\n{text}\n")

        full_context = "\n".join(context_parts)
        if not full_context:
            yield "<<<STATUS: No relevant content found.>>>"
            return "No relevant context found."

        return full_context

    except Exception as e:
        yield f"<<<STATUS: Error: {str(e)}>>>"
        return f"Error: {str(e)}"
218
+
219
  def reasoning_search(self, query: str, llm_client: Any, model: str = "gpt-4-turbo") -> str:
220
  """
221
  Performs a tree search to find relevant nodes for the query.