m-ahmad-official committed on
Commit
a724f5f
·
1 Parent(s): 0ae85bd
Files changed (1) hide show
  1. retrieve.py +1 -46
retrieve.py CHANGED
@@ -86,52 +86,7 @@ def search(
86
  exc_info=True,
87
  )
88
  raise
89
-
90
- def validate_chunk_sequencing(results: List[Dict[str, Any]]) -> bool:
91
- """
92
- Verify that chunk_index values are properly assigned: integers >= 0 and unique per URL.
93
-
94
- Note: Since search may return only a subset of chunks for a URL, we cannot
95
- verify full sequential continuity (0,1,2,3...). Instead we check:
96
- - All chunk_index values are integers >= 0
97
- - No duplicate chunk_index for the same URL in the result set
98
-
99
- Args:
100
- results: List of search results
101
-
102
- Returns:
103
- True if chunk indices are valid, False otherwise
104
- """
105
- # Group by URL
106
- url_chunks = {}
107
- for result in results:
108
- payload = result.get("payload", {})
109
- url = payload.get("url", "")
110
- chunk_idx = payload.get("chunk_index")
111
-
112
- if url not in url_chunks:
113
- url_chunks[url] = []
114
- url_chunks[url].append(chunk_idx)
115
-
116
- # Check each URL's chunks are valid
117
- for url, indices in url_chunks.items():
118
- # All indices must be integers >= 0
119
- for idx in indices:
120
- if not isinstance(idx, int) or idx < 0:
121
- logger.debug(
122
- f"Invalid chunk_index for {url}: {idx} (must be non-negative integer)"
123
- )
124
- return False
125
-
126
- # Check for duplicates (within this URL's results)
127
- if len(set(indices)) != len(indices):
128
- logger.debug(f"Duplicate chunk_index for {url}: {indices}")
129
- return False
130
-
131
- logger.debug(f"Chunk indexing valid for {len(url_chunks)} URLs")
132
- return True
133
-
134
-
135
  def search(
136
  query_text: str,
137
  cohere_client: cohere.ClientV2,
 
86
  exc_info=True,
87
  )
88
  raise
89
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def search(
91
  query_text: str,
92
  cohere_client: cohere.ClientV2,