Spaces:
Sleeping
Sleeping
Fix: chunk_file KeyError 'path' — accept both 'path' and 'filepath' keys
Browse files
ingestion/code_chunker.py
CHANGED
|
@@ -299,7 +299,7 @@ def chunk_file(file: dict) -> list[dict]:
|
|
| 299 |
"""
|
| 300 |
from ingestion.file_filter import language_from_path
|
| 301 |
|
| 302 |
-
filepath = file["path"]
|
| 303 |
content = file["content"]
|
| 304 |
language = language_from_path(filepath)
|
| 305 |
repo = file.get("repo", "")
|
|
@@ -322,6 +322,6 @@ def chunk_files(files: list[dict]) -> list[dict]:
|
|
| 322 |
for file in files:
|
| 323 |
file_chunks = chunk_file(file)
|
| 324 |
all_chunks.extend(file_chunks)
|
| 325 |
-
print(f" {file['path']} → {len(file_chunks)} chunks")
|
| 326 |
print(f"Total: {len(all_chunks)} chunks from {len(files)} files")
|
| 327 |
return all_chunks
|
|
|
|
| 299 |
"""
|
| 300 |
from ingestion.file_filter import language_from_path
|
| 301 |
|
| 302 |
+
filepath = file.get("path") or file.get("filepath", "")
|
| 303 |
content = file["content"]
|
| 304 |
language = language_from_path(filepath)
|
| 305 |
repo = file.get("repo", "")
|
|
|
|
| 322 |
for file in files:
|
| 323 |
file_chunks = chunk_file(file)
|
| 324 |
all_chunks.extend(file_chunks)
|
| 325 |
+
print(f" {file.get('path') or file.get('filepath', '?')} → {len(file_chunks)} chunks")
|
| 326 |
print(f"Total: {len(all_chunks)} chunks from {len(files)} files")
|
| 327 |
return all_chunks
|