| from hf_scrapper import get_file_structure |
| from dotenv import load_dotenv |
| import os |
| import json |
|
|
| load_dotenv() |
|
|
def index_repository(token, repo, current_path=""):
    """Recursively build a nested listing of a repository's files.

    Args:
        token: Access token forwarded unchanged to ``get_file_structure``.
        repo: Repository identifier forwarded unchanged to
            ``get_file_structure``.
        current_path: Path inside the repository to list; the empty string
            is used for the top-level call.

    Returns:
        A list of dicts in the order ``get_file_structure`` yields them.
        Directory entries have the keys ``type`` (always ``"directory"``),
        ``path`` and ``contents`` (the recursively indexed children); every
        other entry has the keys ``type``, ``size`` and ``path``.
    """
    # NOTE(review): each listed entry is indexed by 'type' and 'path' (plus
    # 'size' for non-directories); the exact schema is owned by hf_scrapper.
    entries = []
    for node in get_file_structure(repo, token, current_path):
        if node['type'] != 'directory':
            # Plain file (or any other non-directory kind): copy metadata.
            entries.append({
                "type": node['type'],
                "size": node['size'],
                "path": node['path'],
            })
            continue

        # Directory: descend first, then record it with its children.
        children = index_repository(token, repo, node['path'])
        entries.append({
            "type": "directory",
            "path": node['path'],
            "contents": children,
        })
    return entries
|
|
def indexer():
    """Index the repository configured through environment variables.

    Reads ``TOKEN`` and ``REPO`` from the environment, indexes the
    repository starting from the empty path via ``index_repository``,
    prints a confirmation message, and returns the resulting structure.

    Returns:
        The nested list produced by ``index_repository``.
    """
    repo_name = os.getenv("REPO")
    access_token = os.getenv("TOKEN")

    tree = index_repository(access_token, repo_name, "")
    print(f"Full file structure for repository '{repo_name}' has been indexed.")
    return tree
|
|
if __name__ == '__main__':
    # Script entry point: index the configured repository and dump the
    # resulting structure as JSON into the cache directory.
    files = indexer()

    # CACHE_DIR may be unset or may lack a trailing path separator. Plain
    # string concatenation would then yield "Nonefile_structure.json" or
    # glue the directory name onto the file name, so join the parts
    # properly and fall back to the current directory when it is unset.
    cache_dir = os.getenv('CACHE_DIR') or ''
    if cache_dir:
        # Make sure the target directory exists before writing into it.
        os.makedirs(cache_dir, exist_ok=True)
    output_file = os.path.join(cache_dir, 'file_structure.json')

    # Explicit encoding keeps the output independent of the platform default.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(files, f, indent=4)

    print(f"File structure saved to {output_file}")
|
|