| import os |
| import zipfile |
|
|
| |
# Running totals for the whole scan; check_zip_contents() updates them once
# per archive it is given.
total_files_count = 0  # archives examined so far, readable or not
contains_all_files_count = 0  # archives holding the folder and every expected file
missing_files_count = 0  # archives lacking the folder or at least one file
missing_files_info = []  # one human-readable line per incomplete archive

# Files every archive must hold directly inside its top-level folder.
_EXPECTED_FILES = (
    'config.json',
    'generation_config.json',
    'pytorch_model.bin',
    'source.spm',
    'target.spm',
    'tokenizer_config.json',
    'vocab.json',
)


def check_zip_contents(zip_file_path):
    """Check one ZIP archive for the expected folder and files.

    The archive must contain a top-level folder named after the archive
    itself (file name without the extension), and that folder must hold every
    file listed in ``_EXPECTED_FILES``.

    Nothing is returned; the outcome is recorded in the module-level
    counters:

    * ``total_files_count`` is incremented for every call.
    * ``contains_all_files_count`` is incremented when nothing is missing.
    * ``missing_files_count`` is incremented, and a message is appended to
      ``missing_files_info``, when the folder or any expected file is absent.

    An archive that cannot be read (``zipfile.BadZipFile``) is reported on
    stdout and counted only in ``total_files_count``.

    Args:
        zip_file_path: Path of the ZIP archive to inspect.
    """
    # missing_files_info is only mutated in place, so it needs no ``global``.
    global total_files_count, contains_all_files_count, missing_files_count

    total_files_count += 1

    # Only reading the archive can raise BadZipFile, so keep the try minimal.
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zf:
            # A set gives O(1) membership tests for the lookups below.
            entries = {os.path.normpath(item) for item in zf.namelist()}
    except zipfile.BadZipFile as e:
        print(f"Error: {e}")
        print(f"File path: {zip_file_path}")
        return

    zip_file_name = os.path.splitext(os.path.basename(zip_file_path))[0]
    expected_folder = os.path.normpath(zip_file_name)

    # Many tools store only "folder/file" members and no explicit "folder/"
    # entry, so the folder also counts as present when any member lives
    # underneath it.
    folder_prefix = expected_folder + os.sep
    folder_present = expected_folder in entries or any(
        entry.startswith(folder_prefix) for entry in entries
    )
    if not folder_present:
        missing_files_count += 1
        missing_files_info.append(
            f"{zip_file_name} does not contain the expected folder.\n"
        )
        return

    missing_files = [
        expected_file
        for expected_file in _EXPECTED_FILES
        if os.path.join(expected_folder, expected_file) not in entries
    ]

    if not missing_files:
        # Accumulate across calls; the running total must never be reset here.
        contains_all_files_count += 1
    else:
        missing_files_count += 1
        missing_files_info.append(
            f"{zip_file_name} is missing the following files: {', '.join(missing_files)}\n"
        )
|
|
| |
|
|
| |
# Driver: check every ZIP archive in the current working directory and print
# a summary of the results.
folder_path = os.getcwd()

# Keep regular files only: a directory whose name ends in ".zip" would make
# zipfile.ZipFile raise an error that check_zip_contents() does not catch.
# Sorting makes the report order deterministic, since os.listdir() returns
# entries in arbitrary order.
zip_files = sorted(
    f
    for f in os.listdir(folder_path)
    if f.endswith('.zip') and os.path.isfile(os.path.join(folder_path, f))
)

for zip_file in zip_files:
    zip_file_path = os.path.join(folder_path, zip_file)
    check_zip_contents(zip_file_path)

print(f"\nNumber of ZIP files containing all files and folders: {contains_all_files_count}")

# Each message already ends with a newline, so print() leaves a blank line
# between the reported archives.
for info in missing_files_info:
    print(info)

print(f"\nProcessed {total_files_count} ZIP files")
print(f"Number of files missing: {missing_files_count}")
|
|