"""Smoke test for GLEN phase-2 document ID generation.

Runs ``examples/glen_phase2/makeid_glen.py`` in-process with a fixed set of
command-line arguments, then reports on the docid file it is expected to
leave behind. Status prefixes are plain ASCII so the output is safe on
consoles that cannot encode emoji.

The process exits with status 0 on success and 1 if the generation script
raised an exception.
"""
import os
import runpy
import sys
import traceback

# Script under test, relative to the repository root (the expected cwd).
MAKEID_SCRIPT = 'examples/glen_phase2/makeid_glen.py'

# File whose presence and line count are reported after the run.
OUTPUT_FILE = "logs/GLEN_P2_test_docids.tsv"

# Command line handed to the script under test via ``sys.argv``.
MAKEID_ARGV = (
    'makeid_glen.py',
    '--model_name_or_path', 'logs/test_glen_vault/GLEN_P2_test',
    '--infer_dir', 'logs/test_glen_vault/GLEN_P2_test',
    '--dataset_name', 'the_vault',
    '--docid_file_name', 'GLEN_P2_test_docids',
    '--per_device_eval_batch_size', '4',
    '--max_input_length', '128',
    '--num_return_sequences', '10',
)


def report_output(output_file):
    """Print a short summary of *output_file* and say whether it exists.

    Args:
        output_file: Path of the docid file to inspect.

    Returns:
        True if the file could be opened (its line count and first line are
        printed), False if it does not exist (a warning is printed instead).
    """
    try:
        with open(output_file, 'r') as f:
            lines = f.readlines()
    except FileNotFoundError:
        print("[WARN] Output file not found")
        return False

    print(f"[INFO] Output file created: {output_file}")
    print(f"[INFO] Generated {len(lines)} document IDs")
    if lines:
        print(f"[INFO] Sample line: {lines[0].strip()}")
    return True


def main():
    """Run the document ID generation script and report the outcome.

    Returns:
        0 if the script ran to completion, 1 if it raised an exception
        (the error and its traceback are printed).
    """
    print("Testing GLEN document ID generation (final version)...")
    print(f"Working directory: {os.getcwd()}")

    # Make the project's ``src`` directory importable by the script under test.
    sys.path.append('src')
    # The script reads its configuration from the process command line.
    sys.argv = list(MAKEID_ARGV)

    try:
        print("[RUN] Starting document ID generation...")

        # run_path executes the file as ``__main__`` in its own namespace and
        # closes the source file, unlike ``exec(open(...).read())``.
        runpy.run_path(MAKEID_SCRIPT, run_name="__main__")

        print("[OK] Document ID generation completed successfully!")
        report_output(OUTPUT_FILE)
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"[FAIL] Error: {e}")
        traceback.print_exc()
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())