"""
Test script to verify custom NLTK data path functionality.
This shows that the WordNet generator now downloads NLTK data to model_cache/nltk_data.
"""

import sys
from pathlib import Path

# Make sibling modules (e.g. wordnet_clue_generator) importable when this
# file is executed directly as a script rather than via the package.
sys.path.insert(0, str(Path(__file__).parent))
|
|
def test_custom_nltk_path():
    """Test that NLTK data downloads to the custom cache directory.

    Exercises WordNetClueGenerator and ensure_nltk_data with a cache rooted
    at ./model_cache/nltk_data and prints what it finds.

    Returns:
        bool: True when configuration succeeded; False on any error
        (the error and traceback are printed, never raised).
    """
    print("🧪 Testing Custom NLTK Data Path")
    print("=" * 50)

    model_cache = Path('./model_cache')
    nltk_cache = model_cache / 'nltk_data'

    print(f"📁 Model cache directory: {model_cache.absolute()}")
    print(f"📁 Expected NLTK data directory: {nltk_cache.absolute()}")

    try:
        # Imported lazily so a missing project dependency is reported as a
        # failed test instead of crashing the whole script at import time.
        from wordnet_clue_generator import WordNetClueGenerator

        generator = WordNetClueGenerator(cache_dir=str(model_cache))

        print("\n🔧 Generator Configuration:")
        print(f"   Cache dir: {generator.cache_dir}")
        print(f"   NLTK data dir: {generator.nltk_data_dir}")

        print("\n📍 NLTK Path Test:")
        if nltk_cache.exists():
            print("   ✅ NLTK cache directory exists")
            contents = list(nltk_cache.iterdir())
            print(f"   Contents: {len(contents)} items")
            for item in contents[:5]:
                print(f"      - {item.name}")
        else:
            print("   📝 NLTK cache directory will be created on initialization")

        print("\n🔍 Testing ensure_nltk_data function:")
        from wordnet_clue_generator import ensure_nltk_data

        success = ensure_nltk_data(str(nltk_cache))
        print(f"   Result: {'✅ Success' if success else '❌ Failed'}")

        if success:
            # ensure_nltk_data should have prepended our directory to
            # nltk.data.path; show the head of the search path to confirm.
            import nltk
            print("   NLTK search paths (first 3):")
            for i, path in enumerate(nltk.data.path[:3], 1):
                print(f"      {i}. {path}")

        if nltk_cache.exists():
            print("   ✅ NLTK data directory created")
            contents = list(nltk_cache.rglob('*'))
            print(f"   📊 Directory contents: {len(contents)} total items")

        return True

    except Exception as e:
        print(f"❌ Test error: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
def show_cache_integration():
    """Show how the NLTK cache integrates with the existing cache structure.

    Prints a static diagram of the intended layout, then — if ./model_cache
    exists on disk — the actual total and NLTK-only sizes in megabytes.
    """
    print("\n📦 Cache Integration Overview")
    print("=" * 40)

    model_cache = Path('./model_cache')

    # Static picture of the intended layout; not read from disk.
    print("Cache Structure:")
    print("model_cache/")
    print("├── nltk_data/          # <- New NLTK data location")
    print("│   └── corpora/")
    print("│       ├── wordnet/")
    print("│       ├── punkt/")
    print("│       └── omw-1.4/")
    print("├── unified_vocabulary_*.pkl")
    print("├── unified_embeddings_*.npy")
    print("└── models--sentence-transformers/")

    if model_cache.exists():
        # Sum file sizes recursively; convert bytes -> MB.
        actual_size = sum(f.stat().st_size for f in model_cache.rglob('*') if f.is_file()) / (1024 * 1024)
        print(f"\n📊 Current cache size: {actual_size:.1f} MB")

        nltk_dir = model_cache / 'nltk_data'
        if nltk_dir.exists():
            nltk_size = sum(f.stat().st_size for f in nltk_dir.rglob('*') if f.is_file()) / (1024 * 1024)
            print(f"📁 NLTK data size: {nltk_size:.1f} MB")
|
|
def main():
    """Run the custom NLTK path test and print a pass/fail summary."""
    print("🚀 Custom NLTK Path Test")
    print("=" * 60)
    print("Testing WordNet generator with model_cache/nltk_data location")

    success = test_custom_nltk_path()
    # Always show the cache overview, even when the test itself failed.
    show_cache_integration()

    if success:
        print("\n✅ SUCCESS!")
        print("📁 NLTK data will now download to: model_cache/nltk_data/")
        print("🎯 This keeps all AI/NLP data centralized in model_cache")
        print("⚡ WordNet clue generator ready for use!")
    else:
        print("\n❌ Test failed - check configuration")
|
|
# Allow running this file directly as a script.
if __name__ == "__main__":
    main()