customeragent-api / server /scripts /download_nltk_data.py
anasraza526's picture
Clean deploy to Hugging Face
ac90985
#!/usr/bin/env python3
"""
Download all required NLTK data for the customer agent application.
Run this script once to ensure all NLTK resources are available.
"""
import nltk
import sys
def download_nltk_data() -> int:
    """Download the NLTK resources this application needs and verify them.

    Each resource is fetched with ``nltk.download`` and afterwards looked up
    with ``nltk.data.find``. Progress and results are printed to stdout.

    Returns:
        0 if every resource is found on the NLTK data path after the
        download pass, 1 if at least one is missing.
    """
    # Resource id passed to nltk.download -> path checked by nltk.data.find.
    # Keeping both in one table guarantees that every resource we download
    # is also verified.
    resources = {
        'punkt_tab': 'tokenizers/punkt_tab',
        'punkt': 'tokenizers/punkt',
        'stopwords': 'corpora/stopwords',
        'wordnet': 'corpora/wordnet',
        'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
        'omw-1.4': 'corpora/omw-1.4',
    }

    print("Downloading NLTK data...")
    print("-" * 50)
    for resource in resources:
        # Flush so the progress line is visible while the download blocks.
        print(f"Downloading {resource}...", end=" ", flush=True)
        try:
            # nltk.download signals most failures by returning False rather
            # than raising, so the return value must be checked explicitly.
            succeeded = nltk.download(resource, quiet=True)
        except Exception as e:
            # Best effort: report the error and continue with the remaining
            # resources; the verification pass below decides the exit code.
            print(f"✗ (Error: {e})")
        else:
            print("✓" if succeeded else "✗ (download failed)")
    print("-" * 50)
    print("NLTK data download complete!")

    # Verify downloads. The exit status depends only on what is actually
    # present on disk, so a run without network access still succeeds when
    # the data was installed earlier.
    print("\nVerifying downloads...")
    missing = []
    for path in resources.values():
        try:
            nltk.data.find(path)
        except LookupError:
            print(f"✗ {path} - NOT FOUND")
            missing.append(path)
        else:
            print(f"✓ {path}")

    if missing:
        print("\n✗ Some resources are missing. Please check the errors above.")
        return 1

    print("\n✓ All NLTK resources are properly installed!")
    return 0
if __name__ == "__main__":
    # Script entry point: propagate the function's 0/1 result as the
    # process exit status.
    exit_status = download_nltk_data()
    raise SystemExit(exit_status)