Spaces:
Runtime error
Runtime error
#!/usr/bin/env bash
#
# Dataset Setup Script
#
# Downloads and sets up production datasets under ./datasets, relative to the
# directory the script is started from:
#   - courseq:  only the directory is created; nothing is downloaded.
#   - clinc150: data_full.json is fetched from the clinc/oos-eval repository
#               and stored as clinc150/data.json.
#   - MedQuAD, SymCAT, Roman Urdu corpus: manual instructions are printed.
#
# Exit status: 0 when every automated step succeeded, 1 otherwise.

set -euo pipefail

readonly DATASETS_DIR="datasets"
readonly CLINC150_URL="https://raw.githubusercontent.com/clinc/oos-eval/master/data/data_full.json"

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Fetch the CLINC150 intent dataset into ${DATASETS_DIR}/clinc150/data.json.
# Globals:   DATASETS_DIR (read), CLINC150_URL (read)
# Arguments: none
# Outputs:   curl's progress meter and any diagnostics on stderr
# Returns:   0 on success, non-zero if curl is missing or the download fails
#######################################
download_clinc150() {
  local dest_dir="${DATASETS_DIR}/clinc150"
  local dest="${dest_dir}/data.json"
  local partial="${dest}.part"

  mkdir -p -- "${dest_dir}" || return 1

  if ! command -v curl >/dev/null 2>&1; then
    printf 'curl is required to download CLINC150 but was not found\n' >&2
    return 1
  fi

  # --fail makes HTTP errors (404, 500, ...) a non-zero exit instead of
  # saving the error page as data.json; --location follows redirects.
  # The payload lands in a side file first so that a failed transfer never
  # replaces or truncates an existing data.json.
  if ! curl --fail --location --output "${partial}" "${CLINC150_URL}"; then
    rm -f -- "${partial}"
    return 1
  fi

  mv -f -- "${partial}" "${dest}"
}

# Print the setup instructions for the datasets that cannot be automated.
print_manual_instructions() {
  cat <<EOF

📦 3. Setting up MedQuAD (Manual Download Required)
⚠️ MedQuAD requires manual setup:
 1. Visit: https://github.com/abachaa/MedQuAD
 2. Download the repository
 3. Extract to: ${DATASETS_DIR}/medquad/


📦 4. Setting up SymCAT (Manual Download Required)
⚠️ SymCAT requires manual setup:
 1. Visit: https://github.com/symcat/symcat-corpus
 2. Download symptoms.json
 3. Place in: ${DATASETS_DIR}/symcat/symptoms.json


📦 5. Setting up Roman Urdu Corpus (Manual Download Required)
⚠️ Roman Urdu corpus requires manual setup:
 1. Search for Roman Urdu parallel corpus datasets
 2. Place CSV in: ${DATASETS_DIR}/roman_urdu_corpus/data.csv


EOF
}

# Print the follow-up steps shown at the end of every run.
print_next_steps() {
  cat <<'EOF'

📋 Next Steps:
 1. Follow manual setup instructions above for remaining datasets
 2. Restart your server
 3. Run: python test_enhanced_modules.py

💡 Note: System works fine with sample data for development!
EOF
}

#######################################
# Run all setup steps in order.
# Returns: 0 if every automated step succeeded, 1 otherwise.
#######################################
main() {
  local status=0

  echo "🚀 Setting up production datasets..."

  # Create datasets directory
  mkdir -p -- "${DATASETS_DIR}" || die "cannot create ${DATASETS_DIR}/"

  echo ""
  echo "📦 1. Creating CourseQ Dataset (Custom)"
  # CourseQ is already created by our system - no download needed
  mkdir -p -- "${DATASETS_DIR}/courseq" \
    || die "cannot create ${DATASETS_DIR}/courseq/"
  echo "✅ CourseQ ready"

  echo ""
  echo "📦 2. Downloading CLINC150 Intent Dataset"
  # A failed download is recorded rather than fatal so the manual
  # instructions below are still shown to the user.
  if download_clinc150; then
    echo "✅ CLINC150 downloaded"
  else
    printf '❌ CLINC150 download failed: %s\n' "${CLINC150_URL}" >&2
    status=1
  fi

  print_manual_instructions

  if (( status == 0 )); then
    echo "✅ Automated setup complete!"
  else
    echo "❌ Automated setup finished with errors (see messages above)" >&2
  fi

  print_next_steps

  return "${status}"
}

main "$@"