| |
| """ |
| Debug Adaptive Beta Bug |
| |
| Quick test to reproduce the bug where word count decreases when beta is relaxed. |
| """ |
|
|
| import os |
| import sys |
| import logging |
|
|
| |
# Configure the root logger at import time: INFO level, each record prefixed
# with its timestamp.
# NOTE(review): test_debug_adaptive_beta's docstring mentions "debug logging",
# but the level set here is INFO — confirm the service emits the statistics of
# interest at INFO or above.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
|
|
def setup_environment():
    """Set the cache environment variables and make the backend sources importable.

    Both locations are resolved relative to the directory containing this
    script: ``../cache-dir`` is exported through ``HF_HOME``,
    ``TRANSFORMERS_CACHE`` and ``SENTENCE_TRANSFORMERS_HOME``, and
    ``../crossword-app/backend-py/src`` is put at the front of ``sys.path``
    unless it is already listed. The chosen cache directory is printed.
    """
    script_dir = os.path.dirname(__file__)

    # One absolute cache location shared by all three variables.
    cache_root = os.path.abspath(os.path.join(script_dir, '..', 'cache-dir'))
    for variable in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[variable] = cache_root

    # Front of sys.path so this checkout's backend modules are found first.
    backend_src = os.path.abspath(
        os.path.join(script_dir, '..', 'crossword-app', 'backend-py', 'src')
    )
    if backend_src not in sys.path:
        sys.path.insert(0, backend_src)

    print(f"Using cache directory: {cache_root}")
|
|
def test_debug_adaptive_beta():
    """Run the multi-topic case associated with the adaptive-beta bug and print the outcome.

    Calls ``setup_environment()``, sets the soft-minimum configuration through
    environment variables, creates and initializes a ``ThematicWordService``,
    and requests 50 words for the topics "universe", "movies" and "languages".
    The number of returned words and the first five entries are printed.

    Any exception raised while importing or using the service is printed
    together with its traceback instead of being propagated, so the caller
    continues after a failure.
    """
    setup_environment()

    print("🐛 Debug Adaptive Beta Bug")
    print("=" * 50)

    # Set before the service is imported and created below.
    # NOTE(review): presumably the service reads these from the environment —
    # confirm against ThematicWordService.
    os.environ['MULTI_TOPIC_METHOD'] = 'soft_minimum'
    os.environ['SOFT_MIN_BETA'] = '10.0'
    os.environ['SOFT_MIN_ADAPTIVE'] = 'true'
    os.environ['SOFT_MIN_MIN_WORDS'] = '15'
    os.environ['SOFT_MIN_MAX_RETRIES'] = '5'
    os.environ['SOFT_MIN_BETA_DECAY'] = '0.7'
    os.environ['THEMATIC_VOCAB_SIZE_LIMIT'] = '1000'

    try:
        # Imported here, after setup_environment() has put the backend source
        # directory on sys.path.
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService...")
        service = ThematicWordService()
        service.initialize()

        inputs = ["universe", "movies", "languages"]
        # "\n" is a real newline: a blank line separates this from the setup output.
        print(f"\nTesting problematic case: {inputs}")
        print("Expected: Word count should INCREASE as beta decreases")
        print("-" * 50)

        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.3,
            multi_theme=False,
        )

        print(f"\n✅ Final result: {len(results)} words generated")
        # Explicit length check: the container type returned by the service is
        # not visible from this script.
        if len(results) > 0:
            print("Top 5 words:")
            for i, (word, similarity, _tier) in enumerate(results[:5], 1):
                print(f" {i}. {word}: {similarity:.4f}")
        else:
            print(" ⚠️ No words generated!")

    except Exception as e:
        # Best-effort debug run: report the failure and let the caller carry on.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
|
|
def main():
    """Print a banner, run ``test_debug_adaptive_beta()``, then print a review checklist."""
    print("🧪 Debugging Adaptive Beta Bug")
    print("This will show detailed score statistics at each beta level")
    print("=" * 60)

    test_debug_adaptive_beta()

    # "\n" is a real newline: a blank line separates the run's output from the checklist.
    print("\n" + "=" * 60)
    print("🔍 Look for patterns in the debug output:")
    print("1. Do score ranges change as expected?")
    print("2. Is the threshold comparison working correctly?")
    print("3. Are scores getting more permissive with lower beta?")
    print("=" * 60)
|
|
# Run the debug session only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()