| |
| """ |
| Enhanced Test: Semantic Clue Generator with ThematicWordGenerator Integration |
| Test semantic clue generation with proper embedding integration for better quality. |
| """ |
|
|
| import sys |
| import logging |
| from pathlib import Path |
|
|
| |
# Put this file's directory at the front of sys.path; presumably this is so
# the generator modules imported below resolve when the script is launched
# from another working directory.
sys.path.insert(0, str(Path(__file__).parent))


# The generator modules are treated as optional at import time: on
# ImportError the error is printed and GENERATOR_AVAILABLE is set to False
# instead of aborting the import of this file.
try:
    from semantic_clue_generator import SemanticClueGenerator
    from thematic_word_generator import UnifiedThematicWordGenerator
    GENERATOR_AVAILABLE = True
except ImportError as e:
    print(f"β Import error: {e}")
    GENERATOR_AVAILABLE = False


# Configure root logging at INFO; each record carries a timestamp, the
# logger name and the level. Note this runs after the imports above.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
|
|
|
|
def _rate_clue(word, clue):
    """Grade *clue* for *word* using the batch report's string heuristics.

    Args:
        word: The answer word the clue was generated for.
        clue: The generated clue text.

    Returns:
        ``"poor"`` when the clue is 3 characters or shorter or contains the
        word itself (case-insensitive); ``"acceptable"`` when it contains one
        of the generic phrases; ``"excellent"`` when it has at least three
        whitespace-separated words; ``"good"`` otherwise.
    """
    lowered = clue.lower()
    if len(clue) <= 3 or word.lower() in lowered:
        return "poor"
    if "term related to" in lowered or "associated with" in lowered:
        return "acceptable"
    if len(clue.split()) >= 3:
        return "excellent"
    return "good"


def test_enhanced_semantic_clues():
    """Run the batch clue-generation check and print a quality report.

    Initializes a ``UnifiedThematicWordGenerator`` and a
    ``SemanticClueGenerator`` wired to it, requests clues for a fixed list of
    word/topic pairs in three styles, grades the first candidate of each pair
    with simple string heuristics, and prints per-pair results followed by an
    overall summary and verdict.

    Returns:
        None. Everything is reported on stdout. The function returns early
        (after printing a message) when the generator modules could not be
        imported or when either generator fails to initialize.
    """
    if not GENERATOR_AVAILABLE:
        print("β Cannot run test - Enhanced generators not available")
        return

    print("π§ͺ Testing Enhanced Semantic Clue Generation")
    print("=" * 60)

    print("π Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("✅ Thematic word generator initialized successfully")
    except Exception as e:
        print(f"β Failed to initialize thematic word generator: {e}")
        return

    print("π Initializing semantic clue generator with thematic integration...")
    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)

    try:
        clue_gen.initialize()
        print("✅ Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"β Failed to initialize semantic clue generator: {e}")
        return

    test_cases = [
        ("CAT", "animals"),
        ("KITTY", "animals"),
        ("MEAL", "food"),
        ("HUNGER", "food"),
        ("TECH", "technology"),
        ("SCIENTIST", "science"),

        ("DOG", "animals"),
        ("PYTHON", "technology"),
        ("GUITAR", "music"),
        ("OCEAN", "geography"),
        ("ATOM", "science"),
        ("PIZZA", "food"),
        ("MOUNTAIN", "geography"),
        ("VIOLIN", "music"),
        ("DATABASE", "technology"),
    ]

    print(f"\nπ― Testing {len(test_cases)} word-topic combinations with enhanced semantic analysis")
    print("=" * 60)

    successful_clues = 0
    total_tests = len(test_cases)
    high_quality_clues = 0

    styles = ("category", "definition", "description")

    for word, topic in test_cases:
        print(f"\nπ Testing: '{word}' + '{topic}'")
        print("-" * 40)

        try:
            candidates = []
            for style in styles:
                # Isolate each style so one failing style does not discard
                # the candidates produced by the others (this mirrors the
                # per-style handling in interactive_test).
                try:
                    clue = clue_gen.generate_clue(
                        word=word,
                        topic=topic,
                        clue_style=style,
                        difficulty="medium"
                    )
                except Exception:
                    logger.exception(
                        "Clue style %r failed for %r / %r", style, word, topic
                    )
                    continue
                if clue and clue not in candidates:
                    candidates.append(clue)

            print(f"Generated {len(candidates)} candidates:")
            for i, candidate in enumerate(candidates, 1):
                print(f"  {i}. {candidate}")

            # "Best" is simply the first distinct candidate, in style order.
            best_clue = candidates[0] if candidates else None

            print(f"\nπ Best clue: {best_clue}")

            if not best_clue:
                print("β No clue generated")
                continue

            rating = _rate_clue(word, best_clue)
            if rating == "excellent":
                high_quality_clues += 1
                successful_clues += 1
                print("✅ Quality: EXCELLENT")
            elif rating == "good":
                successful_clues += 1
                print("✅ Quality: GOOD")
            elif rating == "acceptable":
                successful_clues += 1
                print("π Quality: ACCEPTABLE (generic)")
            else:
                print("β Quality: POOR")

        except Exception as e:
            # Safety net: a failure on one pair must not abort the batch.
            print(f"β Error generating clue: {e}")
            logger.exception("Detailed error:")

    print("\n" + "=" * 60)
    print("π ENHANCED SEMANTIC RESULTS")
    print("=" * 60)
    print(f"Total tests: {total_tests}")
    print(f"Successful clues: {successful_clues}")
    print(f"High quality clues: {high_quality_clues}")
    print(f"Overall success rate: {(successful_clues/total_tests)*100:.1f}%")
    print(f"High quality rate: {(high_quality_clues/total_tests)*100:.1f}%")

    # Verdict thresholds: 60% excellent, else 80% / 60% successful.
    if high_quality_clues >= total_tests * 0.6:
        print("π Enhanced semantic approach produces excellent clues!")
        print("π Ready for integration into main crossword application")
    elif successful_clues >= total_tests * 0.8:
        print("π Good improvement over LLM, suitable for production use")
    elif successful_clues >= total_tests * 0.6:
        print("β οΈ Decent improvement, may need more template refinement")
    else:
        print("β Still struggling, consider alternative approaches")
|
|
|
|
def interactive_test():
    """Prompt for ``word,topic`` pairs and print the clues generated for each.

    Initializes the thematic word generator and the semantic clue generator,
    then reads lines from stdin until the user quits. Recognized inputs:

    * ``quit`` / ``exit`` / ``q`` - leave the loop.
    * ``batch`` - run ``test_enhanced_semantic_clues()`` and return to the
      prompt.
    * ``word,topic`` - generate clues in three styles, list the distinct
      candidates and grade the first one.

    Ctrl-C and end-of-input (closed stdin / Ctrl-D) also end the loop.

    Returns:
        None. All output goes to stdout. Returns early when the generator
        modules could not be imported or a generator fails to initialize.
    """
    # Same availability guard as the batch test; without it a failed import
    # surfaces later as a misleading NameError "initialization" failure.
    if not GENERATOR_AVAILABLE:
        print("β Cannot run test - Enhanced generators not available")
        return

    print("π§ͺ Interactive Semantic Clue Testing")
    print("=" * 60)

    print("π Initializing thematic word generator...")
    try:
        word_gen = UnifiedThematicWordGenerator(vocab_size_limit=50000)
        word_gen.initialize()
        print("✅ Thematic word generator initialized successfully")
    except Exception as e:
        print(f"β Failed to initialize thematic word generator: {e}")
        return

    print("π Initializing semantic clue generator with thematic integration...")
    clue_gen = SemanticClueGenerator(thematic_word_generator=word_gen)

    try:
        clue_gen.initialize()
        print("✅ Semantic clue generator initialized successfully")
    except Exception as e:
        print(f"β Failed to initialize semantic clue generator: {e}")
        return

    print("\n" + "=" * 60)
    print("π― INTERACTIVE MODE")
    print("=" * 60)
    print("Enter word-topic pairs to test clue generation.")
    print("Format: word,topic (e.g., 'cat,animals')")
    print("Type 'quit' or 'exit' to stop.")
    print("Type 'batch' to run the full test suite.")
    print("-" * 60)

    styles = ("category", "definition", "description")

    while True:
        try:
            user_input = input("\nπ Enter word,topic: ").strip()
            command = user_input.lower()

            if command in ('quit', 'exit', 'q'):
                print("π Goodbye!")
                break
            if command == 'batch':
                print("\nπ Running full test suite...")
                test_enhanced_semantic_clues()
                print("\n" + "=" * 60)
                print("π― Back to interactive mode")
                print("-" * 60)
                continue
            # An empty line has no comma, so this also rejects blank input.
            if ',' not in user_input:
                print("β Invalid format. Use: word,topic (e.g., 'cat,animals')")
                continue

            raw_word, raw_topic = user_input.split(',', 1)
            word = raw_word.strip().upper()
            topic = raw_topic.strip().lower()

            if not word or not topic:
                print("β Both word and topic are required")
                continue

            print(f"\nπ Testing: '{word}' + '{topic}'")
            print("-" * 40)

            candidates = []
            for style in styles:
                try:
                    clue = clue_gen.generate_clue(
                        word=word,
                        topic=topic,
                        clue_style=style,
                        difficulty="medium"
                    )
                except Exception as e:
                    # Logged at WARNING so the failure is visible at the
                    # configured INFO level instead of silently vanishing.
                    logger.warning(
                        "Clue style %r failed for %r: %s", style, word, e
                    )
                    continue
                if clue and clue not in candidates:
                    candidates.append(clue)

            if not candidates:
                print("β No clues generated")
                continue

            print(f"Generated {len(candidates)} candidates:")
            for i, candidate in enumerate(candidates, 1):
                print(f"  {i}. {candidate}")

            # "Best" is simply the first distinct candidate, in style order.
            best_clue = candidates[0]
            print(f"\nπ Best clue: {best_clue}")

            lowered = best_clue.lower()
            if len(best_clue) > 3 and word.lower() not in lowered:
                is_generic = ("term related to" in lowered or
                              "associated with" in lowered)

                if is_generic:
                    print("π Quality: ACCEPTABLE (generic)")
                elif len(best_clue.split()) >= 3:
                    print("✅ Quality: EXCELLENT")
                else:
                    print("✅ Quality: GOOD")
            else:
                print("β Quality: POOR")

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: input() raises it on every call
            # once stdin is exhausted, so treating it as a generic error
            # would spin forever.
            print("\nπ Goodbye!")
            break
        except Exception as e:
            print(f"β Error: {e}")
|
|
|
|
def main():
    """Dispatch to interactive mode or to the full batch run.

    When the first command-line argument is exactly ``--interactive`` the
    prompt loop is started; otherwise a usage hint is printed and the batch
    test runs.
    """
    # The slice is empty when no argument was given, so this never indexes
    # past the end of argv.
    if sys.argv[1:2] == ['--interactive']:
        interactive_test()
        return

    print("Run with --interactive for user input mode, or without args for full test.")
    test_enhanced_semantic_clues()
|
|
|
|
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()