Spaces:

vimalk78
/

abc123

Running

App Files Files Community

abc123 / hack /test_adaptive_beta.py

vimalk78

feat: add multi-topic intersection methods with adaptive beta for word selection

b05514b 8 months ago

raw

history blame contribute delete

6.84 kB

	#!/usr/bin/env python3
	"""
	Test Adaptive Beta with Cricket+Sports Example

	Tests that the adaptive beta mechanism generates more words for constrained cases
	like "cricket sentence" + "sports topic".
	"""

	import os
	import sys
	import warnings
	import logging

	# Configure logging to see the adaptive beta messages:
	# root logger at INFO level, emitting only the bare message text.
	logging.basicConfig(level=logging.INFO, format='%(message)s')

	# Suppress all warnings for cleaner output
	warnings.filterwarnings("ignore")

	def setup_environment():
	    """Point the model caches at ../cache-dir and expose the backend sources.

	    Sets HF_HOME, TRANSFORMERS_CACHE and SENTENCE_TRANSFORMERS_HOME to the
	    absolute path of the ``cache-dir`` folder one level above this script,
	    puts the backend ``src`` folder at the front of ``sys.path`` if it is
	    not already listed, and prints the cache directory in use.
	    """
	    script_dir = os.path.dirname(__file__)

	    # All three cache variables share the root cache-dir folder.
	    cache_dir = os.path.abspath(os.path.join(script_dir, '..', 'cache-dir'))
	    for cache_var in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
	        os.environ[cache_var] = cache_dir

	    # Backend sources live under crossword-app/backend-py/src.
	    backend_path = os.path.abspath(
	        os.path.join(script_dir, '..', 'crossword-app', 'backend-py', 'src')
	    )
	    if backend_path not in sys.path:
	        sys.path.insert(0, backend_path)

	    print(f"Using cache directory: {cache_dir}")

	def test_adaptive_beta_cricket_sports():
	    """Run the adaptive soft-minimum scenarios, including cricket+sports.

	    Configures the soft-minimum method through environment variables,
	    creates and initializes a ThematicWordService, then for each scenario
	    prints the top generated words and whether at least 15 words came back.
	    Any exception is printed together with its traceback instead of being
	    propagated to the caller.
	    """
	    setup_environment()

	    print("🧪 Testing Adaptive Beta with Cricket+Sports Example")
	    print("=" * 60)

	    # Soft minimum with adaptive beta, driven entirely by environment variables
	    os.environ.update({
	        'MULTI_TOPIC_METHOD': 'soft_minimum',
	        'SOFT_MIN_BETA': '10.0',
	        'SOFT_MIN_ADAPTIVE': 'true',
	        'SOFT_MIN_MIN_WORDS': '15',
	        'SOFT_MIN_MAX_RETRIES': '5',
	        'SOFT_MIN_BETA_DECAY': '0.7',
	        'THEMATIC_VOCAB_SIZE_LIMIT': '5000',  # Smaller vocab for faster testing
	    })

	    # Each scenario: (name, inputs, expected outcome, description)
	    scenarios = [
	        (
	            "Cricket sentence only",
	            ["india won test series against england"],
	            ">30 words (no constraint)",
	            "Single sentence - should generate many words",
	        ),
	        (
	            "Cricket sentence + Sports topic",
	            ["india won test series against england", "Sports"],
	            "~15-25 words (adaptive beta should kick in)",
	            "Sentence + topic - adaptive beta should relax to get more words",
	        ),
	        (
	            "Multiple sports topics",
	            ["Cricket", "Tennis", "Football"],
	            "~15-20 words (adaptive beta for 3 topics)",
	            "Three topics - should auto-adapt for more words",
	        ),
	    ]

	    try:
	        from services.thematic_word_service import ThematicWordService

	        print("Creating ThematicWordService with adaptive soft minimum...")
	        service = ThematicWordService()

	        print("Initializing service (adaptive beta configuration will be logged)...")
	        service.initialize()

	        for index, (name, inputs, expected, description) in enumerate(scenarios, 1):
	            print(f"\n📊 Test {index}: {name}")
	            print(f" Description: {description}")
	            print(f" Expected: {expected}")
	            print(f" Inputs: {inputs}")
	            print("-" * 50)

	            words = service.generate_thematic_words(
	                inputs, num_words=50, min_similarity=0.3, multi_theme=False
	            )
	            word_count = len(words)

	            print(f"✅ Generated {word_count} words")
	            print("Top 15 words:")
	            for rank, (word, similarity, tier) in enumerate(words[:15], 1):
	                print(f" {rank:2d}. {word:15s}: {similarity:.4f} ({tier})")

	            # Report against the 15-word minimum
	            if word_count < 15:
	                print(f" ⚠️ Warning: Only {word_count} words generated (< 15 minimum)")
	                print(" This suggests adaptive beta may need tuning")
	            else:
	                print(f" ✅ Success: Generated {word_count} words (≥ 15 minimum)")

	    except Exception as e:
	        print(f"❌ Test failed: {e}")
	        import traceback
	        traceback.print_exc()

	def test_adaptive_beta_disabled():
	    """Run the cricket+sports case with adaptive beta switched off.

	    Acts as the fixed-beta baseline for the adaptive run. The function sets
	    up its own import path and soft-minimum settings, so it behaves the same
	    whether or not another test ran earlier in the process. Any exception
	    is printed together with its traceback instead of being propagated.
	    """
	    # Make the backend importable even when this test runs on its own.
	    setup_environment()

	    print("\n\n🔒 Testing with Adaptive Beta DISABLED")
	    print("=" * 60)

	    # Pin the baseline configuration explicitly: the "fixed beta=10.0"
	    # message below only holds if these values are set here rather than
	    # inherited from whichever test happened to run before this one.
	    os.environ['MULTI_TOPIC_METHOD'] = 'soft_minimum'
	    os.environ['SOFT_MIN_BETA'] = '10.0'
	    os.environ['THEMATIC_VOCAB_SIZE_LIMIT'] = '5000'  # Same vocab as the adaptive run

	    # Disable adaptive beta
	    os.environ['SOFT_MIN_ADAPTIVE'] = 'false'

	    try:
	        from services.thematic_word_service import ThematicWordService

	        service = ThematicWordService()
	        service.initialize()

	        # Test the problematic case
	        inputs = ["india won test series against england", "Sports"]
	        print("Testing cricket+sports with fixed beta=10.0...")

	        results = service.generate_thematic_words(
	            inputs,
	            num_words=50,
	            min_similarity=0.3,
	            multi_theme=False
	        )

	        print(f"✅ Generated {len(results)} words (with fixed beta)")
	        print("Top 10 words:")
	        # The tier is part of each result tuple but is not shown here.
	        for j, (word, similarity, _tier) in enumerate(results[:10], 1):
	            print(f" {j:2d}. {word:15s}: {similarity:.4f}")

	        if len(results) < 15:
	            print(f" ⚠️ As expected: Only {len(results)} words with fixed beta (too strict)")
	        else:
	            print(f" ✅ Surprisingly good: {len(results)} words even with fixed beta")

	    except Exception as e:
	        print(f"❌ Test failed: {e}")
	        import traceback
	        traceback.print_exc()

	def main():
	    """Run the adaptive and fixed-beta tests, then print the expectations summary.

	    Exceptions escaping either test are printed with their traceback
	    rather than propagated.
	    """
	    banner = "=" * 70

	    print("🧪 Adaptive Beta Integration Test")
	    print("Testing automatic beta relaxation for constrained word generation")
	    print(banner)

	    try:
	        # Adaptive run first, then the fixed-beta run for comparison
	        test_adaptive_beta_cricket_sports()
	        test_adaptive_beta_disabled()

	        print("\n" + banner)
	        summary_lines = (
	            "🎯 ADAPTIVE BETA TEST RESULTS:",
	            "1. Adaptive beta should automatically relax when < 15 words found",
	            "2. Cricket+Sports should now generate 15+ words (was 16)",
	            "3. Complex multi-topic queries should auto-adapt for sufficient words",
	            "4. Logging shows beta adjustment process",
	        )
	        for line in summary_lines:
	            print(line)
	        print(banner)

	    except Exception as e:
	        print(f"❌ Adaptive beta test failed: {e}")
	        import traceback
	        traceback.print_exc()

	# Script entry point: run the tests only when executed directly, not on import.
	if __name__ == "__main__":
	    main()