| """ |
| Manual benchmark for the SemanticDeduplicator component. |
| """ |
|
|
| import sys |
| import logging |
| from efficient_context.compression import SemanticDeduplicator |
|
|
| |
# Set up basic logging at INFO level, then create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
def main():
    """Run SemanticDeduplicator over a fixed sample at several thresholds.

    Builds a two-paragraph sample of thematically overlapping sentences,
    prints its word count, and then, for each threshold in a fixed list,
    creates a new ``SemanticDeduplicator``, compresses the sample and prints
    the compressed word count, the compressed/original word ratio and the
    first 100 characters of the result.

    The value returned by ``compress`` is used as a string (it is split on
    whitespace and sliced).
    """
    print("Testing SemanticDeduplicator")

    # Sample input: one paragraph on climate change, one on renewable energy.
    # The literal's lines are flush-left so no indentation ends up in the text.
    sample_text = """
Climate change is a significant global challenge.
Global warming is affecting ecosystems worldwide.
The Earth's temperature is rising due to human activities.
Climate change poses a serious threat to our planet.
Rising global temperatures are causing environmental problems.

Renewable energy is key to a sustainable future.
Clean energy sources help reduce carbon emissions.
Sustainable power generation is vital for fighting climate change.
Green energy technologies are becoming more affordable.
Renewable resources provide alternatives to fossil fuels.
"""

    # Word counts are whitespace-delimited token counts.
    original_words = len(sample_text.split())
    print(f"Original text length: {original_words} words")

    for threshold in (0.7, 0.8, 0.85, 0.9, 0.95):
        print(f"\nTesting threshold: {threshold}")

        # A separate deduplicator instance is created for every threshold.
        compressed = SemanticDeduplicator(threshold=threshold).compress(sample_text)
        compressed_words = len(compressed.split())

        print(f"Compressed text length: {compressed_words} words")
        print(f"Compression ratio: {compressed_words / original_words:.2f}")

        # Only the first 100 characters are shown; the ellipsis is always appended.
        print(f"Compressed text (preview): {compressed[:100]}...")
|
|
# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|