---
# Benchmark Configuration for Summarizer-Standard Model
# Standard summarization model for text summarization tasks

# Model under test and its request settings.
model:
  name: "summarizer-standard"
  base_url: "http://127.0.0.1:8000"
  temperature: 0.3  # Lower temperature for consistent summaries
  max_tokens: 256  # Reasonable summary length
  timeout: 45  # Allow time for summarization

# Datasets to benchmark against; one list entry per dataset.
datasets:
  - name: "cnn_dailymail"
    file: "datasets/cnn_dailymail_sample.jsonl"
    sample_size: 100  # Reasonable sample size for benchmarking
    instruction: "Summarize the following article in 2-3 sentences."
    input_field: "article"
    expected_field: "highlights"

# Thresholds applied when scoring the generated summaries.
evaluation:
  rouge_threshold: 0.3  # Lenient ROUGE score threshold
  semantic_preservation_min: 0.4  # Lenient semantic similarity
  length_ratio_min: 0.1  # Minimum compression ratio
  length_ratio_max: 0.8  # Maximum compression ratio

# Where and how results are written.
output:
  results_dir: "results"
  include_raw_responses: false

# NOTE(review): the nesting of the two keys below could not be recovered
# from the original single-line layout. They are placed at top level here;
# confirm against the code that loads this file (they may belong under
# `output` or another parent).
model_size_gb: 0.369  # From file size check (369MB)

cnn_dailymail:
  source_url: "https://huggingface.co/datasets/cnn_dailymail"
  max_samples: 2000