| """ |
| Question generation utilities for MCQ and open-text formats. |
| """ |
|
|
| import random |
| from typing import Dict, List, Optional, Tuple |
|
|
| from .logger import setup_logger |
|
|
| logger = setup_logger(__name__) |
|
|
|
|
class QuestionGenerator:
    """Generates questions in MCQ and open-text formats.

    MCQ builders return a dict with the keys ``question``, ``options``
    (label -> value), ``correct_answer`` (the label of the correct option)
    and ``correct_value``. Open-text builders return ``question`` and
    ``correct_answer`` only.
    """

    def __init__(
        self,
        num_options: int = 4,
        option_labels: Optional[List[str]] = None,
        distractor_strategy: str = "balanced"
    ):
        """
        Initialize question generator.

        Args:
            num_options: Number of MCQ options (must be at least 1)
            option_labels: Labels for options (e.g., ['A', 'B', 'C', 'D']).
                When omitted or empty, consecutive capital letters are
                generated to match ``num_options`` (up to 26).
            distractor_strategy: Strategy for generating distractor options
                - "present_only": prefer sounds present in audio, falling
                  back to absent ones when there are not enough
                - "mixed": random mix of present and absent sounds
                - "balanced": at most two present sounds, rest absent
                Any other value is treated as "balanced".

        Raises:
            ValueError: If ``num_options`` is smaller than 1 or the number
                of labels does not match ``num_options``.
        """
        if num_options < 1:
            raise ValueError(f"num_options must be at least 1 (got {num_options})")

        if not option_labels:
            # Derive default labels from num_options instead of hard-coding
            # four of them, so e.g. num_options=3 works without extra args.
            option_labels = [chr(ord("A") + i) for i in range(min(num_options, 26))]

        self.num_options = num_options
        self.option_labels = list(option_labels)
        self.distractor_strategy = distractor_strategy

        if len(self.option_labels) != num_options:
            raise ValueError(f"Number of option labels must match num_options ({num_options})")

    def generate_count_mcq(
        self,
        question_template: str,
        correct_count: int,
        all_categories: List[str]
    ) -> Dict:
        """
        Generate an MCQ for counting task.

        Args:
            question_template: Question text template
            correct_count: Correct number of unique sounds
            all_categories: List of all available categories (currently
                unused; kept so existing callers keep working)

        Returns:
            Dictionary with question, options, and correct answer
        """
        options = self._generate_count_options(correct_count)
        return self._build_mcq(question_template, options, correct_count)

    def generate_count_open_text(
        self,
        question_template: str,
        correct_count: int
    ) -> Dict:
        """
        Generate an open-text question for counting task.

        Args:
            question_template: Question text template
            correct_count: Correct number of unique sounds

        Returns:
            Dictionary with question and correct answer (count as a string)
        """
        return {
            "question": question_template,
            "correct_answer": str(correct_count)
        }

    def generate_category_mcq(
        self,
        question_template: str,
        correct_category: str,
        present_categories: List[str],
        all_categories: List[str]
    ) -> Dict:
        """
        Generate an MCQ where answer is a sound category.

        Args:
            question_template: Question text template
            correct_category: Correct category
            present_categories: Categories present in the audio
            all_categories: All available categories

        Returns:
            Dictionary with question, options, and correct answer. If fewer
            distinct distractors exist than requested, the option map
            contains fewer entries than there are labels.
        """
        distractors = self._generate_category_distractors(
            correct_category,
            present_categories,
            all_categories,
            self.num_options - 1
        )
        return self._build_mcq(
            question_template,
            [correct_category] + distractors,
            correct_category
        )

    def generate_category_open_text(
        self,
        question_template: str,
        correct_category: str
    ) -> Dict:
        """
        Generate an open-text question where answer is a sound category.

        Args:
            question_template: Question text template
            correct_category: Correct category

        Returns:
            Dictionary with question and correct answer
        """
        return {
            "question": question_template,
            "correct_answer": correct_category
        }

    def generate_sequence_open_text(
        self,
        question_template: str,
        sequence: List[str]
    ) -> Dict:
        """
        Generate an open-text question for sequence/ordering.

        Args:
            question_template: Question text template
            sequence: List of categories in order

        Returns:
            Dictionary with question and correct answer (the categories
            joined with ", ")
        """
        return {
            "question": question_template,
            "correct_answer": ", ".join(sequence)
        }

    def _build_mcq(self, question_template: str, options: List, correct_value) -> Dict:
        """Shuffle ``options`` in place, label them and assemble the MCQ dict.

        ``correct_value`` must be an element of ``options``.
        """
        random.shuffle(options)
        correct_label = self.option_labels[options.index(correct_value)]
        # zip stops at the shorter sequence, so a short option list simply
        # leaves the trailing labels unused.
        option_map = dict(zip(self.option_labels, options))

        return {
            "question": question_template,
            "options": option_map,
            "correct_answer": correct_label,
            "correct_value": correct_count_or_category(correct_value)
        } if False else {
            "question": question_template,
            "options": option_map,
            "correct_answer": correct_label,
            "correct_value": correct_value
        }

    @staticmethod
    def _unique(items: List[str]) -> List[str]:
        """Return ``items`` without duplicates, keeping first-seen order."""
        return list(dict.fromkeys(items))

    def _generate_count_options(self, correct_count: int) -> List[int]:
        """
        Generate count options including the correct count.

        Args:
            correct_count: Correct count value

        Returns:
            List of distinct count options; the correct count comes first
        """
        # Candidates are 1..11 (or up to correct_count + 2 for large counts).
        # The range is only widened further when more distractors are needed
        # than that pool can supply, so every label still gets an option.
        upper = max(correct_count + 3, 12, self.num_options + 1)
        candidates = [v for v in range(1, upper) if v != correct_count]

        wanted = min(self.num_options - 1, len(candidates))
        return [correct_count] + random.sample(candidates, wanted)

    def _generate_category_distractors(
        self,
        correct_category: str,
        present_categories: List[str],
        all_categories: List[str],
        num_distractors: int
    ) -> List[str]:
        """
        Generate distractor categories based on strategy.

        The result never contains ``correct_category`` and never contains
        the same category twice, even if the input lists have duplicates or
        the correct category is not among ``present_categories``.

        Args:
            correct_category: Correct category
            present_categories: Categories present in audio
            all_categories: All available categories
            num_distractors: Number of distractors to generate

        Returns:
            List of distractor categories (shorter than ``num_distractors``
            only when not enough distinct categories exist)
        """
        present_lookup = set(present_categories)
        present_pool = [
            c for c in self._unique(present_categories) if c != correct_category
        ]
        # The correct category must be excluded here as well: when it is not
        # part of the audio it would otherwise count as "absent" and could
        # be drawn as its own distractor.
        absent_pool = [
            c for c in self._unique(all_categories)
            if c not in present_lookup and c != correct_category
        ]

        max_present = min(len(present_pool), num_distractors)
        if self.distractor_strategy == "present_only":
            # Use as many present sounds as possible, absent ones fill up.
            num_present = max_present
        elif self.distractor_strategy == "mixed":
            num_present = random.randint(0, max_present)
        else:
            # "balanced" (and any unrecognised strategy): at most two
            # present sounds, the remainder comes from absent categories.
            num_present = min(random.choice([0, 1, 2]), max_present)

        distractors = random.sample(present_pool, num_present)
        num_absent = min(num_distractors - num_present, len(absent_pool))
        distractors.extend(random.sample(absent_pool, num_absent))

        # Top up from whatever is left in all_categories when the chosen
        # split could not supply enough distractors.
        missing = num_distractors - len(distractors)
        if missing > 0:
            taken = set(distractors)
            leftovers = [
                c for c in self._unique(all_categories)
                if c not in taken and c != correct_category
            ]
            distractors.extend(random.sample(leftovers, min(missing, len(leftovers))))

        return distractors[:num_distractors]
|
|