# Source: Hugging Face Space SpringWang08/Medical-VQA (status: paused).
# File size: 15,775 bytes.

import os
from dataclasses import dataclass

import torch

from src.utils.text_utils import postprocess_answer


def _as_bool(value: object, default: bool = False) -> bool:
    if value is None:
        return default
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}


@dataclass
class RewriteConfig:
    """
    Settings for the answer-rewrite stage used by ``MedicalAnswerRewriter``.
    """

    # Master switch; the rewriter only counts as enabled when this is true
    # AND ``model_id`` is non-empty.
    enabled: bool = False
    # Model identifier handed to ``from_pretrained`` for both the tokenizer
    # and the causal LM.
    model_id: str = ""
    # On CUDA, request 4-bit NF4 quantization via ``BitsAndBytesConfig``.
    use_4bit: bool = True
    # Passed to ``generate`` as ``max_new_tokens``.
    max_new_tokens: int = 28
    # Passed to ``postprocess_answer`` as ``max_words`` for both the rewritten
    # answer and the fallback answer.
    max_words: int = 10


_REWRITE_STYLE_BY_MODEL = {
    "A1": {
        "vi": "Diễn đạt đơn giản, trực tiếp, gần với đáp án gốc.",
        "en": "Use simple, direct wording close to the raw answer.",
    },
    "A2": {
        "vi": "Diễn đạt như một quan sát ngắn trên hình ảnh.",
        "en": "Word it as a short imaging observation.",
    },
    "B1": {
        "vi": "Diễn đạt tự nhiên, mềm hơn, dễ đọc.",
        "en": "Use natural, softer, easy-to-read wording.",
    },
    "B2": {
        "vi": "Diễn đạt hay hơn A1/A2, theo phong cách lâm sàng súc tích.",
        "en": "Use stronger concise clinical wording than A1/A2.",
    },
    "DPO": {
        "vi": "Diễn đạt hay nhất theo hướng thận trọng, chuyên nghiệp.",
        "en": "Use the most careful, professional wording.",
    },
    "PPO": {
        "vi": "Diễn đạt hay nhất theo hướng rõ ràng, mạch lạc.",
        "en": "Use the clearest, most polished wording.",
    },
    "SOUP": {
        "vi": "Diễn đạt cân bằng giữa lâm sàng, thận trọng và rõ ràng.",
        "en": "Use balanced clinical, careful, and clear wording.",
    },
}


_MODEL_SPECIFIC_EXAMPLES = {
    "A1": {
        "vi": {
            "question": "Ảnh có khối u không?",
            "answer": "có",
            "rewrite": "Có, có khối u.",
        },
        "en": {
            "question": "Is there a mass?",
            "answer": "yes",
            "rewrite": "Yes, there is a mass.",
        },
    },
    "A2": {
        "vi": {
            "question": "Ảnh có khối u không?",
            "answer": "có",
            "rewrite": "Có, thấy khối u trên ảnh.",
        },
        "en": {
            "question": "Is there a mass?",
            "answer": "yes",
            "rewrite": "Yes, a mass is seen.",
        },
    },
    "B2": {
        "vi": {
            "question": "Ảnh có khối u không?",
            "answer": "có",
            "rewrite": "Có, hình ảnh gợi ý khối u.",
        },
        "en": {
            "question": "Is there a mass?",
            "answer": "yes",
            "rewrite": "Yes, imaging suggests a mass.",
        },
    },
    "DPO": {
        "vi": {
            "question": "Ảnh có khối u không?",
            "answer": "có",
            "rewrite": "Có, có dấu hiệu gợi ý khối u.",
        },
        "en": {
            "question": "Is there a mass?",
            "answer": "yes",
            "rewrite": "Yes, findings suggest a mass.",
        },
    },
    "PPO": {
        "vi": {
            "question": "Ảnh có khối u không?",
            "answer": "có",
            "rewrite": "Có, kết quả gợi ý khối u rõ.",
        },
        "en": {
            "question": "Is there a mass?",
            "answer": "yes",
            "rewrite": "Yes, results clearly suggest a mass.",
        },
    },
    "SOUP": {
        "vi": {
            "question": "Ảnh có khối u không?",
            "answer": "có",
            "rewrite": "Có, hình ảnh gợi ý khối u rõ.",
        },
        "en": {
            "question": "Is there a mass?",
            "answer": "yes",
            "rewrite": "Yes, imaging clearly suggests a mass.",
        },
    },
}


class MedicalAnswerRewriter:
    """
    Rewrite lớp cuối cho VQA output.

    Mục tiêu:
    - Giữ nguyên ý nghĩa gốc.
    - Làm câu trả lời tự nhiên và đầy đủ hơn một chút.
    - Vẫn giới hạn tối đa số từ theo cấu hình.

    Mô hình này không thay thế VQA model chính.
    """

    def __init__(self, config: RewriteConfig | None = None) -> None:
        self.config = config or self._load_config()
        self._load_attempted = False
        self._ready = False
        self._tokenizer = None
        self._model = None
        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    @staticmethod
    def _load_config() -> RewriteConfig:
        model_id = (
            os.getenv("ANSWER_REWRITE_MODEL_ID", "").strip()
            or os.getenv("QWEN_REWRITE_MODEL_ID", "").strip()
            or "Qwen/Qwen2.5-14B-Instruct"
        )
        enabled = _as_bool(os.getenv("ANSWER_REWRITE_ENABLED"), default=True)
        use_4bit = _as_bool(os.getenv("ANSWER_REWRITE_USE_4BIT"), default=True)
        max_new_tokens = int(os.getenv("ANSWER_REWRITE_MAX_NEW_TOKENS", "28"))
        max_words = int(os.getenv("ANSWER_REWRITE_MAX_WORDS", "10"))
        return RewriteConfig(
            enabled=enabled,
            model_id=model_id,
            use_4bit=use_4bit,
            max_new_tokens=max_new_tokens,
            max_words=max_words,
        )

    @property
    def enabled(self) -> bool:
        return bool(self.config.enabled and self.config.model_id)

    @property
    def model_id(self) -> str:
        return self.config.model_id

    @property
    def ready(self) -> bool:
        return self._ready

    def _lazy_load(self) -> None:
        if self._load_attempted:
            return
        self._load_attempted = True

        if not self.enabled:
            return

        try:
            from transformers import AutoModelForCausalLM, AutoTokenizer
            hf_token = (
                os.getenv("ANSWER_REWRITE_HF_TOKEN", "").strip()
                or os.getenv("HF_TOKEN", "").strip()
                or os.getenv("HUGGINGFACE_HUB_TOKEN", "").strip()
                or None
            )

            tokenizer = AutoTokenizer.from_pretrained(self.config.model_id, trust_remote_code=True, token=hf_token)
            model_kwargs = {
                "trust_remote_code": True,
                "low_cpu_mem_usage": True,
            }

            if self._device.type == "cuda":
                if self.config.use_4bit:
                    try:
                        from transformers import BitsAndBytesConfig

                        model_kwargs["quantization_config"] = BitsAndBytesConfig(
                            load_in_4bit=True,
                            bnb_4bit_use_double_quant=True,
                            bnb_4bit_quant_type="nf4",
                            bnb_4bit_compute_dtype=torch.bfloat16,
                        )
                    except Exception as exc:
                        print(f"[WARNING] Rewrite 4-bit config unavailable, falling back to bf16: {exc}")
                        model_kwargs["torch_dtype"] = torch.bfloat16
                else:
                    model_kwargs["torch_dtype"] = torch.bfloat16
                model_kwargs["device_map"] = "auto"
            else:
                model_kwargs["torch_dtype"] = torch.float32

            if hf_token is not None:
                model_kwargs["token"] = hf_token

            model = AutoModelForCausalLM.from_pretrained(self.config.model_id, **model_kwargs)
            model.eval()

            self._tokenizer = tokenizer
            self._model = model
            self._ready = True
            print(f"[INFO] ✅ Answer rewriter ready: {self.config.model_id}")
        except Exception as exc:
            self._ready = False
            print(f"[WARNING] ❌ Answer rewriter load failed: {exc}")

    def _get_style_instruction(self, source_model: str | None, language: str) -> str:
        if not source_model:
            return ""
        style = _REWRITE_STYLE_BY_MODEL.get(source_model.upper())
        if not style:
            return ""
        lang_key = "en" if language.lower().startswith("en") else "vi"
        return style[lang_key]

    def _get_model_specific_example(self, source_model: str | None, language: str) -> dict[str, str] | None:
        if not source_model:
            return None
        examples = _MODEL_SPECIFIC_EXAMPLES.get(source_model.upper())
        if not examples:
            return None
        lang_key = "en" if language.lower().startswith("en") else "vi"
        return examples[lang_key]

    def _build_messages(
        self,
        question: str,
        answer: str,
        language: str = "vi",
        source_model: str | None = None,
    ) -> list[dict[str, str]]:
        style_instruction = self._get_style_instruction(source_model, language)
        model_example = self._get_model_specific_example(source_model, language)
        system_prompt = (
            "Bạn là bộ biên tập câu trả lời cho hệ thống Medical VQA. "
            "Nhiệm vụ của bạn là mở rộng đáp án gốc thành một câu trả lời đầy đủ, "
            "tự nhiên và rõ nghĩa hơn, nhưng vẫn phải bám sát đáp án gốc. "
            "KHÔNG thêm thông tin y khoa mới, KHÔNG suy diễn ngoài đáp án gốc. "
            "Có thể dùng câu hỏi để xác định đối tượng y khoa đang được hỏi, "
            "nhưng đáp án gốc quyết định ý nghĩa đúng/sai/có/không. "
            "Nếu nhiều model có cùng đáp án gốc, vẫn dùng phong cách riêng của model hiện tại. "
            "CÂU TRẢ LỜI BẮT BUỘC PHẢI DƯỚI 10 TỪ, ÍT NHẤT 3 TỪ. "
            "Chỉ trả về câu trả lời cuối cùng."
        )
        if style_instruction:
            system_prompt += f" Phong cách riêng cho model này: {style_instruction}"

        if language.lower().startswith("en"):
            system_prompt = (
                "You are an editor for a Medical VQA system. "
                "Expand the raw answer into a fuller, natural, clearer answer "
                "while staying strictly based on the raw answer. "
                "Do not add new medical facts or infer beyond the raw answer. "
                "You may use the question to identify the medical target, "
                "but the raw answer controls yes/no/presence/absence. "
                "If several models share the same raw answer, still use this model's wording style. "
                "THE ANSWER MUST BE UNDER 10 WORDS and at least 3 words. "
                "Return only the final answer."
            )
            if style_instruction:
                system_prompt += f" Model-specific wording style: {style_instruction}"

        examples = [
            {
                "question": "Ảnh này có tràn dịch màng phổi không?",
                "answer": "không",
                "rewrite": "Không, không thấy tràn dịch màng phổi.",
            },
            {
                "question": "Hình ảnh có tim to không?",
                "answer": "có",
                "rewrite": "Có, hình ảnh cho thấy tim to.",
            },
            {
                "question": "Đây là loại ảnh gì?",
                "answer": "x quang ngực",
                "rewrite": "Đây là ảnh X-quang ngực.",
            },
        ]

        if language.lower().startswith("en"):
            examples = [
                {
                    "question": "Is there pleural effusion?",
                    "answer": "no",
                    "rewrite": "No, pleural effusion is not seen.",
                },
                {
                    "question": "Is the heart enlarged?",
                    "answer": "yes",
                    "rewrite": "Yes, the heart appears enlarged.",
                },
                {
                    "question": "What modality is this?",
                    "answer": "chest x ray",
                    "rewrite": "This is a chest X-ray.",
                },
            ]

        if model_example:
            examples.append(model_example)

        messages: list[dict[str, str]] = [{"role": "system", "content": system_prompt}]
        for ex in examples:
            messages.append(
                {
                    "role": "user",
                    "content": f"Câu hỏi: {ex['question']}\nĐáp án gốc: {ex['answer']}",
                }
            )
            messages.append({"role": "assistant", "content": ex["rewrite"]})

        user_prompt = (
            f"Câu hỏi: {question}\n"
            f"Đáp án gốc: {answer}\n"
            f"Model nguồn: {source_model or 'unknown'}\n"
            "Viết lại thành câu đầy đủ hơn, tự nhiên hơn, dưới 10 từ. "
            "CHỈ DÙNG THÔNG TIN TỪ ĐÁP ÁN GỐC."
        )
        if style_instruction:
            user_prompt += f"\nPhong cách diễn đạt: {style_instruction}"

        if language.lower().startswith("en"):
            user_prompt = (
                f"Question: {question}\nRaw answer: {answer}\n"
                f"Source model: {source_model or 'unknown'}\n"
                "Rewrite it as a fuller, natural answer under 10 words. "
                "Use only information from the raw answer."
            )
            if style_instruction:
                user_prompt += f"\nWording style: {style_instruction}"
        messages.append({"role": "user", "content": user_prompt})
        return messages

    def rewrite(
        self,
        question: str,
        answer: str,
        language: str = "vi",
        source_model: str | None = None,
    ) -> str:
        """
        Rewrite câu trả lời để tự nhiên hơn.
        Nếu rewrite model không sẵn sàng, trả về output đã postprocess.
        """
        if not answer:
            return ""

        self._lazy_load()
        fallback = postprocess_answer(answer, max_words=self.config.max_words)
        if not self.enabled or not self._ready:
            return fallback

        try:
            messages = self._build_messages(
                question=question,
                answer=answer,
                language=language,
                source_model=source_model,
            )
            prompt = self._tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
            inputs = self._tokenizer(prompt, return_tensors="pt", truncation=True)
            inputs = {k: v.to(self._device) for k, v in inputs.items()}

            with torch.inference_mode():
                output_ids = self._model.generate(
                    **inputs,
                    max_new_tokens=self.config.max_new_tokens,
                    do_sample=False,
                    temperature=0.1,
                    repetition_penalty=1.05,
                    pad_token_id=self._tokenizer.eos_token_id,
                )

            prompt_len = inputs["input_ids"].shape[1]
            generated = self._tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True).strip()
            cleaned = postprocess_answer(generated, max_words=self.config.max_words)
            return cleaned or fallback
        except Exception as exc:
            print(f"[WARNING] Rewrite failed: {exc}")
            return fallback


# Rewriter reused across rewrite_final_answer calls so the rewrite model is
# not reloaded on every call.
_DEFAULT_REWRITER: "MedicalAnswerRewriter | None" = None


def _get_default_rewriter() -> "MedicalAnswerRewriter":
    """
    Return the shared rewriter, rebuilding it only when needed.

    A new instance is created when none exists yet, when the
    environment-derived configuration has changed, or when the cached
    instance has no loaded model. The last case keeps the previous
    retry-on-next-call behaviour after a failed load, and is cheap when
    rewriting is disabled.
    """
    global _DEFAULT_REWRITER
    config = MedicalAnswerRewriter._load_config()
    cached = _DEFAULT_REWRITER
    if cached is None or cached.config != config or not cached.ready:
        cached = MedicalAnswerRewriter(config)
        _DEFAULT_REWRITER = cached
    return cached


def rewrite_final_answer(
    question: str,
    answer: str,
    language: str = "vi",
    source_model: str | None = None,
) -> str:
    """
    Convenience helper for notebooks / web handlers.

    Uses a shared ``MedicalAnswerRewriter`` configured from the environment,
    so a successfully loaded rewrite model is reused by later calls.

    Args:
        question: Original question asked about the image.
        answer: Raw answer to rewrite.
        language: Language tag forwarded to ``MedicalAnswerRewriter.rewrite``.
        source_model: Name of the model that produced ``answer``.

    Returns:
        Whatever ``MedicalAnswerRewriter.rewrite`` returns for these arguments.
    """
    return _get_default_rewriter().rewrite(
        question=question,
        answer=answer,
        language=language,
        source_model=source_model,
    )