import torch import nltk import numpy as np import os import kagglehub from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification from bert_score import score as bert_score_calculator try: nltk.data.find('tokenizers/punkt') except nltk.downloader.DownloadError: nltk.download('punkt') class LLM_Generator: def __init__(self, model_handle, device='cuda'): self.device = device print(f"Downloading model from Kaggle Hub: {model_handle}") model_path = kagglehub.model_download(model_handle) print(f"Model downloaded to: {model_path}") self.tokenizer = AutoTokenizer.from_pretrained(model_path) self.model = AutoModelForCausalLM.from_pretrained( model_path, torch_dtype="auto", device_map="auto" ) def generate(self, prompt, num_samples=1, temperature=0.7, max_new_tokens=150): messages = [ {"role": "system", "content": "you are a helpful assistant."}, {"role": "user", "content": prompt} ] text = self.tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True, enable_thinking=True ) model_inputs = self.tokenizer([text] * num_samples, return_tensors="pt").to(self.device) generated_ids_batch = self.model.generate( **model_inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=temperature, num_return_sequences=num_samples ) input_ids_len = model_inputs.input_ids.shape[1] final_responses = [] for generated_ids in generated_ids_batch: output_ids = generated_ids[input_ids_len:].tolist() try: # Find the start of the final content after the "thinking" part # The token ID 151668 corresponds to the end of the thinking block for Qwen-3 index = len(output_ids) - output_ids[::-1].index(151668) except ValueError: index = 0 content = self.tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n") final_responses.append(content) return final_responses class SelfCheckGPT: def __init__(self, device=None): if device: self.device = device else: self.device = 'cuda' if torch.cuda.is_available() else 'cpu' self.nli_tokenizer = None self.nli_model = None def _load_nli_model(self): if self.nli_model is None: nli_model_name = "microsoft/deberta-v3-large-mnli" try: self.nli_tokenizer = AutoTokenizer.from_pretrained(nli_model_name) self.nli_model = AutoModelForSequenceClassification.from_pretrained(nli_model_name).to(self.device) except Exception as e: print(f"Error loading NLI model: {e}") raise def _check_bertscore(self, sentences, sample_responses): all_scores = [] for sent in sentences: refs = [sent] * len(sample_responses) cands = sample_responses _, _, F1 = bert_score_calculator( cands, refs, lang="en", verbose=False, idf=False, device=self.device ) avg_bert_score = F1.mean().item() score = 1.0 - avg_bert_score all_scores.append(score) return all_scores def _check_nli(self, sentences, sample_responses): self._load_nli_model() all_scores = [] for sent in sentences: contradiction_probs = [] for sample in sample_responses: tokenized_input = self.nli_tokenizer( sample, sent, return_tensors="pt", truncation=True, max_length=512 ).to(self.device) with torch.no_grad(): logits = self.nli_model(**tokenized_input).logits entailment_logit = logits[0, self.nli_model.config.label2id['entailment']] contradiction_logit = logits[0, self.nli_model.config.label2id['contradiction']] prob_contradiction = torch.exp(contradiction_logit) / (torch.exp(entailment_logit) + torch.exp(contradiction_logit)) contradiction_probs.append(prob_contradiction.item()) avg_contradiction_prob = np.mean(contradiction_probs) all_scores.append(avg_contradiction_prob) return all_scores def check(self, main_response, sample_responses, method='nli'): sentences = nltk.sent_tokenize(main_response) if not sentences: return [] if method.lower() == 'bertscore': scores = self._check_bertscore(sentences, sample_responses) elif method.lower() == 'nli': scores = self._check_nli(sentences, sample_responses) else: raise ValueError(f"Invalid method '{method}'. Choose from 'bertscore', 'nli'.") results = [{"sentence": sent, "score": score} for sent, score in zip(sentences, scores)] return results def main(): model_handle = "qwen-lm/qwen-3/transformers/0.6b" print("Initializing LLM Generator...") generator = LLM_Generator(model_handle=model_handle) prompt = "Write a short biography of Neil Armstrong, the first man on the moon. Include the name of the spacecraft he used." print(f"Generating responses for prompt: '{prompt}'") responses = generator.generate(prompt, num_samples=6, temperature=0.8, max_new_tokens=150) main_response = responses[0] sample_responses = responses[1:] print("\n--- Generated Main Response ---") print(main_response) print("\n--- Generated Sample Responses ---") for i, r in enumerate(sample_responses): print(f"{i+1}. {r[:100]}...") checker = SelfCheckGPT() print("\n\n--- Running SelfCheckGPT with 'nli' method ---") nli_results = checker.check(main_response, sample_responses, method='nli') print("Higher scores suggest a higher probability of being a hallucination.") for result in nli_results: print(f"Score: {result['score']:.4f}\tSentence: {result['sentence']}") print("\n--- Running SelfCheckGPT with 'bertscore' method ---") bertscore_results = checker.check(main_response, sample_responses, method='bertscore') print("Higher scores suggest a higher probability of being a hallucination.") for result in bertscore_results: print(f"Score: {result['score']:.4f}\tSentence: {result['sentence']}")