| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline |
| import torch |
| import re |
|
|
class SentimentAnalyzer:
    """Financial-news sentiment analyzer.

    Primary path: prompt a causal LLM (Gemma) for a structured verdict and
    parse its reply.  Fallback path: a DistilBERT SST-2 sentiment pipeline,
    used when the LLM fails to load or errors at inference time.
    """

    def __init__(self, model_name="google/gemma-2-2b-it"):
        """
        Initialize sentiment analyzer with a Gemma model.

        Args:
            model_name: Hugging Face model name (gemma-2-2b-it is used
                here because a gemma 3-4b checkpoint is not yet available).
        """
        print(f"Loading model: {model_name}")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                # fp16 halves GPU memory; CPU kernels need fp32.
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True,
            )

            # device_map="auto" already places the model on GPU; only the
            # CPU path needs an explicit move.
            if self.device == "cpu":
                self.model = self.model.to(self.device)

            print("Model loaded successfully!")

        except Exception as e:
            print(f"Error loading model: {e}")

            # LLM unavailable: mark it and eagerly build the fallback
            # classifier so analyze_sentiment can still run.
            self.model = None
            self.sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english"
            )

    def analyze_sentiment(self, text):
        """
        Analyze the sentiment of a piece of text.

        Args:
            text: the text to analyze.

        Returns:
            dict: {sentiment, score, explanation} where sentiment is one of
                "Positive"/"Negative"/"Neutral" and score is in [0, 1].
        """
        # Guard: empty or whitespace-only input has nothing to analyze.
        if not text or not text.strip():
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "No text to analyze"
            }

        if self.model is None:
            return self._fallback_sentiment(text)

        try:
            prompt = f"""Analyze the sentiment of this financial news. Rate it as Positive, Negative, or Neutral with a confidence score (0-1).
News: {text[:500]}
Provide your analysis in this exact format:
Sentiment: [Positive/Negative/Neutral]
Score: [0.0-1.0]
Reason: [Brief explanation]"""

            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = inputs.to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.3,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # BUGFIX: decode only the newly generated tokens.  Decoding the
            # full sequence echoes the prompt, and the parser would then
            # match the prompt's own "Reason: [Brief explanation]" template
            # line instead of the model's actual answer.
            prompt_len = inputs["input_ids"].shape[-1]
            response = self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

            return self._parse_llm_response(response)

        except Exception as e:
            print(f"Error in analysis: {e}")
            return self._fallback_sentiment(text)

    def _parse_llm_response(self, response):
        """Extract sentiment, score and explanation from the LLM response.

        Returns a dict with the same keys as analyze_sentiment; never raises.
        """
        # BUGFIX: start sentiment at None instead of "Neutral".  The original
        # default was always a valid label, which made the keyword-scan
        # fallback below unreachable dead code.
        sentiment = None
        score = 0.5
        explanation = "Unable to analyze"

        try:
            sentiment_match = re.search(r'Sentiment:\s*(\w+)', response, re.IGNORECASE)
            if sentiment_match:
                sentiment = sentiment_match.group(1).capitalize()

            score_match = re.search(r'Score:\s*([\d.]+)', response)
            if score_match:
                score = float(score_match.group(1))
                score = max(0.0, min(1.0, score))  # clamp to [0, 1]

            reason_match = re.search(r'Reason:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
            if reason_match:
                explanation = reason_match.group(1).strip()

            # Free-form replies without a "Sentiment:" line (or with an
            # unexpected label) are classified by keyword scan.
            if sentiment not in ("Positive", "Negative", "Neutral"):
                lowered = response.lower()
                if "positive" in lowered:
                    sentiment = "Positive"
                elif "negative" in lowered:
                    sentiment = "Negative"
                else:
                    sentiment = "Neutral"

        except Exception as e:
            print(f"Parse error: {e}")
            # Never leak None / an invalid label out of this parser.
            if sentiment not in ("Positive", "Negative", "Neutral"):
                sentiment = "Neutral"

        return {
            "sentiment": sentiment,
            "score": score,
            "explanation": explanation
        }

    def _fallback_sentiment(self, text):
        """Fallback analysis using the DistilBERT SST-2 pipeline."""
        try:
            # BUGFIX: build the pipeline lazily.  The original only created
            # it when the LLM failed to *load*, so an inference-time LLM
            # error reached this method with no sentiment_pipeline attribute
            # and every result silently degraded to "Analysis unavailable".
            pipe = getattr(self, "sentiment_pipeline", None)
            if pipe is None:
                pipe = pipeline(
                    "sentiment-analysis",
                    model="distilbert-base-uncased-finetuned-sst-2-english"
                )
                self.sentiment_pipeline = pipe

            result = pipe(text[:512])[0]

            # SST-2 emits only POSITIVE/NEGATIVE labels.
            sentiment = "Positive" if result['label'] == 'POSITIVE' else "Negative"
            score = result['score']

            return {
                "sentiment": sentiment,
                "score": score,
                "explanation": f"Analyzed using fallback model with {score:.2%} confidence"
            }
        except Exception:
            # Best-effort fallback must never raise.
            return {
                "sentiment": "Neutral",
                "score": 0.5,
                "explanation": "Analysis unavailable"
            }

    def analyze_batch(self, news_list):
        """
        Analyze sentiment for a batch of news items.

        Args:
            news_list: list of dicts with "title" and "summary" keys
                (either may be missing; missing keys contribute "").

        Returns:
            list: one dict per item, merging the original news fields with
                the sentiment result (sentiment keys win on collision).
        """
        results = []

        for news in news_list:
            # Title and summary are analyzed as one combined text.
            combined_text = f"{news.get('title', '')} {news.get('summary', '')}"

            sentiment_result = self.analyze_sentiment(combined_text)

            results.append({
                **news,
                **sentiment_result
            })

        return results