| import streamlit as st |
| import requests |
| import openai |
| import os |
| from datasets import load_dataset |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.linear_model import SGDClassifier |
| from sklearn.metrics import classification_report, accuracy_score |
| import joblib |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| from pathlib import Path |
|
|
| |
# Configure the Streamlit page (Korean title, newspaper icon, wide layout).
st.set_page_config(page_title="์ ์น์ ์ฑํฅ ๋ถ์ ๋ฐ ๋ฐ๋ ๊ด์ ์์ฑ", page_icon="๐ฐ", layout="wide")

# OpenAI API key comes from the environment; stays None when unset, in which
# case generate_article_gpt4 will fail and return its error string.
openai.api_key = os.getenv("OPENAI_API_KEY")
|
|
| |
@st.cache_data
def load_huggingface_data():
    """Download and cache the PoliticalTweets dataset from the Hugging Face Hub.

    Returns:
        The loaded dataset; downstream code reads ``['train']['text']`` and
        ``['train']['party']`` (see combine_datasets).
    """
    dataset = load_dataset("jacobvs/PoliticalTweets")
    return dataset
|
|
| |
def fetch_naver_news(query, display=15):
    """Fetch recent news items from the Naver Open API news search.

    Args:
        query: Search keyword string.
        display: Number of articles to request.

    Returns:
        List of article dicts (``title``/``description``/``link``/...), or an
        empty list when the request fails (an error is shown in the UI).
    """
    # SECURITY: credentials were hard-coded in source. Read them from the
    # environment; the original literals remain as fallbacks for backward
    # compatibility but should be rotated and removed from version control.
    client_id = os.getenv("NAVER_CLIENT_ID", "I_8koTJh3R5l4wLurQbG")
    client_secret = os.getenv("NAVER_CLIENT_SECRET", "W5oWYlAgur")

    url = "https://openapi.naver.com/v1/search/news.json"
    headers = {
        "X-Naver-Client-Id": client_id,
        "X-Naver-Client-Secret": client_secret,
    }
    params = {
        "query": query,
        "display": display,
        "start": 1,
        "sort": "date",
    }

    try:
        # Explicit timeout so the Streamlit app cannot hang on a dead network.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException:
        st.error("๋ด์ค ๋ฐ์ดํฐ๋ฅผ ๋ถ๋ฌ์ค๋ ๋ฐ ์คํจํ์ต๋๋ค.")
        return []

    if response.status_code == 200:
        # .get() guards against a 200 response that lacks an "items" key.
        return response.json().get('items', [])
    st.error("๋ด์ค ๋ฐ์ดํฐ๋ฅผ ๋ถ๋ฌ์ค๋ ๋ฐ ์คํจํ์ต๋๋ค.")
    return []
|
|
| |
def combine_datasets(huggingface_data, naver_data):
    """Merge the Hugging Face training split with Naver news items.

    Naver articles carry no party annotation, so they are all labelled
    ``"NEUTRAL"``; Hugging Face rows keep their ``party`` labels.

    Returns:
        Tuple ``(texts, labels)`` of two parallel lists.
    """
    news_texts = [f"{item['title']}. {item['description']}" for item in naver_data]
    news_labels = ["NEUTRAL"] * len(news_texts)

    train_split = huggingface_data['train']
    texts = list(train_split['text']) + news_texts
    labels = list(train_split['party']) + news_labels
    return texts, labels
|
|
| |
def initialize_model():
    """Return ``(model, vectorizer)``, loading the pickled pair when present.

    When either pickle is missing, a fresh (unfitted) SGD classifier and
    TF-IDF vectorizer are returned instead.
    """
    model_path = "incremental_model.pkl"
    vectorizer_path = "tfidf_vectorizer.pkl"

    if os.path.exists(model_path) and os.path.exists(vectorizer_path):
        return joblib.load(model_path), joblib.load(vectorizer_path)

    # No saved state yet: start from scratch.
    fresh_model = SGDClassifier(loss='log_loss', max_iter=5, tol=None)
    fresh_vectorizer = TfidfVectorizer(max_features=1000, stop_words="english")
    return fresh_model, fresh_vectorizer
|
|
| |
def incremental_training(texts, labels, model, vectorizer):
    """Partially fit *model* on *texts*/*labels* and persist both artifacts.

    Args:
        texts: List of document strings.
        labels: Parallel list of party labels ("Democrat"/"Republican"/other).
        model: An SGDClassifier supporting ``partial_fit``.
        vectorizer: A TfidfVectorizer (fitted or fresh).

    Returns:
        The ``(model, vectorizer)`` pair after training.
    """
    # BUG FIX: the original called fit_transform on every batch, which refits
    # the TF-IDF vocabulary and renumbers the feature columns — silently
    # invalidating the weights the persisted model learned on earlier batches.
    # Fit the vocabulary only on the first call; reuse it afterwards.
    if hasattr(vectorizer, "vocabulary_"):
        X = vectorizer.transform(texts)
    else:
        X = vectorizer.fit_transform(texts)

    # Label encoding shared with the UI: 0=Democrat, 1=Republican, 2=other.
    label_map = {"Democrat": 0, "Republican": 1}
    y = [label_map.get(label, 2) for label in labels]

    model.partial_fit(X, y, classes=[0, 1, 2])

    # Persist so initialize_model() can resume from this state next run.
    joblib.dump(model, "incremental_model.pkl")
    joblib.dump(vectorizer, "tfidf_vectorizer.pkl")
    return model, vectorizer
|
|
| |
def generate_article_gpt4(prompt):
    """Ask GPT-4 to write an article for *prompt*.

    Returns the generated text, or an ``"Error generating text: ..."`` string
    when the API call fails for any reason.
    """
    system_msg = {"role": "system", "content": "You are a helpful assistant that generates articles."}
    user_msg = {"role": "user", "content": prompt}
    try:
        # NOTE(review): this is the pre-1.0 openai API surface
        # (openai.ChatCompletion); confirm the pinned openai package version.
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[system_msg, user_msg],
            max_tokens=1024,
            temperature=0.7,
        )
        return completion['choices'][0]['message']['content']
    except Exception as exc:
        # Surface the failure to the caller instead of crashing the app.
        return f"Error generating text: {exc}"
|
|
| |
# --- Page header and shared inputs ---
st.title("๐ฐ ์ ์น์ ์ฑํฅ ๋ถ์ ๋ฐ ๋ฐ๋ ๊ด์ ์์ฑ ๋๊ตฌ")
st.markdown("๋ค์ด๋ฒ ๋ด์ค์ ํ๊นํ์ด์ค ๋ฐ์ดํฐ๋ฅผ ํ์ฉํ์ฌ ๋ด์ค ์ฑํฅ์ ๋ถ์ํ๊ณ , ๋ฐ๋ ๊ด์ ์ ์์ฑํฉ๋๋ค.")

# Cached dataset load (st.cache_data) and the search keyword used by both
# button handlers below.
huggingface_data = load_huggingface_data()
query = st.text_input("๋ค์ด๋ฒ ๋ด์ค์์ ๊ฒ์ํ  ํค์๋๋ฅผ ์๋ ฅํ์ธ์", value="์ ์น")
|
|
| |
# --- Combine datasets and incrementally train the classifier ---
if st.button("๋ฐ์ดํฐ ๊ฒฐํฉ ๋ฐ ํ์ต"):
    texts, labels = combine_datasets(huggingface_data, fetch_naver_news(query))
    model, vectorizer = initialize_model()
    model, vectorizer = incremental_training(texts, labels, model, vectorizer)

    # NOTE(review): the "evaluation" below reuses the training data itself,
    # so the reported accuracy is optimistic — consider a held-out split.
    X_test = vectorizer.transform(texts)
    # Label encoding must match incremental_training: 0=Democrat,
    # 1=Republican, 2=everything else (e.g. NEUTRAL news items).
    y_test = [0 if label == "Democrat" else 1 if label == "Republican" else 2 for label in labels]
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    st.write(f"๋ชจ๋ธ ์ ํ๋: {accuracy:.2f}")
    st.text("๋ถ๋ฅ ๋ฆฌํฌํธ:")
    st.text(classification_report(y_test, y_pred, target_names=["Democrat", "Republican", "NEUTRAL"]))
    st.success("๋ชจ๋ธ์ด ์๋ก์ด ๋ฐ์ดํฐ๋ก ์ถ๊ฐ ํ์ต๋์์ต๋๋ค.")
|
|
| |
# --- Classify fetched news and generate an opposite-perspective article ---
if st.button("๋ด์ค ์ฑํฅ ๋ถ์"):
    # NOTE(review): if the pickles do not exist yet, initialize_model()
    # returns an UNFITTED vectorizer and .transform() below will raise
    # NotFittedError — the training button must be run first; confirm flow.
    model, vectorizer = initialize_model()
    news_items = fetch_naver_news(query, display=15)

    if news_items:
        st.subheader("๋ด์ค ์ฑํฅ ๋ถ์ ๊ฒฐ๊ณผ")
        for item in news_items:
            title = item["title"]
            description = item["description"]
            link = item["link"]
            # Same "title. description" shape used when building training text.
            combined_text = f"{title}. {description}"

            # Class index follows the shared encoding: 0/1/2 below.
            vectorized_text = vectorizer.transform([combined_text])
            prediction = model.predict(vectorized_text)[0]
            sentiment = ["Democrat", "Republican", "NEUTRAL"][prediction]

            # Map the predicted leaning to the opposite ideological frame
            # for the GPT-4 rewrite prompt.
            opposite_perspective = "๋ณด์์ " if sentiment == "Democrat" else "์ง๋ณด์ " if sentiment == "Republican" else "์ค๋ฆฝ์ "
            prompt = f"๋ค์ ๊ธฐ์ฌ์ ๋ฐ๋ ๊ด์ ์ผ๋ก ๊ธฐ์ฌ๋ฅผ ์์ฑํ์ธ์:\n\n{combined_text}\n\n๋ฐ๋ ๊ด์ : {opposite_perspective}"
            opposite_article = generate_article_gpt4(prompt)

            # Render one card per article.
            st.write(f"**์ ๋ชฉ:** {title}")
            st.write(f"**๊ธฐ์ฌ ๋ด์ฉ:** {description}")
            st.write(f"**์ฑํฅ:** {sentiment}")
            st.write(f"**๋ฐ๋ ๊ด์ ๊ธฐ์ฌ:** {opposite_article}")
            st.write(f"**๋งํฌ:** [๊ธฐ์ฌ ๋งํฌ]({link})")
            st.markdown("---")
|
|