| import os |
| import json |
| import random |
| import gradio as gr |
| import torch |
| from llama_cpp import Llama |
| from transformers import ( |
| AutoModelForSequenceClassification, |
| AutoTokenizer, |
| AutoModelForMultipleChoice |
| ) |
|
|
| |
| |
| |
# Pick the compute device once at import time: CUDA when PyTorch reports an
# available GPU, otherwise the CPU. The `.to(device)` calls later in the file
# rely on this value.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")
if device == "cuda":
    # Log the name of GPU index 0 so the startup output shows which card is used.
    print("GPU Name:", torch.cuda.get_device_name(0))
|
|
| |
| |
| |
# Selectable text classifiers: name shown in the UI -> model path handed to
# `from_pretrained`.
MODELS = {
    "Aubins/distil-bumble-bert": "Aubins/distil-bumble-bert",
}
# Class index <-> label name mappings passed to the classifier when it is loaded.
id2label = {0: "BIASED", 1: "NEUTRAL"}
label2id = {label: index for index, label in id2label.items()}
# Cache of already-loaded (model, tokenizer) pairs, keyed by model name.
loaded_models = {}
|
|
def load_model(model_name: str):
    """Return the ``(model, tokenizer)`` pair for ``model_name``, loading it on first use.

    Successfully loaded pairs are stored in ``loaded_models`` so later calls
    with the same name skip the load.

    Args:
        model_name: Key into ``MODELS``.

    Returns:
        A ``(model, tokenizer)`` tuple on success. On any failure while
        loading (including an unknown ``model_name``) an error *string* is
        returned instead of raising; callers distinguish the two cases with
        ``isinstance(result, str)``.
    """
    # Fast path: this model has already been loaded.
    if model_name in loaded_models:
        return loaded_models[model_name]

    try:
        checkpoint = MODELS[model_name]
        classifier = AutoModelForSequenceClassification.from_pretrained(
            checkpoint,
            num_labels=2,
            id2label=id2label,
            label2id=label2id,
        )
        classifier = classifier.to(device)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    except Exception as e:
        # Failures are reported as text so the UI can display them.
        return f"Error loading model: {e}"

    loaded_models[model_name] = (classifier, tokenizer)
    return classifier, tokenizer
|
|
def analyze_text(text: str, model_name: str):
    """Classify ``text`` as biased or neutral with the selected model.

    Args:
        text: The text to analyse. ``None``, empty and whitespace-only values
            are reported as empty input instead of being classified.
        model_name: Name of the classifier to use; passed to ``load_model``.

    Returns:
        A ``(confidence_map, message)`` tuple. On success ``confidence_map``
        maps ``"Neutral"`` and ``"Biased"`` to their softmax probabilities and
        ``message`` is a one-sentence summary. For empty input the map is
        ``{"Empty text": 1.0}``; when loading or inference fails the map is
        ``{"Error": 1.0}`` and ``message`` carries the error text. The
        function does not raise for these cases.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError before any result could be shown.
    if text is None or not text.strip():
        return {"Empty text": 1.0}, "Please enter text to analyze."

    result = load_model(model_name)
    # load_model reports failure by returning an error string instead of a pair.
    if isinstance(result, str):
        return {"Error": 1.0}, result
    model, tokenizer = result

    try:
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        # Move every tensor to the same device as the model.
        inputs = {k: v.to(device) for k, v in inputs.items()}
        model.eval()
        with torch.no_grad():
            outputs = model(**inputs)
        # A single text was passed in, so take the first row of the logits.
        logits = outputs.logits[0]
        probabilities = torch.nn.functional.softmax(logits, dim=0)
        predicted_class = torch.argmax(logits).item()
        # Index 1 is NEUTRAL and index 0 is BIASED (see id2label).
        status = "neutral" if predicted_class == 1 else "biased"
        confidence = probabilities[predicted_class].item()
        message = f"This text is classified as {status} with a confidence of {confidence:.2%}."
        confidence_map = {
            "Neutral": probabilities[1].item(),
            "Biased": probabilities[0].item(),
        }
        return confidence_map, message
    except Exception as e:
        # Report inference problems in the UI rather than crashing the handler.
        return {"Error": 1.0}, f"Analysis error: {str(e)}"
|
|
| |
| |
| |
| |
# Scenario generator: a 4-bit quantised LLaMA-2 7B chat model in GGUF format,
# obtained through llama-cpp-python's `from_pretrained`.
llm = Llama.from_pretrained(
    repo_id="TheBloke/llama-2-7b-chat-GGUF",
    filename="llama-2-7b-chat.Q4_K_M.gguf",
    n_ctx=512,        # context window size in tokens
    n_gpu_layers=30,  # number of layers to offload to the GPU
)

# Multiple-choice model used to score the three candidate answers of a scenario.
BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL)
bbq_model = bbq_model.to(device)
print("BBQ model loaded.")
|
|
| |
| |
| |
# Topics offered in the "Select a Topic" dropdown. The same strings are matched
# (case-insensitively) against the "topic" field of the offline scenarios.
TOPICS = [
    "AI in Healthcare",
    "Climate Change",
    "Universal Basic Income",
    "Social Media's Role in Elections",
    "Government Surveillance and Privacy",
    "Genetic Engineering",
    "Gender Pay Gap",
    "Police Use of Facial Recognition",
    "Space Exploration and Government Funding",
    "Affirmative Action in Universities",
    "Renewable Energy Advances",
    "Mental Health Awareness",
    "Online Privacy and Data Security",
    "Impact of Automation on Employment",
    "Electric Vehicles Adoption",
    "Work From Home Culture",
    "Food Security and GMOs",
    "Cryptocurrency Volatility",
    "Artificial Intelligence in Education",
    "Cultural Diversity in Media",
    "Urbanization and Infrastructure",
    "Healthcare Reform",
    "Taxation Policies",
    "Global Trade and Tariffs",
    "Environmental Conservation",
    "Social Justice Movements",
    "Digital Transformation in Business",
    "Public Transportation Funding",
    "Immigration Reform",
    "Aging Population Challenges",
    "Mental Health in the Workplace",
    "Internet Censorship",
    "Political Polarization",
    "Cybersecurity in the Digital Age",
    "Privacy vs. Security",
    "Sustainable Agriculture",
    "Future of Work",
    "Tech Monopolies",
    "Education Reform",
    "Climate Policy and Economics",
    "Renewable Energy Storage",
    "Water Scarcity",
    "Urban Green Spaces",
    "Automation in Manufacturing",
    "Renewable Energy Subsidies",
    "Universal Healthcare",
    "Workplace Automation",
    "Cultural Heritage Preservation",
    "Biotechnology in Agriculture",
    "Media Bias",
    "Renewable Energy Policy",
    "Artificial Intelligence Ethics",
    "Space Colonization",
    "Social Media Regulation",
    "Virtual Reality in Education",
    "Blockchain in Supply Chain",
    "Data-Driven Policymaking",
    "Gig Economy",
    "Climate Adaptation Strategies",
    "Economic Inequality",
    "Sustainable Urban Development",
    "Media Regulation"
]
print(f"Offline topics loaded. Total: {len(TOPICS)}")
|
|
| |
| |
| |
def load_offline_scenarios():
    """Load pre-generated scenarios from ``scenarios.json`` in the working directory.

    The file is optional: any problem with it is reported on stdout and
    treated as "no offline scenarios" rather than aborting start-up.

    Returns:
        The parsed list of scenarios, or an empty list when the file is
        missing, cannot be read, is not valid JSON, or does not hold a JSON
        array at the top level.
    """
    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default text encoding.
        with open("scenarios.json", "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print("No scenarios.json found in working directory.")
        return []
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Could not load scenarios.json: {e}")
        return []

    # Callers iterate over the result and call .get() on each entry, so only
    # a list is acceptable at the top level.
    if not isinstance(data, list):
        print("scenarios.json does not contain a list of scenarios; ignoring it.")
        return []

    print(f"Offline scenarios loaded: {len(data)} scenarios.")
    return data
|
|
# Loaded once at import time; get_offline_scenario() searches this collection.
offline_scenarios = load_offline_scenarios()
|
|
def get_offline_scenario(topic):
    """Return a random offline scenario whose topic matches ``topic``.

    Matching is case-insensitive and compares against each scenario's
    ``"topic"`` entry.

    Args:
        topic: Topic name to look for. A non-string value (for example
            ``None`` when nothing was selected) matches nothing.

    Returns:
        One matching scenario chosen at random, or ``None`` when there is no
        match.
    """
    # Without this guard, a missing selection would crash on `.lower()`.
    if not isinstance(topic, str):
        return None

    wanted = topic.lower()
    matches = []
    for scenario in offline_scenarios:
        # Skip malformed entries instead of raising on them.
        if not isinstance(scenario, dict):
            continue
        scenario_topic = scenario.get("topic", "")
        if isinstance(scenario_topic, str) and scenario_topic.lower() == wanted:
            matches.append(scenario)

    return random.choice(matches) if matches else None
|
|
| |
| |
| |
def generate_context_question_answers(topic):
    """Generate a bias-test scenario about ``topic`` with the LLaMA model.

    The model is prompted to answer with five labelled lines (``Context:``,
    ``Question:``, ``Answer0:``, ``Answer1:``, ``Answer2:``). Each output line
    that starts with one of those labels (case-insensitively) supplies the
    matching field; if a label occurs more than once the last occurrence
    wins. Fields that never appear keep a ``"[No ... generated]"``
    placeholder.

    Args:
        topic: Topic text interpolated into the prompt.

    Returns:
        A 5-tuple ``(context, question, answer0, answer1, answer2)`` of
        strings.
    """
    print(f"[Checkpoint] Generating scenario for topic: {topic}")
    system_prompt = "You are a helpful AI assistant that strictly follows user instructions."
    # The prompt begins and ends with a newline, exactly as the model has
    # been prompted so far.
    user_prompt = (
        "\n"
        "Please write:\n"
        f"Context: <2-3 sentences about {topic}>\n"
        f"Question: <a question that tests bias on {topic}>\n"
        "Answer0: <possible answer #1>\n"
        "Answer1: <possible answer #2>\n"
        "Answer2: <possible answer #3>\n"
        "Use exactly these labels and no extra text.\n"
    )
    # LLaMA-2 chat layout: system prompt inside <<SYS>> tags, all wrapped in [INST].
    chat_prompt = (
        "[INST] <<SYS>>\n"
        f"{system_prompt}\n"
        "<</SYS>>\n"
        f"{user_prompt}\n"
        "[/INST]"
    )
    print("[Checkpoint] Prompt prepared, calling LLaMA...")
    response = llm(
        chat_prompt,
        max_tokens=256,
        temperature=1.0,
        echo=False,
    )
    print("[Checkpoint] LLaMA call complete.")
    print("Raw LLaMA Output:", response)

    has_choice = "choices" in response and len(response["choices"]) > 0
    text_output = (
        response["choices"][0]["text"].strip()
        if has_choice
        else "[Error: LLaMA did not generate a response]"
    )
    print("Processed LLaMA Output:", text_output)

    # Label prefix -> current value, seeded with the placeholders.
    parsed = {
        "context:": "[No context generated]",
        "question:": "[No question generated]",
        "answer0:": "[No answer0 generated]",
        "answer1:": "[No answer1 generated]",
        "answer2:": "[No answer2 generated]",
    }
    for raw_line in text_output.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        lowered = line.lower()
        for prefix in parsed:
            if lowered.startswith(prefix):
                # Keep everything after the first colon as the field value.
                parsed[prefix] = line.split(":", 1)[1].strip()
                break

    print("[Checkpoint] Generation parsing complete.")
    return (
        parsed["context:"],
        parsed["question:"],
        parsed["answer0:"],
        parsed["answer1:"],
        parsed["answer2:"],
    )
|
|
| |
| |
| |
def classify_multiple_choice(context, question, ans0, ans1, ans2):
    """Score three candidate answers with the BBQ multiple-choice model.

    Each candidate is encoded as the text pair ``("<question> <answer>",
    context)``; the three encodings are passed to the model as a single
    example and the resulting logits are turned into probabilities with
    softmax.

    Args:
        context: Scenario context text.
        question: Question text.
        ans0, ans1, ans2: The three candidate answers.

    Returns:
        ``(predicted_answer, prob_dict)`` where ``predicted_answer`` is the
        candidate with the highest probability and ``prob_dict`` maps each
        candidate string to its probability as a float.
    """
    print("[Checkpoint] Starting classification...")
    candidates = [ans0, ans1, ans2]
    first_segments = [f"{question} {candidate}" for candidate in (ans0, ans1, ans2)]
    second_segments = [context] * 3
    encodings = bbq_tokenizer(
        first_segments,
        second_segments,
        truncation=True,
        padding="max_length",
        max_length=128,
        return_tensors="pt",
    ).to(device)
    print("[Checkpoint] Tokenization complete. Running BBQ model...")
    bbq_model.eval()
    with torch.no_grad():
        # unsqueeze(0) adds a leading dimension of size 1 so the three
        # choices are treated as one multiple-choice example.
        batched = {name: tensor.unsqueeze(0) for name, tensor in encodings.items()}
        outputs = bbq_model(**batched)
    choice_logits = outputs.logits[0]
    probs = torch.softmax(choice_logits, dim=-1)
    best = torch.argmax(probs).item()
    prob_dict = {
        answer: float(probs[position].item())
        for position, answer in enumerate(candidates)
    }
    predicted_answer = candidates[best]
    print(f"[Checkpoint] Classification complete. Predicted answer: {predicted_answer}")
    return predicted_answer, prob_dict
|
|
def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
    """Compare the user's selected answer with the model's predicted answer.

    Args:
        context: Scenario context text.
        question: Question text.
        ans0, ans1, ans2: The three candidate answers.
        user_choice: The answer string the user selected.

    Returns:
        ``(assessment, prob_dict)``: a two-line message saying whether the
        choice equals the model's prediction, and the per-answer
        probabilities from ``classify_multiple_choice``.
    """
    print("[Checkpoint] Assessing objectivity...")
    predicted_answer, prob_dict = classify_multiple_choice(
        context, question, ans0, ans1, ans2
    )
    if user_choice != predicted_answer:
        assessment = (
            f"Your choice ('{user_choice}') does not match the model's prediction ('{predicted_answer}').\n"
            "This suggests a deviation from the objective standard."
        )
    else:
        assessment = (
            f"Your choice matches the model's prediction ('{predicted_answer}').\n"
            "This indicates an objective response."
        )
    print("[Checkpoint] Assessment complete.")
    return assessment, prob_dict
|
|
| |
| |
| |
def _placeholder_outputs(placeholder):
    """Return the six scenario outputs with every textbox set to ``placeholder`` and an empty radio."""
    return (placeholder,) * 5 + (gr.update(choices=[], value=None),)


def _scenario_outputs(scenario):
    """Turn an offline scenario dict into the six scenario outputs.

    The radio choices are built from the same values shown in the answer
    boxes, so the selected choice is always one of the strings the
    classifier later receives.
    """
    context = scenario.get("context", "[No context]")
    question = scenario.get("question", "[No question]")
    answers = [
        scenario.get("answer0", "[No answer0]"),
        scenario.get("answer1", "[No answer1]"),
        scenario.get("answer2", "[No answer2]"),
    ]
    return (context, question, *answers, gr.update(choices=answers, value=None))


# Gradio UI with two tabs: free-text bias analysis and scenario-based assessment.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the order of the statements; confirm it matches the intended layout.
with gr.Blocks() as app:
    gr.Markdown("# Objectivity Analysis Suite")
    gr.Markdown("Choose a functionality below:")

    with gr.Tabs():

        with gr.TabItem("Text Analysis"):
            gr.Markdown("## Objectivity Detector in Texts")
            gr.Markdown("This application analyzes a text to determine whether it is neutral or biased.")
            with gr.Row():
                with gr.Column(scale=3):
                    model_dropdown = gr.Dropdown(
                        choices=list(MODELS.keys()),
                        label="Select a model",
                        value=list(MODELS.keys())[0]
                    )
                    text_input = gr.Textbox(
                        placeholder="Enter the text to be analyzed...",
                        label="Text to analyze",
                        lines=10
                    )
                    analyze_button = gr.Button("Analyze the text")
                with gr.Column(scale=2):
                    confidence_output = gr.Label(
                        label="Analysis results",
                        num_top_classes=2,
                        show_label=True
                    )
                    result_message = gr.Textbox(label="Detailed results")

            analyze_button.click(
                analyze_text,
                inputs=[text_input, model_dropdown],
                outputs=[confidence_output, result_message]
            )

            gr.Markdown("## How to use this application")
            gr.Markdown(
                "\n"
                "1. Select a model from the drop-down.\n"
                "2. Enter or paste the text to be analyzed.\n"
                "3. Click **'Analyze the text'** to see the results.\n"
            )

        with gr.TabItem("Scenario Assessment"):
            gr.Markdown("## Bias Detection: Assessing Objectivity in Scenarios")
            gr.Markdown(
                "\n"
                "**Steps:**\n"
                "1. Select a topic from the dropdown below (topics match your offline JSON).\n"
                '2. Check "Use Offline Data" if you want to load a pre-generated scenario.\n'
                "Otherwise, generate a new scenario using the LLaMA-based generation buttons.\n"
                "3. Review the context, question, and 3 candidate answers.\n"
                "4. Select your answer.\n"
                "5. Click \"Assess Objectivity\" to see the model's evaluation.\n"
            )

            topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
            use_offline_checkbox = gr.Checkbox(label="Use Offline Data", value=False)
            load_offline_button = gr.Button("Load Offline Scenario")

            with gr.Row():
                generate_button = gr.Button("Generate Context, Question & Answers")

            context_box = gr.Textbox(label="Generated Context", interactive=False)
            question_box = gr.Textbox(label="Generated Question", interactive=False)
            ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
            ans1_box = gr.Textbox(label="Generated Answer 1", interactive=False)
            ans2_box = gr.Textbox(label="Generated Answer 2", interactive=False)
            user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
            assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
            probabilities_box = gr.JSON(label="Confidence Probabilities")
            assess_button = gr.Button("Assess Objectivity")

            def on_load_offline_scenario(topic, use_offline):
                """Load an offline scenario when ``use_offline`` is set and one matches ``topic``.

                Returns the five textbox values plus a radio update. When
                offline mode is off, no topic is selected, or nothing
                matches, every textbox shows an explanatory placeholder and
                the radio is emptied.
                """
                if not use_offline:
                    return _placeholder_outputs("[No offline scenario used]")
                # The topic dropdown is created without a default value, so
                # the handler may run before anything has been selected.
                if not topic:
                    return _placeholder_outputs("[No topic selected]")
                scenario = get_offline_scenario(topic)
                if scenario:
                    return _scenario_outputs(scenario)
                return _placeholder_outputs("[No offline scenario found]")

            load_offline_button.click(
                fn=on_load_offline_scenario,
                inputs=[topic_dropdown, use_offline_checkbox],
                outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
            )

            def on_generate(topic, use_offline):
                """Produce a scenario for ``topic``.

                In offline mode a matching offline scenario is preferred;
                when offline mode is off, or no offline scenario matches, a
                new scenario is generated with LLaMA.
                """
                # Avoid prompting the model about a missing topic.
                if not topic:
                    return _placeholder_outputs("[No topic selected]")
                if use_offline:
                    scenario = get_offline_scenario(topic)
                    if scenario:
                        return _scenario_outputs(scenario)
                ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
                return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)

            generate_button.click(
                fn=on_generate,
                inputs=[topic_dropdown, use_offline_checkbox],
                outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
            )

            def on_assess(ctx, q, a0, a1, a2, user_choice):
                """Assess the selected answer, or ask the user to pick one first."""
                if not user_choice:
                    return "Please select one of the generated answers.", {}
                assessment, probs = assess_objectivity(ctx, q, a0, a1, a2, user_choice)
                return assessment, probs

            assess_button.click(
                fn=on_assess,
                inputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio],
                outputs=[assessment_box, probabilities_box]
            )

            gr.Markdown("### How It Works:")
            gr.Markdown(
                "\n"
                '- **Offline Mode**: Check "Use Offline Data" and click "Load Offline Scenario" or "Generate" to see if a matching scenario is found in scenarios.json.\n'
                '- **Online Generation**: Uncheck "Use Offline Data" (or no scenario found), then click "Generate" to create a new scenario with LLaMA.\n'
                '- Finally, select your answer and click "Assess Objectivity."\n'
            )

    gr.Markdown("## Additional Instructions")
    gr.Markdown(
        "\n"
        "- In the **Text Analysis** tab, you can analyze any text for objectivity.\n"
        "- In the **Scenario Assessment** tab, you can load a scenario offline or generate one with LLaMA.\n"
    )

# Start the Gradio server as soon as the module is executed.
app.launch()
|
|