# Streamlit app that classifies messages as "Job Related" or "Not Job
# Related" using a RoBERTa sequence classifier from the Hugging Face Hub.
import pandas as pd
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


@st.cache_resource
def load_model():
    # Cache the tokenizer and model so Streamlit does not reload them from
    # disk on every rerun triggered by user interaction.
    tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-imdb")
    model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-imdb")
    model.eval()  # inference mode: disables dropout
    return tokenizer, model


tokenizer, model = load_model()

def analyze_text(text):
    # str() guards against non-string values (e.g. NaN) coming from the CSV.
    # The text is not lowercased: RoBERTa's tokenizer is case-sensitive and
    # this checkpoint was fine-tuned on cased text.
    encoded_text = tokenizer(str(text), truncation=True, padding=True, return_tensors="pt")

    # Run inference without tracking gradients.
    with torch.no_grad():
        output = model(**encoded_text)
    prediction = output.logits.argmax(-1).item()

    # The checkpoint is a binary classifier (fine-tuned on IMDB sentiment);
    # the app re-purposes label 1 as "Job Related".
    if prediction == 1:
        return "Job Related"
    return "Not Job Related"


def count_job_related_messages(data):
    # Tally the precomputed "Job Related" column instead of re-running the
    # model over every message a second time.
    job_related_count = 0
    not_job_related_count = 0

    for label in data["Job Related"]:
        if label == "Job Related":
            job_related_count += 1
        else:
            not_job_related_count += 1

    return job_related_count, not_job_related_count


st.title("Job Related Message Analyzer")

uploaded_file = st.file_uploader("Upload CSV file")
user_input = st.text_input("Enter text")

if uploaded_file:
    # Read the uploaded CSV; a "message" column is expected.
    data = pd.read_csv(uploaded_file)
    if "message" not in data.columns:
        st.error("The CSV file must contain a 'message' column.")
        st.stop()

    # Classify every message and attach the labels as a new column.
    results = [analyze_text(message) for message in data["message"]]
    data["Job Related"] = results

    # Tally the labels, then display the annotated table with the totals.
    job_related_count, not_job_related_count = count_job_related_messages(data)

    st.dataframe(data)
    st.write(f"Job Related Messages: {job_related_count}")
    st.write(f"Not Job Related Messages: {not_job_related_count}")
elif user_input:
    # Classify the single message typed into the text box.
    result = analyze_text(user_input)
    st.write(f"Message Classification: {result}")
else:
    st.write("Please upload a CSV file or enter text to analyze.")
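
# To try the app locally (assuming this file is saved as app.py and that
# streamlit, torch, and transformers are installed):
#
#   streamlit run app.py
#
# The first run downloads the textattack/roberta-base-imdb weights from the
# Hugging Face Hub; later runs reuse the local cache.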