Spaces:

argmin
/

llm_classifier

Sleeping

App Files Files Community

argmin committed on Dec 7, 2024

Commit

510a9b0

1 Parent(s): f23351c

add files

Browse files

Files changed (31) hide show

Makefile +8 -0
app/.DS_Store +0 -0
app/__init__.py +0 -0
app/config/__pycache__/model_params.cpython-310.pyc +0 -0
app/config/model_params.py +6 -0
app/main.py +201 -0
app/utils/__init__.py +0 -0
app/utils/__pycache__/__init__.cpython-310.pyc +0 -0
app/utils/__pycache__/api.cpython-310.pyc +0 -0
app/utils/__pycache__/classification.cpython-310.pyc +0 -0
app/utils/__pycache__/evaluation.cpython-310.pyc +0 -0
app/utils/__pycache__/prompt.cpython-310.pyc +0 -0
app/utils/__pycache__/tokens.cpython-310.pyc +0 -0
app/utils/__pycache__/validation.cpython-310.pyc +0 -0
app/utils/api.py +26 -0
app/utils/classification.py +26 -0
app/utils/evaluation.py +21 -0
app/utils/prompt.py +54 -0
app/utils/tokens.py +15 -0
app/utils/validation.py +18 -0
requirements.txt +8 -0
tests/__init__.py +0 -0
tests/__pycache__/__init__.cpython-310.pyc +0 -0
tests/__pycache__/test_api.cpython-310-pytest-8.3.4.pyc +0 -0
tests/__pycache__/test_evaluation.cpython-310-pytest-8.3.4.pyc +0 -0
tests/__pycache__/test_prompt.cpython-310-pytest-8.3.4.pyc +0 -0
tests/__pycache__/test_validation.cpython-310-pytest-8.3.4.pyc +0 -0
tests/test_api.py +18 -0
tests/test_evaluation.py +14 -0
tests/test_prompt.py +23 -0
tests/test_validation.py +17 -0

Makefile ADDED Viewed

	@@ -0,0 +1,8 @@

+# None of these targets produce a file of the same name.
+.PHONY: setup run test
+# Install the Python dependencies.
+setup:
+	pip install -r requirements.txt
+# Launch the Streamlit app; the entry point lives in app/main.py.
+run:
+	streamlit run app/main.py
+# Run the test suite with app/ on the import path so `utils.*` resolves.
+test:
+	PYTHONPATH=./app pytest

app/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

app/__init__.py ADDED Viewed

File without changes

app/config/__pycache__/model_params.cpython-310.pyc ADDED Viewed

Binary file (303 Bytes). View file

app/config/model_params.py ADDED Viewed

	@@ -0,0 +1,6 @@

+# Default OpenAI chat-completion settings for the classifier app.
+DEFAULT_PARAMS = {
+    # Pre-selected model; app/main.py looks it up in "available_models", so it must be listed there.
+    "model": "gpt-4o-mini-2024-07-18",
+    # Upper bound on generated tokens per classification call.
+    "max_tokens": 60,
+    # Initial value of the temperature slider in the UI.
+    "temperature": 0.0,
+    "available_models": ["gpt-4o-mini-2024-07-18", "gpt-4o-2024-08-06"],  # Structured-output-compatible models
+}

app/main.py ADDED Viewed

	@@ -0,0 +1,201 @@

+import streamlit as st
+import pandas as pd
+from utils.prompt import generate_prompts
+from utils.classification import apply_classification
+from utils.validation import generate_classification_model
+from utils.api import get_openai_client
+from utils.tokens import estimate_token_count
+from config.model_params import DEFAULT_PARAMS
+st.set_page_config(layout="wide")
+# Streamlit App Title
+st.title("LLM-based Classifier")
+# Upload Dataset
+uploaded_file = st.sidebar.file_uploader("Upload a CSV file", type=["csv"])
+if uploaded_file:
+    df = pd.read_csv(uploaded_file)
+    st.write("### Data Preview", df.head())
+    # Select Target Column
+    label_column = st.selectbox("Select target column (if available):", df.columns.tolist())
+    # Exclude Target Column from Feature Selection
+    if label_column:  # Ensure the label column is defined
+        filtered_columns = [col for col in df.columns if col != label_column]
+    else:
+        filtered_columns = df.columns.tolist()
+    # Feature Selection
+    features = st.multiselect("Select features:", filtered_columns, default=filtered_columns)
+    # Validate Features
+    if label_column in features:
+        st.error(f"Target column '{label_column}' cannot be included in features. Please remove it.")
+        st.stop()
+    # Specify Prediction Column Name
+    prediction_column = st.text_input(
+        "Enter the name of the column to store predictions:", "Predicted Label"
+    )
+    # Define Labels and Descriptions
+    st.write(f"### Describe the values {prediction_column} can take")
+    num_labels = st.number_input("Number of unique labels:", min_value=2, step=1)
+    # Create columns for labels and descriptions
+    col1, col2 = st.columns(2)
+    label_descriptions = {}
+    for i in range(int(num_labels)):
+        with col1:
+            label = st.text_input(f"Label {i+1} name:", key=f"label_name_{i}")
+        with col2:
+            description = st.text_input(f"Label {i+1} description:", key=f"label_desc_{i}")
+        label_descriptions[label] = description
+    # Compare user-provided labels with unique target values
+    if label_column:
+        # Get unique values in the target column
+        unique_target_values = set(df[label_column].unique())
+        n_unique_target_values = len(unique_target_values)
+        if n_unique_target_values > 20:
+                st.warning(
+                    f"The selected column '{label_column}' has {n_unique_target_values} unique values, "
+                    f"which may not be ideal as a target for classification."
+                )
+                proceed = st.checkbox(
+                    f"I understand and still want to use '{label_column}' as the target column."
+                )
+                if not proceed:
+                    st.stop()
+        # Get user-provided labels
+        user_provided_labels = set(label_descriptions.keys())
+        # Identify missing and extra labels
+        missing_labels = unique_target_values - user_provided_labels
+        extra_labels = user_provided_labels - unique_target_values
+        # Display warnings for discrepancies
+        if missing_labels:
+            st.warning(
+                f"The following values in the target column are not accounted for in the labels: {', '.join(map(str, missing_labels))}."
+            )
+        if extra_labels:
+            st.warning(
+                f"The following user-provided labels do not match any values in the target column: {', '.join(map(str, extra_labels))}."
+            )
+    # Few-Shot Prompting
+    use_few_shot = st.checkbox("Use few-shot prompting with examples from the target column", value=False)
+    if use_few_shot and label_column:
+        st.info("Few-shot prompting is enabled. Examples will be selected from the dataset.")
+        # Group by target column and select 2 examples per class
+        few_shot_examples = (
+            df.groupby(label_column, group_keys=False)
+              .apply(lambda group: group.sample(min(2, len(group)), random_state=42))
+        )
+        # Show the few-shot examples for reference
+        st.write("### Few-Shot Examples")
+        st.write(few_shot_examples[[*features, label_column]])
+        # Remove few-shot examples from the dataset
+        remaining_data = df.drop(few_shot_examples.index)
+    else:
+        few_shot_examples = None
+        remaining_data = df
+    # Limit rows to 20 to control costs
+    if len(remaining_data) > 20:
+        st.warning("Only the first 20 rows of the remaining dataset will be sent to OpenAI to save costs.")
+    limited_data = remaining_data.head(20)
+    # Prepare Few-Shot Examples for Prompting
+    example_rows = []
+    if use_few_shot and few_shot_examples is not None:
+        for _, example in few_shot_examples.iterrows():
+            example_rows.append({
+                "features": {feature: example[feature] for feature in features},
+                "label": example[label_column],
+            })
+    # API Key and Model Parameters
+    openai_api_key = st.sidebar.text_input("Enter your OpenAI API Key:", type="password")
+    model_params = {
+        "model": st.selectbox(
+            "Model:",
+            DEFAULT_PARAMS["available_models"],
+            index=DEFAULT_PARAMS["available_models"].index(DEFAULT_PARAMS["model"])
+        ),
+        "temperature": st.slider("Temperature:", min_value=0.0, max_value=1.0, value=DEFAULT_PARAMS["temperature"]),
+        "max_tokens": DEFAULT_PARAMS["max_tokens"],
+    }
+    st.sidebar.write('**Model Config**')
+    st.sidebar.json(DEFAULT_PARAMS)
+    verbose = st.checkbox("Verbose", value=False)
+    # Classification Button
+    if st.button("Run Classification"):
+        if not openai_api_key:
+            st.error("Please provide a valid OpenAI API Key.")
+        else:
+            # Initialize OpenAI client
+            client = get_openai_client(api_key=openai_api_key)
+            # Dynamically create the Pydantic model for validation
+            ClassificationOutput = generate_classification_model(list(label_descriptions.keys()))
+            # Function to classify a single row
+            def classify_row(row):
+                # Generate system and user prompts
+                system_prompt, user_prompt = generate_prompts(
+                    row=row.to_dict(),
+                    label_descriptions=label_descriptions,
+                    features=features,
+                    example_rows=example_rows,
+                )
+                # Show the prompts in an expander for transparency
+                if verbose:
+                    with st.expander(f"OpenAI Call Input for Row Index {row.name}"):
+                        st.write("**System Prompt:**")
+                        st.code(system_prompt)
+                        st.write(f"Token Count (System Prompt): {estimate_token_count(system_prompt, model_params['model'])}")
+                        st.write("**User Prompt:**")
+                        st.code(user_prompt)
+                        st.write(f"Token Count (User Prompt): {estimate_token_count(user_prompt, model_params['model'])}")
+                # Make the OpenAI call and validate the output
+                return apply_classification(
+                    client=client,
+                    model_params=model_params,
+                    ClassificationOutput=ClassificationOutput,
+                    system_prompt=system_prompt,
+                    user_prompt=user_prompt,
+                    verbose=verbose,
+                    st=st
+                )
+            # Apply the classification to each row in the limited data
+            limited_data[prediction_column] = limited_data.apply(classify_row, axis=1)
+            # Display Predictions
+            st.write(f"### Predictions ({prediction_column})", limited_data)
+            # Evaluate if ground truth is available
+            if label_column in limited_data.columns:
+                from utils.evaluation import evaluate_predictions
+                report = evaluate_predictions(limited_data[label_column], limited_data[prediction_column])
+                st.write("### Evaluation Metrics")
+                st.json(report)
+            else:
+                st.warning(f"Target column '{label_column}' or prediction column '{prediction_column}' is missing in the data.")

app/utils/__init__.py ADDED Viewed

File without changes

app/utils/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (150 Bytes). View file

app/utils/__pycache__/api.cpython-310.pyc ADDED Viewed

Binary file (1.03 kB). View file

app/utils/__pycache__/classification.cpython-310.pyc ADDED Viewed

Binary file (880 Bytes). View file

app/utils/__pycache__/evaluation.cpython-310.pyc ADDED Viewed

Binary file (779 Bytes). View file

app/utils/__pycache__/prompt.cpython-310.pyc ADDED Viewed

Binary file (2 kB). View file

app/utils/__pycache__/tokens.cpython-310.pyc ADDED Viewed

Binary file (656 Bytes). View file

app/utils/__pycache__/validation.cpython-310.pyc ADDED Viewed

Binary file (750 Bytes). View file

app/utils/api.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from openai import OpenAI
+# Initialize OpenAI client
+def get_openai_client(api_key):
+    """
+    Returns an OpenAI client instance with the provided API key.
+    """
+    return OpenAI(api_key=api_key)
+def classify_row_chat(prompt, client, model="gpt-3.5-turbo"):
+    """
+    Sends a classification prompt to the OpenAI Chat API and returns the predicted label.
+    Args:
+        prompt (str): The user prompt to classify data.
+        client (OpenAI): The OpenAI client instance.
+        model (str): The model to use for chat completion.
+    Returns:
+        str: The predicted label.
+    """
+    response = client.chat.completions.create(
+        model=model,
+        messages=[{"role": "user", "content": prompt}]
+    )
+    return response.choices[0].message.content.strip()

app/utils/classification.py ADDED Viewed

	@@ -0,0 +1,26 @@

+def apply_classification(client, model_params, ClassificationOutput, system_prompt, user_prompt, verbose=False, st=None):
+    response = client.chat.completions.create(
+        model=model_params["model"],
+        messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ],
+        max_tokens=model_params["max_tokens"],
+        temperature=model_params["temperature"],
+    )
+    raw_prediction = response.choices[0].message.content.strip()
+    # Log raw prediction for debugging
+    if verbose and st:
+        st.info(f"Raw Prediction: {raw_prediction}")
+    # Validate and process the prediction
+    try:
+        validated_prediction = ClassificationOutput.parse_obj({"label": raw_prediction}).label
+    except Exception as e:
+        if verbose and st:
+            st.error(f"Invalid prediction: {raw_prediction}. Error: {e}")
+        return "INVALID"
+    return validated_prediction

app/utils/evaluation.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import pandas as pd
+from sklearn.metrics import classification_report
+def evaluate_predictions(y_true, y_pred):
+    """
+    Evaluates predictions by converting labels to strings and generating a classification report.
+    Args:
+        y_true (pd.Series or list): True labels.
+        y_pred (pd.Series or list): Predicted labels.
+    Returns:
+        dict: Classification report as a dictionary.
+    """
+    # Ensure both true and predicted labels are strings
+    y_true_str = pd.Series(y_true).astype(str)
+    y_pred_str = pd.Series(y_pred).astype(str)
+    # Generate classification report
+    report = classification_report(y_true_str, y_pred_str, output_dict=True)
+    return report

app/utils/prompt.py ADDED Viewed

	@@ -0,0 +1,54 @@

+def create_classification_prompt(row, label_descriptions, features, example_rows):
+    """
+    Generates system and user prompts for classification.
+    Args:
+        row (dict): A single row of feature values.
+        label_descriptions (dict): Mapping of labels to their descriptions.
+        features (list): List of features to include in the prompt.
+        example_rows (list): Few-shot examples for the prompt.
+    Returns:
+        tuple: (system_prompt, user_prompt)
+    """
+    # System prompt
+    system_prompt = "You are a classifier. Assign one of the following labels based on the input data:\n"
+    for label, desc in label_descriptions.items():
+        system_prompt += f"- {label}: {desc}\n"
+    # Few-shot examples
+    if example_rows:
+        system_prompt += "\nExamples:\n"
+        for example in example_rows:
+            example_features = "; ".join(
+                f"{feature}: {example['features'][feature]}" for feature in features
+                #f"{feature}: {example.get('features', {}).get(feature, 'MISSING')}" for feature in features
+            )
+            system_prompt += f"Input: {example_features}\nLabel: {example['label']}\n"
+    # User prompt for the current row
+    user_features = "; ".join(f"{feature}: {row[feature]}" for feature in features)
+    user_prompt = f"Input: {user_features}\nLabel:"
+    return system_prompt, user_prompt
+def generate_prompts(row, label_descriptions, features, example_rows):
+    """
+    Wrapper for create_classification_prompt to generate prompts for a row.
+    Args:
+        row (dict): Row of the dataset.
+        label_descriptions (dict): Mapping of labels to their descriptions.
+        features (list): List of features to include in the prompt.
+        example_rows (list): Few-shot examples for the prompt.
+    Returns:
+        tuple: (system_prompt, user_prompt)
+    """
+    return create_classification_prompt(
+        row=row,
+        label_descriptions=label_descriptions,
+        features=features,
+        example_rows=example_rows,
+    )

app/utils/tokens.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import tiktoken
+def estimate_token_count(prompt: str, model: str) -> int:
+    """
+    Estimate the token count for a given prompt and model.
+    Args:
+        prompt (str): The input prompt to tokenize.
+        model (str): The name of the model to use for token encoding.
+    Returns:
+        int: The estimated token count.
+    """
+    encoding = tiktoken.encoding_for_model(model)
+    return len(encoding.encode(prompt))

app/utils/validation.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from pydantic import BaseModel, create_model
+from typing import Literal, List
+def generate_classification_model(labels: List[str]) -> BaseModel:
+    """
+    Dynamically generates a Pydantic model for classification based on user-provided labels.
+    Args:
+        labels (List[str]): List of valid label strings.
+    Returns:
+        BaseModel: A dynamically generated Pydantic model.
+    """
+    return create_model(
+        "DynamicClassificationOutput",
+        label=(Literal[tuple(labels)], ...),  # Enforce that 'label' matches one of the valid labels
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+kagglehub
+openai
+pandas
+pydantic
+pytest
+pytest-mock
+scikit-learn
+sentencepiece
+sentence_transformers
+streamlit
+tiktoken
+transformers

tests/__init__.py ADDED Viewed

File without changes

tests/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (146 Bytes). View file

tests/__pycache__/test_api.cpython-310-pytest-8.3.4.pyc ADDED Viewed

Binary file (1.18 kB). View file

tests/__pycache__/test_evaluation.cpython-310-pytest-8.3.4.pyc ADDED Viewed

Binary file (1.21 kB). View file

tests/__pycache__/test_prompt.cpython-310-pytest-8.3.4.pyc ADDED Viewed

Binary file (1.49 kB). View file

tests/__pycache__/test_validation.cpython-310-pytest-8.3.4.pyc ADDED Viewed

Binary file (1.53 kB). View file

tests/test_api.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from unittest.mock import Mock
+from utils.api import classify_row_chat
+def test_classify_row_chat():
+    # Mock the OpenAI client and its response
+    client_mock = Mock()
+    client_mock.chat.completions.create.return_value = Mock(
+        choices=[Mock(message=Mock(content="Positive"))]
+    )
+    # Define the prompt
+    prompt = "Classify the following observation: Age: 25, Weight: 70\nLabel:"
+    # Call the classify_row_chat function with the mocked client
+    prediction = classify_row_chat(prompt=prompt, client=client_mock, model="gpt-3.5-turbo")
+    # Assert the response matches the expected label
+    assert prediction == "Positive", "The classification should return 'Positive'"

tests/test_evaluation.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from utils.evaluation import evaluate_predictions
+def test_evaluate_predictions():
+    y_true = ["Positive", "Negative", "Positive"]
+    y_pred = ["Positive", "Negative", "Positive"]
+    # Test perfect match
+    report = evaluate_predictions(y_true, y_pred)
+    assert report["accuracy"] == 1.0, "Accuracy should be 100% for perfect predictions"
+    # Test mismatched predictions
+    y_pred_mismatch = ["Negative", "Negative", "Positive"]
+    report_mismatch = evaluate_predictions(y_true, y_pred_mismatch)
+    assert report_mismatch["accuracy"] < 1.0, "Accuracy should be less than 100% for mismatched predictions"

tests/test_prompt.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import pytest
+from utils.prompt import generate_prompts
+def test_generate_prompts():
+    example_rows = [  # Update to match the function's parameter name
+        {"features": {"Age": 34, "Weight": 70, "Location": "Urban"}, "label": "Positive"},
+        {"features": {"Age": 25, "Weight": 60, "Location": "Rural"}, "label": "Negative"},
+    ]
+    features = ["Age", "Weight", "Location"]
+    label_descriptions = {
+        "Positive": "The sentiment is positive.",
+        "Negative": "The sentiment is negative.",
+    }
+    row = {"Age": 30, "Weight": 65, "Location": "Suburban"}
+    system_prompt, user_prompt = generate_prompts(
+        row=row, example_rows=example_rows, features=features, label_descriptions=label_descriptions
+    )
+    assert "Age: 34; Weight: 70; Location: Urban" in system_prompt
+    assert "Label: Positive" in system_prompt
+    assert "Label:" in user_prompt

tests/test_validation.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from pydantic import ValidationError
+from utils.validation import generate_classification_model
+def test_classification_output_validation():
+    # Dynamically generate classification model
+    ClassificationOutput = generate_classification_model(["Positive", "Negative"])
+    # Test valid input
+    valid_output = ClassificationOutput(label="Positive")
+    assert valid_output.label == "Positive", "Label should be 'Positive'"
+    # Test invalid input
+    try:
+        ClassificationOutput(label="InvalidLabel")
+    except ValidationError as e:
+        error_message = str(e)
+        assert "Input should be 'Positive' or 'Negative'" in error_message, "Should raise validation error with correct message"