| """ |
| System tests for end-to-end workflows. |
| |
| Tests the complete system including training and inference pipelines. |
| """ |
| import pytest |
| import numpy as np |
| import pandas as pd |
| import tempfile |
| from pathlib import Path |
| from unittest.mock import patch, MagicMock |
| import joblib |
|
|
| from sklearn.ensemble import RandomForestClassifier |
| from sklearn.multioutput import MultiOutputClassifier |
|
|
|
|
@pytest.mark.system
@pytest.mark.slow
class TestTrainingPipeline:
    """System tests for the model training pipeline.

    Every test derives TF-IDF features and label targets from the
    ``sample_dataframe`` argument, fits a small random forest and checks
    the shape / value range of what the fitted model predicts.
    """

    def test_complete_training_workflow(self, sample_dataframe):
        """Fit a multi-output forest on a train split and predict the held-out split."""
        from hopcroft_skill_classification_tool_competition.features import (
            extract_tfidf_features,
            prepare_labels,
        )
        from sklearn.model_selection import train_test_split

        # Only the feature matrix is needed; the fitted vectorizer is discarded.
        features, _ = extract_tfidf_features(sample_dataframe, max_features=50)
        labels = prepare_labels(sample_dataframe)

        X_train, X_test, y_train, y_test = train_test_split(
            features, labels.values, test_size=0.2, random_state=42
        )

        model = MultiOutputClassifier(
            RandomForestClassifier(n_estimators=10, random_state=42)
        )
        model.fit(X_train, y_train)

        predictions = model.predict(X_test)

        # One row per test sample, one column per label, binary values only.
        assert predictions.shape[0] == X_test.shape[0]
        assert predictions.shape[1] == y_test.shape[1]
        assert np.all((predictions == 0) | (predictions == 1))

    def test_training_with_oversampling(self, sample_dataframe):
        """Fit a single-label forest on a randomly oversampled train split."""
        from hopcroft_skill_classification_tool_competition.features import (
            extract_tfidf_features,
            prepare_labels,
        )
        from imblearn.over_sampling import RandomOverSampler
        from sklearn.model_selection import train_test_split

        features, _ = extract_tfidf_features(sample_dataframe, max_features=50)
        labels = prepare_labels(sample_dataframe)

        # The oversampler is applied to a single target: the first label column.
        y_single = labels.iloc[:, 0].values

        X_train, X_test, y_train, y_test = train_test_split(
            features, y_single, test_size=0.2, random_state=42
        )

        ros = RandomOverSampler(random_state=42)
        X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

        rf = RandomForestClassifier(n_estimators=10, random_state=42)
        rf.fit(X_resampled, y_resampled)

        predictions = rf.predict(X_test)

        # ``shape[0]`` rather than ``len``: it matches the check used in
        # test_complete_training_workflow and also works if the feature
        # matrix is a SciPy sparse matrix, for which ``len`` raises TypeError.
        assert len(predictions) == X_test.shape[0]
        assert np.all((predictions == 0) | (predictions == 1))

    def test_model_serialization(self, sample_dataframe):
        """Check that a model dumped with joblib predicts identically after reload."""
        from hopcroft_skill_classification_tool_competition.features import (
            extract_tfidf_features,
            prepare_labels,
        )

        features, _ = extract_tfidf_features(sample_dataframe, max_features=50)
        labels = prepare_labels(sample_dataframe)

        model = MultiOutputClassifier(
            RandomForestClassifier(n_estimators=5, random_state=42)
        )
        model.fit(features, labels.values)

        # The temporary directory (and the pickle inside it) is removed when
        # the ``with`` block exits, whether or not an assertion fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            model_path = str(Path(tmp_dir) / "model.pkl")
            joblib.dump(model, model_path)
            loaded_model = joblib.load(model_path)

            pred_original = model.predict(features)
            pred_loaded = loaded_model.predict(features)

            np.testing.assert_array_equal(pred_original, pred_loaded)
|
|
|
|
@pytest.mark.system
class TestInferencePipeline:
    """System tests for the inference pipeline."""

    @staticmethod
    def _fit_on(sample_dataframe):
        """Fit the small multi-output forest shared by every test in this class.

        Returns a ``(model, vectorizer, features, labels)`` tuple where
        ``features`` and ``vectorizer`` come from ``extract_tfidf_features``
        and ``labels`` from ``prepare_labels``.
        """
        from hopcroft_skill_classification_tool_competition.features import (
            extract_tfidf_features,
            prepare_labels,
        )

        features, vectorizer = extract_tfidf_features(sample_dataframe, max_features=50)
        labels = prepare_labels(sample_dataframe)

        forest = RandomForestClassifier(n_estimators=5, random_state=42)
        model = MultiOutputClassifier(forest)
        model.fit(features, labels.values)
        return model, vectorizer, features, labels

    def test_inference_on_new_text(self, sample_dataframe):
        """Predict labels for texts that were not part of the training data."""
        from hopcroft_skill_classification_tool_competition.features import (
            clean_github_text,
        )

        model, vectorizer, _, labels = self._fit_on(sample_dataframe)

        unseen = [
            "Fixed critical bug in authentication module",
            "Added new REST API endpoint for users",
        ]

        # Clean each text, then project it with the already-fitted vectorizer.
        cleaned = list(map(clean_github_text, unseen))
        unseen_features = vectorizer.transform(cleaned).toarray()

        predictions = model.predict(unseen_features)

        n_rows, n_cols = predictions.shape[0], predictions.shape[1]
        assert n_rows == len(unseen)
        assert n_cols == labels.shape[1]
        assert np.all((predictions == 0) | (predictions == 1))

    def test_inference_with_empty_input(self, sample_dataframe):
        """Predict on an empty string and check the output shape."""
        from hopcroft_skill_classification_tool_competition.features import (
            clean_github_text,
        )

        model, vectorizer, _, labels = self._fit_on(sample_dataframe)

        cleaned = clean_github_text("")
        empty_features = vectorizer.transform([cleaned]).toarray()

        predictions = model.predict(empty_features)

        # A single input row must yield a single prediction row, one column per label.
        assert predictions.shape[0] == 1
        assert predictions.shape[1] == labels.shape[1]

    def test_batch_inference(self, sample_dataframe):
        """Predict on the full training matrix in one call."""
        model, _, features, labels = self._fit_on(sample_dataframe)

        predictions = model.predict(features)

        assert predictions.shape == labels.shape
        assert np.all((predictions == 0) | (predictions == 1))
|
|
|
|
@pytest.mark.system
@pytest.mark.requires_data
class TestEndToEndDataFlow:
    """System tests covering the data flow from raw storage to predictions."""

    def test_full_pipeline_database_to_predictions(self, temp_db):
        """Load from the database, train, predict and check mean per-label accuracy."""
        from hopcroft_skill_classification_tool_competition.features import (
            load_data_from_db,
            extract_tfidf_features,
            prepare_labels,
        )
        from sklearn.metrics import accuracy_score
        from sklearn.model_selection import train_test_split

        frame = load_data_from_db(temp_db)

        features, _ = extract_tfidf_features(frame, max_features=50)
        labels = prepare_labels(frame)

        X_train, X_test, y_train, y_test = train_test_split(
            features, labels.values, test_size=0.4, random_state=42
        )

        model = MultiOutputClassifier(
            RandomForestClassifier(n_estimators=5, random_state=42)
        )
        model.fit(X_train, y_train)

        predictions = model.predict(X_test)

        # Accuracy is computed independently for every label column and then
        # averaged; the mean must exceed the 0.4 threshold.
        per_label_accuracy = [
            accuracy_score(y_test[:, col], predictions[:, col])
            for col in range(y_test.shape[1])
        ]

        assert np.mean(per_label_accuracy) > 0.4
|
|
|
|
@pytest.mark.system
class TestModelValidation:
    """System tests for model validation workflows."""

    def test_cross_validation_workflow(self, sample_dataframe):
        """Run 2-fold cross-validation on the first label column."""
        from hopcroft_skill_classification_tool_competition.features import (
            extract_tfidf_features,
            prepare_labels,
        )
        from sklearn.model_selection import cross_val_score

        features, _ = extract_tfidf_features(sample_dataframe, max_features=50)
        first_label = prepare_labels(sample_dataframe).iloc[:, 0].values

        forest = RandomForestClassifier(n_estimators=5, random_state=42)
        fold_scores = cross_val_score(
            forest, features, first_label, cv=2, scoring='accuracy'
        )

        # One accuracy value per fold, each within [0, 1].
        assert len(fold_scores) == 2
        assert all(score >= 0 and score <= 1 for score in fold_scores)

    def test_grid_search_workflow(self, sample_dataframe):
        """Run a 2-fold grid search over forest size and depth on the first label column."""
        from hopcroft_skill_classification_tool_competition.features import (
            extract_tfidf_features,
            prepare_labels,
        )
        from sklearn.model_selection import GridSearchCV

        features, _ = extract_tfidf_features(sample_dataframe, max_features=50)
        first_label = prepare_labels(sample_dataframe).iloc[:, 0].values

        search_space = {
            'n_estimators': [5, 10],
            'max_depth': [5, 10],
        }

        searcher = GridSearchCV(
            RandomForestClassifier(random_state=42),
            search_space,
            cv=2,
            scoring='accuracy',
        )
        searcher.fit(features, first_label)

        # A completed search exposes the winning parameters and their score.
        assert hasattr(searcher, 'best_params_')
        assert hasattr(searcher, 'best_score_')
        assert searcher.best_score_ >= 0
|
|
|
|
@pytest.mark.system
@pytest.mark.regression
class TestRegressionScenarios:
    """Regression tests for known issues and edge cases."""

    def test_empty_feature_vectors_handling(self):
        """
        Regression test: Ensure empty feature vectors don't crash training.

        This was identified in Great Expectations TEST 2 - 25 samples with
        zero features after TF-IDF extraction.
        """
        # Rows 1 and 3 are all-zero feature vectors.
        X = np.array([
            [0.1, 0.2, 0.3],
            [0.0, 0.0, 0.0],
            [0.4, 0.5, 0.6],
            [0.0, 0.0, 0.0],
        ])

        y = np.array([
            [1, 0],
            [0, 1],
            [1, 1],
            [0, 0],
        ])

        rf = RandomForestClassifier(n_estimators=5, random_state=42)
        model = MultiOutputClassifier(rf)
        model.fit(X, y)

        predictions = model.predict(X)
        assert predictions.shape == y.shape

    def test_zero_occurrence_labels_handling(self):
        """
        Regression test: Handle labels with zero occurrences.

        This was identified in Great Expectations TEST 5 - 75 labels with
        zero occurrences in the dataset.
        """
        from hopcroft_skill_classification_tool_competition.features import get_label_columns

        # 'Label2' never occurs (all zeros) but must still be reported.
        df = pd.DataFrame({
            'issue text': ['text1', 'text2', 'text3'],
            'Label1': [1, 1, 0],
            'Label2': [0, 0, 0],
            'Label3': [1, 0, 1],
        })

        label_cols = get_label_columns(df)

        assert 'Label1' in label_cols
        assert 'Label2' in label_cols
        assert 'Label3' in label_cols

    def test_high_sparsity_features(self):
        """
        Regression test: Handle very sparse features (>99% zeros).

        This was identified in Great Expectations TEST 6 - 99.88% sparsity.
        """
        n_samples, n_features = 100, 1000

        # A locally seeded generator keeps the test data reproducible and
        # independent of the global NumPy random state.
        rng = np.random.default_rng(42)

        X = np.zeros((n_samples, n_features))

        # Plant exactly one value per row. ``1 - random()`` lies in (0, 1],
        # so every planted value is strictly non-zero.
        columns = rng.integers(0, n_features, size=n_samples)
        X[np.arange(n_samples), columns] = 1.0 - rng.random(n_samples)

        y = rng.integers(0, 2, size=n_samples)

        # Guard the precondition this regression test is named after.
        sparsity = 1.0 - np.count_nonzero(X) / X.size
        assert sparsity > 0.99

        rf = RandomForestClassifier(n_estimators=5, random_state=42)
        rf.fit(X, y)

        predictions = rf.predict(X)
        assert len(predictions) == len(y)

    def test_duplicate_samples_detection(self):
        """
        Regression test: Detect duplicate samples.

        This was identified in Deepchecks validation - 481 duplicates (6.72%).
        """
        df = pd.DataFrame({
            'issue text': ['duplicate', 'duplicate', 'unique'],
            'issue description': ['desc', 'desc', 'different'],
            'Label1': [1, 1, 0],
        })

        # ``duplicated`` flags every repeat after the first occurrence, so the
        # two identical rows contribute exactly one flagged row.
        duplicates = df[['issue text', 'issue description']].duplicated()

        assert duplicates.sum() == 1

        df_cleaned = df.drop_duplicates(subset=['issue text', 'issue description'])
        assert len(df_cleaned) == 2
|
|
|
|
@pytest.mark.system
@pytest.mark.acceptance
class TestAcceptanceCriteria:
    """Acceptance tests verifying that the stated requirements are met."""

    def test_multi_label_classification_support(self, sample_dataframe):
        """
        Acceptance test: System supports multi-label classification.

        Requirement: Each issue can have multiple skill labels.
        """
        from hopcroft_skill_classification_tool_competition.features import (
            extract_tfidf_features,
            prepare_labels,
        )

        features, _ = extract_tfidf_features(sample_dataframe, max_features=50)
        targets = prepare_labels(sample_dataframe)

        model = MultiOutputClassifier(
            RandomForestClassifier(n_estimators=5, random_state=42)
        )
        model.fit(features, targets.values)

        # Row sums count how many labels were predicted for each sample.
        predicted_counts = model.predict(features).sum(axis=1)
        assert np.any(predicted_counts > 1), "System should support multiple labels per sample"

    def test_handles_github_text_format(self):
        """
        Acceptance test: System handles GitHub issue text format.

        Requirement: Process text from GitHub issues with URLs, code, etc.
        """
        from hopcroft_skill_classification_tool_competition.features import clean_github_text

        # Sample issue body containing a URL, a fenced code block, an HTML
        # tag and an emoji.
        github_text = """
        Fixed bug in authentication #123

        See: https://github.com/repo/issues/123

        ```python
        def login(user):
            return authenticate(user)
        ```

        Related to <b>security</b> improvements 🔒
        """

        cleaned = clean_github_text(github_text)

        # None of the markup fragments may survive, and some text must remain.
        for fragment in ("https://", "```", "<b>"):
            assert fragment not in cleaned
        assert len(cleaned) > 0

    def test_produces_binary_predictions(self, sample_dataframe):
        """
        Acceptance test: System produces binary predictions (0 or 1).

        Requirement: Clear yes/no predictions for each skill.
        """
        from hopcroft_skill_classification_tool_competition.features import (
            extract_tfidf_features,
            prepare_labels,
        )

        features, _ = extract_tfidf_features(sample_dataframe, max_features=50)
        targets = prepare_labels(sample_dataframe)

        model = MultiOutputClassifier(
            RandomForestClassifier(n_estimators=5, random_state=42)
        )
        model.fit(features, targets.values)

        predictions = model.predict(features)

        assert np.all((predictions == 0) | (predictions == 1))
|
|