cyb006-baseline-classifier / validation_results.json

Initial release: XGBoost + MLP for user-risk-tier classification, plus structural-leakage diagnostic on threat-actor detection

e6a6835 verified 2 days ago

raw

history blame contribute delete

3.52 kB

	{
	"version": "1.0.0",
	"dataset": "xpertsystems/cyb006-sample",
	"task": "3-class user_risk_tier classification",
	"baselines": {
	"always_predict_majority_accuracy": 0.5666666666666667,
	"majority_class": "low",
	"random_guess_accuracy": 0.3333333333333333
	},
	"split": {
	"strategy": "stratified (StratifiedShuffleSplit, nested 70/15/15)",
	"rationale": "This is a USER-LEVEL task (one row per user, 200 users total). Group-aware splitting does not apply since there is no many-rows-per-group structure to leak. Stratified splitting ensures each of the train/val/test splits preserves the 3-tier class distribution.",
	"users_train": 139,
	"users_val": 31,
	"users_test": 30,
	"seed": 42
	},
	"n_features": 34,
	"label_classes": [
	"low",
	"medium",
	"high"
	],
	"class_distribution_train": {
	"low": 79,
	"medium": 33,
	"high": 27
	},
	"class_distribution_test": {
	"low": 17,
	"medium": 7,
	"high": 6
	},
	"leakage_excluded_features": [
	"threat_actor_flag (perfect oracle for high tier)",
	"account_takeover_flag (2/200 positives, oracle-prone)",
	"credential_attack_victim_flag (1/200 positives)",
	"velocity_anomaly_score (per-session, leaky for threat detection; aggregated session features that DO leak are excluded from the session-aggregate fields)",
	"session_timestamp_utc (per-session, leaky)",
	"credential_attempt_count (per-session, leaky)",
	"login_outcome (per-session, leaky)"
	],
	"leakage_audit_note": "See leakage_diagnostic.json for the full audit on the abandoned threat-actor binary detection task. Features dropped from session aggregation reflect that audit.",
	"models": {
	"xgboost": {
	"architecture": "Gradient-boosted decision trees, multi:softprob, 3 classes",
	"framework": "xgboost",
	"test_metrics": {
	"model": "xgboost",
	"accuracy": 0.6666666666666666,
	"macro_f1": 0.6453546453546454,
	"weighted_f1": 0.6634032634032633,
	"per_class_f1": {
	"low": 0.7272727272727273,
	"medium": 0.2857142857142857,
	"high": 0.9230769230769231
	},
	"confusion_matrix": {
	"labels": [
	"low",
	"medium",
	"high"
	],
	"matrix": [
	[
	12,
	5,
	0
	],
	[
	4,
	2,
	1
	],
	[
	0,
	0,
	6
	]
	]
	},
	"macro_roc_auc_ovr": 0.8016919142238835
	}
	},
	"mlp": {
	"architecture": "PyTorch MLP, 34 -> 128 -> 64 -> 3, BatchNorm1d + ReLU + Dropout, weighted cross-entropy loss",
	"framework": "pytorch",
	"test_metrics": {
	"model": "mlp",
	"accuracy": 0.6,
	"macro_f1": 0.5914438502673797,
	"weighted_f1": 0.6054545454545455,
	"per_class_f1": {
	"low": 0.6470588235294118,
	"medium": 0.4,
	"high": 0.7272727272727273
	},
	"confusion_matrix": {
	"labels": [
	"low",
	"medium",
	"high"
	],
	"matrix": [
	[
	11,
	5,
	1
	],
	[
	4,
	3,
	0
	],
	[
	2,
	0,
	4
	]
	]
	},
	"macro_roc_auc_ovr": 0.6973752247089843
	}
	}
	}
	}