File size: 3,013 Bytes
a4e906f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

import joblib
import json
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier

class RelationshipPredictor:
    """
    Relationship Longevity Predictor

    Predicts compatibility between two individuals based on their
    personal profiles, using a weighted ensemble of three pre-trained
    classifiers (XGBoost, LightGBM, CatBoost) loaded from ``model_dir``.

    ``predict`` returns a dict with:
        - compatibility_score: weighted blend of the three models'
          probabilities, rounded to 4 decimals. It lies in 0-1 only when the
          configured weights are non-negative and sum to 1.
        - prediction: "High Compatibility" / "Moderate Compatibility" / "Low Compatibility"
        - individual_models: each model's own probability, rounded to 4 decimals

    Class attributes (override on a subclass or instance to re-tune labels):
        HIGH_THRESHOLD: minimum score labelled "High Compatibility".
        MODERATE_THRESHOLD: minimum score labelled "Moderate Compatibility".
    """

    HIGH_THRESHOLD = 0.7
    MODERATE_THRESHOLD = 0.4

    def __init__(self, model_dir="./"):
        """
        Load the three models, the feature column list and the ensemble config.

        Args:
            model_dir: Directory holding xgboost_model.joblib,
                lightgbm_model.joblib, catboost_model.cbm,
                feature_columns.joblib and ensemble_config.json.
                May be a string or a path-like object.
        """
        import os  # local import so the block does not rely on a file-level one

        base_dir = os.fspath(model_dir)

        def artifact(name):
            # os.path.join avoids ".//name" for the default "./" and, more
            # importantly, avoids "/name" (filesystem root) when model_dir is "".
            return os.path.join(base_dir, name)

        # NOTE(review): joblib.load unpickles its input; only point model_dir
        # at artifacts from a trusted source.
        self.xgb = joblib.load(artifact("xgboost_model.joblib"))
        self.lgb = joblib.load(artifact("lightgbm_model.joblib"))
        self.cat = CatBoostClassifier()
        self.cat.load_model(artifact("catboost_model.cbm"))
        self.feature_cols = joblib.load(artifact("feature_columns.joblib"))

        # Explicit encoding: the config must not depend on the platform locale.
        with open(artifact("ensemble_config.json"), encoding="utf-8") as f:
            self.config = json.load(f)

    def predict(self, person_a: dict, person_b: dict) -> dict:
        """
        Predict relationship compatibility between two people.

        Args:
            person_a: Dict with keys like age, race, interests, personality scores
            person_b: Dict with same structure

        Returns:
            Dict with ``compatibility_score``, ``prediction`` (label) and
            ``individual_models`` (per-model probabilities).

        Raises:
            NotImplementedError: Until ``_build_features`` is implemented.
            KeyError: If ``config['weights']`` lacks an entry for a model.
        """
        # Build feature vector from the two profiles
        features = self._build_features(person_a, person_b)

        # Column 1 of the first row of each model's predict_proba output.
        models = {"xgboost": self.xgb, "lightgbm": self.lgb, "catboost": self.cat}
        probs = {
            name: model.predict_proba(features)[:, 1][0]
            for name, model in models.items()
        }

        # Weighted blend, accumulated in the fixed xgboost/lightgbm/catboost order.
        weights = self.config['weights']
        score = sum(weights[name] * prob for name, prob in probs.items())

        return {
            'compatibility_score': round(float(score), 4),
            'prediction': self._label_for_score(score),
            'individual_models': {
                name: round(float(prob), 4) for name, prob in probs.items()
            },
        }

    def _label_for_score(self, score):
        """Map an ensemble score to its compatibility label (bounds inclusive)."""
        if score >= self.HIGH_THRESHOLD:
            return "High Compatibility"
        if score >= self.MODERATE_THRESHOLD:
            return "Moderate Compatibility"
        return "Low Compatibility"

    def _build_features(self, a, b):
        """
        Build engineered feature vector from two person profiles.

        Placeholder: always raises. An implementation must return something
        every model's ``predict_proba`` accepts; the required features are
        listed in feature_columns.joblib (loaded as ``self.feature_cols``).

        Raises:
            NotImplementedError: Always, until overridden/implemented.
        """
        # This would map raw profile inputs to the trained feature space
        # Implementation depends on the input format
        raise NotImplementedError(
            "Implement feature mapping based on your input format. "
            "See feature_columns.joblib for required features."
        )

# Usage example:
# predictor = RelationshipPredictor("./model_output")
# result = predictor.predict(person_a_profile, person_b_profile)