Stacking and Blending

Stacking and blending are advanced ensemble methods that combine predictions from multiple base models: stacking trains a meta-model on cross-validated predictions, while blending learns weighted combinations on a hold-out set. This section covers their implementation and best practices.

Theoretical Foundation

1. Stacking Concepts

  • Multiple base models
  • Meta-model (level-2 model)
  • Cross-validation (out-of-fold) predictions, as sketched after this list
  • Model diversity
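
The mechanics fit in a few lines: each base model's out-of-fold predictions become one training column for the meta-model. A minimal sketch using scikit-learn's cross_val_predict (the helper name make_meta_features is illustrative, not a standard API):

import numpy as np
from sklearn.model_selection import cross_val_predict

def make_meta_features(models, X, y, cv=5):
    """Out-of-fold positive-class probabilities, one column per base model."""
    return np.column_stack([
        cross_val_predict(m, X, y, cv=cv, method='predict_proba')[:, 1]
        for m in models
    ])

# The meta-model trains on out-of-fold predictions, never in-sample ones:
# meta_X = make_meta_features(base_models, X, y)
# meta_model.fit(meta_X, y)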

2. Blending Concepts

  • Hold-out set predictions (see the sketch after this list)
  • Weighted combinations
  • Model selection
  • Overfitting prevention
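
Blending swaps cross-validation for a single hold-out split: base models train on one part of the data and are combined on the rest. A minimal sketch with uniform weights (all names illustrative):

import numpy as np
from sklearn.model_selection import train_test_split

def blend_holdout(models, X, y, weights=None, val_size=0.3):
    """Train models on a split; return blended hold-out probabilities and labels."""
    X_tr, X_val, y_tr, y_val = train_test_split(
        X, y, test_size=val_size, random_state=42
    )
    preds = np.column_stack([
        m.fit(X_tr, y_tr).predict_proba(X_val)[:, 1] for m in models
    ])
    if weights is None:
        weights = np.full(len(models), 1.0 / len(models))  # uniform blend
    return preds @ weights, y_val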

Implementation

1. Basic Stacking

from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

def create_stacking_classifier(
    cv=5,
    final_estimator=None
):
    """Create a stacking classifier"""
    # Define base estimators
    estimators = [
        ('lr', LogisticRegression()),
        ('dt', DecisionTreeClassifier()),
        ('svm', SVC(probability=True)),
        ('knn', KNeighborsClassifier())
    ]
    
    # Define final estimator
    if final_estimator is None:
        final_estimator = LogisticRegression()
    
    # Create stacking classifier
    stacking = StackingClassifier(
        estimators=estimators,
        final_estimator=final_estimator,
        cv=cv,
        stack_method='predict_proba'
    )
    
    return stacking
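
A quick smoke test on synthetic data (make_classification is used purely for illustration):

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_samples=500, n_features=20, random_state=42)
stacking = create_stacking_classifier(cv=5)
scores = cross_val_score(stacking, X, y, cv=3)  # outer CV for an honest estimate
print(f"Stacking accuracy: {scores.mean():.3f} +/- {scores.std():.3f}")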

2. Custom Stacking Implementation

import numpy as np
from sklearn.model_selection import KFold
from sklearn.base import BaseEstimator, ClassifierMixin, clone

class CustomStackingClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, base_models, meta_model, n_folds=5):
        """Initialize custom stacking classifier (binary targets assumed)"""
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds
    
    def fit(self, X, y):
        """Fit stacking classifier"""
        X, y = np.asarray(X), np.asarray(y)
        
        # Meta-features: one column of out-of-fold probabilities per base model
        meta_features = np.zeros((X.shape[0], len(self.base_models)))
        
        # Create cross-validation folds
        kf = KFold(n_splits=self.n_folds, shuffle=True, random_state=42)
        
        # Generate out-of-fold meta-features
        self.fitted_models_ = []
        for i, model in enumerate(self.base_models):
            for train_idx, val_idx in kf.split(X):
                # Train a fresh clone on the fold, predict its hold-out part
                fold_model = clone(model).fit(X[train_idx], y[train_idx])
                meta_features[val_idx, i] = fold_model.predict_proba(X[val_idx])[:, 1]
            
            # Refit on the full dataset for use at prediction time
            self.fitted_models_.append(clone(model).fit(X, y))
        
        # Train meta-model on out-of-fold predictions to avoid leakage
        self.meta_model.fit(meta_features, y)
        
        return self
    
    def _meta_features(self, X):
        """Column-stack base-model positive-class probabilities"""
        return np.column_stack([
            model.predict_proba(X)[:, 1] for model in self.fitted_models_
        ])
    
    def predict_proba(self, X):
        """Predict probability estimates"""
        return self.meta_model.predict_proba(self._meta_features(X))
    
    def predict(self, X):
        """Make class predictions via the meta-model"""
        return self.meta_model.predict(self._meta_features(X))
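
Usage mirrors any scikit-learn estimator; a sketch reusing the synthetic X, y from above (model choices are arbitrary):

stack = CustomStackingClassifier(
    base_models=[DecisionTreeClassifier(), KNeighborsClassifier()],
    meta_model=LogisticRegression(),
    n_folds=5
)
stack.fit(X, y)
print(stack.predict(X[:5]))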

3. Blending Implementation

from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss

class Blender:
    def __init__(self, models, weights=None):
        """Initialize blender; weights are optimized on hold-out data if omitted"""
        self.models = models
        self.weights = weights
    
    def fit(self, X, y, val_size=0.3):
        """Fit models using hold-out validation"""
        # Split off a hold-out set for learning the blend
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=val_size, random_state=42
        )
        
        # Train models and collect their hold-out predictions
        self.val_predictions = np.zeros((X_val.shape[0], len(self.models)))
        
        for i, model in enumerate(self.models):
            model.fit(X_train, y_train)
            self.val_predictions[:, i] = model.predict_proba(X_val)[:, 1]
        
        # Optimize weights on the hold-out set if none were provided
        if self.weights is None:
            self.weights = np.ones(len(self.models)) / len(self.models)
            self.optimize_weights(y_val)
        
        # Retrain on the full dataset for final use
        for model in self.models:
            model.fit(X, y)
        
        return self
    
    def optimize_weights(self, y_val):
        """Optimize blending weights by minimizing hold-out log loss"""
        from scipy.optimize import minimize
        
        def loss_function(weights):
            # Weighted average of per-model probabilities, clipped for log loss
            blended = self.val_predictions @ weights
            return log_loss(y_val, np.clip(blended, 1e-15, 1 - 1e-15))
        
        # Weights must sum to one and stay non-negative
        constraints = (
            {'type': 'eq', 'fun': lambda w: np.sum(w) - 1},
            {'type': 'ineq', 'fun': lambda w: w}
        )
        
        result = minimize(
            loss_function,
            self.weights,
            constraints=constraints,
            method='SLSQP'
        )
        
        self.weights = result.x
    
    def predict_proba(self, X):
        """Blended positive-class probabilities (1-D array)"""
        predictions = np.column_stack([
            model.predict_proba(X)[:, 1] for model in self.models
        ])
        return predictions @ self.weights
    
    def predict(self, X):
        """Threshold blended probabilities at 0.5"""
        return (self.predict_proba(X) > 0.5).astype(int)
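
A usage sketch, again on the synthetic X, y (the model list is illustrative); with no weights supplied, fit optimizes them on the hold-out split:

blender = Blender(models=[
    DecisionTreeClassifier(random_state=0),
    KNeighborsClassifier(),
    LogisticRegression(max_iter=1000)
])
blender.fit(X, y)
print("Learned weights:", np.round(blender.weights, 3))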

Advanced Techniques

1. Multi-level Stacking

from sklearn.model_selection import KFold, cross_val_predict
from sklearn.base import clone

class MultiLevelStacking:
    def __init__(self, base_models, meta_models, final_model):
        """Initialize multi-level stacking"""
        self.base_models = base_models
        self.meta_models = meta_models
        self.final_model = final_model
    
    def fit(self, X, y):
        """Fit multi-level stacking model"""
        X, y = np.asarray(X), np.asarray(y)
        
        # First level: out-of-fold probabilities from the base models
        first_level = np.zeros((X.shape[0], len(self.base_models)))
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        
        for i, model in enumerate(self.base_models):
            for train_idx, val_idx in kf.split(X):
                fold_model = clone(model).fit(X[train_idx], y[train_idx])
                first_level[val_idx, i] = fold_model.predict_proba(X[val_idx])[:, 1]
            
            model.fit(X, y)  # refit on everything for prediction time
        
        # Second level: also out-of-fold, so the final model sees no leakage
        second_level = np.zeros((X.shape[0], len(self.meta_models)))
        
        for i, model in enumerate(self.meta_models):
            second_level[:, i] = cross_val_predict(
                model, first_level, y, cv=kf, method='predict_proba'
            )[:, 1]
            model.fit(first_level, y)
        
        # Final level
        self.final_model.fit(second_level, y)
        
        return self
    
    def _second_level(self, X):
        """Propagate new data through the first two levels"""
        first = np.column_stack([
            m.predict_proba(X)[:, 1] for m in self.base_models
        ])
        return np.column_stack([
            m.predict_proba(first)[:, 1] for m in self.meta_models
        ])
    
    def predict_proba(self, X):
        """Final-model probability estimates"""
        return self.final_model.predict_proba(self._second_level(X))
    
    def predict(self, X):
        """Final-model class predictions"""
        return self.final_model.predict(self._second_level(X))
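
A usage sketch (model choices arbitrary):

ml_stack = MultiLevelStacking(
    base_models=[DecisionTreeClassifier(), KNeighborsClassifier()],
    meta_models=[LogisticRegression(max_iter=1000)],
    final_model=LogisticRegression()
)
ml_stack.fit(X, y)
print(ml_stack.predict(X[:5]))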

2. Feature-Weighted Linear Stacking

Feature-weighted linear stacking (Sill et al., 2009) lets the blending weights vary as linear functions of meta-features, so different base models can dominate in different regions of the input space. The implementation below is a sketch of that idea for binary targets.

from sklearn.linear_model import LinearRegression

class FeatureWeightedLinearStacking:
    def __init__(self, models, meta_feature_fn=None):
        """Initialize feature-weighted linear stacking.
        
        meta_feature_fn maps X to the meta-feature matrix that modulates
        the blending weights; by default X itself is used.
        """
        self.models = models
        self.meta_feature_fn = meta_feature_fn or (lambda X: np.asarray(X))
        self.combiner = None
    
    def _design_matrix(self, X, base_predictions):
        """Products of every meta-feature with every base prediction.
        
        FWLS models the output as sum_ij v_ij * f_j(x) * p_i(x), which is
        linear in the coefficients v_ij and so can be fit by least squares.
        """
        meta = self.meta_feature_fn(X)
        return np.hstack([
            meta[:, [j]] * base_predictions for j in range(meta.shape[1])
        ])
    
    def fit(self, X, y):
        """Fit feature-weighted linear stacking"""
        # In-sample base predictions; out-of-fold predictions (as in the
        # stacking classes above) would be safer against leakage
        base_predictions = np.column_stack([
            model.fit(X, y).predict_proba(X)[:, 1] for model in self.models
        ])
        self.combiner = LinearRegression().fit(
            self._design_matrix(X, base_predictions), y
        )
        return self
    
    def predict_proba(self, X):
        """Blended positive-class probabilities, clipped to [0, 1]"""
        base_predictions = np.column_stack([
            model.predict_proba(X)[:, 1] for model in self.models
        ])
        preds = self.combiner.predict(self._design_matrix(X, base_predictions))
        return np.clip(preds, 0.0, 1.0)
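
A usage sketch, with X itself serving as the meta-feature matrix:

fwls = FeatureWeightedLinearStacking(
    models=[DecisionTreeClassifier(), LogisticRegression(max_iter=1000)]
)
fwls.fit(X, y)
print(fwls.predict_proba(X[:5]))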

Best Practices

1. Model Selection

  • Choose diverse base models
  • Consider computational cost
  • Balance model complexity
  • Evaluate individual performance (see the sketch after this list)
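
Individual performance is worth checking before committing to an ensemble; a minimal sketch (candidate list illustrative):

from sklearn.model_selection import cross_val_score

candidates = {
    'lr': LogisticRegression(max_iter=1000),
    'dt': DecisionTreeClassifier(),
    'knn': KNeighborsClassifier(),
}
for name, model in candidates.items():
    scores = cross_val_score(model, X, y, cv=5)
    print(f"{name}: {scores.mean():.3f} +/- {scores.std():.3f}")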

2. Architecture Design

  • Determine number of levels
  • Choose appropriate meta-learner
  • Consider feature engineering
  • Validate architecture choices

3. Implementation Tips

  • Use cross-validation (see the sketch after this list)
  • Handle data leakage
  • Monitor memory usage
  • Implement early stopping
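
For leakage-safe folds and parallel training, scikit-learn's StackingClassifier accepts an explicit CV splitter and n_jobs, for example:

from sklearn.ensemble import StackingClassifier
from sklearn.model_selection import StratifiedKFold

safe_stack = StackingClassifier(
    estimators=[('dt', DecisionTreeClassifier()),
                ('knn', KNeighborsClassifier())],
    final_estimator=LogisticRegression(),
    # explicit splitter keeps out-of-fold predictions leakage-free
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    n_jobs=-1  # train base estimators in parallel
)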

4. Common Pitfalls

  • Overfitting at meta-level
  • Insufficient model diversity (see the check after this list)
  • Data leakage in stacking
  • Needlessly complex architectures
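
Diversity can be measured directly: strongly correlated out-of-fold predictions add little to an ensemble. A minimal check, reusing make_meta_features from the sketch at the start of this section:

oof = make_meta_features(
    [LogisticRegression(max_iter=1000),
     DecisionTreeClassifier(),
     KNeighborsClassifier()],
    X, y
)
print(np.corrcoef(oof.T))  # pairwise correlations; values near 1 signal redundancy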