Hyperparameter Tuning

Systematic approaches to finding optimal hyperparameters for machine learning models.

Grid Search

from sklearn.model_selection import GridSearchCV

def grid_search_cv(model, param_grid, X, y, cv=5):
    # Initialize grid search
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=cv,
        n_jobs=-1,
        verbose=1
    )
    
    # Fit grid search
    grid_search.fit(X, y)
    
    return {
        'best_params': grid_search.best_params_,
        'best_score': grid_search.best_score_,
        'cv_results': grid_search.cv_results_
    }
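
A minimal usage sketch, assuming a scikit-learn RandomForestClassifier and a synthetic dataset (the parameter values are illustrative, not recommendations):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=500, random_state=42)
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [5, 10, None]
}
results = grid_search_cv(RandomForestClassifier(random_state=42), param_grid, X, y)
print(results['best_params'], results['best_score'])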

Visualization

import numpy as np
import matplotlib.pyplot as plt

def plot_grid_search_results(cv_results, param_name):
    # Sort by parameter value so the line plot is monotonic in x
    param_values = np.array(cv_results[f'param_{param_name}'], dtype=float)
    order = np.argsort(param_values)
    means = np.array(cv_results['mean_test_score'])[order]
    stds = np.array(cv_results['std_test_score'])[order]
    
    plt.figure(figsize=(10, 6))
    plt.plot(param_values[order], means)
    # Shade one standard deviation around the mean CV score
    plt.fill_between(
        param_values[order],
        means - stds,
        means + stds,
        alpha=0.2
    )
    plt.xlabel(param_name)
    plt.ylabel('CV Score')
    plt.title(f'Grid Search Results for {param_name}')
    return plt

Random Search

from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint

def random_search_cv(model, param_distributions, X, y, n_iter=100, cv=5):
    # Initialize random search
    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_distributions,
        n_iter=n_iter,
        cv=cv,
        n_jobs=-1,
        verbose=1
    )
    
    # Fit random search
    random_search.fit(X, y)
    
    return {
        'best_params': random_search.best_params_,
        'best_score': random_search.best_score_,
        'cv_results': random_search.cv_results_
    }
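
A usage sketch with continuous and integer distributions, reusing X and y from the grid search example above (note that scipy's uniform(loc, scale) samples from [loc, loc + scale]):

from sklearn.ensemble import GradientBoostingClassifier

param_distributions = {
    'learning_rate': uniform(0.01, 0.3),   # samples from [0.01, 0.31]
    'n_estimators': randint(100, 500),
    'subsample': uniform(0.6, 0.4)         # samples from [0.6, 1.0]
}
results = random_search_cv(
    GradientBoostingClassifier(random_state=42),
    param_distributions, X, y, n_iter=20
)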

Bayesian Optimization

from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical

def bayesian_optimization(model, search_spaces, X, y, n_iter=50, cv=5):
    # Initialize Bayesian optimization
    bayes_search = BayesSearchCV(
        estimator=model,
        search_spaces=search_spaces,
        n_iter=n_iter,
        cv=cv,
        n_jobs=-1,
        verbose=1
    )
    
    # Fit Bayesian search
    bayes_search.fit(X, y)
    
    return {
        'best_params': bayes_search.best_params_,
        'best_score': bayes_search.best_score_,
        'cv_results': bayes_search.cv_results_
    }
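
The imported Real, Integer, and Categorical types define bounded search dimensions. A sketch of a search space for an SVM, reusing X and y from above (the ranges are assumptions, not tuned values):

from sklearn.svm import SVC

search_spaces = {
    'C': Real(1e-3, 1e3, prior='log-uniform'),
    'gamma': Real(1e-4, 1e1, prior='log-uniform'),
    'kernel': Categorical(['rbf', 'poly']),
    'degree': Integer(2, 5)
}
results = bayesian_optimization(SVC(), search_spaces, X, y, n_iter=30)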

Population-Based Methods

Genetic Algorithm

import numpy as np

class GeneticOptimizer:
    def __init__(self, param_bounds, population_size=50):
        self.param_bounds = param_bounds
        self.population_size = population_size
        
    def initialize_population(self):
        population = []
        for _ in range(self.population_size):
            individual = {
                param: np.random.uniform(bounds[0], bounds[1])
                for param, bounds in self.param_bounds.items()
            }
            population.append(individual)
        return population
    
    def crossover(self, parent1, parent2):
        child = {}
        for param in self.param_bounds:
            if np.random.random() < 0.5:
                child[param] = parent1[param]
            else:
                child[param] = parent2[param]
        return child
    
    def mutate(self, individual, mutation_rate=0.1):
        mutated = individual.copy()
        for param, bounds in self.param_bounds.items():
            if np.random.random() < mutation_rate:
                mutated[param] = np.random.uniform(bounds[0], bounds[1])
        return mutated
    
    def optimize(self, fitness_func, n_generations=50):
        population = self.initialize_population()
        best_individual = None
        best_fitness = float('-inf')
        
        for generation in range(n_generations):
            # Evaluate fitness
            fitness_scores = [
                fitness_func(individual)
                for individual in population
            ]
            
            # Update the best individual now, while the indices still
            # refer to the generation that was just evaluated
            generation_best_idx = np.argmax(fitness_scores)
            if fitness_scores[generation_best_idx] > best_fitness:
                best_fitness = fitness_scores[generation_best_idx]
                best_individual = population[generation_best_idx].copy()
            
            # Select best individuals (elitism)
            elite_size = self.population_size // 4
            elite_indices = np.argsort(fitness_scores)[-elite_size:]
            elite = [population[i] for i in elite_indices]
            
            # Create new population, carrying the elite over unchanged
            new_population = elite.copy()
            
            while len(new_population) < self.population_size:
                # Select parents by index; np.random.choice is unreliable
                # on a list of dicts
                parent1 = elite[np.random.randint(len(elite))]
                parent2 = elite[np.random.randint(len(elite))]
                child = self.crossover(parent1, parent2)
                child = self.mutate(child)
                new_population.append(child)
            
            population = new_population
        
        return best_individual, best_fitness
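
A usage sketch with a hypothetical fitness function that maximizes cross-validated accuracy; the bounds are illustrative, and integer-valued parameters are rounded inside the fitness function since the optimizer samples continuous values:

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def fitness(params):
    model = RandomForestClassifier(
        n_estimators=int(params['n_estimators']),
        max_depth=int(params['max_depth']),
        random_state=42
    )
    return cross_val_score(model, X, y, cv=3).mean()

optimizer = GeneticOptimizer({'n_estimators': (10, 300), 'max_depth': (2, 20)})
best_params, best_score = optimizer.optimize(fitness, n_generations=10)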

Hyperparameter Optimization Strategies

Sequential Model-Based Optimization

from skopt import gp_minimize

def smbo_optimization(objective_func, search_space, n_calls=50):
    result = gp_minimize(
        func=objective_func,
        dimensions=search_space,
        n_calls=n_calls,
        noise=0.1,
        n_jobs=-1
    )
    
    return {
        'best_params': result.x,
        'best_score': result.fun,
        'all_results': result
    }
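
gp_minimize minimizes its objective, so a CV score has to be negated. A sketch with an assumed two-parameter space, reusing X and y from the earlier examples:

from skopt.space import Integer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

search_space = [
    Integer(10, 300, name='n_estimators'),
    Integer(2, 20, name='max_depth')
]

def objective(params):
    n_estimators, max_depth = params
    model = RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth, random_state=42
    )
    return -cross_val_score(model, X, y, cv=3).mean()  # negate: gp_minimize minimizes

result = smbo_optimization(objective, search_space)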

Multi-Objective Optimization

from sklearn.model_selection import ParameterGrid

def pareto_optimization(model, param_grid, X, y, objectives):
    results = []
    
    for params in ParameterGrid(param_grid):
        model.set_params(**params)
        model.fit(X, y)
        
        # Calculate multiple objectives
        scores = {
            name: objective(model, X, y)
            for name, objective in objectives.items()
        }
        
        results.append({
            'params': params,
            'scores': scores
        })
    
    return results

def is_pareto_efficient(scores):
    # scores: (n_candidates, n_objectives) array where higher is better
    is_efficient = np.ones(scores.shape[0], dtype=bool)
    for i in range(scores.shape[0]):
        for j in range(scores.shape[0]):
            # j dominates i if it is at least as good on every objective
            # and strictly better on at least one
            if np.all(scores[j] >= scores[i]) and np.any(scores[j] > scores[i]):
                is_efficient[i] = False
                break
    return is_efficient
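
A sketch connecting the two helpers, reusing RandomForestClassifier, X, and y from the earlier examples. The second objective is negated model size so that higher is better for both objectives, as is_pareto_efficient assumes:

objectives = {
    'accuracy': lambda m, X, y: m.score(X, y),
    'neg_n_estimators': lambda m, X, y: -m.get_params()['n_estimators']
}
results = pareto_optimization(
    RandomForestClassifier(random_state=42),
    {'n_estimators': [50, 100, 200], 'max_depth': [5, 10]},
    X, y, objectives
)
score_matrix = np.array([[r['scores'][k] for k in objectives] for r in results])
pareto_front = [r for r, keep in zip(results, is_pareto_efficient(score_matrix)) if keep]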

Automated Hyperparameter Tuning

Early Stopping

class EarlyStopping:
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
    
    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0
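
A sketch of how the callback plugs into a generic training loop; train_one_epoch and validate are hypothetical stand-ins for your own training and validation steps:

early_stopping = EarlyStopping(patience=5, min_delta=1e-4)
for epoch in range(100):
    train_one_epoch(model)        # hypothetical training step
    val_loss = validate(model)    # hypothetical validation step
    early_stopping(val_loss)
    if early_stopping.early_stop:
        print(f'Stopping early at epoch {epoch}')
        break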

Learning Rate Scheduling

import numpy as np

def cosine_annealing_with_restarts(initial_lr, T_max, T_mult=2):
    """
    Cosine annealing with warm restarts: each successive cycle is
    T_mult times longer than the previous one.
    """
    def schedule(epoch):
        # Walk through completed cycles to find the one containing this epoch
        T_i = T_max
        while epoch >= T_i:
            epoch -= T_i
            T_i *= T_mult
        return initial_lr * (1 + np.cos(np.pi * epoch / T_i)) / 2
    return schedule
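
A quick check of the schedule: with T_max=10 and T_mult=2, the first cycle spans 10 epochs and the second spans 20, so the rate restarts at the initial value at epochs 0 and 10:

schedule = cosine_annealing_with_restarts(initial_lr=0.1, T_max=10)
print([round(schedule(e), 4) for e in (0, 5, 9, 10, 20)])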

Visualization and Analysis

import pandas as pd
import matplotlib.pyplot as plt

def plot_optimization_history(cv_results, param_importances=None):
    plt.figure(figsize=(12, 4))
    
    # Plot optimization trajectory
    plt.subplot(1, 2, 1)
    plt.plot(cv_results['mean_test_score'])
    plt.xlabel('Iteration')
    plt.ylabel('Score')
    plt.title('Optimization History')
    
    # Plot parameter importance, if supplied as a {name: importance}
    # mapping (e.g. estimated from a surrogate model fit on the trials)
    if param_importances is not None:
        plt.subplot(1, 2, 2)
        importances = pd.DataFrame({
            'parameter': list(param_importances.keys()),
            'importance': list(param_importances.values())
        }).sort_values('importance')
        plt.barh(importances['parameter'], importances['importance'])
        plt.title('Parameter Importance')
    
    return plt

Best Practices

  1. Search Space Definition
from scipy.stats import uniform, randint

def define_search_space(model_type):
    # Note: scipy's uniform(loc, scale) samples from [loc, loc + scale]
    if model_type == 'random_forest':
        return {
            'n_estimators': randint(10, 500),
            'max_depth': randint(3, 20),
            'min_samples_split': randint(2, 20),
            'min_samples_leaf': randint(1, 10)
        }
    elif model_type == 'xgboost':
        return {
            'learning_rate': uniform(0.01, 0.3),   # [0.01, 0.31]
            'max_depth': randint(3, 10),
            'n_estimators': randint(100, 1000),
            'subsample': uniform(0.6, 0.4)         # [0.6, 1.0]
        }
  2. Cross-Validation Strategy
from sklearn.model_selection import KFold, StratifiedKFold, TimeSeriesSplit

def get_cv_strategy(problem_type, n_splits=5):
    if problem_type == 'classification':
        # Preserve class proportions in each fold
        return StratifiedKFold(n_splits=n_splits)
    elif problem_type == 'regression':
        return KFold(n_splits=n_splits)
    elif problem_type == 'time_series':
        # Respect temporal order: train on the past, validate on the future
        return TimeSeriesSplit(n_splits=n_splits)
  3. Resource Management
import time
from sklearn.model_selection import ParameterGrid, cross_val_score

def optimize_with_resource_constraints(
    model, param_grid, X, y,
    max_time=3600, max_evals=100
):
    start_time = time.time()
    results = []
    
    for i, params in enumerate(ParameterGrid(param_grid)):
        if time.time() - start_time > max_time or i >= max_evals:
            break
            
        model.set_params(**params)
        score = cross_val_score(model, X, y).mean()
        results.append({
            'params': params,
            'score': score
        })
    
    return results
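
A closing usage sketch: run the budgeted search and keep the best configuration found, reusing RandomForestClassifier, X, and y from the earlier examples (the budget values are illustrative):

results = optimize_with_resource_constraints(
    RandomForestClassifier(random_state=42),
    {'n_estimators': [50, 100, 200], 'max_depth': [5, 10, 20]},
    X, y, max_time=600, max_evals=20
)
best = max(results, key=lambda r: r['score'])
print(best['params'], best['score'])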