Optimization Algorithms in AI

An overview of optimization algorithms and how they are applied in artificial intelligence and machine learning.

Optimization algorithms play a crucial role in training AI models, from simple linear classifiers to complex neural networks.

Neural Network Training

Stochastic Gradient Descent with Momentum

import itertools  # used by GridSearch below
import numpy as np

class SGDMomentum:
    def __init__(self, learning_rate=0.01, momentum=0.9, 
                 weight_decay=0.0):
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.velocity = {}
    
    def step(self, parameters, gradients):
        """Update parameters using momentum"""
        if not self.velocity:
            for key in parameters:
                self.velocity[key] = np.zeros_like(parameters[key])
        
        for key in parameters:
            # Add weight decay
            grad = gradients[key] + self.weight_decay * parameters[key]
            
            # Update velocity
            self.velocity[key] = self.momentum * self.velocity[key] - \
                                self.learning_rate * grad
            
            # Update parameters
            parameters[key] += self.velocity[key]
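
A minimal usage sketch, assuming parameters and gradients are dicts of NumPy arrays sharing the same keys; the 'W' and 'b' names here are purely illustrative:

# Illustrative parameters/gradients for a single linear layer
params = {'W': np.random.randn(3, 2), 'b': np.zeros(2)}
grads = {'W': 0.1 * np.ones((3, 2)), 'b': 0.05 * np.ones(2)}

optimizer = SGDMomentum(learning_rate=0.1, momentum=0.9)
optimizer.step(params, grads)  # updates params in place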

Adam Optimizer

class Adam:
    def __init__(self, learning_rate=0.001, betas=(0.9, 0.999),
                 epsilon=1e-8, weight_decay=0.0):
        self.learning_rate = learning_rate
        self.betas = betas
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.m = {}  # First moment
        self.v = {}  # Second moment
        self.t = 0   # Time step
    
    def step(self, parameters, gradients):
        """Update parameters using Adam"""
        if not self.m:
            for key in parameters:
                self.m[key] = np.zeros_like(parameters[key])
                self.v[key] = np.zeros_like(parameters[key])
        
        self.t += 1
        
        for key in parameters:
            # Add weight decay
            grad = gradients[key] + self.weight_decay * parameters[key]
            
            # Update biased first moment
            self.m[key] = self.betas[0] * self.m[key] + \
                         (1 - self.betas[0]) * grad
            
            # Update biased second moment
            self.v[key] = self.betas[1] * self.v[key] + \
                         (1 - self.betas[1]) * grad**2
            
            # Compute bias-corrected moments
            m_hat = self.m[key] / (1 - self.betas[0]**self.t)
            v_hat = self.v[key] / (1 - self.betas[1]**self.t)
            
            # Update parameters
            parameters[key] -= self.learning_rate * m_hat / \
                             (np.sqrt(v_hat) + self.epsilon)
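
As a quick sanity check, a sketch (not part of any real training loop) that drives a single parameter down the quadratic f(x) = x**2, whose gradient is 2x:

params = {'x': np.array([5.0])}
optimizer = Adam(learning_rate=0.1)
for _ in range(300):
    grads = {'x': 2.0 * params['x']}  # gradient of f(x) = x**2
    optimizer.step(params, grads)
print(params['x'])  # approaches the minimum at 0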

Hyperparameter Optimization

Grid Search

class GridSearch:
    def __init__(self, model_class, param_grid):
        self.model_class = model_class
        self.param_grid = param_grid
    
    def fit(self, X, y, cv=5):
        """Perform grid search with cross-validation"""
        param_combinations = self._get_param_combinations()
        results = []
        
        for params in param_combinations:
            # Cross-validation
            scores = []
            for train_idx, val_idx in self._cv_split(X, cv):
                # Train model
                model = self.model_class(**params)
                model.fit(X[train_idx], y[train_idx])
                
                # Evaluate
                score = model.score(X[val_idx], y[val_idx])
                scores.append(score)
            
            results.append({
                'params': params,
                'mean_score': np.mean(scores),
                'std_score': np.std(scores)
            })
        
        # Find best parameters
        best_result = max(results, key=lambda x: x['mean_score'])
        self.best_params_ = best_result['params']
        self.cv_results_ = results
        
        return self
    
    def _get_param_combinations(self):
        """Enumerate every parameter combination in the grid"""
        keys = list(self.param_grid)
        return [dict(zip(keys, values))
                for values in itertools.product(
                    *(self.param_grid[k] for k in keys))]
    
    def _cv_split(self, X, cv):
        """Yield (train_idx, val_idx) pairs for shuffled k-fold CV"""
        indices = np.random.default_rng(0).permutation(len(X))
        folds = np.array_split(indices, cv)
        for i in range(cv):
            train_idx = np.concatenate(
                [folds[j] for j in range(cv) if j != i])
            yield train_idx, folds[i]

Random Search

class RandomSearch:
    def __init__(self, model_class, param_distributions, n_iter=10):
        self.model_class = model_class
        self.param_distributions = param_distributions
        self.n_iter = n_iter
    
    def fit(self, X, y, cv=5):
        """Perform random search with cross-validation"""
        results = []
        
        for _ in range(self.n_iter):
            # Sample parameters
            params = self._sample_parameters()
            
            # Cross-validation
            scores = []
            for train_idx, val_idx in self._cv_split(X, cv):
                # Train model
                model = self.model_class(**params)
                model.fit(X[train_idx], y[train_idx])
                
                # Evaluate
                score = model.score(X[val_idx], y[val_idx])
                scores.append(score)
            
            results.append({
                'params': params,
                'mean_score': np.mean(scores),
                'std_score': np.std(scores)
            })
        
        # Find best parameters
        best_result = max(results, key=lambda x: x['mean_score'])
        self.best_params_ = best_result['params']
        self.cv_results_ = results
        
        return self
    
    def _sample_parameters(self):
        """Draw one value per parameter, from a scipy-style
        distribution (anything with .rvs) or a list of candidates"""
        params = {}
        for key, dist in self.param_distributions.items():
            if hasattr(dist, 'rvs'):
                params[key] = dist.rvs()
            else:
                params[key] = dist[np.random.randint(len(dist))]
        return params
    
    def _cv_split(self, X, cv):
        """Same shuffled k-fold splitter as GridSearch._cv_split"""
        indices = np.random.default_rng(0).permutation(len(X))
        folds = np.array_split(indices, cv)
        for i in range(cv):
            train_idx = np.concatenate(
                [folds[j] for j in range(cv) if j != i])
            yield train_idx, folds[i]
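
Both searchers accept any estimator class exposing scikit-learn-style fit and score methods. A hypothetical run, assuming scikit-learn is installed (the dataset and hyperparameter choices are illustrative):

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)

grid = GridSearch(DecisionTreeClassifier,
                  {'max_depth': [2, 4, 8],
                   'min_samples_leaf': [1, 5]})
grid.fit(X, y, cv=5)
print(grid.best_params_)

rand = RandomSearch(DecisionTreeClassifier,
                    {'max_depth': [2, 3, 4, 6, 8, 12],
                     'min_samples_leaf': [1, 2, 5, 10]},
                    n_iter=8)
rand.fit(X, y, cv=5)
print(rand.best_params_)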

Reinforcement Learning

Policy Gradient

class PolicyGradient:
    def __init__(self, policy_network, learning_rate=0.01):
        self.policy_network = policy_network
        self.optimizer = Adam(learning_rate=learning_rate)
    
    def update(self, states, actions, rewards):
        """Update policy using policy gradient"""
        # Compute returns
        returns = self._compute_returns(rewards)
        
        # Compute action probabilities
        action_probs = self.policy_network(states)
        
        # Compute loss
        loss = -np.mean(
            returns * np.log(action_probs[np.arange(len(actions)), actions])
        )
        
        # Compute gradients
        gradients = self.policy_network.backward(loss)
        
        # Update policy
        self.optimizer.step(self.policy_network.parameters, gradients)
    
    def _compute_returns(self, rewards, gamma=0.99):
        """Compute discounted returns"""
        # Use a float buffer even if rewards arrive as integers
        returns = np.zeros(len(rewards), dtype=np.float64)
        running_return = 0
        
        for t in reversed(range(len(rewards))):
            running_return = rewards[t] + gamma * running_return
            returns[t] = running_return
        
        # Normalize returns
        returns = (returns - np.mean(returns)) / (np.std(returns) + 1e-8)
        
        return returns
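
The loss above is a Monte Carlo estimate of the REINFORCE objective; in expectation its gradient matches the policy gradient theorem, with G_t the (here normalized) discounted return:

\nabla_\theta J(\theta) = \mathbb{E}_{\pi_\theta}\big[\, G_t \,\nabla_\theta \log \pi_\theta(a_t \mid s_t) \,\big]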

Deep Q-Learning

class DQN:
    def __init__(self, q_network, target_network, learning_rate=0.001,
                 gamma=0.99, target_update_freq=100):
        self.q_network = q_network
        self.target_network = target_network
        self.optimizer = Adam(learning_rate=learning_rate)
        self.gamma = gamma
        self.target_update_freq = target_update_freq
        self.steps = 0
    
    def update(self, batch):
        """Update Q-network using experience replay"""
        states, actions, rewards, next_states, dones = batch
        
        # Compute target Q-values
        next_q_values = self.target_network(next_states)
        target_q_values = rewards + self.gamma * \
                         np.max(next_q_values, axis=1) * (1 - dones)
        
        # Compute current Q-values
        current_q_values = self.q_network(states)
        
        # Compute loss
        loss = np.mean((current_q_values[np.arange(len(actions)), actions] - 
                       target_q_values)**2)
        
        # Compute gradients
        gradients = self.q_network.backward(loss)
        
        # Update Q-network
        self.optimizer.step(self.q_network.parameters, gradients)
        
        # Update target network
        self.steps += 1
        if self.steps % self.target_update_freq == 0:
            self.target_network.load_state_dict(
                self.q_network.state_dict())
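
Each update regresses the taken action's Q-value towards a bootstrapped Bellman target built from the periodically synced target network (d_t is the episode-termination flag), which keeps the regression target stable between syncs:

y_t = r_t + \gamma \,(1 - d_t)\, \max_{a'} Q_{\text{target}}(s_{t+1}, a')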

Meta-Learning

Model-Agnostic Meta-Learning (MAML)

class MAML:
    def __init__(self, model, inner_lr=0.01, outer_lr=0.001,
                 num_inner_steps=1):
        self.model = model
        self.inner_lr = inner_lr
        self.outer_optimizer = Adam(learning_rate=outer_lr)
        self.num_inner_steps = num_inner_steps
    
    def meta_train_step(self, tasks):
        """Perform one first-order (FOMAML-style) meta-training step"""
        meta_theta = {k: v.copy() 
                     for k, v in self.model.parameters.items()}
        meta_gradients = {k: np.zeros_like(v) 
                         for k, v in self.model.parameters.items()}
        
        for task in tasks:
            # Start each task from the current meta-parameters
            theta = {k: v.copy() for k, v in meta_theta.items()}
            
            # Inner loop: adapt to the task's support set
            for _ in range(self.num_inner_steps):
                self.model.load_state_dict(theta)
                support_loss = self.model.compute_loss(
                    task['support_x'], task['support_y'])
                gradients = self.model.backward(support_loss)
                
                # Update theta
                for k in theta:
                    theta[k] = theta[k] - self.inner_lr * gradients[k]
            
            # Evaluate the adapted parameters on the query set
            self.model.load_state_dict(theta)
            query_loss = self.model.compute_loss(
                task['query_x'], task['query_y'])
            gradients = self.model.backward(query_loss)
            
            # Accumulate first-order meta-gradients
            for k in meta_gradients:
                meta_gradients[k] += gradients[k]
        
        # Restore meta-parameters before applying the outer update
        self.model.load_state_dict(meta_theta)
        self.outer_optimizer.step(self.model.parameters, meta_gradients)
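
In symbols, with inner rate \alpha and outer rate \beta, MAML adapts per-task parameters from the meta-parameters \theta and then descends the query losses of the adapted parameters; the first-order variant implemented above simply evaluates the query gradients at \theta_i' rather than differentiating through the adaptation:

\theta_i' = \theta - \alpha \,\nabla_\theta \mathcal{L}^{\text{support}}_{\mathcal{T}_i}(\theta), \qquad \theta \leftarrow \theta - \beta \,\nabla_\theta \sum_i \mathcal{L}^{\text{query}}_{\mathcal{T}_i}(\theta_i')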

Reptile

class Reptile:
    def __init__(self, model, inner_lr=0.01, outer_lr=0.001,
                 num_inner_steps=1):
        self.model = model
        self.inner_lr = inner_lr
        self.outer_lr = outer_lr
        self.num_inner_steps = num_inner_steps
    
    def meta_train_step(self, tasks):
        """Perform one meta-training step"""
        for task in tasks:
            # Snapshot the meta-parameters before adapting to this task
            old_theta = {k: v.copy() 
                        for k, v in self.model.parameters.items()}
            
            # Inner loop optimization
            for _ in range(self.num_inner_steps):
                loss = self.model.compute_loss(
                    task['x'], task['y'])
                gradients = self.model.backward(loss)
                
                # Plain SGD step on the task
                for k in self.model.parameters:
                    self.model.parameters[k] -= \
                        self.inner_lr * gradients[k]
            
            # Move meta-parameters towards task-specific parameters
            for k in self.model.parameters:
                self.model.parameters[k] = old_theta[k] + \
                    self.outer_lr * (self.model.parameters[k] - old_theta[k])
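
Each Reptile task update is thus a convex interpolation between the pre-adaptation meta-parameters \theta and the task-adapted parameters \phi_i, with outer_lr playing the role of \epsilon:

\theta \leftarrow \theta + \epsilon \,(\phi_i - \theta)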