Optimization Algorithms in AI
Understanding optimization algorithms and their specific applications in artificial intelligence and machine learning.
Optimization algorithms play a crucial role in training AI models, from simple linear classifiers to complex neural networks.
Neural Network Training
Stochastic Gradient Descent with Momentum
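Momentum accelerates plain gradient descent by accumulating an exponentially decaying average of past gradients, which damps oscillations across steep directions and builds up speed along directions of consistent descent. The implementation below keeps one velocity buffer per parameter tensor.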
import numpy as np

class SGDMomentum:
    def __init__(self, learning_rate=0.01, momentum=0.9,
                 weight_decay=0.0):
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.velocity = {}

    def step(self, parameters, gradients):
        """Update parameters in place using momentum."""
        # Lazily initialize one velocity buffer per parameter tensor
        if not self.velocity:
            for key in parameters:
                self.velocity[key] = np.zeros_like(parameters[key])
        for key in parameters:
            # Add weight decay (L2 regularization) to the gradient
            grad = gradients[key] + self.weight_decay * parameters[key]
            # Update velocity: decaying average of past update directions
            self.velocity[key] = self.momentum * self.velocity[key] - \
                self.learning_rate * grad
            # Update parameters
            parameters[key] += self.velocity[key]
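As a quick sanity check, the optimizer can be driven by hand on the toy quadratic f(w) = ||w||^2, whose gradient is 2w; the iterates should shrink toward zero:

optimizer = SGDMomentum(learning_rate=0.1, momentum=0.9)
params = {'w': np.array([5.0, -3.0])}
for _ in range(200):
    grads = {'w': 2 * params['w']}  # gradient of f(w) = ||w||^2
    optimizer.step(params, grads)
print(params['w'])  # close to [0, 0]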
Adam Optimizer
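Adam combines momentum with per-parameter adaptive step sizes: it maintains exponentially decaying estimates of the first moment (mean) and second moment (uncentered variance) of the gradients, and corrects both for their initialization at zero.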
class Adam:
    def __init__(self, learning_rate=0.001, betas=(0.9, 0.999),
                 epsilon=1e-8, weight_decay=0.0):
        self.learning_rate = learning_rate
        self.betas = betas
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.m = {}  # First moment (mean of gradients)
        self.v = {}  # Second moment (uncentered variance)
        self.t = 0   # Time step

    def step(self, parameters, gradients):
        """Update parameters in place using Adam."""
        if not self.m:
            for key in parameters:
                self.m[key] = np.zeros_like(parameters[key])
                self.v[key] = np.zeros_like(parameters[key])
        self.t += 1
        for key in parameters:
            # Add weight decay to the gradient (classic L2 regularization,
            # not the decoupled AdamW variant)
            grad = gradients[key] + self.weight_decay * parameters[key]
            # Update biased first moment estimate
            self.m[key] = self.betas[0] * self.m[key] + \
                (1 - self.betas[0]) * grad
            # Update biased second moment estimate
            self.v[key] = self.betas[1] * self.v[key] + \
                (1 - self.betas[1]) * grad**2
            # Bias-correct both moments (they start at zero)
            m_hat = self.m[key] / (1 - self.betas[0]**self.t)
            v_hat = self.v[key] / (1 - self.betas[1]**self.t)
            # Update parameters
            parameters[key] -= self.learning_rate * m_hat / \
                (np.sqrt(v_hat) + self.epsilon)
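Folding weight decay into the gradient, as above, is classic L2 regularization, and the decay term gets rescaled by the adaptive denominator. The decoupled AdamW variant instead shrinks the weights directly; as a sketch, it would drop the decay from grad and add this line after the adaptive update:

# AdamW-style decoupled decay (sketch, not in the original code)
parameters[key] -= self.learning_rate * self.weight_decay * parameters[key]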
Hyperparameter Optimization
Grid Search
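Grid search exhaustively evaluates every combination in the Cartesian product of the supplied parameter values, scoring each candidate with k-fold cross-validation. It is reliable but its cost grows exponentially with the number of hyperparameters.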
from itertools import product

class GridSearch:
    def __init__(self, model_class, param_grid):
        self.model_class = model_class
        self.param_grid = param_grid

    def fit(self, X, y, cv=5):
        """Perform grid search with cross-validation."""
        param_combinations = self._get_param_combinations()
        results = []
        for params in param_combinations:
            # Cross-validation
            scores = []
            for train_idx, val_idx in self._cv_split(X, cv):
                # Train model
                model = self.model_class(**params)
                model.fit(X[train_idx], y[train_idx])
                # Evaluate
                score = model.score(X[val_idx], y[val_idx])
                scores.append(score)
            results.append({
                'params': params,
                'mean_score': np.mean(scores),
                'std_score': np.std(scores)
            })
        # Find best parameters
        best_result = max(results, key=lambda x: x['mean_score'])
        self.best_params_ = best_result['params']
        self.cv_results_ = results
        return self

    def _get_param_combinations(self):
        """Expand the grid into a list of parameter dicts."""
        keys = list(self.param_grid)
        return [dict(zip(keys, values))
                for values in product(*(self.param_grid[k] for k in keys))]

    def _cv_split(self, X, cv):
        """Yield (train_idx, val_idx) index pairs for k-fold CV."""
        folds = np.array_split(np.arange(len(X)), cv)
        for i in range(cv):
            train_idx = np.concatenate(
                [folds[j] for j in range(cv) if j != i])
            yield train_idx, folds[i]
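A hypothetical usage, assuming any scikit-learn-style estimator whose constructor takes the grid's keys and which exposes fit(X, y) and score(X, y); KNeighborsClassifier is just one convenient example:

from sklearn.neighbors import KNeighborsClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4))
y = (X[:, 0] > 0).astype(int)

search = GridSearch(KNeighborsClassifier,
                    param_grid={'n_neighbors': [1, 3, 5]})
search.fit(X, y, cv=5)
print(search.best_params_)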
Random Search
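Random search instead samples configurations independently from user-supplied distributions. With the same budget it tries many more distinct values of each individual hyperparameter than a grid does, which is why it often finds good settings faster when only a few hyperparameters matter.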
import random

class RandomSearch:
    def __init__(self, model_class, param_distributions, n_iter=10):
        self.model_class = model_class
        self.param_distributions = param_distributions
        self.n_iter = n_iter

    def fit(self, X, y, cv=5):
        """Perform random search with cross-validation."""
        results = []
        for _ in range(self.n_iter):
            # Sample parameters
            params = self._sample_parameters()
            # Cross-validation
            scores = []
            for train_idx, val_idx in self._cv_split(X, cv):
                # Train model
                model = self.model_class(**params)
                model.fit(X[train_idx], y[train_idx])
                # Evaluate
                score = model.score(X[val_idx], y[val_idx])
                scores.append(score)
            results.append({
                'params': params,
                'mean_score': np.mean(scores),
                'std_score': np.std(scores)
            })
        # Find best parameters
        best_result = max(results, key=lambda x: x['mean_score'])
        self.best_params_ = best_result['params']
        self.cv_results_ = results
        return self

    def _sample_parameters(self):
        """Draw one configuration; each distribution may be a list of
        choices or a zero-argument callable returning a sampled value."""
        return {k: (dist() if callable(dist) else random.choice(dist))
                for k, dist in self.param_distributions.items()}

    def _cv_split(self, X, cv):
        """Yield (train_idx, val_idx) index pairs for k-fold CV."""
        folds = np.array_split(np.arange(len(X)), cv)
        for i in range(cv):
            train_idx = np.concatenate(
                [folds[j] for j in range(cv) if j != i])
            yield train_idx, folds[i]
Reinforcement Learning
Policy Gradient
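Policy gradient methods, here in their REINFORCE form, increase the log-probability of actions in proportion to the return that followed them, so actions that led to above-average outcomes become more likely.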
class PolicyGradient:
    """REINFORCE-style updates. Assumes the policy network exposes a
    `parameters` dict, is callable on a batch of states, and implements
    backward(loss) returning per-parameter gradients."""

    def __init__(self, policy_network, learning_rate=0.01):
        self.policy_network = policy_network
        self.optimizer = Adam(learning_rate=learning_rate)

    def update(self, states, actions, rewards):
        """Update the policy using the policy gradient."""
        # Compute discounted, normalized returns
        returns = self._compute_returns(rewards)
        # Action probabilities for each visited state
        action_probs = self.policy_network(states)
        # REINFORCE loss: negative log-probability of the taken actions,
        # weighted by their returns
        loss = -np.mean(
            returns * np.log(action_probs[np.arange(len(actions)), actions])
        )
        # Compute gradients and update the policy
        gradients = self.policy_network.backward(loss)
        self.optimizer.step(self.policy_network.parameters, gradients)

    def _compute_returns(self, rewards, gamma=0.99):
        """Compute discounted returns, normalized for variance reduction."""
        # Use a float array so normalization below is not truncated
        returns = np.zeros(len(rewards), dtype=np.float64)
        running_return = 0.0
        for t in reversed(range(len(rewards))):
            running_return = rewards[t] + gamma * running_return
            returns[t] = running_return
        # Normalize returns
        returns = (returns - np.mean(returns)) / (np.std(returns) + 1e-8)
        return returns
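A quick check of the discounting logic (before normalization): for rewards [1, 0, 1] with gamma = 0.99, the backward pass gives returns [1 + 0.99 * (0 + 0.99 * 1), 0 + 0.99 * 1, 1] = [1.9801, 0.99, 1.0].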
Deep Q-Learning
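Deep Q-learning fits an action-value network to bootstrapped targets. Two ingredients stabilize training: minibatches drawn from a replay buffer decorrelate consecutive updates, and targets are computed from a separate target network that is only synchronized periodically.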
class DQN:
    """Deep Q-learning updates. Assumes both networks are callable on a
    batch of states, expose a `parameters` dict plus state_dict() /
    load_state_dict(), and implement backward(loss) returning gradients."""

    def __init__(self, q_network, target_network, learning_rate=0.001,
                 gamma=0.99, target_update_freq=100):
        self.q_network = q_network
        self.target_network = target_network
        self.optimizer = Adam(learning_rate=learning_rate)
        self.gamma = gamma
        self.target_update_freq = target_update_freq
        self.steps = 0

    def update(self, batch):
        """Update the Q-network from a batch of replayed transitions."""
        states, actions, rewards, next_states, dones = batch
        # Bootstrap targets from the frozen target network; terminal
        # transitions (done == 1) contribute only their immediate reward
        next_q_values = self.target_network(next_states)
        target_q_values = rewards + self.gamma * \
            np.max(next_q_values, axis=1) * (1 - dones)
        # Q-values predicted for the actions actually taken
        current_q_values = self.q_network(states)
        # Mean squared TD error
        loss = np.mean((current_q_values[np.arange(len(actions)), actions] -
                        target_q_values)**2)
        # Compute gradients and update the Q-network
        gradients = self.q_network.backward(loss)
        self.optimizer.step(self.q_network.parameters, gradients)
        # Periodically synchronize the target network
        self.steps += 1
        if self.steps % self.target_update_freq == 0:
            self.target_network.load_state_dict(
                self.q_network.state_dict())
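The update above consumes a (states, actions, rewards, next_states, dones) batch. A minimal replay buffer producing such batches might look like the sketch below; the class and its method names are illustrative, not part of the original code:

import random
from collections import deque

class ReplayBuffer:
    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        # Store one transition, evicting the oldest when full
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform sampling decorrelates consecutive updates
        transitions = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = zip(*transitions)
        return (np.stack(states), np.array(actions),
                np.array(rewards, dtype=np.float64),
                np.stack(next_states), np.array(dones, dtype=np.float64))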
Meta-Learning
Model-Agnostic Meta-Learning (MAML)
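MAML searches for an initialization from which a handful of gradient steps on a new task's support set already produces good performance on that task's query set; the outer loop updates the initialization using the query losses of the task-adapted models.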
class MAML:
    """First-order MAML (FOMAML): query-set gradients evaluated at the
    adapted parameters are used directly as meta-gradients, skipping the
    second-order term of full MAML."""

    def __init__(self, model, inner_lr=0.01, outer_lr=0.001,
                 num_inner_steps=1):
        self.model = model
        self.inner_lr = inner_lr
        self.outer_optimizer = Adam(learning_rate=outer_lr)
        self.num_inner_steps = num_inner_steps

    def meta_train_step(self, tasks):
        """Perform one meta-training step."""
        meta_theta = {k: v.copy()
                      for k, v in self.model.parameters.items()}
        meta_gradients = {k: np.zeros_like(v)
                          for k, v in meta_theta.items()}
        for task in tasks:
            # Start each task from the current meta-parameters
            theta = {k: v.copy() for k, v in meta_theta.items()}
            # Inner loop: adapt to the support set
            for _ in range(self.num_inner_steps):
                self.model.load_state_dict(theta)
                support_loss = self.model.compute_loss(
                    task['support_x'], task['support_y'])
                gradients = self.model.backward(support_loss)
                # Update theta
                for k in theta:
                    theta[k] = theta[k] - self.inner_lr * gradients[k]
            # Evaluate the adapted parameters on the query set
            self.model.load_state_dict(theta)
            query_loss = self.model.compute_loss(
                task['query_x'], task['query_y'])
            gradients = self.model.backward(query_loss)
            # Accumulate meta-gradients across tasks
            for k in meta_gradients:
                meta_gradients[k] += gradients[k]
        # Restore the meta-parameters before the outer update, so the step
        # is applied to the initialization rather than the last task's
        # adapted weights
        self.model.load_state_dict(meta_theta)
        self.outer_optimizer.step(self.model.parameters, meta_gradients)
Reptile
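Reptile is a simpler first-order alternative to MAML: it runs ordinary gradient steps on each task, then moves the meta-parameters toward the task-adapted weights, requiring no query set and no explicit meta-gradient.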
class Reptile:
    def __init__(self, model, inner_lr=0.01, outer_lr=0.001,
                 num_inner_steps=1):
        self.model = model
        self.inner_lr = inner_lr
        self.outer_lr = outer_lr
        self.num_inner_steps = num_inner_steps

    def meta_train_step(self, tasks):
        """Perform one meta-training step."""
        old_theta = {k: v.copy()
                     for k, v in self.model.parameters.items()}
        # Accumulated (adapted - initial) parameter differences
        delta = {k: np.zeros_like(v) for k, v in old_theta.items()}
        for task in tasks:
            # Start each task from the current meta-parameters
            for k in self.model.parameters:
                self.model.parameters[k] = old_theta[k].copy()
            # Inner loop optimization
            for _ in range(self.num_inner_steps):
                loss = self.model.compute_loss(
                    task['x'], task['y'])
                gradients = self.model.backward(loss)
                # Update parameters
                for k in self.model.parameters:
                    self.model.parameters[k] -= \
                        self.inner_lr * gradients[k]
            # Record how far this task moved the parameters
            for k in delta:
                delta[k] += self.model.parameters[k] - old_theta[k]
        # Move meta-parameters toward the average task-adapted parameters
        for k in self.model.parameters:
            self.model.parameters[k] = old_theta[k] + \
                self.outer_lr * delta[k] / len(tasks)