Optimization
Understanding fundamental concepts of optimization and their role in machine learning.
Optimization is a cornerstone of machine learning, providing the mathematical foundation for training models and finding optimal solutions to complex problems.
Fundamentals
Optimization Problem Structure
- Objective function: $f(x)$ to minimize or maximize
- Variables: $x \in \mathbb{R}^n$
- Constraints: inequality constraints $g_i(x) \le 0$, equality constraints $h_j(x) = 0$
class OptimizationProblem:
    def __init__(self, objective_fn, constraints=None):
        self.objective_fn = objective_fn
        # constraints is a dict with optional 'equality' and 'inequality' lists
        self.constraints = constraints or {}

    def is_feasible(self, x, tol=1e-6):
        """Check whether a point satisfies all constraints to within tol."""
        if not self.constraints:
            return True
        return all(
            abs(h(x)) <= tol for h in self.constraints.get('equality', [])
        ) and all(
            g(x) <= tol for g in self.constraints.get('inequality', [])
        )
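As a quick illustration of this structure, the hypothetical usage below encodes the problem of minimizing $x_1^2 + x_2^2$ subject to $x_1 + x_2 = 1$ and $x_1 \ge 0$ (the names and values are only for illustration):

import numpy as np

# Hypothetical example: minimize x1^2 + x2^2
# subject to x1 + x2 = 1 (equality) and -x1 <= 0 (i.e. x1 >= 0)
problem = OptimizationProblem(
    objective_fn=lambda x: x[0]**2 + x[1]**2,
    constraints={
        'equality': [lambda x: x[0] + x[1] - 1],
        'inequality': [lambda x: -x[0]],
    },
)

print(problem.is_feasible(np.array([0.5, 0.5])))   # True: satisfies both constraints
print(problem.is_feasible(np.array([1.0, 1.0])))   # False: x1 + x2 != 1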
Types of Optimization Problems
- Unconstrained Optimization
def unconstrained_minimize(f, x0, method='gradient_descent'):
    """Solve an unconstrained optimization problem."""
    if method == 'gradient_descent':
        optimizer = GradientDescent()
    elif method == 'newton':
        optimizer = NewtonMethod()
    else:
        raise ValueError("Unsupported method")
    return optimizer.optimize(f, x0)
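The GradientDescent and NewtonMethod optimizers referenced above are placeholders. A minimal sketch of the gradient-descent case, assuming a fixed step size and a finite-difference gradient helper (both illustrative choices, not a prescribed implementation), might look like:

import numpy as np

def numerical_gradient(f, x, eps=1e-6):
    """Central-difference gradient, used by the sketch below."""
    grad = np.zeros_like(x)
    for i in range(len(x)):
        ei = np.zeros_like(x)
        ei[i] = eps
        grad[i] = (f(x + ei) - f(x - ei)) / (2 * eps)
    return grad

class GradientDescent:
    """Illustrative fixed-step gradient-descent optimizer."""
    def __init__(self, lr=0.1, max_iter=1000, tol=1e-6):
        self.lr = lr
        self.max_iter = max_iter
        self.tol = tol

    def optimize(self, f, x0):
        x = np.asarray(x0, dtype=float)
        for _ in range(self.max_iter):
            grad = numerical_gradient(f, x)
            if np.linalg.norm(grad) <= self.tol:
                break                      # stationary point reached
            x = x - self.lr * grad         # step against the gradient
        return x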
- Constrained Optimization
def constrained_minimize(f, constraints, x0, method='barrier'):
    """Solve a constrained optimization problem."""
    if method == 'barrier':
        optimizer = BarrierMethod()
    elif method == 'lagrange':
        optimizer = LagrangeMultipliers()
    else:
        raise ValueError("Unsupported method")
    return optimizer.optimize(f, constraints, x0)
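BarrierMethod and LagrangeMultipliers are likewise placeholders. One simple way to realize the same idea is a quadratic-penalty loop that folds constraint violations into the objective and repeatedly calls an unconstrained solver; the sketch below is that penalty variant (not a full interior-point method), assuming the constraint dictionary format used above and the GradientDescent sketch from the previous example:

import numpy as np

class BarrierMethod:
    """Illustrative quadratic-penalty sketch for constrained minimization."""
    def __init__(self, mu0=1.0, growth=10.0, outer_iters=6):
        self.mu0 = mu0          # initial penalty weight
        self.growth = growth    # factor by which the penalty grows each round
        self.outer_iters = outer_iters

    def optimize(self, f, constraints, x0):
        x = np.asarray(x0, dtype=float)
        mu = self.mu0
        inner = GradientDescent(lr=0.01, max_iter=2000)
        for _ in range(self.outer_iters):
            def penalized(z, mu=mu):
                # Penalize equality violations h(z) != 0 and inequality violations g(z) > 0
                p = sum(h(z)**2 for h in constraints.get('equality', []))
                p += sum(max(0.0, g(z))**2 for g in constraints.get('inequality', []))
                return f(z) + mu * p
            x = inner.optimize(penalized, x)
            mu *= self.growth   # tighten the penalty and re-solve
        return x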
Optimality Conditions
First-Order Conditions
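For a smooth unconstrained problem, the first-order necessary condition at a local minimizer $x^*$ is that the gradient vanishes, $\nabla f(x^*) = 0$; if in addition the Hessian $\nabla^2 f(x^*)$ is positive definite, the second-order sufficient condition holds and $x^*$ is a strict local minimizer. The checks below test these conditions numerically.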
import numpy as np

class OptimalityConditions:
    @staticmethod
    def check_first_order(f, grad_f, x, tol=1e-6):
        """Check the first-order necessary condition: the gradient vanishes."""
        grad = grad_f(x)
        return np.linalg.norm(grad) <= tol

    @staticmethod
    def check_second_order(f, grad_f, hess_f, x, tol=1e-6):
        """Check second-order sufficient conditions for a strict local minimum."""
        grad = grad_f(x)
        hess = hess_f(x)
        # First-order condition: gradient must be (numerically) zero
        if np.linalg.norm(grad) > tol:
            return False
        # Second-order condition: Hessian must be positive definite;
        # Cholesky factorization succeeds exactly when it is
        try:
            np.linalg.cholesky(hess)
            return True
        except np.linalg.LinAlgError:
            return False
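As a quick sanity check (a hypothetical usage of the class above), the minimizer of $f(x) = x_1^2 + 2x_2^2$ is the origin, where both conditions hold:

import numpy as np

f = lambda x: x[0]**2 + 2 * x[1]**2
grad_f = lambda x: np.array([2 * x[0], 4 * x[1]])
hess_f = lambda x: np.array([[2.0, 0.0], [0.0, 4.0]])

x_star = np.zeros(2)
print(OptimalityConditions.check_first_order(f, grad_f, x_star))                # True
print(OptimalityConditions.check_second_order(f, grad_f, hess_f, x_star))       # True
print(OptimalityConditions.check_first_order(f, grad_f, np.array([1.0, 0.0])))  # False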
Convergence Analysis
Convergence Rates
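A sequence $\{x_k\}$ converging to $x^*$ converges linearly if the error ratio $|x_{k+1} - x^*| / |x_k - x^*|$ approaches a constant $c \in (0, 1)$, and quadratically if $|x_{k+1} - x^*| / |x_k - x^*|^2$ stays bounded. The helpers below estimate these ratios empirically, using the final iterate as a stand-in for the limit.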
class ConvergenceAnalysis:
    @staticmethod
    def linear_convergence(sequence):
        """Estimate the linear convergence rate |e_k| / |e_{k-1}|."""
        limit = sequence[-1]
        ratios = []
        # Exclude the final element, whose error is zero by construction
        for i in range(2, len(sequence) - 1):
            prev_err = sequence[i - 1] - limit
            if prev_err != 0:
                ratios.append((sequence[i] - limit) / prev_err)
        return np.mean(ratios)

    @staticmethod
    def quadratic_convergence(sequence):
        """Estimate the quadratic convergence constant |e_k| / |e_{k-1}|^2."""
        limit = sequence[-1]
        ratios = []
        for i in range(2, len(sequence) - 1):
            prev_err = sequence[i - 1] - limit
            if prev_err != 0:
                ratios.append((sequence[i] - limit) / prev_err**2)
        return np.mean(ratios)
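For example (a hypothetical check using the helpers above), the sequence $x_k = 0.5^k$ converges linearly to 0 with rate 0.5, while $x_k = 0.5^{2^k}$ converges quadratically; appending the known limit makes it the reference point used by the estimators:

import numpy as np

linear_seq = [0.5**k for k in range(20)] + [0.0]       # known limit appended as last entry
quadratic_seq = [0.5**(2**k) for k in range(6)] + [0.0]

print(ConvergenceAnalysis.linear_convergence(linear_seq))        # approximately 0.5
print(ConvergenceAnalysis.quadratic_convergence(quadratic_seq))  # roughly constant, about 1.0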
Optimization Landscapes
Function Analysis
class FunctionAnalysis:
    @staticmethod
    def compute_gradient(f, x, eps=1e-6):
        """Compute the numerical gradient by central differences."""
        n = len(x)
        grad = np.zeros(n)
        for i in range(n):
            ei = np.zeros(n)
            ei[i] = eps
            grad[i] = (f(x + ei) - f(x - ei)) / (2 * eps)
        return grad

    @staticmethod
    def compute_hessian(f, x, eps=1e-4):
        """Compute the numerical Hessian by second-order central differences.

        A larger step than for the gradient is used because the divisor
        4 * eps**2 would otherwise be swamped by floating-point rounding.
        """
        n = len(x)
        hess = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                ei = np.zeros(n)
                ej = np.zeros(n)
                ei[i] = eps
                ej[j] = eps
                hess[i, j] = (f(x + ei + ej) - f(x + ei - ej)
                              - f(x - ei + ej) + f(x - ei - ej)) / (4 * eps**2)
        return hess
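The finite-difference routines are easy to validate against analytic derivatives. A hypothetical check, assuming the class above, for $f(x) = x_1^2 + 3x_1 x_2$:

import numpy as np

f = lambda x: x[0]**2 + 3 * x[0] * x[1]
x = np.array([1.0, 2.0])

num_grad = FunctionAnalysis.compute_gradient(f, x)
print(num_grad)        # approximately [8.0, 3.0], matching [2*x1 + 3*x2, 3*x1]

num_hess = FunctionAnalysis.compute_hessian(f, x)
print(num_hess)        # approximately [[2, 3], [3, 0]]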
Applications
Model Training
class ModelTraining:
    def __init__(self, model, loss_fn, optimizer):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer

    def train(self, X, y, epochs=100, batch_size=32):
        """Train the model by mini-batch optimization of the loss."""
        history = []
        for epoch in range(epochs):
            epoch_losses = []
            # Mini-batch training
            for i in range(0, len(X), batch_size):
                batch_X = X[i:i + batch_size]
                batch_y = y[i:i + batch_size]
                # Forward pass
                pred = self.model(batch_X)
                loss = self.loss_fn(pred, batch_y)
                # Backward pass and parameter update
                grads = self.model.backward(loss)
                self.optimizer.step(grads)
                epoch_losses.append(loss.item())
            # Record the mean loss over the epoch
            history.append({
                'epoch': epoch,
                'loss': float(np.mean(epoch_losses))
            })
        return history
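ModelTraining assumes a callable model exposing a backward(loss) method that returns gradients, and an optimizer exposing a step(grads) method; none of these are defined here. A minimal sketch of the optimizer side, assuming the model also exposes a flat, mutable params array (an illustrative interface, not a prescribed one), could be:

import numpy as np

class SGDOptimizer:
    """Illustrative stochastic gradient descent step, assuming the model
    exposes a flat `params` array that can be updated in place."""
    def __init__(self, model, lr=0.01):
        self.model = model
        self.lr = lr

    def step(self, grads):
        # Move the parameters a small step against the gradient of the batch loss
        self.model.params -= self.lr * np.asarray(grads)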
Hyperparameter Tuning
from sklearn.model_selection import KFold

class HyperparameterTuning:
    def __init__(self, model_fn, param_space):
        self.model_fn = model_fn
        self.param_space = param_space

    def optimize(self, X, y, n_trials=50):
        """Optimize hyperparameters by random search."""
        best_params = None
        best_score = float('inf')
        for _ in range(n_trials):
            # Sample each parameter uniformly from its (low, high) range
            params = {
                k: np.random.uniform(v[0], v[1])
                for k, v in self.param_space.items()
            }
            # Train and evaluate a model with the sampled parameters
            model = self.model_fn(**params)
            score = self.evaluate(model, X, y)
            # Keep the parameters with the lowest score
            if score < best_score:
                best_score = score
                best_params = params
        return best_params, best_score

    def evaluate(self, model, X, y, n_splits=5):
        """Evaluate by k-fold cross-validation; returns a value to minimize."""
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=0)
        scores = []
        for train_idx, val_idx in kf.split(X):
            model.fit(X[train_idx], y[train_idx])
            scores.append(model.score(X[val_idx], y[val_idx]))
        # score() is higher-is-better, so negate it to match minimization
        return -np.mean(scores)
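A hypothetical usage with scikit-learn's Ridge regressor, tuning only its alpha penalty over synthetic data (the data and search range are illustrative):

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = X @ np.array([1.0, -2.0, 0.5, 0.0, 3.0]) + rng.normal(scale=0.1, size=200)

tuner = HyperparameterTuning(
    model_fn=lambda alpha: Ridge(alpha=alpha),
    param_space={'alpha': (0.01, 10.0)},
)
best_params, best_score = tuner.optimize(X, y, n_trials=20)
print(best_params, -best_score)   # best alpha and its mean cross-validated R^2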