Generalized Linear Models
Generalized Linear Models (GLMs) extend linear regression to response variables with non-normal distributions, relating the mean of the response to the linear predictor through a link function.
Theoretical Foundations
1. Components of GLMs
- Random Component
  - Specifies the probability distribution of the response variable
  - Common distributions:
    - Gaussian (Normal): $y \sim \mathcal{N}(\mu, \sigma^2)$
    - Binomial: $y \sim \text{Binomial}(n, p)$
    - Poisson: $y \sim \text{Poisson}(\lambda)$
    - Gamma: $y \sim \text{Gamma}(\alpha, \beta)$
- Systematic Component
  - Linear predictor: $\eta = X\beta$
  - Where:
    - $X$ is the design matrix
    - $\beta$ is the vector of coefficients
- Link Function
  - Connects the mean of the response to the linear predictor: $g(\mu) = \eta = X\beta$
  - Common link functions:
    - Identity: $g(\mu) = \mu$
    - Logit: $g(\mu) = \log\!\left(\frac{\mu}{1 - \mu}\right)$ (its inverse, the sigmoid, is shown below)
    - Log: $g(\mu) = \log(\mu)$
    - Inverse: $g(\mu) = \mu^{-1}$
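Inverting the link function maps the linear predictor back to the mean. For the logit link this inverse is the sigmoid,

$$\mu = g^{-1}(\eta) = \frac{1}{1 + e^{-\eta}},$$

which appears as the `_sigmoid` helper in the implementations that follow.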
2. Maximum Likelihood Estimation
The log-likelihood function:

$$l(\beta) = \sum_{i=1}^n \log f(y_i; \theta_i, \phi)$$

where:
- $f$ is the probability density/mass function
- $\theta_i$ is the canonical parameter
- $\phi$ is the dispersion parameter
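For a concrete instance, take a binary response $y_i \in \{0, 1\}$ with the logit link: $f$ is then the Bernoulli mass function, and the log-likelihood simplifies to

$$l(\beta) = \sum_{i=1}^n \left[\, y_i x_i^\top \beta - \log\!\left(1 + e^{x_i^\top \beta}\right) \right],$$

which is exactly the quantity (negated) that the logistic regression implementation below minimizes.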
Implementation Examples
1. Logistic Regression
import numpy as np
from scipy.optimize import minimize
from scipy.special import gammaln  # log-gamma: gives log(y!) and the Gamma normalizer below
import matplotlib.pyplot as plt
class LogisticRegression:
def __init__(self, fit_intercept=True):
self.fit_intercept = fit_intercept
self.coef_ = None
self.intercept_ = None
def _sigmoid(self, z):
"""Sigmoid function"""
return 1 / (1 + np.exp(-z))
def _add_intercept(self, X):
"""Add intercept term to X"""
if self.fit_intercept:
return np.c_[np.ones(X.shape[0]), X]
return X
    def _negative_log_likelihood(self, beta, X, y):
        """Compute negative log-likelihood"""
        z = X @ beta
        # np.logaddexp(0, z) computes log(1 + exp(z)) without overflow
        log_likelihood = np.sum(y * z - np.logaddexp(0, z))
        return -log_likelihood
def _gradient(self, beta, X, y):
"""Compute gradient of negative log-likelihood"""
z = X @ beta
return -X.T @ (y - self._sigmoid(z))
def fit(self, X, y, max_iter=100):
"""Fit logistic regression using maximum likelihood"""
X = self._add_intercept(X)
# Initialize parameters
beta_init = np.zeros(X.shape[1])
# Optimize using BFGS
result = minimize(
fun=self._negative_log_likelihood,
x0=beta_init,
args=(X, y),
method='BFGS',
jac=self._gradient,
options={'maxiter': max_iter}
)
if self.fit_intercept:
self.intercept_ = result.x[0]
self.coef_ = result.x[1:]
else:
self.coef_ = result.x
return self
    def predict_proba(self, X):
        """Predict probabilities"""
        X = self._add_intercept(X)
        beta = np.r_[self.intercept_, self.coef_] if self.fit_intercept else self.coef_
        return self._sigmoid(X @ beta)
def predict(self, X, threshold=0.5):
"""Predict classes"""
return (self.predict_proba(X) >= threshold).astype(int)
# Example usage
def plot_logistic_regression():
# Generate sample data
np.random.seed(42)
X = np.random.randn(200, 2)
y = (X[:, 0] + X[:, 1] > 0).astype(int)
# Fit model
model = LogisticRegression()
model.fit(X, y)
# Create grid for visualization
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
np.linspace(x_min, x_max, 100),
np.linspace(y_min, y_max, 100)
)
# Make predictions
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Plot
plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, alpha=0.8)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Logistic Regression Decision Boundary')
return plt.gcf()
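As an optional cross-check (assuming scikit-learn is available; the simulated data below are illustrative), an unpenalized scikit-learn fit should closely match the BFGS solution above, provided the classes are not perfectly separable:

from sklearn.linear_model import LogisticRegression as SkLogisticRegression

np.random.seed(0)
X = np.random.randn(500, 2)
p = 1 / (1 + np.exp(-(X[:, 0] - X[:, 1])))  # noisy labels avoid perfect separation
y = np.random.binomial(1, p)

ours = LogisticRegression().fit(X, y)
theirs = SkLogisticRegression(penalty=None).fit(X, y)  # penalty=None needs scikit-learn >= 1.2
print(ours.intercept_, ours.coef_)
print(theirs.intercept_, theirs.coef_)  # should agree to several decimals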
2. Poisson Regression
class PoissonRegression:
def __init__(self, fit_intercept=True):
self.fit_intercept = fit_intercept
self.coef_ = None
self.intercept_ = None
def _add_intercept(self, X):
if self.fit_intercept:
return np.c_[np.ones(X.shape[0]), X]
return X
    def _negative_log_likelihood(self, beta, X, y):
        """Compute negative log-likelihood for Poisson regression"""
        eta = X @ beta
        # gammaln(y + 1) = log(y!), a numerically stable replacement for the factorial
        return -np.sum(y * eta - np.exp(eta) - gammaln(y + 1))
def _gradient(self, beta, X, y):
"""Compute gradient of negative log-likelihood"""
mu = np.exp(X @ beta)
return -X.T @ (y - mu)
def fit(self, X, y, max_iter=100):
"""Fit Poisson regression using maximum likelihood"""
X = self._add_intercept(X)
# Initialize parameters
beta_init = np.zeros(X.shape[1])
# Optimize using BFGS
result = minimize(
fun=self._negative_log_likelihood,
x0=beta_init,
args=(X, y),
method='BFGS',
jac=self._gradient,
options={'maxiter': max_iter}
)
if self.fit_intercept:
self.intercept_ = result.x[0]
self.coef_ = result.x[1:]
else:
self.coef_ = result.x
return self
    def predict(self, X):
        """Predict counts"""
        X = self._add_intercept(X)
        beta = np.r_[self.intercept_, self.coef_] if self.fit_intercept else self.coef_
        return np.exp(X @ beta)
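As a hedged sanity check (not part of the original implementation, and assuming statsmodels is installed), the same model can be fit with statsmodels; the simulated data and variable names are illustrative:

import statsmodels.api as sm

np.random.seed(0)
X = np.random.randn(100, 2)
y = np.random.poisson(np.exp(0.5 + X @ np.array([0.3, -0.2])))

custom = PoissonRegression().fit(X, y)
reference = sm.GLM(y, sm.add_constant(X), family=sm.families.Poisson()).fit()
print(custom.intercept_, custom.coef_)  # hand-rolled MLE
print(reference.params)                 # should agree closely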
3. Gamma Regression
class GammaRegression:
def __init__(self, fit_intercept=True):
self.fit_intercept = fit_intercept
self.coef_ = None
self.intercept_ = None
self.shape_ = None
    def _add_intercept(self, X):
        if self.fit_intercept:
            return np.c_[np.ones(X.shape[0]), X]
        return X

    def _negative_log_likelihood(self, params, X, y):
        """Compute negative log-likelihood for Gamma regression"""
        beta = params[:-1]
        alpha = np.exp(params[-1])  # shape parameter, log-parameterized to keep it positive
        mu = np.exp(X @ beta)
        # Gamma with shape alpha and scale mu/alpha, so that E[y] = mu
        ll = (alpha * np.log(alpha / mu) + (alpha - 1) * np.log(y)
              - alpha * y / mu - gammaln(alpha))
        return -np.sum(ll)
def fit(self, X, y, max_iter=100):
"""Fit Gamma regression using maximum likelihood"""
X = self._add_intercept(X)
        # Initialize parameters (coefficients plus log of the shape parameter)
        params_init = np.zeros(X.shape[1] + 1)  # log-shape of 0 means an initial shape of 1
# Optimize
result = minimize(
fun=self._negative_log_likelihood,
x0=params_init,
args=(X, y),
method='BFGS',
options={'maxiter': max_iter}
)
if self.fit_intercept:
self.intercept_ = result.x[0]
self.coef_ = result.x[1:-1]
else:
self.coef_ = result.x[:-1]
        self.shape_ = np.exp(result.x[-1])  # back-transform the log-shape
return self
    def predict(self, X):
        """Predict mean response"""
        X = self._add_intercept(X)
        beta = np.r_[self.intercept_, self.coef_] if self.fit_intercept else self.coef_
        return np.exp(X @ beta)
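A quick simulated check of the Gamma model above (the seed, coefficients, and shape value are illustrative assumptions, not from the original text):

np.random.seed(1)
X = np.random.rand(500, 1)
mu = np.exp(1.0 + 2.0 * X[:, 0])                # true mean under the log link
y = np.random.gamma(shape=3.0, scale=mu / 3.0)  # E[y] = mu, true shape = 3

gamma_model = GammaRegression().fit(X, y)
print(gamma_model.intercept_, gamma_model.coef_, gamma_model.shape_)  # roughly 1.0, 2.0, 3.0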
Model Diagnostics
1. Residual Analysis
def analyze_residuals(model, X, y):
    """Analyze residuals for a GLM (formulas below assume a Poisson family)"""
    from scipy.special import xlogy  # xlogy(0, 0) = 0, so zero counts are handled safely

    y_pred = model.predict(X)
    # Compute different types of residuals, using the Poisson variance V(mu) = mu
    response_residuals = y - y_pred
    pearson_residuals = response_residuals / np.sqrt(y_pred)
    deviance_residuals = np.sign(response_residuals) * np.sqrt(2 * (
        xlogy(y, y / y_pred) - (y - y_pred)
    ))
# Create diagnostic plots
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Residuals vs Fitted
axes[0, 0].scatter(y_pred, response_residuals)
axes[0, 0].axhline(y=0, color='r', linestyle='--')
axes[0, 0].set_xlabel('Fitted values')
axes[0, 0].set_ylabel('Response residuals')
# Scale-Location
axes[0, 1].scatter(y_pred, np.abs(pearson_residuals))
axes[0, 1].set_xlabel('Fitted values')
axes[0, 1].set_ylabel('|Pearson residuals|')
# QQ plot
from scipy import stats
stats.probplot(deviance_residuals, dist="norm", plot=axes[1, 0])
    # Cook's distance (the OLS hat matrix is used as an approximation to GLM leverage)
    leverage = np.diag(X @ np.linalg.inv(X.T @ X) @ X.T)
    p = X.shape[1]
    cooks_dist = (pearson_residuals**2 * leverage) / (p * (1 - leverage)**2)
axes[1, 1].scatter(range(len(cooks_dist)), cooks_dist)
axes[1, 1].set_xlabel('Observation')
axes[1, 1].set_ylabel("Cook's distance")
plt.tight_layout()
return fig
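For instance, the diagnostics might be produced for a fitted Poisson model as follows (simulated data; the seed and coefficients are illustrative):

np.random.seed(2)
X = np.random.randn(300, 2)
y = np.random.poisson(np.exp(0.3 + X @ np.array([0.5, -0.5])))

fig = analyze_residuals(PoissonRegression().fit(X, y), X, y)
plt.show()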
2. Deviance Analysis
def compute_deviance(model, X, y, family='poisson'):
    """Compute deviance for GLM"""
    from scipy.special import xlogy  # xlogy(0, .) = 0 handles zeros in y safely

    y_pred = model.predict(X)
    if family == 'poisson':
        deviance = 2 * np.sum(
            xlogy(y, y / y_pred) - (y - y_pred)
        )
    elif family == 'gamma':
        deviance = 2 * np.sum(
            -np.log(y / y_pred) + (y - y_pred) / y_pred
        )
    elif family == 'binomial':
        deviance = 2 * np.sum(
            xlogy(y, y / y_pred) + xlogy(1 - y, (1 - y) / (1 - y_pred))
        )
    else:
        raise ValueError(f"Unknown family: {family}")
    return deviance
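Deviance differences between nested models give an approximate likelihood-ratio test. A minimal sketch on simulated Poisson data (the seed and coefficients are illustrative assumptions):

from scipy import stats

np.random.seed(3)
X = np.random.randn(400, 2)
y = np.random.poisson(np.exp(0.2 + 0.6 * X[:, 0]))  # the second feature is irrelevant

full = PoissonRegression().fit(X, y)
reduced = PoissonRegression().fit(X[:, :1], y)
# The deviance drop is asymptotically chi-squared with df equal to the
# number of extra parameters in the larger model (1 here).
lr_stat = compute_deviance(reduced, X[:, :1], y) - compute_deviance(full, X, y)
p_value = stats.chi2.sf(lr_stat, df=1)
print(lr_stat, p_value)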
Advanced Topics
1. Elastic-Net GLM
class ElasticNetGLM:
def __init__(self, alpha=1.0, l1_ratio=0.5, family='gaussian'):
self.alpha = alpha
self.l1_ratio = l1_ratio
self.family = family
self.coef_ = None
def _penalty(self, beta):
"""Compute elastic net penalty"""
l1 = self.l1_ratio * self.alpha * np.sum(np.abs(beta))
l2 = (1 - self.l1_ratio) * self.alpha * np.sum(beta**2)
return l1 + l2
    def fit(self, X, y, max_iter=1000, lr=0.01):
        """Fit elastic net GLM by proximal gradient descent (a sketch for the
        Gaussian family; a production solver would use coordinate descent)"""
        n, p = X.shape
        beta = np.zeros(p)
        for _ in range(max_iter):
            # Gradient of the smooth part: mean squared error plus the L2 penalty
            grad = -X.T @ (y - X @ beta) / n
            grad += 2 * (1 - self.l1_ratio) * self.alpha * beta
            beta = beta - lr * grad
            # Proximal step for the L1 penalty: soft-thresholding
            thresh = lr * self.l1_ratio * self.alpha
            beta = np.sign(beta) * np.maximum(np.abs(beta) - thresh, 0.0)
        self.coef_ = beta
        return self
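A quick check of the sketch above on simulated Gaussian data (the coefficients and hyperparameters are illustrative assumptions):

np.random.seed(4)
X = np.random.randn(200, 5)
y = X @ np.array([1.5, 0.0, -2.0, 0.0, 0.5]) + 0.1 * np.random.randn(200)

enet = ElasticNetGLM(alpha=0.1, l1_ratio=0.8).fit(X, y)
print(enet.coef_)  # the truly-zero coefficients should be shrunk toward zero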
2. Quasi-likelihood Methods
class QuasiPoissonRegression:
    def __init__(self):
        self.coef_ = None
        self.intercept_ = None
        self.dispersion_ = None

    def fit(self, X, y):
        """Fit quasi-Poisson regression"""
        # Point estimates are identical to standard Poisson regression;
        # only the dispersion parameter (and hence standard errors) differs
        poisson_model = PoissonRegression()
        poisson_model.fit(X, y)
        # Estimate dispersion via the Pearson chi-squared statistic, divided
        # by the residual degrees of freedom (+1 accounts for the intercept)
        y_pred = poisson_model.predict(X)
        pearson_residuals = (y - y_pred) / np.sqrt(y_pred)
        self.dispersion_ = np.sum(pearson_residuals**2) / (len(y) - X.shape[1] - 1)
        # Store coefficients
        self.intercept_ = poisson_model.intercept_
        self.coef_ = poisson_model.coef_
        return self
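A standard quasi-likelihood fact worth stating explicitly (not shown in the code): the coefficient estimates are unchanged, and the estimated dispersion $\hat\phi$ only rescales the standard errors,

$$\widehat{\mathrm{SE}}_{\text{quasi}} = \sqrt{\hat{\phi}}\cdot\widehat{\mathrm{SE}}_{\text{Poisson}},$$

so quasi-Poisson inference widens confidence intervals under overdispersion ($\hat\phi > 1$) without changing predictions.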
Best Practices
- Model Selection
  - Choose an appropriate distribution family
  - Select a suitable link function
  - Consider model complexity
  - Use diagnostic tools
- Data Preprocessing
  - Handle missing values
  - Check for outliers
  - Scale predictors
  - Encode categorical variables
- Model Validation
  - Check residuals
  - Assess goodness of fit
  - Validate assumptions
  - Use cross-validation
- Interpretation
  - Understand coefficient meanings
  - Consider the scale of predictors
  - Account for the link function
  - Report uncertainty estimates