Principal Component Analysis
Understanding PCA and its variants for dimensionality reduction in machine learning.
Principal Component Analysis (PCA) is a fundamental dimensionality-reduction technique: it finds the orthogonal directions of maximum variance in the data (the eigenvectors of the covariance matrix) and projects the data onto the leading ones.
Basic PCA
Standard PCA
import numpy as np


class PCA:
    def __init__(self, n_components=None):
        self.n_components = n_components
        self.components_ = None
        self.mean_ = None
        self.explained_variance_ = None
        self.explained_variance_ratio_ = None

    def fit(self, X):
        """Fit the PCA model via eigendecomposition of the covariance matrix."""
        # Center the data
        self.mean_ = np.mean(X, axis=0)
        X_centered = X - self.mean_

        # Compute the covariance matrix
        n_samples = X.shape[0]
        cov_matrix = np.dot(X_centered.T, X_centered) / (n_samples - 1)

        # Compute eigenvalues and eigenvectors (eigh returns them in ascending order)
        eigenvals, eigenvecs = np.linalg.eigh(cov_matrix)

        # Sort in descending order of eigenvalue
        idx = np.argsort(eigenvals)[::-1]
        eigenvals = eigenvals[idx]
        eigenvecs = eigenvecs[:, idx]

        # Select the leading components
        if self.n_components is None:
            self.n_components = X.shape[1]
        self.components_ = eigenvecs[:, :self.n_components]
        self.explained_variance_ = eigenvals[:self.n_components]
        self.explained_variance_ratio_ = (
            eigenvals[:self.n_components] / np.sum(eigenvals))
        return self

    def transform(self, X):
        """Project data onto the principal components."""
        X_centered = X - self.mean_
        return np.dot(X_centered, self.components_)

    def inverse_transform(self, X_transformed):
        """Map projected data back to the original space."""
        return np.dot(X_transformed, self.components_.T) + self.mean_
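As a quick sanity check, the class can be run on synthetic correlated data. The snippet below is a minimal sketch; it assumes the PCA class and the numpy import above:

rng = np.random.default_rng(0)
# 2-D data stretched along one direction
X = rng.normal(size=(200, 2)) @ np.array([[3.0, 1.0], [0.0, 0.5]])

pca = PCA(n_components=1)
X_reduced = pca.fit(X).transform(X)
X_restored = pca.inverse_transform(X_reduced)
print(pca.explained_variance_ratio_)       # most of the variance on the first component
print(X_reduced.shape, X_restored.shape)   # (200, 1) (200, 2)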
SVD-based PCA
class SVDPCA:
    def __init__(self, n_components=None):
        self.n_components = n_components
        self.components_ = None
        self.mean_ = None
        self.singular_values_ = None

    def fit(self, X):
        """Fit PCA using the SVD of the centered data matrix."""
        # Center the data
        self.mean_ = np.mean(X, axis=0)
        X_centered = X - self.mean_

        # Compute the SVD; rows of Vt are the principal directions
        U, S, Vt = np.linalg.svd(X_centered, full_matrices=False)

        # Select the leading components
        if self.n_components is None:
            self.n_components = X.shape[1]
        self.components_ = Vt[:self.n_components]
        self.singular_values_ = S[:self.n_components]
        return self

    def transform(self, X):
        """Transform data using SVD-based PCA."""
        X_centered = X - self.mean_
        return np.dot(X_centered, self.components_.T)
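Both routes recover the same principal subspace; individual components can differ in sign, since eigenvectors are only defined up to sign. A quick check under that caveat, assuming both classes above:

X = np.random.default_rng(1).normal(size=(100, 5)) * np.arange(1, 6)
eig_pca = PCA(n_components=3).fit(X)
svd_pca = SVDPCA(n_components=3).fit(X)

# Projections agree up to a per-component sign flip
P1 = eig_pca.transform(X)
P2 = svd_pca.transform(X)
print(np.allclose(np.abs(P1), np.abs(P2)))  # True, up to numerical precision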
Robust PCA
Robust PCA using ADMM
class RobustPCA:
    """Decompose a matrix into a low-rank part plus a sparse part (inexact ALM)."""

    def __init__(self, lambda_=None, max_iter=1000, tol=1e-7):
        self.lambda_ = lambda_  # sparsity weight; defaults to 1/sqrt(max(m, n))
        self.max_iter = max_iter
        self.tol = tol

    def fit_transform(self, X):
        """Decompose the matrix into low-rank and sparse components."""
        m, n = X.shape
        lambda_ = (self.lambda_ if self.lambda_ is not None
                   else 1.0 / np.sqrt(max(m, n)))
        norm_two = np.linalg.norm(X, 2)      # spectral norm, sets the initial mu
        norm_fro = np.linalg.norm(X, 'fro')  # Frobenius norm, for convergence
        Y = np.zeros_like(X)
        S = np.zeros_like(X)
        mu = 1.25 / norm_two
        mu_bar = mu * 1e7
        rho = 1.5

        for _ in range(self.max_iter):
            # Update L: singular value thresholding with threshold 1/mu
            U, sigma, Vt = np.linalg.svd(
                X - S + (1 / mu) * Y, full_matrices=False)
            sigma = np.maximum(sigma - 1 / mu, 0)
            L = np.dot(U * sigma, Vt)

            # Update S: elementwise soft thresholding with threshold lambda/mu
            temp = X - L + (1 / mu) * Y
            S = np.sign(temp) * np.maximum(np.abs(temp) - lambda_ / mu, 0)

            # Update the dual variable Y
            Z = X - L - S
            Y = Y + mu * Z

            # Increase mu (up to mu_bar) to speed up convergence
            mu = min(rho * mu, mu_bar)

            # Check convergence on the relative residual
            err = np.linalg.norm(Z, 'fro') / norm_fro
            if err < self.tol:
                break

        self.L_ = L  # Low-rank component
        self.S_ = S  # Sparse component
        return L
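A typical test is a low-rank matrix corrupted by a few large outliers; the decomposition should separate the two. A minimal sketch, assuming the RobustPCA class above:

rng = np.random.default_rng(2)
# Rank-2 ground truth plus 5% large sparse corruptions
L_true = rng.normal(size=(100, 2)) @ rng.normal(size=(2, 50))
S_true = np.zeros((100, 50))
mask = rng.random((100, 50)) < 0.05
S_true[mask] = rng.normal(scale=10.0, size=mask.sum())

rpca = RobustPCA()
L_hat = rpca.fit_transform(L_true + S_true)
# Relative recovery error of the low-rank part; should be small
print(np.linalg.norm(L_hat - L_true) / np.linalg.norm(L_true))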
Kernel PCA
Kernel PCA Implementation
class KernelPCA:
    def __init__(self, n_components=None, kernel='rbf', gamma=1.0):
        self.n_components = n_components
        self.kernel = kernel
        self.gamma = gamma
        self.alphas = None
        self.X_fit = None

    def _kernel_function(self, X1, X2):
        """Compute the kernel matrix between the rows of X1 and X2."""
        if self.kernel == 'rbf':
            # Radial basis function kernel
            X1_norm = np.sum(X1**2, axis=1)
            X2_norm = np.sum(X2**2, axis=1)
            K = np.exp(-self.gamma * (
                X1_norm[:, None] + X2_norm[None, :]
                - 2 * np.dot(X1, X2.T)))
        elif self.kernel == 'linear':
            K = np.dot(X1, X2.T)
        elif self.kernel == 'poly':
            K = (np.dot(X1, X2.T) + 1)**2
        else:
            raise ValueError("Unsupported kernel")
        return K

    def fit(self, X):
        """Fit Kernel PCA."""
        self.X_fit = X
        n_samples = X.shape[0]

        # Compute and store the training kernel matrix (needed to center test kernels)
        K = self._kernel_function(X, X)
        self.K_fit_ = K

        # Center the kernel matrix in feature space
        one_n = np.ones((n_samples, n_samples)) / n_samples
        K_centered = K - one_n @ K - K @ one_n + one_n @ K @ one_n

        # Eigendecompose and sort in descending order
        eigenvals, eigenvecs = np.linalg.eigh(K_centered)
        idx = np.argsort(eigenvals)[::-1]
        eigenvals = eigenvals[idx]
        eigenvecs = eigenvecs[:, idx]

        # Select components
        if self.n_components is None:
            self.n_components = X.shape[1]

        # Normalize eigenvectors; clip tiny or negative eigenvalues for stability
        eigenvals = np.maximum(eigenvals[:self.n_components], 1e-12)
        self.alphas = eigenvecs[:, :self.n_components] / np.sqrt(eigenvals)
        return self

    def transform(self, X):
        """Transform data; the test kernel must be centered consistently with fit."""
        K = self._kernel_function(X, self.X_fit)
        n = self.X_fit.shape[0]
        one_n = np.ones((n, n)) / n
        one_m = np.ones((K.shape[0], n)) / n
        K_centered = (K - one_m @ self.K_fit_ - K @ one_n
                      + one_m @ self.K_fit_ @ one_n)
        return np.dot(K_centered, self.alphas)
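Kernel PCA can unfold structure that linear PCA cannot, such as two concentric rings. A small sketch, assuming the class above (the gamma value here is an arbitrary choice for illustration):

rng = np.random.default_rng(3)
# Two concentric circles (radii 1 and 3) with a little noise
theta = rng.uniform(0, 2 * np.pi, 200)
r = np.repeat([1.0, 3.0], 100)
X = np.column_stack([r * np.cos(theta), r * np.sin(theta)])
X += 0.05 * rng.normal(size=X.shape)

kpca = KernelPCA(n_components=2, kernel='rbf', gamma=2.0)
X_kpca = kpca.fit(X).transform(X)
print(X_kpca.shape)  # (200, 2); the leading component tends to separate the rings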
Incremental PCA
Online PCA
class IncrementalPCA:
    def __init__(self, n_components=None, batch_size=None):
        self.n_components = n_components
        self.batch_size = batch_size
        self.components_ = None
        self.singular_values_ = None
        self.mean_ = None
        self.n_samples_seen_ = 0

    def partial_fit(self, X):
        """Update the model with a batch via incremental SVD (Ross et al. style)."""
        n_new = X.shape[0]
        col_mean = np.mean(X, axis=0)

        if self.components_ is None:
            # First batch: ordinary SVD-based PCA
            self.mean_ = col_mean
            self.n_samples_seen_ = n_new
            U, S, Vt = np.linalg.svd(X - self.mean_, full_matrices=False)
        else:
            n_total = self.n_samples_seen_ + n_new
            # Correction row accounting for the shift between old and new means
            mean_correction = (np.sqrt(self.n_samples_seen_ * n_new / n_total)
                               * (self.mean_ - col_mean))
            # Stack the old components (scaled by their singular values),
            # the new centered batch, and the mean-correction row, then re-SVD
            X_aug = np.vstack([
                self.singular_values_[:, None] * self.components_,
                X - col_mean,
                mean_correction[None, :],
            ])
            U, S, Vt = np.linalg.svd(X_aug, full_matrices=False)
            # Update the running mean and sample count
            self.mean_ = (self.mean_ * self.n_samples_seen_
                          + col_mean * n_new) / n_total
            self.n_samples_seen_ = n_total

        k = self.n_components or X.shape[1]
        self.components_ = Vt[:k]
        self.singular_values_ = S[:k]
        return self

    def fit(self, X):
        """Fit PCA to X in batches."""
        self.components_ = None
        self.n_samples_seen_ = 0
        batch_size = self.batch_size or max(1, 5 * (self.n_components or X.shape[1]))
        n_batches = max(1, X.shape[0] // batch_size)
        for batch in np.array_split(X, n_batches):
            self.partial_fit(batch)
        return self

    def transform(self, X):
        """Apply the learned dimensionality reduction to X."""
        X_centered = X - self.mean_
        return np.dot(X_centered, self.components_.T)
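On stationary data, the batched fit should approximate a full-batch SVD fit; the approximation improves with larger batches. A quick comparison, assuming the classes above:

rng = np.random.default_rng(4)
X = rng.normal(size=(1000, 20)) * np.linspace(5.0, 0.5, 20)  # distinct column variances

ipca = IncrementalPCA(n_components=5, batch_size=100).fit(X)
full = SVDPCA(n_components=5).fit(X)

# Compare the spanned subspaces via their projection matrices
P_inc = ipca.components_.T @ ipca.components_
P_full = full.components_.T @ full.components_
print(np.linalg.norm(P_inc - P_full))  # should be small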
Applications
Feature Selection
class PCAFeatureSelector:
    def __init__(self, n_components=None, threshold=0.95):
        self.n_components = n_components
        self.threshold = threshold
        self.pca = PCA(n_components=n_components)

    def fit_transform(self, X):
        """Reduce X with PCA, keeping enough components to reach the threshold."""
        # Fit PCA on all components
        self.pca.fit(X)

        # Determine the number of components from cumulative explained variance
        if self.n_components is None:
            cumsum = np.cumsum(self.pca.explained_variance_ratio_)
            self.n_components = int(np.argmax(cumsum >= self.threshold)) + 1

        # Transform the data
        X_transformed = self.pca.transform(X)

        # Score each original feature by its total loading on the kept components
        # (components_ has shape (n_features, n_components) in the PCA class above)
        self.feature_importance_ = np.abs(
            self.pca.components_[:, :self.n_components]).sum(axis=1)
        return X_transformed[:, :self.n_components]
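Usage is a single fit_transform call; the number of retained components is chosen automatically from the variance threshold. For example, assuming the class above:

rng = np.random.default_rng(5)
X = rng.normal(size=(200, 10)) * np.linspace(4.0, 0.1, 10)

selector = PCAFeatureSelector(threshold=0.95)
X_sel = selector.fit_transform(X)
print(X_sel.shape)                    # (200, k) with k chosen by the threshold
print(selector.feature_importance_)   # one loading-based score per original feature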
Noise Reduction
class PCADenoiser:
    def __init__(self, n_components=None, threshold=None):
        self.n_components = n_components
        self.threshold = threshold
        self.pca = PCA(n_components=n_components)

    def fit(self, X):
        """Fit the PCA denoiser."""
        self.pca.fit(X)
        if self.threshold is not None:
            # Keep just enough components to reach the explained-variance threshold
            cumsum = np.cumsum(self.pca.explained_variance_ratio_)
            self.n_components = int(np.argmax(cumsum >= self.threshold)) + 1
        return self

    def transform(self, X):
        """Denoise data by projecting onto the leading components and back."""
        X_transformed = self.pca.transform(X)
        if self.n_components:
            # Zero out the trailing (noise) components before reconstructing
            X_transformed[:, self.n_components:] = 0
        return self.pca.inverse_transform(X_transformed)
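Denoising here is projection onto the leading components followed by reconstruction; everything outside that subspace is treated as noise. A small sketch, assuming the classes above:

rng = np.random.default_rng(6)
# A rank-3 signal embedded in 20 dimensions, plus isotropic noise
signal = rng.normal(size=(300, 3)) @ rng.normal(size=(3, 20))
noisy = signal + 0.3 * rng.normal(size=signal.shape)

denoiser = PCADenoiser(threshold=0.9).fit(noisy)
denoised = denoiser.transform(noisy)
# Typically True: the reconstruction is closer to the clean signal than the noisy input
print(np.linalg.norm(denoised - signal) < np.linalg.norm(noisy - signal))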