Regression Metrics
This section covers the various metrics used to evaluate regression models, including their mathematical foundations, implementation, and practical considerations.
Basic Error Metrics
1. Mean Squared Error (MSE)
import numpy as np
from sklearn.metrics import mean_squared_error
def compute_mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Delegates to ``mean_squared_error`` with its default options.

    Args:
        y_true: Observed target values.
        y_pred: Model predictions aligned with ``y_true``.

    Returns:
        Whatever ``mean_squared_error(y_true, y_pred)`` returns.
    """
    mse = mean_squared_error(y_true, y_pred)
    return mse
2. Root Mean Squared Error (RMSE)
def compute_rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    The value is the square root of ``mean_squared_error(y_true, y_pred)``,
    which puts the error back on the scale of the target variable.

    Args:
        y_true: Observed target values.
        y_pred: Model predictions aligned with ``y_true``.

    Returns:
        ``np.sqrt`` applied to the mean squared error.
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    return rmse
3. Mean Absolute Error (MAE)
from sklearn.metrics import mean_absolute_error
def compute_mae(y_true, y_pred):
    """Return the mean absolute error between targets and predictions.

    Delegates to ``mean_absolute_error`` with its default options.

    Args:
        y_true: Observed target values.
        y_pred: Model predictions aligned with ``y_true``.

    Returns:
        Whatever ``mean_absolute_error(y_true, y_pred)`` returns.
    """
    mae = mean_absolute_error(y_true, y_pred)
    return mae
4. Mean Absolute Percentage Error (MAPE)
def compute_mape(y_true, y_pred):
    """Compute the Mean Absolute Percentage Error, in percent.

    Inputs are coerced to float arrays, so plain Python lists work as well
    as NumPy arrays. The denominator ``|y_true|`` is clamped from below at
    machine epsilon (the convention scikit-learn's
    ``mean_absolute_percentage_error`` uses), so a zero target produces a
    very large finite value instead of ``inf``/``nan`` and a runtime
    warning. Targets whose magnitude is at least machine epsilon are
    unaffected by the clamp.

    Args:
        y_true: Observed target values.
        y_pred: Model predictions; must broadcast against ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / max(|y_true|, eps)) * 100``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # |a / b| == |a| / |b|, so dividing by the clamped magnitude matches the
    # plain formula whenever |y_true| >= eps.
    eps = np.finfo(np.float64).eps
    denominator = np.maximum(np.abs(y_true), eps)
    return np.mean(np.abs(y_true - y_pred) / denominator) * 100
Goodness of Fit Metrics
1. R-squared (Coefficient of Determination)
from sklearn.metrics import r2_score
def compute_r2(y_true, y_pred):
    """Return the coefficient of determination (R-squared).

    Delegates to ``r2_score`` with its default options.

    Args:
        y_true: Observed target values.
        y_pred: Model predictions aligned with ``y_true``.

    Returns:
        Whatever ``r2_score(y_true, y_pred)`` returns.
    """
    r_squared = r2_score(y_true, y_pred)
    return r_squared
2. Adjusted R-squared
def compute_adjusted_r2(y_true, y_pred, n_features):
    """Compute the adjusted R-squared score.

    Adjusted R-squared penalizes R-squared for the number of predictors::

        1 - (1 - R2) * (n_samples - 1) / (n_samples - n_features - 1)

    Args:
        y_true: Observed target values; ``len(y_true)`` is the sample count.
        y_pred: Model predictions aligned with ``y_true``.
        n_features: Number of predictors used by the model.

    Returns:
        The adjusted R-squared value.

    Raises:
        ValueError: If ``n_samples - n_features - 1`` is not positive. The
            statistic is undefined there; without this check the formula
            divides by zero or flips sign on a negative denominator.
    """
    n_samples = len(y_true)
    residual_dof = n_samples - n_features - 1
    # Checked before any scoring so the failure is cheap and explicit.
    if residual_dof <= 0:
        raise ValueError(
            "Adjusted R-squared requires n_samples > n_features + 1 "
            f"(got n_samples={n_samples}, n_features={n_features})"
        )
    r2 = compute_r2(y_true, y_pred)
    return 1 - (1 - r2) * (n_samples - 1) / residual_dof
3. Explained Variance Score
from sklearn.metrics import explained_variance_score
def compute_explained_variance(y_true, y_pred):
    """Return the explained variance score of the predictions.

    Delegates to ``explained_variance_score`` with its default options.

    Args:
        y_true: Observed target values.
        y_pred: Model predictions aligned with ``y_true``.

    Returns:
        Whatever ``explained_variance_score(y_true, y_pred)`` returns.
    """
    score = explained_variance_score(y_true, y_pred)
    return score
Residual Analysis
1. Basic Residual Plots
import matplotlib.pyplot as plt
import seaborn as sns
def plot_residuals(y_true, y_pred):
    """Draw a 2x2 grid of residual diagnostic plots.

    Panels, in reading order:
        * residuals against predicted values, with a dashed zero line;
        * histogram of the residuals (30 bins);
        * normal Q-Q plot of the residuals;
        * scale-location plot (square root of the absolute residuals
          against predicted values).

    Args:
        y_true: Observed target values; must support ``y_true - y_pred``.
        y_pred: Model predictions aligned with ``y_true``.

    Returns:
        The figure holding the four panels.
    """
    errors = y_true - y_pred

    # One figure, four axes unpacked row by row.
    fig, panels = plt.subplots(2, 2, figsize=(15, 12))
    (fitted_ax, hist_ax), (qq_ax, scale_ax) = panels

    # Top-left: residuals vs predicted.
    fitted_ax.scatter(y_pred, errors)
    fitted_ax.axhline(y=0, color='r', linestyle='--')
    fitted_ax.set_xlabel('Predicted values')
    fitted_ax.set_ylabel('Residuals')
    fitted_ax.set_title('Residuals vs Predicted')

    # Top-right: distribution of the residuals.
    hist_ax.hist(errors, bins=30)
    hist_ax.set_xlabel('Residual value')
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_title('Residual Distribution')

    # Bottom-left: Q-Q plot against the normal distribution. The title is
    # set after probplot has drawn on the axes.
    from scipy import stats
    stats.probplot(errors, dist="norm", plot=qq_ax)
    qq_ax.set_title('Normal Q-Q Plot')

    # Bottom-right: spread of the residuals across the prediction range.
    scale_ax.scatter(y_pred, np.sqrt(np.abs(errors)))
    scale_ax.set_xlabel('Predicted values')
    scale_ax.set_ylabel('√|Residuals|')
    scale_ax.set_title('Scale-Location Plot')

    plt.tight_layout()
    return fig
2. Advanced Residual Analysis
def analyze_residuals(y_true, y_pred):
    """Perform detailed residual analysis.

    Computes summary statistics of the residuals ``y_true - y_pred`` and
    runs two diagnostic tests on them.

    Args:
        y_true: Observed target values (flattened to 1-D).
        y_pred: Model predictions aligned with ``y_true`` (flattened to 1-D).

    Returns:
        dict with the keys:
            ``'mean'``, ``'std'``: mean and standard deviation of residuals;
            ``'skewness'``, ``'kurtosis'``: from ``scipy.stats.skew`` and
            ``scipy.stats.kurtosis``;
            ``'normality_p_value'``: p-value of ``scipy.stats.normaltest``;
            ``'heteroscedasticity_p_value'``: Lagrange-multiplier p-value of
            the Breusch-Pagan test of the residuals against the predictions.
    """
    # Imported here rather than relying on a module-level ``stats`` name,
    # and aliased so the result dict below cannot shadow the module.
    from scipy import stats as sp_stats
    from statsmodels.stats.diagnostic import het_breuschpagan

    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    residuals = y_true - y_pred

    # Basic statistics
    summary = {
        'mean': np.mean(residuals),
        'std': np.std(residuals),
        'skewness': sp_stats.skew(residuals),
        'kurtosis': sp_stats.kurtosis(residuals),
    }

    # Normality test
    _, normality_p_value = sp_stats.normaltest(residuals)
    summary['normality_p_value'] = normality_p_value

    # Heteroscedasticity test (Breusch-Pagan). The auxiliary regression
    # needs an intercept, so a column of ones is placed next to the
    # predictions. het_breuschpagan returns four values
    # (LM statistic, LM p-value, F statistic, F p-value).
    exog = np.column_stack((np.ones_like(y_pred), y_pred))
    _, lm_p_value, _, _ = het_breuschpagan(residuals, exog)
    summary['heteroscedasticity_p_value'] = lm_p_value

    return summary
Prediction Intervals
1. Bootstrap Prediction Intervals
def compute_prediction_intervals(model, X, y, n_bootstraps=1000, confidence=0.95,
                                 random_state=None):
    """Compute percentile bootstrap intervals for a model's predictions.

    The model is refit on ``n_bootstraps`` resamples (drawn with
    replacement) of ``(X, y)``, each refit predicts on the full ``X``, and
    the interval bounds are percentiles of those predictions per sample.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            deep-copied first, so the caller's instance is not refit.
        X: Feature array; must support integer-array indexing.
        y: Target array; must support integer-array indexing.
        n_bootstraps: Number of bootstrap resamples.
        confidence: Central coverage of the interval, within ``[0, 1]``.
        random_state: Optional seed for reproducible resampling. When
            ``None`` the global ``np.random`` state is used, as before.

    Returns:
        Tuple ``(lower, upper)`` of arrays with one entry per row of ``X``.

    Raises:
        ValueError: If ``confidence`` is outside ``[0, 1]``. This is raised
            before the bootstrap loop rather than by ``np.percentile``
            after all refits have run.
    """
    import copy

    if not 0 <= confidence <= 1:
        raise ValueError(
            f"confidence must be within [0, 1], got {confidence!r}"
        )

    n_samples = len(X)
    # A seeded RandomState gives reproducible draws; the module-level
    # generator keeps the legacy behavior when no seed is supplied.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Refit a private copy so the caller's model is not left trained on
    # whichever resample happened to come last.
    bootstrap_model = copy.deepcopy(model)

    predictions = np.zeros((n_bootstraps, n_samples))
    for i in range(n_bootstraps):
        # Bootstrap sample
        indices = rng.randint(0, n_samples, n_samples)
        # Train model and predict
        bootstrap_model.fit(X[indices], y[indices])
        predictions[i] = bootstrap_model.predict(X)

    # Compute intervals
    lower_q = ((1 - confidence) / 2) * 100
    upper_q = (1 - (1 - confidence) / 2) * 100
    lower, upper = np.percentile(predictions, [lower_q, upper_q], axis=0)
    return lower, upper
2. Plot with Prediction Intervals
def plot_prediction_intervals(X, y, y_pred, lower, upper, confidence=0.95):
    """Plot predictions with a shaded interval band.

    All inputs are reordered by ascending ``X`` so the prediction line and
    the band are drawn left to right.

    Args:
        X: Feature values; flattened for sorting, so a single feature is
            expected.
        y: Observed target values.
        y_pred: Model predictions aligned with ``X``.
        lower: Lower interval bound per sample.
        upper: Upper interval bound per sample.
        confidence: Coverage level shown in the legend label (e.g. ``0.95``
            renders as "95%"). It only affects the label; pass the level
            that ``lower``/``upper`` were computed with.

    Returns:
        The figure that was drawn on.
    """
    fig = plt.figure(figsize=(12, 8))

    X = np.asarray(X)
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    lower = np.asarray(lower)
    upper = np.asarray(upper)

    # Sort by X for better visualization
    sort_idx = np.argsort(X.ravel())
    X = X[sort_idx]
    y = y[sort_idx]
    y_pred = y_pred[sort_idx]
    lower = lower[sort_idx]
    upper = upper[sort_idx]

    # Plot data and predictions
    plt.scatter(X, y, color='blue', alpha=0.5, label='Actual')
    plt.plot(X, y_pred, color='red', label='Predicted')
    # ``:.0%`` rounds to the nearest whole percent, which avoids truncation
    # artifacts such as int(0.29 * 100) == 28.
    plt.fill_between(X.ravel(), lower, upper, color='gray', alpha=0.2,
                     label=f'{confidence:.0%} Prediction Interval')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.title('Predictions with Confidence Intervals')
    plt.legend()
    return fig
Best Practices
1. Metric Selection
- Choose metrics based on problem characteristics
- Consider scale of target variable
- Account for business requirements
- Use multiple complementary metrics
2. Error Analysis
- Check for heteroscedasticity
- Assess normality of residuals
- Look for patterns in residuals
- Consider transformations if needed
3. Reporting
- Include confidence intervals
- Report all relevant metrics
- Provide visual representations
- Document assumptions and limitations
4. Common Pitfalls
- Using R-squared alone
- Ignoring residual analysis
- Not considering data scale
- Overlooking outliers' impact