Inferential Statistics

Understanding statistical inference methods, hypothesis testing, and their applications in data science and machine learning.

Inferential statistics allows us to draw conclusions about populations based on sample data, a crucial aspect of data science and machine learning.

Point Estimation

Maximum Likelihood Estimation (MLE)

  • Likelihood function: L(θx)=i=1nf(xiθ)L(\theta|x) = \prod_{i=1}^n f(x_i|\theta)
  • Log-likelihood: (θx)=i=1nlogf(xiθ)\ell(\theta|x) = \sum_{i=1}^n \log f(x_i|\theta)
class MaximumLikelihoodEstimation:
    @staticmethod
    def normal_mle(data):
        """MLE for normal distribution"""
        mu = np.mean(data)
        sigma = np.sqrt(np.mean((data - mu)**2))
        return {'mu': mu, 'sigma': sigma}
    
    @staticmethod
    def poisson_mle(data):
        """MLE for Poisson distribution"""
        return {'lambda': np.mean(data)}
    
    @staticmethod
    def bernoulli_mle(data):
        """MLE for Bernoulli distribution"""
        return {'p': np.mean(data)}

Method of Moments

class MethodOfMoments:
    @staticmethod
    def estimate_parameters(data, n_moments):
        """Estimate parameters using method of moments"""
        moments = [np.mean(data**k) for k in range(1, n_moments + 1)]
        return moments
    
    @staticmethod
    def gamma_mom(data):
        """Method of moments for Gamma distribution"""
        mean = np.mean(data)
        var = np.var(data)
        alpha = mean**2 / var
        beta = mean / var
        return {'alpha': alpha, 'beta': beta}

Interval Estimation

Confidence Intervals

class ConfidenceIntervals:
    @staticmethod
    def normal_ci(data, confidence=0.95):
        """Confidence interval for normal mean"""
        n = len(data)
        mean = np.mean(data)
        se = np.std(data, ddof=1) / np.sqrt(n)
        z = stats.norm.ppf((1 + confidence) / 2)
        return {
            'mean': mean,
            'ci_lower': mean - z * se,
            'ci_upper': mean + z * se
        }
    
    @staticmethod
    def proportion_ci(successes, n, confidence=0.95):
        """Confidence interval for proportion"""
        p_hat = successes / n
        z = stats.norm.ppf((1 + confidence) / 2)
        se = np.sqrt(p_hat * (1 - p_hat) / n)
        return {
            'proportion': p_hat,
            'ci_lower': p_hat - z * se,
            'ci_upper': p_hat + z * se
        }

Bootstrap Methods

class BootstrapMethods:
    @staticmethod
    def bootstrap_ci(data, statistic, n_bootstrap=1000, confidence=0.95):
        """Bootstrap confidence interval"""
        bootstrap_stats = []
        for _ in range(n_bootstrap):
            sample = np.random.choice(data, size=len(data), replace=True)
            bootstrap_stats.append(statistic(sample))
        
        lower = np.percentile(bootstrap_stats, 
                            (1 - confidence) * 100 / 2)
        upper = np.percentile(bootstrap_stats, 
                            (1 + confidence) * 100 / 2)
        return {
            'estimate': np.mean(bootstrap_stats),
            'ci_lower': lower,
            'ci_upper': upper
        }

Hypothesis Testing

Classical Tests

class HypothesisTests:
    @staticmethod
    def z_test(data, null_mean, known_std):
        """Z-test for mean (known variance)"""
        n = len(data)
        sample_mean = np.mean(data)
        z_stat = (sample_mean - null_mean) / (known_std / np.sqrt(n))
        p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))
        return {
            'statistic': z_stat,
            'p_value': p_value
        }
    
    @staticmethod
    def t_test(data1, data2=None, paired=False):
        """Student's t-test"""
        if data2 is None:
            stat, p_value = stats.ttest_1samp(data1, 0)
        elif paired:
            stat, p_value = stats.ttest_rel(data1, data2)
        else:
            stat, p_value = stats.ttest_ind(data1, data2)
        return {
            'statistic': stat,
            'p_value': p_value
        }

Nonparametric Tests

class NonparametricTests:
    @staticmethod
    def mann_whitney(data1, data2):
        """Mann-Whitney U test"""
        return stats.mannwhitneyu(data1, data2)
    
    @staticmethod
    def wilcoxon(data1, data2=None):
        """Wilcoxon signed-rank test"""
        if data2 is None:
            return stats.wilcoxon(data1)
        return stats.wilcoxon(data1, data2)
    
    @staticmethod
    def kruskal_wallis(*args):
        """Kruskal-Wallis H-test"""
        return stats.kruskal(*args)

Applications in Machine Learning

Model Evaluation

class ModelEvaluation:
    @staticmethod
    def compare_models(model1_scores, model2_scores):
        """Compare two models using paired t-test"""
        return HypothesisTests.t_test(
            model1_scores, model2_scores, paired=True)
    
    @staticmethod
    def cross_validation_inference(cv_scores, confidence=0.95):
        """Inference on cross-validation scores"""
        return ConfidenceIntervals.normal_ci(
            cv_scores, confidence)

Feature Selection

class FeatureSelection:
    @staticmethod
    def univariate_selection(X, y, alpha=0.05):
        """Select features based on statistical tests"""
        n_features = X.shape[1]
        selected_features = []
        
        for i in range(n_features):
            if isinstance(y[0], (int, bool)):  # Classification
                stat, p_value = stats.chi2_contingency(
                    pd.crosstab(y, X[:, i]))[:2]
            else:  # Regression
                stat, p_value = stats.pearsonr(X[:, i], y)
            
            if p_value < alpha:
                selected_features.append(i)
        
        return selected_features

Advanced Topics

Bayesian Inference

class BayesianInference:
    @staticmethod
    def posterior_normal(prior_mean, prior_var, data, likelihood_var):
        """Compute posterior for normal distribution"""
        n = len(data)
        sample_mean = np.mean(data)
        
        posterior_var = 1 / (1/prior_var + n/likelihood_var)
        posterior_mean = posterior_var * (
            prior_mean/prior_var + n*sample_mean/likelihood_var)
        
        return {
            'posterior_mean': posterior_mean,
            'posterior_var': posterior_var
        }
    
    @staticmethod
    def credible_interval(posterior_samples, credible_level=0.95):
        """Compute credible interval from posterior samples"""
        lower = np.percentile(posterior_samples, 
                            (1 - credible_level) * 100 / 2)
        upper = np.percentile(posterior_samples, 
                            (1 + credible_level) * 100 / 2)
        return {'lower': lower, 'upper': upper}

Multiple Testing

class MultipleTestingCorrection:
    @staticmethod
    def bonferroni(p_values, alpha=0.05):
        """Bonferroni correction"""
        n_tests = len(p_values)
        return p_values < (alpha / n_tests)
    
    @staticmethod
    def benjamini_hochberg(p_values, alpha=0.05):
        """Benjamini-Hochberg procedure"""
        n_tests = len(p_values)
        sorted_idx = np.argsort(p_values)
        sorted_p = p_values[sorted_idx]
        
        # Find largest k such that p_k ≤ (k/n)α
        comparison = sorted_p <= np.arange(1, n_tests + 1) * alpha / n_tests
        if not np.any(comparison):
            return np.zeros_like(p_values, dtype=bool)
        
        k = np.max(np.where(comparison)[0])
        threshold = sorted_p[k]
        
        return p_values <= threshold