Neural Networks Basics
Understanding fundamental concepts and implementations of neural networks
Neural networks are powerful models inspired by biological neural networks, capable of learning complex patterns in data.
Fundamental Concepts
Artificial Neurons
The basic building block of a neural network is the artificial neuron, which computes a weighted sum of its inputs, adds a bias, and applies an activation function:

y = σ(Σ_i w_i x_i + b)

where:
- x_i are the inputs
- w_i are the weights
- b is the bias
- σ is the activation function
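As a concrete illustration, here is a minimal sketch of evaluating a single neuron in NumPy with a sigmoid activation (the input, weight, and bias values are made up for the example):

import numpy as np
x = np.array([0.5, -1.2, 3.0])   # inputs x_i
w = np.array([0.8, 0.1, -0.4])   # weights w_i
b = 0.2                          # bias b
z = np.dot(w, x) + b             # weighted sum plus bias
y = 1 / (1 + np.exp(-z))         # sigmoid activation σ
print(y)                         # ≈ 0.327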
Activation Functions
import numpy as np
class ActivationFunctions:
    @staticmethod
    def sigmoid(x):
        # Squashes values into the range (0, 1)
        return 1 / (1 + np.exp(-x))
    
    @staticmethod
    def relu(x):
        # Zeroes out negative values, keeps positive values unchanged
        return np.maximum(0, x)
    
    @staticmethod
    def tanh(x):
        # Squashes values into the range (-1, 1)
        return np.tanh(x)
    
    @staticmethod
    def softmax(x):
        # Turns a vector of scores into probabilities that sum to 1;
        # subtracting the max improves numerical stability (assumes a 1D input)
        exp_x = np.exp(x - np.max(x))
        return exp_x / exp_x.sum(axis=0)
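A quick usage check (the input values are arbitrary):

x = np.array([-2.0, 0.0, 2.0])
print(ActivationFunctions.sigmoid(x))   # ≈ [0.119, 0.5, 0.881]
print(ActivationFunctions.relu(x))      # [0., 0., 2.]
print(ActivationFunctions.softmax(x))   # probabilities that sum to 1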
Neural Network Architecture
Simple Feed-forward Network
import torch
import torch.nn as nn
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        return x
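A minimal sketch of instantiating the network and running a forward pass on random data (the sizes here are arbitrary):

model = SimpleNN(input_size=10, hidden_size=32, output_size=2)
x = torch.randn(4, 10)    # a batch of 4 samples with 10 features each
logits = model(x)         # raw, unnormalized scores of shape (4, 2)
print(logits.shape)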
Multi-layer Perceptron (MLP)
class MLP(nn.Module):
    def __init__(self, layer_sizes):
        super(MLP, self).__init__()
        self.layers = nn.ModuleList()
        
        for i in range(len(layer_sizes)-1):
            self.layers.append(nn.Linear(
                layer_sizes[i],
                layer_sizes[i+1]
            ))
            
    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # Apply ReLU between layers, but not after the final (output) layer
            if i < len(self.layers) - 1:
                x = torch.relu(x)
        return x
Training Neural Networks
Loss Functions
def compute_loss(y_pred, y_true, loss_type='mse'):
    if loss_type == 'mse':
        return nn.MSELoss()(y_pred, y_true)
    elif loss_type == 'cross_entropy':
        # Expects raw logits and integer class labels
        return nn.CrossEntropyLoss()(y_pred, y_true)
    elif loss_type == 'binary_cross_entropy':
        # Expects probabilities in [0, 1] (e.g. after a sigmoid) and 0/1 targets
        return nn.BCELoss()(y_pred, y_true)
    else:
        raise ValueError(f"Unknown loss type: {loss_type}")
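For example, cross-entropy takes raw logits and integer class labels (a small made-up batch):

y_pred = torch.randn(4, 3)            # logits for 4 samples and 3 classes
y_true = torch.tensor([0, 2, 1, 2])   # integer class labels
loss = compute_loss(y_pred, y_true, loss_type='cross_entropy')
print(loss.item())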
Backpropagation
def train_step(model, optimizer, X_batch, y_batch):
    # Forward pass
    y_pred = model(X_batch)
    loss = compute_loss(y_pred, y_batch)
    
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return loss.item()
Training Loop
def train_model(model, train_loader, val_loader, 
                epochs=100, learning_rate=0.001):
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=learning_rate
    )
    
    history = {
        'train_loss': [],
        'val_loss': []
    }
    
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_losses = []
        
        for X_batch, y_batch in train_loader:
            loss = train_step(model, optimizer, X_batch, y_batch)
            train_losses.append(loss)
            
        # Validation phase
        model.eval()
        val_losses = []
        
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                y_pred = model(X_batch)
                val_loss = compute_loss(y_pred, y_batch)
                val_losses.append(val_loss.item())
        
        # Record history
        history['train_loss'].append(np.mean(train_losses))
        history['val_loss'].append(np.mean(val_losses))
        
    return history
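The training loop expects PyTorch DataLoaders. Here is a minimal sketch of building them from tensors and running the loop, assuming X_train, y_train, X_val, and y_val already exist as tensors:

from torch.utils.data import TensorDataset, DataLoader
# X_train, y_train, X_val, y_val are placeholders for your own tensors
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=32)
model = MLP([X_train.shape[1], 64, 32, 1])
history = train_model(model, train_loader, val_loader, epochs=50)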
Regularization Techniques
Dropout
class DropoutMLP(nn.Module):
    def __init__(self, layer_sizes, dropout_rate=0.5):
        super(DropoutMLP, self).__init__()
        self.layers = nn.ModuleList()
        self.dropout = nn.Dropout(dropout_rate)
        
        for i in range(len(layer_sizes)-1):
            self.layers.append(nn.Linear(
                layer_sizes[i],
                layer_sizes[i+1]
            ))
            
    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # ReLU and dropout on hidden layers only, not on the output layer
            if i < len(self.layers) - 1:
                x = torch.relu(x)
                x = self.dropout(x)
        return x
L1/L2 Regularization
def add_regularization(model, loss, lambda_l1=0.01, lambda_l2=0.01):
    # L1 regularization: sum of absolute values of the parameters
    l1_reg = torch.tensor(0.)
    for param in model.parameters():
        l1_reg = l1_reg + torch.norm(param, 1)
    
    # L2 regularization: sum of squared parameters (the usual "weight decay" form)
    l2_reg = torch.tensor(0.)
    for param in model.parameters():
        l2_reg = l2_reg + torch.sum(param ** 2)
    
    return loss + lambda_l1 * l1_reg + lambda_l2 * l2_reg
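One way to wire this into training, mirroring train_step above (the penalty weights are example values):

def train_step_regularized(model, optimizer, X_batch, y_batch):
    y_pred = model(X_batch)
    loss = compute_loss(y_pred, y_batch)
    # Add the L1/L2 penalties to the data loss before backpropagating
    loss = add_regularization(model, loss, lambda_l1=0.0, lambda_l2=1e-4)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()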
Weight Initialization
def initialize_weights(model):
    for m in model.modules():
        if isinstance(m, nn.Linear):
            # Xavier/Glorot initialization for weights, zeros for biases
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
Learning Rate Scheduling
def create_lr_scheduler(optimizer, mode='step'):
    if mode == 'step':
        # Multiply the learning rate by gamma every step_size epochs
        return torch.optim.lr_scheduler.StepLR(
            optimizer,
            step_size=30,
            gamma=0.1
        )
    elif mode == 'cosine':
        # Anneal the learning rate along a cosine curve over T_max epochs
        return torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=100
        )
    else:
        raise ValueError(f"Unknown scheduler mode: {mode}")
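A sketch of where the scheduler fits into training, assuming a model and train_loader built as in the earlier snippets: it is stepped once per epoch, after the optimizer updates for that epoch.

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = create_lr_scheduler(optimizer, mode='step')
for epoch in range(100):
    for X_batch, y_batch in train_loader:
        train_step(model, optimizer, X_batch, y_batch)
    scheduler.step()   # update the learning rate once per epoch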
Model Evaluation
def evaluate_model(model, test_loader):
    model.eval()
    predictions = []
    actual = []
    
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch)
            predictions.extend(y_pred.numpy())
            actual.extend(y_batch.numpy())
    
    return np.array(predictions), np.array(actual)
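With predictions and targets back as NumPy arrays, standard metrics follow directly; for example, mean squared error for a regression model (a minimal sketch):

predictions, actual = evaluate_model(model, test_loader)
# Flatten in case the model outputs shape (N, 1) while targets have shape (N,)
mse = np.mean((predictions.ravel() - actual.ravel()) ** 2)
print(f"Test MSE: {mse:.4f}")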
Visualization Tools
import matplotlib.pyplot as plt
def plot_training_history(history):
    plt.figure(figsize=(10, 6))
    plt.plot(history['train_loss'], label='Training Loss')
    plt.plot(history['val_loss'], label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training History')
    plt.legend()
    return plt
def visualize_decision_boundary(model, X, y):
    # Assumes two input features and a single-output model (e.g. a binary classifier)
    # Create mesh grid
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, 0.02),
        np.arange(y_min, y_max, 0.02)
    )
    
    # Make predictions
    Z = model(torch.FloatTensor(
        np.c_[xx.ravel(), yy.ravel()]
    )).detach().numpy()
    Z = Z.reshape(xx.shape)
    
    # Plot
    plt.contourf(xx, yy, Z, alpha=0.4)
    plt.scatter(X[:, 0], X[:, 1], c=y, alpha=0.8)
    return plt
Best Practices
- Data Preparation
from sklearn.preprocessing import StandardScaler
def prepare_data_for_nn(X, y):
    # Scale features to zero mean and unit variance
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Convert to tensors (use torch.LongTensor for integer class labels)
    X_tensor = torch.FloatTensor(X_scaled)
    y_tensor = torch.FloatTensor(y)
    
    return X_tensor, y_tensor
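A sketch of feeding the prepared tensors into the training utilities above (X and y stand in for your own arrays):

from torch.utils.data import TensorDataset, DataLoader
X_tensor, y_tensor = prepare_data_for_nn(X, y)
loader = DataLoader(TensorDataset(X_tensor, y_tensor), batch_size=32, shuffle=True)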
- Model Architecture Selection
def select_architecture(input_size, output_size, problem_type):
    if problem_type == 'binary':
        # Single output logit; pair with a sigmoid and binary cross-entropy
        return SimpleNN(input_size, 64, 1)
    elif problem_type == 'multiclass':
        # One output per class; pair with CrossEntropyLoss on the raw logits
        return MLP([input_size, 128, 64, output_size])
    elif problem_type == 'regression':
        # Single continuous output; pair with MSE loss
        return MLP([input_size, 64, 32, 1])
    else:
        raise ValueError(f"Unknown problem type: {problem_type}")
- Hyperparameter Tuning
def tune_hyperparameters(model_class, param_grid, train_loader, val_loader):
    results = []
    
    # param_grid is a list of keyword-argument dicts for model_class
    for params in param_grid:
        model = model_class(**params)
        history = train_model(model, train_loader, val_loader)
        results.append({
            'params': params,
            'final_loss': history['val_loss'][-1]
        })
    
    return results
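For example, a small grid over hidden-layer sizes for the MLP defined earlier (the layer sizes and the input dimension of 20 are arbitrary):

param_grid = [
    {'layer_sizes': [20, 32, 1]},
    {'layer_sizes': [20, 64, 32, 1]},
    {'layer_sizes': [20, 128, 64, 1]},
]
results = tune_hyperparameters(MLP, param_grid, train_loader, val_loader)
best = min(results, key=lambda r: r['final_loss'])
print(best['params'], best['final_loss'])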