PyTorch: The Researcher's Deep Learning Framework

What is PyTorch?

PyTorch is an open-source deep learning framework originally developed by Meta AI (formerly Facebook AI Research) and now governed by the PyTorch Foundation. Known for its intuitive, Pythonic interface and dynamic computation graphs, PyTorch has become the go-to framework for AI research and is rapidly gaining ground in production deployments.

If you read recent AI research papers, you'll find most are implemented in PyTorch. Its flexibility makes it perfect for experimenting with new architectures and ideas.

Why Choose PyTorch?

Dynamic Computation Graphs: Build and modify networks on-the-fly
Pythonic: Feels like native Python, easy debugging
Research Favorite: Most recent deep learning papers with public code use PyTorch
Strong Community: Extensive tutorials and pretrained models
Production Ready: TorchServe, ONNX export, mobile deployment

Tensors: The Foundation

Tensors are multi-dimensional arrays, similar to NumPy arrays but with GPU support:

import numpy as np
import torch

# --- Creating tensors ---
x = torch.tensor([1, 2, 3])      # built from a Python list
zeros = torch.zeros((3, 4))      # 3x4 tensor filled with zeros
ones = torch.ones((2, 3))        # 2x3 tensor filled with ones
random = torch.randn((2, 3))     # 2x3 samples from the standard normal distribution

# --- Converting from NumPy ---
np_array = np.array([1, 2, 3])
tensor = torch.from_numpy(np_array)  # the tensor shares memory with np_array

# --- Moving to the GPU (only when one is present) ---
if torch.cuda.is_available():
    x_gpu = x.to('cuda')  # x.cuda() is equivalent

# --- Basic operations ---
y = torch.tensor([4, 5, 6])
print(torch.add(x, y))   # element-wise sum
print(torch.mul(x, y))   # element-wise product
print(x.dot(y))          # dot product of the two 1-D tensors
print(x.size())          # shape of x

Autograd: Automatic Differentiation

PyTorch automatically computes gradients for backpropagation:

import torch

# Leaf tensor whose operations are recorded for differentiation
x = torch.tensor([2.0, 3.0], requires_grad=True)

# Forward pass: z = sum(x_i ** 2)
y = x.pow(2)
z = torch.sum(y)

# Backward pass: fills x.grad with dz/dx
z.backward()

# dz/dx_i = 2 * x_i
print(x.grad)  # tensor([4., 6.])

# For inference/evaluation, switch gradient tracking off
with torch.no_grad():
    # Inference code here
    pass

Building Neural Networks

import torch
import torch.nn as nn
import torch.nn.functional as F

class NeuralNetwork(nn.Module):
    """Fully connected classifier with two hidden layers.

    Layout: Linear -> BatchNorm1d -> ReLU -> Dropout(0.2) -> Linear -> ReLU
    -> Linear. The output is raw class scores; no softmax is applied.

    Args:
        input_size: Number of features per input sample.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.bn1 = nn.BatchNorm1d(num_features=hidden_size)
        self.dropout = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.fc3 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the class scores computed for the input batch `x`."""
        # First hidden layer: affine -> batch norm -> ReLU -> dropout
        hidden = self.fc1(x)
        hidden = self.bn1(hidden)
        hidden = self.dropout(F.relu(hidden))

        # Second hidden layer, then the output projection
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Instantiate the network: 784 inputs, 256 hidden units, 10 classes
model = NeuralNetwork(input_size=784, hidden_size=256, num_classes=10)
print(model)

# Use the GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = model.to(device)

Training Loop

import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Wrap the training tensors in a dataset and serve shuffled mini-batches of 32
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Cross-entropy loss, minimized with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Run the optimization for a fixed number of passes over the data
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # training-mode behavior for dropout and batch norm
    total_loss = 0

    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        logits = model(inputs)
        loss = criterion(logits, targets)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    avg_loss = total_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

Convolutional Neural Networks

class CNN(nn.Module):
    """Three-stage convolutional classifier for 3-channel 32x32 images.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and 2x2 max
    pooling, which halves the spatial size: 32 -> 16 -> 8 -> 4. The resulting
    128x4x4 feature map is flattened and passed through two linear layers,
    with dropout (p=0.5) after the first. The output is raw class scores.

    Args:
        num_classes: Number of output scores per image.

    Raises:
        ValueError: From ``forward`` when the input does not reduce to a
            128x4x4 feature map (i.e. it is not a 32x32 image).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class scores for a batch of 3x32x32 images."""
        x = self.pool(F.relu(self.conv1(x)))  # 32x32 -> 16x16
        x = self.pool(F.relu(self.conv2(x)))  # 16x16 -> 8x8
        x = self.pool(F.relu(self.conv3(x)))  # 8x8 -> 4x4

        # view(-1, ...) would silently fold surplus spatial positions into the
        # batch dimension (a 64x64 input yields 4x as many output rows), so
        # reject anything that is not the 128x4x4 map fc1 was sized for.
        feature_shape = tuple(x.shape[-3:])
        if feature_shape != (128, 4, 4):
            raise ValueError(
                'CNN expects 32x32 input images: the conv stack produced a '
                f'feature map of shape {feature_shape} instead of (128, 4, 4)'
            )

        x = x.view(-1, 128 * 4 * 4)  # Flatten
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

Transfer Learning

import torchvision.models as models

# Load a ResNet-50 with ImageNet weights. The `pretrained=True` flag is
# deprecated since torchvision 0.13 in favor of the explicit `weights`
# argument; IMAGENET1K_V1 is the checkpoint `pretrained=True` selected.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze the pretrained layers so backpropagation leaves them untouched
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer for your task. The new layer is created after the
# freeze, so its parameters keep the default requires_grad=True.
num_classes = 5
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Only train the new layer
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

Saving and Loading Models

# Save model: store only the learned parameters, not the whole object
torch.save(model.state_dict(), 'model.pth')

# Load model: rebuild the architecture, then restore its parameters.
# map_location='cpu' lets a file saved from a GPU load on a CPU-only machine
# (load_state_dict then copies the values onto the model's own device).
# weights_only=True (PyTorch >= 1.13) restricts unpickling to tensors and
# plain containers, so a tampered file cannot execute arbitrary code.
model = NeuralNetwork(784, 256, 10)
model.load_state_dict(
    torch.load('model.pth', map_location='cpu', weights_only=True)
)
model.eval()  # Set to evaluation mode

# Save complete checkpoint: everything needed to resume training later
checkpoint = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss
}
torch.save(checkpoint, 'checkpoint.pth')

Master PyTorch with Expert Mentorship

Our Data Science program covers PyTorch from basics to advanced architectures. Build real deep learning projects with guidance from industry experts.

Explore Data Science Program

PyTorch