PyTorch 是由 Meta(Facebook)的人工智能研究团队于 2016 年开源的深度学习框架,基于 Torch 库和 Python 语言构建。PyTorch 以动态计算图(Define-by-Run)为核心特性,被誉为"学术界最受欢迎的深度学习框架"。
PyTorch 的核心定位是 研究与生产一体化的深度学习框架。它提供了:
PyTorch 由 Meta 的 AI 研究团队于 2016 年开源,是 Torch(Lua)在 Python 上的重新实现。其动态计算图特性吸引了大量研究者,使 PyTorch 迅速成为学术界的首选框架。
import torch
import numpy as np
# --- Tensor basics -------------------------------------------------------

# Creating tensors
x = torch.tensor([1, 2, 3, 4, 5])
y = torch.tensor([[1, 2], [3, 4]])
z = torch.zeros(3, 4)
w = torch.ones(2, 3)
r = torch.randn(3, 3)  # samples drawn from a standard normal distribution

# Tensor attributes
print(x.shape)   # torch.Size([5])
print(x.dtype)   # torch.int64
print(x.device)  # cpu

# NumPy interop: from_numpy() / numpy() share memory with the source array,
# so an in-place change on one side is visible on the other.
arr = np.array([1, 2, 3])
tensor = torch.from_numpy(arr)
back_to_numpy = tensor.numpy()

# Moving between devices. Both transfers stay inside the guard: x_gpu only
# exists when CUDA is available, so copying it back must be guarded as well.
if torch.cuda.is_available():
    x_gpu = x.cuda()
    y_cpu = x_gpu.cpu()

# Tensor arithmetic
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])
c = a + b               # element-wise sum
d = a * b               # element-wise product
e = torch.matmul(a, b)  # two 1-D operands -> dot product (0-d tensor)
f = torch.sum(a)        # sum of all elements
# --- Autograd -------------------------------------------------------------

# Scalar example: y = x^2 + 3x + 1, differentiated at x = 2.
x = torch.tensor(2.0, requires_grad=True)
y = x.pow(2) + 3 * x + 1

# Backpropagate from y to populate x.grad.
y.backward()

# dy/dx = 2*x + 3 = 7 at x = 2
print(x.grad)

# Larger graph: mean of the element-wise square of a 3x3 leaf tensor.
x = torch.randn(3, 3, requires_grad=True)
y = x.pow(2)
z = torch.mean(y)
z.backward()

# The mean runs over 9 elements, so dz/dx = 2*x / 9 for every entry.
print(x.grad)
import torch.nn as nn
import torch.nn.functional as F
# Simple feed-forward neural network
class SimpleNN(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear."""

    def __init__(self, input_size: int, hidden_size: int, output_size: int) -> None:
        """Build the two linear layers.

        Args:
            input_size: Number of features per input sample.
            hidden_size: Width of the hidden layer.
            output_size: Number of output units.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw (un-normalised) outputs of the second linear layer."""
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
# Convolutional neural network (CNN)
class CNN(nn.Module):
    """Two conv/pool stages followed by two fully connected layers.

    The first convolution takes 1 input channel and ``fc1`` expects
    64 * 7 * 7 features, so after two 2x2 poolings the network fits
    single-channel 28x28 inputs. The final layer has 10 outputs.
    """

    def __init__(self) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class scores with one row per input sample."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample instead of view(-1, 64 * 7 * 7): the latter can
        # silently fold a wrongly-sized input into a different batch size,
        # whereas this keeps dim 0 fixed and lets fc1 raise on a mismatch.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
# LSTM network
class LSTM(nn.Module):
    """Stacked LSTM whose last time-step output feeds a linear layer."""

    def __init__(self, input_size: int, hidden_size: int, num_layers: int,
                 output_size: int) -> None:
        """Build the recurrent stack and the output projection.

        Args:
            input_size: Number of features per time step.
            hidden_size: Size of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
            output_size: Number of output units of the final linear layer.
        """
        super().__init__()
        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Project the LSTM output of the final time step through ``fc``."""
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # keep only the last time step
        out = self.fc(out)
        return out
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# --- Training and evaluation ----------------------------------------------

# Synthetic data: 1000 samples with 10 features and binary labels.
X = torch.randn(1000, 10)
y = torch.randint(0, 2, (1000,))
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Model, loss function and optimizer
model = SimpleNN(10, 64, 2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_X, batch_y in dataloader:
        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # Backward pass: clear stale gradients, backpropagate, then update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss once per epoch.
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(dataloader):.4f}")

# Evaluation on freshly sampled random data; no gradients are needed here.
model.eval()
with torch.no_grad():
    test_X = torch.randn(100, 10)
    test_y = torch.randint(0, 2, (100,))
    outputs = model(test_X)
    predicted = outputs.argmax(dim=1)  # index of the highest score per row
    accuracy = (predicted == test_y).float().mean()
    print(f"Accuracy: {accuracy:.4f}")
import torchvision.models as models
import torchvision.transforms as transforms
# --- Transfer learning ----------------------------------------------------

# Load ResNet-18 with ImageNet weights. `weights=` replaces the deprecated
# `pretrained=True`; IMAGENET1K_V1 is the checkpoint that flag used to load.
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Freeze every existing layer.
for param in resnet.parameters():
    param.requires_grad = False

# Replace the final layer; a newly created nn.Linear is trainable by default.
num_features = resnet.fc.in_features
resnet.fc = nn.Linear(num_features, 10)  # 10 classes

# Input preprocessing: resize, centre-crop to 224, convert to tensor, then
# normalise each channel with the mean/std values given below.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Train only the new final layer: the optimizer sees just its parameters.
optimizer = optim.Adam(resnet.fc.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
# ... training loop
import os

import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data.distributed import DistributedSampler

# --- Distributed training (one process per GPU) ---------------------------

# Initialize the process group; LOCAL_RANK is read from the environment.
dist.init_process_group("nccl")
local_rank = int(os.environ["LOCAL_RANK"])
# Bind this process to its own GPU before creating any CUDA tensors.
torch.cuda.set_device(local_rank)

# Create the model, move it to this process's GPU, and wrap it in DDP.
model = SimpleNN(10, 64, 2).cuda(local_rank)
model = DDP(model, device_ids=[local_rank])

# Distributed data loading.
# NOTE(review): train_dataset is not defined in this snippet -- supply a
# torch.utils.data.Dataset before running.
train_sampler = DistributedSampler(train_dataset)
train_loader = DataLoader(train_dataset,
                          batch_size=32,
                          sampler=train_sampler,
                          num_workers=4)

# Training
for epoch in range(num_epochs):
    # Re-seed the sampler so each epoch uses a different shuffle order.
    train_sampler.set_epoch(epoch)
    for batch in train_loader:
        pass  # ... training code

# Release the process group's resources once training has finished.
dist.destroy_process_group()
# --- Saving, loading and exporting ----------------------------------------

# Save only the parameters (state dict).
torch.save(model.state_dict(), "model.pth")

# Load: rebuild the architecture, then restore the parameters.
# weights_only=True restricts unpickling to tensors and plain containers.
model = SimpleNN(10, 64, 2)
model.load_state_dict(torch.load("model.pth", weights_only=True))
model.eval()

# Save the complete model object (architecture included).
torch.save(model, "model_full.pth")
# A fully pickled module needs weights_only=False (the default became True
# in PyTorch 2.6).
# SECURITY: this executes arbitrary pickle code -- only load trusted files.
model = torch.load("model_full.pth", weights_only=False)

# Export to ONNX using a sample input of shape (1, 10).
dummy_input = torch.randn(1, 10)
torch.onnx.export(model, dummy_input, "model.onnx")

# Export with TorchScript.
scripted_model = torch.jit.script(model)
scripted_model.save("model.pt")
from torch.utils.tensorboard import SummaryWriter
# The context manager closes the writer even if a logging call raises,
# replacing the explicit writer.close() at the end.
with SummaryWriter("logs") as writer:
    # Log the training loss
    writer.add_scalar("Loss/train", loss, epoch)
    # Log the training accuracy
    writer.add_scalar("Accuracy/train", accuracy, epoch)
    # Log the model graph
    writer.add_graph(model, dummy_input)
    # Log images
    # NOTE(review): batch_images is not defined in this snippet -- presumably
    # an image tensor supplied by the caller; confirm the expected layout.
    writer.add_image("images", batch_images, epoch)
    # Log hyperparameters together with a result metric
    writer.add_hparams({"lr": 0.001, "batch_size": 32}, {"accuracy": 0.95})
| 对比项 | PyTorch | TensorFlow | JAX |
|---|---|---|---|
| 计算图 | 动态 | 静态/动态 | 动态 |
| 学习曲线 | 平缓 | 中等 | 陡峭 |
| 学术界 | 最流行 | 流行 | 增长中 |
| 生产部署 | ✅ TorchScript | ✅ 完善 | ❌ 有限 |
| 生态 | 丰富 | 极丰富 | 增长中 |
Python、NumPy、机器学习基础
张量操作、自动微分、简单神经网络
CNN、RNN、LSTM、迁移学习
分布式训练、模型部署、Transformer、大模型微调
PyTorch 是深度学习研究的"首选工具"。
它凭借动态计算图、Pythonic 风格和强大的 GPU 支持,让深度学习研究和开发变得高效而愉悦。PyTorch 是学术界和工业界都喜爱的框架。
"PyTorch 让深度学习变得像写 Python 一样自然。" 🔥