1. Preliminaries
1.1. Data Manipulation
# list all function and attribute names in a PyTorch module (here torch.distributions)
import torch
print(dir(torch.distributions))
print('1. Creating tensors')
# ones creates a new tensor of the given shape with every element set to 1
t = torch.ones(4)
print('t:', t)
x = torch.arange(12)
print('x:', x)
print('x shape:', x.shape) # access the tensor's shape
y = x.reshape(3, 4) # change the shape of a tensor without changing the number or values of its elements
print('y:', y)
print('y.numel():', y.numel()) # total number of elements in the tensor
z = torch.zeros(2, 3, 4) # create a tensor whose elements are all 0
print('z:', z)
w = torch.randn(2, 3, 4) # each element is sampled from a standard Gaussian (normal) distribution with mean 0 and standard deviation 1
print('w:', w)
q = torch.tensor([[1, 2, 3], [4, 3, 2], [7, 4, 3]]) # assign exact values to each element by supplying a (nested) Python list of numbers
print('q:', q)
print('2. Tensor operations')
x = torch.tensor([1.0, 2, 4, 8])
y = torch.tensor([2, 2, 2, 2])
print(x + y)
print(x - y)
print(x * y)
print(x / y)
print(x ** y) # the ** operator performs exponentiation
print(torch.exp(x))
X = torch.arange(12, dtype=torch.float32).reshape(3, 4)
Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
print('cat along dim=0:', torch.cat((X, Y), dim=0))
print('cat along dim=1:', torch.cat((X, Y), dim=1)) # concatenate: join tensors end to end to form a larger tensor
print('X == Y', X == Y) # build a binary tensor via a logical operator
print('X < Y', X < Y)
print('sum of all elements:', X.sum()) # sum of every element in the tensor
print('3. Broadcasting')
a = torch.arange(3).reshape(3, 1)
b = torch.arange(2).reshape(1, 2)
print('a:', a)
print('b:', b)
print('a + b:', a + b) # broadcasting expands both operands to a common shape before adding
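To make the shape expansion explicit, here is a minimal sketch continuing with the a and b defined above (torch.broadcast_tensors is not used elsewhere in these notes):
a2, b2 = torch.broadcast_tensors(a, b) # both are expanded to shape (3, 2) before the elementwise add
print('a2:', a2)
print('b2:', b2)
print((a + b == a2 + b2).all()) # tensor(True): a + b equals the broadcast-then-add result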
print('4. Indexing and slicing')
X = torch.arange(12, dtype=torch.float32).reshape(3, 4)
print('X:', X)
print('X[-1]:', X[-1]) # [-1] selects the last row
print('X[1:3]:', X[1:3]) # [1:3] selects the second and third rows
X[1, 2] = 9 # write a single element
print('X:', X)
X[0:2, :] = 12 # write to multiple elements at once
print('X:', X)
print('5. Saving memory')
before = id(Y) # id() gives the exact address of the referenced object in memory
Y = Y + X
print(id(Y) == before)
before = id(X)
X += Y
print(id(X) == before) # use X[:] = X + Y or X += Y to reduce the memory overhead of an operation
before = id(X)
X[:] = X + Y
print(id(X) == before) # slice assignment also reuses the existing memory
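A related trick, sketched here with the same X and Y as above: preallocate a result buffer and write into it with slice assignment, so repeated updates reuse one block of memory.
Z = torch.zeros_like(Y) # preallocate a buffer with the same shape as Y
before = id(Z)
Z[:] = X + Y # slice assignment writes into the existing buffer
print(id(Z) == before) # True: no new tensor was allocated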
print('6. Converting to other Python objects')
Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
A = Y.numpy()
print(type(A)) # print the type of A
print(A)
B = torch.tensor(A)
print(type(B)) # print the type of B
print(B)
a = torch.tensor([3.5])
print(a, a.item(), float(a), int(a))
1.2. Data Preprocessing
import os
import numpy as np
import pandas as pd
import torch
from numpy import nan as NaN
os.makedirs(os.path.join('..', 'data'), exist_ok=True) # create a data directory in the parent directory
datafile = os.path.join('..', 'data', 'house_tiny.csv') # path of the CSV file to create
with open(datafile, 'w') as f: # write rows into the file
    f.write('NumRooms,Alley,Price\n') # column names
    f.write('NA,Pave,127500\n') # row 1
    f.write('2,NA,106000\n') # row 2
    f.write('4,NA,178100\n') # row 3
    f.write('NA,NA,140000\n') # row 4
data = pd.read_csv(datafile) # note that the NA entries in the raw table are parsed as NaN
print('1. Raw data:\n', data)
inputs, outputs = data.iloc[:, 0: 2], data.iloc[:, 2]
inputs = inputs.fillna(inputs.mean(numeric_only=True)) # fill NaN with the column mean; numeric_only avoids a TypeError on the string column in pandas >= 2.0
print(inputs)
print(outputs)
# Use the pandas get_dummies function to handle discrete or categorical values.
# For categorical values in inputs we treat NaN as a category of its own. Since the Alley column only takes the values Pave and NaN,
# get_dummies expands it into the two columns Alley_Pave and Alley_nan.
inputs = pd.get_dummies(inputs, dummy_na=True)
print('2. After processing with pandas get_dummies:\n', inputs)
x, y = torch.tensor(inputs.to_numpy(dtype=float)), torch.tensor(outputs.to_numpy(dtype=float)) # get_dummies may yield bool columns, so convert explicitly to float before building tensors
print('3. Converted to tensors:')
print(x)
print(y)
# More on the fillna method
df1 = pd.DataFrame([[1, 2, 3], [NaN, NaN, 2], [NaN, NaN, NaN], [8, 8, NaN]]) # create the initial data
print('4. Usage of fillna:')
print(df1)
print(df1.fillna(100)) # fill with a constant; by default the original object is not modified
print(df1.fillna({0: 10, 1: 20, 2: 30})) # fill each column with a different constant via a dict; the original object is not modified
print(df1.fillna(method='ffill')) # fill with the preceding value (the method= argument is deprecated in pandas 2.x; see below)
# print(df1.fillna(0, inplace=True)) # inplace=True modifies the original object in place and returns None
df2 = pd.DataFrame(np.random.randint(0, 10, (5, 5))) # create a random 5x5 DataFrame of integers
df2.iloc[1:4, 3] = NaN
df2.iloc[2:4, 4] = NaN # insert NaN at the specified positions
print(df2)
print(df2.fillna(method='bfill', limit=2)) # backward fill, limiting the number of consecutive fills
print(df2.fillna(method="ffill", limit=1, axis=1)) # forward fill along axis=1, at most one fill per gap
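Note that the method= argument of fillna used above is deprecated in pandas 2.x; the dedicated ffill/bfill methods are the forward-compatible spelling. A sketch of the equivalents:
print(df1.ffill()) # same as df1.fillna(method='ffill')
print(df2.bfill(limit=2)) # same as df2.fillna(method='bfill', limit=2)
print(df2.ffill(limit=1, axis=1)) # same as df2.fillna(method='ffill', limit=1, axis=1)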
1.3. Linear Algebra
import torch
print('1. Scalars and variables')
x = torch.tensor([3.0])
y = torch.tensor([2.0])
print(x + y, x * y, x / y, x ** y)
x = torch.arange(4)
print('2. Vectors')
print('x:', x)
print('x[3]:', x[3]) # access any element via the tensor's index
print('shape of the tensor:', x.shape) # shape of the tensor
print('length of the tensor:', len(x)) # length of the tensor
z = torch.arange(24).reshape(2, 3, 4)
print('length of a 3D tensor:', len(z))
print('3. Matrices')
A = torch.arange(20).reshape(5, 4)
print('A:', A)
print('A.shape:', A.shape)
print('A.shape[-1]:', A.shape[-1])
print('A.T:', A.T) # transpose of the matrix
print('4. Matrix computations')
A = torch.arange(20, dtype=torch.float32).reshape(5, 4)
B = A.clone() # allocate new memory and assign a copy of A to B
print('A:', A)
print('B:', B)
print('A + B:', A + B) # elementwise matrix addition
print('A * B:', A * B) # elementwise (Hadamard) product, not matrix multiplication
a = 2
X = torch.arange(24).reshape(2, 3, 4)
print('X:', X)
print('a + X:', a + X) # add a scalar to every element of the tensor
print('a * X:', a * X)
print((a * X).shape)
print('5. Sum reductions over a matrix')
print('A:', A)
print('A.shape:', A.shape)
print('A.sum():', A.sum())
print('A.sum(axis=0):', A.sum(axis=0)) # reduce along axis 0 to produce an output vector
print('A.sum(axis=1):', A.sum(axis=1)) # reduce along axis 1 to produce an output vector
print('A.sum(axis=1, keepdims=True)', A.sum(axis=1, keepdims=True)) # keep the number of axes unchanged when summing
print('A.sum(axis=[0, 1]):', A.sum(axis=[0, 1])) # same as A.sum()
print('A.mean():', A.mean())
print('A.sum() / A.numel():', A.sum() / A.numel())
print('6. Vector-vector multiplication (dot product)')
x = torch.arange(4, dtype=torch.float32)
y = torch.ones(4, dtype=torch.float32)
print('x:', x)
print('y:', y)
print('vector-vector dot product:', torch.dot(x, y))
print('7. Matrix-vector multiplication (matrix-vector product)')
print('A:', A) # shape 5x4
print('x:', x) # length-4 vector
print('torch.mv(A, x):', torch.mv(A, x))
print('8. Matrix-matrix multiplication (matrix-matrix product)')
print('A:', A) # shape 5x4
B = torch.ones(4, 3) # shape 4x3
print('B:', B)
print('torch.mm(A, B):', torch.mm(A, B))
print('9. Norms')
u = torch.tensor([3.0, -4.0])
print('L2 norm of the vector:', torch.norm(u)) # L2 norm of a vector
print('L1 norm of the vector:', torch.abs(u).sum()) # L1 norm of a vector
v = torch.ones((4, 9))
print('v:', v)
print('Frobenius norm of the matrix:', torch.norm(v)) # for a matrix, torch.norm computes the Frobenius norm
print('10. Indexing rows of a matrix with an index tensor')
y = torch.arange(10).reshape(5, 2)
print('y:', y)
index = torch.tensor([1, 4])
print('y[index]:', y[index])
print('11. Understanding the gather() function in PyTorch')
a = torch.arange(15).view(3, 5)
print('11.1 gather() on a 2D matrix')
print('a:', a)
b = torch.zeros_like(a)
b[1][2] = 1 # assign a value at the given index
b[0][0] = 1 # assign a value at the given index
print('b:', b)
c = a.gather(0, b) # dim=0: out[i][j] = a[b[i][j]][j]
d = a.gather(1, b) # dim=1: out[i][j] = a[i][b[i][j]]
print('c:', c)
print('d:', d)
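The indexing rules in the comments above can be verified directly; a minimal sketch reusing the same a, b, c and d:
for i in range(a.size(0)):
    for j in range(a.size(1)):
        assert c[i][j] == a[b[i][j]][j] # dim=0 rule: out[i][j] = a[b[i][j]][j]
        assert d[i][j] == a[i][b[i][j]] # dim=1 rule: out[i][j] = a[i][b[i][j]]
print('gather rules verified')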
print('11.2 gather() on a 3D tensor')
a = torch.randint(0, 30, (2, 3, 5))
print('a:', a)
index = torch.LongTensor([[[0, 1, 2, 0, 2],
[0, 0, 0, 0, 0],
[1, 1, 1, 1, 1]],
[[1, 2, 2, 2, 2],
[0, 0, 0, 0, 0],
[2, 2, 2, 2, 2]]])
print(a.size() == index.size())
b = torch.gather(a, 1, index)
print('b:', b)
c = torch.gather(a, 2, index)
print('c:', c)
index2 = torch.LongTensor([[[0, 1, 1, 0, 1],
[0, 1, 1, 1, 1],
[1, 1, 1, 1, 1]],
[[1, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[1, 1, 0, 0, 0]]])
d = torch.gather(a, 0, index2)
print('d:', d)
print('12. Understanding max() and argmax() in PyTorch')
a = torch.tensor([[1, 2, 3], [3, 2, 1]])
b = a.argmax(1)
c = a.max(1)
d = a.max(1)[1]
print('a:', a)
print('a.argmax(1):', b)
print('a.max(1):', c)
print('a.max(1)[1]:', d)
print('13. The item() function')
a = torch.Tensor([1, 2, 3])
print('a[0]:', a[0]) # plain indexing returns a tensor
print('a[0].item():', a[0].item()) # item() extracts a Python number
1.4. Calculus
import numpy as np
from d2l import torch as d2l
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
def f(x):
    return 3 * x ** 2 - 4 * x
def numerical_lim(f, x, h):
    # one-sided (forward) difference quotient approximating f'(x)
    return (f(x + h) - f(x)) / h
h = 0.1
for i in range(5):
    print(f'h={h:.5f}, numerical limit={numerical_lim(f, 1, h):.5f}')
    h *= 0.1
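numerical_lim above is a one-sided difference. A central difference, sketched below for the same f, has error O(h^2) instead of O(h) and so approaches f'(1) = 2 faster:
def central_diff(f, x, h):
    # symmetric difference quotient
    return (f(x + h) - f(x - h)) / (2 * h)
h = 0.1
for i in range(5):
    print(f'h={h:.5f}, central difference={central_diff(f, 1, h):.5f}')
    h *= 0.1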
x = np.arange(0, 3, 0.1)
d2l.plot(x, [f(x), 2 * x - 3], 'x', 'f(x)', legend=['f(x)', 'Tangent line (x=1)']) # f and its tangent at x=1, where f'(1) = 2
d2l.plt.show()
x = np.arange(0.5, 3, 0.2)
d2l.plot(x, [x ** 3 - 1 / x, 4 * x - 4], 'x', 'f(x)', legend=['f(x)', 'Tangent line (x=1)']) # g(x) = x^3 - 1/x and its tangent at x=1, where g'(1) = 4
d2l.plt.show()
1.5. Automatic Differentiation
import torch
print('1. Automatic gradient computation')
x = torch.arange(4.0, requires_grad=True) # 1. attach gradient storage to the variable we differentiate with respect to
print('x:', x)
print('x.grad:', x.grad)
y = 2 * torch.dot(x, x) # 2. record the computation of the target value
print('y:', y)
y.backward() # 3. run its backpropagation function
print('x.grad:', x.grad) # 4. access the resulting gradient
print('x.grad == 4*x:', x.grad == 4 * x)
## compute the gradient of another function
x.grad.zero_()
y = x.sum()
print('y:', y)
y.backward()
print('x.grad:', x.grad)
# backpropagation of a non-scalar variable
x.grad.zero_()
print('x:', x)
y = x * x
y.sum().backward()
print('x.grad:', x.grad)
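Equivalently, a non-scalar y can be backpropagated by passing an explicit gradient of ones instead of summing first; a quick sketch:
x.grad.zero_()
y = x * x
y.backward(gradient=torch.ones_like(y)) # same effect as y.sum().backward()
print('x.grad:', x.grad) # again equals 2 * x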
def f(a):
    b = a * 2
    print(b.norm())
    while b.norm() < 1000: # L2 norm: square root of the sum of squared elements
        b = b * 2
    if b.sum() > 0:
        c = b
    else:
        c = 100 * b
    return c
print('2. Gradients through Python control flow')
a = torch.tensor(2.0) # initialize the variable
a.requires_grad_(True) # 1. enable gradient tracking on the variable we differentiate with respect to
print('a:', a)
d = f(a) # 2. record the target function
print('d:', d)
d.backward() # 3. run backpropagation on the target function
print('a.grad:', a.grad) # 4. retrieve the gradient
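The corresponding d2l chapter also shows how detach() moves part of a computation out of the recorded graph; a minimal sketch:
x = torch.arange(4.0, requires_grad=True)
y = x * x
u = y.detach() # u has the values of y but is treated as a constant
z = u * x
z.sum().backward()
print(x.grad == u) # the gradient of z with respect to x is u, not 3 * x**2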
2. Linear Neural Networks
2.1. Linear Regression
import math
import os
import numpy as np
import torch
from d2l import torch as d2l
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
n = 10000
a = torch.ones(n)
b = torch.ones(n)
c = torch.zeros(n)
timer = d2l.Timer()
for i in range(n):
c[i] = a[i] + b[i]
print(c)
print("{0:.5f} sec".format(timer.stop()))
timer.start()
d = a + b
print(d)
print("{0:.5f} sec".format(timer.stop()))
def normal(x, mu, sigma):
p = 1 / math.sqrt(2 * math.pi * sigma ** 2)
return p * np.exp((- 0.5 / sigma ** 2) * (x - mu) ** 2)
## visualize the normal distributions
x = np.arange(-7, 7, 0.01)
params = [(0, 1), (0, 2), (3, 1)]
d2l.plot(x, [normal(x, mu, sigma) for mu, sigma in params], xlabel='x', ylabel='p(x)', figsize=(4.5, 2.5),
legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])
d2l.plt.show()
2.2. Linear Regression Implementation from Scratch
import random
import torch
## with torch.no_grad() disables the autograd machinery:
## it stops gradient tracking, which saves GPU compute and memory, but it does not change the behavior of dropout or batchnorm layers.
## mm only performs matrix multiplication; its two input tensors must have shapes (n, m) and (m, p)
## bmm multiplies two 3D tensors of shapes (b, n, m) and (b, m, p), where the leading b is the batch size; the output has shape (b, n, p)
## matmul performs general tensor multiplication and accepts higher-dimensional inputs.
## Python notes:
## In Python 3, range() returns an iterable object, not a list, so printing it does not print a list.
## list() converts an iterable, such as the object returned by range(), into a list.
## range(start, stop[, step])
## shuffle() randomly reorders the elements of a sequence in place; it must be imported from the random module, e.g. random.shuffle(lst)
## yield makes a function a Python generator
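A quick shape check of the three multiplication routines described above (a sketch; the shapes are arbitrary):
A = torch.randn(2, 3)
B = torch.randn(3, 4)
print(torch.mm(A, B).shape) # torch.Size([2, 4]): plain matrix multiplication
A3 = torch.randn(5, 2, 3)
B3 = torch.randn(5, 3, 4)
print(torch.bmm(A3, B3).shape) # torch.Size([5, 2, 4]): batched matrix multiplication
print(torch.matmul(A3, B).shape) # torch.Size([5, 2, 4]): matmul broadcasts over the batch dimension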
## synthetic dataset
def create_data(w, b, nums_example):
    X = torch.normal(0, 1, (nums_example, len(w)))
    y = torch.matmul(X, w) + b
    print("y_shape:", y.shape)
    y += torch.normal(0, 0.01, y.shape) # add noise
    return X, y.reshape(-1, 1) # reshape y from a row vector into a column vector
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = create_data(true_w, true_b, 1000)
## read the dataset in minibatches
def read_data(batch_size, features, labels):
    nums_example = len(features)
    indices = list(range(nums_example)) # build the index list 0..nums_example-1; list() converts the range object into a list
    random.shuffle(indices) # randomly reorder all elements of the sequence
    for i in range(0, nums_example, batch_size): # range(start, stop, step)
        index_tensor = torch.tensor(indices[i: min(i + batch_size, nums_example)])
        yield features[index_tensor], labels[index_tensor] # index into the tensors with the shuffled indices
batch_size = 10
for X, y in read_data(batch_size, features, labels):
    print("X:", X, "\ny", y)
    break
## initialize the parameters
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# define the model
def net(X, w, b):
    return torch.matmul(X, w) + b
# define the loss function
def loss(y_hat, y):
    # print("y_hat_shape:", y_hat.shape, "\ny_shape:", y.shape)
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2 # reshape y because y_hat has shape [batch_size, 1] while y has shape [batch_size]
# define the optimization algorithm
def sgd(params, batch_size, lr):
    with torch.no_grad(): # stop autograd from tracking the parameter update
        for param in params:
            param -= lr * param.grad / batch_size ## param = param - lr * param.grad / batch_size would rebind param to a new tensor and lose the gradient, making zero_() fail
            param.grad.zero_() ## if the gradient is lost, this raises "'NoneType' object has no attribute 'zero_'"
# train the model
lr = 0.03
num_epochs = 3
for epoch in range(0, num_epochs):
    for X, y in read_data(batch_size, features, labels):
        f = loss(net(X, w, b), y)
        # f has shape (batch_size, 1) rather than being a scalar; its elements are summed
        # to obtain the value whose gradients with respect to w and b we compute
        f.sum().backward()
        sgd([w, b], batch_size, lr) # update the parameters using their gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print("w {0} \nb {1} \nloss {2:f}".format(w, b, float(train_l.mean())))
print("error in w ", true_w - w.reshape(true_w.shape), "\nerror in b ", true_b - b) # reshape w so the difference is elementwise rather than broadcast to a 2x2 matrix
2.3. Concise Implementation of Linear Regression
import torch
from torch.utils import data
from torch import nn
from d2l import torch as d2l
'''Generate the dataset'''
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)
'''Read the dataset'''
def load_array(data_arrays, batch_size, is_train=True): #@save
    """Construct a PyTorch data iterator"""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)
batch_size = 10
data_iter = load_array((features, labels), batch_size)
'''Define the model'''
net = nn.Sequential(nn.Linear(2, 1))
'''Initialize model parameters'''
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)
'''Define the loss function'''
loss = nn.MSELoss()
'''Define the optimization algorithm'''
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
'''Train'''
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')
w = net[0].weight.data
print('estimation error of w:', true_w - w.reshape(true_w.shape))
b = net[0].bias.data
print('estimation error of b:', true_b - b)
2.4. The Image Classification Dataset
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
d2l.use_svg_display()
'''Read the dataset'''
# ToTensor converts the image data from PIL type into 32-bit floating point format,
# dividing by 255 so that all pixel values lie between 0 and 1
trans = transforms.ToTensor()
mnist_train = torchvision.datasets.FashionMNIST(
root="../data", train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.FashionMNIST(
root="../data", train=False, transform=trans, download=True)
def get_fashion_mnist_labels(labels): #@save
    """Return the text labels of the Fashion-MNIST dataset"""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
'''Visualize samples'''
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5): #@save
    """Plot a list of images"""
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # image tensor
            ax.imshow(img.numpy())
        else:
            # PIL image
            ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes
X, y = next(iter(data.DataLoader(mnist_train, batch_size=18)))
show_images(X.reshape(18, 28, 28), 2, 9, titles=get_fashion_mnist_labels(y))
d2l.plt.show() # not needed in a Jupyter notebook, but required when running in PyCharm
'''Read a minibatch'''
batch_size = 256
def get_dataloader_workers(): #@save
    """Use 4 processes to read the data"""
    return 4
'''Put all the components together'''
def load_data_fashion_mnist(batch_size, resize=None): #@save
    """Download the Fashion-MNIST dataset and load it into memory"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=get_dataloader_workers()),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=get_dataloader_workers()))
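A quick check of the combined loader (resize=64 is an arbitrary choice for this sketch):
train_iter, test_iter = load_data_fashion_mnist(32, resize=64)
for X, y in train_iter:
    print(X.shape, X.dtype, y.shape, y.dtype) # torch.Size([32, 1, 64, 64]) torch.float32 torch.Size([32]) torch.int64
    break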
2.5. Softmax Regression Implementation from Scratch
import torch
from IPython import display
from d2l import torch as d2l
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
'''Initialize model parameters'''
num_inputs = 784
num_outputs = 10
W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
'''Define the softmax operation'''
def softmax(X):
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition # broadcasting applies here
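As written, softmax overflows for large logits, since torch.exp of a large value is inf. A numerically stable variant (a sketch, not used by the training code below) subtracts the row-wise maximum first, which leaves the result mathematically unchanged:
def stable_softmax(X):
    X = X - X.max(dim=1, keepdim=True).values # shift each row so its largest logit is 0
    X_exp = torch.exp(X)
    return X_exp / X_exp.sum(dim=1, keepdim=True)
print(stable_softmax(torch.tensor([[1000.0, 0.0]]))) # tensor([[1., 0.]]); the naive version returns nan here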
'''Define the model'''
def net(X):
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)
'''Define the loss function'''
def cross_entropy(y_hat, y):
    return - torch.log(y_hat[range(len(y_hat)), y])
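The expression y_hat[range(len(y_hat)), y] picks out, for each row, the predicted probability of the true class; the book's own toy example makes this concrete:
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
print(y_hat[[0, 1], y]) # tensor([0.1000, 0.5000])
print(cross_entropy(y_hat, y)) # tensor([2.3026, 0.6931]), i.e. -log of those probabilities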
'''Classification accuracy'''
def accuracy(y_hat, y): #@save
    """Compute the number of correct predictions"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())
def evaluate_accuracy(net, data_iter): #@save
    """Compute the accuracy of a model on the given dataset"""
    if isinstance(net, torch.nn.Module):
        net.eval() # set the model to evaluation mode
    metric = Accumulator(2) # number of correct predictions, total number of predictions
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
class Accumulator: #@save
    """Accumulate sums over n variables"""
    def __init__(self, n):
        self.data = [0.0] * n
    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]
    def reset(self):
        self.data = [0.0] * len(self.data)
    def __getitem__(self, idx):
        return self.data[idx]
'''Train'''
def train_epoch_ch3(net, train_iter, loss, updater): #@save
    """Train the model for one epoch (defined in Chapter 3)"""
    # set the model to training mode
    if isinstance(net, torch.nn.Module):
        net.train()
    # sum of training loss, sum of training accuracy, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        # compute gradients and update parameters
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # use PyTorch's built-in optimizer and loss function
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # use the custom optimizer and loss function
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # return training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]
class Animator: #@save
    """Plot data in animation"""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        # incrementally plot multiple lines
        if legend is None:
            legend = []
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # capture the axis-configuration arguments with a lambda
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts
    def add(self, x, y):
        # add multiple data points to the figure
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater): #@save
    """Train a model (defined in Chapter 3)"""
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
lr = 0.1
def updater(batch_size):
return d2l.sgd([W, b], lr, batch_size)
num_epochs = 10
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)
'''Predict'''
def predict_ch3(net, test_iter, n=6): #@save
    """Predict labels (defined in Chapter 3)"""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    d2l.show_images(
        X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])
predict_ch3(net, test_iter)
2.6. Concise Implementation of Softmax Regression
import torch
from torch import nn
from d2l import torch as d2l
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# PyTorch does not implicitly reshape the inputs. Therefore,
# we define a flatten layer before the linear layer to adjust the shape of the network input
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))
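What Flatten does can be seen from a one-line shape check (a sketch with a random batch):
X = torch.randn(2, 1, 28, 28)
print(nn.Flatten()(X).shape) # torch.Size([2, 784]): each 1x28x28 image becomes a length-784 vector, keeping the batch dimension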
def init_weights(m):
if type(m) == nn.Linear:
nn.init.normal_(m.weight, std=0.01)
net.apply(init_weights)
loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(), lr=0.1)
'''Train'''
num_epochs = 10
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)