<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="generator" content="Asciidoctor 2.0.15">
<title>d2l(chapter1-3)</title>
<link rel="stylesheet" href="css/site.css">
<link href="css/custom.css" rel="stylesheet">
<script src="js/setup.js"></script><script defer src="js/site.js"></script>
</head>
<body class="article toc2 toc-left"><div id="banner-container" class="container" role="banner">
<div id="banner" class="contained" role="banner">
<div id="switch-theme">
<input type="checkbox" id="switch-theme-checkbox" />
<label for="switch-theme-checkbox">Dark Theme</label>
</div>
</div>
</div>
<div id="tocbar-container" class="container" role="navigation">
<div id="tocbar" class="contained" role="navigation">
<button id="toggle-toc"></button>
</div>
</div>
<div id="main-container" class="container">
<div id="main" class="contained">
<div id="doc" class="doc">
<div id="header">
<h1>d2l(chapter1-3)</h1>
<div id="toc" class="toc2">
<div id="toctitle">Table of Contents</div>
<span id="back-to-index"><a href="index.html">Back to index</a></span><ul class="sectlevel1">
<li><a href="#_预备知识">1. Preliminaries</a>
<ul class="sectlevel2">
<li><a href="#_数据操作">1.1. Data Manipulation</a></li>
<li><a href="#_数据预处理">1.2. Data Preprocessing</a></li>
<li><a href="#_线性代数">1.3. Linear Algebra</a></li>
<li><a href="#_微积分">1.4. Calculus</a></li>
<li><a href="#_自动微分">1.5. Automatic Differentiation</a></li>
</ul>
</li>
<li><a href="#_线性神经网络">2. Linear Neural Networks</a>
<ul class="sectlevel2">
<li><a href="#_线性回归">2.1. Linear Regression</a></li>
<li><a href="#_线性回归的从零开始实现">2.2. Linear Regression Implementation from Scratch</a></li>
<li><a href="#_线性回归的简洁实现">2.3. Concise Implementation of Linear Regression</a></li>
<li><a href="#_图像分类数据集">2.4. The Image Classification Dataset</a></li>
<li><a href="#_softmax回归的从零开始实现">2.5. Softmax Regression Implementation from Scratch</a></li>
<li><a href="#_softmax回归的简洁实现">2.6. Concise Implementation of Softmax Regression</a></li>
</ul>
</li>
</ul>
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_预备知识"><a class="anchor" href="#_预备知识"></a>1. Preliminaries</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_数据操作"><a class="anchor" href="#_数据操作"></a>1.1. Data Manipulation</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python"># View all function and attribute names in a PyTorch module (here: torch.distributions)
import torch
print(dir(torch.distributions))
print('1. Creating tensors')
# ones creates a new tensor of the given shape with every element set to 1
t = torch.ones(4)
print('t:', t)
x = torch.arange(12)
print('x:', x)
print('x shape:', x.shape)  # access the shape of the vector
y = x.reshape(3, 4)  # change the shape without changing the number or values of the elements
print('y:', y)
print('y.numel():', y.numel())  # total number of elements in the tensor
z = torch.zeros(2, 3, 4)  # create a tensor with all elements set to 0
print('z:', z)
w = torch.randn(2, 3, 4)  # each element sampled from a standard Gaussian (normal) distribution with mean 0 and std 1
print('w:', w)
q = torch.tensor([[1, 2, 3], [4, 3, 2], [7, 4, 3]])  # assign exact values to each element via a (nested) Python list
print('q:', q)
print('2. Tensor operations')
x = torch.tensor([1.0, 2, 4, 8])
y = torch.tensor([2, 2, 2, 2])
print(x + y)
print(x - y)
print(x * y)
print(x / y)
print(x ** y)  # the ** operator is exponentiation
print(torch.exp(x))
X = torch.arange(12, dtype=torch.float32).reshape(3, 4)
Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
print('cat along dim=0', torch.cat((X, Y), dim=0))
print('cat along dim=1', torch.cat((X, Y), dim=1))  # concatenate: stack tensors end to end into a larger tensor
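# Shape check (added note): with X and Y both (3, 4), dim=0 stacks rows to
# give (6, 4) and dim=1 stacks columns to give (3, 8).
print(torch.cat((X, Y), dim=0).shape, torch.cat((X, Y), dim=1).shape)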
print('X == Y', X == Y)  # build a binary tensor via a logical operator
print('X &lt; Y', X &lt; Y)
print('Sum of all elements:', X.sum())
print('3. Broadcasting')
a = torch.arange(3).reshape(3, 1)
b = torch.arange(2).reshape(1, 2)
print('a:', a)
print('b:', b)
print('a + b:', a + b)  # the magic of broadcasting
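# Broadcasting rule (added note): size-1 axes are stretched to match, so the
# (3, 1) and (1, 2) operands are both expanded to (3, 2) before the
# elementwise add.
print((a + b).shape)  # torch.Size([3, 2])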
print('4. Indexing and slicing')
X = torch.arange(12, dtype=torch.float32).reshape(3, 4)
print('X:', X)
print('X[-1]:', X[-1])  # [-1] selects the last row
print('X[1:3]:', X[1:3])  # [1:3] selects the second and third rows
X[1, 2] = 9  # write a single element
print('X:', X)
X[0:2, :] = 12  # write a block of elements
print('X:', X)
print('5. Saving memory')
before = id(Y)  # id() gives the exact address of the referenced object in memory
Y = Y + X
print(id(Y) == before)
before = id(X)
X += Y
print(id(X) == before)  # use X[:] = X + Y or X += Y to reduce the memory overhead of operations
before = id(X)
X[:] = X + Y
print(id(X) == before)  # use X[:] = X + Y or X += Y to reduce the memory overhead of operations
print('6. Converting to other Python objects')
Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
A = Y.numpy()
print(type(A))  # print the type of A
print(A)
B = torch.tensor(A)
print(type(B))  # print the type of B
print(B)
a = torch.tensor([3.5])
print(a, a.item(), float(a), int(a))</code></pre>
</div>
</div>
</div>
<div class="sect2">
<h3 id="_数据预处理"><a class="anchor" href="#_数据预处理"></a>1.2. Data Preprocessing</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">import os
import numpy as np
import pandas as pd
import torch
from numpy import nan as NaN
os.makedirs(os.path.join('..', 'data'), exist_ok=True)  # create a data folder in the parent directory
datafile = os.path.join('..', 'data', 'house_tiny.csv')  # create the file
with open(datafile, 'w') as f:  # write data into the file
    f.write('NumRooms,Alley,Price\n')  # column names
    f.write('NA,Pave,127500\n')  # row 1
    f.write('2,NA,106000\n')  # row 2
    f.write('4,NA,178100\n')  # row 3
    f.write('NA,NA,140000\n')  # row 4
data = pd.read_csv(datafile)  # the NA entries of the raw table are read in as NaN
print('1. Raw data:\n', data)
inputs, outputs = data.iloc[:, 0: 2], data.iloc[:, 2]
inputs = inputs.fillna(inputs.mean(numeric_only=True))  # fill NaN with the column mean (numeric_only is required on newer pandas)
print(inputs)
print(outputs)
# Use pandas' get_dummies function to handle discrete or categorical values.
# For categorical values in inputs, treat "NaN" as a category of its own:
# the "Alley" column only takes the two values "Pave" and "NaN".
inputs = pd.get_dummies(inputs, dummy_na=True)
print('2. After pandas get_dummies:\n', inputs)
x, y = torch.tensor(inputs.values.astype(float)), torch.tensor(outputs.values)  # cast to float so the dummy columns convert cleanly
print('3. Converted to tensors:')
print(x)
print(y)
# More on the fillna function
df1 = pd.DataFrame([[1, 2, 3], [NaN, NaN, 2], [NaN, NaN, NaN], [8, 8, NaN]])  # initial data
print('4. Usage of fillna:')
print(df1)
print(df1.fillna(100))  # fill with a constant; by default the original object is not modified
print(df1.fillna({0: 10, 1: 20, 2: 30}))  # fill each column with a different constant via a dict; the original object is not modified
print(df1.fillna(method='ffill'))  # fill using the preceding value
# print(df1.fillna(0, inplace=True))  # inplace=True modifies the original object directly
df2 = pd.DataFrame(np.random.randint(0, 10, (5, 5)))  # random 5x5 DataFrame
df2.iloc[1:4, 3] = NaN
df2.iloc[2:4, 4] = NaN  # insert NaN at the given positions
print(df2)
print(df2.fillna(method='bfill', limit=2))  # limit the number of filled entries
print(df2.fillna(method="ffill", limit=1, axis=1))</code></pre>
</div>
</div>
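<div class="paragraph">
<p>A version note (not part of the original snippet): newer pandas releases deprecate the <code>method=</code> argument of <code>fillna</code>; <code>ffill()</code> and <code>bfill()</code> are the direct replacements.</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">print(df2.bfill(limit=2))          # same as df2.fillna(method='bfill', limit=2)
print(df2.ffill(limit=1, axis=1))  # same as df2.fillna(method='ffill', limit=1, axis=1)</code></pre>
</div>
</div>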
</div>
<div class="sect2">
<h3 id="_线性代数"><a class="anchor" href="#_线性代数"></a>1.3. Linear Algebra</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">import torch
print('1. Scalars and variables')
x = torch.tensor([3.0])
y = torch.tensor([2.0])
print(x + y, x * y, x / y, x ** y)
x = torch.arange(4)
print('2. Vectors')
print('x:', x)
print('x[3]:', x[3])  # access any element via the tensor's index
print('Shape of the tensor:', x.shape)
print('Length of the tensor:', len(x))
z = torch.arange(24).reshape(2, 3, 4)
print('Length of a 3-D tensor:', len(z))
print('3. Matrices')
A = torch.arange(20).reshape(5, 4)
print('A:', A)
print('A.shape:', A.shape)
print('A.shape[-1]:', A.shape[-1])
print('A.T:', A.T)  # matrix transpose
print('4. Matrix arithmetic')
A = torch.arange(20, dtype=torch.float32).reshape(5, 4)
B = A.clone()  # allocate new memory and assign a copy of A to B
print('A:', A)
print('B:', B)
print('A + B:', A + B)  # matrix addition
print('A * B:', A * B)  # elementwise (Hadamard) product, not matrix multiplication
a = 2
X = torch.arange(24).reshape(2, 3, 4)
print('X:', X)
print('a + X:', a + X)  # add a scalar to every element
print('a * X:', a * X)
print((a * X).shape)
print('5. sum over matrices')
print('A:', A)
print('A.shape:', A.shape)
print('A.sum():', A.sum())
print('A.sum(axis=0):', A.sum(axis=0))  # reduce along axis 0 to produce an output vector
print('A.sum(axis=1):', A.sum(axis=1))  # reduce along axis 1 to produce an output vector
print('A.sum(axis=1, keepdims=True)', A.sum(axis=1, keepdims=True))  # sum while keeping the number of axes unchanged
print('A.sum(axis=[0, 1]):', A.sum(axis=[0, 1]))  # same as A.sum()
print('A.mean():', A.mean())
print('A.sum() / A.numel():', A.sum() / A.numel())
print('6. Vector-vector multiplication (dot product)')
x = torch.arange(4, dtype=torch.float32)
y = torch.ones(4, dtype=torch.float32)
print('x:', x)
print('y:', y)
print('Vector-vector dot product:', torch.dot(x, y))
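# Equivalent formulation (added check): a dot product is an elementwise
# multiply followed by a sum.
print('(x * y).sum():', (x * y).sum())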
print('7. Matrix-vector multiplication')
print('A:', A)  # shape (5, 4)
print('x:', x)  # shape (4,)
print('torch.mv(A, x):', torch.mv(A, x))
print('8. Matrix-matrix multiplication')
print('A:', A)  # shape (5, 4)
B = torch.ones(4, 3)  # shape (4, 3)
print('B:', B)
print('torch.mm(A, B):', torch.mm(A, B))
print('9. Norms')
u = torch.tensor([3.0, -4.0])
print('L2 norm of the vector:', torch.norm(u))
print('L1 norm of the vector:', torch.abs(u).sum())
v = torch.ones((4, 9))
print('v:', v)
print('L2 (Frobenius) norm of the matrix:', torch.norm(v))
print('10. Indexing a matrix with an index tensor')
y = torch.arange(10).reshape(5, 2)
print('y:', y)
index = torch.tensor([1, 4])
print('y[index]:', y[index])
print('11. Understanding the gather() function in PyTorch')
a = torch.arange(15).view(3, 5)
print('11.1 gather() on a 2-D matrix')
print('a:', a)
b = torch.zeros_like(a)
b[1][2] = 1  # assign a value at the given index
b[0][0] = 1  # assign a value at the given index
print('b:', b)
c = a.gather(0, b)  # dim=0
d = a.gather(1, b)  # dim=1
print('c:', c)
print('d:', d)
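# gather semantics (added note): for dim=0, out[i][j] = a[index[i][j]][j];
# for dim=1, out[i][j] = a[i][index[i][j]]. The output always has the same
# shape as the index tensor.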
print('11.2 gather() on a 3-D tensor')
a = torch.randint(0, 30, (2, 3, 5))
print('a:', a)
index = torch.LongTensor([[[0, 1, 2, 0, 2],
                           [0, 0, 0, 0, 0],
                           [1, 1, 1, 1, 1]],
                          [[1, 2, 2, 2, 2],
                           [0, 0, 0, 0, 0],
                           [2, 2, 2, 2, 2]]])
print(a.size() == index.size())
b = torch.gather(a, 1, index)
print('b:', b)
c = torch.gather(a, 2, index)
print('c:', c)
index2 = torch.LongTensor([[[0, 1, 1, 0, 1],
                            [0, 1, 1, 1, 1],
                            [1, 1, 1, 1, 1]],
                           [[1, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0],
                            [1, 1, 0, 0, 0]]])
d = torch.gather(a, 0, index2)
print('d:', d)
print('12. Understanding max() and argmax() in PyTorch')
a = torch.tensor([[1, 2, 3], [3, 2, 1]])
b = a.argmax(1)
c = a.max(1)
d = a.max(1)[1]
print('a:', a)
print('a.argmax(1):', b)
print('a.max(1):', c)
print('a.max(1)[1]:', d)
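# a.max(1) returns a (values, indices) named tuple, so a.max(1)[1] is the
# index tensor and equals a.argmax(1).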
print('13. The item() function')
a = torch.Tensor([1, 2, 3])
print('a[0]:', a[0])  # indexing returns tensor data
print('a[0].item():', a[0].item())  # item() extracts a Python number</code></pre>
</div>
</div>
</div>
<div class="sect2">
<h3 id="_微积分"><a class="anchor" href="#_微积分"></a>1.4. Calculus</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">import numpy as np
from d2l import torch as d2l
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
def f(x):
    return 3 * x ** 2 - 4 * x
def numerical_lim(f, x, h):
    return (f(x + h) - f(x)) / h
h = 0.1
for i in range(5):
    print(f'h={h:.5f}, numerical limit={numerical_lim(f, 1, h):.5f}')
    h *= 0.1
x = np.arange(0, 3, 0.1)
d2l.plot(x, [f(x), 2 * x - 3], 'x', 'f(x)', legend=['f(x)', 'Tangent line (x=1)'])
d2l.plt.show()
x = np.arange(0.5, 3, 0.2)
d2l.plot(x, [x ** 3 - 1 / x, 4 * x - 4], 'x', 'f(x)', legend=['f(x)', 'Tangent line (x=1)'])
d2l.plt.show()</code></pre>
</div>
</div>
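<div class="paragraph">
<p>A small extension (a sketch, not from the original notes): the forward difference above has O(h) error, while the central difference (f(x+h) - f(x-h)) / (2h) has O(h^2) error and converges faster; for the quadratic f above it is exact up to floating-point error.</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">def central_diff(f, x, h):
    return (f(x + h) - f(x - h)) / (2 * h)

h = 0.1
for i in range(5):
    print(f'h={h:.5f}, central difference={central_diff(f, 1, h):.5f}')
    h *= 0.1</code></pre>
</div>
</div>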
</div>
<div class="sect2">
<h3 id="_自动微分"><a class="anchor" href="#_自动微分"></a>1.5. Automatic Differentiation</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">import torch
print('1. Automatic gradient computation')
x = torch.arange(4.0, requires_grad=True)  # 1. attach a gradient to the variable we want partial derivatives for
print('x:', x)
print('x.grad:', x.grad)
y = 2 * torch.dot(x, x)  # 2. record the computation of the target value
print('y:', y)
y.backward()  # 3. run its backpropagation function
print('x.grad:', x.grad)  # 4. read out the resulting gradient
print('x.grad == 4*x:', x.grad == 4 * x)
## Compute the gradient of another function
x.grad.zero_()
y = x.sum()
print('y:', y)
y.backward()
print('x.grad:', x.grad)
# Backpropagation for non-scalar variables
x.grad.zero_()
print('x:', x)
y = x * x
y.sum().backward()
print('x.grad:', x.grad)
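# Equivalent to y.sum().backward() (added sketch): pass an explicit gradient
# of ones so backward computes the same vector-Jacobian product.
x.grad.zero_()
y = x * x
y.backward(torch.ones_like(y))
print('x.grad (via vector-Jacobian product):', x.grad)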
def f(a):
    b = a * 2
    print(b.norm())
    while b.norm() &lt; 1000:  # L2 norm: square root of the sum of squared elements
        b = b * 2
    if b.sum() &gt; 0:
        c = b
    else:
        c = 100 * b
    return c
print('2. Gradients through Python control flow')
a = torch.tensor(2.0)  # initialize the variable
a.requires_grad_(True)  # 1. attach a gradient to the variable we want partial derivatives for
print('a:', a)
d = f(a)  # 2. record the target function
print('d:', d)
d.backward()  # 3. run the target function's backpropagation
print('a.grad:', a.grad)  # 4. read out the gradient</code></pre>
</div>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="_线性神经网络"><a class="anchor" href="#_线性神经网络"></a>2. Linear Neural Networks</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_线性回归"><a class="anchor" href="#_线性回归"></a>2.1. Linear Regression</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">import math
import os
import numpy as np
import torch
from d2l import torch as d2l
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
n = 10000
a = torch.ones(n)
b = torch.ones(n)
c = torch.zeros(n)
timer = d2l.Timer()
for i in range(n):
    c[i] = a[i] + b[i]
print(c)
print("{0:.5f} sec".format(timer.stop()))
timer.start()
d = a + b
print(d)
print("{0:.5f} sec".format(timer.stop()))
def normal(x, mu, sigma):
    p = 1 / math.sqrt(2 * math.pi * sigma ** 2)
    return p * np.exp((- 0.5 / sigma ** 2) * (x - mu) ** 2)
## Visualize normal distributions
x = np.arange(-7, 7, 0.01)
params = [(0, 1), (0, 2), (3, 1)]
d2l.plot(x, [normal(x, mu, sigma) for mu, sigma in params], xlabel='x', ylabel='p(x)', figsize=(4.5, 2.5),
         legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])
d2l.plt.show()</code></pre>
</div>
</div>
</div>
<div class="sect2">
<h3 id="_线性回归的从零开始实现"><a class="anchor" href="#_线性回归的从零开始实现"></a>2.2. Linear Regression Implementation from Scratch</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">import random
import torch
## torch.no_grad() suspends the autograd engine, which speeds things up and
## saves memory: gradient computation is turned off, saving GPU compute and
## memory, without affecting the behavior of dropout and batchnorm layers.
## mm only does 2-D matrix multiplication: the two inputs must have shapes (n, m) and (m, p)
## bmm multiplies two 3-D tensors of shapes (b, n, m) and (b, m, p), where b is the batch size; the output is (b, n, p)
## matmul is general tensor multiplication and accepts higher-dimensional inputs
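# Quick shape check of the three flavors above (illustrative values):
print(torch.mm(torch.randn(2, 3), torch.randn(3, 4)).shape)         # torch.Size([2, 4])
print(torch.bmm(torch.randn(5, 2, 3), torch.randn(5, 3, 4)).shape)  # torch.Size([5, 2, 4])
print(torch.matmul(torch.randn(5, 2, 3), torch.randn(3, 4)).shape)  # torch.Size([5, 2, 4]); matmul broadcasts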
## Python refresher:
## Python 3 range() returns an iterable object, not a list, so printing it does not print a list.
## Python 3 list() converts the iterable returned by range() into an actual list.
## Python 3 range(start, stop[, step])
## Python 3 shuffle() randomly reorders the elements of a sequence; it is not directly accessible and requires importing the random module, e.g. random.shuffle(list)
## Python 3 yield creates a generator
## Synthetic dataset
def create_data(w, b, nums_example):
    X = torch.normal(0, 1, (nums_example, len(w)))
    y = torch.matmul(X, w) + b
    print("y_shape:", y.shape)
    y += torch.normal(0, 0.01, y.shape)  # add noise
    return X, y.reshape(-1, 1)  # reshape y from a row vector into a column vector
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = create_data(true_w, true_b, 1000)
## Read the dataset
def read_data(batch_size, features, labels):
    nums_example = len(features)
    indices = list(range(nums_example))  # generate indices 0-999, converting the iterable returned by range() into a list
    random.shuffle(indices)  # randomly reorder the elements of the sequence
    for i in range(0, nums_example, batch_size):  # range(start, stop, step)
        index_tensor = torch.tensor(indices[i: min(i + batch_size, nums_example)])
        yield features[index_tensor], labels[index_tensor]  # access the tensors by index
batch_size = 10
for X, y in read_data(batch_size, features, labels):
    print("X:", X, "\ny", y)
    break
## Initialize parameters
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# Define the model
def net(X, w, b):
    return torch.matmul(X, w) + b
# Define the loss function
def loss(y_hat, y):
    # the reshape is needed because y_hat has shape (batch_size, 1) while y has shape (batch_size,)
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
# Define the optimization algorithm
def sgd(params, batch_size, lr):
    with torch.no_grad():  # torch.no_grad() suspends the autograd engine here
        for param in params:
            param -= lr * param.grad / batch_size  # writing param = param - lr * param.grad / batch_size would drop the gradient and make zero_() fail
            param.grad.zero_()  # if the gradient were lost, this would raise 'NoneType' object has no attribute 'zero_'
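# Dividing by batch_size turns the gradient of the summed loss into the
# gradient of the mean loss, so the learning rate is independent of the
# batch size.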
# Train the model
lr = 0.03
num_epochs = 3
for epoch in range(0, num_epochs):
    for X, y in read_data(batch_size, features, labels):
        f = loss(net(X, w, b), y)
        # f has shape (batch_size, 1) rather than being a scalar, so all the
        # elements of f are summed before computing the gradient w.r.t. [w, b]
        f.sum().backward()
        sgd([w, b], batch_size, lr)  # update the parameters using their gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print("w {0} \nb {1} \nloss {2:f}".format(w, b, float(train_l.mean())))
print("error in w:", true_w - w.reshape(true_w.shape), "\nerror in b:", true_b - b)</code></pre>
</div>
</div>
</div>
<div class="sect2">
<h3 id="_线性回归的简洁实现"><a class="anchor" href="#_线性回归的简洁实现"></a>2.3. Concise Implementation of Linear Regression</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">import torch
from torch.utils import data
from torch import nn
from d2l import torch as d2l
'''Generate the dataset'''
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)
'''Read the dataset'''
def load_array(data_arrays, batch_size, is_train=True):  #@save
    """Construct a PyTorch data iterator"""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)
batch_size = 10
data_iter = load_array((features, labels), batch_size)
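# Peek at one minibatch to confirm the iterator works (added check)
X, y = next(iter(data_iter))
print(X.shape, y.shape)  # torch.Size([10, 2]) torch.Size([10, 1])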
'''Define the model'''
net = nn.Sequential(nn.Linear(2, 1))
'''Initialize model parameters'''
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)
'''Define the loss function'''
loss = nn.MSELoss()
'''Define the optimization algorithm'''
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
'''Training'''
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')
w = net[0].weight.data
print('estimation error of w:', true_w - w.reshape(true_w.shape))
b = net[0].bias.data
print('estimation error of b:', true_b - b)</code></pre>
</div>
</div>
</div>
<div class="sect2">
<h3 id="_图像分类数据集"><a class="anchor" href="#_图像分类数据集"></a>2.4. The Image Classification Dataset</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
d2l.use_svg_display()
'''Read the dataset'''
# ToTensor converts the image data from PIL type to 32-bit floating point,
# dividing by 255 so that all pixel values fall between 0 and 1
trans = transforms.ToTensor()
mnist_train = torchvision.datasets.FashionMNIST(
    root="../data", train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.FashionMNIST(
    root="../data", train=False, transform=trans, download=True)
def get_fashion_mnist_labels(labels):  #@save
    """Return the text labels of the Fashion-MNIST dataset"""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
'''Visualize samples'''
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):  #@save
    """Plot a list of images"""
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = d2l.plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # image tensor
            ax.imshow(img.numpy())
        else:
            # PIL image
            ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes
X, y = next(iter(data.DataLoader(mnist_train, batch_size=18)))
show_images(X.reshape(18, 28, 28), 2, 9, titles=get_fashion_mnist_labels(y))
d2l.plt.show() # <i class="conum" data-value="1"></i><b>(1)</b>
'''Read a minibatch'''
batch_size = 256
def get_dataloader_workers():  #@save
    """Use 4 processes to read the data"""
    return 4
'''Put all components together'''
def load_data_fashion_mnist(batch_size, resize=None):  #@save
    """Download the Fashion-MNIST dataset and load it into memory"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=get_dataloader_workers()),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=get_dataloader_workers()))</code></pre>
</div>
</div>
<div class="olist arabic">
<ol class="arabic">
<li>
<p>This line is not needed in a Jupyter notebook, but it is required in PyCharm.</p>
</li>
</ol>
</div>
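<div class="paragraph">
<p>A quick usage check (a sketch, not part of the original notes): load the data with <code>resize=64</code> and inspect the shape and dtype of one batch.</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">train_iter, test_iter = load_data_fashion_mnist(32, resize=64)
for X, y in train_iter:
    print(X.shape, X.dtype, y.shape, y.dtype)
    break</code></pre>
</div>
</div>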
</div>
<div class="sect2">
<h3 id="_softmax回归的从零开始实现"><a class="anchor" href="#_softmax回归的从零开始实现"></a>2.5. Softmax Regression Implementation from Scratch</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">import torch
from IPython import display
from d2l import torch as d2l
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
'''Initialize model parameters'''
num_inputs = 784
num_outputs = 10
W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
'''Define the softmax operation'''
def softmax(X):
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # broadcasting is applied here
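# Note (not in the original notes): exponentiating large logits can overflow.
# A numerically stable softmax subtracts the row-wise max before exp, which
# leaves the result unchanged:
#     X_exp = torch.exp(X - X.max(1, keepdim=True).values)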
'''Define the model'''
def net(X):
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)
'''Define the loss function'''
def cross_entropy(y_hat, y):
    return - torch.log(y_hat[range(len(y_hat)), y])
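# y_hat[range(len(y_hat)), y] uses fancy indexing to pick, for each row i,
# the predicted probability of the true class y[i]; the loss is its
# negative log.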
'''Classification accuracy'''
def accuracy(y_hat, y):  #@save
    """Compute the number of correct predictions"""
    if len(y_hat.shape) &gt; 1 and y_hat.shape[1] &gt; 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())
def evaluate_accuracy(net, data_iter):  #@save
    """Compute the accuracy of a model on the given dataset"""
    if isinstance(net, torch.nn.Module):
        net.eval()  # set the model to evaluation mode
    metric = Accumulator(2)  # number of correct predictions, number of predictions
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
class Accumulator:  #@save
    """Accumulate sums over n variables"""
    def __init__(self, n):
        self.data = [0.0] * n
    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]
    def reset(self):
        self.data = [0.0] * len(self.data)
    def __getitem__(self, idx):
        return self.data[idx]
'''Training'''
def train_epoch_ch3(net, train_iter, loss, updater):  #@save
    """Train a model for one epoch (defined in Chapter 3)"""
    # Set the model to training mode
    if isinstance(net, torch.nn.Module):
        net.train()
    # Sum of training loss, sum of training accuracy, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update parameters
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Use PyTorch's built-in optimizer and loss function
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # Use the custom optimizer and loss function
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Return the training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]
class Animator:  #@save
    """Plot data in animation"""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        # Incrementally plot multiple lines
        if legend is None:
            legend = []
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # Use a lambda to capture the arguments
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts
    def add(self, x, y):
        # Add multiple data points to the figure
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):  #@save
    """Train a model (defined in Chapter 3)"""
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss &lt; 0.5, train_loss
    assert train_acc &lt;= 1 and train_acc &gt; 0.7, train_acc
    assert test_acc &lt;= 1 and test_acc &gt; 0.7, test_acc
lr = 0.1
def updater(batch_size):
    return d2l.sgd([W, b], lr, batch_size)
num_epochs = 10
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)
'''Prediction'''
def predict_ch3(net, test_iter, n=6):  #@save
    """Predict labels (defined in Chapter 3)"""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    d2l.show_images(
        X[0:n].reshape((n, 28, 28)), 1, n, titles=titles[0:n])
predict_ch3(net, test_iter)</code></pre>
</div>
</div>
</div>
<div class="sect2">
<h3 id="_softmax回归的简洁实现"><a class="anchor" href="#_softmax回归的简洁实现"></a>2.6. Concise Implementation of Softmax Regression</h3>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">import torch
from torch import nn
from d2l import torch as d2l
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# PyTorch does not implicitly reshape its inputs, so a flatten layer is
# placed before the linear layer to adjust the shape of the network input
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
net.apply(init_weights)
loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(), lr=0.1)
'''Training'''
num_epochs = 10
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)</code></pre>
</div>
</div>
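<div class="paragraph">
<p>Note that <code>nn.CrossEntropyLoss</code> applies log-softmax internally, which is why the network above ends with a raw <code>nn.Linear</code> layer and no explicit softmax. As a follow-up (a sketch reusing the helper saved in the previous section), the trained model's predictions can be visualized with:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="highlight"><code class="language-python" data-lang="python">d2l.predict_ch3(net, test_iter)
d2l.plt.show()</code></pre>
</div>
</div>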
</div>
</div>
</div>
</div>
<div id="footer">
<div id="footer-text">
Last updated 2024-03-18 05:44:42 UTC
</div>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.18.3/highlight.min.js"></script>
<script>
if (!hljs.initHighlighting.called) {
hljs.initHighlighting.called = true
;[].slice.call(document.querySelectorAll('pre.highlight > code')).forEach(function (el) { hljs.highlightBlock(el) })
}
</script>
<script src="https://utteranc.es/client.js"
repo="pxzxj/articles"
issue-term="title"
label="utteranc"
theme="github-light"
crossorigin="anonymous"
async>
</script>
</div>
</div>
</div>
</body>
</html>