PyTorch_17_Optimizer


import torch
import torchvision
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=True, num_workers=0, drop_last=False)


class TuDui(nn.Module):
    def __init__(self):
        super(TuDui, self).__init__()

        # self.conv1 = Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2)
        # self.maxpool1 = MaxPool2d(2)
        # self.conv2 = Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2)
        # self.maxpool2 = MaxPool2d(2)
        # self.conv3 = Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2)
        # self.maxpool3 = MaxPool2d(2)
        # self.flatten = Flatten()
        # self.linear1 = Linear(1024, 64)
        # self.linear2 = Linear(64, 10)

        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        # x = self.conv1(x)
        # x = self.maxpool1(x)
        # x = self.conv2(x)
        # x = self.maxpool2(x)
        # x = self.conv3(x)
        # x = self.maxpool3(x)
        # x = self.flatten(x)
        # x = self.linear1(x)
        # x = self.linear2(x)
        x = self.model1(x)
        return x
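
# A quick shape check (illustrative, in the spirit of the commented prints
# below): three conv+pool stages reduce a 32x32 CIFAR10 image to 64 channels
# of 4x4, i.e. 64 * 4 * 4 = 1024 features, which is why the first Linear
# layer takes 1024 inputs.
# check_input = torch.ones((64, 3, 32, 32))
# print(TuDui()(check_input).shape)  # torch.Size([64, 10])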


loss = nn.CrossEntropyLoss()  # expects raw scores of shape (N, C) and class indices of shape (N,)
tudui = TuDui()
# A learning rate of 0.01 is a sensible default: too large and training
# becomes unstable; too small and training is slow.
# A common strategy is to start with a relatively large learning rate and
# decay it later (see the scheduler sketch after the training loop).
optim = torch.optim.SGD(tudui.parameters(), lr=0.01)


for epoch in range(20):
    running_loss = 0.0
    # First role of the loss: it measures the error between the network's
    # outputs and the true targets
    for data in dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        # print("outputs:")
        # print(outputs)
        # print("targets:")
        # print(targets)
        result_loss = loss(outputs, targets)
        # print(result_loss)
        # Zero the gradients: gradients left over from the previous step are useless for this step
        optim.zero_grad()
        # Backpropagate: compute the gradient of the loss w.r.t. every parameter
        result_loss.backward()
        # print("OK")
        # Update the parameters (e.g., the convolution kernel weights) using the gradients
        optim.step()
        # print(result_loss)
        running_loss += result_loss.item()  # .item() extracts the scalar so the computation graph is not kept alive
    print(running_loss)
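
# The "start large, then decay" strategy mentioned above can be implemented
# with a learning-rate scheduler. A minimal sketch, assuming StepLR with
# step_size=5 and gamma=0.1 (both values are illustrative): the learning
# rate is divided by 10 every 5 epochs (0.01 -> 0.001 -> ... -> 1e-06).
# scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=5, gamma=0.1)
# for epoch in range(20):
#     ...               # run one epoch of the training loop above
#     scheduler.step()  # decay the learning rate once per epoch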


# Second role: the optimizer adjusts every trainable parameter, e.g., each
# convolution kernel weight.
# Every parameter to be updated carries a gradient, stored in its .grad attribute.
# During optimization the parameter is adjusted according to this gradient,
# with the goal of driving the overall loss down.
# For example: gradient descent, written out by hand below.
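
# What optim.step() does for plain SGD, written out by hand. A minimal
# sketch, assuming no momentum and no weight decay (the defaults for
# torch.optim.SGD):
with torch.no_grad():  # parameter updates must not be recorded in the graph
    for param in tudui.parameters():
        if param.grad is not None:
            param -= 0.01 * param.grad  # gradient descent: w <- w - lr * grad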
