AlexNet是由Alex Krizhevsky、Ilya Sutskever和Geoffrey Hinton在2012年ImageNet图像分类竞赛中提出的一种经典的卷积神经网络。当时,AlexNet在 ImageNet 大规模视觉识别竞赛中取得了优异的成绩,把深度学习模型在比赛中的正确率提升到一个前所未有的高度。因此,它的出现对深度学习发展具有里程碑式的意义。

基本结构

AlexNet输入为RGB三通道的224 × 224 × 3大小的图像(也可填充为227 × 227 × 3 )。AlexNet 共包含5 个卷积层(包含3个池化)和 3 个全连接层。其中,每个卷积层都包含卷积核、偏置项、ReLU激活函数和局部响应归一化(LRN)模块。第1、2、5个卷积层后面都跟着一个最大池化层,后三个层为全连接层。最终输出层为softmax,将网络输出转化为概率值,用于预测图像的类别。

由于ImageNet数据集太大,本文以MNIST数据集进行代替,修改网络参数,输入通道为1,输出结果为10个。

代码实现

model.py

import torchfrom torch import nnclass AlexNet(nn.Module):def __init__(self, *args, **kwargs) -> None:super().__init__(*args, **kwargs)self.model = nn.Sequential(nn.Conv2d(1,96,kernel_size=11,stride=4,padding=1),nn.ReLU(),nn.MaxPool2d(kernel_size=3,stride=2),nn.Conv2d(96,256,kernel_size=5,padding=2),nn.ReLU(),nn.MaxPool2d(kernel_size=3,stride=2),nn.Conv2d(256,384,kernel_size=3,padding=1),nn.ReLU(),nn.Conv2d(384,384,kernel_size=3,padding=1),nn.ReLU(),nn.Conv2d(384,256,kernel_size=3,padding=1),nn.ReLU(),nn.MaxPool2d(kernel_size=3,stride=2),nn.Flatten(),nn.Linear(6400,4096),nn.ReLU(),nn.Dropout(p=0.5),nn.Linear(4096,4096),nn.ReLU(),nn.Dropout(p=0.5),nn.Linear(4096,10))def forward(self,x):return self.model(x)# 验证网络正确性if __name__ == '__main__':net = AlexNet()my_input = torch.ones((64,1,28,28))my_output = net(my_input)print(my_output.shape)

train.py

import torchfrom torch import nnfrom torch.utils.data import DataLoaderfrom torch.utils.tensorboard import SummaryWriterfrom torchvision import datasetsfrom torchvision.transforms import transformsfrom model import AlexNet# 扫描数据次数epochs = 10# 分组大小batch = 64# 学习率learning_rate = 0.01# 训练次数train_step = 0# 测试次数test_step = 0# 定义图像转换transform = transforms.Compose([transforms.Resize(224),transforms.ToTensor()])# 读取数据train_dataset = datasets.MNIST(root="./dataset",train=True,transform=transform,download=True)test_dataset = datasets.MNIST(root="./dataset",train=False,transform=transform,download=True)# 加载数据train_dataloader = DataLoader(train_dataset,batch_size=batch,shuffle=True,num_workers=0)test_dataloader = DataLoader(test_dataset,batch_size=batch,shuffle=True,num_workers=0)# 数据大小train_size = len(train_dataset)test_size = len(test_dataset)print("训练集大小:{}".format(train_size))print("验证集大小:{}".format(test_size))# GPUdevice = torch.device("mps" if torch.backends.mps.is_available() else "cpu")print(device)# 创建网络net = AlexNet()net = net.to(device)# 定义损失函数loss = nn.CrossEntropyLoss()loss = loss.to(device)# 定义优化器optimizer = torch.optim.SGD(net.parameters(),lr=learning_rate)writer = SummaryWriter("logs")# 训练for epoch in range(epochs):print("-------------------第 {} 轮训练开始-------------------".format(epoch))net.train()for data in train_dataloader:train_step = train_step + 1images,targets = dataimages = images.to(device)targets = targets.to(device)outputs = net(images)loss_out = loss(outputs,targets)optimizer.zero_grad()loss_out.backward()optimizer.step()if train_step%100==0:writer.add_scalar("Train Loss",scalar_value=loss_out.item(),global_step=train_step)print("训练次数:{},Loss:{}".format(train_step,loss_out.item()))# 测试net.eval()total_loss = 0total_accuracy = 0with torch.no_grad():for data in test_dataloader:test_step = test_step + 1images, targets = dataimages = images.to(device)targets = targets.to(device)outputs = net(images)loss_out = loss(outputs, targets)total_loss = total_loss + loss_outaccuracy = (targets == torch.argmax(outputs,dim=1)).sum()total_accuracy = total_accuracy + accuracy# 计算精确率print(total_accuracy)accuracy_rate = total_accuracy / test_sizeprint("第 {} 轮,验证集总损失为:{}".format(epoch+1,total_loss))print("第 {} 轮,精确率为:{}".format(epoch+1,accuracy_rate))writer.add_scalar("Test Total Loss",scalar_value=total_loss,global_step=epoch+1)writer.add_scalar("Accuracy Rate",scalar_value=accuracy_rate,global_step=epoch+1)torch.save(net,"./model/net_{}.pth".format(epoch+1))print("模型net_{}.pth已保存".format(epoch+1))