two types of data

  • dataset
  • dataloader
dataset

Provide a way to get data and its label

dataloader

为后面的网络提供不同的数据形式

dataset practice

Dataset from torch.utils.data is an abstract class, so we should create a subclass of it.

from torch.utils.data import Dataset
from PIL import Image
import os


class MyData(Dataset):
    """Image dataset whose label for every sample is its folder name.

    Expects image files laid out as ``root_dir/label_dir/<image>``.
    """

    def __init__(self, root_dir, label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        # Directory that actually contains the image files.
        self.path = os.path.join(self.root_dir, self.label_dir)
        # File names of every image in that directory (list of str).
        self.img_path = os.listdir(self.path)

    def __getitem__(self, idx):
        # Open the idx-th image and pair it with the folder-name label.
        file_name = self.img_path[idx]
        item_path = os.path.join(self.root_dir, self.label_dir, file_name)
        return Image.open(item_path), self.label_dir

    def __len__(self):
        # One sample per file found in the label directory.
        return len(self.img_path)


# Build the ant and bee datasets from dataset/train/<label>/ and
# concatenate them into one training set.
root_dir = 'dataset/train'
ants_label_dir = 'ants'
bees_label_dir = 'bees'
ants_dataset = MyData(root_dir, ants_label_dir)
bees_dataset = MyData(root_dir, bees_label_dir)
train_dataset = ants_dataset + bees_dataset  # Dataset.__add__ -> ConcatDataset

tensorboard

from torch.utils.tensorboard import SummaryWriter
# Install TensorBoard manually if it is not available:
# pip install tensorboard
# Create a writer whose event files go into the 'logs' directory.
writer = SummaryWriter('logs')
# Plot y = 2x: tag, y-value, then x-value (the step) for each point.
for i in range(100):
    writer.add_scalar('y=2x', 2 * i, i)
writer.close()

Then in the terminal:

tensorboard --logdir=logs
# logs is the dir path

add_image

The type of the image has to be numpy array. And also, check the dimension of the image. Default is (3, H, W). If not default, set it manually.

writer.add_image('test', img_array, 1, dataformats='HWC')

1 means this is the first step of tag test. In an iteration, we should update step variable after completing one iteration.

transforms

ToTensor

可以改变图像的类型。First create an instance of ToTensor, then call this instance like a function to convert an image. Because there is a function called __call__() inside the class ToTensor

from torchvision import transforms

# ToTensor instances are callable (they define __call__), so create one
# and call it like a function to convert the image to a torch tensor.
tensor_trans = transforms.ToTensor()
tensor_img = tensor_trans(img)  # NOTE(review): assumes `img` is a PIL image loaded earlier

Normalize

It can normalize an image of tensor type.

# Normalize(mean, std) per channel: output = (input - mean) / std.
trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
img_norm = trans_norm(img_tensor)  # expects a tensor image, not a PIL image

The inputs are respectively means and std. Calculation: (input - means) / std

resize

改变图像大小. When inputting a pair of value, the size will be forced to be set to the specified value.

# An (H, W) pair forces the image to exactly 512x512, ignoring aspect ratio.
trans_resize = transforms.Resize((512, 512))
img_resize = trans_resize(img)  # result is still a PIL image

resize doesn’t change the type of img. img is still PIL type after resize. When inputting only one value, the shorter edge will be set to this value. Then scale the image according to the original ratio.

trans_resize = transforms.Resize(512)

compose

组合变换. Input should be a list. Every element should be an instance of functions in transforms. It will execute the function in order.

# Compose runs the listed transforms in order: resize first, then to-tensor.
trans_resize = transforms.Resize((512, 512))
trans_totensor = transforms.ToTensor()
trans_compose = transforms.Compose([trans_resize, trans_totensor])

CIFAR10数据集实践

import torchvision
from torch.utils.tensorboard import SummaryWriter

# Transform pipeline applied to every sample on load; here it only
# converts each PIL image to a tensor.
dataset_trans = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

# Pass the pipeline via the transform argument of each split.
train_set = torchvision.datasets.CIFAR10(root='./CIFAR10', train=True, transform=dataset_trans, download=True)
test_set = torchvision.datasets.CIFAR10(root='./CIFAR10', train=False, transform=dataset_trans, download=True)

writer = SummaryWriter('logs')

# Log the first ten test images to TensorBoard, one per step.
for i in range(10):
    # Each sample is an (image, target) pair.
    img, target = test_set[i]
    writer.add_image('test_set', img, i)

writer.close()  # don't forget to close the writer

dataloader

数据加载器。

import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# 准备测试数据集
# Test split of CIFAR10, converted to tensors on load.
test_data = torchvision.datasets.CIFAR10(root='./CIFAR10', train=False, transform=torchvision.transforms.ToTensor(), download=True)
# 128 images per batch, reshuffled each pass, loaded in the main
# process, and the final short batch kept (drop_last=False).
test_loader = DataLoader(dataset=test_data, batch_size=128, shuffle=True, num_workers=0, drop_last=False)

writer = SummaryWriter('dataloader')
step = 0
for data in test_loader:
    # Each batch is an (images, targets) pair of stacked tensors.
    imgs, targets = data
    writer.add_images('dataloader', imgs, step)
    step += 1

writer.close()

batch_size: how many data samples to pick per time.

shuffle: every time choose data samples differently 洗牌

nn.Module的使用

from torch import nn
import torch

class kevvv(nn.Module):
    """Minimal nn.Module: forward simply adds 1 to its input."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        # Element-wise increment; works for scalar and batched tensors.
        return input + 1


tudui = kevvv()
x = torch.tensor(1.0)
output = tudui(x)  # calling the module dispatches to forward via __call__
print(output)  # tensor(2.)

构造函数直接继承,简化前向传播。输出为tensor(2.)。这段函数会返回一个Tensor类型的数据,值为2

简单卷积操作

import torch
import torchvision
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import Conv2d
from torch.utils.tensorboard import SummaryWriter

# CIFAR10 test split, converted to tensors on load.
dataset = torchvision.datasets.CIFAR10("CIFAR10", train=False, transform=torchvision.transforms.ToTensor(),
download=True)
dataloader = DataLoader(dataset, batch_size=64)  # 64 images per batch


class Tudui(nn.Module):
    """Single 3x3 convolution mapping 3 input channels to 6 output channels."""

    def __init__(self):
        super(Tudui, self).__init__()
        # stride 1, no padding: a 32x32 input shrinks to 30x30.
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        return self.conv1(x)


tudui = Tudui()
print(tudui)
writer = SummaryWriter("log")
step = 0
for data in dataloader:
    imgs, targets = data
    output = tudui(imgs)
    # Input batch shape: torch.Size([64, 3, 32, 32]).
    print(imgs.shape)
    writer.add_images("input", imgs, step)
    # add_images expects 3-channel images, but the conv output has 6
    # channels ([64, 6, 30, 30]); fold them back into 3-channel images
    # (doubling the batch dim) purely for visualization.
    output = torch.reshape(output, (-1, 3, 30, 30))
    print(output.shape)
    writer.add_images("output", output, step)
    step += 1

When we call the instance tudui and input a parameter, it will use the method forward to deal with it. Here it is self.conv1(), namely Conv2d().

Dilation 空洞卷积

卷积的对象之间有间隔,而不是连续的。

pooling layer

ceil_mode参数。当为真时,采用ceil模式取整,在即使剩余元素不够理想大小的时候,也进行池化操作并且输出。默认为假。

import torch
from torch import nn
from torch.nn import MaxPool2d

# Build the 5x5 sample input as float32 explicitly: an all-int literal
# would default to int64, and MaxPool2d is not implemented for Long
# tensors, so the pooling layer below would raise a RuntimeError.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
# -1 lets reshape infer the batch dim: (N, C, H, W) = (1, 1, 5, 5).
input = torch.reshape(input, (-1, 1, 5, 5))
print(input.shape)

class Tudui(nn.Module):
    """3x3 max pooling with ceil_mode, so partial edge windows still pool."""

    def __init__(self):
        super(Tudui, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        # ceil_mode=True: windows smaller than 3x3 at the edges are
        # still pooled instead of being dropped.
        return self.maxpool1(input)

tudui = Tudui()
output = tudui(input)  # max-pools the (1, 1, 5, 5) input built above
print(output)

In this program, we carry out an operation of pooling to choose the maximum value of each part. And because of ceil_mode=True, although there are some parts not fully matched with the size 3 by 3, it can still be pooled.

image-20240111163211021

效果展示:

image-20240111164923890

tensor类型仍然需要reshap以获取batch_size和通道

import torch

# Demonstrate adding batch and channel dims to a raw 2x2 tensor.
input = torch.tensor([[1, -0.5], [-1, 3]])
print(input.shape)  # torch.Size([2, 2])
input = input.reshape(-1, 1, 2, 2)
print(input.shape)  # torch.Size([1, 1, 2, 2])

输出如下:

torch.Size([2, 2])
torch.Size([1, 1, 2, 2])

nonlinear activation layer

import torch
import torchvision.datasets
from torch import nn
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# CIFAR10 test split, converted to tensors on load.
dataset = torchvision.datasets.CIFAR10("CIFAR10", train=False, download=True,
transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=64)

# Small hand-built tensor showing the expected (N, C, H, W) layout.
input = torch.tensor([[1, -0.5],
[-1, 3]])
print(input.shape)
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)


class Kev(nn.Module):
    """Holds ReLU and Sigmoid layers; forward applies only the sigmoid."""

    def __init__(self):
        super(Kev, self).__init__()
        self.relu1 = ReLU()      # defined but not used in forward
        self.sigmoid = Sigmoid()

    def forward(self, input):
        return self.sigmoid(input)


writer = SummaryWriter("logs_relu")

kev = Kev()
step = 0
# Log each batch before and after the sigmoid activation.
for data in dataloader:
    imgs, targets = data
    output = kev(imgs)
    writer.add_images("input", imgs, step)
    writer.add_images("output", output, step)
    step += 1

# Flush and release the event file; the original snippet leaked the
# writer by never closing it (every other snippet in these notes does).
writer.close()

linear layer

import torchvision
import torch
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import Linear

# CIFAR10 test split, converted to tensors on load.
dataset = torchvision.datasets.CIFAR10("CIFAR10", train=False, transform=torchvision.transforms.ToTensor(),
download=True)

# drop_last=True: the downstream Linear(196608, 10) flattens a WHOLE
# 64-image batch into one 196608-vector (64 * 3 * 32 * 32), so the final
# partial batch (10000 % 64 = 16 images) would produce a 49152-vector
# and crash with a shape mismatch if it were kept.
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)
class Kev(nn.Module):
    """One linear layer mapping a flattened 64-image batch to 10 values."""

    def __init__(self):
        super(Kev, self).__init__()
        # 196608 = 64 * 3 * 32 * 32: an entire CIFAR10 batch flattened.
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        return self.linear1(input)


kev = Kev()

for data in dataloader:
    imgs, targets = data
    # Flatten the whole batch into one (1, 1, 1, 196608) vector;
    # 64 * 3 * 32 * 32 = 196608, so this requires full batches.
    output = torch.reshape(imgs, (1, 1, 1, -1))
    # output = torch.flatten(imgs)  # alternative: a single 1-D tensor
    output = kev(output)
    print(output.shape)

There are two methods we can use to change the shape of imgs here: reshape and flatten. flatten collapses imgs into a single dimension, whereas the reshape call above keeps 4 dimensions.

sequential

一种把多个layer放在一个序列里面的方法。可以方便管理,简化逻辑。

Loss function, back propagation, optimizer

# One training step. NOTE(review): `net`, `outputs`, `targets` and
# `learning_rate` are assumed to be defined by the surrounding training
# script -- this fragment is not self-contained.
# define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.SGD(net.parameters(), learning_rate)
# calculate loss
result_loss = loss_fn(outputs, targets)
optim.zero_grad() # zero previous gradient
result_loss.backward()
optim.step()
# Run the last four statements inside the training loop: clear old
# gradients, backpropagate the new loss, then update the parameters.

The code above should be run in a loop.

save and load neural network

vgg16 = torchvision.models.vgg16(pretrained=False)

# Method 1: save the whole model object (structure + parameters).
torch.save(vgg16, "vgg16_method1.pth")

# Method 2: save only the parameters (state dict).
torch.save(vgg16.state_dict(), "vgg16_method2.pth")

# Method 1 load: restores the full model object directly.
model = torch.load("vgg16_method1.pth")
# Method 2 load: rebuild the architecture, then load the weights into it.
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
print(model)

# Caveat of method 1: a custom model saved this way cannot be loaded
# unless the loading code can still import the original class
# definition. Method 2 is the officially recommended approach for both
# saving and loading.