-
[기계학습] CIFAR-10 데이터를 분류하는 분류기를 만들어보자(with CNN) (코드실습)
코딩(Coding)/기계학습 · 2022. 2. 25. 13:29
CIFAR-10 데이터를 분류하는 분류기를 만들어보자(with CNN) (코드실습)
오늘은 CIFAR-10 데이터를 분류하는 분류기 모델 설계를 코드실습해 보려고 한다.
CIFAR-10 Dataset
- https://www.cs.toronto.edu/~kriz/cifar.html
- The CIFAR-10 and CIFAR-100 are labeled subsets of the 80 million tiny images dataset.
- They were collected by Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton.
import pandas as pd import numpy as np import matplotlib.pyplot as plt from tqdm import tqdm, notebook import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torch.utils.data import Dataset, DataLoader from torchsummary import summary as summary_ USE_CUDA = torch.cuda.is_available() DEVICE = "cuda" if USE_CUDA else "cpu"
우선 필요한 라이브러리를 import 해준다.
데이터 Import & 전처리
from torchvision import transforms, datasets

# ToTensor yields values in [0, 1]; normalising every channel with mean 0.5
# and std 0.5 rescales them to [-1, 1].
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # random left-right flip, train-time augmentation only
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Evaluation has to be deterministic and run on unmodified images, so the
# test pipeline applies no random augmentation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10("../data/CIFAR_10/", train=True, download=True,
                     transform=train_transform),
    batch_size=64,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10("../data/CIFAR_10", train=False,
                     transform=test_transform),
    batch_size=64,
)

# Human-readable class names, looked up with the integer label of a sample.
label_names = ["airplane", "automobile", "bird", "cat", "deer",
               "dog", "frog", "horse", "ship", "truck"]
이후 `torchvision`에 내장되어 있는 CIFAR-10 데이터를 로드해준다.
Train 데이터와 Test 데이터를 불러오면서, 랜덤 좌우반전(RandomHorizontalFlip)과 정규화를 적용해준다.
데이터 살펴보기
간단하게 데이터가 어떻게 생겼는지 살펴보자
# The images are low-resolution, so they look blurry to a human eye, but the
# outlines are still clear enough to tell the classes apart.
import itertools

# Pull only batch index 27 instead of materialising every batch of the loader.
imgs, labels = next(itertools.islice(train_loader, 27, None))

# Show the first five samples of that batch.
for img, label in itertools.islice(zip(imgs, labels), 5):
    print("label :", label_names[label])
    print("img's shape : {}".format(img.shape))
    # Undo the loader's Normalize(mean=0.5, std=0.5) so imshow receives values
    # in [0, 1], and move the channel axis last: (C, H, W) -> (H, W, C).
    plt.imshow((img * 0.5 + 0.5).clamp(0, 1).permute(1, 2, 0).numpy())
    plt.show()
    print()
label : horse img's shape : torch.Size([3, 32, 32])
label : deer
img's shape : torch.Size([3, 32, 32])
label : horse
img's shape : torch.Size([3, 32, 32])
label : airplane
img's shape : torch.Size([3, 32, 32])
label : frog
img's shape : torch.Size([3, 32, 32])
학습, 검증 함수 정의
학습과 검증에 사용할 함수를 정의해준다.
loss_fn = nn.CrossEntropyLoss()


def calc_acc(X, Y):
    """Return how many rows of ``X`` have their arg-max equal to the label in ``Y``.

    Args:
        X: Score tensor; the predicted class is the arg-max along ``dim=1``.
        Y: Tensor of target class indices, compared element-wise with the
            predictions.

    Returns:
        The number of correct predictions as a Python ``int`` (a count, not a
        ratio).
    """
    return (X.argmax(dim=1) == Y).sum().item()


def train(EPOCHS, model, train_loader, test_loader, opt):
    """Train ``model`` for ``EPOCHS`` epochs, evaluating after every epoch.

    Args:
        EPOCHS: Number of passes over ``train_loader``.
        model: Module to optimise; batches are moved to the module-level
            ``DEVICE`` before being fed to it.
        train_loader: Loader yielding ``(img, label)`` batches for training.
        test_loader: Loader passed to ``evaluate`` at the end of each epoch.
        opt: Optimizer driving the parameter updates.

    Returns:
        A tuple ``(train_loss_history, train_acc_history, test_loss_history,
        test_acc_history)`` of lists holding one float per epoch. The training
        loss is the sample-weighted mean over the epoch and the training
        accuracy is the fraction of processed samples classified correctly.
    """
    train_loss_history = []
    test_loss_history = []
    train_acc_history = []
    test_acc_history = []

    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)
    progress_fmt = "Training : [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Acc : {:.3f}"

    for epoch in range(1, EPOCHS + 1):
        model.train()
        print("<<< EPOCH {} >>>".format(epoch))

        correct = 0      # correctly classified samples so far in this epoch
        seen = 0         # samples processed so far in this epoch
        loss_sum = 0.0   # sum of per-batch mean losses weighted by batch size

        for batch_idx, (img, label) in enumerate(notebook.tqdm(train_loader)):
            img, label = img.to(DEVICE), label.to(DEVICE)

            output = model(img)            # forward pass
            loss = loss_fn(output, label)  # loss for this batch

            opt.zero_grad()                # clear gradients of the previous step
            loss.backward()                # back-propagate
            opt.step()                     # update the weights

            batch_size = len(label)
            batch_loss = loss.item()
            loss_sum += batch_loss * batch_size
            correct += calc_acc(output, label)
            seen += batch_size

            if batch_idx % 100 == 0 and batch_idx != 0:
                # Running accuracy is relative to the samples seen so far;
                # dividing by the full dataset size would under-report it.
                print(progress_fmt.format(
                    batch_idx * len(img), n_samples,
                    100. * batch_idx / n_batches,
                    batch_loss, correct / seen))

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        epoch_loss = loss_sum / max(seen, 1)
        epoch_acc = correct / max(seen, 1)
        print(progress_fmt.format(
            n_samples, n_samples, 100., epoch_loss, epoch_acc))

        t_loss, t_acc = evaluate(model, test_loader)
        print("[{}] Test Loss : {:.4f}\t accuracy: {:.2f}%\n".format(epoch, t_loss, t_acc*100.))

        # Record the epoch mean instead of the last batch's loss so the
        # training curve is comparable with the averaged test loss.
        train_loss_history.append(epoch_loss)
        train_acc_history.append(epoch_acc)
        test_loss_history.append(float(t_loss))
        test_acc_history.append(t_acc)

    return train_loss_history, train_acc_history, test_loss_history, test_acc_history


def evaluate(model, test_loader):
    """Compute the mean loss and the accuracy of ``model`` on ``test_loader``.

    The model is switched to eval mode and left in that mode; gradients are
    disabled while the loader is consumed.

    Args:
        model: Module to evaluate; batches are moved to the module-level
            ``DEVICE`` before being fed to it.
        test_loader: Loader yielding ``(img, label)`` batches.

    Returns:
        A tuple ``(t_loss, t_acc)``: ``t_loss`` is a 0-dim tensor holding the
        mean of the per-batch losses, ``t_acc`` is the fraction of samples in
        ``test_loader.dataset`` that were classified correctly.
    """
    model.eval()
    # Accumulate in a tensor so the return type is a tensor even when the
    # loader yields no batches.
    t_loss = torch.zeros((), device=DEVICE)
    correct = 0
    with torch.no_grad():
        for img, label in notebook.tqdm(test_loader):
            img, label = img.to(DEVICE), label.to(DEVICE)
            output = model(img)
            t_loss += loss_fn(output, label)
            correct += calc_acc(output, label)

    # max(..., 1) keeps an empty loader/dataset from raising ZeroDivisionError.
    t_loss /= max(len(test_loader), 1)
    t_acc = correct / max(len(test_loader.dataset), 1)
    return t_loss, t_acc
모델 정의
모델은 총 3개를 정의하였다.
- Simple Linear Net
- Simple CNN Net
- Simple CNN Net + Channel Attention
모델 구현 코드는 아래에 차례대로 있다.
Linear
class LinearNet(nn.Module):
    """Fully connected classifier: 3*32*32 -> 1024 -> 128 -> 10 with ReLU in between."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3 * 32 * 32, 1024)
        self.fc2 = nn.Linear(1024, 128)
        self.fc3 = nn.Linear(128, 10)
        self.act_fn = nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` to rows of 3*32*32 values and return the 10 output scores per row."""
        flat = x.view(-1, 3 * 32 * 32)
        hidden = self.act_fn(self.fc1(flat))
        hidden = self.act_fn(self.fc2(hidden))
        # No activation on the last layer: the raw scores are returned.
        return self.fc3(hidden)
CNN
class CNN(nn.Module):
    """Four conv + max-pool stages followed by a two-layer linear classifier.

    Only the second conv stage is batch-normalised. The shape comments in
    ``forward`` are for a (batch, 3, 32, 32) input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        self.bn = nn.BatchNorm2d(32)

        self.fc1 = nn.Linear(128, 32)
        self.fc2 = nn.Linear(32, 10)

        self.pool = nn.MaxPool2d(2, 2)
        self.flatten = nn.AdaptiveAvgPool2d(1)
        self.act_fn = nn.ReLU()
        self.drop = nn.Dropout(p=0.25)

    def forward(self, x):
        """Return the 10 class scores for each image in ``x``."""
        # Stage 1: (batch, 3, 32, 32) -> (batch, 16, 16, 16)
        x = self.pool(self.act_fn(self.conv1(x)))
        # Stage 2, the only batch-normalised one: -> (batch, 32, 8, 8)
        x = self.pool(self.act_fn(self.bn(self.conv2(x))))
        # Stage 3: -> (batch, 64, 4, 4)
        x = self.pool(self.act_fn(self.conv3(x)))
        # Stage 4: -> (batch, 128, 2, 2)
        x = self.pool(self.act_fn(self.conv4(x)))

        # Average the remaining spatial grid away: -> (batch, 128)
        features = self.flatten(x).view(-1, 128 * 1 * 1)

        hidden = self.drop(self.act_fn(self.fc1(features)))  # -> (batch, 32)
        return self.fc2(hidden)                              # -> (batch, 10)
CNN(+attention)
class CNN_with_Att(nn.Module):
    """Batch-normalised four-stage CNN with sigmoid gates on stages 2 and 4.

    In the gated stages a parallel 3x3 convolution followed by a sigmoid
    produces a mask with the same shape as the main convolution's output, and
    the two are multiplied element-wise before batch normalisation. The shape
    comments in ``forward`` are for a (batch, 3, 32, 32) input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Gate branches; they mirror conv2 / conv4 in channel counts.
        self.att1 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.att2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)

        self.bn1 = nn.BatchNorm2d(16)
        self.bn2 = nn.BatchNorm2d(32)
        self.bn3 = nn.BatchNorm2d(64)
        self.bn4 = nn.BatchNorm2d(128)

        self.fc1 = nn.Linear(128, 32)
        self.fc2 = nn.Linear(32, 10)

        self.pool = nn.MaxPool2d(2, 2)
        self.flatten = nn.AdaptiveAvgPool2d(1)
        self.sigmoid = nn.Sigmoid()
        self.act_fn = nn.LeakyReLU()
        self.drop = nn.Dropout(p=0.25)

    def forward(self, x):
        """Return the 10 class scores for each image in ``x``."""
        # Stage 1: (batch, 3, 32, 32) -> (batch, 16, 16, 16)
        x = self.pool(self.act_fn(self.bn1(self.conv1(x))))

        # Stage 2 (gated): -> (batch, 32, 8, 8)
        gate = self.sigmoid(self.att1(x))      # (batch, 32, 16, 16), values in (0, 1)
        x = self.conv2(x) * gate
        x = self.pool(self.act_fn(self.bn2(x)))

        # Stage 3: -> (batch, 64, 4, 4)
        x = self.pool(self.act_fn(self.bn3(self.conv3(x))))

        # Stage 4 (gated): -> (batch, 128, 2, 2)
        gate = self.sigmoid(self.att2(x))      # (batch, 128, 4, 4), values in (0, 1)
        x = self.conv4(x) * gate
        x = self.pool(self.act_fn(self.bn4(x)))

        # Average the remaining spatial grid away: -> (batch, 128)
        features = self.flatten(x).view(-1, 128 * 1 * 1)

        hidden = self.drop(self.act_fn(self.fc1(features)))  # -> (batch, 32)
        return self.fc2(hidden)                              # -> (batch, 10)
학습 & 검증
LinearNet
# Build the linear model on DEVICE, attach an AdamW optimizer with its default
# settings, and print a layer-by-layer summary for a 3x32x32 input.
model = LinearNet()
model = model.to(DEVICE)
opt = optim.AdamW(params=model.parameters())

print("Device :", DEVICE)
summary_(model, input_size=(3, 32, 32), device=DEVICE)
Device : cuda ---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Linear-1 [-1, 1024] 3,146,752 ReLU-2 [-1, 1024] 0 Linear-3 [-1, 128] 131,200 ReLU-4 [-1, 128] 0 Linear-5 [-1, 10] 1,290 ================================================================ Total params: 3,279,242 Trainable params: 3,279,242 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.01 Forward/backward pass size (MB): 0.02 Params size (MB): 12.51 Estimated Total Size (MB): 12.54 ----------------------------------------------------------------
Simple Linear Net은 3개의 계층으로 이루어진 선형 네트워크이며, 가중치는 약 328만 개이다.
# Train for 30 epochs and unpack the four per-epoch history lists.
histories = train(
    EPOCHS=30,
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    opt=opt,
)
t_loss_his, t_acc_his, v_loss_his, v_acc_his = histories
<<< EPOCH 1 >>> Training : [6400/50000 (13%)] Loss: 1.826891 Acc : 0.042 ... 중간 생략 ... Training : [50000/50000 (100%)] Loss: 1.663226 Acc : 0.413 [1] Test Loss : 1.5190 accuracy: 46.53% ... 중간 생략 ... <<< EPOCH 30 >>> Training : [6400/50000 (13%)] Loss: 0.341410 Acc : 0.101 ... 중간 생략 ... Training : [50000/50000 (100%)] Loss: 0.732234 Acc : 0.773 [30] Test Loss : 1.6701 accuracy: 55.11%
학습 데이터에 대해서는 꾸준히 오르지만, Test 데이터에 대해서는 55%, 절반 정도의 정확도를 보인다.
# (Pretty poor...!)
# Draw one figure per metric, each with the training and the test curve.
for title, train_curve, test_curve in (
    ("Loss", t_loss_his, v_loss_his),
    ("Acc", t_acc_his, v_acc_his),
):
    plt.plot(train_curve, label="train")
    plt.plot(test_curve, label="test")
    plt.title(title)
    plt.legend()
    plt.show()
CNN
# Build the CNN on DEVICE, attach an AdamW optimizer with its default
# settings, and print a layer-by-layer summary for a 3x32x32 input.
model = CNN()
model = model.to(DEVICE)
opt = optim.AdamW(params=model.parameters())

print("Device :", DEVICE)
summary_(model, input_size=(3, 32, 32), device=DEVICE)
Device : cuda ---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Conv2d-1 [-1, 16, 32, 32] 448 ReLU-2 [-1, 16, 32, 32] 0 MaxPool2d-3 [-1, 16, 16, 16] 0 Conv2d-4 [-1, 32, 16, 16] 4,640 BatchNorm2d-5 [-1, 32, 16, 16] 64 ReLU-6 [-1, 32, 16, 16] 0 MaxPool2d-7 [-1, 32, 8, 8] 0 Conv2d-8 [-1, 64, 8, 8] 18,496 ReLU-9 [-1, 64, 8, 8] 0 MaxPool2d-10 [-1, 64, 4, 4] 0 Conv2d-11 [-1, 128, 4, 4] 73,856 ReLU-12 [-1, 128, 4, 4] 0 MaxPool2d-13 [-1, 128, 2, 2] 0 AdaptiveAvgPool2d-14 [-1, 128, 1, 1] 0 Linear-15 [-1, 32] 4,128 ReLU-16 [-1, 32] 0 Dropout-17 [-1, 32] 0 Linear-18 [-1, 10] 330 ================================================================ Total params: 101,962 Trainable params: 101,962 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.01 Forward/backward pass size (MB): 0.59 Params size (MB): 0.39 Estimated Total Size (MB): 0.99 ----------------------------------------------------------------
Simple CNN Net은 4개의 Conv와 Pooling 조합을 가지고 있고 2개의 선형 분류기를 지나 출력되는 형태를 가졌다.
# The model uses roughly 100k weights.
# Train for 30 epochs and unpack the four per-epoch history lists.
histories = train(
    EPOCHS=30,
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    opt=opt,
)
t_loss_his, t_acc_his, v_loss_his, v_acc_his = histories
<<< EPOCH 1 >>> Training : [6400/50000 (13%)] Loss: 1.974538 Acc : 0.028 ... 중간 생략 ... Training : [50000/50000 (100%)] Loss: 1.231495 Acc : 0.391 [1] Test Loss : 1.3294 accuracy: 50.69% ... 중간 생략 ... <<< EPOCH 30 >>> Training : [6400/50000 (13%)] Loss: 0.307197 Acc : 0.113 ... 중간 생략 ... Training : [50000/50000 (100%)] Loss: 0.266454 Acc : 0.866 [30] Test Loss : 0.6851 accuracy: 79.03%
Simple Linear Net과 비교했을 때, 확연한 성능 차이를 보인다.
# Draw one figure per metric, each with the training and the test curve.
for title, train_curve, test_curve in (
    ("Loss", t_loss_his, v_loss_his),
    ("Acc", t_acc_his, v_acc_his),
):
    plt.plot(train_curve, label="train")
    plt.plot(test_curve, label="test")
    plt.title(title)
    plt.legend()
    plt.show()
CNN(with Attention)
# Build the gated CNN on DEVICE, attach an AdamW optimizer with its default
# settings, and print a layer-by-layer summary for a 3x32x32 input.
model = CNN_with_Att()
model = model.to(DEVICE)
opt = optim.AdamW(params=model.parameters())

print("Device :", DEVICE)
summary_(model, input_size=(3, 32, 32), device=DEVICE)
Device : cuda ---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Conv2d-1 [-1, 16, 32, 32] 448 BatchNorm2d-2 [-1, 16, 32, 32] 32 LeakyReLU-3 [-1, 16, 32, 32] 0 MaxPool2d-4 [-1, 16, 16, 16] 0 Conv2d-5 [-1, 32, 16, 16] 4,640 Sigmoid-6 [-1, 32, 16, 16] 0 Conv2d-7 [-1, 32, 16, 16] 4,640 BatchNorm2d-8 [-1, 32, 16, 16] 64 LeakyReLU-9 [-1, 32, 16, 16] 0 MaxPool2d-10 [-1, 32, 8, 8] 0 Conv2d-11 [-1, 64, 8, 8] 18,496 BatchNorm2d-12 [-1, 64, 8, 8] 128 LeakyReLU-13 [-1, 64, 8, 8] 0 MaxPool2d-14 [-1, 64, 4, 4] 0 Conv2d-15 [-1, 128, 4, 4] 73,856 Sigmoid-16 [-1, 128, 4, 4] 0 Conv2d-17 [-1, 128, 4, 4] 73,856 BatchNorm2d-18 [-1, 128, 4, 4] 256 LeakyReLU-19 [-1, 128, 4, 4] 0 MaxPool2d-20 [-1, 128, 2, 2] 0 AdaptiveAvgPool2d-21 [-1, 128, 1, 1] 0 Linear-22 [-1, 32] 4,128 LeakyReLU-23 [-1, 32] 0 Dropout-24 [-1, 32] 0 Linear-25 [-1, 10] 330 ================================================================ Total params: 180,874 Trainable params: 180,874 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.01 Forward/backward pass size (MB): 0.92 Params size (MB): 0.69 Estimated Total Size (MB): 1.62 ----------------------------------------------------------------
Simple CNN Net에서 Channel Attention을 2번 주었다.
그 만큼의 가중치가 추가되어 대략 18만개의 가중치가 사용되었다.
# Will it make a meaningful difference in performance...?
# Train for 30 epochs and unpack the four per-epoch history lists.
histories = train(
    EPOCHS=30,
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    opt=opt,
)
t_loss_his, t_acc_his, v_loss_his, v_acc_his = histories
<<< EPOCH 1 >>> Training : [6400/50000 (13%)] Loss: 1.655612 Acc : 0.042 ... 중간 생략 ... Training : [50000/50000 (100%)] Loss: 1.449838 Acc : 0.536 [1] Test Loss : 1.0514 accuracy: 62.95% ... 중간 생략 ... <<< EPOCH 30 >>> Training : [6400/50000 (13%)] Loss: 0.230031 Acc : 0.117 ... 중간 생략 ... Training : [50000/50000 (100%)] Loss: 0.423190 Acc : 0.903 [30] Test Loss : 0.6940 accuracy: 80.06%
학습데이터에 대해서는 Simple CNN보다 더 나은 학습 성과를 보였다.
다만, Test 데이터에 대해서는 Attention이 제대로 안먹혔는지, 유의미한 성능차이는 나오지 않았다.
# Draw one figure per metric, each with the training and the test curve.
# The second figure plots the accuracy histories, so it is titled "Acc".
for title, train_curve, test_curve in (
    ("Loss", t_loss_his, v_loss_his),
    ("Acc", t_acc_his, v_acc_his),
):
    plt.plot(train_curve, label="train")
    plt.plot(test_curve, label="test")
    plt.title(title)
    plt.legend()
    plt.show()
분류 성능 직접 확인하기
실제로 분류 성능을 직접 확인하려면 아래 코드를 실행해보자
# (The output is omitted here because it is too long; see the notebook on
# GitHub if you want to look at it.)
import itertools

model.eval()
softmax = nn.Softmax(dim=1)

with torch.no_grad():
    # Fetch only batch index 13 rather than materialising the whole test loader.
    imgs, labels = next(itertools.islice(test_loader, 13, None))

    for img, label in zip(imgs, labels):
        # Move the sample to the configured DEVICE (not a hard-coded .cuda())
        # so the cell also runs on CPU-only machines.
        probs = softmax(model(img.view(1, 3, 32, 32).to(DEVICE)))
        predicted = probs.argmax(dim=1).item()

        print("실제값 :", label_names[label])
        print("예측값 :", label_names[predicted])
        print("img's shape : {}".format(img.shape))

        # Undo the loader's Normalize(mean=0.5, std=0.5) so imshow receives
        # values in [0, 1], and move the channel axis last: (C, H, W) -> (H, W, C).
        plt.imshow((img * 0.5 + 0.5).clamp(0, 1).permute(1, 2, 0).numpy())
        plt.title("Image")
        plt.show()
        print()
CNN에 대한 포스팅은 여기까지 하고 다음에는 RNN에 대해서 포스팅해보고자 한다.
포스팅에 사용된 코드에 대한 전문은 아래 링크에서 확인할 수 있습니다.
(https://github.com/JoSangYeon/Machine_Learning_Project/blob/master/Cifar_10%20Project/Cifar10%231.ipynb)
'코딩(Coding) > 기계학습' 카테고리의 다른 글
- [기계학습] 합성곱 신경망(CNN : Convolutional Neural Network) (Part 2/2) (0) 2022.02.18
- [기계학습] 합성곱 신경망(CNN : Convolutional Neural Network) (Part 1/2) (0) 2022.02.09
- [기계학습] IRIS 데이터를 분류하는 분류기를 만들어보자(코드 실습) (0) 2022.02.08
- [기계학습] 로지스틱 회귀(분류 Classification)(Logistic Regression) (Part 2/2) (0) 2022.01.25
- [기계학습] 로지스틱 회귀(분류 Classification)(Logistic Regression) (Part 1/2) (0) 2022.01.21