Author: Park Jiwoo (15th cohort)
The input image passes through convolutional layers to produce a feature map. Using this map, the RPN (Region Proposal Network) proposes RoIs, i.e., the regions the detector should look at. The Fast R-CNN head then analyzes the image based on those proposals.
** During training, the RPN and the Fast R-CNN head share the conv features.
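As a rough illustration of this two-stage flow, here is a minimal sketch using torchvision's pretrained detector. The internal calls below (transform, backbone, rpn, roi_heads) mirror torchvision's GeneralizedRCNN modules; their exact signatures may differ between torchvision versions, so treat this as a sketch rather than the code used later in this post.

import torch
import torchvision

detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)  # downloads weights on first use
detector.eval()
dummy = [torch.rand(3, 600, 800)]                        # one RGB image with values in [0, 1]
with torch.no_grad():
    images, _ = detector.transform(dummy)                # resize + normalize
    features = detector.backbone(images.tensors)         # shared conv feature maps
    proposals, _ = detector.rpn(images, features)        # RPN: proposes RoIs (where to look)
    detections, _ = detector.roi_heads(features, proposals, images.image_sizes)  # Fast R-CNN head
print(detections[0].keys())                              # 'boxes', 'labels', 'scores'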
Masks play a crucial role in protecting the health of individuals against respiratory diseases, as they are one of the few precautions available for COVID-19 in the absence of immunization. With this dataset, it is possible to create a model that detects people wearing masks, not wearing them, or wearing masks improperly.
This dataset contains 853 images belonging to the 3 classes, as well as their bounding boxes in the PASCAL VOC format.
The classes are: with_mask, without_mask, and mask_weared_incorrect.
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
import os
import time
import imgaug as ia
from imgaug import augmenters as iaa # image augmentation
import cv2
from PIL import Image
from torchvision import transforms
import xml.etree.ElementTree as ET # library for reading XML files
from xml.etree.ElementTree import Element, ElementTree
def xml_parser(xml_path):
    tree = ET.parse(xml_path)                # parse the annotation XML
    root = tree.getroot()                    # root element
    size = root.find('size')                 # <size> element (image width/height)
    file_name = root.find('filename').text   # image file name

    object_name = []
    bbox = []

    objects = root.findall('object')
    for _object in objects:
        name = _object.find('name').text
        object_name.append(name)
        bndbox = _object.find('bndbox')      # bounding box element
        one_bbox = []
        xmin = bndbox.find("xmin").text      # left x coordinate
        one_bbox.append(int(float(xmin)))
        ymin = bndbox.find("ymin").text      # top y coordinate
        one_bbox.append(int(float(ymin)))
        xmax = bndbox.find("xmax").text      # right x coordinate
        one_bbox.append(int(float(xmax)))
        ymax = bndbox.find("ymax").text      # bottom y coordinate
        one_bbox.append(int(float(ymax)))
        bbox.append(one_bbox)

    return file_name, object_name, bbox
def makeBox(voc_im, bbox, objects):
    # Draw each bounding box and its class name on a copy of the image
    image = voc_im.copy()
    for i in range(len(objects)):
        cv2.rectangle(image, (int(bbox[i][0]), int(bbox[i][1])), (int(bbox[i][2]), int(bbox[i][3])), color=(0, 255, 0), thickness=1)
        cv2.putText(image, objects[i], (int(bbox[i][0]), int(bbox[i][1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    return image
xml_list = os.listdir("C:/Users/82102/Desktop/archive/annotations/")  # list of annotation (XML) files
xml_list.sort()  # sort by file name

label_set = set()
for i in range(len(xml_list)):
    xml_path = 'C:/Users/82102/Desktop/archive/annotations/' + str(xml_list[i])
    file_name, object_name, bbox = xml_parser(xml_path)
    for name in object_name:
        label_set.add(name)  # collect every object name that appears

label_set = sorted(list(label_set))

label_dic = {}
for i, key in enumerate(label_set):
    label_dic[key] = (i + 1)  # map the three labels to 1, 2, 3 (0 is reserved for background)
print(label_dic)
Output:
{'mask_weared_incorrect': 1, 'with_mask': 2, 'without_mask': 3}
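A quick check of xml_parser and makeBox together: parse the first annotation file, load the matching image, and draw its ground-truth boxes (the directories are the same ones used throughout this post).

sample_path = 'C:/Users/82102/Desktop/archive/annotations/' + xml_list[0]
sample_file, sample_objects, sample_bbox = xml_parser(sample_path)
sample_im = np.array(Image.open('C:/Users/82102/Desktop/archive/images/' + sample_file).convert('RGB'))
plt.imshow(makeBox(sample_im, sample_bbox, sample_objects))
plt.show()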
class Pascal_Vo(Dataset):
    def __init__(self, xml_list, len_data):
        self.xml_list = xml_list
        self.len_data = len_data                 # number of samples in this dataset
        self.to_tensor = transforms.ToTensor()
        self.flip = iaa.Fliplr(0.5)              # augmentation: horizontal flip with p=0.5
        self.resize = iaa.Resize({'shorter-side': 600, "longer-side": "keep-aspect-ratio"})  # augmentation: resize shorter side to 600

    def __len__(self):
        return self.len_data

    def __getitem__(self, idx):
        xml_path = 'C:/Users/82102/Desktop/archive/annotations/' + str(self.xml_list[idx])
        file_name, object_name, bbox = xml_parser(xml_path)
        image_path = 'C:/Users/82102/Desktop/archive/images/' + str(file_name)
        image = Image.open(image_path).convert('RGB')
        image = np.array(image)

        # Apply the same augmentations to the image and its bounding boxes
        image, bbox = self.flip(image=image, bounding_boxes=np.array([bbox]))
        image, bbox = self.resize(image=image, bounding_boxes=bbox)
        bbox = bbox.squeeze(0).tolist()
        image = self.to_tensor(image)

        targets = []
        d = {}
        d['boxes'] = torch.tensor(bbox)
        d['labels'] = torch.tensor([label_dic[x] for x in object_name], dtype=torch.int64)
        targets.append(d)

        return image, targets
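A quick check that the Dataset returns what torchvision's detector expects: a [C, H, W] float image tensor and a target dict with 'boxes' and 'labels'. The shapes in the comments are examples; the actual height and width depend on each image's aspect ratio.

dataset = Pascal_Vo(xml_list, len(xml_list))
sample_image, sample_targets = dataset[0]
print(sample_image.shape)                  # e.g. torch.Size([3, 600, 800]); the shorter side is resized to 600
print(sample_targets[0]['boxes'].shape)    # [num_objects, 4] in (xmin, ymin, xmax, ymax)
print(sample_targets[0]['labels'])         # class indices from label_dic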
backbone = torchvision.models.vgg16(pretrained=True).features[:-1]  # pretrained VGG16 conv layers (drop the last max-pool)
backbone_out = 512  # number of output channels of the backbone
backbone.out_channels = backbone_out

anchor_generator = torchvision.models.detection.rpn.AnchorGenerator(sizes=((128, 256, 512),), aspect_ratios=((0.5, 1.0, 2.0),))
# anchor generator for the RPN: 3 sizes x 3 aspect ratios per location

resolution = 7
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], output_size=resolution, sampling_ratio=2)  # RoI pooling/align
box_head = torchvision.models.detection.faster_rcnn.TwoMLPHead(in_channels=backbone_out * (resolution ** 2), representation_size=4096)
box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(4096, 4)  # 4 = 3 classes + background
# Fast R-CNN head: fully connected layers (box_head) + classification/regression predictor (box_predictor)
model = torchvision.models.detection.FasterRCNN(backbone, num_classes=None,
                                                min_size=600, max_size=1000,
                                                rpn_anchor_generator=anchor_generator,
                                                rpn_pre_nms_top_n_train=6000, rpn_pre_nms_top_n_test=6000,
                                                rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=300,
                                                rpn_nms_thresh=0.7, rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                                                rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
                                                box_roi_pool=roi_pooler, box_head=box_head, box_predictor=box_predictor,
                                                box_score_thresh=0.05, box_nms_thresh=0.7, box_detections_per_img=300,
                                                box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
                                                box_batch_size_per_image=128, box_positive_fraction=0.25
                                                )  # model hyperparameters: if the backbone output size changes, these must be updated accordingly
for param in model.rpn.parameters():
    torch.nn.init.normal_(param, mean=0.0, std=0.01)  # initialize RPN parameters from N(0, 0.01)

for name, param in model.roi_heads.named_parameters():
    if "bbox_pred" in name:
        torch.nn.init.normal_(param, mean=0.0, std=0.001)
    elif "weight" in name:
        torch.nn.init.normal_(param, mean=0.0, std=0.01)
    if "bias" in name:
        torch.nn.init.zeros_(param)
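An optional sanity check before training: run the assembled model end to end on a dummy image. Since the heads were just re-initialized, the predictions are meaningless, but the output format is the same list of dicts the evaluation code below relies on.

model.eval()
with torch.no_grad():
    sanity = model([torch.rand(3, 600, 800)])   # a list of images in, a list of prediction dicts out
print(sanity[0]['boxes'].shape, sanity[0]['labels'].shape, sanity[0]['scores'].shape)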
def Total_loss(loss):
    loss_objectness = loss['loss_objectness']
    loss_rpn_box_reg = loss['loss_rpn_box_reg']
    loss_classifier = loss['loss_classifier']
    loss_box_reg = loss['loss_box_reg']

    rpn_total = loss_objectness + 10 * loss_rpn_box_reg   # the factor 10 acts as the lambda weight
    fast_rcnn_total = loss_classifier + 1 * loss_box_reg

    total_loss = rpn_total + fast_rcnn_total
    return total_loss
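For reference, in training mode torchvision's FasterRCNN takes images together with targets and returns exactly these four losses as a dict, which is what Total_loss re-weights and sums. A minimal example with a single dummy image and one made-up box (the box and label are purely illustrative):

model.train()
dummy_target = [{'boxes': torch.tensor([[10., 10., 100., 120.]]),   # made-up box, illustration only
                 'labels': torch.tensor([1])}]
loss_dict = model([torch.rand(3, 600, 800)], dummy_target)
print(loss_dict.keys())        # loss_classifier, loss_box_reg, loss_objectness, loss_rpn_box_reg
print(Total_loss(loss_dict))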
total_epoch = 15
len_data = 25  # number of images used per epoch
loss_sum = 0

optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)  # SGD optimizer
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, total_epoch, eta_min=0.00001)  # cosine annealing LR scheduler

start_epoch = 0
start_idx = 0
print("start_epoch = {} , start_idx = {}".format(start_epoch, start_idx))
print("Training Start")
model.train()  # training mode
start = time.time()
for epoch in range(start_epoch, total_epoch):
    dataset = Pascal_Vo(xml_list[:len_data], len_data - start_idx)  # build the dataset for this epoch
    dataloader = DataLoader(dataset, shuffle=True)                  # batch size 1

    for i, (image, targets) in enumerate(dataloader, start_idx):    # load each image and its targets (boxes/labels)
        optimizer.zero_grad()

        targets[0]['boxes'].squeeze_(0)   # drop the batch dimension added by the DataLoader
        targets[0]['labels'].squeeze_(0)

        loss = model(image, targets)      # in train mode the model returns a dict of losses
        total_loss = Total_loss(loss)
        loss_sum += total_loss

        total_loss.backward()
        optimizer.step()

    start_idx = 0
    scheduler.step()

    state = {
        'epoch': epoch,
        'iter': i + 1,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict()
    }

    print('epoch:' + str(epoch))
    print('loss:' + str(total_loss))
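The state dictionary above is assembled every epoch but never written to disk. A minimal sketch of saving it with torch.save and restoring it later (the checkpoint path is hypothetical):

torch.save(state, 'C:/Users/82102/Desktop/archive/faster_rcnn_checkpoint.pth')  # hypothetical path
# To resume later:
# checkpoint = torch.load('C:/Users/82102/Desktop/archive/faster_rcnn_checkpoint.pth')
# model.load_state_dict(checkpoint['state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer'])
# scheduler.load_state_dict(checkpoint['scheduler'])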
model.eval()
preds = model(image)                 # run inference on the last training image
boxes = preds[0]['boxes']            # predicted boxes
labels = preds[0]['labels']          # predicted labels

objects = []
for lb in labels:
    objects.append([k for k, v in label_dic.items() if v == lb][0])

plot_image = image.squeeze().permute(1, 2, 0).numpy()
answer = makeBox(plot_image, boxes, objects)  # draw predicted boxes and class names on the image
plt.imshow(answer)
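The predictions above often contain several overlapping boxes for the same face, since the model is barely trained and box_score_thresh is only 0.05. A minimal sketch of one common mitigation, filtering by score and applying non-maximum suppression with torchvision.ops.nms (the 0.5 and 0.3 thresholds are illustrative):

keep = preds[0]['scores'] > 0.5                  # drop low-confidence predictions (illustrative threshold)
kept_boxes = boxes[keep]
kept_scores = preds[0]['scores'][keep]
kept_labels = labels[keep]
keep_idx = torchvision.ops.nms(kept_boxes, kept_scores, iou_threshold=0.3)  # suppress heavily overlapping boxes
kept_boxes = kept_boxes[keep_idx]
kept_labels = kept_labels[keep_idx]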
It is a shame that, without access to a GPU, I could not train the model properly. During evaluation, many overlapping boxes appeared for the same object, and I was not able to fix this ... it is a model I would like to try again (one possible mitigation is sketched above).
I also learned that torchvision offers more pretrained models than I expected, and that XML annotations are widely used as datasets for object detection.