在keras中实现yolov4目标检测

在yolov4中实现对自己的数据集进行训练

- 环境配置
- 1.数据集的制作
- 2.生成train、test、val数据集
- 3.训练
- 4.预测
- 5.总结

环境配置

win10+tensorflow-gpu1.13.2+cuda10.0+cudnn7.4

1.数据集的制作

先安装labelimg，步骤如下：
pip install PyQt5
pip install pyqt5-tools
pip install lxml
pip install labelImg
在安装好lxml之后，也可以直接将data文件复制到电脑，运行的时候将labelimg粘贴到桌面。
使用rename.py将数据集图片统一重命名为序号，序号不足6位时在高位用0补齐（例如000001.jpg）。

#rename.py
import os
import uuid

path = "C:\\Users\\Admin\\Desktop\\VOC\\JPEGImages"


def rename_sequentially(folder, width=6):
    """Rename every file in *folder* to a zero-padded sequence number.

    Files are taken in sorted name order and renamed to ``000000.ext``,
    ``000001.ext``, ... keeping each file's original extension.
    Sub-directories are skipped and do not consume a number.

    The rename runs in two passes (original name -> unique temporary name ->
    final name) so that no file is overwritten when its target name is still
    held by a file that has not been renamed yet.

    Args:
        folder: Directory whose files are renamed in place.
        width: Minimum number of digits; shorter numbers are left-padded
            with zeros.

    Returns:
        The new file names, in the order the numbers were assigned.
    """
    names = sorted(
        name for name in os.listdir(folder)
        if not os.path.isdir(os.path.join(folder, name))
    )
    # A random token keeps the temporary names from clashing with real files.
    token = uuid.uuid4().hex
    staged = []
    for index, name in enumerate(names):
        print(name)
        extension = os.path.splitext(name)[1]
        temporary = os.path.join(
            folder, "__tmp_%s_%d%s" % (token, index, extension))
        os.rename(os.path.join(folder, name), temporary)
        staged.append((temporary, str(index).zfill(width) + extension))
    for temporary, final_name in staged:
        os.rename(temporary, os.path.join(folder, final_name))
    return [final_name for _, final_name in staged]


if __name__ == "__main__":
    rename_sequentially(path)

先在VOC目录下创建如下图所示的三个文件夹（Annotations、ImageSets、JPEGImages），图片数据集存放在JPEGImages。
在labelimg将图片生成xml文件,如下图所示，选中区域之后输入对应的标签，然后将xml文件保存在Annotations文件夹下。

2.生成train、test、val数据集

运行formtxt.py文件生成如图所示的4个txt文件（trainval.txt、train.txt、val.txt、test.txt），存放在ImageSets/Main文件夹下。注意：下面代码中该目录写作ImageSets\mian，实际创建的文件夹名必须与代码中的路径保持一致。

#formtxt.py
import os
import random

trainval_percent = 0.7   # fraction of all samples that goes into trainval
train_percent = 0.5   # fraction of trainval that goes into train
xmlfilepath = r'C:\Users\Admin\Desktop\VOC\Annotations'
txtsavepath = r'C:\Users\Admin\Desktop\VOC\ImageSets\mian'


def split_names(names, trainval_ratio, train_ratio):
    """Randomly partition *names* into trainval/train/val/test lists.

    ``int(len(names) * trainval_ratio)`` names are drawn for trainval and
    the rest become test; ``int(trainval_count * train_ratio)`` of the
    trainval names become train and the remaining trainval names become val.
    Within each list the names keep their order from *names*.

    Args:
        names: Sample names (annotation file names without extension).
        trainval_ratio: Fraction of all names assigned to trainval.
        train_ratio: Fraction of trainval assigned to train.

    Returns:
        A dict with the keys 'trainval', 'train', 'val' and 'test', each
        mapping to a list of names.
    """
    total = len(names)
    trainval_count = int(total * trainval_ratio)
    train_count = int(trainval_count * train_ratio)
    picked = random.sample(range(total), trainval_count)
    # Sets give O(1) membership tests in the loop below.
    train_ids = set(random.sample(picked, train_count))
    trainval_ids = set(picked)

    splits = {'trainval': [], 'train': [], 'val': [], 'test': []}
    for index, name in enumerate(names):
        if index not in trainval_ids:
            splits['test'].append(name)
            continue
        splits['trainval'].append(name)
        splits['train' if index in train_ids else 'val'].append(name)
    return splits


def write_split_files(xml_dir, save_dir, trainval_ratio, train_ratio):
    """Split the annotations in *xml_dir* and write one txt file per split.

    Only entries ending in ``.xml`` are considered.  *save_dir* is created
    when missing and receives trainval.txt, train.txt, val.txt and test.txt,
    each holding one name per line.

    Returns:
        The dict produced by :func:`split_names`.
    """
    names = [
        os.path.splitext(entry)[0]
        for entry in os.listdir(xml_dir)
        if entry.lower().endswith('.xml')
    ]
    splits = split_names(names, trainval_ratio, train_ratio)
    os.makedirs(save_dir, exist_ok=True)
    for split, members in splits.items():
        with open(os.path.join(save_dir, split + '.txt'), 'w') as handle:
            handle.writelines(member + '\n' for member in members)
    return splits


if __name__ == '__main__':
    write_split_files(xmlfilepath, txtsavepath, trainval_percent, train_percent)

运行voc_annotation.py文件，为train、val、test各生成一个txt文件（voc_train.txt、voc_val.txt、voc_test.txt），其中每一行对应一张图片的路径及其所有真实框的位置和类别编号。

#voc_annotation.py
import xml.etree.ElementTree as ET
from os import getcwd

# Splits to convert, as (dataset tag, split name) pairs.  The tag becomes the
# prefix of the generated list file; the stock VOC2012 variant is kept below.
#sets=[('2012', 'train'), ('2012', 'val'), ('2012', 'test')]
sets = [('voc', split_name) for split_name in ('train', 'val', 'test')]
# Class names as they appear in the annotation XML files.  Replace them with
# the labels of your own dataset; the stock VOC classes are kept below.
#classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
classes = [
    "nazha",
    "zuxian",
    "bozhi",
]
def convert_annotation(year, image_id, list_file,
                       annotations_dir=r'C:\Users\Admin\Desktop\VOC\Annotations',
                       class_names=None):
    """Append the ground-truth boxes of one image to *list_file*.

    Parses ``<annotations_dir>/<image_id>.xml`` and, for every ``<object>``
    whose name is a known class and which is not flagged difficult, writes
    `` xmin,ymin,xmax,ymax,class_index`` (leading space, coordinates as
    floats) to *list_file*.  Objects lacking a ``<name>`` or ``<bndbox>``
    element are skipped.

    Args:
        year: Dataset tag of the current split; not used by this function and
            kept only for signature compatibility.
        image_id: Annotation file name without the ``.xml`` extension.
        list_file: Writable text file object receiving the box entries.
        annotations_dir: Folder that holds the XML annotation files.
        class_names: Sequence of class names; the position of a name is its
            class index.  Defaults to the module-level ``classes`` list.

    Raises:
        OSError: If the annotation file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    import os

    if class_names is None:
        class_names = classes

    # Handing the path to ElementTree lets the parser honour the encoding
    # declared in the XML and guarantees the file is closed afterwards.
    xml_path = os.path.join(annotations_dir, '%s.xml' % image_id)
    root = ET.parse(xml_path).getroot()

    for obj in root.iter('object'):
        name_node = obj.find('name')
        xmlbox = obj.find('bndbox')
        if name_node is None or xmlbox is None:
            continue

        # A missing or empty <difficult> element counts as "not difficult".
        difficult_node = obj.find('difficult')
        difficult_text = ''
        if difficult_node is not None and difficult_node.text:
            difficult_text = difficult_node.text.strip()
        difficult = int(difficult_text) if difficult_text else 0

        cls = name_node.text
        if cls not in class_names or difficult == 1:
            continue
        cls_id = class_names.index(cls)

        b = tuple(
            float(xmlbox.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        list_file.write(" " + ",".join(str(a) for a in b) + ',' + str(cls_id))

wd = getcwd()

# For every split, read its image ids and write '<tag>_<split>.txt' with one
# line per image: the image path followed by that image's boxes.
for year, image_set in sets:
    # Stock VOC layout, for reference:
    #   ids:    'VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)
    #   images: '%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg' % (wd, year, image_id)
    # `with` closes both files even when an annotation fails to parse.
    with open(r'C:\Users\Admin\Desktop\VOC\ImageSets\mian\%s.txt' % (image_set)) as id_file:
        image_ids = id_file.read().strip().split()
    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write(r'C:\Users\Admin\Desktop\VOC\JPEGImages\%s.jpg' % image_id)
            convert_annotation(year, image_id, list_file)
            list_file.write('\n')

在运行文件前，将classes = ["nazha","zuxian","bozhi"]改为自己数据集所对应的类别。

3.训练

1. 运行kmeans_for_anchors.py，生成训练集所对应的初始锚框（anchors）。

# Main section of kmeans_for_anchors.py
if __name__ == '__main__':
    # Clusters the boxes found in the XML annotations under `path` and
    # writes the resulting anchors to voc.txt in the working directory.
    SIZE = 416
    anchors_num = 9
    # Folder with the VOC-style XML annotations.
    #path = r'./VOCdevkit/VOC2007/Annotations'
    path = r'C:\Users\Admin\Desktop\VOC\Annotations'
    # Load every XML file; per the original author's note the boxes are
    # stored as width,height converted to ratios.
    data = load_data(path)

    # Run k-means, then order the anchors by their first column.
    out = kmeans(data,anchors_num)
    out = out[np.argsort(out[:,0])]
    print('acc:{:.2f}%'.format(avg_iou(data,out) * 100))
    print(out*SIZE)
    data = out*SIZE
    #f = open("yolo_anchors.txt", 'w')
    # One line of "w,h, w,h, ..." pairs; %d truncates each value to an int.
    # `with` closes the file even if formatting fails.
    with open("voc.txt", 'w') as f:
        f.write(", ".join("%d,%d" % (box[0], box[1]) for box in data))

path为xml文件所在的文件夹。生成的锚框保存在当前目录下的voc.txt中，训练前需要把它放到train.py中anchors_path所指向的位置。
2. 开始训练自己的数据集。运行train.py文件。

# Excerpt from the main section of train.py
if __name__ == "__main__":
    # Annotation list written by voc_annotation.py.  With
    # sets=[('voc', 'train'), ...] that script produces voc_train.txt;
    # '2012_train.txt' is the name produced for the stock VOC2012 tag.
    annotation_path = 'voc_train.txt'
    #annotation_path = '2012_train.txt'
    # Locations of the class-name file and the anchor file.
    classes_path = 'model_data/voc_classes.txt'
    anchors_path = 'model_data/yolo_anchors.txt'
    #classes_path = 'model_data/new_classes.txt'
    #anchors_path = 'model_data/voc_anchors.txt'
    #------------------------------------------------------#
    #   See the project README for the pretrained weights
    #   (downloaded from Baidu Netdisk).
    #   A shape-mismatch message when training on your own
    #   dataset is expected: the predicted classes differ
    #   from those of the pretrained weights.
    #------------------------------------------------------#
    weights_path = 'model_data/yolo4_voc_weights.h5'
    # Load the class names and the anchors.
    class_names = get_classes(classes_path)
    anchors = get_anchors(anchors_path)
    #print("anchors",anchors)
    #print("---------------")
    # Number of classes and number of anchors.
    num_classes = len(class_names)
    num_anchors = len(anchors)
    # Directory where the trained models are saved.
    log_dir = 'logs/'
    # Network input size:
    # use 416x416 when GPU memory is small,
    # 608x608 when GPU memory is large.
    input_shape = (416,416)
    #mosaic = False
    mosaic = True
    Cosine_scheduler = False
    label_smoothing = 0

其中annotation_path 为运行voc_annotation.py所生成txt文件的位置。classes_path为训练数据所对应的类别，格式如下图所示。anchors_path为kmeans_for_anchors.py生成文件所对应的位置。weights_path 中模型权值需预先下载好。log_dir 为训练模型保存的位置。

4.预测

到这一步说明已经生成了自己训练好的权值参数。运行predict.py文件。

#predict.py
# Interactive prediction: prompts for image paths, runs detection on each
# image and shows the result until the input is interrupted or ends.
from yolo import YOLO
from PIL import Image
import os
import keras.backend as K
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF
K.clear_session()
# Make only the first GPU visible.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
config = tf.ConfigProto()
# Optional (disabled): allocate GPU memory on demand instead of taking it all.
#config.gpu_options.allow_growth=True
#config.gpu_options.per_process_gpu_memory_fraction = 0.7
#sess = tf.Session(config=config)
#KTF.set_session(sess)
#with sess:
yolo = YOLO()
#os.environ['CUDA_VISIBLE_DEVICES']='2'
try:
    while True:
        try:
            img = input('Input image filename:')
        except (EOFError, KeyboardInterrupt):
            # Ctrl-C or end of input leaves the prompt loop.
            break
        #img = "img/street.jpg"
        try:
            image = Image.open(img)
        except Exception:
            # Unlike a bare `except:`, this lets KeyboardInterrupt and
            # SystemExit propagate.
            print('Open Error! Try again!')
            continue
        r_image = yolo.detect_image(image)
        r_image.show()
finally:
    # Previously unreachable after `while True`; now always runs on exit.
    yolo.close_session()

运行前，需要对yolo.py中YOLO类的相应参数进行更改。

# Excerpt from yolo.py: default settings of the YOLO class
class YOLO(object):
    # Point model_path, anchors_path and classes_path at your own files.
    _defaults = dict(
        # Weights to load; the stock file is 'model_data/yolo4_weight.h5'.
        model_path='logs/last1.h5',
        anchors_path='model_data/yolo_anchors.txt',
        classes_path='model_data/voc_classes.txt',
        score=0.5,
        iou=0.3,
        max_boxes=100,
        # Use 416x416 when GPU memory is small, 608x608 when it is large.
        model_image_size=(416, 416),
    )

"model_path"为自己训练得到的模型权值文件（这里最后一次保存的权值文件被命名为last1.h5），"anchors_path"为一开始生成的锚框文件，"classes_path"为自己训练集所对应的类别文件。
其中，loss函数包括三部分：
loss = 框回归损失函数(CIOU) + 置信度损失函数(交叉熵损失函数) + 分类损失函数(交叉熵损失函数)

5.总结

在运行前先配置好环境，大部分bug都是因为环境不匹配所造成的。配置好环境后，根据自己所需对代码慢慢更改。

代码地址：
https://github.com/bubbliiiing/yolov4-keras
该项目中包含所需要的初始权值。
特此感谢代码原作者Bubbliiiing，该大神有相关的讲解，b站还有相关的视频，若有不解，可找该大神求解。

本文地址：https://blog.csdn.net/weixin_45468373/article/details/109811017

在keras中实现yolov4目标检测

在yolov4中实现对自己的数据集进行训练

环境配置

1.数据集的制作

2.生成train、test、val数据集

3.训练

4.预测

5.总结

相关推荐

【opencv】传统目标检测：Haar检测器实现人脸检测

opencv-python 车牌检测和识别

深度学习-09(目标检测:Object Detection)

OCR -- 文本检测 - 训练DB文字检测模型

OCR 文字检测（Differentiable Binarization --- DB）

基于Emgu CV的人脸检测代码

C#使用Emgu CV来进行图片人脸检测

深入分析：恒虚警率检测算法之Switch-CFAR