我的设备：云平台的4核cpu，24g内存，2080ti-11g显卡。训练阶段，gpu正常使用，利用率100%。但是在测试时，gpu利用率0%，速度特别慢。之前的相

test.py的修改部分

validate的速度特别慢 about yolov3 HOT 4 CLOSED

peterisfar commented on August 15, 2024 1

validate的速度特别慢

from yolov3.

Comments (4)

crownz-sec commented on August 15, 2024

发现是云平台读写文件很耗时，而且作者代码是每次测试一张图片就把结果写入对应文件，我的解决办法是将代码改成多线程，同时把测试结果先保存起来，最后一次性写入文件。后面计算mAP的Reading annotation部分代码也比较耗时，是for循环单个遍历的，代码也改成多线程了，这样的话速度快了很多。

from yolov3.

crownz-sec commented on August 15, 2024

test.py的修改部分

        if self.__eval:
            # Validation pass: print every per-class AP, then their mean.
            print('*' * 20 + "Validate" + '*' * 20)

            # Gradients are not needed for evaluation.
            with torch.no_grad():
                APs = Evaluator(self.__model).APs_voc()

                ap_total = 0
                for cls_name in APs:
                    cls_ap = APs[cls_name]
                    print("{} --> mAP : {}".format(cls_name, cls_ap))
                    ap_total += cls_ap

                # The mean is taken over the configured class count, not len(APs).
                mAP = ap_total / self.__num_class
                print('mAP:%g' % (mAP))

evaluator.py的修改部分

import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool        # 线程池

from tqdm import tqdm
from collections import defaultdict

class Evaluator(object):
    """Run a detection model over the VOC 2007 test split and compute APs.

    Images are processed by a thread pool. Detections are buffered in memory
    per class and written to the per-class result files in a single pass at
    the end, instead of touching the file system once per image.

    NOTE(review): ``get_bbox`` and ``__calc_APs`` are called below but are not
    part of this excerpt; they are assumed to be defined elsewhere in the
    class.
    """

    def __init__(self, model, visiual=True):
        """Read the evaluation settings from ``cfg`` and bind the model.

        Args:
            model: Model to evaluate; the device of its first parameter is
                stored in ``self.device``.
            visiual: When true, up to 100 images with drawn detections are
                written to the results directory.
        """
        # Local import: only needed for the lock guarding the image counter.
        import threading

        self.classes = cfg.DATA["CLASSES"]
        self.pred_result_path = os.path.join('data', 'results')
        self.val_data_path = os.path.join('data', 'VOC', 'VOCtest-2007')
        self.conf_thresh = cfg.TEST["CONF_THRESH"]
        self.nms_thresh = cfg.TEST["NMS_THRESH"]
        self.val_shape = cfg.TEST["TEST_IMG_SIZE"]

        self.__visiual = visiual
        self.__visual_imgs = 0
        # Single_APs_voc runs on several worker threads; the counter above is
        # read and incremented under this lock so two threads never get the
        # same output file name and the cap of 100 is respected.
        self.__visual_lock = threading.Lock()

        self.model = model
        self.device = next(model.parameters()).device

        # class name -> list of formatted detection lines, filled by workers.
        self.final_result = defaultdict(list)

    def APs_voc(self):
        """Detect on every test image, write the result files, compute APs.

        The results directory is deleted and recreated on every call.

        Returns:
            Whatever ``self.__calc_APs()`` returns.
        """
        img_inds_file = os.path.join(self.val_data_path, 'ImageSets', 'Main', 'test.txt')
        with open(img_inds_file, 'r') as f:
            img_inds = [line.strip() for line in f]

        if os.path.exists(self.pred_result_path):
            shutil.rmtree(self.pred_result_path)
        os.mkdir(self.pred_result_path)

        # The result files were just wiped, so drop anything buffered by an
        # earlier call on this instance; otherwise it would be written twice.
        self.final_result.clear()

        pool = ThreadPool(multiprocessing.cpu_count())
        try:
            with tqdm(total=len(img_inds)) as pbar:
                for _ in pool.imap_unordered(self.Single_APs_voc, img_inds):
                    pbar.update()
        except BaseException:
            # Do not keep working through the remaining images after a failure.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # Always reap the worker threads; without this every validation
            # run leaves a full pool of threads behind.
            pool.join()

        for class_name, detections in self.final_result.items():
            result_file = os.path.join(self.pred_result_path,
                                       'comp4_det_test_' + class_name + '.txt')
            with open(result_file, 'a') as f:
                f.write(''.join(detections))

        return self.__calc_APs()

    def Single_APs_voc(self, img_ind, multi_test=False, flip_test=False):
        """Detect on one image and buffer its detections per class.

        Called concurrently from the thread pool in ``APs_voc``.

        Args:
            img_ind: Image id; ``<img_ind>.jpg`` is read from ``JPEGImages``.
            multi_test: Forwarded unchanged to ``get_bbox``.
            flip_test: Forwarded unchanged to ``get_bbox``.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        img_path = os.path.join(self.val_data_path, 'JPEGImages', img_ind + '.jpg')
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Failed to read image: {}".format(img_path))
        # Rows are indexed below as [xmin, ymin, xmax, ymax, score, class index].
        bboxes_prd = self.get_bbox(img, multi_test, flip_test)

        if bboxes_prd.shape[0] != 0 and self.__visiual:
            # Reserve an output slot atomically before drawing.
            with self.__visual_lock:
                if self.__visual_imgs < 100:
                    slot = self.__visual_imgs
                    self.__visual_imgs += 1
                else:
                    slot = None

            if slot is not None:
                boxes = bboxes_prd[..., :4]
                class_inds = bboxes_prd[..., 5].astype(np.int32)
                scores = bboxes_prd[..., 4]

                visualize_boxes(image=img, boxes=boxes, labels=class_inds, probs=scores, class_labels=self.classes)
                path = os.path.join(cfg.PROJECT_PATH, "data/results/{}.jpg".format(slot))
                cv2.imwrite(path, img)

        for bbox in bboxes_prd:
            coor = np.array(bbox[:4], dtype=np.int32)
            score = '%.4f' % bbox[4]
            class_name = self.classes[int(bbox[5])]
            xmin, ymin, xmax, ymax = map(str, coor)
            result = ' '.join([img_ind, score, xmin, ymin, xmax, ymax]) + '\n'

            self.final_result[class_name].append(result)

voc_eval.py的修改部分

import multiprocessing
from tqdm import tqdm
from multiprocessing.dummy import Pool as ThreadPool        # 线程池

# Module-level store filled by parse_rec: image id -> list of object dicts.
recs = {}


def parse_rec(param):
    """Parse one PASCAL VOC annotation file and record its objects in ``recs``.

    ``param`` is a dict with two keys: ``'path'``, a format template for the
    XML file location, and ``'img'``, the image id substituted into that
    template. The parsed objects are stored under ``recs[img]``; nothing is
    returned.
    """
    template = param['path']
    img = param['img']
    annotation = ET.parse(template.format(img))

    def _describe(node):
        # Build the dict for a single <object> element.
        def text_of(tag):
            return node.find(tag).text

        record = {
            'name': text_of('name'),
            'pose': text_of('pose'),
            'truncated': int(text_of('truncated')),
            'difficult': int(text_of('difficult')),
        }
        box = node.find('bndbox')
        record['bbox'] = [int(box.find(corner).text)
                          for corner in ('xmin', 'ymin', 'xmax', 'ymax')]
        return record

    recs[img] = [_describe(node) for node in annotation.findall('object')]

def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    cachedir: Directory for caching the annotations
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # parse_rec fills the module-level ``recs`` and the cache-hit branch
    # rebinds it, so the declaration applies to the whole function.
    global recs

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    print("Reading annotation...")
    if not os.path.isfile(cachefile):
        # load annots: parse every annotation file on a thread pool
        param_list = [{'path': annopath, 'img': imagename}
                      for imagename in imagenames]

        pool = ThreadPool(multiprocessing.cpu_count())
        try:
            with tqdm(total=len(imagenames)) as pbar:
                for _ in pool.imap_unordered(parse_rec, param_list):
                    pbar.update()
        except BaseException:
            # Stop handing out further files once one of them failed.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # Always reap the worker threads so repeated calls do not
            # accumulate idle pools.
            pool.join()

        # save
        print ('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            pickle.dump(recs, f)
    else:
        # load
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # the cache directory must only contain files written by this code.
        with open(cachefile, 'rb') as f:
            recs = pickle.load(f)

    # NOTE(review): the excerpt ends here; the rec/prec/ap computation
    # described in the docstring is not part of the shown code.

因为也是刚接触多线程，代码可能多少有些问题，还请大家批评指正。

from yolov3.

BBBBchan commented on August 15, 2024

您好，我参考了您修改后的代码，能够成功运行，validate的速度也有了明显提升。
但是在成功validate几次之后，出现了如下报错

Traceback (most recent call last):
  File "train.py", line 160, in <module>
    gpu_id=opt.gpu_id).train()
  File "train.py", line 140, in train
    APs = Evaluator(self.yolov3).APs_voc()
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 51, in APs_voc
    for i, _ in enumerate(pool.imap_unordered(self.Single_APs_voc, img_inds)):
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/multiprocessing/pool.py", line 731, in next
    raise value
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 68, in Single_APs_voc
    bboxes_prd = self.get_bbox(img, multi_test, flip_test)
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 106, in get_bbox
    bboxes = self.__predict(img, self.val_shape, (0, np.inf))
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 119, in __predict
    _, p_d = self.model(img)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/yolov3.py", line 50, in forward
    x_s, x_m, x_l = self.__backnone(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/backbones/darknet53.py", line 53, in forward
    x = self.__conv(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/layers/conv_module.py", line 36, in forward
    x = self.__conv(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/conv.py", line 353, in forward
    return self._conv_forward(input, self.weight)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/conv.py", line 350, in _conv_forward
    self.padding, self.dilation, self.groups)
RuntimeError: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED
terminate called without an active exception

我认为该报错可能是由于显存没有得到及时释放引起的，在多次validate之后，就有可能出现上述错误。请问您有好的解决方案吗？

from yolov3.

crownz-sec commented on August 15, 2024

您好，我参考了您修改后的代码，能够成功运行，validate的速度也有了明显提升。
但是在成功validate几次之后，出现了如下报错

Traceback (most recent call last):
  File "train.py", line 160, in <module>
    gpu_id=opt.gpu_id).train()
  File "train.py", line 140, in train
    APs = Evaluator(self.yolov3).APs_voc()
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 51, in APs_voc
    for i, _ in enumerate(pool.imap_unordered(self.Single_APs_voc, img_inds)):
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/multiprocessing/pool.py", line 731, in next
    raise value
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 68, in Single_APs_voc
    bboxes_prd = self.get_bbox(img, multi_test, flip_test)
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 106, in get_bbox
    bboxes = self.__predict(img, self.val_shape, (0, np.inf))
  File "/root/boyuansun/YOLOV3/eval/evaluator.py", line 119, in __predict
    _, p_d = self.model(img)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/yolov3.py", line 50, in forward
    x_s, x_m, x_l = self.__backnone(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/backbones/darknet53.py", line 53, in forward
    x = self.__conv(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/boyuansun/YOLOV3/model/layers/conv_module.py", line 36, in forward
    x = self.__conv(x)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/conv.py", line 353, in forward
    return self._conv_forward(input, self.weight)
  File "/root/anaconda3/envs/pytorch1.5/lib/python3.5/site-packages/torch/nn/modules/conv.py", line 350, in _conv_forward
    self.padding, self.dilation, self.groups)
RuntimeError: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED
terminate called without an active exception

我认为该报错可能是由于显存没有得到及时释放引起的，在多次validate之后，就有可能出现上述错误。请问您有好的解决方案吗？

因为我没有在训练过程中使用这个代码，只是拿来最终测试了一下，所以也没有碰到你这个问题。我在这里找到了一些解决方案，可能减少batch_size是一个好点的解决方案。
也可以去pytorch的论坛用英文关键词找一下，有很多细节问题都可以在上面找到。

from yolov3.

validate的速度特别慢 about yolov3 HOT 4 CLOSED

Comments (4)

Related Issues (20)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent