轉載自:faster rcnn源碼解讀(四)之數據類型imdb.py和pascal_voc.py(主要是imdb和roidb數據類型的解說) - 野孩子的專欄 - 博客頻道 - CSDN.NET
http://blog.csdn.net/u010668907/article/details/51945719
faster用python版本的https://github.com/rbgirshick/py-faster-rcnn
imdb.py源碼地址:https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py
imdb源碼:
[python]?view plaincopy print?
import os
import os.path as osp
import PIL
from utils.cython_bbox import bbox_overlaps
import numpy as np
import scipy.sparse
from fast_rcnn.config import cfg


class imdb(object):
    """Generic image database.

    Holds a dataset name, its class list, its image index and a lazily
    built ``roidb`` (one dict per image).  Dataset-specific subclasses are
    expected to provide ``image_path_at``, ``default_roidb`` and
    ``evaluate_detections``, which raise ``NotImplementedError`` here.
    """

    def __init__(self, name):
        self._name = name
        self._num_classes = 0
        self._classes = []
        self._image_index = []
        self._obj_proposer = 'selective_search'
        self._roidb = None
        self._roidb_handler = self.default_roidb
        # Free-form dict for dataset-specific configuration options.
        self.config = {}

    @property
    def name(self):
        return self._name

    @property
    def num_classes(self):
        return len(self._classes)

    @property
    def classes(self):
        return self._classes

    @property
    def image_index(self):
        return self._image_index

    @property
    def roidb_handler(self):
        return self._roidb_handler

    @roidb_handler.setter
    def roidb_handler(self, val):
        self._roidb_handler = val

    def set_proposal_method(self, method):
        """Select the roidb builder named ``<method>_roidb`` on this object.

        Raises ``AttributeError`` if no such attribute exists.
        """
        # getattr performs the same lookup the former eval('self.' + ...)
        # did, without evaluating an arbitrary string as code.
        self.roidb_handler = getattr(self, method + '_roidb')

    @property
    def roidb(self):
        """List of per-image dicts, built on first access and then cached.

        The list is produced by calling ``self.roidb_handler()``.  Entries
        created in this class carry the keys 'boxes', 'gt_overlaps',
        'gt_classes', 'flipped' and (where available) 'seg_areas'.
        """
        if self._roidb is not None:
            return self._roidb
        self._roidb = self.roidb_handler()
        return self._roidb

    @property
    def cache_path(self):
        """Absolute path of ``<cfg.DATA_DIR>/cache``, created if missing."""
        cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
        if not os.path.exists(cache_path):
            os.makedirs(cache_path)
        return cache_path

    @property
    def num_images(self):
        return len(self.image_index)

    def image_path_at(self, i):
        """Return the path of image ``i``; must be provided by subclasses."""
        raise NotImplementedError

    def default_roidb(self):
        """Build the default roidb; must be provided by subclasses."""
        raise NotImplementedError

    def evaluate_detections(self, all_boxes, output_dir=None):
        """Evaluate detections; must be provided by subclasses.

        NOTE(review): the layout of ``all_boxes`` is not visible in this
        class -- confirm against the concrete dataset implementation.
        """
        raise NotImplementedError

    def _get_widths(self):
        """Return the pixel width of every image, in image-index order."""
        # ``import PIL`` at module level does not load the Image submodule;
        # import it explicitly so this works even if no other module did.
        import PIL.Image
        return [PIL.Image.open(self.image_path_at(i)).size[0]
                for i in range(self.num_images)]

    def append_flipped_images(self):
        """Append a horizontally flipped copy of every roidb entry.

        The x-coordinates (columns 0 and 2 of 'boxes') are mirrored around
        the image width; the image index is doubled to match the new roidb
        length.
        """
        num_images = self.num_images
        widths = self._get_widths()
        for i in range(num_images):
            source = self.roidb[i]
            boxes = source['boxes'].copy()
            oldx1 = boxes[:, 0].copy()
            oldx2 = boxes[:, 2].copy()
            boxes[:, 0] = widths[i] - oldx2 - 1
            boxes[:, 2] = widths[i] - oldx1 - 1
            assert (boxes[:, 2] >= boxes[:, 0]).all()
            entry = {'boxes': boxes,
                     'gt_overlaps': source['gt_overlaps'],
                     'gt_classes': source['gt_classes'],
                     'flipped': True}
            # A horizontal flip does not change box areas.  Carry them over
            # so evaluate_recall / merge_roidbs, which read 'seg_areas',
            # also work on the flipped entries.
            if 'seg_areas' in source:
                entry['seg_areas'] = source['seg_areas']
            self.roidb.append(entry)
        self._image_index = self._image_index * 2

    def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                        area='all', limit=None):
        """Evaluate proposal recall against the ground-truth boxes.

        Args:
            candidate_boxes: optional per-image list of box arrays.  When
                None, the roidb boxes whose 'gt_classes' value is 0 are
                used as the candidates.
            thresholds: overlap thresholds at which recall is computed;
                defaults to 0.5, 0.55, ..., 0.95.
            area: name of the ground-truth area range to evaluate, one of
                the keys of ``areas`` below.
            limit: if given, only the first ``limit`` candidate boxes of
                each image are used.

        Returns:
            dict with keys 'ar' (mean of the recalls), 'recalls' (recall
            per threshold), 'thresholds' and 'gt_overlaps' (sorted best
            overlap found for each covered ground-truth box).
        """
        areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
                 '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
        area_ranges = [[0**2, 1e5**2],    # all
                       [0**2, 32**2],     # small
                       [32**2, 96**2],    # medium
                       [96**2, 1e5**2],   # large
                       [96**2, 128**2],   # 96-128
                       [128**2, 256**2],  # 128-256
                       [256**2, 512**2],  # 256-512
                       [512**2, 1e5**2],  # 512-inf
                       ]
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            entry = self.roidb[i]
            # Ground-truth rows are those with a foreground class whose
            # maximum overlap is exactly 1.
            max_gt_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
            gt_inds = np.where((entry['gt_classes'] > 0) &
                               (max_gt_overlaps == 1))[0]
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_areas = entry['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                     (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # Fall back to the non-ground-truth boxes stored in the
                # roidb itself.
                non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
                boxes = entry['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # builtin float is what the removed ``np.float`` alias meant.
            overlaps = bbox_overlaps(boxes.astype(float),
                                     gt_boxes.astype(float))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in range(gt_boxes.shape[0]):
                # Best candidate for every ground-truth box ...
                argmax_overlaps = overlaps.argmax(axis=0)
                # ... and the overlap it achieves.
                max_overlaps = overlaps.max(axis=0)
                # Pick the ground-truth box that is covered best.
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert(gt_ovr >= 0)
                # The candidate box covering it.
                box_ind = argmax_overlaps[gt_ind]
                # Record the overlap for this match.
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert(_gt_overlaps[j] == gt_ovr)
                # Mark both the candidate and the ground-truth box as used
                # so neither can be matched again.
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # Accumulate the per-image results.
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # Recall at each overlap threshold.
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        ar = recalls.mean()
        return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
                'gt_overlaps': gt_overlaps}

    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build a roidb from one array of boxes per image.

        Args:
            box_list: per-image box arrays; its length must equal
                ``self.num_images``.
            gt_roidb: ground-truth roidb, or None.  When an image has
                ground-truth boxes, each box with a positive best overlap
                gets that overlap stored in the column of the matching
                ground-truth box's class.

        Returns:
            list of dicts with keys 'boxes', 'gt_classes' (all zeros),
            'gt_overlaps' (sparse CSR matrix), 'flipped' (False) and
            'seg_areas' (all zeros).
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                gt_overlaps = bbox_overlaps(boxes.astype(float),
                                            gt_boxes.astype(float))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes': boxes,
                'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps': overlaps,
                'flipped': False,
                'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb

    @staticmethod
    def merge_roidbs(a, b):
        """Append the entries of roidb ``b`` onto roidb ``a``, image by image.

        ``a`` is modified in place and also returned.  Both lists must have
        the same length.
        """
        assert len(a) == len(b)
        for i in range(len(a)):
            a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
            a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
                                            b[i]['gt_classes']))
            a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
                                                       b[i]['gt_overlaps']])
            a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'],
                                           b[i]['seg_areas']))
        return a

    def competition_mode(self, on):
        """Hook for toggling competition mode; a no-op in the base class."""
        pass
get_imdb->factory->pascal_voc->(繼承)imdb
factory
  year = ['2007', '2012']
  split = ['train', 'val', 'trainval', 'test']
?
imdb
  image_set: split
  devkit_path: config.DATA_DIR(root/data/) + VOCdevkit + year
  data_path: devkit_path + '/' + 'VOC' + year
  image_index: 從下面的文件中讀出的圖片名列表(a list of image names)
      例如,root/data + /VOCdevkit2007/VOC2007/ImageSets/Main/{image_set}.txt
  roidb: 由gt_roidb得到(cfg.TRAIN.PROPOSAL_METHOD=gt導致了此操作)
  classes: 類別定義
  num_classes: 類別的個數,即len(classes)
  class_to_ind: {類別名: 類別索引}字典
  num_images: 即len(image_index),數據庫中圖片個數(在imdb.py中是property,調用時不加括號)
  image_path_at(index): 得到第index張圖片的地址,data_path + '/' + 'JPEGImages' + '/' + image_index[index] + image_ext(.jpg)
?
  在train_faster_rcnn_alt_opt.py中調用imdb.set_proposal_method之後,一旦用到imdb.roidb,都會通過gt_roidb讀取xml中的內容,從中得到部分信息
xml的地址:data_path + '/' + 'Annotations' + '/' + index + '.xml'
      即 (root/data/) + VOCdevkit + year + '/' + 'VOC' + year + '/' + 'Annotations' + '/' + index + '.xml'
get_training_roidb: 對得到的roi做是否反轉(參見roidb的flipped,為了擴充數據庫)和到roidb.py的prepare_roidb中計算得到roidb的其他數據
?
一張圖有一個roidb,每個roidb是一個字典
roidb:
  boxes: N行4列,每一行是一個box(proposal)的左上角和右下角坐標(x1, y1, x2, y2)
  gt_overlaps: len(box)*類別數(即,每個box對應的類別。初始化時,從xml讀出來的類別對應類別值是1.0,以稀疏矩陣壓縮保存)
  gt_classes: 每個box的類別索引
  flipped: true,代表圖片被水平反轉,改變了boxes里第一、三列的值(所有原圖都這樣的操作,imdb.image_index*2)(cfg.TRAIN.USE_FLIPPED會導致此操作的發生,見train.py 116行)
  seg_areas: box的面積
  (下面的值在roidb.py的prepare_roidb中得到)
  image: image_path_at(index),此roi的圖片地址
  width: 此圖片的寬
  height: 此圖片的高
  max_classes: box的類別=labels(gt_overlaps每行最大值的索引)
  max_overlaps: gt_overlaps每行的最大值(若max_overlaps=0,則max_classes必須為0,即該box是背景,否則數據不正確)
output_dir: ROOT_DIR + 'output' + EXP_DIR('faster_rcnn_alt_opt') + imdb.name("voc_2007_trainval" or "voc_2007_test")
《新程序員》:云原生和全面數字化實踐50位技術專家共同創作,文字、視頻、音頻交互閱讀
總結
以上是生活随笔為你收集整理的faster rcnn源码解读(四)之数据类型imdb.py和pascal_voc.py(主要是imdb和roidb数据类型的解说)的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。