當前位置：首頁 > 运维知识 > Ubuntu >内容正文

Ubuntu

【Caffe-Ubuntu】JSON 标签生成自己的 Caffe-LMDB 数据文件

發布時間：2023/12/15 Ubuntu 28 豆豆

生活随笔收集整理的這篇文章主要介紹了【Caffe-Ubuntu】JSON 标签生成自己的 Caffe-LMDB 数据文件，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

0：生成 LMDB 的流程

已有的 json 數據集，可以通過 labelme 等開源工具標注，或者自己寫腳本生成

將 json 文件轉成 voc2007 格式的文件（labelme 格式轉 VOC2007 數據集格式）

設置個人的 labelmap.prototxt

借用 ssd-caffe 的 create_list.sh 腳本生成圖片與標注文件的路徑列表（供下一步轉換 LMDB 使用）

借用ssd-caffe的 create_data.sh 腳本生成 LMDB 格式文件

1：制作自己的數據集

這里推薦幾款好用的標注工具
labelme：安裝簡單，支持標定關鍵點，分割等，非常好用，生成json格式的標簽文件。
格式：下面列出的標簽參數都是必須的，否則labelme無法正常識別

{"shapes": [{"shape_type": "polygon", "line_color": null, "points": [[ 634, 276 ], [ 703, 275 ], [ 705, 312 ], [ 635, 313 ]], "fill_color": null, "label": "traffic-4"}, {"shape_type": "polygon", "line_color": null, "points": [[ 715, 275 ], [ 785, 274 ], [ 786, 313 ], [ 716, 312 ]], "fill_color": null, "label": "traffic-4-occ-largely"}], "lineColor": [ 0, 255, 0, 128 ], "imagePath": "2012-3-23_20-23-25_0.jpg", "fillColor": [ 255, 0, 0, 128 ], "imageData": null }

labelImg：安裝簡單，非常方便的畫框的標定工具。
支持PASCAL VOC格式的XML標簽。

其他：待續。。。

2 ：JSON 2 VOC2007

# -*- coding: utf-8 -*-
"""Convert labelme JSON annotations into a VOC2007-style dataset.

Walks ``./data/`` for ``*.json`` files, writes one VOC XML annotation per
JSON file into ``./VOC2007/Annotations``, copies the matching ``.jpg`` into
``./VOC2007/JPEGImages`` and finally writes the train/val split lists into
``./VOC2007/ImageSets/Main``.
"""
import codecs
import json
import os
import re
import shutil
from glob import glob

import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Only shapes carrying one of these labels are exported to the XML files.
KEPT_LABELS = ("traffic-3", "traffic-3-occ-partially")

# Anchored and escaped so that only names really ending in ".json" match.
JSON_NAME_RE = re.compile(r"\.json$")


def iter_files(data_root_path, saved_path):
    """Convert every ``*.json`` file found below *data_root_path*.

    ``os.walk`` already descends into every sub-directory, so no explicit
    recursion is needed.

    Args:
        data_root_path: directory tree holding the ``.json``/``.jpg`` pairs.
        saved_path: root of the VOC2007 output tree.

    Returns:
        The number of JSON files that were visited.
    """
    count = 0
    for root, _dirs, files in os.walk(data_root_path):
        for json_file in files:
            if not JSON_NAME_RE.search(json_file):
                continue
            file_name = os.path.splitext(json_file)[0]
            file_path = os.path.join(root, json_file)
            count += 1
            print("====================================================================")
            print(count)
            print(file_path)
            # json -> voc2007
            json2voc2007(file_name, root, saved_path)
    return count


def _bounding_box(points):
    """Return ``(xmin, ymin, xmax, ymax)`` as ints, or ``None`` if degenerate."""
    pts = np.array(points, dtype=float)
    if pts.ndim != 2 or pts.shape[0] == 0 or pts.shape[1] < 2:
        return None
    # labelme may store float coordinates; VOC bounding boxes are integers.
    xmin = int(round(pts[:, 0].min()))
    xmax = int(round(pts[:, 0].max()))
    ymin = int(round(pts[:, 1].min()))
    ymax = int(round(pts[:, 1].max()))
    if xmax <= xmin or ymax <= ymin:
        return None
    return xmin, ymin, xmax, ymax


def json2voc2007(json_file_, labelme_path, saved_path):
    """Write the VOC XML for one labelme JSON file and copy its image.

    Args:
        json_file_: file name without extension (shared by ``.json``/``.jpg``).
        labelme_path: directory containing the JSON file and the image.
        saved_path: root of the VOC2007 output tree; ``Annotations`` and
            ``JPEGImages`` must already exist below it.

    The sample is skipped (with a message) when the image cannot be read.
    The train/val lists are NOT written here; ``main`` writes them once
    after all files have been converted.
    """
    json_filename = os.path.join(labelme_path, json_file_ + ".json")
    image_path = os.path.join(labelme_path, json_file_ + ".jpg")

    with codecs.open(json_filename, "r", "utf-8") as fp:
        json_file = json.load(fp)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        print("skip " + json_filename + ": cannot read image " + image_path)
        return
    height, width, channels = image.shape

    xml_path = os.path.join(saved_path, "Annotations", json_file_ + ".xml")
    with codecs.open(xml_path, "w", "utf-8") as xml:
        xml.write('<annotation>\n')
        xml.write('\t<folder>' + 'TrafficSign' + '</folder>\n')
        xml.write('\t<filename>' + json_file_ + ".jpg" + '</filename>\n')
        xml.write('\t<source>\n')
        xml.write('\t\t<database>The UAV autolanding</database>\n')
        xml.write('\t\t<annotation>UAV AutoLanding</annotation>\n')
        xml.write('\t\t<image>flickr</image>\n')
        xml.write('\t\t<flickrid>NULL</flickrid>\n')
        xml.write('\t</source>\n')
        xml.write('\t<owner>\n')
        xml.write('\t\t<flickrid>NULL</flickrid>\n')
        xml.write('\t\t<name>TrafficSign</name>\n')
        xml.write('\t</owner>\n')
        xml.write('\t<size>\n')
        xml.write('\t\t<width>' + str(width) + '</width>\n')
        xml.write('\t\t<height>' + str(height) + '</height>\n')
        xml.write('\t\t<depth>' + str(channels) + '</depth>\n')
        xml.write('\t</size>\n')
        xml.write('\t\t<segmented>0</segmented>\n')

        for multi in json_file["shapes"]:
            label = multi["label"]
            # Label filter added by the article's author.
            if label not in KEPT_LABELS:
                continue
            box = _bounding_box(multi["points"])
            if box is None:
                continue
            xmin, ymin, xmax, ymax = box
            xml.write('\t<object>\n')
            xml.write('\t\t<name>' + label + '</name>\n')
            xml.write('\t\t<pose>Unspecified</pose>\n')
            xml.write('\t\t<truncated>1</truncated>\n')
            xml.write('\t\t<difficult>0</difficult>\n')
            xml.write('\t\t<bndbox>\n')
            xml.write('\t\t\t<xmin>' + str(xmin) + '</xmin>\n')
            xml.write('\t\t\t<ymin>' + str(ymin) + '</ymin>\n')
            xml.write('\t\t\t<xmax>' + str(xmax) + '</xmax>\n')
            xml.write('\t\t\t<ymax>' + str(ymax) + '</ymax>\n')
            xml.write('\t\t</bndbox>\n')
            xml.write('\t</object>\n')
            print(json_filename, xmin, ymin, xmax, ymax, label)
        xml.write('</annotation>')

    # 5. Copy the image to VOC2007/JPEGImages/
    print("copy image files to VOC007/JPEGImages/")
    shutil.copyfile(
        image_path, os.path.join(saved_path, "JPEGImages", json_file_ + ".jpg")
    )


def _write_list(path, names):
    """Write one name per line to *path*, closing the file afterwards."""
    with codecs.open(path, "w", "utf-8") as handle:
        for name in names:
            handle.write(name + "\n")


def _write_image_sets(saved_path, test_size=0.10, random_state=42):
    """Write trainval/train/val/test lists from the generated annotations.

    Args:
        saved_path: root of the VOC2007 output tree.
        test_size: fraction of the samples that goes into ``val.txt``.
        random_state: seed handed to ``train_test_split``.
    """
    txtsavepath = os.path.join(saved_path, "ImageSets", "Main")
    # Sorted, because glob order is platform dependent and the seed alone
    # would otherwise not make the split reproducible.
    xml_files = sorted(glob(os.path.join(saved_path, "Annotations", "*.xml")))
    total_files = [os.path.splitext(os.path.basename(p))[0] for p in xml_files]

    if len(total_files) >= 2:
        train_files, val_files = train_test_split(
            total_files, test_size=test_size, random_state=random_state
        )
    else:
        # train_test_split raises when a side of the split would be empty.
        train_files, val_files = list(total_files), []

    _write_list(os.path.join(txtsavepath, "trainval.txt"), total_files)
    _write_list(os.path.join(txtsavepath, "train.txt"), train_files)
    _write_list(os.path.join(txtsavepath, "val.txt"), val_files)
    # No separate test set is configured; keep an empty test.txt as before.
    _write_list(os.path.join(txtsavepath, "test.txt"), [])


def main():
    """Create the VOC2007 folder layout, convert all data and split it."""
    # 1. Output path
    saved_path = "./VOC2007/"
    # 2. Create the required folders
    for sub_dir in ("Annotations", "JPEGImages/", "ImageSets/Main/"):
        target = saved_path + sub_dir
        if not os.path.exists(target):
            os.makedirs(target)
    data_root_path = "./data/"
    # 3. Walk every level of the input tree
    iter_files(data_root_path, saved_path)
    # 4. Write the split lists once, after every annotation exists
    _write_image_sets(saved_path)


if __name__ == '__main__':
    main()

3：labelmap.prototxt 設定（以背景，目標兩類為例）

item {
  name: "none_of_the_above"
  label: 0
  display_name: "background"
}
item {
  name: "face"
  label: 1
  display_name: "face"
}

4：create_list.sh

這里主要注意，root_dir 要修改為自己 VOC2007 的路徑

#!/bin/bash
# Build "<image path> <annotation path>" list files next to this script
# from the VOC2007 split lists in $root_dir/$sub_dir.
# Both train and val are processed: create_data.sh reads train.txt, and the
# name/size file is generated for val.

root_dir=$HOME/data/VOC2007
sub_dir=ImageSets/Main
echo "$(dirname "${BASH_SOURCE[0]}")"
bash_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
for dataset in train val
do
  dst_file=$bash_dir/$dataset.txt
  if [ -f "$dst_file" ]
  then
    rm -f "$dst_file"
  fi
  echo "Create list for $dataset..."
  echo "$root_dir/$sub_dir/$dataset.txt"
  dataset_file=$root_dir/$sub_dir/$dataset.txt
  if [ ! -f "$dataset_file" ]
  then
    # Without the split list there is nothing to convert for this subset.
    echo "Missing $dataset_file, skipping $dataset" >&2
    continue
  fi

  # Image paths: /JPEGImages/<name>.jpg (relative to $root_dir).
  img_file=$bash_dir/$dataset"_img.txt"
  cp "$dataset_file" "$img_file"
  sed -i "s/^/\/JPEGImages\//g" "$img_file"
  sed -i "s/$/.jpg/g" "$img_file"

  # Annotation paths: /Annotations/<name>.xml (relative to $root_dir).
  label_file=$bash_dir/$dataset"_label.txt"
  cp "$dataset_file" "$label_file"
  sed -i "s/^/\/Annotations\//g" "$label_file"
  sed -i "s/$/.xml/g" "$label_file"

  paste -d' ' "$img_file" "$label_file" >> "$dst_file"

  rm -f "$label_file"
  rm -f "$img_file"

  # Generate image name and size infomation.
  if [ "$dataset" == "val" ]
  then
    "$bash_dir"/../../build/tools/get_image_size "$root_dir" "$dst_file" "$bash_dir/$dataset""_name_size.txt"
  fi

  # Shuffle train file.
  if [ "$dataset" == "train" ]
  then
    rand_file=$dst_file.random
    cat "$dst_file" | perl -MList::Util=shuffle -e 'print shuffle(<STDIN>);' > "$rand_file"
    mv "$rand_file" "$dst_file"
  fi
done

5：create_data.sh

#!/bin/bash
# Convert the list file(s) produced by create_list.sh into LMDB databases
# by calling ssd-caffe's scripts/create_annoset.py.

cur_dir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
echo "$cur_dir"
root_dir=$cur_dir/../..

# Every path below is relative to the caffe root; stop if it is unreachable.
cd "$root_dir" || exit 1

# Set redo=1 to overwrite an existing database, redo=0 to keep it.
redo=1
data_root_dir="$HOME/data/VOC2007"
dataset_name="DataName"
mapfile="$root_dir/data/$dataset_name/labelmap.prototxt"
anno_type="detection"
db="lmdb"
min_dim=0
max_dim=0
width=0
height=0

extra_cmd="--encode-type=jpg --encoded"
# Compare the value: a bare [ $redo ] is true for any non-empty string,
# including redo=0.
if [ "$redo" -eq 1 ]
then
  extra_cmd="$extra_cmd --redo"
fi
for subset in train
do
  # $extra_cmd is intentionally unquoted so it splits into separate options.
  sudo python2 "$root_dir/scripts/create_annoset.py" --anno-type=$anno_type --label-map-file="$mapfile" --min-dim=$min_dim --max-dim=$max_dim --resize-width=$width --resize-height=$height --check-label $extra_cmd "$data_root_dir" "$root_dir/data/$dataset_name/$subset.txt" "$data_root_dir/$dataset_name/$db/$dataset_name""_""$subset""_""$db" "examples/$dataset_name"
done

總結

以上是生活随笔為你收集整理的【Caffe-Ubuntu】JSON 标签生成自己的 Caffe-LMDB 数据文件的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：电脑怎么下王者荣耀(太平洋电脑网)
下一篇：智云4支持哪些手机