铁雪资源网 Design By www.gsvan.com
1. Python 提取 COCO 数据集中特定的类
安装 pycocotools,GitHub 地址:https://github.com/philferriere/cocoapi
pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI
提取特定的类别如下:
"""Extract selected classes from COCO and save them as VOC-style XML annotations.

For every image that contains at least one of ``classes_names`` the image is
copied to ``img_dir`` and a Pascal-VOC XML file with the matching bounding
boxes is written to ``anno_dir``.
"""
from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw

# Root folder for the results of the COCO -> VOC extraction.
# WARNING: ``img_dir`` and ``anno_dir`` are deleted and recreated on every run.
savepath = "/media/huanglong/Newsmy/COCO/"
img_dir = savepath + 'images/'
anno_dir = savepath + 'Annotations/'

# Dataset splits to process, e.g. ['train2014', 'val2014'].
datasets_list = ['train2014']

# COCO has 80 classes; list the names of the classes to extract here.
classes_names = ['person']

# Root of the original COCO dataset. It must contain ``annotations/`` and one
# image folder per split (train2014/, val2014/, ...).
dataDir = '/media/huanglong/Newsmy/COCO/'

# VOC annotation header: filename, width, height, depth.
headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>company</name>
    </owner>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""

# One VOC object entry: class name, xmin, ymin, xmax, ymax.
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""

tailstr = '''\
</annotation>
'''


def mkr(path):
    """Create an empty directory at *path*, deleting an existing one first.

    Missing parent directories are created as well.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)


def id2name(coco):
    """Return a ``{category_id: category_name}`` dict for a loaded COCO object."""
    return {cat['id']: cat['name'] for cat in coco.dataset['categories']}


def write_xml(anno_path, head, objs, tail):
    """Write one VOC annotation file.

    Args:
        anno_path: destination XML path.
        head: already formatted header (see ``headstr``).
        objs: iterable of ``[class_name, xmin, ymin, xmax, ymax]``.
        tail: closing text (see ``tailstr``).
    """
    with open(anno_path, "w") as f:
        f.write(head)
        for obj in objs:
            f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
        f.write(tail)


def save_annotations_and_imgs(coco, dataset, filename, objs):
    """Copy one image to ``img_dir`` and write its VOC annotation to ``anno_dir``.

    The annotation is named after the image, e.g.
    COCO_train2014_000000196610.jpg --> COCO_train2014_000000196610.xml.
    Images that OpenCV cannot read are reported and skipped, so no image is
    copied without a matching annotation.

    Args:
        coco: loaded COCO object (unused, kept for interface compatibility).
        dataset: split name, i.e. the image sub-folder of ``dataDir``.
        filename: image file name inside that folder.
        objs: list of ``[class_name, xmin, ymin, xmax, ymax]``.
    """
    anno_path = os.path.join(anno_dir, os.path.splitext(filename)[0] + '.xml')
    img_path = os.path.join(dataDir, dataset, filename)
    dst_imgpath = os.path.join(img_dir, filename)

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(img_path)
    if img is None:
        print('skip unreadable image: ' + img_path)
        return

    shutil.copy(img_path, dst_imgpath)

    height, width = img.shape[:2]
    depth = img.shape[2] if img.ndim == 3 else 1
    head = headstr % (filename, width, height, depth)
    write_xml(anno_path, head, objs, tailstr)


def showimg(coco, dataset, img, classes, cls_id, show=True):
    """Collect the VOC boxes of one image and optionally display them.

    Args:
        coco: loaded COCO object.
        dataset: split name, i.e. the image sub-folder of ``dataDir``.
        img: COCO image record (needs ``id`` and ``file_name``).
        classes: ``{category_id: name}`` mapping, see ``id2name``.
        cls_id: category ids used to filter the annotations.
        show: when true, draw the boxes on the image and show it with
            matplotlib. The image file is only opened in that case.

    Returns:
        List of ``[class_name, xmin, ymin, xmax, ymax]`` with integer
        corner coordinates, restricted to ``classes_names``.
    """
    # Look up the annotations of this image by id.
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(annIds)

    objs = []
    for ann in anns:
        class_name = classes[ann['category_id']]
        if class_name not in classes_names or 'bbox' not in ann:
            continue
        # COCO boxes are [x, y, width, height]; VOC wants the two corners.
        bbox = ann['bbox']
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([class_name, xmin, ymin, xmax, ymax])

    if show:
        with Image.open(os.path.join(dataDir, dataset, img['file_name'])) as I:
            draw = ImageDraw.Draw(I)
            for _, xmin, ymin, xmax, ymax in objs:
                draw.rectangle([xmin, ymin, xmax, ymax])
            plt.figure()
            plt.axis('off')
            plt.imshow(I)
            plt.show()

    return objs


def main():
    """Run the extraction for every split in ``datasets_list``."""
    mkr(img_dir)
    mkr(anno_dir)

    for dataset in datasets_list:
        # e.g. ./COCO/annotations/instances_train2014.json
        annFile = os.path.join(dataDir, 'annotations',
                               'instances_{}.json'.format(dataset))

        # COCO API for initializing annotated data.
        coco = COCO(annFile)

        # Show all classes in coco.
        classes = id2name(coco)
        print(classes)

        classes_ids = coco.getCatIds(catNms=classes_names)
        print(classes_ids)

        # Union of the images of all requested classes, so that an image
        # containing several of them is processed only once.
        img_ids = set()
        for cls in classes_names:
            cls_id = coco.getCatIds(catNms=[cls])
            if not cls_id:
                # An empty category filter makes getImgIds return EVERY
                # image, so an unknown name must not reach it.
                print('unknown class name, skipped: ' + cls)
                continue
            cls_img_ids = coco.getImgIds(catIds=cls_id)
            print(cls, len(cls_img_ids))
            img_ids.update(cls_img_ids)

        for imgId in tqdm(sorted(img_ids)):
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            objs = showimg(coco, dataset, img, classes, classes_ids, show=False)
            save_annotations_and_imgs(coco, dataset, filename, objs)


if __name__ == '__main__':
    main()
2. 将上一步提取的 COCO 某一类的 xml 标注转为 COCO 标准的 json 文件:
# -*- coding: utf-8 -*-
# @Time    : 2019/8/27 10:48
# @Author  : Rock
# @File    : voc2coco.py
# just for object detection
"""Convert a folder of Pascal-VOC XML annotations into one COCO JSON file.

The result is accumulated in the module-level ``coco`` dict by
``parseXmlFiles`` and dumped to disk by the ``__main__`` section.
"""
import xml.etree.ElementTree as ET
import os
import json

# The COCO document being built.
coco = dict()
coco['images'] = []
coco['type'] = 'instances'
coco['annotations'] = []
coco['categories'] = []

# category name -> category id
category_set = dict()
# file names of all images registered so far
image_set = set()

# Running id counters.
category_item_id = 0
image_id = 0
annotation_id = 0


def addCatItem(name):
    """Register category *name* and return its new id (ids start at 1)."""
    global category_item_id
    category_item_id += 1
    category_item = dict()
    category_item['supercategory'] = 'none'
    category_item['id'] = category_item_id
    category_item['name'] = name
    coco['categories'].append(category_item)
    category_set[name] = category_item_id
    return category_item_id


def addImgItem(file_name, size):
    """Register an image and return the id stored in its COCO record.

    Args:
        file_name: text of the ``<filename>`` tag.
        size: dict with at least ``width`` and ``height``.

    Returns:
        The ``id`` written to the image record (ids start at 0). Annotations
        must use exactly this value as their ``image_id``.

    Raises:
        Exception: if the file name, width or height is missing.
    """
    global image_id
    if file_name is None:
        raise Exception('Could not find filename tag in xml file.')
    if size['width'] is None:
        raise Exception('Could not find width tag in xml file.')
    if size['height'] is None:
        raise Exception('Could not find height tag in xml file.')

    # Hand out the current counter value and advance it afterwards, so the
    # returned id and the stored id are always the same number.
    current_id = image_id
    image_id += 1

    image_item = dict()
    image_item['id'] = current_id
    image_item['file_name'] = file_name
    image_item['width'] = size['width']
    image_item['height'] = size['height']
    coco['images'].append(image_item)
    image_set.add(file_name)
    return current_id


def addAnnoItem(object_name, image_id, category_id, bbox):
    """Append one annotation to ``coco['annotations']``.

    Args:
        object_name: class name (unused, kept for interface compatibility).
        image_id: id returned by ``addImgItem`` for the owning image.
        category_id: id returned by ``addCatItem``.
        bbox: ``[x, y, w, h]``.
    """
    global annotation_id
    x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]

    annotation_item = dict()
    # The box outline as a polygon:
    # left_top, left_bottom, right_bottom, right_top.
    annotation_item['segmentation'] = [[x, y, x, y + h, x + w, y + h, x + w, y]]
    annotation_item['area'] = w * h
    annotation_item['iscrowd'] = 0
    annotation_item['ignore'] = 0
    annotation_item['image_id'] = image_id
    annotation_item['bbox'] = bbox
    annotation_item['category_id'] = category_id
    annotation_id += 1
    annotation_item['id'] = annotation_id
    coco['annotations'].append(annotation_item)


def _parse_int(text):
    """Convert tag text to int, also accepting float notation such as '12.0'."""
    try:
        return int(text)
    except ValueError:
        return int(float(text))


def _read_size(root):
    """Return the width/height/depth of the ``<size>`` tag (None when absent)."""
    size = {'width': None, 'height': None, 'depth': None}
    size_elem = root.find('size')
    if size_elem is None:
        return size
    for child in size_elem:
        if child.tag not in size:
            continue
        if size[child.tag] is not None:
            raise Exception('xml structure broken at size tag.')
        size[child.tag] = _parse_int(child.text)
    return size


def _read_bbox(bndbox_elem):
    """Turn a ``<bndbox>`` element into a COCO ``[x, y, w, h]`` list."""
    corners = {}
    for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
        text = bndbox_elem.findtext(tag)
        if text is None:
            raise Exception('xml structure broken at bndbox tag')
        corners[tag] = _parse_int(text)
    return [
        corners['xmin'],
        corners['ymin'],
        corners['xmax'] - corners['xmin'],
        corners['ymax'] - corners['ymin'],
    ]


def parseXmlFiles(xml_path):
    """Add every ``*.xml`` file in *xml_path* to the module-level ``coco`` dict.

    Files are processed in sorted order so the generated ids are
    reproducible. Tags are looked up by name, so the order of ``<filename>``,
    ``<size>`` and ``<object>`` inside a file does not matter. Only the
    ``<name>`` and ``<bndbox>`` that are direct children of an ``<object>``
    are used.

    Raises:
        Exception: on a wrong root tag, a duplicated image file name, a
            missing filename/width/height, or a malformed ``<bndbox>``.
    """
    for f in sorted(os.listdir(xml_path)):
        if not f.endswith('.xml'):
            continue

        xml_file = os.path.join(xml_path, f)
        root = ET.parse(xml_file).getroot()
        if root.tag != 'annotation':
            raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))

        file_name = root.findtext('filename')
        if file_name in image_set:
            raise Exception('duplicated image: {}'.format(file_name))
        current_image_id = addImgItem(file_name, _read_size(root))

        for obj in root.findall('object'):
            object_name = obj.findtext('name')
            bndbox_elem = obj.find('bndbox')

            if object_name is None:
                if bndbox_elem is not None:
                    raise Exception('xml structure broken at bndbox tag')
                continue

            if object_name not in category_set:
                current_category_id = addCatItem(object_name)
            else:
                current_category_id = category_set[object_name]

            if bndbox_elem is None:
                continue
            bbox = _read_bbox(bndbox_elem)
            addAnnoItem(object_name, current_image_id, current_category_id, bbox)


if __name__ == '__main__':
    # Adjust these two paths: the folder that holds the xml files, and the
    # absolute path of the json file to generate.
    xml_path = r'G:\dataset\COCO\person\coco_val2014\annotations\\'
    json_file = r'G:\dataset\COCO\person\coco_val2014\instances_val2014.json'
    parseXmlFiles(xml_path)
    with open(json_file, 'w') as fp:
        json.dump(coco, fp)
3. Python 提取 Pascal VOC 数据集中特定的类
# -*- coding: utf-8 -*-
# @Function: There are 20 classes in VOC data set. If you need to extract
#            specific classes, you can use this program to extract them.
"""Copy the Pascal VOC images and annotations that contain selected classes.

Each annotation is rewritten so that it only lists objects of the wanted
classes; annotations without any such object are skipped together with
their image.
"""
import os
import shutil
import xml.etree.ElementTree as ET

ann_filepath = 'E:/VOCdevkit/VOC2012/Annotations/'
img_filepath = 'E:/VOCdevkit/VOC2012/JPEGImages/'
img_savepath = 'E:/TrafficDatasets/JPEGImages/'
ann_savepath = 'E:/TrafficDatasets/Annotations/'

# The 20 class names of the VOC data set, kept for reference.
classes = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
           'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
           'dog', 'horse', 'motorbike', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor', 'person']

# The classes to extract.
wanted_classes = ['bicycle', 'bus', 'car', 'motorbike', 'train']


def _remove_child(parent, child):
    """Remove *child* from *parent* without disturbing the file's indentation."""
    siblings = list(parent)
    position = siblings.index(child)
    # The whitespace that followed the removed element now has to follow
    # whatever precedes it, otherwise the next tag ends up mis-indented.
    if position > 0:
        siblings[position - 1].tail = child.tail
    else:
        parent.text = child.tail
    parent.remove(child)


def filter_annotation(src_xml, dst_xml, keep):
    """Write a copy of *src_xml* that only contains objects named in *keep*.

    All other tags of the annotation are preserved, wherever they appear in
    the file. Nothing is written when no object matches.

    Args:
        src_xml: path of the original VOC annotation.
        dst_xml: path of the filtered annotation to create.
        keep: container of class names to keep.

    Returns:
        The number of objects that were kept.
    """
    tree = ET.parse(src_xml)
    root = tree.getroot()

    kept = 0
    for obj in root.findall('object'):
        name = (obj.findtext('name') or '').strip()
        if name in keep:
            kept += 1
        else:
            _remove_child(root, obj)

    if kept:
        tree.write(dst_xml, encoding='utf-8')
    return kept


def extract_classes(ann_dir=ann_filepath, img_dir=img_filepath,
                    ann_out=ann_savepath, img_out=img_savepath, keep=None):
    """Filter every annotation in *ann_dir* and copy the matching images.

    Args:
        ann_dir: folder with the original VOC xml annotations.
        img_dir: folder with the original jpg images.
        ann_out: destination folder for the filtered annotations.
        img_out: destination folder for the copied images.
        keep: class names to extract; defaults to ``wanted_classes``.

    Returns:
        The number of annotation/image pairs that were written.
    """
    keep = set(wanted_classes if keep is None else keep)
    os.makedirs(img_out, exist_ok=True)
    os.makedirs(ann_out, exist_ok=True)

    written = 0
    for file in sorted(os.listdir(ann_dir)):
        if not file.endswith('.xml'):
            continue
        print(file)
        kept = filter_annotation(os.path.join(ann_dir, file),
                                 os.path.join(ann_out, file), keep)
        # Only copy the picture when the annotation kept at least one object.
        if kept:
            name_img = os.path.join(img_dir, os.path.splitext(file)[0] + ".jpg")
            shutil.copy(name_img, img_out)
            written += 1
    return written


if __name__ == '__main__':
    extract_classes()
以上这篇python实现提取COCO,VOC数据集中特定的类就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持。
铁雪资源网 Design By www.gsvan.com
广告合作:本站广告合作请联系QQ:858582 申请时备注:广告合作(否则不回)
免责声明:本站文章均来自网站采集或用户投稿,网站不提供任何软件下载或自行开发的软件! 如有用户或公司发现本站内容信息存在侵权行为,请邮件告知! 858582#qq.com
免责声明:本站文章均来自网站采集或用户投稿,网站不提供任何软件下载或自行开发的软件! 如有用户或公司发现本站内容信息存在侵权行为,请邮件告知! 858582#qq.com
铁雪资源网 Design By www.gsvan.com
暂无python实现提取COCO,VOC数据集中特定的类的评论...
稳了!魔兽国服回归的3条重磅消息!官宣时间再确认!
昨天有一位朋友在大神群里分享,自己亚服账号被封号之后居然弹出了国服的封号信息对话框。
这里面让他访问的是一个国服的战网网址,com.cn和后面的zh都非常明白地表明这就是国服战网。
而他在复制这个网址并且进行登录之后,确实是网易的网址,也就是我们熟悉的停服之后国服发布的暴雪游戏产品运营到期开放退款的说明。这是一件比较奇怪的事情,因为以前都没有出现这样的情况,现在突然提示跳转到国服战网的网址,是不是说明了简体中文客户端已经开始进行更新了呢?