DOTA文本数据集转XML格式
说明:由于本人比较喜欢DIOR数据集的格式,所以仿照DIOR数据集,将DOTA的标注转换成相同的XML格式
DOTA数据集下载链接:
提取码:7vwv
DIOR数据集下载链接:
文章插图
提取码:jcpg
代码如下(改了网上的代码,此处贴上链接)
import csv
import os
import string
import xml.dom.minidom
from xml.dom.minidom import Document
from xml.dom.minidom import parse

import numpy as np

# NOTE: `cv2` is imported inside main() because it is only needed to read the
# image dimensions; the two helpers below work without OpenCV installed.

# Tag names of the eight polygon coordinates, in the order they are stored in
# each box (x1, y1, ..., x4, y4).
_CORNER_TAGS = ('x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4')


def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>``, append it to *parent* and return it."""
    node = doc.createElement(tag)
    node.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(node)
    return node


def WriterXMLFiles(filename, img_name, path, box_list, label_list, w, h, d):
    """Write one DIOR-style XML annotation file.

    Args:
        filename: Name of the XML file to create (e.g. ``P0000.xml``).
        img_name: Image file name stored in the ``<filename>`` element.
        path: Directory the XML file is written to.
        box_list: Iterable of boxes; each box holds eight values
            ``x1, y1, x2, y2, x3, y3, x4, y4``.
        label_list: Iterable of labels, paired with ``box_list`` by position.
        w: Image width written to ``<size><width>``.
        h: Image height written to ``<size><height>``.
        d: Image depth (channel count) written to ``<size><depth>``.
    """
    doc = xml.dom.minidom.Document()
    root = doc.createElement('annotation')
    doc.appendChild(root)

    _append_text_element(doc, root, 'filename', img_name)

    source = doc.createElement("source")
    _append_text_element(doc, source, "database", "DOTA")
    root.appendChild(source)

    size = doc.createElement('size')
    _append_text_element(doc, size, 'width', w)
    _append_text_element(doc, size, 'height', h)
    _append_text_element(doc, size, 'depth', d)
    root.appendChild(size)

    _append_text_element(doc, root, "segmented", "0")

    for box, label in zip(box_list, label_list):
        obj = doc.createElement('object')
        _append_text_element(doc, obj, 'name', label)
        bndbox = doc.createElement('bndbox')
        for tag, value in zip(_CORNER_TAGS, box):
            _append_text_element(doc, bndbox, tag, value)
        obj.appendChild(bndbox)
        root.appendChild(obj)

    # os.path.join works whether or not `path` ends with a separator; the
    # context manager closes the file even if serialisation fails.
    with open(os.path.join(path, filename), 'w', encoding='utf-8') as fp:
        # indent='\n' puts every element on its own line (no indentation).
        doc.writexml(fp, indent='\n')


def load_annoataion(p, header_lines=2):
    """Load polygons and labels from a DOTA label text file.

    After the header, each non-blank line is expected to hold at least nine
    whitespace-separated fields: eight coordinates followed by the label.
    Any further fields are ignored.

    Args:
        p: Path of the label text file.
        header_lines: Number of leading lines to skip (default 2).

    Returns:
        A pair ``(boxes, labels)``: ``boxes`` is a float array of shape
        ``(N, 8)`` and ``labels`` is a string array of length ``N``. Both are
        empty when the file does not exist or has no annotations.

    Raises:
        ValueError: If a non-blank annotation line has fewer than nine fields
            or a coordinate is not a number.
    """
    text_polys = []
    text_tags = []
    if os.path.exists(p):
        with open(p, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                if line_no <= header_lines:
                    continue
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no data.
                    continue
                if len(fields) < 9:
                    raise ValueError(
                        '%s:%d: expected 8 coordinates and a label, got %r'
                        % (p, line_no, line.rstrip('\n')))
                text_polys.append([float(v) for v in fields[:8]])
                text_tags.append(fields[8])
    # Builtin float/str replace the np.float/np.str aliases removed in
    # NumPy 1.24. reshape keeps the (N, 8) shape even when N == 0.
    boxes = np.array(text_polys, dtype=float).reshape(-1, 8)
    return boxes, np.array(text_tags, dtype=str)


txt_path = r'E:/baidudisk/DOTA/train/labelTxt-v1.5/DOTA-v1.5_train/'
xml_path = r'E:/baidudisk/DOTA/train/labelTxt-v1.5-voc/DOTA-v1.5_train/'
img_path = r'E:/baidudisk/DOTA/train/images/images/'


def main():
    """Convert every DOTA label file in ``txt_path`` to XML in ``xml_path``."""
    import cv2  # only needed here, to read the image dimensions

    print(os.path.exists(txt_path))
    os.makedirs(xml_path, exist_ok=True)
    txts = [t for t in os.listdir(txt_path) if t.endswith('.txt')]
    for count, t in enumerate(txts):
        path = os.path.join(txt_path, t)
        print(path)
        # boxes holds the eight coordinates, labels holds the class names
        boxes, labels = load_annoataion(path)
        stem = os.path.splitext(t)[0]
        xml_name = stem + '.xml'  # e.g. P0000.xml
        img_name = stem + '.png'  # e.g. P0000.png
        img = cv2.imread(os.path.join(img_path, img_name))
        if img is None:
            # cv2.imread returns None for a missing or unreadable image.
            print('skip %s: cannot read image %s' % (t, img_name))
            continue
        h, w, d = img.shape
        WriterXMLFiles(xml_name, img_name, xml_path, boxes, labels, w, h, d)
        if count % 1000 == 0:
            print(count)


if __name__ == '__main__':
    main()
说明:
文章插图
1:请务必将自己的文件路径进行修改
2:导包出现如下图横线,感觉也没有影响程序运行
【DOTA数据集转VOC数据集,模仿DIOR数据集类型】3:文件转换最终如下图:
- 【FIW2022精彩回顾】数据库领域资深专家韩锋:金融行业数据库自主创新之路
- 透过指标,盘点热门开源数据库
- 金融行业分布式数据库转型之路
- 按图索骥,再谈数据库迁移之路
- 双中台在医疗 | 复旦中山医院钱琨:数据中台是迈向“未来医院”的第一步
- 2类,含xml标签文件 【表面缺陷检测】钢轨表面缺陷检测数据集介绍
- 4个类别 【图像分类】钢轨表面缺陷分类数据集介绍
- 【图像分类】卷积神经网络之ResNet网络模型实现钢轨缺陷识别(附代码和数据集
- 数据分析pandas库 一 python进阶
- DBMS MySQL 数据库安装教程图文详解