上海网站建设招聘,免费物流公司网站模板,有赞微商城登录首页登录入口,wordpress 菜单相册目录
一、数据分析与数据集构建
二、所有相关的脚本
三、模型效果 一、数据分析与数据集构建 由于电科院数据集有17w-18w张，标签错误的非常多，且漏标非常多，但是所有有效时间只有半个月左右，显卡是M60，训练速度特别…目录
一、数据分析与数据集构建
二、所有相关的脚本
三、模型效果

一、数据分析与数据集构建

由于电科院数据集有17w-18w张，标签错误的非常多，且漏标非常多，但是所有有效时间只有半个月左右，显卡是M60，训练速度特别慢，所以需要尽量留足训练时间（至少是1周左右）；而且为了保证训练的轮数尽量多，还需要使得数据集尽量有效，减少冗余。

数据复杂情况如下：

由于只训练缺陷类，效果难以达到较好的情况，所以这里考虑加入正常数据作为辅助。做法流程是：只筛选缺陷，看看带出来多少正常——在里面剔除不需要的类（这里是6和19）——然后，由于正常类不能和异常交叉存在，所以剔除和异常交叉的正常类的标（IOU阈值取0.5）。

得到数据情况如下：

由于“绝缘子正常”太多，这里考虑删除一部分，使得绝缘子正常的数量也能在1000-2000。做法是先统计“5_class27_0518_接着剔除和异常交叉的正常_0.5_抽取绝缘子正常”，然后统计每个类和绝缘子共存的情况，看看哪些较多，能否剔除该类中共存的绝缘子达到目的。数据统计如下：

可见绝缘子并不是很大一部分分布在某一个或者几个类里面的，所以这里无法剔除，只是对“绝缘子破损”进行增强来弥补该类的数据不足。

最终训练使用的数据是“6_2_class27_0518_接着剔除和异常交叉的正常_0.5_split”，然后将数据的20-25%作为val，其余作为train，进行训练，寻找最佳的方法。

数据集每类平衡的规则是：不足2000的增强到2000幅；补充的对照样本（绝缘子正常等）不足1000的增强到1000；尽量均衡的前提下，正样本不能多。

寻找到最佳方法后，所有数据都作为train，不留val，使得尽量多的数据参与训练，以得到最佳模型。

二、所有相关的脚本
1_abcd当指定类和它相关类iou过大时剔除该指定类
import os


def calculate_iou(box1, box2):
    """Return the overlap ratio of two label boxes.

    Each box is ``[class_id, x_center, y_center, width, height]`` as stored
    in the label files (the same centre-based layout the erase script in
    this project uses when it converts labels to pixel rectangles).

    The ratio is ``intersection / min(area1, area2)`` rather than the
    textbook intersection-over-union, so a small box that lies completely
    inside a large one scores 1.0.

    NOTE(review): the published listing lost its comparison operators; the
    no-overlap branch is reconstructed as "intersection below 1e-6" and
    returns 0.0 so that disjoint boxes are never treated as overlapping.

    Args:
        box1: First box, ``[class_id, x_center, y_center, width, height]``.
        box2: Second box in the same layout.

    Returns:
        A float in ``[0.0, 1.0]``; 0.0 when the boxes do not overlap.
    """
    x1, y1, w1, h1 = box1[1:5]
    x2, y2, w2, h2 = box2[1:5]

    # Convert the centre-based boxes to corner coordinates before
    # intersecting them.
    left = max(x1 - w1 / 2, x2 - w2 / 2)
    top = max(y1 - h1 / 2, y2 - h2 / 2)
    right = min(x1 + w1 / 2, x2 + w2 / 2)
    bottom = min(y1 + h1 / 2, y2 + h2 / 2)

    intersect_area = max(0, right - left) * max(0, bottom - top)
    if intersect_area < 0.000001:
        return 0.0

    # A positive intersection implies both areas are positive, so the
    # division is safe. The union can never be smaller than either box,
    # so the smaller box area is the minimum the original code looked for.
    min_area = float(min(w1 * h1, w2 * h2))
    return intersect_area / min_area


def filter_annotations(queding_id, id_list, filename, iou_thresh=0.2):
    """Drop boxes of one class that overlap boxes of related classes.

    The label file is rewritten in place. Boxes whose class is
    ``queding_id`` are removed when their overlap ratio with any box whose
    class is in ``id_list`` exceeds ``iou_thresh``. All other lines are
    kept.

    Args:
        queding_id: Class id whose boxes may be removed.
        id_list: Class ids that take precedence over ``queding_id``.
        filename: Path of the label file (one ``class x y w h`` per line).
        iou_thresh: Overlap ratio above which a ``queding_id`` box is
            removed. Defaults to the 0.2 used by the original script.

    Returns:
        The lines written back to ``filename`` (each ending in a newline),
        ordered as: unrelated classes, surviving ``queding_id`` boxes,
        ``id_list`` boxes.
    """
    target_boxes = []   # (parsed box, normalised text) for queding_id
    related_boxes = []  # (parsed box, normalised text) for id_list classes
    filtered_annotations = []

    with open(filename, "r", encoding="utf-8") as file:
        lines = file.readlines()
    print("all:\n", lines)

    for line in lines:
        fields = line.split()
        if not fields:
            # Blank lines carry no annotation; do not write them back.
            continue
        text = " ".join(fields) + "\n"
        if len(fields) != 5:
            # Not a "class x y w h" record: keep it untouched rather than
            # crash on the unpacking below.
            filtered_annotations.append(text)
            continue
        class_id = int(fields[0])
        box = [class_id] + [float(value) for value in fields[1:]]
        if class_id == queding_id:
            target_boxes.append((box, text))
        elif class_id in id_list:
            related_boxes.append((box, text))
        else:
            filtered_annotations.append(text)

    for box, text in target_boxes:
        overlaps = False
        for related_box, _ in related_boxes:
            iou = calculate_iou(box, related_box)
            if iou > iou_thresh:
                print("iou,", iou)
                overlaps = True
                break
        if not overlaps:
            filtered_annotations.append(text)

    # Related-class boxes are always kept; the original text is written
    # back so coordinate precision is not changed by a float round trip.
    filtered_annotations.extend(text for _, text in related_boxes)

    with open(filename, "w", encoding="utf-8") as f:
        f.writelines(filtered_annotations)
    return filtered_annotations


if __name__ == "__main__":
    # Other class groups this script was run with:
    #   queding_id = 0,  id_list = [1, 2, 3, 4]
    #   queding_id = 5,  id_list = [6, 7]
    #   queding_id = 10, id_list = [11, 12, 13]
    queding_id = 10
    id_list = [11, 12, 13]
    folder_path = "./1_class27"
    for root, _, files in os.walk(folder_path):
        for file in files:
            if file.endswith(".txt"):
                print("---------------")
                print(file)
                file_path = os.path.join(root, file)
                res = filter_annotations(queding_id, id_list, file_path)
                for l in res:
                    print(l)
2splitImgAndLabelByLabelid
# -*- encoding:utf-8 -*-
import os
import cv2
import sys
import shutil
from pathlib import Path

# Image extensions tried when looking for the picture that belongs to a label.
suffixs = [".png"]
# File extensions that are treated as label files.
file_type_list = ["txt"]


def folder_for_ids(ids):
    """Return the destination folder name for the class ids of one label file.

    Args:
        ids: Class ids read from the label file (may be empty).

    Returns:
        ``"empty"`` when there are no ids, the id itself (as a string) when
        every box has the same class, otherwise ``"various"``.
    """
    if not ids:
        return "empty"
    if min(ids) == max(ids):
        return str(ids[0])
    return "various"


def read_class_ids(label_path):
    """Return the class ids of all well-formed (5-field) lines in a label file."""
    ids = []
    # The context manager closes the file before it is moved; the original
    # left the handle open.
    with open(label_path, encoding="utf-8") as label_file:
        for line in label_file:
            fields = line.split()
            if len(fields) == 5:
                ids.append(int(fields[0]))
    return ids


def find_image(label_path):
    """Return the readable image that sits next to ``label_path``, or None."""
    stem = label_path[0:label_path.rfind(".", 1)]
    for suffix in suffixs:
        candidate = stem + suffix
        if os.path.exists(candidate) and cv2.imread(candidate) is not None:
            return candidate
    return None


def main(argv=None):
    """Sort label/image pairs of a folder into per-class sub-folders.

    Destination folders (``empty``, ``<class id>``, ``various``) are created
    in the current working directory, as in the original script.

    Args:
        argv: Command-line arguments; defaults to ``sys.argv``. Exactly one
            argument, the folder to process, is expected after the program
            name.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        print("input as:\n python 1splitImgAndLabelByLabelid.py imgFolder")
        sys.exit()

    path = argv[1]
    if not os.path.exists(path):
        print("sorry, you input empty floder ! ")
        sys.exit()

    for name in os.listdir(path):
        print("-" * 20)
        print("name,", name)
        file_path = os.path.join(path, name)
        if file_path.split(".")[-1] not in file_type_list:
            continue

        # Resolve the image before anything is moved.
        image_path = find_image(file_path)
        ids = read_class_ids(file_path)

        save_path = folder_for_ids(ids)
        os.makedirs(save_path, exist_ok=True)
        shutil.move(file_path, save_path)
        # Only move the image when one was actually found; moving a missing
        # path would raise after the label had already been relocated.
        if image_path is not None:
            shutil.move(image_path, save_path)
        print(ids)


if __name__ == "__main__":
    main()
3_copyfilesbyclassid
# encoding:utf-8import os
import cv2
import os
import shutil

# Image extensions tried, in order, when looking for a label's picture.
suffixs = [".JPG", ".PNG", ".bmp", ".jpeg", ".jpg", ".png"]


def backup_txt_files(src_dir, dst_dir, exclude_ids=(6,)):
    """Copy a labelled dataset while dropping boxes of unwanted classes.

    Every ``.txt`` label file under ``src_dir`` is filtered: lines whose
    class id is in ``exclude_ids`` are removed, lines that are not
    five whitespace-separated fields are dropped as well. When at least one
    line survives, the filtered label and the first readable image with the
    same stem are written to the mirrored location under ``dst_dir``.
    Label files with nothing left are not copied at all.

    Args:
        src_dir: Root folder of the source dataset.
        dst_dir: Root folder that receives the filtered copy.
        exclude_ids: Class ids to strip from the labels. Defaults to
            ``(6,)``, the id that was hard-coded in the original script.
    """
    excluded = set(exclude_ids)
    for root, _, files in os.walk(src_dir):
        for file in files:
            if not file.endswith(".txt"):
                continue

            src_path = os.path.join(root, file)
            rel_path = os.path.relpath(src_path, src_dir)
            dst_path = os.path.join(dst_dir, rel_path)

            # Select the label lines that survive the class filter.
            new_label_data = []
            with open(src_path, "r", encoding="utf-8") as f:
                for line in f:
                    fields = line.split()
                    if len(fields) != 5:
                        continue
                    if int(fields[0]) in excluded:
                        continue
                    new_label_data.append(" ".join(fields) + "\n")

            if not new_label_data:
                continue

            # Write the filtered label.
            dst_folder = os.path.dirname(dst_path)
            os.makedirs(dst_folder, exist_ok=True)
            with open(dst_path, "w", encoding="utf-8") as f:
                f.writelines(new_label_data)

            # Copy the first readable image that shares the label's stem.
            stem = src_path[0:src_path.rfind(".", 1)]
            for suffix in suffixs:
                file_name = stem + suffix
                if os.path.exists(file_name):
                    image = cv2.imread(file_name)
                    if image is not None:
                        shutil.copy(file_name, dst_folder)
                        break


# Source and backup locations; absolute paths are recommended.
src_dir = "various"
dst_dir = "various_6"

if __name__ == "__main__":
    # Run the backup.
    backup_txt_files(src_dir, dst_dir)
4_ccccc补充various到单类中
# encoding:utf-8import os
import shutil
from termios import PARODD
import cv2
import os
import random


def backup_txt_files(src_dir, sample_dir, class_id, num_thresh):
    """Top up a single-class folder with samples taken from a mixed pool.

    The number of samples already in ``src_dir`` is estimated as half the
    number of files in it (one image plus one label per sample). When that
    is below ``num_thresh``, label files under ``sample_dir`` that contain
    ``class_id`` and have a readable ``.jpg`` next to them are shuffled and
    moved, together with their images, into ``<src_dir>_various`` until the
    shortfall is covered or the pool runs out.

    Args:
        src_dir: Folder holding the samples of a single class.
        sample_dir: Folder tree holding mixed-class samples to draw from.
        class_id: Class id that a pool label must contain to be eligible.
        num_thresh: Desired number of samples for the class.
    """
    src_num_files = len(
        [f for f in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, f))]
    ) // 2
    if src_num_files >= num_thresh:
        # Return instead of exit(): the caller loops over many classes and
        # must keep going when one of them is already large enough.
        return

    # Collect eligible (image, label) pairs from the pool.
    search_res = []
    for root, _, files in os.walk(sample_dir):
        for file in files:
            if not file.endswith(".txt"):
                continue
            label_path = os.path.join(root, file)
            has_class = False
            with open(label_path, "r", encoding="utf-8") as f:
                for line in f:
                    fields = line.split()
                    if len(fields) == 5 and int(fields[0]) == class_id:
                        has_class = True
                        break
            if not has_class:
                continue
            file_name = label_path[0:label_path.rfind(".", 1)] + ".jpg"
            if os.path.exists(file_name):
                image = cv2.imread(file_name)
                if image is not None:
                    search_res.append((file_name, label_path))

    random.shuffle(search_res)

    save_path = src_dir + "_various"
    os.makedirs(save_path, exist_ok=True)

    add_num = num_thresh - src_num_files
    print(src_dir, " ", src_num_files, " ", add_num)

    # Each entry of search_res is already one complete sample, so the
    # shortfall is compared against the entry count directly. Slicing caps
    # the move at whatever the pool can provide.
    for file_name, label_path in search_res[:add_num]:
        shutil.move(file_name, save_path)
        shutil.move(label_path, save_path)


# Source and pool locations; absolute paths are recommended.
src_dir = "single"
sample_dir = "various"
num_thresh = 3000

if __name__ == "__main__":
    # Run the top-up for every class folder.
    for folder in os.listdir(src_dir):
        folder_path = os.path.join(src_dir, folder)
        # Skip anything that is not a "<class id>" directory, e.g. the
        # "<class id>_various" folders this script creates next to them.
        if not (folder.isdigit() and os.path.isdir(folder_path)):
            continue
        print("-" * 40)
        backup_txt_files(folder_path, sample_dir, int(folder), num_thresh)
5_dedadada当指定类标过多时删去标抹去标签区域
import os
import random
# Erase the labels of classes that have too much data.
# Classes listed by the author: 0, 5, 10, 12, 15, 20, 23.
# Rules (limit is 1000 samples per class):
#   1. The single-class folder alone exceeds the limit: delete the class's
#      labels in "<id>_various" and in every other folder, and paint over
#      the labelled regions in the images.
#   2. The single-class folder is within the limit but single + various
#      exceeds it: delete the labels (and paint over the regions) in the
#      other folders; the surplus inside "<id>_various" is trimmed too.
#   3. Single + various is still below the limit: keep enough labels from
#      the other folders to reach the limit and delete the rest (painting
#      over the regions).


def process(label_path, class_id):
    """Remove every box of ``class_id`` from a label file and its image.

    The label file is rewritten without the lines of ``class_id`` (lines
    that are not five whitespace-separated fields are dropped as well). If
    a ``.jpg`` with the same stem exists and can be read, each removed box
    is painted over with a grey rectangle.

    Args:
        label_path: Path of a ``.txt`` label file; other paths are ignored.
        class_id: Class id whose boxes are erased.
    """
    if not label_path.endswith(".txt"):
        return

    new_label_data = []
    erased = []  # coordinate fields (as text) of the boxes to paint over
    with open(label_path, "r", encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if len(fields) != 5:
                continue
            if int(fields[0]) == class_id:
                erased.append(fields[1:])
                continue
            new_label_data.append(" ".join(fields) + "\n")

    file_name = label_path[0:label_path.rfind(".", 1)] + ".jpg"
    if erased and os.path.exists(file_name):
        # Imported here so that the label-only helpers of this script can
        # be used on machines without OpenCV.
        import cv2

        image = cv2.imread(file_name)
        if image is not None:
            # Read and write the image once for all boxes; re-encoding the
            # JPEG per box would degrade it a little more each time.
            for coords in erased:
                x, y, width, height = map(float, coords)
                x_min = int((x - width / 2) * image.shape[1])
                y_min = int((y - height / 2) * image.shape[0])
                x_max = int((x + width / 2) * image.shape[1])
                y_max = int((y + height / 2) * image.shape[0])
                cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (125, 125, 125), -1)
            cv2.imwrite(file_name, image)

    with open(label_path, "w", encoding="utf-8") as f:
        f.writelines(new_label_data)


def getfilelistbyclassid(path, class_id, ignoreid):
    """List label files under ``path`` that contain ``class_id``.

    Args:
        path: Dataset root whose sub-folders are scanned (one level deep).
        class_id: Class id to look for.
        ignoreid: Folder selection mode. ``1`` skips the class's own
            folders (``"<id>"`` and ``"<id>_various"``); ``2`` scans only
            ``"<id>_various"``; any other value scans every folder.

    Returns:
        Paths of the matching ``.txt`` files, in directory-listing order.
    """
    single_name = str(class_id)
    various_name = str(class_id) + "_various"
    file_list = []
    for folder in os.listdir(path):
        # Compare whole folder names: a substring test would make class 1
        # also skip folders such as "10" or "21".
        if ignoreid == 1 and folder in (single_name, various_name):
            continue
        if ignoreid == 2 and folder != various_name:
            continue
        folder_path = os.path.join(path, folder)
        if not os.path.isdir(folder_path):
            continue
        for file in os.listdir(folder_path):
            if not file.endswith(".txt"):
                continue
            label_path = os.path.join(folder_path, file)
            with open(label_path, "r", encoding="utf-8") as f:
                for line in f:
                    fields = line.split()
                    if len(fields) == 5 and int(fields[0]) == class_id:
                        file_list.append(label_path)
                        break
    return file_list


def count_samples(folder):
    """Return the number of image/label pairs in ``folder`` (files // 2).

    A missing folder counts as zero samples.
    """
    if not os.path.isdir(folder):
        return 0
    files = [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]
    return len(files) // 2


def balance_class(path, class_id, limit=1000):
    """Erase surplus labels of ``class_id`` so roughly ``limit`` remain.

    Args:
        path: Dataset root containing ``"<id>"``, ``"<id>_various"`` and
            other class folders.
        class_id: Class id to trim.
        limit: Target number of samples for the class.
    """
    file_num = count_samples(os.path.join(path, str(class_id)))

    if file_num > limit:
        # The single-class folder alone exceeds the limit: erase the class
        # everywhere else.
        # TODO: the single-class folder itself is not trimmed yet (author's
        # note dated 2023-05-20).
        for folder in os.listdir(path):
            if folder == str(class_id):
                continue
            folder_path = os.path.join(path, folder)
            if not os.path.isdir(folder_path):
                continue
            for file in os.listdir(folder_path):
                if file.endswith(".txt"):
                    process(os.path.join(folder_path, file), class_id)
        return

    # Count the "<id>_various" folder itself (the original counted the
    # single-class folder a second time here).
    various_file_num = count_samples(os.path.join(path, str(class_id) + "_various"))
    file_various_num = various_file_num + file_num

    if file_various_num < limit:
        # Keep just enough labels from the other folders; erase the rest.
        file_list = getfilelistbyclassid(path, class_id, ignoreid=1)
        surplus = len(file_list) + file_various_num - limit
        if surplus > 0:
            random.shuffle(file_list)
            for label_path in file_list[:surplus]:
                process(label_path, class_id)
    else:
        # Erase the surplus inside "<id>_various" ...
        various_file_list = getfilelistbyclassid(path, class_id, ignoreid=2)
        random.shuffle(various_file_list)
        surplus = max(0, len(various_file_list) + file_num - limit)
        for label_path in various_file_list[:surplus]:
            process(label_path, class_id)
        # ... and everything in the other folders.
        other_file_list = getfilelistbyclassid(path, class_id, ignoreid=1)
        random.shuffle(other_file_list)
        for label_path in other_file_list:
            process(label_path, class_id)


if __name__ == "__main__":
    id_list = [0, 1]
    path = "./images"
    for class_id in id_list:
        balance_class(path, class_id)

# Next article section: "三、模型效果" (3. Model results).
待补充