前言:
具体使用请查看我的github仓库:
再需要yolo识别物体之前,通常需要漫长的标注过程,而本程序主打一个自动化
把目标图片放进以目标名字命名的文件夹 把目标名字文件夹放在images文件夹下面 确保每个类别的图片数量是一致的 把背景图片放在back文件夹
Part 1 一点线性代数
我们可以发现在对图片进行仿射变换时,只需要对原图片的一组基向量进行变化即可
那么在进行透视变换的时候就需要对三个想来进行同时的变换即可
具体的数学计算看代码吧,太晚了我想睡一会了
这个:言って。好好听
下面有两个版本,分别是仿射变换和透视变换,效果都还行,目前GitHub上面是透视变换的版本qwq。
import pyautogui as gui
import cv2
import numpy
import os
import time
import glob
import cv2
import numpy as np
import random
# import numba
from numba import jit
import shutil
import re
def pre_make():
src_folder = "images/"
dst_folder = "target/"
# 获取源文件夹中所有子文件夹的路径
subfolders = [f.path for f in os.scandir(src_folder) if f.is_dir()]
shutil.rmtree(dst_folder)
os.makedirs(dst_folder)
# 遍历每个子文件夹,并将其中的第一张图片复制到目标文件夹中
for folder in subfolders:
copied = False
for file in os.listdir(folder):
# 只处理jpg和png文件
if file.endswith(".jpg") or file.endswith(".png"):
# 构建目标文件路径,以子文件夹名称作为文件名前缀
file_prefix = os.path.basename(folder)
dst_file = os.path.join(dst_folder, file_prefix + "_" + file)
# 复制文件并重命名
shutil.copy(os.path.join(folder, file), dst_file)
# 删除源文件
os.remove(os.path.join(folder, file))
# 设置标志位,表示已经复制了一张图片
copied = True
# 删除文件名中的数字
dst_file_no_digit = re.sub(r"\d+", "", os.path.basename(dst_file))
dst_file_no_digit = os.path.join(os.path.dirname(dst_file), dst_file_no_digit)
os.rename(dst_file, dst_file_no_digit)
# 仅复制第一张图片
break
# 如果没有复制任何图片,则输出提示信息
if not copied:
print("未找到任何图片:%s" % folder)
return 114514
#flags just do it
def save_yolo_data(class_id, label, img, data_dir='data/'):
# Parse YOLO data
boxes = []
x, y, w, h = label
boxes.append([class_id, x, y, w, h])
# Save image
img_name = str(time.time())
img_save_path = 'img/' + img_name
#print(img_save_path)
img_save_path = img_save_path + '.png'
cv2.imwrite(img_save_path, img)
# Save YOLO data
data_name = img_name + '.txt'
data_save_path = os.path.join(data_dir, data_name)
#print(data_save_path)
with open(data_save_path, 'w') as f:
for box in boxes:
f.write('{} {} {} {} {}\n'.format(*box))
@jit
def mix_pic(img, img_back, loca_x, loca_y):
# loca_x = 10
# loca_y = 10
for x in range(img.shape[0]): # 图片的高
for y in range(img.shape[1]): # 图片的宽
px = img[x, y]
if px[0] == 0 and px[1] == 0 and px[2] == 0:
pass
else:
img_back[loca_x + x, loca_y + y] = px
# print('1')
return img_back
def img_mix(target_img, back_img):
one_normal = np.random.normal(loc=1.0, scale=0.2, size=(1, 2))
zero_normal = np.random.normal(loc=0.0, scale=0.2, size=(1, 2))
a = one_normal[0, 0]
d = one_normal[0, 1]
b = zero_normal[0, 0]
c = zero_normal[0, 1]
# target_img = cv2.imread('target.jpg')
# back_img = cv2.imread('back.jpg')
target_img = cv2.resize(target_img, (460, 350))
cols, rows = target_img.shape[:2]
basis_vector = max(cols, rows)
# print(transfer_x)
# 传递小于0
anti_c = (abs(c) - c) / 2
anti_b = (abs(b) - b) / 2
x_delta = rows * anti_c
y_delta = cols * anti_b
rows = int((a * rows + c * rows) + (2 * x_delta))
cols = int((b * cols + d * cols) + (2 * y_delta))
a = int(a * basis_vector)
b = int(b * basis_vector)
c = int(c * basis_vector)
d = int(d * basis_vector)
a = a + x_delta
b = b + y_delta
c = c + x_delta
d = d + y_delta
transfer_x = [a, b]
transfer_y = [c, d]
pts1 = np.float32([[0, 0], [basis_vector, 0], [0, basis_vector]])
pts2 = np.float32([[x_delta, y_delta], transfer_x, transfer_y])
M = cv2.getAffineTransform(pts1, pts2)
dst = cv2.warpAffine(target_img, M, (rows, cols))
dst = cv2.blur(dst, (3, 3))
front_x, front_y = dst.shape[:2]
back_img = cv2.resize(back_img, (1920, 1080))
rand_x = random.randint(200, 400)
rand_y = int(rand_x * (192/108))
dst = cv2.resize(dst, (rand_x, rand_y))
position_x = random.randint(0, (1920 - rand_x))
position_y = random.randint(0, (1080 - rand_y))
fina = mix_pic(dst, back_img, position_y, position_x)
# back_img[0:af_cols, 0:af_rows] = dst
#cv2.imshow('fin', fina)
yolo_format = []
#print(position_x)
#print(position_y)
#print(rand_x)
#print(rand_y)
yolo_format.append((position_x + rand_x / 2) / 1920)
yolo_format.append((position_y + rand_y / 2) / 1080)
yolo_format.append((rand_x / 1920)*0.8)
yolo_format.append((rand_y / 1080)*0.8)
#print(yolo_format)
return fina, yolo_format
#cv2.waitKey(0)
class AutoMakerDataYolo:
'''
this is a class to automatic making the data of yolov5
这是一个自动化生成yolov5数据集的库
'''
def __init__(self):
self.all_kind_location = None
self.image_names_target = None
self.target_number = None
self.target_name = None
self.target_image_list = None
self.made_number = None
self.target_location = None
self.huge_location = None
self.num = 1
self.yse_or_no = gui.confirm(text='make sure you read the readme.md"', title='setup runing',
buttons=['yes', 'no'])
if self.yse_or_no == 'no':
quit()
self.huge_location = gui.prompt(text='the path of background image', title='qwq', default='back/')
self.target_location = gui.prompt(text='the path of target image', title='qwq', default='target/')
self.made_number = gui.prompt(text='the number you want for each target image', title='qwq', default='10')
self.made_number = int(self.made_number)
def read_target(self):
# 读取目标的数量种类,存在列表里
folder_path = self.target_location
# 指定目录下的所有jpg和png文件
img_files = glob.glob(folder_path+'*.jpg') + glob.glob(folder_path+'*.png')
# 存储文件名的列表
img_names = []
# 存储图像的列表
imgs = []
# 遍历所有图像文件
for file in img_files:
# 读取图像
img = cv2.imread(file)
# 获取文件名并去掉扩展名
#img_name = file.split('/')[-1].split('.')[0]
img_name = os.path.splitext(os.path.basename(file))[0]
# 添加到列表中
imgs.append(img)
img_names.append(img_name)
self.target_image_list = imgs
self.image_names_target = img_names
def read_back_image(self):
folder_path = self.huge_location
i = -1
for tg_img in self.target_image_list:
i = i + 1
print(i)
print('len'+str(len(self.target_image_list)))
for filename in os.listdir(folder_path):
# print(filename)
# print(os.listdir(folder_path))
# 判断文件类型是否为jpg或png图片
if filename.endswith('.JPG') or filename.endswith('.jfif') or filename.endswith('.jpg'):
#print('2')
# 读取图片
img_path = os.path.join(folder_path, filename)
#print(img_path)
img = cv2.imread(img_path)
# 调用处理函数
j = 0
while(j < self.made_number):
fina_img, yolo_num = img_mix(tg_img, img)
# img_name = self.image_names_target[i]
save_yolo_data(i, yolo_num, fina_img)
j = j + 1
else:
pass
def save_yolo_dataset_format(self,img_class, img_data, labels):
save_path = 'data/'
#print('1')
# labels = self.image_names_target
"""
Save data in YOLO dataset format with input labels, image class and img_data
Args:
labels (list): a list of bounding box labels, each label is a list of four values [x, y, w, h]
img_class (int): class number of the image
img_data (bytes): image data in binary format
save_path (str): path to save yolo dataset format file
"""
# calculate center coordinates, width and height values for each label
yolo_labels = []
# for label in labels:
#print(labels)
x, y, w, h = labels
yolo_labels.append(f"{img_class} {x} {y} {w} {h}\n")
# encode image data as JPEG format
def make_obj(self):
with open("obj.names", "w") as f:
for c in self.image_names_target:
#print(self.image_names_target)
f.write(c + "\n")
AUTO = AutoMakerDataYolo()
#AUTO.got_data()
while pre_make() != 114514:
AUTO.read_target()
AUTO.make_obj()
AUTO.read_back_image()
因为效果不太行,改成了透视变换
请把原图像文件夹放在/image下,背景图片放在/back下
import pyautogui as gui
import os
import time
import glob
import cv2
import numpy as np
import random
from numba import jit
import shutil
import re
def pre_move():
# 定义源目录和目标目录的路径
source_dir = '/image'
target_dir = '/images'
# 检查目标目录是否存在,如果存在就清空它
if os.path.exists(target_dir):
shutil.rmtree(target_dir)
# 创建目标目录
os.makedirs(target_dir)
# 递归遍历源目录
for root, dirs, files in os.walk(source_dir):
# 构造目标目录的路径
target_root = os.path.join(target_dir, os.path.relpath(root, source_dir))
# 如果目标目录不存在,就创建它
if not os.path.exists(target_root):
os.makedirs(target_root)
# 移动所有文件到目标目录中
for file in files:
source_path = os.path.join(root, file)
target_path = os.path.join(target_root, file)
shutil.move(source_path, target_path)
# 移动当前目录到目标目录中
if root != source_dir:
shutil.move(root, target_root)
def pre_make():
src_folder = "images/"
dst_folder = "target/"
# 获取源文件夹中所有子文件夹的路径
subfolders = [f.path for f in os.scandir(src_folder) if f.is_dir()]
shutil.rmtree(dst_folder)
os.makedirs(dst_folder)
# 遍历每个子文件夹,并将其中的第一张图片复制到目标文件夹中
for folder in subfolders:
copied = False
for file in os.listdir(folder):
# 只处理jpg和png文件
if file.endswith(".JPG") or file.endswith(".png"):
# 构建目标文件路径,以子文件夹名称作为文件名前缀
file_prefix = os.path.basename(folder)
dst_file = os.path.join(dst_folder, file_prefix + "_" + file)
# 复制文件并重命名
shutil.copy(os.path.join(folder, file), dst_file)
# 删除源文件
os.remove(os.path.join(folder, file))
# 设置标志位,表示已经复制了一张图片
copied = True
# 删除文件名中的数字
dst_file_no_digit = re.sub(r"\d+", "", os.path.basename(dst_file))
dst_file_no_digit = os.path.join(os.path.dirname(dst_file), dst_file_no_digit)
os.rename(dst_file, dst_file_no_digit)
# 仅复制第一张图片
break
# 如果没有复制任何图片,则输出提示信息
if not copied:
print("未找到任何图片:%s" % folder)
return 114514
#flags just do it
def save_yolo_data(class_id, label, img, data_dir='labels/'):
# Parse YOLO data
boxes = []
x, y, w, h = label
boxes.append([class_id, x, y, w, h])
# Save image
img_name = str(time.time())
img_save_path = 'imagess/' + img_name
#print(img_save_path)
img_save_path = img_save_path + '.png'
cv2.imwrite(img_save_path, img)
# Save YOLO data
data_name = img_name + '.txt'
data_save_path = os.path.join(data_dir, data_name)
#print(data_save_path)
with open(data_save_path, 'w') as f:
for box in boxes:
f.write('{} {} {} {} {}\n'.format(*box))
@jit
def mix_pic(img, img_back, loca_x, loca_y):
# loca_x = 10
# loca_y = 10
for x in range(img.shape[0]): # 图片的高
for y in range(img.shape[1]): # 图片的宽
px = img[x, y]
if px[0] == 0 and px[1] == 0 and px[2] == 0:
pass
else:
img_back[loca_x + x, loca_y + y] = px
# print('1')
return img_back
def img_mix(target_img, back_img):
one_normal = np.random.normal(loc=0.5, scale=0.1, size=(1, 4))
# one_normal = np.random.random_sample(size=(1, 4))
a = one_normal[0, 0]
d = one_normal[0, 1]
b = one_normal[0, 2]
c = one_normal[0, 3]
a = (abs(a) + a) / 2
b = (abs(b) + b) / 2
c = (abs(c) + c) / 2
d = (abs(d) + d) / 2
target_img = cv2.resize(target_img, (500, 300))
cols, rows = target_img.shape[:2]
half_cols = cols / 2
half_rows = rows / 2
row_r_cols = cols / rows
left_up_point = [int(half_cols - half_cols * a), int(half_rows - a * half_rows)]
right_up_point = [int(half_cols + b * half_cols), int(half_rows - b * half_rows)]
left_down_point = [int(half_cols - half_cols * c), int(half_rows + c * half_rows)]
right_down_point = [int(half_cols + d * half_cols), int(half_rows + d * half_rows)]
x_sharp = int(half_cols * (1 - max(a, c)))
y_sharp = int(half_rows * (1 - max(a, b)))
left_up_point[0] = left_up_point[0] - x_sharp
left_up_point[1] = left_up_point[1] - y_sharp
right_down_point[0] = right_down_point[0] - x_sharp
right_down_point[1] = right_down_point[1] - y_sharp
left_down_point[0] = left_down_point[0] - x_sharp
left_down_point[1] = left_down_point[1] - y_sharp
right_up_point[0] = right_up_point[0] - x_sharp
right_up_point[1] = right_up_point[1] - y_sharp
targ_col = int(half_cols * max(b, d) + half_cols * max(a, c))
targ_row = int(half_rows * max(a, b) + half_rows * max(c, d))
# print(left_up_point,left_down_point,right_up_point,right_down_point)
point1 = np.float32([[0, 0], [cols, 0], [0, rows], [cols, rows]])
# print(cols)
# print(rows)
point2 = np.float32([left_up_point, right_up_point, left_down_point, right_down_point])
M = cv2.getPerspectiveTransform(point1, point2)
dst = cv2.warpPerspective(target_img, M, (targ_col, targ_row))
position_x = random.randint(0, (1920 - cols))
position_y = random.randint(0, (1080 - rows))
back_img = cv2.resize(back_img, (1920, 1080))
fina = mix_pic(dst, back_img, position_y, position_x)
# back_img[0:af_cols, 0:af_rows] = dst
cv2.imshow('fin', fina)
yolo_format = []
yolo_format.append((position_x + targ_col / 2) / 1920)
yolo_format.append((position_y + targ_row / 2) / 1080)
yolo_format.append((targ_col / 1920) * 0.8)
yolo_format.append((targ_row / 1080) * 0.8)
# print(position_x)
# print(position_y)
# print(rand_x)
# print(rand_y)
# print(yolo_format)
#cv2.waitKey(0)
return fina, yolo_format
class AutoMakerDataYolo:
'''
this is a class to automatic making the data of yolov5
这是一个自动化生成yolov5数据集的库
'''
def __init__(self):
self.all_kind_location = None
self.image_names_target = None
self.target_number = None
self.target_name = None
self.target_image_list = None
self.made_number = None
self.target_location = None
self.huge_location = None
self.num = 1
self.yse_or_no = gui.confirm(text='make sure you read the readme.md"', title='setup runing',
buttons=['yes', 'no'])
if self.yse_or_no == 'no':
quit()
self.huge_location = gui.prompt(text='the path of background image', title='qwq', default='back/')
self.target_location = gui.prompt(text='the path of target image', title='qwq', default='target/')
self.made_number = gui.prompt(text='the number you want for each target image', title='qwq', default='10')
self.made_number = int(self.made_number)
def read_target(self):
# 读取目标的数量种类,存在列表里
folder_path = self.target_location
# 指定目录下的所有jpg和png文件
img_files = glob.glob(folder_path+'*.jpg') + glob.glob(folder_path+'*.png')
# 存储文件名的列表
img_names = []
# 存储图像的列表
imgs = []
# 遍历所有图像文件
for file in img_files:
# 读取图像
img = cv2.imread(file)
# 获取文件名并去掉扩展名
#img_name = file.split('/')[-1].split('.')[0]
img_name = os.path.splitext(os.path.basename(file))[0]
# 添加到列表中
imgs.append(img)
img_names.append(img_name)
self.target_image_list = imgs
self.image_names_target = img_names
def read_back_image(self):
folder_path = self.huge_location
i = -1
for tg_img in self.target_image_list:
i = i + 1
print(i)
print('len'+str(len(self.target_image_list)))
for filename in os.listdir(folder_path):
# print(filename)
# print(os.listdir(folder_path))
# 判断文件类型是否为jpg或png图片
if filename.endswith('.JPG') or filename.endswith('.jfif') or filename.endswith('.jpg'):
#print('2')
# 读取图片
img_path = os.path.join(folder_path, filename)
#print(img_path)
img = cv2.imread(img_path)
# 调用处理函数
j = 0
while(j < self.made_number):
fina_img, yolo_num = img_mix(tg_img, img)
# img_name = self.image_names_target[i]
save_yolo_data(i, yolo_num, fina_img)
j = j + 1
else:
pass
def save_yolo_dataset_format(self,img_class, img_data, labels):
save_path = 'data/'
#print('1')
# labels = self.image_names_target
"""
Save data in YOLO dataset format with input labels, image class and img_data
Args:
labels (list): a list of bounding box labels, each label is a list of four values [x, y, w, h]
img_class (int): class number of the image
img_data (bytes): image data in binary format
save_path (str): path to save yolo dataset format file
"""
# calculate center coordinates, width and height values for each label
yolo_labels = []
# for label in labels:
#print(labels)
x, y, w, h = labels
yolo_labels.append(f"{img_class} {x} {y} {w} {h}\n")
# encode image data as JPEG format
def make_obj(self):
with open("obj.names", "w") as f:
for c in self.image_names_target:
#print(self.image_names_target)
f.write(c + "\n")
AUTO = AutoMakerDataYolo()
#AUTO.got_data()
pre_move()
while pre_make() != 114514:
AUTO.read_target()
AUTO.make_obj()
AUTO.read_back_image()