💡You can run the below code in Kaggle by using the copy and edit buttons. Kaggle Notebook Link
This is a notebook to do the following:
- Data Format conversion from the YOLO OBB to the YOLO format
- Data Format conversion from the YOLO format to the YOLO OBB format
- Data augmentation in YOLO format using the Albumentations package.
It is better if you run the code sequentially
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os
import math
import shutil
The function `obb_to_yolo` below converts a single YOLO OBB annotation (class index plus four corner points) into a standard YOLO annotation (class index, box center, width and height).
def obb_to_yolo(class_index, x1, y1, x2, y2, x3, y3, x4, y4):
    """Collapse a four-corner OBB annotation into an axis-aligned YOLO line.

    The result is the tightest axis-aligned rectangle enclosing the four
    corners, expressed as ``"<class> <x_center> <y_center> <width> <height>"``
    in the same units as the incoming coordinates.

    Args:
        class_index: Class id; truncated to ``int`` in the output.
        x1, y1, x2, y2, x3, y3, x4, y4: Corner coordinates of the box.

    Returns:
        The YOLO annotation as a single space-separated string.
    """
    xs = (x1, x2, x3, x4)
    ys = (y1, y2, y3, y4)
    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)

    fields = (
        int(class_index),
        (left + right) / 2,   # x_center
        (top + bottom) / 2,   # y_center
        right - left,         # width
        bottom - top,         # height
    )
    return " ".join(str(field) for field in fields)
!mkdir /kaggle/working/test
def convert_obb_to_yolo(input_file_path, output_directory):
    """Convert one YOLO OBB annotation file into a YOLO annotation file.

    The output is written to ``<output_directory>/<input stem>_yolo.txt``;
    the directory is created when missing. Each converted line is echoed to
    stdout.

    Args:
        input_file_path: Path of the OBB annotation file (9 values per line).
        output_directory: Directory receiving the converted file.

    Raises:
        ValueError: If a non-blank line does not contain exactly 9 values.
    """
    base_name = os.path.basename(input_file_path)
    file_name_without_extension = os.path.splitext(base_name)[0]

    os.makedirs(output_directory, exist_ok=True)
    output_file_path = os.path.join(output_directory, f"{file_name_without_extension}_yolo.txt")

    with open(input_file_path, 'r') as input_file, open(output_file_path, 'w') as output_file:
        for line in input_file:
            parts = line.split()
            if not parts:
                # Blank lines (e.g. a trailing newline) carry no annotation;
                # skipping them avoids a spurious ValueError.
                continue
            if len(parts) != 9:
                raise ValueError("Each line must contain 9 values: class_index, x1, y1, x2, y2, x3, y3, x4, y4.")
            class_index, x1, y1, x2, y2, x3, y3, x4, y4 = map(float, parts)
            yolo_line = obb_to_yolo(class_index, x1, y1, x2, y2, x3, y3, x4, y4)
            output_file.write(yolo_line + '\n')
            print(f"Converted line to YOLO format: {yolo_line}")

    print(f"Conversion complete. Output file saved to: {output_file_path}")
# Example run: convert a single OBB annotation file from the Kaggle input
# dataset and store the result in the working directory.
input_obb_annotations_file = '/kaggle/input/yolo-obb/alexandrite_11_jpg.rf.1052aaef4bac9e1051010f35dabc5e87.txt'
output_directory = '/kaggle/working/test'
convert_obb_to_yolo(input_obb_annotations_file, output_directory)
!cat /kaggle/input/face-mask-dataset-yolo-format/dataset/images/test/NUZZHAB7IMI6VHQQW44IVOMBHU.txt
1 0.49528571428571433 0.5424346335190742 0.10085714285714287 0.143591941705958
def yolo_to_obb(class_index, x_center, y_center, width, height):
    """Expand an axis-aligned YOLO box into a four-corner OBB annotation.

    Corners are emitted in the order top-left, top-right, bottom-right,
    bottom-left, in the same units as the inputs.

    Args:
        class_index: Class id. It is written as an integer so the output
            matches what ``obb_to_yolo`` produces (callers in this file parse
            every field with ``float`` and would otherwise emit ``1.0``).
        x_center, y_center: Center of the box.
        width, height: Size of the box.

    Returns:
        ``"<class> x1 y1 x2 y2 x3 y3 x4 y4"`` as a single string.
    """
    half_w = width / 2
    half_h = height / 2
    left, right = x_center - half_w, x_center + half_w
    top, bottom = y_center - half_h, y_center + half_h

    # float() first so both 1.0 and "1.0" normalise to the integer id 1.
    class_id = int(float(class_index))
    return f"{class_id} {left} {top} {right} {top} {right} {bottom} {left} {bottom}"
def convert_yolo_to_obb(input_file_path, output_directory):
    """Convert one YOLO annotation file into a YOLO OBB annotation file.

    The output is written to ``<output_directory>/<input stem>_obb.txt``;
    the directory is created when missing. Each converted line is echoed to
    stdout.

    Args:
        input_file_path: Path of the YOLO annotation file (5 values per line).
        output_directory: Directory receiving the converted file.

    Raises:
        ValueError: If a non-blank line does not contain exactly 5 values.
    """
    base_name = os.path.basename(input_file_path)
    file_name_without_extension = os.path.splitext(base_name)[0]

    os.makedirs(output_directory, exist_ok=True)
    output_file_path = os.path.join(output_directory, f"{file_name_without_extension}_obb.txt")

    with open(input_file_path, 'r') as input_file, open(output_file_path, 'w') as output_file:
        for line in input_file:
            parts = line.split()
            if not parts:
                # Blank lines (e.g. a trailing newline) carry no annotation;
                # skipping them avoids a spurious ValueError.
                continue
            if len(parts) != 5:
                raise ValueError("Each line must contain 5 values: class_index, x_center, y_center, width, height.")
            class_index, x_center, y_center, width, height = map(float, parts)
            obb_line = yolo_to_obb(class_index, x_center, y_center, width, height)
            output_file.write(obb_line + '\n')
            print(f"Converted line to OBB format: {obb_line}")

    print(f"Conversion complete. Output file saved to: {output_file_path}")
# Example run: convert a single YOLO annotation file from the face-mask
# dataset into OBB format inside the working directory.
input_yolo_annotations_file = '/kaggle/input/face-mask-dataset-yolo-format/dataset/images/test/NUZZHAB7IMI6VHQQW44IVOMBHU.txt'
output_directory = '/kaggle/working/test'
convert_yolo_to_obb(input_yolo_annotations_file, output_directory)
Converted line to OBB format: 1.0 0.4448571428571429 0.47063866266609516 0.5457142857142858 0.47063866266609516 0.5457142857142858 0.6142306043720531 0.4448571428571429 0.6142306043720531
Conversion complete. Output file saved to: /kaggle/working/test/NUZZHAB7IMI6VHQQW44IVOMBHU_obb.txt
!cat /kaggle/working/test/NUZZHAB7IMI6VHQQW44IVOMBHU_obb.txt
1.0 0.4448571428571429 0.47063866266609516 0.5457142857142858 0.47063866266609516 0.5457142857142858 0.6142306043720531 0.4448571428571429 0.6142306043720531
Visualizing the image with the bounding box annotations in obb format
def draw_obb_bounding_box(image, obb_annotation):
    """Draw one OBB annotation onto ``image`` as a closed four-sided outline.

    Args:
        image: Array-like image exposing ``.shape``; it is drawn on in place.
        obb_annotation: Text line ``"<class> x1 y1 x2 y2 x3 y3 x4 y4"`` whose
            coordinates are scaled by the image width/height before drawing.

    Returns:
        The same ``image`` object. A line that does not have 9 fields is
        reported on stdout and the image is returned untouched.
    """
    fields = obb_annotation.strip().split()
    if len(fields) != 9:
        print(f"Wrong annotation format: {obb_annotation}")
        return image

    # The first value is the class index; it is parsed but not drawn.
    _, *coords = map(float, fields)
    img_h, img_w = image.shape[:2]

    # Pair up (x, y) values and scale them to integer pixel positions.
    corners = [
        (int(x * img_w), int(y * img_h))
        for x, y in zip(coords[0::2], coords[1::2])
    ]

    line_color = (255, 0, 0)
    line_thickness = 2
    # Join every corner to the next one, wrapping around to close the shape.
    for start, end in zip(corners, corners[1:] + corners[:1]):
        cv2.line(image, start, end, line_color, line_thickness)
    return image
def visualize_obb(image_path, obb_annotations_file):
    """Display an image with every OBB annotation from a file drawn on it.

    Args:
        image_path: Path of the image to load with OpenCV.
        obb_annotations_file: Text file with one OBB annotation per line.

    Returns:
        None. Problems (unreadable image, empty annotation file) are printed
        and the function returns early without plotting.
    """
    canvas = cv2.imread(image_path)
    if canvas is None:
        print(f"Error: Unable to read the image file {image_path}")
        return

    print(f"Image dimensions: {canvas.shape}")

    with open(obb_annotations_file, 'r') as handle:
        annotation_lines = handle.readlines()

    if len(annotation_lines) == 0:
        print(f"No annotations found in file {obb_annotations_file}")
        return

    for annotation_line in annotation_lines:
        canvas = draw_obb_bounding_box(canvas, annotation_line)

    # OpenCV loads BGR; matplotlib expects RGB.
    rgb_canvas = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(10, 10))
    plt.imshow(rgb_canvas)
    plt.axis('off')
    plt.show()
# Example run: overlay the OBB file generated above on its source image.
image_path = '/kaggle/input/face-mask-dataset-yolo-format/dataset/images/test/NUZZHAB7IMI6VHQQW44IVOMBHU.jpg'
obb_annotations_file = '/kaggle/working/test/NUZZHAB7IMI6VHQQW44IVOMBHU_obb.txt'
visualize_obb(image_path, obb_annotations_file)
Image dimensions: (2333, 3500, 3)

Visualizing the yolo bounding box with the image to check if it is correctly generated
def draw_yolo_bounding_boxes(image_path, annotations_path):
    """Show an image with its YOLO boxes drawn as red rectangles.

    Args:
        image_path: Path of the image to load with OpenCV.
        annotations_path: YOLO label file; lines that do not have exactly
            5 fields are ignored.

    Returns:
        None. An unreadable image is reported on stdout and nothing is drawn.
    """
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        print(f"Error: Unable to read image file {image_path}")
        return

    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    img_h, img_w, _ = rgb_image.shape

    plt.figure(figsize=(10, 10))
    plt.imshow(rgb_image)
    axes = plt.gca()

    with open(annotations_path, 'r') as handle:
        for raw_line in handle:
            fields = raw_line.strip().split()
            if len(fields) != 5:
                continue
            _, cx, cy, box_w, box_h = map(float, fields)
            # Scale the relative values to pixel units.
            cx, cy, box_w, box_h = (cx * img_w, cy * img_h, box_w * img_w, box_h * img_h)
            top_left = (int(cx - box_w / 2), int(cy - box_h / 2))
            axes.add_patch(
                patches.Rectangle(top_left, box_w, box_h, linewidth=1, edgecolor='r', facecolor='none')
            )

    plt.axis('off')
    plt.show()
# Example run: check the YOLO file converted from OBB against its image.
image_path = '/kaggle/input/yolo-obb/alexandrite_11_jpg.rf.1052aaef4bac9e1051010f35dabc5e87.jpg'
annotations_path = '/kaggle/working/test/alexandrite_11_jpg.rf.1052aaef4bac9e1051010f35dabc5e87_yolo.txt'
draw_yolo_bounding_boxes(image_path, annotations_path)
def convert_obb_to_yolo(input_obb_file, output_yolo_file, img_width, img_height):
    """Convert an OBB annotation file to YOLO format at an explicit path.

    Pipeline variant of the converter: the caller chooses the output file
    name. Lines that do not contain exactly 9 values are skipped.

    Args:
        input_obb_file: Path of the OBB label file to read.
        output_yolo_file: Path of the YOLO label file to write.
        img_width: Unused; kept so existing callers keep working.
        img_height: Unused; kept so existing callers keep working.
    """
    with open(input_obb_file, 'r') as input_file, open(output_yolo_file, 'w') as output_file:
        for line in input_file:
            parts = line.split()
            if len(parts) != 9:
                continue
            # obb_to_yolo takes exactly the class index and eight corner
            # values; passing the image size as well raised a TypeError.
            yolo_annotation = obb_to_yolo(*map(float, parts))
            output_file.write(yolo_annotation + '\n')
def process_directory(input_dir, output_dir):
    """Convert every OBB label under ``input_dir`` into ``output_dir``.

    The directory tree of ``input_dir`` is recreated under ``output_dir``.
    A ``.txt`` file is converted only when an image with the same stem
    (``.jpg``, ``.png`` or ``.jpeg``) sits next to it and can be read.

    Args:
        input_dir: Root directory holding images and OBB label files.
        output_dir: Root directory receiving the converted label files.
    """
    image_extensions = ['.jpg', '.png', '.jpeg']

    for root, _subdirs, filenames in os.walk(input_dir):
        target_dir = os.path.join(output_dir, os.path.relpath(root, input_dir))
        os.makedirs(target_dir, exist_ok=True)

        for label_name in filenames:
            if not label_name.endswith('.txt'):
                continue

            stem = os.path.splitext(label_name)[0]
            candidates = (os.path.join(root, stem + ext) for ext in image_extensions)
            image_file = next((path for path in candidates if os.path.exists(path)), None)
            if not image_file:
                continue

            picture = cv2.imread(image_file)
            if picture is None:
                continue

            img_h, img_w = picture.shape[:2]
            convert_obb_to_yolo(
                os.path.join(root, label_name),
                os.path.join(target_dir, label_name),
                img_w,
                img_h,
            )
def main(input_root_dir, output_root_dir):
    """Run ``process_directory`` on each dataset split that exists.

    Args:
        input_root_dir: Directory containing ``train``/``test``/``valid``.
        output_root_dir: Directory under which a folder per split is written.
    """
    for subdir in ['train', 'test', 'valid']:
        input_dir = os.path.join(input_root_dir, subdir)
        if os.path.exists(input_dir):
            # Give each split its own output folder; writing all of them into
            # the bare output root merged the splits and let files with the
            # same name overwrite each other.
            process_directory(input_dir, os.path.join(output_root_dir, subdir))
# Placeholder paths: replace both with real dataset locations before running.
input_root_dir = '/path/to/root/input/directory'
output_root_dir = '/path/to/root/output/directory'
main(input_root_dir, output_root_dir)
def yolo_to_yolo_obb(class_index, x_center, y_center, width, height):
    """Build a combined annotation: the YOLO box followed by its corners.

    The returned string has 13 fields: class index, center, size, then the
    four corners in the order top-left, top-right, bottom-right, bottom-left.

    NOTE(review): the other OBB helpers in this file (``yolo_to_obb``,
    ``draw_obb_bounding_box``) work with 9-field lines, so this layout is
    not accepted by them - confirm which format downstream tools expect.

    Args:
        class_index: Class id, written as given.
        x_center, y_center: Center of the box.
        width, height: Size of the box.

    Returns:
        The 13-field annotation as a single space-separated string.
    """
    left = x_center - width / 2
    right = x_center + width / 2
    top = y_center - height / 2
    bottom = y_center + height / 2

    values = (
        class_index, x_center, y_center, width, height,
        left, top,
        right, top,
        right, bottom,
        left, bottom,
    )
    return " ".join(str(value) for value in values)
def process_directory(input_dir, output_dir):
    """Convert every YOLO label under ``input_dir`` to OBB in ``output_dir``.

    The directory tree of ``input_dir`` is recreated under ``output_dir`` and
    each converted label keeps its original file name. A ``.txt`` file is
    converted only when an image with the same stem (``.jpg``, ``.png`` or
    ``.jpeg``) sits next to it. Lines without exactly 5 values are skipped.

    Args:
        input_dir: Root directory holding images and YOLO label files.
        output_dir: Root directory receiving the OBB label files.
    """
    image_extensions = ('.jpg', '.png', '.jpeg')

    for root, _dirs, files in os.walk(input_dir):
        rel_path = os.path.relpath(root, input_dir)
        current_output_dir = os.path.join(output_dir, rel_path)
        os.makedirs(current_output_dir, exist_ok=True)

        for file in files:
            if not file.endswith('.txt'):
                continue

            img_name, _ = os.path.splitext(file)
            has_image = any(
                os.path.exists(os.path.join(root, img_name + ext))
                for ext in image_extensions
            )
            if not has_image:
                continue

            input_yolo_file = os.path.join(root, file)
            output_yolo_obb_file = os.path.join(current_output_dir, file)

            # The previous call passed four arguments (including the image
            # size) to convert_yolo_to_obb, which accepts two and therefore
            # raised a TypeError. The corner arithmetic in yolo_to_obb does
            # not use the image size, so the conversion is done here and
            # written to the mirrored path.
            with open(input_yolo_file, 'r') as src, open(output_yolo_obb_file, 'w') as dst:
                for line in src:
                    parts = line.split()
                    if len(parts) != 5:
                        continue
                    dst.write(yolo_to_obb(*map(float, parts)) + '\n')
def main(input_root_dir, output_root_dir):
    """Run ``process_directory`` on each dataset split that exists.

    Args:
        input_root_dir: Directory containing ``train``/``test``/``valid``.
        output_root_dir: Directory under which a folder per split is written.
    """
    for subdir in ['train', 'test', 'valid']:
        input_dir = os.path.join(input_root_dir, subdir)
        if os.path.exists(input_dir):
            # Give each split its own output folder; writing all of them into
            # the bare output root merged the splits and let files with the
            # same name overwrite each other.
            process_directory(input_dir, os.path.join(output_root_dir, subdir))
# Placeholder paths: replace both with real dataset locations before running.
input_root_dir = '/path/to/root/input/directory'
output_root_dir = '/path/to/root/output/directory'
main(input_root_dir, output_root_dir)
Augmentation
- The below code is for doing augmentation on the single image with bounding box
import os
import shutil
from tqdm import tqdm
from tqdm import tqdm
import os
import numpy as np
import cv2
from albumentations.pytorch import ToTensorV2
import albumentations as A
# Augmentation pipeline applied to an image together with its boxes.
# Boxes are declared as YOLO-format and their class ids are passed through
# the separate 'labels' field (see bbox_params below).
# NOTE(review): min_area / min_visibility presumably drop boxes that end up
# too small or mostly cropped away - confirm against the Albumentations docs.
augmentations = A.Compose(
[
A.VerticalFlip(p=0.5),
A.RandomBrightnessContrast(p=0.2),
A.HorizontalFlip(p=0.5),
A.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0),
A.RGBShift(r_shift_limit=30, g_shift_limit=30, b_shift_limit=30, p=0.5),
A.RandomResizedCrop(height=416, width=416, p=1),
ToTensorV2(p=1.0),
],
bbox_params=A.BboxParams(format='yolo', min_area=1024, min_visibility=0.3, label_fields=['labels'])
)
def load_image_and_bboxes(image_path, bboxes_path):
    """Load an image as RGB together with its YOLO boxes and class labels.

    Args:
        image_path: Path of the image file.
        bboxes_path: Path of the YOLO label file
            (``class x_center y_center width height`` per line).

    Returns:
        ``(image, bboxes, labels)`` where ``image`` is the RGB array,
        ``bboxes`` is a list of ``[x_center, y_center, width, height]`` and
        ``labels`` is the matching list of integer class ids.

    Raises:
        OSError: If the image cannot be read.
        ValueError: If a non-blank label line does not hold 5 numeric values.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message instead of an opaque error inside cvtColor.
        raise OSError(f"Unable to read image file: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    bboxes = []
    labels = []
    with open(bboxes_path, 'r') as file:
        for line in file:
            elements = line.split()
            if not elements:
                # Blank lines (e.g. a trailing newline) are not annotations.
                continue
            class_label, x_center, y_center, width, height = map(float, elements)
            labels.append(int(class_label))
            bboxes.append([x_center, y_center, width, height])

    return image, bboxes, labels
def augment(image, bboxes, labels):
    """Run the module-level ``augmentations`` pipeline on one sample.

    Args:
        image: Image to augment.
        bboxes: Boxes belonging to the image.
        labels: Class ids aligned with ``bboxes``.

    Returns:
        ``(image, bboxes, labels)`` taken from the pipeline's result.
    """
    result = augmentations(image=image, bboxes=bboxes, labels=labels)
    return tuple(result[key] for key in ('image', 'bboxes', 'labels'))
# Example run: load one training image with its labels and augment it once.
image_path = '/kaggle/input/face-mask-dataset-yolo-format/dataset/images/train/-1x-1.jpg'
bboxes_path = '/kaggle/input/face-mask-dataset-yolo-format/dataset/images/train/-1x-1.txt'
image, bboxes, labels = load_image_and_bboxes(image_path, bboxes_path)
augmented_image, augmented_bboxes, augmented_labels = augment(image, bboxes, labels)
def yolo_to_mpl_bbox(bbox, image_size):
    """Turn a relative YOLO box into pixel values for a matplotlib rectangle.

    Args:
        bbox: ``(x_center, y_center, width, height)`` as fractions of the
            image size.
        image_size: ``(image_width, image_height)`` in pixels.

    Returns:
        ``(x_min, y_min, box_width, box_height)`` in pixels.
    """
    pixel_w, pixel_h = image_size
    cx, cy, rel_w, rel_h = bbox

    left = (cx - rel_w / 2) * pixel_w
    top = (cy - rel_h / 2) * pixel_h
    return left, top, rel_w * pixel_w, rel_h * pixel_h
def plot_image_with_bboxes(image_np, bboxes, ax):
    """Show an image on ``ax`` and outline each YOLO box in red.

    Args:
        image_np: Image array; its ``shape`` supplies the pixel size.
        bboxes: Iterable of relative YOLO boxes.
        ax: Matplotlib axes to draw on.
    """
    ax.imshow(image_np)
    for box in bboxes:
        # shape[1::-1] reverses (height, width) into (width, height).
        left, top, box_w, box_h = yolo_to_mpl_bbox(box, image_np.shape[1::-1])
        outline = patches.Rectangle(
            (left, top), box_w, box_h, linewidth=2, edgecolor='r', facecolor='none'
        )
        ax.add_patch(outline)
# Reload the sample and augment it again so this cell can run on its own.
image, bboxes, labels = load_image_and_bboxes(image_path, bboxes_path)
augmented_image, augmented_bboxes, augmented_labels = augment(image, bboxes, labels)

# ToTensorV2 only converts the HWC array into a CHW tensor; it does not
# rescale pixel values to [0, 1]. Multiplying the uint8 tensor by 255
# therefore wrapped around and corrupted the picture, so the tensor is just
# moved back to HWC layout for plotting.
augmented_image_np = augmented_image.permute(1, 2, 0).numpy()

# Original on the left, augmented result on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
plot_image_with_bboxes(image, bboxes, ax1)
ax1.set_title('Original Image')
plot_image_with_bboxes(augmented_image_np, augmented_bboxes, ax2)
ax2.set_title('Augmented Image')
plt.show()
The below code is for creating the augmentation on the yolo format dataset (Train, Test and Valid)
# Augmentation pipeline used for the whole-dataset pass below; same settings
# as the single-image example. Boxes are declared as YOLO-format and their
# class ids are passed through the separate 'labels' field.
# NOTE(review): min_area / min_visibility presumably drop boxes that end up
# too small or mostly cropped away - confirm against the Albumentations docs.
augmentations = A.Compose(
[
A.VerticalFlip(p=0.5),
A.RandomBrightnessContrast(p=0.2),
A.HorizontalFlip(p=0.5),
A.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0),
A.RGBShift(r_shift_limit=30, g_shift_limit=30, b_shift_limit=30, p=0.5),
A.RandomResizedCrop(height=416, width=416, p=1),
ToTensorV2(p=1.0),
],
bbox_params=A.BboxParams(format='yolo', min_area=1024, min_visibility=0.3, label_fields=['labels'])
)
def process_dataset(dataset_dir, output_dir):
    """Write one augmented copy of every labelled image in ``dataset_dir``.

    Each image (``.jpg``, ``.png``, ``.jpeg``) is loaded with its same-stem
    ``.txt`` label file, passed through the module-level ``augmentations``
    pipeline and saved under the same file names in ``output_dir``.

    Args:
        dataset_dir: Directory holding images and YOLO label files.
        output_dir: Existing directory receiving the augmented pairs.
    """
    image_files = [
        f for f in os.listdir(dataset_dir)
        if os.path.splitext(f)[1].lower() in ['.jpg', '.png', '.jpeg']
    ]

    for image_filename in tqdm(image_files, desc=f"Processing {output_dir}"):
        image_path = os.path.join(dataset_dir, image_filename)
        annotation_filename = os.path.splitext(image_filename)[0] + '.txt'
        annotation_path = os.path.join(dataset_dir, annotation_filename)

        image, bboxes, labels = load_image_and_bboxes(image_path, annotation_path)
        if image is None:
            # The tolerant loader defined later in this file returns None
            # for unreadable samples; there is nothing to augment then.
            continue

        augmented = augmentations(image=image, bboxes=bboxes, labels=labels)
        augmented_bboxes = augmented['bboxes']
        augmented_labels = augmented['labels']

        image_to_save = augmented['image']
        if not isinstance(image_to_save, np.ndarray):
            # Tensor output: back to a uint8 array, channels-first to HWC.
            image_to_save = image_to_save.numpy().astype(np.uint8)
            if image_to_save.shape[0] == 3:
                image_to_save = image_to_save.transpose(1, 2, 0)

        output_image_path = os.path.join(output_dir, image_filename)
        output_annotation_path = os.path.join(output_dir, annotation_filename)

        # The loader converts images to RGB, while cv2.imwrite expects BGR;
        # writing the array as-is swapped the red and blue channels.
        bgr_image = cv2.cvtColor(np.ascontiguousarray(image_to_save), cv2.COLOR_RGB2BGR)
        cv2.imwrite(output_image_path, bgr_image)

        with open(output_annotation_path, 'w') as f:
            # Class ids come from the 'labels' field. bbox[0] is the box's
            # x_center, so int(bbox[0]) was not the class id and also left
            # an extra column in every line.
            for class_id, bbox in zip(augmented_labels, augmented_bboxes):
                bbox_str = ' '.join(map(str, bbox))
                f.write(f"{int(class_id)} {bbox_str}\n")
# Augment every split of the face-mask dataset into the working directory.
dataset_root = "/kaggle/input/face-mask-dataset-yolo-format/dataset/images"
output_root = "/kaggle/working/augmentation"
for dataset_type in ['train', 'test', 'valid']:
dataset_dir = os.path.join(dataset_root, dataset_type)
output_dir = os.path.join(output_root, dataset_type)
os.makedirs(output_dir, exist_ok=True)
process_dataset(dataset_dir, output_dir)
# Count the files (images and label files together) written for 'train'.
dir_path= "/kaggle/working/augmentation/train"
print(len([entry for entry in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, entry))]))
# Show the augmented copy of one validation image...
from IPython.display import Image
Image(filename=f"/kaggle/working/augmentation/valid/images19.jpg", width=600)
# ...and the original it was generated from, for comparison.
from IPython.display import Image
Image(filename=f"/kaggle/input/face-mask-dataset-yolo-format/dataset/images/valid/images19.jpg", width=600)
!rm -r /kaggle/working/*
The code provided facilitates the augmentation of a single image by applying a variety of transformations. For each specified augmentation type, it generates multiple unique augmented versions of the original image. Each resulting image bears a distinct filename that incorporates an identifying tag corresponding to the applied augmentation, effectively distinguishing between different augmented versions. The augmentation process adheres to the predefined logic without alteration, ensuring that the core functionality remains intact.
Make sure that you run the `load_image_and_bboxes` implementation below, not the earlier one; otherwise this section will throw an error.
def load_image_and_bboxes(image_path, annotation_path):
    """Load an RGB image with its YOLO boxes, tolerating missing files.

    Args:
        image_path: Path of the image file.
        annotation_path: Path of the YOLO label file; lines that do not have
            exactly 5 fields are ignored.

    Returns:
        ``(image, bboxes, labels)``. When the label file is missing or the
        image cannot be read, a message is printed and ``(None, [], [])`` is
        returned instead.
    """
    if not os.path.isfile(annotation_path):
        print(f"Annotation file does not exist: {annotation_path}")
        return None, [], []

    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        print(f"Failed to read the image: {image_path}")
        return None, [], []
    image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    with open(annotation_path, "r") as handle:
        rows = [raw_line.strip().split() for raw_line in handle]

    # Keep well-formed rows only, parsed as floats.
    parsed = [tuple(map(float, row)) for row in rows if len(row) == 5]
    labels = [int(values[0]) for values in parsed]
    bboxes = [list(values[1:]) for values in parsed]
    return image, bboxes, labels
# One transform object per augmentation type; p=1 is passed to each.
vertical_flip = A.VerticalFlip(p=1)
random_bright_contrast = A.RandomBrightnessContrast(p=1)
horizontal_flip = A.HorizontalFlip(p=1)
shift_scale_rotate = A.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0)
rgb_shift = A.RGBShift(r_shift_limit=30, g_shift_limit=30, b_shift_limit=30, p=1)
random_resized_crop = A.RandomResizedCrop(height=416, width=416, p=1)
# (tag, transform) pairs; the tag is appended to the output file names.
augmentations_list = [
('vflip', vertical_flip),
('bright_contrast', random_bright_contrast),
('hflip', horizontal_flip),
('shift_scale_rotate', shift_scale_rotate),
('rgb_shift', rgb_shift),
('resized_crop', random_resized_crop),
]
def process_dataset(dataset_dir, output_dir):
    """Write one augmented copy per augmentation type for every image.

    For each image in ``dataset_dir`` with a same-stem ``.txt`` label file,
    every entry of the module-level ``augmentations_list`` is applied
    separately and saved as ``<stem>_<tag><ext>`` / ``<stem>_<tag>.txt`` in
    ``output_dir``. Samples that cannot be loaded are skipped.

    Args:
        dataset_dir: Directory holding images and YOLO label files.
        output_dir: Existing directory receiving the augmented pairs.
    """
    image_files = [
        f for f in os.listdir(dataset_dir)
        if os.path.splitext(f)[1].lower() in ['.jpg', '.jpeg', '.png']
    ]

    # A bare transform does not know the boxes are in YOLO format; wrapping
    # each one in Compose with BboxParams makes the library convert the boxes
    # and keep the class ids aligned with them.
    pipelines = [
        (
            aug_name,
            A.Compose([aug], bbox_params=A.BboxParams(format='yolo', label_fields=['labels'])),
        )
        for aug_name, aug in augmentations_list
    ]

    for image_filename in tqdm(image_files, desc=f"Processing {output_dir}"):
        image_path = os.path.join(dataset_dir, image_filename)
        filename_without_ext, ext = os.path.splitext(image_filename)
        annotation_path = os.path.join(dataset_dir, filename_without_ext + '.txt')

        image, bboxes, labels = load_image_and_bboxes(image_path, annotation_path)
        if image is None:
            # The loader already reported the problem; nothing to augment.
            continue

        for aug_name, pipeline in pipelines:
            augmented = pipeline(image=image, bboxes=bboxes, labels=labels)
            output_image_path = os.path.join(output_dir, f"{filename_without_ext}_{aug_name}{ext}")
            output_annotation_path = os.path.join(output_dir, f"{filename_without_ext}_{aug_name}.txt")
            save_augmented_data(
                augmented['image'],
                augmented['bboxes'],
                output_image_path,
                output_annotation_path,
                labels=augmented['labels'],
            )


def save_augmented_data(image, bboxes, image_path, annotation_path, labels=None):
    """Save an augmented RGB image and its YOLO label file.

    Args:
        image: RGB image as an array, or a tensor exposing ``.numpy()``.
        bboxes: Sequence of ``(x_center, y_center, width, height)`` boxes.
        image_path: Destination of the image file.
        annotation_path: Destination of the label file.
        labels: Class ids aligned with ``bboxes``. When omitted, the previous
            behaviour is kept: every line uses ``int(bboxes[0][0])`` as the
            class id (an empty box list now yields an empty file instead of
            an IndexError).
    """
    if not isinstance(image, np.ndarray):
        image = image.numpy().astype(np.uint8)
        if image.shape[0] == 3:
            # Channels-first tensor layout back to HWC.
            image = image.transpose(1, 2, 0)

    # cv2.imwrite expects BGR channel order.
    cv2.imwrite(image_path, cv2.cvtColor(np.ascontiguousarray(image), cv2.COLOR_RGB2BGR))

    if labels is None:
        labels = [int(bboxes[0][0])] * len(bboxes) if len(bboxes) > 0 else []

    with open(annotation_path, 'w') as file:
        for class_id, bbox in zip(labels, bboxes):
            bbox_str = ' '.join(map(str, bbox))
            file.write(f"{int(class_id)} {bbox_str}\n")
# Run the per-augmentation pass over every split of the face-mask dataset.
dataset_root = "/kaggle/input/face-mask-dataset-yolo-format/dataset/images"
output_root = "/kaggle/working/augmentation"
for dataset_type in ['train', 'test', 'valid']:
dataset_dir = os.path.join(dataset_root, dataset_type)
output_dir = os.path.join(output_root, dataset_type)
os.makedirs(output_dir, exist_ok=True)
process_dataset(dataset_dir, output_dir)
print("--------------------Done---------------------")
# Count the files (images and label files together) written for 'train'.
dir_path= "/kaggle/working/augmentation/train"
print(len([entry for entry in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, entry))]))
This is a notebook to do the following:
It is better if you run the code sequentially
Data Format Conversion Yolo OBB to Yolo Format
#Importing the libraries import albumentations as A from albumentations.pytorch.transforms import ToTensorV2 import cv2 import matplotlib.pyplot as plt import matplotlib.patches as patches import os import math import shutilThe below function 'obb_to_yolo' converts the yolo obb format annotation to yolo format annotation for the datum
def obb_to_yolo(class_index, x1, y1, x2, y2, x3, y3, x4, y4): # Find min and max for x and y among all four corner points xmin = min(x1, x2, x3, x4) xmax = max(x1, x2, x3, x4) ymin = min(y1, y2, y3, y4) ymax = max(y1, y2, y3, y4) # Calculate the axis-aligned bounding box (AABB) x_center = (xmin + xmax) / 2 y_center = (ymin + ymax) / 2 width = xmax - xmin height = ymax - ymin # Create the YOLO format annotation yolo_annotation = f"{int(class_index)} {x_center} {y_center} {width} {height}" return yolo_annotationdef convert_obb_to_yolo(input_file_path, output_directory): # Get the base name without the extension and the name of the input file base_name = os.path.basename(input_file_path) file_name_without_extension = os.path.splitext(base_name)[0] # Ensure the output directory exists if not os.path.exists(output_directory): os.makedirs(output_directory) # Output file has the same name as the input file, saved in the output directory with '_yolo' appended output_file_path = os.path.join(output_directory, f"{file_name_without_extension}_yolo.txt") # Read the input file, convert each line, and write to output file with open(input_file_path, 'r') as input_file, open(output_file_path, 'w') as output_file: for line in input_file: parts = line.strip().split() if len(parts) != 9: raise ValueError("Each line must contain 9 values: class_index, x1, y1, x2, y2, x3, y3, x4, y4.") # Convert string parts to float and unpack them class_index, x1, y1, x2, y2, x3, y3, x4, y4 = map(float, parts) # Call the conversion function yolo_line = obb_to_yolo(class_index, x1, y1, x2, y2, x3, y3, x4, y4) # Write the result to the output file output_file.write(yolo_line + '\n') print(f"Converted line to YOLO format: {yolo_line}") print(f"Conversion complete. 
Output file saved to: {output_file_path}") input_obb_annotations_file = '/kaggle/input/yolo-obb/alexandrite_11_jpg.rf.1052aaef4bac9e1051010f35dabc5e87.txt' output_directory = '/kaggle/working/test' convert_obb_to_yolo(input_obb_annotations_file, output_directory)!cat /kaggle/input/face-mask-dataset-yolo-format/dataset/images/test/NUZZHAB7IMI6VHQQW44IVOMBHU.txtData Format Conversion Yolo Format to Yolo OBB Format
def yolo_to_obb(class_index, x_center, y_center, width, height): # Calculate the coordinates of the four corner points of the OBB x1 = x_center - width / 2 y1 = y_center - height / 2 x2 = x_center + width / 2 y2 = y_center - height / 2 x3 = x_center + width / 2 y3 = y_center + height / 2 x4 = x_center - width / 2 y4 = y_center + height / 2 # Create the OBB format annotation obb_annotation = f"{class_index} {x1} {y1} {x2} {y2} {x3} {y3} {x4} {y4}" return obb_annotationdef convert_yolo_to_obb(input_file_path, output_directory): base_name = os.path.basename(input_file_path) file_name_without_extension = os.path.splitext(base_name)[0] # Ensure the output directory exists if not os.path.exists(output_directory): os.makedirs(output_directory) # Output file has the same name as the input file, saved in the output directory with '_obb' appended output_file_path = os.path.join(output_directory, f"{file_name_without_extension}_obb.txt") # Read the input file, convert each line, and write to output file with open(input_file_path, 'r') as input_file, open(output_file_path, 'w') as output_file: for line in input_file: parts = line.strip().split() if len(parts) != 5: raise ValueError("Each line must contain 5 values: class_index, x_center, y_center, width, height.") # Convert string parts to float and unpack them class_index, x_center, y_center, width, height = map(float, parts) # Call the conversion function obb_line = yolo_to_obb(class_index, x_center, y_center, width, height) # Write the result to the output file output_file.write(obb_line + '\n') print(f"Converted line to OBB format: {obb_line}") print(f"Conversion complete. Output file saved to: {output_file_path}") input_yolo_annotations_file = '/kaggle/input/face-mask-dataset-yolo-format/dataset/images/test/NUZZHAB7IMI6VHQQW44IVOMBHU.txt' output_directory = '/kaggle/working/test' convert_yolo_to_obb(input_yolo_annotations_file, output_directory)Visualizing the image with the bounding box annotations in obb format
def draw_obb_bounding_box(image, obb_annotation): # Split the annotation into parts parts = obb_annotation.strip().split() if len(parts) != 9: print(f"Wrong annotation format: {obb_annotation}") return image # Skip this annotation if the format is wrong # Convert to floating point numbers class_index, x1, y1, x2, y2, x3, y3, x4, y4 = map(float, parts) # Denormalize coordinates if necessary height, width = image.shape[:2] points = [(x1 * width, y1 * height), (x2 * width, y2 * height), (x3 * width, y3 * height), (x4 * width, y4 * height)] # Draw the bounding box as lines between each corner point color = (255, 0, 0) # Blue color in BGR thickness = 2 num_points = len(points) for i in range(num_points): pt1 = (int(points[i][0]), int(points[i][1])) pt2 = (int(points[(i + 1) % num_points][0]), int(points[(i + 1) % num_points][1])) cv2.line(image, pt1, pt2, color, thickness) return image def visualize_obb(image_path, obb_annotations_file): # Read the image image = cv2.imread(image_path) if image is None: print(f"Error: Unable to read the image file {image_path}") return print(f"Image dimensions: {image.shape}") # Read the OBB annotations file with open(obb_annotations_file, 'r') as file: annotations = file.readlines() if not annotations: print(f"No annotations found in file {obb_annotations_file}") return # Draw each OBB annotation on the image for annotation in annotations: image = draw_obb_bounding_box(image, annotation) # Convert to RGB and plot the image using matplotlib image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) plt.figure(figsize=(10, 10)) plt.imshow(image_rgb) plt.axis('off') # Hide the axis plt.show()# Provide the input file path of the image and the annotations image_path = '/kaggle/input/face-mask-dataset-yolo-format/dataset/images/test/NUZZHAB7IMI6VHQQW44IVOMBHU.jpg' obb_annotations_file = '/kaggle/working/test/NUZZHAB7IMI6VHQQW44IVOMBHU_obb.txt' visualize_obb(image_path, obb_annotations_file)Visualizing the yolo bounding box with the image to check if 
it is correctly generated
def draw_yolo_bounding_boxes(image_path, annotations_path): # Read the image image = cv2.imread(image_path) if image is None: print(f"Error: Unable to read image file {image_path}") return # Convert to RGB image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) height, width, _ = image.shape # Start plot plt.figure(figsize=(10, 10)) plt.imshow(image) ax = plt.gca() # Read annotations with open(annotations_path, 'r') as file: for line in file.readlines(): parts = line.strip().split() if len(parts) == 5: cls, x_center, y_center, w, h = map(float, parts) # Denormalize coordinates x_center, y_center, w, h = (x_center * width, y_center * height, w * width, h * height) x_min, y_min = int(x_center - w / 2), int(y_center - h / 2) # Create a Rectangle patch rect = patches.Rectangle((x_min, y_min), w, h, linewidth=1, edgecolor='r', facecolor='none') # Add the patch to the Axes ax.add_patch(rect) plt.axis('off') # Hide axis plt.show()# Example usage image_path = '/kaggle/input/yolo-obb/alexandrite_11_jpg.rf.1052aaef4bac9e1051010f35dabc5e87.jpg' # Replace with the path to your image annotations_path = '/kaggle/working/test/alexandrite_11_jpg.rf.1052aaef4bac9e1051010f35dabc5e87_yolo.txt' # Replace with the path to your YOLO annotation file draw_yolo_bounding_boxes(image_path, annotations_path)Pipeline to format the yolo obb dataset annotations inside directories to yolo format dataset annotations in respective directories
# modifying the convert obb to yolo function for the output path file def convert_obb_to_yolo(input_obb_file, output_yolo_file, img_width, img_height): with open(input_obb_file, 'r') as input_file, open(output_yolo_file, 'w') as output_file: for line in input_file: parts = line.strip().split() if len(parts) == 9: class_index, x1, y1, x2, y2, x3, y3, x4, y4 = map(float, parts) # Convert annotation to YOLO format yolo_annotation = obb_to_yolo(class_index, x1, y1, x2, y2, x3, y3, x4, y4, img_width, img_height) output_file.write(yolo_annotation + '\n')def process_directory(input_dir, output_dir): # Recursively process files in input directory and convert annotations for root, dirs, files in os.walk(input_dir): # Create corresponding structure in output_dir rel_path = os.path.relpath(root, input_dir) current_output_dir = os.path.join(output_dir, rel_path) os.makedirs(current_output_dir, exist_ok=True) for file in files: # Check if the file is an annotation file (assumes .txt extension) if file.endswith('.txt'): input_obb_file = os.path.join(root, file) output_yolo_file = os.path.join(current_output_dir, file) # Assume image with the same name (but different extension) exists img_name, _ = os.path.splitext(file) img_file = None for ext in ['.jpg', '.png', '.jpeg']: # Add other image extensions if necessary if os.path.exists(os.path.join(root, img_name + ext)): img_file = os.path.join(root, img_name + ext) break if img_file: # Read image to get width and height img = cv2.imread(img_file) if img is not None: height, width = img.shape[:2] # Convert the annotation file convert_obb_to_yolo(input_obb_file, output_yolo_file, width, height)#Change the subdirectory name if required def main(input_root_dir, output_root_dir): # Process each subdirectory ('train', 'test', 'valid') for subdir in ['train', 'test', 'valid']: input_dir = os.path.join(input_root_dir, subdir) if os.path.exists(input_dir): process_directory(input_dir, output_root_dir)input_root_dir = 
'/path/to/root/input/directory' # Replace with your input root directory path output_root_dir = '/path/to/root/output/directory' # Replace with your desired output root directory path main(input_root_dir, output_root_dir)Pipeline to format the yolo dataset annotations inside directories to yolo obb format dataset annotations in respective directories
def yolo_to_yolo_obb(class_index, x_center, y_center, width, height): # Calculate the coordinates of the four corner points of the OBB x1 = x_center - width / 2 y1 = y_center - height / 2 x2 = x_center + width / 2 y2 = y_center - height / 2 x3 = x_center + width / 2 y3 = y_center + height / 2 x4 = x_center - width / 2 y4 = y_center + height / 2 # Create the YOLO OBB format annotation yolo_obb_annotation = f"{class_index} {x_center} {y_center} {width} {height} {x1} {y1} {x2} {y2} {x3} {y3} {x4} {y4}" return yolo_obb_annotationdef process_directory(input_dir, output_dir): # Recursively process files in input directory and convert annotations for root, dirs, files in os.walk(input_dir): # Create corresponding structure in output_dir rel_path = os.path.relpath(root, input_dir) current_output_dir = os.path.join(output_dir, rel_path) os.makedirs(current_output_dir, exist_ok=True) for file in files: # Check if the file is an annotation file (assumes .txt extension) if file.endswith('.txt'): input_yolo_file = os.path.join(root, file) output_yolo_obb_file = os.path.join(current_output_dir, file) # Assume image with the same name (but different extension) exists img_name, _ = os.path.splitext(file) img_file = None for ext in ['.jpg', '.png', '.jpeg']: # Add other image extensions if necessary if os.path.exists(os.path.join(root, img_name + ext)): img_file = os.path.join(root, img_name + ext) break if img_file: # Read image to get width and height img = cv2.imread(img_file) if img is not None: height, width = img.shape[:2] # Convert the annotation file convert_yolo_to_obb(input_yolo_file, output_yolo_obb_file, width, height)#Change the subdirectory name if required def main(input_root_dir, output_root_dir): # Process each subdirectory ('train', 'test', 'valid') for subdir in ['train', 'test', 'valid']: input_dir = os.path.join(input_root_dir, subdir) if os.path.exists(input_dir): process_directory(input_dir, output_root_dir)input_root_dir = '/path/to/root/input/directory' # 
Replace with your input root directory path output_root_dir = '/path/to/root/output/directory' # Replace with your desired output root directory path main(input_root_dir, output_root_dir)Augmentation
# Importing the libraries
import os
import shutil

import albumentations as A
import cv2
import numpy as np
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm

# Define the augmentations
augmentations = A.Compose(
    [
        A.VerticalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0),
        A.RGBShift(r_shift_limit=30, g_shift_limit=30, b_shift_limit=30, p=0.5),
        A.RandomResizedCrop(height=416, width=416, p=1),
        # Converts the HWC image to a CHW PyTorch tensor.  It does NOT
        # rescale pixel values; a uint8 image stays in [0, 255].
        ToTensorV2(p=1.0),
    ],
    bbox_params=A.BboxParams(format='yolo', min_area=1024, min_visibility=0.3, label_fields=['labels']),
)


# Loading the image and the bounding boxes
def load_image_and_bboxes(image_path, bboxes_path):
    """Load an image as RGB together with its YOLO annotations.

    Args:
        image_path: Path of the image file.
        bboxes_path: Path of the YOLO label file
            (``class x_center y_center width height`` per line).

    Returns:
        ``(image, bboxes, labels)`` where ``image`` is the RGB array,
        ``bboxes`` is a list of ``[x_center, y_center, width, height]`` and
        ``labels`` is the list of integer class ids in the same order.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
        ValueError: If a non-blank annotation line does not have 5 fields.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising; fail with a clear message
        raise FileNotFoundError(f"Failed to read the image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    bboxes = []
    labels = []
    with open(bboxes_path, 'r') as file:
        for line in file:
            elements = line.strip().split()
            if not elements:
                continue  # tolerate blank lines (e.g. a trailing newline)
            if len(elements) != 5:
                raise ValueError(
                    f"Expected 5 values per line in {bboxes_path}, got {len(elements)}: {line.strip()!r}"
                )
            class_label, x_center, y_center, width, height = map(float, elements)
            labels.append(int(class_label))
            bboxes.append([x_center, y_center, width, height])

    return image, bboxes, labels


# Apply augmentations
def augment(image, bboxes, labels):
    """Run the module-level ``augmentations`` pipeline once.

    Returns the augmented ``(image, bboxes, labels)``.
    """
    augmented = augmentations(image=image, bboxes=bboxes, labels=labels)
    return augmented['image'], augmented['bboxes'], augmented['labels']


# Input image path and the bounding box path
image_path = '/kaggle/input/face-mask-dataset-yolo-format/dataset/images/train/-1x-1.jpg'
bboxes_path = '/kaggle/input/face-mask-dataset-yolo-format/dataset/images/train/-1x-1.txt'

image, bboxes, labels = load_image_and_bboxes(image_path, bboxes_path)
augmented_image, augmented_bboxes, augmented_labels = augment(image, bboxes, labels)


# Function to convert YOLO bboxes to matplotlib format
def yolo_to_mpl_bbox(bbox, image_size):
    """Convert a normalised YOLO box to pixel ``(x_min, y_min, width, height)``.

    Args:
        bbox: ``(x_center, y_center, width, height)``, normalised to [0, 1].
        image_size: ``(image_width, image_height)`` in pixels.
    """
    img_width, img_height = image_size
    x_center, y_center, width, height = bbox[:4]

    bbox_width = width * img_width
    bbox_height = height * img_height
    x_min = (x_center - width / 2) * img_width
    y_min = (y_center - height / 2) * img_height
    return x_min, y_min, bbox_width, bbox_height


# Function to plot an image and draw the bounding boxes
def plot_image_with_bboxes(image_np, bboxes, ax):
    """Show ``image_np`` on ``ax`` and outline every YOLO box in red."""
    ax.imshow(image_np)

    # shape is (height, width, channels); [1::-1] yields (width, height)
    image_size = image_np.shape[1::-1]
    for bbox in bboxes:
        x_min, y_min, bbox_width, bbox_height = yolo_to_mpl_bbox(bbox, image_size)
        rect = patches.Rectangle(
            (x_min, y_min), bbox_width, bbox_height, linewidth=2, edgecolor='r', facecolor='none'
        )
        ax.add_patch(rect)


# Load and augment the image and bboxes
image, bboxes, labels = load_image_and_bboxes(image_path, bboxes_path)
augmented_image, augmented_bboxes, augmented_labels = augment(image, bboxes, labels)

# Convert augmented_image (CHW tensor) to an HWC numpy array for visualization.
# ToTensorV2 leaves the uint8 values in [0, 255], so they must not be
# multiplied by 255 (that overflowed uint8 and corrupted the preview).
augmented_image_np = augmented_image.permute(1, 2, 0).numpy()

# Create the subplot for original and augmented images
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

# Plot the original image with bounding boxes
plot_image_with_bboxes(image, bboxes, ax1)
ax1.set_title('Original Image')

# Plot the augmented image with bounding boxes
plot_image_with_bboxes(augmented_image_np, augmented_bboxes, ax2)
ax2.set_title('Augmented Image')

# Display the images
plt.show()

# The code below creates the augmentation for the whole YOLO-format dataset
# (train, test and valid splits).
# Define the augmentations augmentations = A.Compose( [ A.VerticalFlip(p=0.5), A.RandomBrightnessContrast(p=0.2), A.HorizontalFlip(p=0.5), A.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0), A.RGBShift(r_shift_limit=30, g_shift_limit=30, b_shift_limit=30, p=0.5), A.RandomResizedCrop(height=416, width=416, p=1), ToTensorV2(p=1.0), # Converts image to pytorch tensor and scales it to [0,1] ], bbox_params=A.BboxParams(format='yolo', min_area=1024, min_visibility=0.3, label_fields=['labels']) )# Function to process and augment a dataset directory (train, test or val) def process_dataset(dataset_dir, output_dir): # Get a list of all image files and corresponding annotation files image_files = [f for f in os.listdir(dataset_dir) if os.path.splitext(f)[1].lower() in ['.jpg', '.png', '.jpeg']] for image_filename in tqdm(image_files, desc=f"Processing {output_dir}"): image_path = os.path.join(dataset_dir, image_filename) annotation_filename = os.path.splitext(image_filename)[0] + '.txt' annotation_path = os.path.join(dataset_dir, annotation_filename) # Load image and bounding boxes image, bboxes, labels = load_image_and_bboxes(image_path, annotation_path) # Augment the image and the bounding boxes augmented = augmentations(image=image, bboxes=bboxes, labels=labels) augmented_image, augmented_bboxes, _ = augmented['image'], augmented['bboxes'], augmented['labels'] # Convert the augmented image to a numpy array with uint8 type (assuming it's already in the [0, 255] range) if isinstance(augmented_image, np.ndarray): image_to_save = augmented_image else: image_to_save = augmented_image.numpy().astype(np.uint8) if image_to_save.shape[0] == 3: # If the image has channels-first format (C, H, W) # Convert the tensor to channels-last format (H, W, C) image_to_save = image_to_save.transpose(1, 2, 0) # Define the output paths output_image_path = os.path.join(output_dir, image_filename) output_annotation_path = os.path.join(output_dir, 
annotation_filename) # Save the augmented image using OpenCV cv2.imwrite(output_image_path, image_to_save) # Save the augmented bounding boxes (in YOLO format) with open(output_annotation_path, 'w') as f: for bbox in augmented_bboxes: class_id = int(bbox[0]) # Assuming the first element is the class ID bbox_str = ' '.join(map(str, bbox)) f.write(f"{class_id} {bbox_str}\n") # Create output folders for train, test, and val dataset_root = "/kaggle/input/face-mask-dataset-yolo-format/dataset/images" output_root = "/kaggle/working/augmentation" for dataset_type in ['train', 'test', 'valid']: dataset_dir = os.path.join(dataset_root, dataset_type) output_dir = os.path.join(output_root, dataset_type) os.makedirs(output_dir, exist_ok=True) # Process dataset process_dataset(dataset_dir, output_dir)dir_path= "/kaggle/working/augmentation/train" print(len([entry for entry in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, entry))]))from IPython.display import Image #Image(filename=f"{HOME}/runs/train/exp/results.png", width=1000) Image(filename=f"/kaggle/working/augmentation/valid/images19.jpg", width=600)from IPython.display import Image #Image(filename=f"/kaggle/working/augmentation/valid/images19.jpg", width=600) Image(filename=f"/kaggle/input/face-mask-dataset-yolo-format/dataset/images/valid/images19.jpg", width=600)# Used for removing the directories wrongly created !rm -r /kaggle/working/*The code provided facilitates the augmentation of a single image by applying a variety of transformations. For each specified augmentation type, it generates multiple unique augmented versions of the original image. Each resulting image bears a distinct filename that incorporates an identifying tag corresponding to the applied augmentation, effectively distinguishing between different augmented versions. The augmentation process adheres to the predefined logic without alteration, ensuring that the core functionality remains intact.
def load_image_and_bboxes(image_path, annotation_path):
    """Load an image as RGB together with its YOLO annotations.

    Returns:
        ``(image, bboxes, labels)``.  ``bboxes`` is a list of
        ``[x_center, y_center, width, height]`` and ``labels`` the matching
        integer class ids.  If the annotation file is missing or the image
        cannot be read, a message is printed and ``(None, [], [])`` is
        returned.  Annotation lines that do not have exactly 5 fields are
        ignored.
    """
    # Check the existence of the annotation file
    if not os.path.isfile(annotation_path):
        print(f"Annotation file does not exist: {annotation_path}")
        return None, [], []

    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to read the image: {image_path}")
        return None, [], []

    # Convert BGR to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Read and parse the YOLO annotation file
    bboxes, labels = [], []
    with open(annotation_path, "r") as file:
        for line in file:
            elements = line.strip().split()
            if len(elements) != 5:
                continue
            class_id, x_center, y_center, width, height = map(float, elements)
            labels.append(int(class_id))
            bboxes.append([x_center, y_center, width, height])  # YOLO format

    return image, bboxes, labels


# Define individual augmentations
vertical_flip = A.VerticalFlip(p=1)
random_bright_contrast = A.RandomBrightnessContrast(p=1)
horizontal_flip = A.HorizontalFlip(p=1)
shift_scale_rotate = A.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0)
rgb_shift = A.RGBShift(r_shift_limit=30, g_shift_limit=30, b_shift_limit=30, p=1)
random_resized_crop = A.RandomResizedCrop(height=416, width=416, p=1)


def _with_yolo_bboxes(transform):
    """Wrap a single transform so it understands YOLO boxes and labels.

    A bare transform does not know the boxes are in YOLO
    ``x_center y_center width height`` form; only ``A.Compose`` with
    ``BboxParams`` converts them, filters boxes that fall out of the frame
    and keeps ``labels`` in sync with the surviving boxes.
    """
    return A.Compose(
        [transform],
        bbox_params=A.BboxParams(format='yolo', min_visibility=0.3, label_fields=['labels']),
    )


# Group all augmentations in a list of (file-name tag, callable pipeline)
augmentations_list = [
    ('vflip', _with_yolo_bboxes(vertical_flip)),
    ('bright_contrast', _with_yolo_bboxes(random_bright_contrast)),
    ('hflip', _with_yolo_bboxes(horizontal_flip)),
    ('shift_scale_rotate', _with_yolo_bboxes(shift_scale_rotate)),
    ('rgb_shift', _with_yolo_bboxes(rgb_shift)),
    ('resized_crop', _with_yolo_bboxes(random_resized_crop)),
]


# Save the augmented image and bounding boxes
def save_augmented_data(image, bboxes, image_path, annotation_path, labels=None):
    """Write an augmented RGB image and its YOLO label file.

    Args:
        image: RGB image, either an ``(H, W, C)`` numpy array or an object
            with ``.numpy()`` returning a ``(C, H, W)`` array.
        bboxes: Boxes as ``(x_center, y_center, width, height)``.
        image_path: Destination image file.
        annotation_path: Destination label file.
        labels: Class id for each box, in the same order as ``bboxes``.
            When omitted, every box must carry its class id as a fifth
            element.

    Raises:
        ValueError: If the class id of a box cannot be determined.

    The previous version used ``int(bboxes[0][0])`` (the first box's
    x-centre) as the class of every box and raised ``IndexError`` when the
    augmentation removed all boxes.  An empty ``bboxes`` now yields an empty
    label file.
    """
    # Handle conversion to uint8 / channels-last if needed
    if not isinstance(image, np.ndarray):
        image = image.numpy().astype(np.uint8)
        if image.ndim == 3 and image.shape[0] == 3:  # Channels-first format
            image = image.transpose(1, 2, 0)

    # Save the image (OpenCV expects BGR)
    cv2.imwrite(image_path, cv2.cvtColor(np.ascontiguousarray(image), cv2.COLOR_RGB2BGR))

    if labels is None:
        if any(len(bbox) < 5 for bbox in bboxes):
            raise ValueError("labels were not given and the bboxes do not carry a class id")
        class_ids = [bbox[4] for bbox in bboxes]
    else:
        if len(labels) != len(bboxes):
            raise ValueError(f"Got {len(labels)} labels for {len(bboxes)} bboxes")
        class_ids = labels

    # Save the bboxes
    with open(annotation_path, 'w') as file:
        for class_id, bbox in zip(class_ids, bboxes):
            coords = ' '.join(str(float(value)) for value in bbox[:4])
            file.write(f"{int(class_id)} {coords}\n")


# Function to process and augment a dataset directory (train, test or val)
def process_dataset(dataset_dir, output_dir):
    """Apply every entry of ``augmentations_list`` to every image separately.

    For an image ``name.jpg`` and an augmentation tagged ``tag`` the outputs
    are ``name_tag.jpg`` and ``name_tag.txt`` inside ``output_dir``.  Images
    whose annotation is missing or that cannot be read are skipped (the
    loader prints the reason).
    """
    image_files = [
        f for f in os.listdir(dataset_dir)
        if os.path.splitext(f)[1].lower() in ('.jpg', '.jpeg', '.png')
    ]

    for image_filename in tqdm(image_files, desc=f"Processing {output_dir}"):
        filename_without_ext, ext = os.path.splitext(image_filename)
        image_path = os.path.join(dataset_dir, image_filename)
        annotation_path = os.path.join(dataset_dir, filename_without_ext + '.txt')

        # Load image and bounding boxes
        image, bboxes, labels = load_image_and_bboxes(image_path, annotation_path)
        if image is None:
            continue

        # Apply each augmentation separately and save the results
        for aug_name, aug in augmentations_list:
            augmented = aug(image=image, bboxes=bboxes, labels=labels)

            # Add a tag for the augmentation
            output_image_path = os.path.join(output_dir, f"{filename_without_ext}_{aug_name}{ext}")
            output_annotation_path = os.path.join(output_dir, f"{filename_without_ext}_{aug_name}.txt")

            save_augmented_data(
                augmented['image'],
                augmented['bboxes'],
                output_image_path,
                output_annotation_path,
                labels=augmented['labels'],
            )


# Define the paths and start the process.
dataset_root = "/kaggle/input/face-mask-dataset-yolo-format/dataset/images"
output_root = "/kaggle/working/augmentation"

for dataset_type in ['train', 'test', 'valid']:
    dataset_dir = os.path.join(dataset_root, dataset_type)
    output_dir = os.path.join(output_root, dataset_type)
    os.makedirs(output_dir, exist_ok=True)

    # Process the dataset
    process_dataset(dataset_dir, output_dir)

print("--------------------Done---------------------")

# For checking the no of files generated inside the output directory
dir_path = "/kaggle/working/augmentation/train"
print(len([entry for entry in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, entry))]))