diff --git a/my_models/yolo/utilz.py b/my_models/yolo/utilz.py
index 1e19fedfc564e4721545752d285efbf8a0c21f5e..5870e213269be4ca9fa33ffa03f8fd34cb81d6d0 100644
--- a/my_models/yolo/utilz.py
+++ b/my_models/yolo/utilz.py
@@ -1,161 +1,161 @@
-# from typing import Tuple
-from ultralytics.utils import ops
-import torch
-import numpy as np
-import cv2
-
-try:
-    scale_segments = ops.scale_segments
-except AttributeError:
-    scale_segments = ops.scale_coords
-
-
-def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=False, scale_fill=False, scaleup=False, stride=32):
-    """
-    Resize image and padding for detection. Takes image as input,
-    resizes image to fit into new shape with saving original aspect ratio and pads it to meet stride-multiple constraints
-
-    Parameters:
-      img (np.ndarray): image for preprocessing
-      new_shape (Tuple(int, int)): image size after preprocessing in format [height, width]
-      color (Tuple(int, int, int)): color for filling padded area
-      auto (bool): use dynamic input size, only padding for stride constrins applied
-      scale_fill (bool): scale image to fill new_shape
-      scaleup (bool): allow scale image if it is lower then desired input size, can affect model accuracy
-      stride (int): input padding stride
-    Returns:
-      img (np.ndarray): image after preprocessing
-      ratio (Tuple(float, float)): hight and width scaling ratio
-      padding_size (Tuple(int, int)): height and width padding size
-
-
-    """
-    # Resize and pad image while meeting stride-multiple constraints
-    shape = img.shape[:2]  # current shape [height, width]
-    if isinstance(new_shape, int):
-        new_shape = (new_shape, new_shape)
-
-    # Scale ratio (new / old)
-    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
-    if not scaleup:  # only scale down, do not scale up (for better test mAP)
-        r = min(r, 1.0)
-
-    # Compute padding
-    ratio = r, r  # width, height ratios
-    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
-    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
-    if auto:  # minimum rectangle
-        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
-    elif scale_fill:  # stretch
-        dw, dh = 0.0, 0.0
-        new_unpad = (new_shape[1], new_shape[0])
-        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
-
-    dw /= 2  # divide padding into 2 sides
-    dh /= 2
-
-    if shape[::-1] != new_unpad:  # resize
-        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
-    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
-    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
-    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
-    return img, ratio, (dw, dh)
-
-
-def preprocess_image_(img0, imgsz):
-    """
-    Preprocess image according to YOLOv8 input requirements.
-    Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW.
-
-    Parameters:
-      img0 (np.ndarray): image for preprocessing
-    Returns:
-      img (np.ndarray): image after preprocessing
-    """
-    # resize
-    img = letterbox(img0, new_shape=(imgsz, imgsz))[0]
-
-    # Convert HWC to CHW
-    img = img.transpose(2, 0, 1)
-    img = np.ascontiguousarray(img)
-    return img
-
-
-def image_to_tensor_(image):
-    """
-    Preprocess image according to YOLOv8 input requirements.
-    Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW.
-
-    Parameters:
-      image (np.ndarray): image for preprocessing
-    Returns:
-      input_tensor (np.ndarray): input tensor in NCHW format with float32 values in [0, 1] range
-    """
-    input_tensor = image.astype(np.float32)  # uint8 to fp32
-    input_tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
-
-    # add batch dimension
-    if input_tensor.ndim == 3:
-        input_tensor = np.expand_dims(input_tensor, 0)
-    return input_tensor
-
-
-def postprocess_(
-    pred_boxes,
-    input_hw,
-    orig_img,
-    min_conf_threshold=0.25,
-    nms_iou_threshold=0.7,
-    agnosting_nms=False,
-    max_detections=300,
-    pred_masks=None,
-    retina_mask=False,
-    nc=80,
-):
-    """
-    YOLOv8 model postprocessing function. Applied non maximum suppression algorithm to detections and rescale boxes to original image size
-    Parameters:
-      pred_boxes (np.ndarray): model output prediction boxes
-      input_hw (np.ndarray): preprocessed image
-      orig_image (np.ndarray): image before preprocessing
-      min_conf_threshold (float, *optional*, 0.25): minimal accepted confidence for object filtering
-      nms_iou_threshold (float, *optional*, 0.45): minimal overlap score for removing objects duplicates in NMS
-      agnostic_nms (bool, *optiona*, False): apply class agnostinc NMS approach or not
-      max_detections (int, *optional*, 300): maximum detections after NMS
-      pred_masks (np.ndarray, *optional*, None): model ooutput prediction masks, if not provided only boxes will be postprocessed
-      retina_mask (bool, *optional*, False): retina mask postprocessing instead of native decoding
-    Returns:
-      pred (List[Dict[str, np.ndarray]]): list of dictionary with det - detected boxes in format [x1, y1, x2, y2, score, label] and segment - segmentation polygons for each element in batch
-    """
-    nms_kwargs = {"agnostic": agnosting_nms, "max_det": max_detections}
-    # if pred_masks is not None:
-    #     nms_kwargs["nm"] = 32
-    preds = ops.non_max_suppression(
-        torch.from_numpy(pred_boxes),
-        min_conf_threshold,
-        nms_iou_threshold,
-        nc=nc,
-        **nms_kwargs
-    )
-    results = []
-    proto = torch.from_numpy(pred_masks) if pred_masks is not None else None
-
-    for i, pred in enumerate(preds):
-        shape = orig_img[i].shape if isinstance(orig_img, list) else orig_img.shape
-        if not len(pred):
-            results.append({"det": [], "segment": []})
-            continue
-        if proto is None:
-            pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()
-            results.append({"det": pred})
-            continue
-        if retina_mask:
-            pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()
-            masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], shape[:2])  # HWC
-            segments = [scale_segments(input_hw, x, shape, normalize=False) for x in ops.masks2segments(masks)]
-        else:
-            masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], input_hw, upsample=True)
-            pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()
-            segments = [scale_segments(input_hw, x, shape, normalize=False) for x in ops.masks2segments(masks)]
-        results.append({"det": pred[:, :6].numpy(), "segment": segments})
-    return results
+# # from typing import Tuple
+# from ultralytics.utils import ops
+# import torch
+# import numpy as np
+# import cv2
+#
+# try:
+#     scale_segments = ops.scale_segments
+# except AttributeError:
+#     scale_segments = ops.scale_coords
+#
+#
+# def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=False, scale_fill=False, scaleup=False, stride=32):
+#     """
+#     Resize image and padding for detection. Takes image as input,
+#     resizes image to fit into new shape with saving original aspect ratio and pads it to meet stride-multiple constraints
+#
+#     Parameters:
+#       img (np.ndarray): image for preprocessing
+#       new_shape (Tuple(int, int)): image size after preprocessing in format [height, width]
+#       color (Tuple(int, int, int)): color for filling padded area
+#       auto (bool): use dynamic input size, only padding for stride constrins applied
+#       scale_fill (bool): scale image to fill new_shape
+#       scaleup (bool): allow scale image if it is lower then desired input size, can affect model accuracy
+#       stride (int): input padding stride
+#     Returns:
+#       img (np.ndarray): image after preprocessing
+#       ratio (Tuple(float, float)): hight and width scaling ratio
+#       padding_size (Tuple(int, int)): height and width padding size
+#
+#
+#     """
+#     # Resize and pad image while meeting stride-multiple constraints
+#     shape = img.shape[:2]  # current shape [height, width]
+#     if isinstance(new_shape, int):
+#         new_shape = (new_shape, new_shape)
+#
+#     # Scale ratio (new / old)
+#     r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+#     if not scaleup:  # only scale down, do not scale up (for better test mAP)
+#         r = min(r, 1.0)
+#
+#     # Compute padding
+#     ratio = r, r  # width, height ratios
+#     new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+#     dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+#     if auto:  # minimum rectangle
+#         dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
+#     elif scale_fill:  # stretch
+#         dw, dh = 0.0, 0.0
+#         new_unpad = (new_shape[1], new_shape[0])
+#         ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
+#
+#     dw /= 2  # divide padding into 2 sides
+#     dh /= 2
+#
+#     if shape[::-1] != new_unpad:  # resize
+#         img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+#     top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+#     left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+#     img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+#     return img, ratio, (dw, dh)
+#
+#
+# def preprocess_image_(img0, imgsz):
+#     """
+#     Preprocess image according to YOLOv8 input requirements.
+#     Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW.
+#
+#     Parameters:
+#       img0 (np.ndarray): image for preprocessing
+#     Returns:
+#       img (np.ndarray): image after preprocessing
+#     """
+#     # resize
+#     img = letterbox(img0, new_shape=(imgsz, imgsz))[0]
+#
+#     # Convert HWC to CHW
+#     img = img.transpose(2, 0, 1)
+#     img = np.ascontiguousarray(img)
+#     return img
+#
+#
+# def image_to_tensor_(image):
+#     """
+#     Preprocess image according to YOLOv8 input requirements.
+#     Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW.
+#
+#     Parameters:
+#       image (np.ndarray): image for preprocessing
+#     Returns:
+#       input_tensor (np.ndarray): input tensor in NCHW format with float32 values in [0, 1] range
+#     """
+#     input_tensor = image.astype(np.float32)  # uint8 to fp32
+#     input_tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
+#
+#     # add batch dimension
+#     if input_tensor.ndim == 3:
+#         input_tensor = np.expand_dims(input_tensor, 0)
+#     return input_tensor
+#
+#
+# def postprocess_(
+#     pred_boxes,
+#     input_hw,
+#     orig_img,
+#     min_conf_threshold=0.25,
+#     nms_iou_threshold=0.7,
+#     agnosting_nms=False,
+#     max_detections=300,
+#     pred_masks=None,
+#     retina_mask=False,
+#     nc=80,
+# ):
+#     """
+#     YOLOv8 model postprocessing function. Applied non maximum suppression algorithm to detections and rescale boxes to original image size
+#     Parameters:
+#       pred_boxes (np.ndarray): model output prediction boxes
+#       input_hw (np.ndarray): preprocessed image
+#       orig_image (np.ndarray): image before preprocessing
+#       min_conf_threshold (float, *optional*, 0.25): minimal accepted confidence for object filtering
+#       nms_iou_threshold (float, *optional*, 0.45): minimal overlap score for removing objects duplicates in NMS
+#       agnostic_nms (bool, *optiona*, False): apply class agnostinc NMS approach or not
+#       max_detections (int, *optional*, 300): maximum detections after NMS
+#       pred_masks (np.ndarray, *optional*, None): model ooutput prediction masks, if not provided only boxes will be postprocessed
+#       retina_mask (bool, *optional*, False): retina mask postprocessing instead of native decoding
+#     Returns:
+#       pred (List[Dict[str, np.ndarray]]): list of dictionary with det - detected boxes in format [x1, y1, x2, y2, score, label] and segment - segmentation polygons for each element in batch
+#     """
+#     nms_kwargs = {"agnostic": agnosting_nms, "max_det": max_detections}
+#     # if pred_masks is not None:
+#     #     nms_kwargs["nm"] = 32
+#     preds = ops.non_max_suppression(
+#         torch.from_numpy(pred_boxes),
+#         min_conf_threshold,
+#         nms_iou_threshold,
+#         nc=nc,
+#         **nms_kwargs
+#     )
+#     results = []
+#     proto = torch.from_numpy(pred_masks) if pred_masks is not None else None
+#
+#     for i, pred in enumerate(preds):
+#         shape = orig_img[i].shape if isinstance(orig_img, list) else orig_img.shape
+#         if not len(pred):
+#             results.append({"det": [], "segment": []})
+#             continue
+#         if proto is None:
+#             pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()
+#             results.append({"det": pred})
+#             continue
+#         if retina_mask:
+#             pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()
+#             masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], shape[:2])  # HWC
+#             segments = [scale_segments(input_hw, x, shape, normalize=False) for x in ops.masks2segments(masks)]
+#         else:
+#             masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], input_hw, upsample=True)
+#             pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()
+#             segments = [scale_segments(input_hw, x, shape, normalize=False) for x in ops.masks2segments(masks)]
+#         results.append({"det": pred[:, :6].numpy(), "segment": segments})
+#     return results
diff --git a/train_bb_detector.py b/train_bb_detector.py
index bad733c4cc2f4c010491ae8a1e62009f73300804..638f306bba8e5c43cc11135c91f3a9836f2bab31 100644
--- a/train_bb_detector.py
+++ b/train_bb_detector.py
@@ -28,3 +28,12 @@ if __name__ == '__main__':
 
     args = parser.parse_args()
 
+    train_file = args.initial_cv
+    train_home = args.images_path
+    check_path(train_home)
+
+    check_path(train_file)
+    train_pd = pd.read_csv(train_file)
+    print("Initial available:", train_pd.shape)
+
+
\ No newline at end of file