diff --git a/README.md b/README.md index 5ca5e8eff2a0cb3362c4b6d48177043adfd263f9..1ed91e0e630228a038dc03e860160a7b473e67e2 100644 --- a/README.md +++ b/README.md @@ -467,6 +467,9 @@ Stage2 images already downloaded and cropped are available [here](https://mosqui python train_bb_detector.py --config ./resources/yolov8n.json --images_path ./data/images --initial_cv ./resources/files_10k_bb_4folds.csv +First, it will create a labels/ folder under the images/ folder in the format Yolo expects. Second, it will create a .yaml file for each Yolo training fold. + + **3/ Run classifiers training scripts:** You can remove --wandb_project if you don’t have a [wandb](https://wandb.ai/) account --full_train option if you want to train CV models to monitor F1/Precision/Recall score. diff --git a/mqt/training/train_bb_detector.py b/mqt/training/train_bb_detector.py index 76a3872e39385443a0ccdb42d53c82ed882e41c8..1ab977fb4fbdcb3a9d3d83d828690e66d98f526d 100644 --- a/mqt/training/train_bb_detector.py +++ b/mqt/training/train_bb_detector.py @@ -78,23 +78,23 @@ def generate_yolo_labels(train_pd, data_home): if xmin_ < 0: xmin_ = 0 if xmin_ > w: - print("w", row["uid"]) + # print("w", row["uid"]) xmin_ = 0 # w if ymin_ < 0: ymin_ = 0 if ymin_ > h: - print("h", row["uid"]) + # print("h", row["uid"]) ymin_ = 0 # h if xmax_ < 0: xmax_ = 0 if xmax_ > w: - print("w", row["uid"]) + # print("w", row["uid"]) xmax_ = w if ymax_ < 0: ymax_ = 0 if ymax_ > h: - print("h", row["uid"]) + # print("h", row["uid"]) ymax_ = h filename = row["img_fName"].replace(".jpeg", ".txt") @@ -110,11 +110,15 @@ def generate_yolo_labels(train_pd, data_home): bbx = ((xmax_ + xmin_) / 2.0) / w bby = ((ymax_ + ymin_) / 2.0) / h + labels_path = os.path.join(data_home, "labels") + if not os.path.exists(labels_path): + os.makedirs(labels_path) + with open(os.path.join(data_home, "labels", filename), "w") as f: f.write(str(label) + " " + str(bbx) + " " + str(bby) + " " + str(bbw) + " " + str(bbh)) -def 
generate_yolo_list(train_home, train_pd, seed=SEEDS[0], exclude_noisy=NOISY, folds=FOLDS, single_class=SINGLE_CLASS): +def generate_yolo_list(train_pd, train_home, seed=SEEDS[0], exclude_noisy=NOISY, folds=FOLDS, single_class=SINGLE_CLASS): # Generate YOLO lists for fold_ in range(folds): @@ -190,9 +194,8 @@ def get_model(architecture, weights): def train_yolo(config, architecture="YOLO", model_backbone="yolov8n", image_size=IMAGE_SIZE, - models_folder="mosquito_yolo_models", seed=SEEDS[0]): + models_folder="mosquito_yolo_models", seed=SEEDS[0], resume_fold=0): - resume_fold = 0 for fold_ in range(FOLDS): torch.cuda.empty_cache() if fold_ < resume_fold: diff --git a/mqt/training/train_classifier.py b/mqt/training/train_classifier.py index 31fb3247432144e013d91fe76888a90d71c392b0..4afb5352db09de8d817b4d2e3a0f74bbe5250126 100644 --- a/mqt/training/train_classifier.py +++ b/mqt/training/train_classifier.py @@ -192,7 +192,7 @@ BAD_YOLO8N_768_OOF = [ def train_cls(train_boxes_home, config, train_pd, train_background_pd=None, external_pd=None, wandb_project=None, models_home="./mosquito_models", full_train=False, - exclude_noisy=BAD_YOLO8N_768_OOF): + exclude_noisy=BAD_YOLO8N_768_OOF, resume_fold=0): seed_everything(config.seed, workers=True) @@ -279,7 +279,7 @@ def train_cls(train_boxes_home, config, train_pd, train_background_pd=None, exte wandb.finish() else: - resume_fold = 0 + # resume_fold = 0 for fold_ in range(config.folds): if fold_ < resume_fold: continue diff --git a/train_bb_detector.py b/train_bb_detector.py index 62aab4b454cda7a4bf68c3cf49c85da34aa56bf0..72c152a1ebe092da0465ba409adeb75f3102d371 100644 --- a/train_bb_detector.py +++ b/train_bb_detector.py @@ -3,8 +3,8 @@ import pandas as pd import os import argparse -from mqt.training.train_bb_detector import train_yolo -from my_models.utils.torch import Config +from mqt.training.train_bb_detector import train_yolo, generate_yolo_labels, generate_yolo_list +from my_models.utils.torch import Config, SEEDS def 
check_path(path): @@ -22,17 +22,35 @@ if __name__ == '__main__': parser.add_argument('--config', type=str, default='./resources/yolov8n.json', help='Path to yolo configuration file') parser.add_argument('--images_path', type=str, default="./data/images", help='Path to images folder') parser.add_argument('--initial_cv', type=str, default="./resources/files_10k_bb_4folds.csv", help='Path to initial files with CV split') - parser.add_argument('--models_home', type=str, default="./mosquito_yolo_models", help='Yolo model home') + parser.add_argument('--models_home', type=str, default="mosquito_yolo_models", help='Yolo model home') parser.add_argument('--image_size', type=int, default=768, help='Image size') parser.add_argument('--epochs', type=int, default=128, help='Epochs') + parser.add_argument('--resume_fold', type=int, default=0, help='Resume training from a given fold') args = parser.parse_args() train_file = args.initial_cv train_home = args.images_path check_path(train_home) + config_path = args.config + resume_fold = args.resume_fold + image_size = args.image_size + models_home = args.models_home check_path(train_file) train_pd = pd.read_csv(train_file) - print("Initial available:", train_pd.shape) + print("Initial data available:", train_pd.shape) + + check_path(config_path) + config = Config(json.load(open(config_path, "r"))) + print("Config loaded:", config.__dict__) + + print("Preparing Yolo bounding boxes labels...") + generate_yolo_labels(train_pd, train_home) + + print("Preparing Yolo training...") + generate_yolo_list(train_pd, train_home) + + train_yolo(config, architecture="YOLO", model_backbone="yolov8n", image_size=image_size, + models_folder=models_home, seed=SEEDS[0], resume_fold=resume_fold) diff --git a/train_classifier.py b/train_classifier.py index 093a513d4f4e44ccd8f3b2d37069e2b1b7e8ac10..62ee72310c159ff90cc3ef3342cb57bc3edd5646 100644 --- a/train_classifier.py +++ b/train_classifier.py @@ -31,6 +31,7 @@ if __name__ == '__main__': 
parser.add_argument('--epochs', type=int, default=96, help='Epochs') parser.add_argument('--batch_size', type=int, default=32, help='Batch size') parser.add_argument('--num_workers', type=int, default=8, help='CPU workers') + parser.add_argument('--resume_fold', type=int, default=0, help='Resume training from a given fold') args = parser.parse_args() @@ -44,6 +45,7 @@ if __name__ == '__main__': config_path = args.config full_train = args.full_train wandb_project = args.wandb_project + resume_fold = args.resume_fold if wandb_project is not None: print("Wandb project:", wandb_project) @@ -77,4 +79,5 @@ if __name__ == '__main__': print() train_cls(train_boxes_home, config, train_pd, train_background_pd=train_background_pd, external_pd=external_pd, - wandb_project=wandb_project, models_home=args.models_home, full_train=full_train) + wandb_project=wandb_project, models_home=args.models_home, full_train=full_train, + resume_fold=resume_fold)