diff --git a/embedding_net/augmentations.py b/embedding_net/augmentations.py
index 3bb233f..907c683 100644
--- a/embedding_net/augmentations.py
+++ b/embedding_net/augmentations.py
@@ -3,53 +3,83 @@
 def get_aug(name='default', input_shape=[48, 48, 3]):
     if name == 'default':
-        augmentations = A.Compose([
-            A.RandomBrightnessContrast(p=0.4),
-            A.RandomGamma(p=0.4),
-            A.HueSaturationValue(hue_shift_limit=20,
-                                 sat_shift_limit=30, val_shift_limit=30, p=0.4),
-            A.CLAHE(p=0.4),
-            A.Blur(blur_limit=1, p=0.3),
-            A.GaussNoise(var_limit=(50, 80), p=0.3)
-        ], p=1)
+        return A.Compose(
+            [
+                A.RandomBrightnessContrast(p=0.4),
+                A.RandomGamma(p=0.4),
+                A.HueSaturationValue(
+                    hue_shift_limit=20,
+                    sat_shift_limit=30,
+                    val_shift_limit=30,
+                    p=0.4,
+                ),
+                A.CLAHE(p=0.4),
+                A.Blur(blur_limit=1, p=0.3),
+                A.GaussNoise(var_limit=(50, 80), p=0.3),
+            ],
+            p=1,
+        )
+
     elif name == 'plates':
-        augmentations = A.Compose([
-            A.RandomBrightnessContrast(p=0.4),
-            A.RandomGamma(p=0.4),
-            A.HueSaturationValue(hue_shift_limit=20,
-                                 sat_shift_limit=30,
-                                 val_shift_limit=30,
-                                 p=0.4),
-            A.CLAHE(p=0.4),
-            A.HorizontalFlip(p=0.5),
-            A.VerticalFlip(p=0.5),
-            A.Blur(blur_limit=1, p=0.3),
-            A.GaussNoise(var_limit=(50, 80), p=0.3),
-            A.RandomCrop(p=0.8, height=2*input_shape[1]/3, width=2*input_shape[0]/3)
-        ], p=1)
+        return A.Compose(
+            [
+                A.RandomBrightnessContrast(p=0.4),
+                A.RandomGamma(p=0.4),
+                A.HueSaturationValue(
+                    hue_shift_limit=20,
+                    sat_shift_limit=30,
+                    val_shift_limit=30,
+                    p=0.4,
+                ),
+                A.CLAHE(p=0.4),
+                A.HorizontalFlip(p=0.5),
+                A.VerticalFlip(p=0.5),
+                A.Blur(blur_limit=1, p=0.3),
+                A.GaussNoise(var_limit=(50, 80), p=0.3),
+                A.RandomCrop(
+                    p=0.8,
+                    height=2 * input_shape[1] // 3,
+                    width=2 * input_shape[0] // 3,
+                ),
+            ],
+            p=1,
+        )
+
     elif name == 'deepfake':
-        augmentations = A.Compose([
-            A.HorizontalFlip(p=0.5),
-        ], p=1)
+        return A.Compose(
+            [
+                A.HorizontalFlip(p=0.5),
+            ],
+            p=1,
+        )
+
     elif name == 'plates2':
-        augmentations = A.Compose([
-            A.CLAHE(clip_limit=(1,4),p=0.3),
-            A.HorizontalFlip(p=0.5),
-            A.VerticalFlip(p=0.5),
-            A.RandomBrightness(limit=0.2, p=0.3),
-            A.RandomContrast(limit=0.2, p=0.3),
-            # A.Rotate(limit=360, p=0.9),
-            A.RandomRotate90(p=0.3),
-            A.HueSaturationValue(hue_shift_limit=(-50,50),
-                                 sat_shift_limit=(-15,15),
-                                 val_shift_limit=(-15,15),
-                                 p=0.5),
-#            A.Blur(blur_limit=(5,7), p=0.3),
-            A.GaussNoise(var_limit=(10, 50), p=0.3),
-            A.CenterCrop(p=1, height=2*input_shape[1]//3, width=2*input_shape[0]//3),
-            A.Resize(p=1, height=input_shape[1], width=input_shape[0])
-        ], p=1)
-    else:
-        augmentations = None
+        return A.Compose(
+            [
+                A.CLAHE(clip_limit=(1, 4), p=0.3),
+                A.HorizontalFlip(p=0.5),
+                A.VerticalFlip(p=0.5),
+                A.RandomBrightness(limit=0.2, p=0.3),
+                A.RandomContrast(limit=0.2, p=0.3),
+                # A.Rotate(limit=360, p=0.9),
+                A.RandomRotate90(p=0.3),
+                A.HueSaturationValue(
+                    hue_shift_limit=(-50, 50),
+                    sat_shift_limit=(-15, 15),
+                    val_shift_limit=(-15, 15),
+                    p=0.5,
+                ),
+                # A.Blur(blur_limit=(5,7), p=0.3),
+                A.GaussNoise(var_limit=(10, 50), p=0.3),
+                A.CenterCrop(
+                    p=1,
+                    height=2 * input_shape[1] // 3,
+                    width=2 * input_shape[0] // 3,
+                ),
+                A.Resize(p=1, height=input_shape[1], width=input_shape[0]),
+            ],
+            p=1,
+        )
 
-    return augmentations
+    else:
+        return None
diff --git a/embedding_net/datagenerators.py b/embedding_net/datagenerators.py
index bee596c..15c0b81 100644
--- a/embedding_net/datagenerators.py
+++ b/embedding_net/datagenerators.py
@@ -26,12 +26,12 @@ def __init__(self, dataset_path,
 
         self.dataset_path = dataset_path
         self.class_files_paths = {}
         self.class_names = []
-        
+
         if train_csv_file is not None:
             self.class_files_paths = self._load_from_dataframe(train_csv_file, image_id_column, label_column, is_google)
         else:
             self.class_files_paths = self._load_from_directory()
-        
+
         self.n_classes = len(self.class_names)
         self.n_samples = {k: len(v) for k, v in self.class_files_paths.items()}
@@ -94,7 +94,7 @@ def _load_from_directory(self):
         for class_name, class_dir_path in tqdm.tqdm(zip(self.class_names, class_dir_paths)):
             subdirs = [f.path for f in os.scandir(class_dir_path) if f.is_dir()]
             temp_list = []
-            if len(subdirs)>0:
+            if subdirs:
                 for subdir in subdirs:
                     class_image_paths = [f.path for f in os.scandir(subdir) if f.is_file() and (f.name.endswith('.jpg') or
@@ -134,10 +134,7 @@ def __init__(self, class_files_paths,
         self.n_samples = {k: len(v) for k, v in self.class_files_paths.items()}
 
     def __len__(self):
-        if self.val_gen:
-            return self.n_batches_val
-        else:
-            return self.n_batches
+        return self.n_batches_val if self.val_gen else self.n_batches
 
     def __getitem__(self, index):
         pass
@@ -207,7 +204,7 @@ def get_batch_triplets_mining(self):
 
         all_embeddings_list = []
         all_images_list = []
-        
+
         for idx, cl_img_idxs in enumerate(selected_images):
             images = self._get_images_set(selected_classes[idx], cl_img_idxs, with_aug=self.augmentations)
             all_images_list.append(images)
@@ -243,7 +240,7 @@
                 triplet_negatives.append(all_images[hard_negative])
                 targets.append(1)
 
-            if len(triplet_anchors) == 0:
+            if not triplet_anchors:
                 triplet_anchors.append(all_images[anchor_positive[0]])
                 triplet_positives.append(all_images[anchor_positive[1]])
                 triplet_negatives.append(all_images[negative_indices[0]])
@@ -282,9 +279,7 @@ def get_batch_triplets(self):
                     np.zeros((self.batch_size, self.input_shape[0], self.input_shape[1], 3))]
         targets = np.zeros((self.batch_size,))
 
-        count = 0
-
-        for i in range(self.batch_size):
+        for count, i in enumerate(range(self.batch_size)):
             selected_class_idx = random.randrange(0, self.n_classes)
             selected_class = self.class_names[selected_class_idx]
             selected_class_n_elements = self.n_samples[selected_class]
@@ -306,8 +301,6 @@
             triplets[1][count, :, :, :] = imgs[1]
             triplets[2][count, :, :, :] = imgs[2]
             targets[i] = 1
-            count += 1
-
         return triplets, targets
 
     def __getitem__(self, index):
@@ -398,9 +391,8 @@ def get_batch(self):
                   np.zeros((self.batch_size, self.input_shape[0], self.input_shape[1], 3))]
         targets = np.zeros((self.batch_size, self.n_classes))
 
-        count = 0
         with_aug = self.augmentations
-        for i in range(self.batch_size):
+        for count, i in enumerate(range(self.batch_size)):
             selected_class_idx = random.randrange(0, self.n_classes)
             selected_class = self.class_names[selected_class_idx]
             selected_class_n_elements = len(self.class_files_paths[selected_class])
@@ -410,8 +402,6 @@
             img = self._get_images_set([selected_class], [indx], with_aug=with_aug)
             images[0][count, :, :, :] = img[0]
             targets[i][selected_class_idx] = 1
-            count += 1
-
         return images, targets
 
     def __getitem__(self, index):
diff --git a/embedding_net/models.py b/embedding_net/models.py
index 1105a36..f333910 100644
--- a/embedding_net/models.py
+++ b/embedding_net/models.py
@@ -45,8 +45,7 @@ def _create_base_model(self):
 
         self.classification_model = Model(inputs=[self.base_model.layers[0].input],outputs=[output])
 
     def _generate_encodings(self, imgs):
-        encodings = self.base_model.predict(imgs)
-        return encodings
+        return self.base_model.predict(imgs)
 
     def train_embeddings_classifier(self, data_loader,
@@ -61,15 +60,13 @@ def train_embeddings_classifier(self, data_loader,
     def generate_encodings(self, data_loader, max_n_samples=10,
                            shuffle=True):
         data_paths, data_labels, data_encodings = [], [], []
-        encoded_training_data = {}
-
         for class_name in data_loader.class_names:
             data_list = data_loader.train_data[class_name]
             if len(data_list)>max_n_samples:
                 if shuffle:
                     random.shuffle(data_list)
                 data_list = data_list[:max_n_samples]
-            
+
             data_paths += data_list
             imgs = get_images(data_list, self.params_model['input_shape'])
             encods = self._generate_encodings(imgs)
@@ -77,11 +74,11 @@ def generate_encodings(self, data_loader, max_n_samples=10,
             data_encodings.append(encod)
             data_labels.append(class_name)
 
-        encoded_training_data['paths'] = data_paths
-        encoded_training_data['labels'] = data_labels
-        encoded_training_data['encodings'] = np.squeeze(np.array(data_encodings))
-
-        return encoded_training_data
+        return {
+            'paths': data_paths,
+            'labels': data_labels,
+            'encodings': np.squeeze(np.array(data_encodings)),
+        }
 
     def save_encodings(self, encoded_training_data,
                        save_folder='./',
@@ -113,23 +110,16 @@ def save_onnx(self, save_folder, save_name='base_model.onnx'):
 
         keras2onnx.save_model(onnx_model, os.path.join(save_folder, save_name))
 
     def predict(self, image):
-        if type(image) is str:
-            img = cv2.imread(image)
-        else:
-            img = image
+        img = cv2.imread(image) if isinstance(image, str) else image
         img = cv2.resize(img, (self.params_model['input_shape'][0], self.params_model['input_shape'][1]))
         encoding = self.base_model.predict(np.expand_dims(img, axis=0))
         distances = self.calculate_distances(encoding)
         max_element = np.argmin(distances)
-        predicted_label = self.encoded_training_data['labels'][max_element]
-        return predicted_label
+        return self.encoded_training_data['labels'][max_element]
 
     def predict_knn(self, image, with_top5=False):
-        if type(image) is str:
-            img = cv2.imread(image)
-        else:
-            img = image
+        img = cv2.imread(image) if isinstance(image, str) else image
         img = cv2.resize(img, (self.input_shape[0], self.input_shape[1]))
 
         encoding = self.base_model.predict(np.expand_dims(img, axis=0))
@@ -145,8 +135,6 @@ def calculate_prediction_accuracy(self, data_loader):
         correct_top1 = 0
         correct_top5 = 0
 
-        accuracies = {'top1':0,
-                      'top5':0 }
         total_n_of_images = len(data_loader.images_paths['val'])
 
         for img_path, img_label in zip(data_loader.images_paths['val'], data_loader.images_labels['val']):
@@ -155,10 +143,10 @@
                 correct_top1 += 1
             if img_label in prediction_top5:
                 correct_top5 += 1
-        accuracies['top1'] = correct_top1/total_n_of_images
-        accuracies['top5'] = correct_top5/total_n_of_images
-
-        return accuracies
+        return {
+            'top1': correct_top1 / total_n_of_images,
+            'top5': correct_top5 / total_n_of_images,
+        }
 
 
 class TripletNet(EmbeddingNet):
diff --git a/embedding_net/utils.py b/embedding_net/utils.py
index 12b8a45..ac76c9a 100644
--- a/embedding_net/utils.py
+++ b/embedding_net/utils.py
@@ -13,7 +13,7 @@ def get_image(img_path, input_shape=None):
 
     img = cv2.imread(img_path)
     if img is None:
-        print('image is not exist ' + img_path)
+        print(f'image does not exist: {img_path}')
         return None
     if input_shape:
         img = cv2.resize(
@@ -39,7 +39,7 @@ def plot_tsne(encodings_path, save_plot_dir, show=True):
     tsne = TSNE()
     tsne_train = tsne.fit_transform(encodings['encodings'])
     fig, ax = plt.subplots(figsize=(16, 16))
-    for i, l in enumerate(labels):
+    for l in labels:
         xs = tsne_train[np.array(encodings['labels']) == l, 0]
         ys = tsne_train[np.array(encodings['labels']) == l, 1]
         ax.scatter(xs, ys, label=l)
@@ -55,7 +55,7 @@ def plot_tsne(encodings_path, save_plot_dir, show=True):
 
     if show:
         fig.show()
-    fig.savefig("{}{}.png".format(save_plot_dir, 'tsne.png'))
+    fig.savefig(f"{save_plot_dir}tsne.png")
 
 
 def plot_tsne_interactive(encodings):
@@ -66,12 +66,11 @@ def plot_tsne_interactive(encodings):
     tsne = TSNE()
     tsne_train = tsne.fit_transform(encodings['encodings'])
     fig = go.Figure()
-    for i, l in enumerate(labels):
+    for l in labels:
         xs = tsne_train[np.array(encodings['labels']) == l, 0]
         ys = tsne_train[np.array(encodings['labels']) == l, 1]
-        color = 'rgba({},{},{},{})'.format(int(255*np.random.rand()),
-                                           int(255*np.random.rand()),
-                                           int(255*np.random.rand()), 0.8)
+        color = f'rgba({int(255 * np.random.rand())},{int(255 * np.random.rand())},{int(255 * np.random.rand())},0.8)'
+
         fig.add_trace(go.Scatter(x=xs,
                                  y=ys,
                                  mode='markers',
@@ -97,11 +96,10 @@ def plot_grapths(history, save_path):
         fig, ax = plt.subplots()
         ax.plot(t, v)
 
-        ax.set(xlabel='epoch', ylabel='{}'.format(k),
-               title='{}'.format(k))
+        ax.set(xlabel='epoch', ylabel=k, title=k)
         ax.grid()
 
-        fig.savefig("{}{}.png".format(save_path, k))
+        fig.savefig(f"{save_path}{k}.png")
 
 def plot_batch_simple(data, targets, class_names):
     num_imgs = data[0].shape[0]
@@ -110,7 +108,7 @@ def plot_batch_simple(data, targets, class_names):
     full_img = np.zeros((img_h,num_imgs*img_w,3), dtype=np.uint8)
     indxs = np.argmax(targets, axis=1)
     class_names = [class_names[i] for i in indxs]
-    
+
     for i in range(num_imgs):
         full_img[:,i*img_w:(i+1)*img_w,:] = data[0][i,:,:,::-1]*255
         cv2.putText(full_img, class_names[i], (img_w*i + 5, 20), cv2.FONT_HERSHEY_SIMPLEX,
@@ -142,15 +140,14 @@ def plot_batch(data, targets):
 
 def get_optimizer(name, learning_rate):
     if name == 'adam':
-        optimizer = optimizers.Adam(lr=learning_rate)
+        return optimizers.Adam(lr=learning_rate)
     elif name == 'rms_prop':
-        optimizer = optimizers.RMSprop(lr=learning_rate)
+        return optimizers.RMSprop(lr=learning_rate)
     elif name == 'radam':
         from keras_radam import RAdam
-        optimizer = RAdam(learning_rate)
+        return RAdam(learning_rate)
     else:
-        optimizer = optimizers.SGD(lr=learning_rate)
-    return optimizer
+        return optimizers.SGD(lr=learning_rate)
 
 
 def parse_params(filename='configs/road_signs.yml'):
@@ -192,6 +189,6 @@ def parse_params(filename='configs/road_signs.yml'):
                                          cfg['SOFTMAX_PRETRAINING']['learning_rate'])
         params_softmax['optimizer'] = softmax_optimizer
         params['softmax'] = params_softmax
-    
+
     return params
diff --git a/tools/test.py b/tools/test.py
index 614e69b..80117e1 100644
--- a/tools/test.py
+++ b/tools/test.py
@@ -22,4 +22,4 @@
     model.load_encodings(encodings_path)
 
     model_prediction = model.predict(image_path)
-    print('Model prediction: {}'.format(model_prediction))
+    print(f'Model prediction: {model_prediction}')
diff --git a/tools/train.py b/tools/train.py
index b956172..2185107 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -26,9 +26,7 @@ def parse_args():
     parser.add_argument('config', help='model config file path')
     parser.add_argument('--resume_from', help='the checkpoint file to resume from')
-    args = parser.parse_args()
-
-    return args
+    return parser.parse_args()
 
 
 def create_save_folders(params):
     work_dir_path = os.path.join(params['work_dir'], params['project_name'])
@@ -64,17 +62,13 @@ def main():
     work_dir_path = os.path.join(cfg_params['general']['work_dir'],
                                  cfg_params['general']['project_name'])
     weights_save_path = os.path.join(work_dir_path, 'weights/')
-    
+
     initial_lr = params_train['learning_rate']
     decay_factor = params_train['decay_factor']
     step_size = params_train['step_size']
 
-    if params_dataloader['validate']:
-        callback_monitor = 'val_loss'
-    else:
-        callback_monitor = 'loss'
-
+    callback_monitor = 'val_loss' if params_dataloader['validate'] else 'loss'
     print('LOADING COMPLETED')
 
     callbacks = [
         LearningRateScheduler(lambda x: initial_lr *
@@ -89,7 +83,7 @@
                                    save_best_only=True,
                                    verbose=1)
     ]
-    
+
     print('CREATE DATALOADER')
     data_loader = ENDataLoader(**params_dataloader)
     print('DATALOADER CREATED!')
@@ -131,7 +125,7 @@
         os.environ["CUDA_VISIBLE_DEVICES"] = '0'
         n_gpu = 1
         print('Use single gpu mode')
-    
+
     model = TripletNet(cfg_params, training=True)
     if n_gpu>1:
         strategy = tf.distribute.MirroredStrategy()
@@ -155,7 +149,7 @@
 
     if args.resume_from is not None:
         model.load_model(args.resume_from)
-    
+
     print('COMPILE MODEL')
     model.model.compile(loss=losses,
                         optimizer=params_train['optimizer'],