From 34bb59fc1a30ed73d932d000a60ab0254f7691b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Faz=C4=B1l=20Alt=C4=B1nel?=
Date: Fri, 7 Dec 2018 00:56:09 +0900
Subject: [PATCH] Add files via upload

---
 detectRecognizeLight.py | 178 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)
 create mode 100644 detectRecognizeLight.py

diff --git a/detectRecognizeLight.py b/detectRecognizeLight.py
new file mode 100644
index 0000000..8eb5fb1
--- /dev/null
+++ b/detectRecognizeLight.py
@@ -0,0 +1,178 @@
+import tensorflow as tf
+import numpy as np
+import cv2
+import os
+
+
+def recognize_color(im):
+    '''
+    Given an RGB traffic light image, returns the class of the light (red, yellow, green).
+
+    Args:
+        im: traffic light image (detected bbox of traffic light), RGB channel order
+
+    Returns:
+        predicted_label: output label of the light color
+            ([1, 0, 0]: red, [0, 1, 0]: yellow, [0, 0, 1]: green)
+    '''
+
+    # Standardize the input size and convert it from RGB to HSV
+    standard_im = cv2.resize(im, (32, 32))
+    hsv = cv2.cvtColor(standard_im, cv2.COLOR_RGB2HSV)
+
+    # Mask out dark pixels (value channel below 40);
+    # the limits of the mask were deduced by trial & error
+    lower_hsv = np.array([0, 0, 0])
+    upper_hsv = np.array([255, 255, 40])
+    mask_hsv = cv2.inRange(hsv, lower_hsv, upper_hsv)
+    masked_image = np.copy(hsv)
+    masked_image[mask_hsv != 0] = [0, 0, 0]
+
+    # Grayscale is used as a second estimate of brightness
+    grayimage = cv2.cvtColor(standard_im, cv2.COLOR_RGB2GRAY)
+
+    # Using image slicing, crop the image into 3 horizontal parts
+    upper_part_v = masked_image[3:12, 5:27, 2]
+    mid_part_v = masked_image[12:21, 5:27, 2]
+    lower_part_v = masked_image[21:30, 5:27, 2]
+
+    upper_part_g = grayimage[3:12, 5:27]
+    mid_part_g = grayimage[12:21, 5:27]
+    lower_part_g = grayimage[21:30, 5:27]
+
+    # Average brightness of each part over the value channel & grayscale
+    # (each part is 9 rows x 22 columns)
+    area = 9.0 * 22
+    avg_brightness_upper = (np.sum(upper_part_g) + np.sum(upper_part_v)) / area
+    avg_brightness_mid = (np.sum(mid_part_g) + np.sum(mid_part_v)) / area
+    avg_brightness_lower = (np.sum(lower_part_g) + np.sum(lower_part_v)) / area
+
+    # Feature vector: the brightest third determines the color
+    feature = [avg_brightness_upper, avg_brightness_mid, avg_brightness_lower]
+
+    # Convert the feature vector to a one-hot output label
+    predicted_label = [0, 0, 0]
+    predicted_label[np.argmax(feature)] = 1
+    return predicted_label
+
+
+def draw_boxes(image_fed, best_boxes_roi, best_boxes_classes, best_boxes_scores, frame_w, frame_h, num_pred, video_writer):
+    '''
+    Draws boxes for detected traffic lights and writes the frames to the output video.
+
+    Args:
+        image_fed: RGB image batch on which object detection was performed
+        best_boxes_roi: ROIs of detected objects, in pixels, as [ymin, xmin, ymax, xmax]
+        best_boxes_classes: classes of detected objects
+        best_boxes_scores: scores of detected objects
+        frame_w: output frame width
+        frame_h: output frame height
+        num_pred: number of predictions kept per image
+        video_writer: video writer object to write drawn frames to the video
+    '''
+
+    for i in range(best_boxes_roi.shape[0]):
+        im_rgb = np.reshape(image_fed[i], (frame_h, frame_w, 3))
+        im = cv2.cvtColor(im_rgb, cv2.COLOR_RGB2BGR)
+
+        for j in range(num_pred):
+
+            # Keep traffic lights among the detected objects
+            # (the label of the traffic light category in the COCO dataset is 10)
+            if best_boxes_scores[i][j] > 0.35 and best_boxes_classes[i][j] == 10.:
+                y = int(best_boxes_roi[i][j][0])
+                x = int(best_boxes_roi[i][j][1])
+                y_max = int(best_boxes_roi[i][j][2])
+                x_max = int(best_boxes_roi[i][j][3])
+
+                # Classify the color of the detected traffic light
+                # (crop from the RGB image; numpy slicing is [row, column])
+                color = recognize_color(im_rgb[y:y_max, x:x_max, :])
+
+                # Draw the box and label in the classified color (BGR order)
+                font = cv2.FONT_HERSHEY_SIMPLEX
+                if color == [1, 0, 0]:
+                    cv2.rectangle(im, (x, y), (x_max, y_max), (0, 0, 255), 2)
+                    cv2.putText(im, 'Red', (x, y), font, 1e-3 * frame_h, (0, 0, 255), 2)
+                elif color == [0, 1, 0]:
+                    cv2.rectangle(im, (x, y), (x_max, y_max), (0, 255, 255), 2)
+                    cv2.putText(im, 'Yellow', (x, y), font, 1e-3 * frame_h, (0, 255, 255), 2)
+                elif color == [0, 0, 1]:
+                    cv2.rectangle(im, (x, y), (x_max, y_max), (0, 255, 0), 2)
+                    cv2.putText(im, 'Green', (x, y), font, 1e-3 * frame_h, (0, 255, 0), 2)
+
+        video_writer.write(im)
+
+
+def main():
+    im_size = 512
+    pb_dir = './model/frozen_inference_graph.pb'  # Pretrained model path
+    img_dir = './object-dataset'  # Dataset folder path
+    vid_out = './out'  # Output video path
+
+    # Load the pretrained frozen graph
+    graph = tf.Graph()
+    with graph.as_default():
+        with tf.gfile.FastGFile(pb_dir, 'rb') as file:
+            graph_def = tf.GraphDef()
+            graph_def.ParseFromString(file.read())
+            tf.import_graph_def(graph_def, name='')
+
+    img = graph.get_tensor_by_name('image_tensor:0')
+    detection_boxes = graph.get_tensor_by_name('detection_boxes:0')
+    detection_scores = graph.get_tensor_by_name('detection_scores:0')
+    num_detections = graph.get_tensor_by_name('num_detections:0')
+    detection_classes = graph.get_tensor_by_name('detection_classes:0')
+    sess = tf.Session(graph=graph)
+
+    vid_out = vid_out + '/outputVideo.mp4'
+    batch_size = 32
+    num_pred = 30
+    frame_w = 512
+    frame_h = 512
+    video_writer = cv2.VideoWriter(vid_out, cv2.VideoWriter_fourcc(*'MP4V'), 5.0, (frame_w, frame_h))
+
+    # Read the image name list from the directory and sort it
+    # (os.listdir generates output in arbitrary order)
+    images = sorted(os.listdir(img_dir))
+    k = 2  # Start from the third sorted entry (the first two are skipped)
+    for i in range((len(images) - 2) // batch_size):
+        image_bat = []
+        for j in range(batch_size):
+            # Read images from the dataset directory
+            image = cv2.imread(img_dir + '/' + images[k])
+            # Resize the images, convert them to RGB and collect them into a batch
+            image = cv2.resize(image, (im_size, im_size))
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+            image_bat.append(image)
+            k = k + 1
+        image_batch = np.asarray(image_bat)
+
+        # Feed the image batch to the model
+        feed_dict = {img: image_batch}
+        y_p_boxes, y_p_scores, y_p_num_detections, y_p_classes = sess.run([detection_boxes,
+                                                                           detection_scores,
+                                                                           num_detections,
+                                                                           detection_classes], feed_dict=feed_dict)
+
+        # Keep the first num_pred detections per image and scale the
+        # normalized box coordinates to pixels (the output frames are square)
+        best_boxes_roi = []
+        best_boxes_scores = []
+        best_boxes_classes = []
+        for b in range(y_p_boxes.shape[0]):
+            best_boxes_roi.append(y_p_boxes[b, :num_pred] * frame_w)
+            best_boxes_scores.append(y_p_scores[b, :num_pred])
+            best_boxes_classes.append(y_p_classes[b, :num_pred])
+        best_boxes_roi = np.asarray(best_boxes_roi)
+        best_boxes_scores = np.asarray(best_boxes_scores)
+        best_boxes_classes = np.asarray(best_boxes_classes)
+
+        # Draw boxes for the detected objects
+        draw_boxes(image_batch, best_boxes_roi, best_boxes_classes, best_boxes_scores, frame_w, frame_h, num_pred, video_writer)
+
+    video_writer.release()
+
+
+if __name__ == "__main__":
+    main()
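
The coordinate handling in draw_boxes follows the TensorFlow Object Detection API convention: detection_boxes come back normalized as [ymin, xmin, ymax, xmax], while numpy indexes images as [row, column]. A small illustration with purely hypothetical values:

    import numpy as np

    frame = np.zeros((512, 512, 3), dtype=np.uint8)  # hypothetical 512x512 RGB frame
    box = np.array([0.25, 0.40, 0.35, 0.45])         # hypothetical normalized detection
    ymin, xmin, ymax, xmax = (box * 512).astype(int) # square frame: a single scale factor
    crop = frame[ymin:ymax, xmin:xmax]               # rows sliced with y, columns with x
    print(crop.shape)                                # (51, 26, 3)

cv2.rectangle and cv2.putText, by contrast, take points as (x, y), which is why the drawing calls pass (x, y) and (x_max, y_max) rather than slice-order coordinates.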
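
As a quick sanity check, recognize_color can be exercised on synthetic inputs. A minimal sketch, assuming the patched file is importable as detectRecognizeLight; the helper synthetic_light is hypothetical and only for illustration:

    import cv2
    import numpy as np
    from detectRecognizeLight import recognize_color  # assumes the file is on the path

    def synthetic_light(section):
        # Hypothetical helper: a dark 32x32 RGB "traffic light" with a bright
        # disc in one of the three vertical thirds (0: upper, 1: middle, 2: lower)
        im = np.zeros((32, 32, 3), dtype=np.uint8)
        center_row = {0: 7, 1: 16, 2: 25}[section]
        rgb = {0: (255, 0, 0), 1: (255, 255, 0), 2: (0, 255, 0)}[section]
        cv2.circle(im, (16, center_row), 4, rgb, -1)
        return im

    for section, name in [(0, 'red'), (1, 'yellow'), (2, 'green')]:
        print(name, recognize_color(synthetic_light(section)))
        # Expected one-hot labels: [1, 0, 0], [0, 1, 0], [0, 0, 1]

Each synthetic light places its bright disc inside one of the three row bands that recognize_color slices ([3:12], [12:21], [21:30]), so the brightest-third heuristic should return the matching one-hot label for each case.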