ByFan-coder
diff --git a/‎core/__pycache__/common.cpython-35.pyc
0 Bytes b/‎core/__pycache__/common.cpython-35.pyc
0 Bytes
diff --git a/‎core/__pycache__/utils.cpython-35.pyc
605 Bytes b/‎core/__pycache__/utils.cpython-35.pyc
605 Bytes
diff --git a/‎core/__pycache__/yolov3.cpython-35.pyc
0 Bytes b/‎core/__pycache__/yolov3.cpython-35.pyc
0 Bytes
diff --git a/‎core/utils.py
Lines changed: 78 additions & 84 deletions b/‎core/utils.py
Lines changed: 78 additions & 84 deletions
diff --git a/‎data/train_data/tfrecords/quick_train_data0000.tfrecords
1.27 MB b/‎data/train_data/tfrecords/quick_train_data0000.tfrecords
1.27 MB
diff --git a/‎data/train_data/tfrecords/quick_train_data0001.tfrecords
660 KB b/‎data/train_data/tfrecords/quick_train_data0001.tfrecords
660 KB
diff --git a/‎data/train_data/tfrecords/quick_train_data0002.tfrecords
1.07 MB b/‎data/train_data/tfrecords/quick_train_data0002.tfrecords
1.07 MB
diff --git a/‎data/train_data/tfrecords/quick_train_data0003.tfrecords
332 KB b/‎data/train_data/tfrecords/quick_train_data0003.tfrecords
332 KB
diff --git a/‎quick_train.py
Lines changed: 4 additions & 7 deletions b/‎quick_train.py
Lines changed: 4 additions & 7 deletions
@@ -163,16 +163,16 @@ def cpu_nms(boxes, scores, num_classes, max_boxes=20, score_thresh=0.4, iou_thre
 
 def resize_image_correct_bbox(image, bboxes, input_shape):
 
-    image_size = tf.to_float(tf.shape(image)[1:3])[::-1]
+    image_size = tf.to_float(tf.shape(image)[0:2])[::-1]
     image = tf.image.resize_images(image, size=input_shape)
 
     # correct bbox
-    xx1 = bboxes[..., 0] * input_shape[0] / image_size[0]
-    yy1 = bboxes[..., 1] * input_shape[1] / image_size[1]
-    xx2 = bboxes[..., 2] * input_shape[0] / image_size[0]
-    yy2 = bboxes[..., 3] * input_shape[1] / image_size[1]
+    xx1 = bboxes[:, 0] * input_shape[0] / image_size[0]
+    yy1 = bboxes[:, 1] * input_shape[1] / image_size[1]
+    xx2 = bboxes[:, 2] * input_shape[0] / image_size[0]
+    yy2 = bboxes[:, 3] * input_shape[1] / image_size[1]
 
-    bboxes = tf.stack([xx1, yy1, xx2, yy2], axis=2)
+    bboxes = tf.stack([xx1, yy1, xx2, yy2], axis=1)
     return image, bboxes
 
 
@@ -306,14 +306,12 @@ def load_weights(var_list, weights_file):
     return assign_ops
 
 
-
 def preprocess_true_boxes(true_boxes, true_labels, input_shape, anchors, num_classes):
     """
     Preprocess true boxes to training input format
     Parameters:
     -----------
-    :param true_boxes: numpy.ndarray of shape [N, T, 4]
-                        N: the number of images,
+    :param true_boxes: numpy.ndarray of shape [T, 4]
                         T: the number of boxes in each image.
                         4: coordinate => x_min, y_min, x_max, y_max
     :param true_labels: class id
@@ -322,65 +320,61 @@ def preprocess_true_boxes(true_boxes, true_labels, input_shape, anchors, num_cla
     :param num_classes: integer, for coco dataset, it is 80
     Returns:
     ----------
-    y_true: list(3 array), shape like yolo_outputs, [N,, 13, 13, 3, 85]
+    y_true: list(3 array), shape like yolo_outputs, [13, 13, 3, 85]
                            13:cell szie, 3:number of anchors
                            85: box_centers, box_sizes, confidence, probability
     """
-
     input_shape = np.array(input_shape, dtype=np.int32)
-    num_images = true_boxes.shape[0]
     num_layers = len(anchors) // 3
     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
     grid_sizes = [input_shape//32, input_shape//16, input_shape//8]
 
-    box_centers = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) / 2 # the center of box
-    box_sizes =  true_boxes[..., 2:4] - true_boxes[..., 0:2] # the height and width of box
+    box_centers = (true_boxes[:, 0:2] + true_boxes[:, 2:4]) / 2 # the center of box
+    box_sizes =  true_boxes[:, 2:4] - true_boxes[:, 0:2] # the height and width of box
 
-    true_boxes[..., 0:2] = box_centers
-    true_boxes[..., 2:4] = box_sizes
+    true_boxes[:, 0:2] = box_centers
+    true_boxes[:, 2:4] = box_sizes
 
-    y_true_13 = np.zeros(shape=[num_images, grid_sizes[0][0], grid_sizes[0][1], 3, 5+num_classes], dtype=np.float32)
-    y_true_26 = np.zeros(shape=[num_images, grid_sizes[1][0], grid_sizes[1][1], 3, 5+num_classes], dtype=np.float32)
-    y_true_52 = np.zeros(shape=[num_images, grid_sizes[2][0], grid_sizes[2][1], 3, 5+num_classes], dtype=np.float32)
+    y_true_13 = np.zeros(shape=[grid_sizes[0][0], grid_sizes[0][1], 3, 5+num_classes], dtype=np.float32)
+    y_true_26 = np.zeros(shape=[grid_sizes[1][0], grid_sizes[1][1], 3, 5+num_classes], dtype=np.float32)
+    y_true_52 = np.zeros(shape=[grid_sizes[2][0], grid_sizes[2][1], 3, 5+num_classes], dtype=np.float32)
 
     y_true = [y_true_13, y_true_26, y_true_52]
-    anchors = np.expand_dims(anchors, 0)
     anchors_max =  anchors / 2.
     anchors_min = -anchors_max
-    valid_mask = box_sizes[..., 0] > 0
-
-    for b in range(num_images): # for each image, do:
-        # Discard zero rows.
-        wh = box_sizes[b, valid_mask[b]]
-        if len(wh) == 0: continue
-        # set the center of all boxes as the origin of their coordinates
-        # and correct their coordinates
-        wh = np.expand_dims(wh, -2)
-        boxes_max = wh / 2.
-        boxes_min = -boxes_max
-
-        intersect_mins = np.maximum(boxes_min, anchors_min)
-        intersect_maxs = np.minimum(boxes_max, anchors_max)
-        intersect_wh = np.maximum(intersect_maxs - intersect_mins, 0.)
-        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
-        box_area = wh[..., 0] * wh[..., 1]
-        anchor_area = anchors[..., 0] * anchors[..., 1]
-        iou = intersect_area / (box_area + anchor_area - intersect_area)
-        # Find best anchor for each true box
-        best_anchor = np.argmax(iou, axis=-1)
-
-        for t, n in enumerate(best_anchor):
-            for l in range(num_layers):
-                if n not in anchor_mask[l]: continue
-                i = np.floor(true_boxes[b,t,1]/input_shape[::-1]*grid_sizes[l][0]).astype('int32')
-                j = np.floor(true_boxes[b,t,0]/input_shape[::-1]*grid_sizes[l][1]).astype('int32')
-                k = anchor_mask[l].index(n)
-                c = true_labels[b,t].astype('int32')
-                y_true[l][b, i, j, k, 0:4] = true_boxes[b,t, 0:4]
-                y_true[l][b, i, j, k,   4] = 1
-                y_true[l][b, i, j, k, 5+c] = 1
-
-    return y_true
+    valid_mask = box_sizes[:, 0] > 0
+
+    # Discard zero rows.
+    wh = box_sizes[valid_mask]
+    # set the center of all boxes as the origin of their coordinates
+    # and correct their coordinates
+    wh = np.expand_dims(wh, -2)
+    boxes_max = wh / 2.
+    boxes_min = -boxes_max
+
+    intersect_mins = np.maximum(boxes_min, anchors_min)
+    intersect_maxs = np.minimum(boxes_max, anchors_max)
+    intersect_wh = np.maximum(intersect_maxs - intersect_mins, 0.)
+    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
+    box_area = wh[..., 0] * wh[..., 1]
+
+    anchor_area = anchors[:, 0] * anchors[:, 1]
+    iou = intersect_area / (box_area + anchor_area - intersect_area)
+    # Find best anchor for each true box
+    best_anchor = np.argmax(iou, axis=-1)
+
+    for t, n in enumerate(best_anchor):
+        for l in range(num_layers):
+            if n not in anchor_mask[l]: continue
+            i = np.floor(true_boxes[t,1]/input_shape[::-1]*grid_sizes[l][0]).astype('int32')
+            j = np.floor(true_boxes[t,0]/input_shape[::-1]*grid_sizes[l][1]).astype('int32')
+            k = anchor_mask[l].index(n)
+            c = true_labels[t].astype('int32')
+            y_true[l][i, j, k, 0:4] = true_boxes[t, 0:4]
+            y_true[l][i, j, k,   4] = 1
+            y_true[l][i, j, k, 5+c] = 1
+
+    return y_true_13, y_true_26, y_true_52
 
 
 
@@ -414,44 +408,44 @@ def get_anchors(anchors_path):
     return anchors.reshape(-1, 2)
 
 
+class parser(object):
+    def __init__(self, anchors, num_classes, input_shape=[416, 416]):
+        self.anchors = anchors
+        self.num_classes = num_classes
+        self.input_shape = input_shape
 
-def parser(serialized_example):
-    features = tf.parse_single_example(
-        serialized_example,
-        features = {
-            'image' : tf.FixedLenFeature([], dtype = tf.string),
-            'bboxes': tf.FixedLenFeature([], dtype = tf.string),
-            'labels': tf.VarLenFeature(dtype = tf.int64),
-        }
-    )
-
-    image = tf.image.decode_jpeg(features['image'], channels = 3)
-    image = tf.image.convert_image_dtype(image, tf.uint8)
-
-    bboxes = tf.decode_raw(features['bboxes'], tf.float32)
-    bboxes = tf.reshape(bboxes, shape=[-1,4])
-
-    labels = features['labels'].values
-    return image, bboxes, labels
-
-
-
-
-
-
-
-
-
-
-
-
+    def preprocess(self, image, true_labels, true_boxes):
+        # resize_image_correct_bbox
+        image, true_boxes = resize_image_correct_bbox(image, true_boxes,
+                                                      input_shape=self.input_shape)
 
+        y_true_13, y_true_26, y_true_52 = tf.py_func(preprocess_true_boxes,
+                            inp=[true_boxes, true_labels, self.input_shape, self.anchors, self.num_classes],
+                            Tout = [tf.float32, tf.float32, tf.float32])
+        # data augmentation
+        # pass
 
+        return image, y_true_13, y_true_26, y_true_52
 
+    def parser_example(self, serialized_example):
 
+        features = tf.parse_single_example(
+            serialized_example,
+            features = {
+                'image' : tf.FixedLenFeature([], dtype = tf.string),
+                'bboxes': tf.FixedLenFeature([], dtype = tf.string),
+                'labels': tf.VarLenFeature(dtype = tf.int64),
+            }
+        )
 
+        image = tf.image.decode_jpeg(features['image'], channels = 3)
+        image = tf.image.convert_image_dtype(image, tf.uint8)
 
+        true_boxes = tf.decode_raw(features['bboxes'], tf.float32)
+        true_boxes = tf.reshape(true_boxes, shape=[-1,4])
+        true_labels = features['labels'].values
 
+        return self.preprocess(image, true_labels, true_boxes)
 
 
 
 
@@ -25,16 +25,13 @@
 file_pattern = "./data/train_data/tfrecords/quick_train_data*.tfrecords"
 anchors = utils.get_anchors('./data/yolo_anchors.txt')
 
+
 dataset = tf.data.TFRecordDataset(filenames = tf.gfile.Glob(file_pattern))
-dataset = dataset.map(utils.parser, num_parallel_calls = 10)
+dataset = dataset.map(utils.parser(anchors, num_classes).parser_example, num_parallel_calls = 10)
 dataset = dataset.repeat().batch(BATCH_SIZE).prefetch(BATCH_SIZE)
 iterator = dataset.make_one_shot_iterator()
-images, true_boxes, true_labels = iterator.get_next()
-images, true_boxes = utils.resize_image_correct_bbox(images, true_boxes, [INPUT_SIZE, INPUT_SIZE])
-
-y_true = tf.py_func(utils.preprocess_true_boxes,
-                    inp=[true_boxes, true_labels, [INPUT_SIZE, INPUT_SIZE], anchors, num_classes],
-                    Tout = [tf.float32, tf.float32, tf.float32])
+example = iterator.get_next()
+images, *y_true = example
 
 model = yolov3.yolov3(num_classes)
 with tf.variable_scope('yolov3'):