rk3399pro下yolov3-tiny开发使用

将 darknet 框架下使用车辆数据集训练好的 yolov3-tiny 模型转换后部署到 rk3399pro 上使用。

1. 转为rknn模型

rknn_transform_car_tiny

import tensorflow as tf

from rknn.api import RKNN


if __name__ == '__main__':
    # Ask TensorFlow to grow GPU memory on demand instead of reserving it
    # all up front. NOTE(review): the session object is discarded; presumably
    # it is only created for this side effect -- confirm it is still needed.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    tf.compat.v1.Session(config=config)

    # Create RKNN object
    rknn = RKNN()

    # Load the darknet model (network definition + trained weights)
    print('--> Loading model')
    ret = rknn.load_darknet(model='./model/yolov3-tiny_car.cfg', weight="./model/yolov3-tiny_car.weights")
    if ret != 0:
        # Stop early: building/exporting a model that failed to load would
        # only produce a more confusing error later.
        print('Load darknet model failed')
        rknn.release()
        exit(ret)
    print('done')

    # Pre-processing settings baked into the converted model.
    rknn.config(channel_mean_value='0 0 0 255', reorder_channel='0 1 2')

    # Build model (quantized, calibrated with the images listed in the
    # dataset text file)
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset='./dataset_car_yolov3_tiny.txt')
    if ret != 0:
        print('Build model failed')
        rknn.release()
        exit(ret)
    print('done')

    # Write the converted model to disk
    ret = rknn.export_rknn('./model/yolov3_tiny_car.rknn')
    if ret != 0:
        print('Export rknn model failed')
        rknn.release()
        exit(ret)

    rknn.release()
    exit(0)

dataset_car_yolov3_tiny.txt内容如下

data/2bdc0303-810c5f47.jpg
data/MVI_39781__img00301.jpg
data/af7e1bb8-dca379b3.jpg
data/MVI_40152__img00361.jpg
data/MVI_40991__img00811.jpg

2. 图片检测

rknn_picture_tiny.py

import numpy as np
import cv2
from PIL import Image
from rknn.api import RKNN

# Grid sizes of the two network outputs; used to reshape the raw inference
# results (13 = 416 / 32 and 26 = 416 / 16 for the 416x416 input used below).
GRID0 = 13
GRID1 = 26
# Values per anchor: x, y, w, h, objectness + one score per entry in CLASSES.
LISTSIZE = 8
# Number of anchors per grid cell.
SPAN = 3
# NOTE(review): NUM_CLS and MAX_BOXES are not referenced anywhere in this
# script, and NUM_CLS does not match len(CLASSES) -- presumably leftovers
# from an 80-class demo; confirm before relying on them.
NUM_CLS = 80
MAX_BOXES = 500
# Minimum objectness / final score for a detection to be kept.
OBJ_THRESH = 0.35
# Boxes of the same class overlapping the kept box by more than this IoU
# are suppressed.
NMS_THRESH = 0.6


# Class names, indexed by the class id produced by the post-processing.
CLASSES = ("car", "bus", "truck")



def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def process(input, mask, anchors):
    """Decode one raw YOLO output scale into candidate boxes.

    # Arguments
        input: ndarray indexed as [grid_row, grid_col, anchor, value]; along
            the last axis 0-1 are the raw x/y offsets, 2-3 the raw w/h,
            4 the objectness logit and 5+ the class logits.
        mask: indices into `anchors` selecting the anchors of this scale.
        anchors: list of [w, h] anchor sizes.

    # Returns
        box: ndarray (N, 4) of [x, y, w, h]; (x, y) is the top-left corner,
            x/y are divided by the grid size and w/h by 416.
        box_confidence: ndarray (N, 1), objectness after sigmoid.
        box_class_probs: ndarray (N, num_classes), class scores after sigmoid.
    """
    scale_anchors = np.asarray([anchors[i] for i in mask])
    grid_h, grid_w = map(int, input.shape[0:2])

    # Compare in logit space so the sigmoid is only evaluated for the few
    # cells that pass: sigmoid(v) > OBJ_THRESH  <=>  v > -log(1/OBJ_THRESH - 1).
    logit_thresh = -np.log(1 / OBJ_THRESH - 1)
    pos = np.where(input[..., 4] > logit_thresh)
    cells = input[pos]

    box_confidence = np.expand_dims(sigmoid(cells[..., 4]), axis=-1)
    box_class_probs = sigmoid(cells[..., 5:])

    box_xy = sigmoid(cells[..., :2])
    box_wh = np.exp(cells[..., 2:4])
    # pos[2] is the anchor index of every surviving cell; one fancy-index
    # lookup scales all boxes at once instead of looping per detection.
    box_wh *= scale_anchors[pos[2]]

    # Grid offset in (column, row) order so it lines up with (x, y).
    grid = np.stack((pos[1], pos[0]), axis=1)
    box_xy += grid
    box_xy /= (grid_w, grid_h)
    # Anchors are divided by the hard-coded 416x416 network input size.
    box_wh /= (416, 416)
    # Convert box centre to top-left corner.
    box_xy -= (box_wh / 2.)
    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs


def filter_boxes(boxes, box_confidences, box_class_probs):
    """Keep the boxes whose best class score reaches OBJ_THRESH.

    The score of a box for a class is its objectness confidence multiplied
    by that class's probability; each box is assigned its best class.

    # Arguments
        boxes: ndarray, boxes of objects.
        box_confidences: ndarray, confidences of objects.
        box_class_probs: ndarray, class_probs of objects.

    # Returns
        boxes: ndarray, filtered boxes.
        classes: ndarray, best class index of each kept box.
        scores: ndarray, best class score of each kept box.
    """
    per_class_scores = box_confidences * box_class_probs
    best_class = per_class_scores.argmax(axis=-1)
    best_score = per_class_scores.max(axis=-1)

    selected = np.where(best_score >= OBJ_THRESH)
    return boxes[selected], best_class[selected], best_score[selected]


def nms_boxes(boxes, scores):
    """Greedy non-maximum suppression over one set of boxes.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it exceeds NMS_THRESH.

    # Arguments
        boxes: ndarray (N, 4), rows are [x, y, w, h] with x/y the
            left/top edge.
        scores: ndarray (N,), one score per box.

    # Returns
        keep: ndarray, indices of the surviving boxes, best score first.
    """
    left, top = boxes[:, 0], boxes[:, 1]
    width, height = boxes[:, 2], boxes[:, 3]
    right = left + width
    bottom = top + height
    areas = width * height

    remaining = scores.argsort()[::-1]
    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)

        # Extent of the intersection of `best` with every other candidate.
        overlap_w = np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest])
        overlap_h = np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest])
        inter = np.maximum(0.0, overlap_w + 0.00001) * np.maximum(0.0, overlap_h + 0.00001)

        iou = inter / (areas[best] + areas[rest] - inter)
        survivors = np.where(iou <= NMS_THRESH)[0]
        remaining = rest[survivors]
    return np.array(keep)


def yolov3_post_process(input_data):
    """Turn the raw network outputs into final detections.

    Each output scale is decoded and score-filtered, the results are merged,
    and non-maximum suppression is then run separately for every class.

    # Arguments
        input_data: sequence of output arrays, one per scale, paired in
            order with the anchor masks below.

    # Returns
        boxes, classes, scores: ndarrays describing the kept detections,
            or (None, None, None) when nothing was detected.
    """
    # yolov3-tiny anchors; the first output uses the three largest anchors.
    # (Full yolov3 would use masks [[6, 7, 8], [3, 4, 5], [0, 1, 2]] with
    # anchors [10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119],
    # [116, 90], [156, 198], [373, 326].)
    masks = [[3, 4, 5], [0, 1, 2]]
    anchors = [[10, 14], [23, 27], [37, 58], [81, 82], [135, 169], [344, 319]]

    per_scale = [
        filter_boxes(*process(feature_map, mask, anchors))
        for feature_map, mask in zip(input_data, masks)
    ]
    boxes = np.concatenate([result[0] for result in per_scale])
    classes = np.concatenate([result[1] for result in per_scale])
    scores = np.concatenate([result[2] for result in per_scale])

    kept_boxes, kept_classes, kept_scores = [], [], []
    for class_id in set(classes):
        members = np.where(classes == class_id)
        class_boxes = boxes[members]
        class_ids = classes[members]
        class_scores = scores[members]

        keep = nms_boxes(class_boxes, class_scores)

        kept_boxes.append(class_boxes[keep])
        kept_classes.append(class_ids[keep])
        kept_scores.append(class_scores[keep])

    if not kept_classes and not kept_scores:
        return None, None, None

    return (np.concatenate(kept_boxes),
            np.concatenate(kept_classes),
            np.concatenate(kept_scores))


def draw(image, boxes, scores, classes):
    """Print every detection and draw it onto `image` in place.

    # Arguments
        image: ndarray image to draw on; shape[0] / shape[1] are used as
            its height / width.
        boxes: ndarray, rows of [x, y, w, h]; the values are multiplied by
            the image width/height here, so they are expected to be
            fractions of the image size.
        scores: ndarray, score of each box.
        classes: ndarray, index into CLASSES for each box.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box
        label = CLASSES[cl]
        print('class: {}, score: {}'.format(label, score))
        print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(x, y, x + w, y + h))

        # Scale to pixel units of the image being drawn on.
        img_h, img_w = image.shape[0], image.shape[1]
        px = x * img_w
        py = y * img_h
        pw = w * img_w
        ph = h * img_h

        # Round to the nearest pixel and clamp to the image bounds.
        x_min = max(0, np.floor(px + 0.5).astype(int))
        y_min = max(0, np.floor(py + 0.5).astype(int))
        x_max = min(img_w, np.floor(px + pw + 0.5).astype(int))
        y_max = min(img_h, np.floor(py + ph + 0.5).astype(int))

        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
        # Label sits just above the box's top-left corner.
        cv2.putText(image, '{0} {1:.2f}'.format(label, score),
                    (x_min, y_min - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)


def load_model():
    """Load the converted RKNN model and initialise the runtime.

    Exits the process with the failing return code if the model cannot be
    loaded or the runtime cannot be initialised.

    # Returns
        rknn: the ready-to-use RKNN object; the caller is responsible for
            calling `release()` on it.
    """
    rknn = RKNN()
    print('-->loading model')
    model_path = "model/yolov3_tiny_car.rknn"

    ret = rknn.load_rknn(model_path)
    if ret != 0:
        # Without this check a missing/corrupt model file only surfaces as
        # a less specific runtime-init failure.
        print('Load rknn model failed')
        exit(ret)
    print('loading model done')

    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    if ret != 0:
        print('Init runtime environment failed')
        exit(ret)
    print('done')
    return rknn


import os
import time

if __name__ == '__main__':
    # Run detection on every image in `test_img` and write the annotated
    # copies to `out_img`, reporting per-image and average inference time.
    rknn = load_model()
    base_img_path = "test_img"
    out_path = "out_img"
    # Make sure the output directory exists before any image is written.
    os.makedirs(out_path, exist_ok=True)

    use_total_time = 0
    file_count = 0
    try:
        for file in os.listdir(base_img_path):
            im_file = base_img_path + '/' + file
            if not os.path.isfile(im_file):
                # Sub-directories cannot be opened as images.
                continue
            file_count += 1

            # Network input: RGB image resized to 416x416.
            im = Image.open(im_file)
            im = im.resize((416, 416))
            # im = im.resize((608, 608))
            mat = np.asarray(im.convert('RGB'))

            begin_time = time.time()
            out_boxes, out_boxes2 = rknn.inference(inputs=[mat])
            _user_time = time.time() - begin_time
            print (file, _user_time, " s")
            use_total_time += _user_time

            # Rearrange each output to [grid_row, grid_col, anchor, value],
            # the layout the post-processing indexes into.
            out_boxes = out_boxes.reshape(SPAN, LISTSIZE, GRID0, GRID0)
            out_boxes2 = out_boxes2.reshape(SPAN, LISTSIZE, GRID1, GRID1)
            input_data = [
                np.transpose(out_boxes, (2, 3, 0, 1)),
                np.transpose(out_boxes2, (2, 3, 0, 1)),
            ]

            boxes, classes, scores = yolov3_post_process(input_data)

            # Draw on the original-resolution image, not the resized input.
            image = cv2.imread(im_file)
            if image is None:
                print("cv2 could not read", im_file)
                continue
            if boxes is not None:
                draw(image, boxes, scores, classes)
            print (file_count,".________________________________________________________________________")
            cv2.imwrite(out_path + "/" + file, image)
    finally:
        # Release the NPU runtime even if an image fails to process.
        rknn.release()

    if file_count:
        print("use_total_time:", use_total_time, "s avg time:", use_total_time / file_count, " s")
    else:
        # Avoid dividing by zero when the input directory holds no files.
        print("no images found in", base_img_path)

3. 视频检测

rknn_camera_tiny.py

import numpy as np
import cv2
from PIL import Image
from rknn.api import RKNN
from timeit import default_timer as timer

# Grid sizes of the two network outputs; used to reshape the raw inference
# results (13 = 416 / 32 and 26 = 416 / 16 for the 416x416 input used below).
GRID0 = 13
GRID1 = 26
# Values per anchor: x, y, w, h, objectness + one score per entry in CLASSES.
LISTSIZE = 8
# Number of anchors per grid cell.
SPAN = 3
# NOTE(review): NUM_CLS and MAX_BOXES are not referenced anywhere in this
# script, and NUM_CLS does not match len(CLASSES) -- presumably leftovers
# from an 80-class demo; confirm before relying on them.
NUM_CLS = 80
MAX_BOXES = 500
# Minimum objectness / final score for a detection to be kept.
OBJ_THRESH = 0.2
# Boxes of the same class overlapping the kept box by more than this IoU
# are suppressed.
NMS_THRESH = 0.2

# Class names, indexed by the class id produced by the post-processing.
CLASSES = ("car", "bus", "truck")


def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)


def process(input, mask, anchors):
    """Decode one raw YOLO output scale into candidate boxes.

    # Arguments
        input: ndarray indexed as [grid_row, grid_col, anchor, value]; along
            the last axis 0-1 are the raw x/y offsets, 2-3 the raw w/h,
            4 the objectness logit and 5+ the class logits.
        mask: indices into `anchors` selecting the anchors of this scale.
        anchors: list of [w, h] anchor sizes.

    # Returns
        box: ndarray (N, 4) of [x, y, w, h]; (x, y) is the top-left corner,
            x/y are divided by the grid size and w/h by 416.
        box_confidence: ndarray (N, 1), objectness after sigmoid.
        box_class_probs: ndarray (N, num_classes), class scores after sigmoid.
    """
    scale_anchors = np.asarray([anchors[i] for i in mask])
    grid_h, grid_w = map(int, input.shape[0:2])

    # Compare in logit space so the sigmoid is only evaluated for the few
    # cells that pass: sigmoid(v) > OBJ_THRESH  <=>  v > -log(1/OBJ_THRESH - 1).
    logit_thresh = -np.log(1/OBJ_THRESH - 1)
    pos = np.where(input[..., 4] > logit_thresh)
    cells = input[pos]

    box_confidence = np.expand_dims(sigmoid(cells[..., 4]), axis=-1)
    box_class_probs = sigmoid(cells[..., 5:])

    box_xy = sigmoid(cells[..., :2])
    box_wh = np.exp(cells[..., 2:4])
    # pos[2] is the anchor index of every surviving cell; one fancy-index
    # lookup scales all boxes at once instead of looping per detection.
    box_wh *= scale_anchors[pos[2]]

    # Grid offset in (column, row) order so it lines up with (x, y).
    grid = np.stack((pos[1], pos[0]), axis=1)
    box_xy += grid
    box_xy /= (grid_w, grid_h)
    # Anchors are divided by the hard-coded 416x416 network input size.
    box_wh /= (416, 416)
    # Convert box centre to top-left corner.
    box_xy -= (box_wh / 2.)
    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs

def filter_boxes(boxes, box_confidences, box_class_probs):
    """Keep the boxes whose best class score reaches OBJ_THRESH.

    The score of a box for a class is its objectness confidence multiplied
    by that class's probability; each box is assigned its best class.

    # Arguments
        boxes: ndarray, boxes of objects.
        box_confidences: ndarray, confidences of objects.
        box_class_probs: ndarray, class_probs of objects.

    # Returns
        boxes: ndarray, filtered boxes.
        classes: ndarray, best class index of each kept box.
        scores: ndarray, best class score of each kept box.
    """
    per_class_scores = box_confidences * box_class_probs
    best_class = per_class_scores.argmax(axis=-1)
    best_score = per_class_scores.max(axis=-1)

    selected = np.where(best_score >= OBJ_THRESH)
    return boxes[selected], best_class[selected], best_score[selected]

def nms_boxes(boxes, scores):
    """Greedy non-maximum suppression over one set of boxes.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it exceeds NMS_THRESH.

    # Arguments
        boxes: ndarray (N, 4), rows are [x, y, w, h] with x/y the
            left/top edge.
        scores: ndarray (N,), one score per box.

    # Returns
        keep: ndarray, indices of the surviving boxes, best score first.
    """
    left, top = boxes[:, 0], boxes[:, 1]
    width, height = boxes[:, 2], boxes[:, 3]
    right = left + width
    bottom = top + height
    areas = width * height

    remaining = scores.argsort()[::-1]
    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)

        # Extent of the intersection of `best` with every other candidate.
        overlap_w = np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest])
        overlap_h = np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest])
        inter = np.maximum(0.0, overlap_w + 0.00001) * np.maximum(0.0, overlap_h + 0.00001)

        iou = inter / (areas[best] + areas[rest] - inter)
        survivors = np.where(iou <= NMS_THRESH)[0]
        remaining = rest[survivors]
    return np.array(keep)


def yolov3_post_process(input_data):
    """Turn the raw network outputs into final detections.

    Each output scale is decoded and score-filtered, the results are merged,
    and non-maximum suppression is then run separately for every class.

    # Arguments
        input_data: sequence of output arrays, one per scale, paired in
            order with the anchor masks below.

    # Returns
        boxes, classes, scores: ndarrays describing the kept detections,
            or (None, None, None) when nothing was detected.
    """
    # yolov3-tiny anchors; the first output uses the three largest anchors.
    # (Full yolov3 would use masks [[6, 7, 8], [3, 4, 5], [0, 1, 2]] with
    # anchors [10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119],
    # [116, 90], [156, 198], [373, 326].)
    masks = [[3, 4, 5], [0, 1, 2]]
    anchors = [[10, 14], [23, 27], [37, 58], [81, 82], [135, 169], [344, 319]]

    per_scale = [
        filter_boxes(*process(feature_map, mask, anchors))
        for feature_map, mask in zip(input_data, masks)
    ]
    boxes = np.concatenate([result[0] for result in per_scale])
    classes = np.concatenate([result[1] for result in per_scale])
    scores = np.concatenate([result[2] for result in per_scale])

    kept_boxes, kept_classes, kept_scores = [], [], []
    for class_id in set(classes):
        members = np.where(classes == class_id)
        class_boxes = boxes[members]
        class_ids = classes[members]
        class_scores = scores[members]

        keep = nms_boxes(class_boxes, class_scores)

        kept_boxes.append(class_boxes[keep])
        kept_classes.append(class_ids[keep])
        kept_scores.append(class_scores[keep])

    if not kept_classes and not kept_scores:
        return None, None, None

    return (np.concatenate(kept_boxes),
            np.concatenate(kept_classes),
            np.concatenate(kept_scores))

def draw(image, boxes, scores, classes):
    """Print every detection and draw it onto `image` in place.

    # Arguments
        image: ndarray image to draw on; shape[0] / shape[1] are used as
            its height / width.
        boxes: ndarray, rows of [x, y, w, h]; the values are multiplied by
            the image width/height here, so they are expected to be
            fractions of the image size.
        scores: ndarray, score of each box.
        classes: ndarray, index into CLASSES for each box.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box
        label = CLASSES[cl]
        print('class: {}, score: {}'.format(label, score))
        print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(x, y, x+w, y+h))

        # Scale to pixel units of the image being drawn on.
        img_h, img_w = image.shape[0], image.shape[1]
        px = x * img_w
        py = y * img_h
        pw = w * img_w
        ph = h * img_h

        # Round to the nearest pixel and clamp to the image bounds.
        x_min = max(0, np.floor(px + 0.5).astype(int))
        y_min = max(0, np.floor(py + 0.5).astype(int))
        x_max = min(img_w, np.floor(px + pw + 0.5).astype(int))
        y_max = min(img_h, np.floor(py + ph + 0.5).astype(int))

        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
        # Label sits just above the box's top-left corner.
        cv2.putText(image, '{0} {1:.2f}'.format(label, score),
                    (x_min, y_min - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)

def load_model():
    """Load the converted RKNN model and initialise the runtime.

    Exits the process with the failing return code if the model cannot be
    loaded or the runtime cannot be initialised.

    # Returns
        rknn: the ready-to-use RKNN object; the caller is responsible for
            calling `release()` on it.
    """
    rknn = RKNN()
    print('-->loading model')
    ret = rknn.load_rknn('model/yolov3_tiny_car.rknn')
    if ret != 0:
        # Without this check a missing/corrupt model file only surfaces as
        # a less specific runtime-init failure.
        print('Load rknn model failed')
        exit(ret)
    print('loading model done')

    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    if ret != 0:
        print('Init runtime environment failed')
        exit(ret)
    print('done')
    return rknn

if __name__ == '__main__':
    # Run detection on every frame of a video, show the annotated frames
    # with an FPS counter, and stop on ESC, Ctrl-C or end of stream.
    rknn = load_model()
    capture = cv2.VideoCapture("video/1588960845.6161065.mp4")
    #capture = cv2.VideoCapture(0)
    accum_time = 0
    curr_fps = 0
    prev_time = timer()
    fps = "FPS: ??"
    try:
        while True:
            ret, frame = capture.read()
            if not ret:
                # End of the video (or a failed read): stop instead of
                # spinning forever on an exhausted capture.
                break

            # Network input: RGB, 416x416. Drawing is done on the original
            # BGR frame.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (416, 416))

            testtime = timer()
            out_boxes, out_boxes2 = rknn.inference(inputs=[image])
            testtime2 = timer()
            print("rknn use time {}".format(testtime2 - testtime))

            # Rearrange each output to [grid_row, grid_col, anchor, value],
            # the layout the post-processing indexes into.
            out_boxes = out_boxes.reshape(SPAN, LISTSIZE, GRID0, GRID0)
            out_boxes2 = out_boxes2.reshape(SPAN, LISTSIZE, GRID1, GRID1)
            input_data = [
                np.transpose(out_boxes, (2, 3, 0, 1)),
                np.transpose(out_boxes2, (2, 3, 0, 1)),
            ]

            testtime = timer()
            boxes, classes, scores = yolov3_post_process(input_data)
            testtime2 = timer()
            print("process use time: {}".format(testtime2 - testtime))

            testtime = timer()
            if boxes is not None:
                draw(frame, boxes, scores, classes)

            # FPS counter: count frames and refresh the label roughly once
            # per accumulated second.
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time += exec_time
            curr_fps += 1
            if accum_time > 1:
                accum_time -= 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0
            cv2.putText(frame, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.50, color=(255, 0, 0), thickness=2)
            cv2.imshow("results", frame)
            c = cv2.waitKey(5) & 0xff
            if c == 27:
                # ESC pressed.
                break
            testtime2 = timer()
            print("show image use time: {}".format(testtime2 - testtime))
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop; fall through to the cleanup.
        pass
    finally:
        # Single cleanup path for ESC, end of stream, Ctrl-C and errors.
        cv2.destroyAllWindows()
        capture.release()
        print("before rknn release")
        rknn.release()
        print("after rknn release")