When running YOLOv5 object detection at the edge, the RK3588 board is a solid choice if cost-effectiveness or domestically produced hardware matters to you.
This post walks through converting a YOLOv5 PyTorch model to RKNN, and shows how to call the relevant APIs on the RK board to run forward inference with the converted model.
pt-to-rknn workflow
pt to onnx
First, convert the trained pt model to an intermediate ONNX model. Before exporting, you need to modify part of models/yolo.py under the project root: comment out the inference branch of the forward method shown in the figure, and replace it with:
def forward(self, x):
    z = []
    for i in range(self.nl):
        x[i] = self.m[i](x[i])
    return x
If detection produces a jumble of misplaced boxes, the exported outputs likely still need the sigmoid activation applied; in that case replace it with this instead:
def forward(self, x):
    z = []
    for i in range(self.nl):
        x[i] = torch.sigmoid(self.m[i](x[i]))
    return x
Remember to change this back before training again, otherwise training will report an error; the modification is only needed when converting the model.
Next, run export.py to export the ONNX model:
python export.py --weights runs/train/exp/weights/best.pt --img 320 --batch 1 --include onnx
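If you want to sanity-check the export before moving on, the onnx package (assuming it is installed in your environment) can verify the graph; with the modified forward above, the model should expose the raw detection-head outputs rather than a single decoded tensor. A minimal check:

import onnx

model = onnx.load('runs/train/exp/weights/best.onnx')
onnx.checker.check_model(model)              # structural validity
print([o.name for o in model.graph.output])  # expect three feature-map outputs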
onnx to rknn
First, create a virtual environment; I used Python 3.8. You can set up the environment directly with pip install -r requirements.txt; if that reports errors, create it with conda env create -f environment.yml instead. If the pip route works, passing -i to switch to a mirror is the more convenient option.
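For example, a minimal conda setup might look like this (the environment name rknn2 is arbitrary):

conda create -n rknn2 python=3.8
conda activate rknn2
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple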
It is normal for the rknn-toolkit2 install to fail at this point; it will be installed manually below.
*The steps above replicate my environment directly; you can also install the required packages yourself by following the steps below.
Next, download RKNN-Toolkit2, either from the official site or from the resource I uploaded.
After downloading, activate the environment you just created and install the rknn-toolkit2 package. As shown in the figure, go into the packages directory and install the dependencies first:
pip install -r requirements_cp38-1.6.0.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
Then install the rknn-toolkit2 wheel itself:
pip install rknn_toolkit2-1.6.0+81f21f4d-cp38-cp38-linux_x86_64.whl
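A quick import check confirms whether the wheel installed correctly:

python -c "from rknn.api import RKNN; print('rknn-toolkit2 ok')"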
*Next, convert ONNX to RKNN.
Open test.py under the path shown in the figure and modify the relevant parameters. The last parameter must be changed to rk3588 (the default is rk3566). Once modified, simply run test.py and the model is converted to rknn.
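For reference, the conversion step inside test.py boils down to roughly the following sketch (the file names, normalization values, and quantization dataset here are assumptions; match them to your own test.py):

from rknn.api import RKNN

rknn = RKNN(verbose=True)
# preprocessing is baked into the rknn model; adjust to your training setup
rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]],
            target_platform='rk3588')
rknn.load_onnx(model='best.onnx')
# do_quantization=True requires a dataset.txt listing calibration image paths
rknn.build(do_quantization=True, dataset='./dataset.txt')
rknn.export_rknn('best.rknn')
rknn.release()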
Forward inference on the RK board
Because the RK3588 is an aarch64 platform, the rknn-toolkit2 package cannot run on it; use the rknn-toolkit-lite2 package instead, and install the corresponding whl on the board:
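The lite wheel ships in the same RKNN-Toolkit2 download (under rknn_toolkit_lite2/packages); the exact filename below is an assumption matching the 1.6.0 / Python 3.8 combination used above:

pip install rknn_toolkit_lite2-1.6.0-cp38-cp38-linux_aarch64.whl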
The inference code is as follows:
from copy import copy
import time
import numpy as np
import cv2
from rknnlite.api import RKNNLite

RKNN_MODEL = 'best.rknn'
# IMG_PATH = './input/240513_00000741.jpg'
OBJ_THRESH = 0.25
NMS_THRESH = 0.45
IMG_SIZE = (640, 640)
OUTPUT_VIDEO_PATH = 'output_1.mp4'
BOX = (450, 150, 1100, 550)
CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
           'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
           'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
           'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
           'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
           'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
           'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
           'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
           'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
           'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
           'toothbrush']

anchors = [[[10, 13], [16, 30], [33, 23]],
           [[30, 61], [62, 45], [59, 119]],
           [[116, 90], [156, 198], [373, 326]]]


class Letter_Box_Info():
    def __init__(self, shape, new_shape, w_ratio, h_ratio, dw, dh, pad_color) -> None:
        self.origin_shape = shape
        self.new_shape = new_shape
        self.w_ratio = w_ratio
        self.h_ratio = h_ratio
        self.dw = dw
        self.dh = dh
        self.pad_color = pad_color


def box_process(position, anchors):
    grid_h, grid_w = position.shape[2:4]
    col, row = np.meshgrid(np.arange(0, grid_w), np.arange(0, grid_h))  # (80, 80) (80, 80)
    col = col.reshape(1, 1, grid_h, grid_w)  # (1, 1, 80, 80)
    row = row.reshape(1, 1, grid_h, grid_w)
    grid = np.concatenate((col, row), axis=1)  # (1, 2, 80, 80)
    stride = np.array([IMG_SIZE[1]//grid_h, IMG_SIZE[0]//grid_w]).reshape(1, 2, 1, 1)  # 8 8
    col = col.repeat(len(anchors), axis=0)
    row = row.repeat(len(anchors), axis=0)
    anchors = np.array(anchors)
    anchors = anchors.reshape(*anchors.shape, 1, 1)  # (3, 2, 1, 1)

    box_xy = position[:, :2, :, :]*2 - 0.5
    box_wh = pow(position[:, 2:4, :, :]*2, 2) * anchors
    box_xy += grid
    box_xy *= stride
    box = np.concatenate((box_xy, box_wh), axis=1)  # (3, 4, 80, 80)

    # Convert [c_x, c_y, w, h] to [x1, y1, x2, y2]
    xyxy = np.copy(box)
    xyxy[:, 0, :, :] = box[:, 0, :, :] - box[:, 2, :, :] / 2  # top left x
    xyxy[:, 1, :, :] = box[:, 1, :, :] - box[:, 3, :, :] / 2  # top left y
    xyxy[:, 2, :, :] = box[:, 0, :, :] + box[:, 2, :, :] / 2  # bottom right x
    xyxy[:, 3, :, :] = box[:, 1, :, :] + box[:, 3, :, :] / 2  # bottom right y
    return xyxy


def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes with object threshold."""
    box_confidences = box_confidences.reshape(-1)
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)

    _class_pos = np.where(class_max_score * box_confidences >= OBJ_THRESH)
    scores = (class_max_score * box_confidences)[_class_pos]

    boxes = boxes[_class_pos]
    classes = classes[_class_pos]

    return boxes, classes, scores
# """Filter boxes with object threshold.
# """
# boxes = boxes.reshape(-1, 4)
# box_confidences = box_confidences.reshape(-1)
# box_class_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])
#
# _box_pos = np.where(box_confidences >= OBJ_THRESH)
# boxes = boxes[_box_pos]
# box_confidences = box_confidences[_box_pos]
# box_class_probs = box_class_probs[_box_pos]
#
# class_max_score = np.max(box_class_probs, axis=-1)
# classes = np.argmax(box_class_probs, axis=-1)
# _class_pos = np.where(class_max_score >= OBJ_THRESH)
#
# boxes = boxes[_class_pos]
# classes = classes[_class_pos]
# scores = (class_max_score * box_confidences)[_class_pos]
#
def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.
    # Returns
        keep: ndarray, index of effective boxes.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= NMS_THRESH)[0]
        order = order[inds + 1]
    keep = np.array(keep)
    return keep


def post_process(input_data, anchors):
    boxes, scores, classes_conf = [], [], []
    # 1*255*h*w -> 3*85*h*w
    input_data = [_in.reshape([len(anchors[0]), -1] + list(_in.shape[-2:])) for _in in input_data]
    for i in range(len(input_data)):  # (3, 85, 80, 80)
        boxes.append(box_process(input_data[i][:, :4, :, :], anchors[i]))  # (3, 4, 80, 80)
        scores.append(input_data[i][:, 4:5, :, :])  # (3, 1, 80, 80)
        classes_conf.append(input_data[i][:, 5:, :, :])  # (3, 80, 80, 80)

    def sp_flatten(_in):
        ch = _in.shape[1]
        _in = _in.transpose(0, 2, 3, 1)
        return _in.reshape(-1, ch)

    boxes = [sp_flatten(_v) for _v in boxes]  # (3, 19200, 4)
    classes_conf = [sp_flatten(_v) for _v in classes_conf]  # (3, 19200, 80)
    scores = [sp_flatten(_v) for _v in scores]  # (3, 19200, 1)

    boxes = np.concatenate(boxes)  # (25200, 4)
    classes_conf = np.concatenate(classes_conf)  # (25200, 80)
    scores = np.concatenate(scores)  # (25200, 1)

    # filter according to threshold
    boxes, classes, scores = filter_boxes(boxes, scores, classes_conf)  # (12, 4) 12 12

    # nms, applied per class
    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]
        keep = nms_boxes(b, s)

        if len(keep) != 0:
            nboxes.append(b[keep])
            nclasses.append(c[keep])
            nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)

    return boxes, classes, scores


def draw(image, boxes, scores, classes):
    for box, score, cl in zip(boxes, scores, classes):
        top, left, right, bottom = [int(_b) for _b in box]
        print("%s @ (%d %d %d %d) %.3f" % (CLASSES[cl], top, left, right, bottom, score))
        cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (top, left - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)


def letterbox(im, new_shape=(640, 640), color=(0, 0, 0), letter_box_info_list=[]):
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    ratio = r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    # dw, dh = np.mod(dw, 32), np.mod(dh, 32)
    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border

    letter_box_info_list.append(Letter_Box_Info(shape, new_shape, ratio, ratio, dw, dh, color))
    return im, letter_box_info_list


def get_real_box(box, in_format='xyxy', letter_box_info_list=[]):
    bbox = copy(box)
    # undo the letterbox transform to map boxes back to the original image
    if in_format == 'xyxy':
        bbox[:, 0] -= letter_box_info_list[-1].dw
        bbox[:, 0] /= letter_box_info_list[-1].w_ratio
        bbox[:, 0] = np.clip(bbox[:, 0], 0, letter_box_info_list[-1].origin_shape[1])
        bbox[:, 1] -= letter_box_info_list[-1].dh
        bbox[:, 1] /= letter_box_info_list[-1].h_ratio
        bbox[:, 1] = np.clip(bbox[:, 1], 0, letter_box_info_list[-1].origin_shape[0])
        bbox[:, 2] -= letter_box_info_list[-1].dw
        bbox[:, 2] /= letter_box_info_list[-1].w_ratio
        bbox[:, 2] = np.clip(bbox[:, 2], 0, letter_box_info_list[-1].origin_shape[1])
        bbox[:, 3] -= letter_box_info_list[-1].dh
        bbox[:, 3] /= letter_box_info_list[-1].h_ratio
        bbox[:, 3] = np.clip(bbox[:, 3], 0, letter_box_info_list[-1].origin_shape[0])
    return bbox


if __name__ == '__main__':
    rknn = RKNNLite()

    print('--> Load RKNN model')
    ret = rknn.load_rknn(RKNN_MODEL)
    if ret != 0:
        print('Load RKNN model failed')
        exit(ret)
    print('done')

    ret = rknn.init_runtime()
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    cap = cv2.VideoCapture("./input/out_240715151339.mp4")
    # read some properties of the input video
    fps = cap.get(cv2.CAP_PROP_FPS)
    # width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    # height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # create the VideoWriter object
    x, y, w, h = BOX
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # or use 'XVID'
    out = cv2.VideoWriter(OUTPUT_VIDEO_PATH, fourcc, fps, (w, h))

    while True:
        t1 = time.time()
        # read one frame
        ret, frame = cap.read()
        if not ret:
            break
        # crop the frame to the region of interest
        img0 = frame[y:y+h, x:x+w, :]
        img, letter_box_info_list = letterbox(im=img0.copy(), new_shape=(IMG_SIZE[1], IMG_SIZE[0]))  # padded resize
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR to RGB
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim

        outputs = rknn.inference(inputs=[img])  # Inference
        boxes, classes, scores = post_process(outputs, anchors)
        if boxes is None:  # no detections in this frame
            out.write(img0.copy())
            continue

        # keep only the largest 'person' box
        boxes_filter, scores_filter, classes_filter = [0, 0, 0, 0], [], []
        max_box = [0, 0, 0, 0]
        for box, score, cl in zip(boxes, scores, classes):
            if cl == 0:
                if (box[2]-box[0])*(box[3]-box[1]) > (max_box[2]-max_box[0])*(max_box[3]-max_box[1]):
                    max_box = box
                    boxes_filter = np.expand_dims(max_box, axis=0)
                    scores_filter = np.expand_dims(score, axis=0)
                    classes_filter = np.expand_dims(cl, axis=0)

        img_p = img0.copy()
        if len(scores_filter):  # only draw when a person was found
            draw(img_p, get_real_box(boxes_filter, 'xyxy', letter_box_info_list), scores_filter, classes_filter)
        cv2.imwrite("11.jpg", img_p)
        out.write(img_p)

    cap.release()
    out.release()
    rknn.release()
Here I take a video as input and also dump individual frames as images to inspect the results; you can change the input and output formats as you like.
I will follow up later with a post on NPU acceleration.
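As a starting point, rknn-toolkit-lite2 on the RK3588 already lets you choose which NPU cores run the model through the core_mask argument of init_runtime; a minimal sketch:

from rknnlite.api import RKNNLite

rknn = RKNNLite()
rknn.load_rknn('best.rknn')
# spread inference across all three RK3588 NPU cores;
# NPU_CORE_0 / NPU_CORE_AUTO pin it to a single core instead
ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)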