1. Convert the trained .pt model to ONNX format
Use the export.py script that ships with YOLOv5-Lite to export the model to ONNX, with the image size set to 320 and batch size 1.
The exported model can then be simplified further with onnxsim.
onnxsim reference: onnxsim-让导出的onnx模型更精简_alex1801的博客-CSDN博客
python export.py --weights weights/v5lite-e.pt --img 320 --batch 1
python -m onnxsim weights/v5lite-e.onnx weights/yolov5-lite-sim.onnx
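Before moving on, it is worth checking that the simplified model actually loads and has the expected input shape. A minimal sanity-check sketch (the file name follows the commands above; the input name 'images' is the usual YOLOv5-Lite export name, not guaranteed):

import onnxruntime as ort

# load the simplified model on CPU and inspect its single input
sess = ort.InferenceSession("weights/yolov5-lite-sim.onnx", providers=["CPUExecutionProvider"])
inp = sess.get_inputs()[0]
print(inp.name, inp.shape)  # expected: something like 'images' [1, 3, 320, 320]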
2. Real-time inference with the ONNX model using onnxruntime (Python version)
This version runs at roughly 11+ FPS.
Only these two arguments need to be replaced with your own model and the class names from your training (a class-file example follows the snippet below):
parser.add_argument('--modelpath', type=str, default="/media/xcy/dcd05f09-46df-4879-bfeb-3bab03a6cc3a/YOLOv5-Lite/weights/v5lite-e.onnx",
                    help="onnx filepath")
parser.add_argument('--classfile', type=str, default='coco.names',
                    help="classname filepath")
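The class file itself is plain text with one class name per line, in the same order as the labels used during training; the script below reads it with readlines() and strips each line. A hypothetical two-class example (names are placeholders):

person
helmet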
Reference on GitHub: hpc203/yolov5-lite-onnxruntime – yolov5-lite object detection deployed with ONNXRuntime, with both C++ and Python versions of the program.
import argparse
import time

import cv2
import numpy as np
import onnxruntime as ort


class yolov5_lite():
    def __init__(self, model_pb_path, label_path, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5):
        so = ort.SessionOptions()
        so.log_severity_level = 3
        self.net = ort.InferenceSession(model_pb_path, so)
        # one class name per line, in the same order as during training
        self.classes = list(map(lambda x: x.strip(), open(label_path, 'r').readlines()))
        self.num_classes = len(self.classes)
        anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        self.nl = len(anchors)
        self.na = len(anchors[0]) // 2
        self.no = self.num_classes + 5
        self.grid = [np.zeros(1)] * self.nl
        self.stride = np.array([8., 16., 32.])
        self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold
        self.input_shape = (self.net.get_inputs()[0].shape[2], self.net.get_inputs()[0].shape[3])

    def resize_image(self, srcimg, keep_ratio=True):
        # letterbox resize to the network input size, optionally keeping the aspect ratio
        top, left, newh, neww = 0, 0, self.input_shape[0], self.input_shape[1]
        if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
            hw_scale = srcimg.shape[0] / srcimg.shape[1]
            if hw_scale > 1:
                newh, neww = self.input_shape[0], int(self.input_shape[1] / hw_scale)
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                left = int((self.input_shape[1] - neww) * 0.5)
                img = cv2.copyMakeBorder(img, 0, 0, left, self.input_shape[1] - neww - left,
                                         cv2.BORDER_CONSTANT, value=0)
            else:
                newh, neww = int(self.input_shape[0] * hw_scale), self.input_shape[1]
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                top = int((self.input_shape[0] - newh) * 0.5)
                img = cv2.copyMakeBorder(img, top, self.input_shape[0] - newh - top, 0, 0,
                                         cv2.BORDER_CONSTANT, value=0)
        else:
            img = cv2.resize(srcimg, self.input_shape, interpolation=cv2.INTER_AREA)
        return img, newh, neww, top, left

    def _make_grid(self, nx=20, ny=20):
        xv, yv = np.meshgrid(np.arange(ny), np.arange(nx))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)

    def postprocess(self, frame, outs, pad_hw):
        newh, neww, padh, padw = pad_hw
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        ratioh, ratiow = frameHeight / newh, frameWidth / neww
        # Keep only boxes with high confidence scores and assign each box the class with the highest score.
        classIds = []
        confidences = []
        boxes = []
        for detection in outs:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > self.confThreshold and detection[4] > self.objThreshold:
                center_x = int((detection[0] - padw) * ratiow)
                center_y = int((detection[1] - padh) * ratioh)
                width = int(detection[2] * ratiow)
                height = int(detection[3] * ratioh)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])
        # Non-maximum suppression removes redundant overlapping boxes with lower confidences.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
        for i in np.array(indices).reshape(-1):  # flatten: older OpenCV returns Nx1 arrays
            left, top, width, height = boxes[i]
            frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)
        return frame

    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        # Draw the bounding box.
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4)
        label = '%.2f' % conf
        label = '%s:%s' % (self.classes[classId], label)
        # Display the label at the top of the bounding box.
        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        top = max(top, labelSize[1])
        cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
        return frame

    def detect(self, srcimg):
        img, newh, neww, top, left = self.resize_image(srcimg)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)
        outs = self.net.run(None, {self.net.get_inputs()[0].name: blob})[0].squeeze(axis=0)
        # Decode the raw head outputs: apply grid offsets and anchor sizes for each stride level.
        row_ind = 0
        for i in range(self.nl):
            h, w = int(self.input_shape[0] / self.stride[i]), int(self.input_shape[1] / self.stride[i])
            length = int(self.na * h * w)
            if self.grid[i].shape[2:4] != (h, w):
                self.grid[i] = self._make_grid(w, h)
            outs[row_ind:row_ind + length, 0:2] = (outs[row_ind:row_ind + length, 0:2] * 2. - 0.5 +
                                                   np.tile(self.grid[i], (self.na, 1))) * int(self.stride[i])
            outs[row_ind:row_ind + length, 2:4] = (outs[row_ind:row_ind + length, 2:4] * 2) ** 2 * \
                                                  np.repeat(self.anchor_grid[i], h * w, axis=0)
            row_ind += length
        srcimg = self.postprocess(srcimg, outs, (newh, neww, top, left))
        return srcimg


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--imgpath', type=str, default="", help="image path")
    parser.add_argument('--modelpath', type=str,
                        default="/media/xcy/dcd05f09-46df-4879-bfeb-3bab03a6cc3a/YOLOv5-Lite/weights/v5lite-e.onnx",
                        help="onnx filepath")
    parser.add_argument('--classfile', type=str, default='coco.names', help="classname filepath")
    parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.6, type=float, help='nms iou thresh')
    args = parser.parse_args()

    net = yolov5_lite(args.modelpath, args.classfile,
                      confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold)

    counter = 0
    start_time = time.time()
    # 1. open the default camera (pass a video path instead of 0 to read a file)
    capture = cv2.VideoCapture(0)
    # 2. read the first frame; ret is True as long as frames keep coming
    ret, frame = capture.read()
    fps = capture.get(cv2.CAP_PROP_FPS)  # nominal frame rate reported by the source
    while ret:
        counter += 1
        # 3. run detection; boxes and labels are drawn directly on the frame
        srcimg = net.detect(frame)
        elapsed = time.time() - start_time
        if elapsed != 0:
            # overlay and print the measured FPS
            cv2.putText(srcimg, "FPS {0}".format(float('%.1f' % (counter / elapsed))), (30, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            print("FPS: ", counter / elapsed)
            counter = 0
            start_time = time.time()
        cv2.imshow("video", srcimg)
        # 4. press 'q' to quit
        if cv2.waitKey(20) & 0xff == ord('q'):
            break
        ret, frame = capture.read()
    # 5. release the capture
    capture.release()
    # 6. close all windows
    cv2.destroyAllWindows()
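Assuming the script above is saved as, say, lite_onnx_demo.py (the file name is arbitrary), it can be started with your own weights and class file like this:

python lite_onnx_demo.py --modelpath weights/yolov5-lite-sim.onnx --classfile coco.names --confThreshold 0.5 --nmsThreshold 0.6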
3. Real-time inference with NCNN + OpenCV (C++ version)
This version reaches 33+ FPS on a laptop. I was still tidying up the code at first and planned to post it later.
The code is now cleaned up and shown below; it runs once ncnn has been configured in VS2019.
For setting up NCNN on Linux, see my other post: Ubuntu20.04配置NCNN推理框架(转换yolov5 onnx格式到ncnn格式-CSDN博客
The Windows setup is fairly simple; a quick search will turn up plenty of guides.
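Before the C++ code can load the model, the ONNX file from step 1 has to be converted to ncnn's .param/.bin format; the Ubuntu post linked above walks through this. With ncnn's command-line tools built, the conversion looks roughly like the following (the output file names here are placeholders):

./onnx2ncnn weights/yolov5-lite-sim.onnx v5lite-e.param v5lite-e.bin
./ncnnoptimize v5lite-e.param v5lite-e.bin v5lite-e-opt.param v5lite-e-opt.bin 65536

The last argument of ncnnoptimize selects the weight storage type: 0 keeps fp32, 65536 converts the weights to fp16.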
#include "layer.h"#include "net.h"#if defined(USE_NCNN_SIMPLEOCV)#include "simpleocv.h"#else#include #include #include #endif#include #include #include #include#include //#define YOLOV5_V60 1 //YOLOv5 v6.0#define YOLOV5_V62 1 //YOLOv5 v6.2 exportonnx model method https://github.com/shaoshengsong/yolov5_62_export_ncnn#if YOLOV5_V60 || YOLOV5_V62#define MAX_STRIDE 64#else#define MAX_STRIDE 32class YoloV5Focus : public ncnn::Layer{public:YoloV5Focus(){one_blob_only = true;}virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const{int w = bottom_blob.w;int h = bottom_blob.h;int channels = bottom_blob.c;int outw = w / 2;int outh = h / 2;int outc = channels * 4;top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator);if (top_blob.empty())return -100;#pragma omp parallel for num_threads(opt.num_threads)for (int p = 0; p < outc; p++){const float* ptr = bottom_blob.channel(p % channels).row((p / channels) % 2) + ((p / channels) / 2);float* outptr = top_blob.channel(p);for (int i = 0; i < outh; i++){for (int j = 0; j < outw; j++){*outptr = *ptr;outptr += 1;ptr += 2;}ptr += w;}}return 0;}};DEFINE_LAYER_CREATOR(YoloV5Focus)#endif //YOLOV5_V60YOLOV5_V62struct Object{cv::Rect_ rect;int label;float prob;};static inline float intersection_area(const Object& a, const Object& b){cv::Rect_ inter = a.rect & b.rect;return inter.area();}static void qsort_descent_inplace(std::vector