OpenVINO 2022.3实战四：POT API 实现 YOLOv5 模型 INT8 量化

将预训练的 YOLOv5s PyTorch 模型转换为 OpenVINO™ FP32 Intermediate Representation (IR) 模型。下一步，通过 OpenVINO™ Post-Training Optimization Tool (POT) API 来定义客制化的 DataLoader 和 Metric，从而复用 YOLOv5 客制化的前后处理（letterbox、Non-Maximum Suppression）及精度计算等模块。最后采用 “DefaultQuantization” 量化算法，定义并运行量化流水线，对 FP32 模型进行 INT8 量化。

1 准备需要量化的模型

下载yolov5代码 ultralytics/yolov5

python export.py --weights yolov5s.pt --include torchscript onnx openvino

导出的模型保存在 yolov5s_openvino_model 目录中（后文的配置假定该目录位于 ./weights/ 下）。
在这里插入图片描述

2 定义数据加载

继承 openvino.tools.pot.api 中的 DataLoader 类，创建 YOLOv5DataLoader 类：定义数据与 annotation 的加载和预处理。

class YOLOv5DataLoader(DataLoader):
    """POT data loader that serves YOLOv5 validation batches one at a time."""

    def __init__(self, config):
        """Store the dataset settings from *config* and build the loader.

        :param config: mapping (or ``Dict``) providing ``data_source``,
            ``imgsz`` and ``single_cls``.
        """
        # Plain mappings are wrapped so attribute-style access works below.
        config = config if isinstance(config, Dict) else Dict(config)
        super().__init__(config)

        # Values taken from the configuration.
        self._data_source = config.data_source
        self._imgsz = config.imgsz
        self._single_cls = config.single_cls

        # Fixed loader settings.
        self._batch_size = 1
        self._stride = 32
        self._pad = 0.5
        self._rect = False
        self._workers = 1

        self._data_loader = self._init_dataloader()
        self._data_iter = iter(self._data_loader)

    def __len__(self):
        """Return the number of samples in the underlying dataset."""
        return len(self._data_loader.dataset)

    def _init_dataloader(self):
        """Create the loader over the ``'val'`` entry of the data source."""
        created = create_dataloader(
            self._data_source['val'],
            imgsz=self._imgsz,
            batch_size=self._batch_size,
            stride=self._stride,
            single_cls=self._single_cls,
            pad=self._pad,
            rect=self._rect,
            workers=self._workers,
        )
        # Only the first element of the returned sequence is kept.
        return created[0]

    def __getitem__(self, item):
        """Return ``((item, annotation), image)`` for the next batch.

        The batch is pulled from an internal iterator rather than looked up
        by *item*; when the iterator is exhausted it is restarted from the
        beginning.
        """
        try:
            batch = next(self._data_iter)
        except StopIteration:
            # Wrap around so more items than the dataset holds can be served.
            self._data_iter = iter(self._data_loader)
            batch = next(self._data_iter)

        im, target, path, shape = batch

        # Convert to float and scale by 1/255.
        im = im.float()
        im /= 255
        nb, _, height, width = im.shape
        img = im.cpu().detach().numpy()

        annotation = {
            'image_path': path,
            'target': target.cpu().detach().numpy(),
            'batch_size': nb,
            'shape': shape,
            'width': width,
            'height': height,
            'img': img,
        }
        return (item, annotation), img

3 精度验证功能

继承 openvino.tools.pot.api 中的 Metric 类，创建 COCOMetric 类：定义模型后处理及精度计算方法。

class COCOMetric(Metric):
    """POT metric reporting AP@0.5 and AP@0.5:0.95 for YOLOv5 outputs."""

    def __init__(self, config):
        """Read thresholds, class information and device from *config*.

        :param config: object exposing ``conf_thres``, ``iou_thres``,
            ``single_cls``, ``nc``, ``names`` and ``device`` as attributes.
        """
        super().__init__()
        self._metric_dict = {"AP@0.5": [], "AP@0.5:0.95": []}
        self._names = (*self._metric_dict,)
        self._stats = []        # statistics accumulated over all updates
        self._last_stats = []   # statistics of the most recent update only
        self._conf_thres = config.conf_thres
        self._iou_thres = config.iou_thres
        self._single_cls = config.single_cls
        self._nc = config.nc
        names = config.names
        # Accept either a sequence of names or an index -> name mapping.
        if isinstance(names, dict):
            self._class_names = dict(names)
        else:
            self._class_names = dict(enumerate(names))
        self._device = config.device

    @property
    def value(self):
        """Return the metric values for the last model output.

        Reports both AP@0.5 and AP@0.5:0.95, each wrapped in a
        one-element list.
        """
        _, _, map50, map_ = self._process_stats(self._last_stats)
        return {self._names[0]: [map50], self._names[1]: [map_]}

    @property
    def avg_value(self):
        """Return the metric values over all model outputs seen so far.

        Reports both AP@0.5 and AP@0.5:0.95 as scalars.
        """
        _, _, map50, map_ = self._process_stats(self._stats)
        return {self._names[0]: map50, self._names[1]: map_}

    def _process_stats(self, stats):
        """Reduce per-image statistics to ``(mp, mr, map50, map)``.

        :param stats: list of ``(correct, conf, pred_cls, target_cls)``
            tuples as collected by :meth:`update`.
        :return: mean precision, mean recall, mAP@0.5 and mAP@0.5:0.95;
            all ``0.0`` when there is nothing to evaluate.
        """
        mp, mr, map50, map_ = 0.0, 0.0, 0.0, 0.0
        # Concatenate each of the four fields across images.
        stats = [np.concatenate(x, 0) for x in zip(*stats)]
        if stats and stats[0].any():
            _tp, _fp, p, r, _f1, ap, _ap_class = ap_per_class(
                *stats, plot=False, save_dir=None, names=self._class_names
            )
            ap50, ap = ap[:, 0], ap.mean(1)
            mp, mr, map50, map_ = p.mean(), r.mean(), ap50.mean(), ap.mean()
        return mp, mr, map50, map_

    def update(self, output, target):
        """Calculate and update the metric for one model output.

        Contains the postprocessing part from the Ultralytics YOLOv5 project.

        :param output: model output; only ``output[0]`` is used
        :param target: annotations; only ``target[0]`` is used, a dict with
            the keys ``target``, ``width``, ``height``, ``shape`` and ``img``
        """
        meta = target[0]
        annotation = meta["target"]
        width = meta["width"]
        height = meta["height"]
        shapes = meta["shape"]
        im = meta["img"]

        iouv = torch.linspace(0.5, 0.95, 10).to(self._device)  # IoU vector for mAP@0.5:0.95
        niou = iouv.numel()
        stats = []

        # Keep the labels on the same device as the scale factor and predictions.
        annotation = torch.Tensor(annotation).to(self._device)
        annotation[:, 2:] *= torch.Tensor([width, height, width, height]).to(self._device)  # to pixels

        # NMS
        lb = []
        out = torch.Tensor(output[0]).to(self._device)
        out = non_max_suppression(
            out,
            self._conf_thres,
            self._iou_thres,
            labels=lb,
            multi_label=True,
            agnostic=self._single_cls,
        )

        # Metrics
        for si, pred in enumerate(out):
            labels = annotation[annotation[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target classes
            shape = shapes[si][0]

            if len(pred) == 0:
                if nl:
                    # No detections although labels exist: record the labels
                    # so they are not dropped from the statistics.
                    stats.append(
                        (torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)
                    )
                continue

            # Predictions
            if self._single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred

            # Evaluate
            if nl:
                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
                scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                correct = process_batch(predn, labelsn, iouv)
            else:
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Every entry of this update, including the "no detections" ones,
        # goes into both the running and the last-output statistics.
        self._stats.extend(stats)
        self._last_stats = stats

    def reset(self):
        """Reset the metric to its initial, empty state."""
        self._metric_dict = {"AP@0.5": [], "AP@0.5:0.95": []}
        self._last_stats = []
        self._stats = []

    def get_attributes(self):
        """Return a dictionary of metric attributes.

        Format: ``{metric_name: {attribute_name: value}}``.
        Required attributes:
            'direction': 'higher-better' or 'higher-worse'
            'type': metric type
        """
        return {
            self._names[0]: {'direction': 'higher-better', 'type': 'AP@0.5'},
            self._names[1]: {'direction': 'higher-better', 'type': 'AP@0.5:0.95'},
        }

4 运行优化流程

设置量化算法及相关参数，定义并运行量化流水线。

def get_config():
    """Assemble the model, engine, dataset, metric and algorithm settings.

    :return: dict with the keys ``model``, ``engine``, ``dataset``,
        ``metric`` and ``algorithms``.
    """
    data = check_dataset(check_yaml("./data/coco128.yaml"))

    model_config = Dict({
        "model_name": "yolov5s",
        "model": "./weights/yolov5s_openvino_model/yolov5s.xml",
        "weights": "./weights/yolov5s_openvino_model/yolov5s.bin",
    })

    engine_config = Dict({
        "device": "CPU",
        "stat_requests_number": 8,
        "eval_requests_number": 8,
    })

    dataset_config = Dict({
        "data_source": data,
        "imgsz": 640,
        "single_cls": True,
    })

    metric_config = Dict({
        "conf_thres": 0.001,
        "iou_thres": 0.65,
        "single_cls": True,
        "nc": 1,  # if opt.single_cls else int(data['nc']),
        "names": data["names"],
        "device": "cpu",
    })

    algorithms = [
        {
            "name": "DefaultQuantization",  # or AccuracyAware
            "params": {
                "target_device": "CPU",
                "preset": "mixed",
                "stat_subset_size": 300,
            },
        }
    ]

    return {
        "model": model_config,
        "engine": engine_config,
        "dataset": dataset_config,
        "metric": metric_config,
        "algorithms": algorithms,
    }


# Download dataset and set config
print("Run the POT. This will take few minutes...")
config = get_config()
init_logger(level='INFO')

# Model directory; created if it does not exist yet.
save_dir = Path("./weights/yolov5s_openvino_model/")
save_dir.mkdir(parents=True, exist_ok=True)

# Step 1: Load the model.
model = load_model(config["model"])

# Step 2: Initialize the data loader.
data_loader = YOLOv5DataLoader(config["dataset"])

# Step 3 (Optional. Required for AccuracyAwareQuantization): Initialize the metric.
metric = COCOMetric(config["metric"])

# Step 4: Initialize the engine for metric calculation and statistics collection.
engine = IEEngine(config=config["engine"], data_loader=data_loader, metric=metric)

# Step 5: Create a pipeline of compression algorithms.
pipeline = create_pipeline(config["algorithms"], engine)

metric_results = None

# Step 6: Execute the pipeline to calculate Min-Max value
compressed_model = pipeline.run(model)

# Step 7 (Optional): Compress model weights to quantized precision
#                    in order to reduce the size of final .bin file.
compress_model_weights(compressed_model)

# Step 8: Save the compressed model to the desired path.
optimized_save_dir = save_dir / "optimized"
save_model(
    compressed_model,
    Path.cwd() / optimized_save_dir,
    config["model"]["model_name"],
)

5 比较原始模型和量化模型的准确性

FP32：

# Evaluate the original FP32 model to obtain the baseline accuracy. Print the results.
# NOTE(review): presumably this should run before Step 6 (pipeline.run), so the
# model is evaluated before any quantization is applied -- confirm.
metric_results_fp32 = pipeline.evaluate(model)
print("FP32 model metric_results: {}".format(metric_results_fp32))

输出：

FP32 model metric_results: {'AP@0.5': 0.7051576693437555, 'AP@0.5:0.95': 0.44624265930493545}

INT8：

# Step 9 (Optional): Evaluate the compressed model. Print the results.
metric_results_i8 = pipeline.evaluate(compressed_model)
print("Quantized INT8 model metric_results: {}".format(metric_results_i8))

输出：

Quantized INT8 model metric_results: {'AP@0.5': 0.6924341121617621, 'AP@0.5:0.95': 0.43698028961534857}

6 比较原始模型和量化模型的性能

使用OpenVINO中的Benchmark Tool（推理性能测量工具）测量FP32和INT8模型的推理性能

FP32:

 benchmark_app -m .\weights\yolov5s_openvino_model\yolov5s.xml -d CPU -api async

输出：
在这里插入图片描述

INT8：

benchmark_app -m .\weights\yolov5s_openvino_model\optimized\yolov5s.xml -d CPU -api async

输出：
在这里插入图片描述