using Microsoft.ML.OnnxRuntime; using Microsoft.ML.OnnxRuntime.Tensors; using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Drawing; using System.Drawing.Drawing2D; using System.Drawing.Imaging; using System.IO; using System.Linq; using System.Threading.Tasks; namespace yolov5_onnx.Mod { ///

/// <summary>
/// Yolov5 scorer: runs an ONNX model over an image and converts the raw
/// network output into predictions.
/// </summary>
/// <typeparam name="T">
/// Model description type. It is instantiated through its parameterless
/// constructor and supplies the settings read throughout this class
/// (input size, thresholds, labels, anchors, ...).
/// </typeparam>
public class YoloScorer<T> : IDisposable where T : YoloModel
{
    // YOLO model description (input size, thresholds, labels, ...).
    private readonly T _model;

    // ONNX Runtime session used to run the network. It is only assigned by
    // the constructors that receive weights.
    private readonly InferenceSession _inferenceSession;

/// <summary>
/// Logistic sigmoid: maps any real value into the range 0..1.
/// </summary>
/// <param name="value">Raw input value.</param>
/// <returns>1 / (1 + e^(-value)).</returns>
public float Sigmoid(float value)
{
    float decay = (float)Math.Exp(-value);

    return 1 / (1 + decay);
}

/// <summary>
/// Converts a bounding box from xywh format (center x, center y, width, height)
/// to xyxy format (left, top, right, bottom).
/// </summary>
/// <param name="source">Box as { x, y, w, h }; the first four elements are read.</param>
/// <returns>A new four element array { xMin, yMin, xMax, yMax }.</returns>
public float[] Xywh2xyxy(float[] source)
{
    float centerX = source[0];
    float centerY = source[1];
    float halfWidth = source[2] / 2f;
    float halfHeight = source[3] / 2f;

    return new[]
    {
        centerX - halfWidth,
        centerY - halfHeight,
        centerX + halfWidth,
        centerY + halfHeight
    };
}

/// <summary>
/// Restricts a value to the inclusive range [min, max].
/// </summary>
/// <param name="value">Value to restrict.</param>
/// <param name="min">Lower bound; checked first.</param>
/// <param name="max">Upper bound.</param>
/// <returns>min when value is below it, max when value is above it, otherwise value.</returns>
public float Clamp(float value, float min, float max)
{
    if (value < min)
    {
        return min;
    }

    if (value > max)
    {
        return max;
    }

    return value;
}

/// <summary>
/// Resizes an image to the model input size while keeping its aspect ratio.
/// The scaled image is centered on the canvas and the remaining area keeps
/// the cleared (all-zero ARGB) background.
/// </summary>
/// <param name="image">Source image.</param>
/// <returns>
/// A new bitmap of model width x height. The caller owns it and must dispose it.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public Bitmap ResizeImage(Image image)
{
    if (image == null)
    {
        throw new ArgumentNullException(nameof(image));
    }

    // Graphics.FromImage rejects indexed formats and a few special ones, so a
    // canvas in such a format could never be drawn on. Fall back to 24bpp RGB
    // for those sources instead of failing.
    PixelFormat format = image.PixelFormat;
    bool canHostGraphics = (format & PixelFormat.Indexed) == 0
        && format != PixelFormat.Undefined
        && format != PixelFormat.Format16bppArgb1555
        && format != PixelFormat.Format16bppGrayScale;

    if (!canHostGraphics)
    {
        format = PixelFormat.Format24bppRgb;
    }

    var (w, h) = (image.Width, image.Height); // image width and height
    var (xRatio, yRatio) = (_model.Width / (float)w, _model.Height / (float)h); // x, y ratios
    var ratio = Math.Min(xRatio, yRatio); // ratio = resized / original
    var (width, height) = ((int)(w * ratio), (int)(h * ratio)); // roi width and height
    var (x, y) = ((_model.Width / 2) - (width / 2), (_model.Height / 2) - (height / 2)); // roi x and y coordinates
    var roi = new Rectangle(x, y, width, height); // region of interest

    var output = new Bitmap(_model.Width, _model.Height, format);

    try
    {
        using (var graphics = Graphics.FromImage(output))
        {
            graphics.Clear(Color.FromArgb(0, 0, 0, 0)); // clear canvas
            graphics.SmoothingMode = SmoothingMode.None; // no smoothing
            graphics.InterpolationMode = InterpolationMode.Bilinear; // bilinear interpolation
            graphics.PixelOffsetMode = PixelOffsetMode.Half; // half pixel offset
            graphics.DrawImage(image, roi); // draw scaled
        }
    }
    catch
    {
        // Do not leak the canvas when drawing fails.
        output.Dispose();
        throw;
    }

    return output;
}

/// <summary>
/// Copies the pixels of an image into a 1 x 3 x modelHeight x modelWidth float
/// tensor for network input. Channels are stored in R, G, B order and scaled
/// to the range 0..1.
/// </summary>
/// <param name="image">
/// Image to read; it is cast to <see cref="Bitmap"/>. The tensor is sized from
/// the model, so the bitmap must not be larger than the model input size.
/// </param>
/// <returns>The filled tensor.</returns>
/// <exception cref="NotSupportedException">
/// The pixel format has fewer than 3 bytes per pixel.
/// </exception>
public Tensor<float> ExtractPixels(Image image)
{
    var bitmap = (Bitmap)image;

    int bytesPerPixel = Image.GetPixelFormatSize(bitmap.PixelFormat) / 8;

    // Bytes 0..2 of every pixel are read below; smaller pixels would make the
    // reads run into the neighbouring pixel or past the row.
    if (bytesPerPixel < 3)
    {
        throw new NotSupportedException(
            $"Pixel format {bitmap.PixelFormat} is not supported: at least 3 bytes per pixel are required.");
    }

    var rectangle = new Rectangle(0, 0, bitmap.Width, bitmap.Height);
    var tensor = new DenseTensor<float>(new[] { 1, 3, _model.Height, _model.Width });

    BitmapData bitmapData = bitmap.LockBits(rectangle, ImageLockMode.ReadOnly, bitmap.PixelFormat);

    try
    {
        unsafe // speed up conversion by direct work with memory
        {
            // One parallel task per row; pixels inside a row are handled
            // sequentially because the per-pixel work is tiny.
            Parallel.For(0, bitmapData.Height, (y) =>
            {
                byte* row = (byte*)bitmapData.Scan0 + (y * bitmapData.Stride);

                for (int x = 0; x < bitmapData.Width; x++)
                {
                    byte* pixel = row + (x * bytesPerPixel);

                    tensor[0, 0, y, x] = pixel[2] / 255.0F; // r
                    tensor[0, 1, y, x] = pixel[1] / 255.0F; // g
                    tensor[0, 2, y, x] = pixel[0] / 255.0F; // b
                }
            });
        }
    }
    finally
    {
        // Always release the lock, even when the copy throws.
        bitmap.UnlockBits(bitmapData);
    }

    return tensor;
}

/// <summary>
/// Runs the inference session on an image and returns the output tensors
/// named by the model description.
/// </summary>
/// <param name="image">
/// Input image. It is resized first when its size differs from the model input size.
/// </param>
/// <returns>One tensor per entry of the model's output names, in the same order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public DenseTensor<float>[] Inference(Image image)
{
    if (image == null)
    {
        throw new ArgumentNullException(nameof(image));
    }

    Bitmap resized = null;
    Tensor<float> pixels;

    try
    {
        if (image.Width != _model.Width || image.Height != _model.Height)
        {
            resized = ResizeImage(image); // fit image size to specified input size
        }

        pixels = ExtractPixels(resized ?? image);
    }
    finally
    {
        // The pixels now live in the tensor, so the temporary bitmap can go.
        resized?.Dispose();
    }

    var inputs = new List<NamedOnnxValue> // add image as onnx input
    {
        NamedOnnxValue.CreateFromTensor("images", pixels)
    };

    // NOTE(review): the result collection is intentionally left undisposed here;
    // the returned tensors presumably reference memory it owns - confirm before
    // adding a Dispose call.
    IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result = _inferenceSession.Run(inputs); // run inference

    var output = new List<DenseTensor<float>>();

    foreach (var item in _model.Outputs) // add outputs for processing
    {
        output.Add(result.First(x => x.Name == item).Value as DenseTensor<float>);
    }

    return output.ToArray();
}

/// <summary>
/// Parses the detect-layer output into predictions.
/// </summary>
/// <param name="output">
/// Network output, read as [0, candidate, value]: values 0..3 are used as the
/// box center x, center y, width and height, value 4 as the object confidence
/// and values 5.. as the per-class confidences. The tensor is not modified.
/// </param>
/// <param name="image">Original image; its size is used to map boxes back from model space.</param>
/// <returns>
/// One prediction per (candidate, class) pair whose confidences exceed the
/// model thresholds. The order of the list is not deterministic.
/// </returns>
public List<YoloPrediction> ParseDetect(DenseTensor<float> output, Image image)
{
    var result = new ConcurrentBag<YoloPrediction>();

    var (w, h) = (image.Width, image.Height); // image w and h
    var (xGain, yGain) = (_model.Width / (float)w, _model.Height / (float)h); // x, y gains
    var gain = Math.Min(xGain, yGain); // gain = resized / original

    var (xPad, yPad) = ((_model.Width - w * gain) / 2, (_model.Height - h * gain) / 2); // left, right pads

    Parallel.For(0, (int)output.Length / _model.Dimensions, (i) =>
    {
        float objConfidence = output[0, i, 4];

        if (objConfidence <= _model.Confidence)
        {
            return; // skip low obj_conf results
        }

        // The box is shared by every class of this candidate: compute it once.
        float xMin = ((output[0, i, 0] - output[0, i, 2] / 2) - xPad) / gain; // unpad bbox tlx to original
        float yMin = ((output[0, i, 1] - output[0, i, 3] / 2) - yPad) / gain; // unpad bbox tly to original
        float xMax = ((output[0, i, 0] + output[0, i, 2] / 2) - xPad) / gain; // unpad bbox brx to original
        float yMax = ((output[0, i, 1] + output[0, i, 3] / 2) - yPad) / gain; // unpad bbox bry to original

        xMin = Clamp(xMin, 0, w - 0); // clip bbox tlx to boundaries
        yMin = Clamp(yMin, 0, h - 0); // clip bbox tly to boundaries
        xMax = Clamp(xMax, 0, w - 1); // clip bbox brx to boundaries
        yMax = Clamp(yMax, 0, h - 1); // clip bbox bry to boundaries

        var rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin);

        for (int k = 5; k < _model.Dimensions; k++)
        {
            // mul_conf = obj_conf * cls_conf, kept in a local so the caller's
            // tensor stays untouched and the method can be called repeatedly.
            float mulConfidence = output[0, i, k] * objConfidence;

            if (mulConfidence <= _model.MulConfidence)
            {
                continue; // skip low mul_conf results
            }

            YoloLabel label = _model.Labels[k - 5];

            result.Add(new YoloPrediction(label, mulConfidence)
            {
                Rectangle = rectangle
            });
        }
    });

    return result.ToList();
}

/// <summary>
/// Parses raw (pre-detect) network outputs into predictions: applies the
/// sigmoid, decodes boxes from grid cells, strides and anchors, and keeps the
/// best-scoring class of every cell that passes the model thresholds.
/// </summary>
/// <param name="output">
/// One tensor per output layer, read linearly as
/// [anchor][row][column][value] with the model's value count per cell.
/// </param>
/// <param name="image">Original image; its size is used to map boxes back from model space.</param>
/// <returns>The accepted predictions. The order of the list is not deterministic.</returns>
public List<YoloPrediction> ParseSigmoid(DenseTensor<float>[] output, Image image)
{
    var result = new ConcurrentBag<YoloPrediction>();

    var (w, h) = (image.Width, image.Height); // image w and h
    var (xGain, yGain) = (_model.Width / (float)w, _model.Height / (float)h); // x, y gains
    var gain = Math.Min(xGain, yGain); // gain = resized / original

    var (xPad, yPad) = ((_model.Width - w * gain) / 2, (_model.Height - h * gain) / 2); // left, right pads

    Parallel.For(0, output.Length, (i) => // iterate model outputs
    {
        DenseTensor<float> layer = output[i];
        int shapes = _model.Shapes[i]; // shapes per output

        // Use this layer's own anchor list, matching the Anchors[i][a] reads below.
        Parallel.For(0, _model.Anchors[i].Length, (a) => // iterate anchors
        {
            Parallel.For(0, shapes, (y) => // iterate shapes (rows)
            {
                for (int x = 0; x < shapes; x++) // iterate shapes (columns)
                {
                    int offset = (shapes * shapes * a + shapes * y + x) * _model.Dimensions;

                    // Read values by linear index instead of Skip/Take, which
                    // rescans the tensor from its start for every cell.
                    float objConfidence = Sigmoid(layer.GetValue(offset + 4));

                    if (objConfidence <= _model.Confidence)
                    {
                        continue; // skip low obj_conf results
                    }

                    // Find the first class with the highest mul_conf = obj_conf * cls_conf.
                    int bestClass = -1;
                    float mulConfidence = float.NegativeInfinity;

                    for (int c = 5; c < _model.Dimensions; c++)
                    {
                        float score = Sigmoid(layer.GetValue(offset + c)) * objConfidence;

                        if (score > mulConfidence)
                        {
                            mulConfidence = score;
                            bestClass = c - 5;
                        }
                    }

                    if (bestClass < 0 || mulConfidence <= _model.MulConfidence)
                    {
                        continue; // skip low mul_conf results
                    }

                    float sx = Sigmoid(layer.GetValue(offset + 0));
                    float sy = Sigmoid(layer.GetValue(offset + 1));
                    float sw = Sigmoid(layer.GetValue(offset + 2));
                    float sh = Sigmoid(layer.GetValue(offset + 3));

                    float rawX = (sx * 2 - 0.5f + x) * _model.Strides[i]; // predicted bbox x (center)
                    float rawY = (sy * 2 - 0.5f + y) * _model.Strides[i]; // predicted bbox y (center)

                    float rawW = (float)Math.Pow(sw * 2, 2) * _model.Anchors[i][a][0]; // predicted bbox w
                    float rawH = (float)Math.Pow(sh * 2, 2) * _model.Anchors[i][a][1]; // predicted bbox h

                    float[] xyxy = Xywh2xyxy(new float[] { rawX, rawY, rawW, rawH });

                    float xMin = Clamp((xyxy[0] - xPad) / gain, 0, w - 0); // unpad, clip tlx
                    float yMin = Clamp((xyxy[1] - yPad) / gain, 0, h - 0); // unpad, clip tly
                    float xMax = Clamp((xyxy[2] - xPad) / gain, 0, w - 1); // unpad, clip brx
                    float yMax = Clamp((xyxy[3] - yPad) / gain, 0, h - 1); // unpad, clip bry

                    YoloLabel label = _model.Labels[bestClass];

                    result.Add(new YoloPrediction(label, mulConfidence)
                    {
                        Rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
                    });
                }
            });
        });
    });

    return result.ToList();
}

/// <summary>
/// Parses network outputs into predictions, using the detect-layer parser
/// when the model is flagged as using the detect layer and the sigmoid
/// parser otherwise.
/// </summary>
/// <param name="output">Network output tensors; only the first one is used in detect mode.</param>
/// <param name="image">Original image, forwarded to the selected parser.</param>
/// <returns>The predictions produced by the selected parser.</returns>
public List<YoloPrediction> ParseOutput(DenseTensor<float>[] output, Image image)
{
    if (_model.UseDetect)
    {
        return ParseDetect(output[0], image);
    }

    return ParseSigmoid(output, image);
}

/// <summary>
/// Removes overlapping duplicates (non-maximum suppression).
/// A prediction is dropped when another prediction overlaps it by at least
/// the model's overlap threshold (intersection over union) and outranks it.
/// </summary>
/// <remarks>
/// Ranking is by score; equal scores are ranked by position in
/// <paramref name="items"/>, so exactly one of two tied duplicates survives.
/// A prediction that has itself been dropped still suppresses the
/// predictions it outranks.
/// </remarks>
/// <param name="items">Candidate predictions; the list is not modified.</param>
/// <returns>A new list with the surviving predictions in their original order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
public List<YoloPrediction> Supress(List<YoloPrediction> items)
{
    if (items == null)
    {
        throw new ArgumentNullException(nameof(items));
    }

    int count = items.Count;
    var suppressed = new bool[count]; // flags replace the per-iteration list copies

    for (int i = 0; i < count; i++) // iterate every prediction
    {
        YoloPrediction item = items[i];
        RectangleF itemRect = item.Rectangle;
        float itemArea = Area(itemRect);

        for (int j = 0; j < count; j++)
        {
            if (j == i || suppressed[j])
            {
                continue;
            }

            YoloPrediction current = items[j];

            // Ties are broken by index so two equal-score duplicates cannot
            // eliminate each other.
            bool outranks = item.Score > current.Score
                || (item.Score == current.Score && i < j);

            if (!outranks)
            {
                continue;
            }

            RectangleF currentRect = current.Rectangle;

            float intArea = Area(RectangleF.Intersect(itemRect, currentRect)); // intersection area
            float unionArea = itemArea + Area(currentRect) - intArea; // union area
            float overlap = intArea / unionArea; // overlap ratio (NaN for two empty boxes: never suppresses)

            if (overlap >= _model.Overlap)
            {
                suppressed[j] = true;
            }
        }
    }

    var result = new List<YoloPrediction>(count);

    for (int i = 0; i < count; i++)
    {
        if (!suppressed[i])
        {
            result.Add(items[i]);
        }
    }

    return result;
}

/// <summary>
/// Runs object detection on an image: inference, output parsing and
/// removal of overlapping duplicates.
/// </summary>
/// <param name="image">Image to analyse.</param>
/// <returns>The predictions that survive duplicate removal.</returns>
public List<YoloPrediction> Predict(Image image)
{
    DenseTensor<float>[] rawOutputs = Inference(image);
    List<YoloPrediction> candidates = ParseOutput(rawOutputs, image);

    return Supress(candidates);
}

/// <summary>
/// Creates a new instance of YoloScorer with a default-constructed model
/// description. No inference session is created by this constructor.
/// </summary>
public YoloScorer()
{
    _model = Activator.CreateInstance<T>();
}

/// <summary>
/// Creates a new instance of YoloScorer from a weights file path and options.
/// </summary>
/// <param name="weights">Path of the weights file; the whole file is read into memory.</param>
/// <param name="opts">Session options; a new default instance is used when null.</param>
public YoloScorer(string weights, SessionOptions opts = null)
    : this()
{
    byte[] modelBytes = File.ReadAllBytes(weights);
    SessionOptions sessionOptions = opts ?? new SessionOptions();

    _inferenceSession = new InferenceSession(modelBytes, sessionOptions);
}

/// <summary>
/// Creates a new instance of YoloScorer from a weights stream and options.
/// </summary>
/// <param name="weights">
/// Stream holding the weights. It is read from its current position to the
/// end and is closed afterwards. The stream does not need to be seekable.
/// </param>
/// <param name="opts">Session options; a new default instance is used when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="weights"/> is null.</exception>
public YoloScorer(Stream weights, SessionOptions opts = null)
    : this()
{
    if (weights == null)
    {
        throw new ArgumentNullException(nameof(weights));
    }

    byte[] modelBytes;

    // Copy instead of relying on Stream.Length, which non-seekable streams
    // do not support. The stream is still closed here, as before.
    using (weights)
    using (var buffer = new MemoryStream())
    {
        weights.CopyTo(buffer);
        modelBytes = buffer.ToArray();
    }

    _inferenceSession = new InferenceSession(modelBytes, opts ?? new SessionOptions());
}

/// <summary>
/// Creates a new instance of YoloScorer from in-memory weights and options.
/// </summary>
/// <param name="weights">Serialized model bytes.</param>
/// <param name="opts">Session options; a new default instance is used when null.</param>
public YoloScorer(byte[] weights, SessionOptions opts = null)
    : this()
{
    SessionOptions sessionOptions = opts ?? new SessionOptions();

    _inferenceSession = new InferenceSession(weights, sessionOptions);
}

/// <summary>
/// Computes the area of a rectangle.
/// </summary>
/// <param name="source">Rectangle to measure.</param>
/// <returns>Width multiplied by height.</returns>
public float Area(RectangleF source)
{
    float width = source.Width;
    float height = source.Height;

    return width * height;
}

/// <summary>
/// Disposes the YoloScorer instance by releasing its inference session.
/// </summary>
public void Dispose()
{
    // The session is null when the instance was created with the
    // parameterless constructor; disposing must not throw in that case.
    _inferenceSession?.Dispose();
}
} // class YoloScorer
} // namespace yolov5_onnx.Mod