arm/yolov5_onnx/Mod/YoloScorer.cs
2025-02-04 20:09:10 +08:00

360 lines
15 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Drawing2D;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
namespace yolov5_onnx.Mod
{
/// <summary>
/// Yolov5 scorer.
/// </summary>
public class YoloScorer<T> : IDisposable where T : YoloModel
{
private readonly T _model; //YOLO模型
private readonly InferenceSession _inferenceSession;
/// <summary>
/// Outputs value between 0 and 1.輸出 0 到 1 之間的值。
/// </summary>
public float Sigmoid(float value)
{
return 1 / (1 + (float)Math.Exp(-value));
}
/// <summary>
/// Converts xywh bbox format to xyxy.將 xywh bbox 格式轉換為 xyxy。
/// </summary>
public float[] Xywh2xyxy(float[] source)
{
var result = new float[4];
result[0] = source[0] - source[2] / 2f;
result[1] = source[1] - source[3] / 2f;
result[2] = source[0] + source[2] / 2f;
result[3] = source[1] + source[3] / 2f;
return result;
}
/// <summary>
/// Returns value clamped to the inclusive range of min and max.傳回限制在最小值和最大值範圍內的值
/// </summary>
public float Clamp(float value, float min, float max)
{
return (value < min) ? min : (value > max) ? max : value;
}
/// <summary>
/// Resizes image keeping ratio to fit model input size.調整影像保持比例以適合模型輸入尺寸。
/// </summary>
public Bitmap ResizeImage(Image image)
{
PixelFormat format = image.PixelFormat;
var output = new Bitmap(_model.Width, _model.Height, format);
var (w, h) = (image.Width, image.Height); // image width and height 影像寬度和高度
var (xRatio, yRatio) = (_model.Width / (float)w, _model.Height / (float)h); // x, y ratios. x、y 比率
var ratio = Math.Min(xRatio, yRatio); // ratio = resized / original 比例=調整大小/原始
var (width, height) = ((int)(w * ratio), (int)(h * ratio)); // roi width and height ROI 寬度和高度
var (x, y) = ((_model.Width / 2) - (width / 2), (_model.Height / 2) - (height / 2)); // roi x and y coordinates. ROI x 和 y 座標
var roi = new Rectangle(x, y, width, height); // region of interest. ROI
using (var graphics = Graphics.FromImage(output))
{
graphics.Clear(Color.FromArgb(0, 0, 0, 0)); // clear canvas 清空畫布
graphics.SmoothingMode = SmoothingMode.None; // no smoothing 無平滑
graphics.InterpolationMode = InterpolationMode.Bilinear; // bilinear interpolation 雙線性插值
graphics.PixelOffsetMode = PixelOffsetMode.Half; // half pixel offset 半像素偏移
graphics.DrawImage(image, roi); // draw scaled 繪製比例
}
return output;
}
/// <summary>
/// Extracts pixels into tensor for net input. 從影像中提取象素以供神經網路輸入
/// </summary>
public Tensor<float> ExtractPixels(Image image)
{
var bitmap = (Bitmap)image;
var rectangle = new Rectangle(0, 0, bitmap.Width, bitmap.Height);
BitmapData bitmapData = bitmap.LockBits(rectangle, ImageLockMode.ReadOnly, bitmap.PixelFormat);
int bytesPerPixel = Image.GetPixelFormatSize(bitmap.PixelFormat) / 8;
var tensor = new DenseTensor<float>(new[] { 1, 3, _model.Height, _model.Width });
unsafe // speed up conversion by direct work with memory 透過直接使用記憶體來加速轉換
{
Parallel.For(0, bitmapData.Height, (y) =>
{
byte* row = (byte*)bitmapData.Scan0 + (y * bitmapData.Stride);
Parallel.For(0, bitmapData.Width, (x) =>
{
tensor[0, 0, y, x] = row[x * bytesPerPixel + 2] / 255.0F; // r
tensor[0, 1, y, x] = row[x * bytesPerPixel + 1] / 255.0F; // g
tensor[0, 2, y, x] = row[x * bytesPerPixel + 0] / 255.0F; // b
});
});
bitmap.UnlockBits(bitmapData);
}
return tensor;
}
/// <summary>
/// Runs inference session. 運行推理函試
/// </summary>
public DenseTensor<float>[] Inference(Image image)
{
Bitmap resized = null;
if (image.Width != _model.Width || image.Height != _model.Height)
{
resized = ResizeImage(image); // fit image size to specified input size 使影像大小適合指定的輸入大小
}
var inputs = new List<NamedOnnxValue> // add image as onnx input 新增影像作為 onnx 輸入
{
NamedOnnxValue.CreateFromTensor("images", ExtractPixels(resized ?? image))
};
IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result = _inferenceSession.Run(inputs); // run inference 運行推理
var output = new List<DenseTensor<float>>();
foreach (var item in _model.Outputs) // add outputs for processing 添加輸出以供處理
{
output.Add(result.First(x => x.Name == item).Value as DenseTensor<float>);
};
return output.ToArray();
}
/// <summary>
/// Parses net output (detect) to predictions. 將神經網路輸出(檢測)解析為預測
/// </summary>
public List<YoloPrediction> ParseDetect(DenseTensor<float> output, Image image)
{
var result = new ConcurrentBag<YoloPrediction>();
var (w, h) = (image.Width, image.Height); // image w and h image w and h
var (xGain, yGain) = (_model.Width / (float)w, _model.Height / (float)h); // x, y gains x、y 增益
var gain = Math.Min(xGain, yGain); // gain = resized / original 增益 = 調整大小 / 原始值
var (xPad, yPad) = ((_model.Width - w * gain) / 2, (_model.Height - h * gain) / 2); // left, right pads 左、右填充
Parallel.For(0, (int)output.Length / _model.Dimensions, (i) =>
{
if (output[0, i, 4] <= _model.Confidence) return; // skip low obj_conf results 跳過低 obj_conf 結果
Parallel.For(5, _model.Dimensions, (j) =>
{
output[0, i, j] = output[0, i, j] * output[0, i, 4]; // mul_conf = obj_conf * cls_conf
});
Parallel.For(5, _model.Dimensions, (k) =>
{
if (output[0, i, k] <= _model.MulConfidence) return; // skip low mul_conf results 跳過低 mul_conf 結果
float xMin = ((output[0, i, 0] - output[0, i, 2] / 2) - xPad) / gain; // unpad bbox tlx to original 將 bbox tlx 還原為原始版本
float yMin = ((output[0, i, 1] - output[0, i, 3] / 2) - yPad) / gain; // unpad bbox tly to original
float xMax = ((output[0, i, 0] + output[0, i, 2] / 2) - xPad) / gain; // unpad bbox brx to original
float yMax = ((output[0, i, 1] + output[0, i, 3] / 2) - yPad) / gain; // unpad bbox bry to original
xMin = Clamp(xMin, 0, w - 0); // clip bbox tlx to boundaries 將 bbox tlx 裁剪到邊界
yMin = Clamp(yMin, 0, h - 0); // clip bbox tly to boundaries
xMax = Clamp(xMax, 0, w - 1); // clip bbox brx to boundaries
yMax = Clamp(yMax, 0, h - 1); // clip bbox bry to boundaries
YoloLabel label = _model.Labels[k - 5];
var prediction = new YoloPrediction(label, output[0, i, k])
{
Rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
};
result.Add(prediction);
});
});
return result.ToList();
}
/// <summary>
/// Parses net outputs (sigmoid) to predictions.將神經網路輸出(sigmoid)解析為預測
/// </summary>
public List<YoloPrediction> ParseSigmoid(DenseTensor<float>[] output, Image image)
{
var result = new ConcurrentBag<YoloPrediction>();
var (w, h) = (image.Width, image.Height); // image w and h
var (xGain, yGain) = (_model.Width / (float)w, _model.Height / (float)h); // x, y gains
var gain = Math.Min(xGain, yGain); // gain = resized / original 增益=調整後/原始
var (xPad, yPad) = ((_model.Width - w * gain) / 2, (_model.Height - h * gain) / 2); // left, right pads
Parallel.For(0, output.Length, (i) => // iterate model outputs 迭代模型輸出
{
int shapes = _model.Shapes[i]; // shapes per output 每個輸出形狀
Parallel.For(0, _model.Anchors[0].Length, (a) => // iterate anchors 迭代锚點
{
Parallel.For(0, shapes, (y) => // iterate shapes (rows) 迭代形狀(行)
{
Parallel.For(0, shapes, (x) => // iterate shapes (columns) 迭代形狀(列)
{
int offset = (shapes * shapes * a + shapes * y + x) * _model.Dimensions;
float[] buffer = output[i].Skip(offset).Take(_model.Dimensions).Select(Sigmoid).ToArray();
if (buffer[4] <= _model.Confidence) return; // skip low obj_conf results
List<float> scores = buffer.Skip(5).Select(b => b * buffer[4]).ToList(); // mul_conf = obj_conf * cls_conf
float mulConfidence = scores.Max(); // max confidence score 最大值信心分數
if (mulConfidence <= _model.MulConfidence) return; // skip low mul_conf results
float rawX = (buffer[0] * 2 - 0.5f + x) * _model.Strides[i]; // predicted bbox x (center) 預測的 bbox x中心
float rawY = (buffer[1] * 2 - 0.5f + y) * _model.Strides[i]; // predicted bbox y (center) 預測的 bbox y中心
float rawW = (float)Math.Pow(buffer[2] * 2, 2) * _model.Anchors[i][a][0]; // predicted bbox w 預測的bbox 寬度
float rawH = (float)Math.Pow(buffer[3] * 2, 2) * _model.Anchors[i][a][1]; // predicted bbox h
float[] xyxy = Xywh2xyxy(new float[] { rawX, rawY, rawW, rawH });
float xMin = Clamp((xyxy[0] - xPad) / gain, 0, w - 0); // unpad, clip tlx 取消填充,裁剪 tlx
float yMin = Clamp((xyxy[1] - yPad) / gain, 0, h - 0); // unpad, clip tly
float xMax = Clamp((xyxy[2] - xPad) / gain, 0, w - 1); // unpad, clip brx
float yMax = Clamp((xyxy[3] - yPad) / gain, 0, h - 1); // unpad, clip bry
YoloLabel label = _model.Labels[scores.IndexOf(mulConfidence)];
var prediction = new YoloPrediction(label, mulConfidence)
{
Rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
};
result.Add(prediction);
});
});
});
});
return result.ToList();
}
/// <summary>
/// Parses net outputs (sigmoid or detect layer) to predictions. 解析神經網路輸出(sigmoid或detect層)為預測
/// </summary>
public List<YoloPrediction> ParseOutput(DenseTensor<float>[] output, Image image)
{
return _model.UseDetect ? ParseDetect(output[0], image) : ParseSigmoid(output, image);
}
/// <summary>
/// Removes overlaped duplicates (nms).去除重疊的重複項(NMS)
/// </summary>
public List<YoloPrediction> Supress(List<YoloPrediction> items)
{
var result = new List<YoloPrediction>(items);
foreach (var item in items) // iterate every prediction 迭代每個預測
{
foreach (var current in result.ToList()) // make a copy for each iteration 為每次迭代製作副本
{
if (current == item) continue;
var (rect1, rect2) = (item.Rectangle, current.Rectangle);
RectangleF intersection = RectangleF.Intersect(rect1, rect2);
float intArea = Area(intersection); // intersection area 交叉區域
float unionArea = Area(rect1) + Area(rect2) - intArea; // union area 並集區域
float overlap = intArea / unionArea; // overlap ratio 重疊比例
if (overlap >= _model.Overlap)
{
if (item.Score >= current.Score)
{
result.Remove(current);
}
}
}
}
return result;
}
/// <summary>
/// Runs object detection. 運行對象檢測
/// </summary>
public List<YoloPrediction> Predict(Image image)
{
return Supress(ParseOutput(Inference(image), image));
}
/// <summary>
/// Creates new instance of YoloScorer. 創建 YoloScorer新實例
/// </summary>
public YoloScorer()
{
_model = Activator.CreateInstance<T>();
}
/// <summary>
/// Creates new instance of YoloScorer with weights path and options.使用權種路徑和選項創建YoloScorer新實例
/// </summary>
public YoloScorer(string weights, SessionOptions opts = null) : this()
{
_inferenceSession = new InferenceSession(File.ReadAllBytes(weights), opts ?? new SessionOptions());
}
/// <summary>
/// Creates new instance of YoloScorer with weights stream and options.使用權種流和選項創建YoloScorer新實例
/// </summary>
public YoloScorer(Stream weights, SessionOptions opts = null) : this()
{
using (var reader = new BinaryReader(weights))
{
_inferenceSession = new InferenceSession(reader.ReadBytes((int)weights.Length), opts ?? new SessionOptions());
}
}
/// <summary>
/// Creates new instance of YoloScorer with weights bytes and options. 使用權種字節數組和選項創建YoloScorer新實例
/// </summary>
public YoloScorer(byte[] weights, SessionOptions opts = null) : this()
{
_inferenceSession = new InferenceSession(weights, opts ?? new SessionOptions());
}
public float Area(RectangleF source)
{
return source.Width * source.Height;
}
/// <summary>
/// Disposes YoloScorer instance. 釋放YoloScorer 實例
/// </summary>
public void Dispose()
{
_inferenceSession.Dispose();
}
}
}