arm/yolov5_onnx/Mod/YoloScorer.cs

360 lines
15 KiB
C#
Raw Normal View History

2025-02-04 20:09:10 +08:00
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Drawing2D;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
namespace yolov5_onnx.Mod
{
/// <summary>
/// Yolov5 scorer.
/// </summary>
public class YoloScorer<T> : IDisposable where T : YoloModel
{
private readonly T _model; // YOLO model description (input size, thresholds, labels, ...) created via Activator in the parameterless constructor
// ONNX Runtime session used by Inference; only assigned by the constructors that take weights,
// so it stays null when the parameterless constructor is used on its own.
private readonly InferenceSession _inferenceSession;
/// <summary>
/// Logistic sigmoid: squashes a raw value into the range between 0 and 1.
/// </summary>
/// <param name="value">Raw (pre-activation) value.</param>
/// <returns><c>1 / (1 + e^(-value))</c>, evaluated in single precision.</returns>
public float Sigmoid(float value)
{
    float decay = (float)Math.Exp(-value);
    float denominator = 1 + decay;
    return 1 / denominator;
}
/// <summary>
/// Converts a box from center/size form (x, y, w, h) to corner form (x1, y1, x2, y2).
/// </summary>
/// <param name="source">Array holding center x, center y, width and height at indices 0..3.</param>
/// <returns>New 4-element array: left, top, right, bottom.</returns>
public float[] Xywh2xyxy(float[] source)
{
    float halfWidth = source[2] / 2f;
    float halfHeight = source[3] / 2f;
    float centerX = source[0];
    float centerY = source[1];
    return new[]
    {
        centerX - halfWidth,
        centerY - halfHeight,
        centerX + halfWidth,
        centerY + halfHeight
    };
}
/// <summary>
/// Limits a value to the inclusive range [min, max].
/// </summary>
/// <param name="value">Value to limit.</param>
/// <param name="min">Lower bound; checked first.</param>
/// <param name="max">Upper bound.</param>
/// <returns><paramref name="min"/> when the value is below it, otherwise <paramref name="max"/> when the value is above it, otherwise the value itself.</returns>
public float Clamp(float value, float min, float max)
{
    if (value < min)
    {
        return min;
    }

    if (value > max)
    {
        return max;
    }

    return value;
}
/// <summary>
/// Scales an image to fit the model input size while keeping its aspect ratio,
/// centering it on a cleared canvas of exactly model width x model height.
/// </summary>
/// <param name="image">Source image; it is not modified.</param>
/// <returns>A new bitmap of model input size. The caller owns it and must dispose it.</returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public Bitmap ResizeImage(Image image)
{
    if (image == null)
    {
        throw new ArgumentNullException(nameof(image));
    }

    // Graphics.FromImage cannot draw onto indexed (and some other) pixel formats, and
    // ExtractPixels reads one byte per channel at offsets 0..2 of each pixel. Keep the
    // source format only when it is a 24/32 bpp format; otherwise draw onto 24 bpp RGB.
    PixelFormat format = image.PixelFormat;
    switch (format)
    {
        case PixelFormat.Format24bppRgb:
        case PixelFormat.Format32bppRgb:
        case PixelFormat.Format32bppArgb:
        case PixelFormat.Format32bppPArgb:
            break;
        default:
            format = PixelFormat.Format24bppRgb;
            break;
    }

    var output = new Bitmap(_model.Width, _model.Height, format);
    var (w, h) = (image.Width, image.Height); // original image size
    var (xRatio, yRatio) = (_model.Width / (float)w, _model.Height / (float)h); // per-axis scale factors
    var ratio = Math.Min(xRatio, yRatio); // ratio = resized / original; the smaller one fits both axes
    var (width, height) = ((int)(w * ratio), (int)(h * ratio)); // scaled image size
    var (x, y) = ((_model.Width / 2) - (width / 2), (_model.Height / 2) - (height / 2)); // top-left corner that centers the scaled image
    var roi = new Rectangle(x, y, width, height); // destination rectangle on the canvas
    using (var graphics = Graphics.FromImage(output))
    {
        graphics.Clear(Color.FromArgb(0, 0, 0, 0)); // padding area is all zeros
        graphics.SmoothingMode = SmoothingMode.None; // no smoothing
        graphics.InterpolationMode = InterpolationMode.Bilinear; // bilinear interpolation
        graphics.PixelOffsetMode = PixelOffsetMode.Half; // half pixel offset
        graphics.DrawImage(image, roi); // draw the scaled image
    }

    return output;
}
/// <summary>
/// Copies the pixels of an image into a float tensor used as network input.
/// </summary>
/// <remarks>
/// The tensor has shape { 1, 3, model height, model width }. Channel 0 is filled from byte
/// offset 2 of each pixel (red), channel 1 from offset 1 (green) and channel 2 from offset 0
/// (blue); every byte is divided by 255. The loops run over the bitmap's own size, so an
/// image larger than the model input size indexes outside the tensor.
/// </remarks>
/// <param name="image">Image to read; must be a <see cref="Bitmap"/> with 3 or 4 bytes per pixel.</param>
/// <returns>The filled tensor.</returns>
/// <exception cref="NotSupportedException">The pixel format is not 24 or 32 bits per pixel.</exception>
public Tensor<float> ExtractPixels(Image image)
{
    var bitmap = (Bitmap)image;
    int bytesPerPixel = Image.GetPixelFormatSize(bitmap.PixelFormat) / 8;
    if (bytesPerPixel != 3 && bytesPerPixel != 4)
    {
        // Reading offsets 0..2 of each pixel is only meaningful for 24/32 bpp data; for
        // narrower formats it would also run past the end of the row.
        throw new NotSupportedException($"ExtractPixels supports only 24 and 32 bits per pixel images, got {bitmap.PixelFormat}.");
    }

    var tensor = new DenseTensor<float>(new[] { 1, 3, _model.Height, _model.Width });
    var rectangle = new Rectangle(0, 0, bitmap.Width, bitmap.Height);
    BitmapData bitmapData = bitmap.LockBits(rectangle, ImageLockMode.ReadOnly, bitmap.PixelFormat);
    try
    {
        int width = bitmapData.Width;
        unsafe // read the locked pixel memory directly
        {
            // One parallel task per row; pixels inside a row are handled by a plain loop
            // because a task per pixel costs far more than the three divisions it performs.
            Parallel.For(0, bitmapData.Height, (y) =>
            {
                byte* row = (byte*)bitmapData.Scan0 + (y * bitmapData.Stride);
                for (int x = 0; x < width; x++)
                {
                    int pixel = x * bytesPerPixel;
                    tensor[0, 0, y, x] = row[pixel + 2] / 255.0F; // r
                    tensor[0, 1, y, x] = row[pixel + 1] / 255.0F; // g
                    tensor[0, 2, y, x] = row[pixel + 0] / 255.0F; // b
                }
            });
        }
    }
    finally
    {
        // Always release the lock, even when the copy throws.
        bitmap.UnlockBits(bitmapData);
    }

    return tensor;
}
/// <summary>
/// Runs the inference session on an image and collects the model outputs.
/// </summary>
/// <param name="image">Input image; it is resized first when its size differs from the model input size.</param>
/// <returns>One tensor per name in the model's output list, in that order.</returns>
public DenseTensor<float>[] Inference(Image image)
{
    Tensor<float> pixels;
    if (image.Width != _model.Width || image.Height != _model.Height)
    {
        // The resized copy is only needed until its pixels are in the tensor, so release
        // it right away instead of leaving it to the finalizer.
        using (Bitmap resized = ResizeImage(image))
        {
            pixels = ExtractPixels(resized);
        }
    }
    else
    {
        pixels = ExtractPixels(image);
    }

    var inputs = new List<NamedOnnxValue> // the image is the single onnx input
    {
        NamedOnnxValue.CreateFromTensor("images", pixels)
    };

    // NOTE(review): result is not disposed here because the returned tensors presumably
    // reference memory it owns; confirm before wrapping it in a using block.
    IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result = _inferenceSession.Run(inputs);
    var output = new List<DenseTensor<float>>();
    foreach (var item in _model.Outputs) // pick the outputs in the order the model lists them
    {
        output.Add(result.First(x => x.Name == item).Value as DenseTensor<float>);
    }

    return output.ToArray();
}
/// <summary>
/// Parses the output of the detect layer into predictions.
/// </summary>
/// <remarks>
/// The tensor is read as [0, row, column] where columns 0..3 are box center x, center y,
/// width and height in model input coordinates, column 4 is the object confidence and
/// columns 5.. are per-class confidences. A prediction is emitted for every class whose
/// object confidence times class confidence exceeds the model's MulConfidence. The input
/// tensor is only read, never written, so parsing the same tensor twice gives the same result.
/// </remarks>
/// <param name="output">Detect layer output.</param>
/// <param name="image">Original image; its size is used to map boxes back from model input coordinates.</param>
/// <returns>Predictions in no particular order (rows are processed in parallel).</returns>
public List<YoloPrediction> ParseDetect(DenseTensor<float> output, Image image)
{
    var result = new ConcurrentBag<YoloPrediction>();
    var (w, h) = (image.Width, image.Height); // original image size
    var (xGain, yGain) = (_model.Width / (float)w, _model.Height / (float)h); // per-axis gains
    var gain = Math.Min(xGain, yGain); // gain = resized / original
    var (xPad, yPad) = ((_model.Width - w * gain) / 2, (_model.Height - h * gain) / 2); // padding added on each side by the resize
    Parallel.For(0, (int)output.Length / _model.Dimensions, (i) =>
    {
        float objConfidence = output[0, i, 4];
        if (objConfidence <= _model.Confidence)
        {
            return; // skip low obj_conf rows
        }

        // The box is the same for every class of this row: remove the padding, undo the
        // scaling, then clip. Top-left is clipped to [0, w] x [0, h], bottom-right to
        // [0, w - 1] x [0, h - 1].
        float xMin = Clamp(((output[0, i, 0] - output[0, i, 2] / 2) - xPad) / gain, 0, w);
        float yMin = Clamp(((output[0, i, 1] - output[0, i, 3] / 2) - yPad) / gain, 0, h);
        float xMax = Clamp(((output[0, i, 0] + output[0, i, 2] / 2) - xPad) / gain, 0, w - 1);
        float yMax = Clamp(((output[0, i, 1] + output[0, i, 3] / 2) - yPad) / gain, 0, h - 1);
        var rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin);

        for (int k = 5; k < _model.Dimensions; k++)
        {
            float score = output[0, i, k] * objConfidence; // mul_conf = obj_conf * cls_conf
            if (score <= _model.MulConfidence)
            {
                continue; // skip low mul_conf classes
            }

            YoloLabel label = _model.Labels[k - 5];
            result.Add(new YoloPrediction(label, score)
            {
                Rectangle = rectangle
            });
        }
    });
    return result.ToList();
}
/// <summary>
/// Parses raw (pre-sigmoid) network outputs into predictions.
/// </summary>
/// <remarks>
/// Each output tensor is read as a flat sequence of cells ordered by anchor, then grid row,
/// then grid column, each cell holding Dimensions values: box x, y, w, h, object confidence
/// and per-class confidences. Every value goes through <see cref="Sigmoid"/> before use.
/// For each cell only the best class is kept, and only when obj_conf * cls_conf exceeds
/// the model's MulConfidence.
/// </remarks>
/// <param name="output">One tensor per output layer, matching the model's Shapes, Strides and Anchors entries by index.</param>
/// <param name="image">Original image; its size is used to map boxes back from model input coordinates.</param>
/// <returns>Predictions in no particular order (grid rows are processed in parallel).</returns>
public List<YoloPrediction> ParseSigmoid(DenseTensor<float>[] output, Image image)
{
    var result = new ConcurrentBag<YoloPrediction>();
    var (w, h) = (image.Width, image.Height); // original image size
    var (xGain, yGain) = (_model.Width / (float)w, _model.Height / (float)h); // per-axis gains
    var gain = Math.Min(xGain, yGain); // gain = resized / original
    var (xPad, yPad) = ((_model.Width - w * gain) / 2, (_model.Height - h * gain) / 2); // padding added on each side by the resize
    int dimensions = _model.Dimensions;
    for (int i = 0; i < output.Length; i++) // iterate model outputs
    {
        DenseTensor<float> layer = output[i];
        int shapes = _model.Shapes[i]; // grid size (rows == columns) of this output
        var stride = _model.Strides[i];
        var anchors = _model.Anchors[i]; // anchors of this output, also the source of the anchor count
        for (int a = 0; a < anchors.Length; a++) // iterate anchors
        {
            var anchorW = anchors[a][0];
            var anchorH = anchors[a][1];
            int anchorOffset = shapes * shapes * a;
            Parallel.For(0, shapes, (y) => // one task per grid row
            {
                for (int x = 0; x < shapes; x++) // grid columns
                {
                    int offset = (anchorOffset + shapes * y + x) * dimensions; // first value of this cell
                    float objConfidence = Sigmoid(layer.GetValue(offset + 4));
                    if (objConfidence <= _model.Confidence)
                    {
                        continue; // skip low obj_conf cells before touching the other values
                    }

                    // Best class by mul_conf = obj_conf * cls_conf; the first one wins on ties.
                    int bestClass = -1;
                    float mulConfidence = float.NegativeInfinity;
                    for (int c = 5; c < dimensions; c++)
                    {
                        float score = Sigmoid(layer.GetValue(offset + c)) * objConfidence;
                        if (score > mulConfidence)
                        {
                            mulConfidence = score;
                            bestClass = c - 5;
                        }
                    }

                    if (bestClass < 0 || mulConfidence <= _model.MulConfidence)
                    {
                        continue; // skip low mul_conf cells
                    }

                    float rawX = (Sigmoid(layer.GetValue(offset + 0)) * 2 - 0.5f + x) * stride; // predicted bbox center x
                    float rawY = (Sigmoid(layer.GetValue(offset + 1)) * 2 - 0.5f + y) * stride; // predicted bbox center y
                    float rawW = (float)Math.Pow(Sigmoid(layer.GetValue(offset + 2)) * 2, 2) * anchorW; // predicted bbox width
                    float rawH = (float)Math.Pow(Sigmoid(layer.GetValue(offset + 3)) * 2, 2) * anchorH; // predicted bbox height
                    float[] xyxy = Xywh2xyxy(new[] { rawX, rawY, rawW, rawH });

                    // Remove the padding, undo the scaling, then clip. Top-left is clipped to
                    // [0, w] x [0, h], bottom-right to [0, w - 1] x [0, h - 1].
                    float xMin = Clamp((xyxy[0] - xPad) / gain, 0, w);
                    float yMin = Clamp((xyxy[1] - yPad) / gain, 0, h);
                    float xMax = Clamp((xyxy[2] - xPad) / gain, 0, w - 1);
                    float yMax = Clamp((xyxy[3] - yPad) / gain, 0, h - 1);

                    YoloLabel label = _model.Labels[bestClass];
                    result.Add(new YoloPrediction(label, mulConfidence)
                    {
                        Rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
                    });
                }
            });
        }
    }

    return result.ToList();
}
/// <summary>
/// Turns network outputs into predictions, choosing the parser from the model's UseDetect flag.
/// </summary>
/// <param name="output">Network outputs; only the first tensor is used when UseDetect is set.</param>
/// <param name="image">Original image, passed through to the chosen parser.</param>
/// <returns>Predictions produced by <see cref="ParseDetect"/> or <see cref="ParseSigmoid"/>.</returns>
public List<YoloPrediction> ParseOutput(DenseTensor<float>[] output, Image image)
{
    if (_model.UseDetect)
    {
        DenseTensor<float> detectLayer = output[0];
        return ParseDetect(detectLayer, image);
    }

    return ParseSigmoid(output, image);
}
/// <summary>
/// Removes overlapping duplicates with greedy non-maximum suppression.
/// </summary>
/// <remarks>
/// Predictions are visited from highest to lowest score (ties keep input order). A
/// prediction is kept unless its intersection-over-union with an already kept prediction
/// reaches the model's Overlap threshold. Only kept predictions can suppress others, so two
/// overlapping boxes with equal scores no longer eliminate each other. Labels are not
/// consulted: boxes of different classes suppress each other as well.
/// </remarks>
/// <param name="items">Candidate predictions; the list is not modified.</param>
/// <returns>A new list with the surviving predictions in their original relative order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
public List<YoloPrediction> Supress(List<YoloPrediction> items)
{
    if (items == null)
    {
        throw new ArgumentNullException(nameof(items));
    }

    // Indices ordered by descending score; OrderByDescending is stable, so ties keep input order.
    int[] byScore = Enumerable.Range(0, items.Count)
        .OrderByDescending(index => items[index].Score)
        .ToArray();

    var isKept = new bool[items.Count];
    var keptIndices = new List<int>();
    foreach (int candidate in byScore)
    {
        RectangleF candidateRect = items[candidate].Rectangle;
        bool suppressed = false;
        foreach (int kept in keptIndices)
        {
            RectangleF keptRect = items[kept].Rectangle;
            float intArea = Area(RectangleF.Intersect(candidateRect, keptRect)); // intersection area
            float unionArea = Area(candidateRect) + Area(keptRect) - intArea; // union area
            float overlap = intArea / unionArea; // NaN for two empty boxes, which compares false below
            if (overlap >= _model.Overlap)
            {
                suppressed = true;
                break;
            }
        }

        if (!suppressed)
        {
            keptIndices.Add(candidate);
            isKept[candidate] = true;
        }
    }

    // Emit survivors in input order so callers see the same ordering as before.
    var result = new List<YoloPrediction>(keptIndices.Count);
    for (int index = 0; index < items.Count; index++)
    {
        if (isKept[index])
        {
            result.Add(items[index]);
        }
    }

    return result;
}
/// <summary>
/// Runs the full detection pipeline on an image: inference, output parsing, then
/// duplicate suppression.
/// </summary>
/// <param name="image">Image to analyze.</param>
/// <returns>Predictions that remain after <see cref="Supress"/>.</returns>
public List<YoloPrediction> Predict(Image image)
{
    DenseTensor<float>[] rawOutputs = Inference(image);
    List<YoloPrediction> candidates = ParseOutput(rawOutputs, image);
    return Supress(candidates);
}
/// <summary>
/// Creates a scorer that only instantiates the model description <typeparamref name="T"/>.
/// No inference session is created by this constructor.
/// </summary>
public YoloScorer() => _model = Activator.CreateInstance<T>();
/// <summary>
/// Creates a scorer from a weights file.
/// </summary>
/// <param name="weights">Path of the ONNX model file; the whole file is read into memory.</param>
/// <param name="opts">Session options, or null to use the runtime defaults.</param>
public YoloScorer(string weights, SessionOptions opts = null) : this()
{
    byte[] model = File.ReadAllBytes(weights);

    // Without caller options, use the options-less overload rather than allocating a
    // SessionOptions here that nobody would ever dispose.
    _inferenceSession = opts == null
        ? new InferenceSession(model)
        : new InferenceSession(model, opts);
}
/// <summary>
/// Creates a scorer from a stream containing the weights.
/// </summary>
/// <remarks>
/// The stream is read from its current position to its end and is closed afterwards.
/// It does not need to be seekable.
/// </remarks>
/// <param name="weights">Readable stream with the ONNX model.</param>
/// <param name="opts">Session options, or null to use the runtime defaults.</param>
/// <exception cref="ArgumentNullException"><paramref name="weights"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="weights"/> is not readable.</exception>
public YoloScorer(Stream weights, SessionOptions opts = null) : this()
{
    if (weights == null)
    {
        throw new ArgumentNullException(nameof(weights));
    }

    if (!weights.CanRead)
    {
        throw new ArgumentException("Stream was not readable.", nameof(weights));
    }

    byte[] model;
    using (weights) // the stream has always been closed by this constructor; keep doing so
    using (var buffer = new MemoryStream())
    {
        // CopyTo does not need Stream.Length, so non-seekable streams work too.
        weights.CopyTo(buffer);
        model = buffer.ToArray();
    }

    // Without caller options, use the options-less overload rather than allocating a
    // SessionOptions here that nobody would ever dispose.
    _inferenceSession = opts == null
        ? new InferenceSession(model)
        : new InferenceSession(model, opts);
}
/// <summary>
/// Creates a scorer from weights already loaded into memory.
/// </summary>
/// <param name="weights">Bytes of the ONNX model.</param>
/// <param name="opts">Session options, or null to use the runtime defaults.</param>
public YoloScorer(byte[] weights, SessionOptions opts = null) : this()
{
    // Without caller options, use the options-less overload rather than allocating a
    // SessionOptions here that nobody would ever dispose.
    _inferenceSession = opts == null
        ? new InferenceSession(weights)
        : new InferenceSession(weights, opts);
}
/// <summary>
/// Computes the area of a rectangle as width times height.
/// </summary>
/// <param name="source">Rectangle to measure.</param>
/// <returns>Product of the rectangle's width and height.</returns>
public float Area(RectangleF source) => source.Width * source.Height;
/// <summary>
/// Releases the inference session owned by this scorer.
/// </summary>
public void Dispose()
{
    // The session is null when the instance was created with the parameterless
    // constructor; Dispose must not throw in that case.
    _inferenceSession?.Dispose();
}
}
}