360 lines
15 KiB
C#
360 lines
15 KiB
C#
using Microsoft.ML.OnnxRuntime;
|
||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||
using System;
|
||
using System.Collections.Concurrent;
|
||
using System.Collections.Generic;
|
||
using System.Drawing;
|
||
using System.Drawing.Drawing2D;
|
||
using System.Drawing.Imaging;
|
||
using System.IO;
|
||
using System.Linq;
|
||
using System.Threading.Tasks;
|
||
|
||
|
||
|
||
namespace yolov5_onnx.Mod
|
||
{
|
||
/// <summary>
|
||
/// Yolov5 scorer.
|
||
/// </summary>
|
||
public class YoloScorer<T> : IDisposable where T : YoloModel
|
||
{
|
||
private readonly T _model; //YOLO模型
|
||
|
||
private readonly InferenceSession _inferenceSession;
|
||
|
||
/// <summary>
/// Logistic sigmoid activation; squashes a raw value into the range between 0 and 1.
/// </summary>
/// <param name="value">Raw (pre-activation) value.</param>
/// <returns><c>1 / (1 + e^(-value))</c> computed in single precision.</returns>
public float Sigmoid(float value)
{
    float decay = (float)Math.Exp(-value);

    return 1 / (1 + decay);
}
|
||
|
||
/// <summary>
/// Converts a bounding box from centre/size form (x, y, w, h) to corner form (x1, y1, x2, y2).
/// </summary>
/// <param name="source">Array whose first four elements are centre x, centre y, width and height.</param>
/// <returns>A new four-element array: left, top, right, bottom.</returns>
public float[] Xywh2xyxy(float[] source)
{
    float centerX = source[0];
    float centerY = source[1];
    float halfWidth = source[2] / 2f;
    float halfHeight = source[3] / 2f;

    return new[]
    {
        centerX - halfWidth,  // left
        centerY - halfHeight, // top
        centerX + halfWidth,  // right
        centerY + halfHeight, // bottom
    };
}
|
||
|
||
/// <summary>
/// Limits a value to the inclusive range [min, max].
/// </summary>
/// <param name="value">Value to limit.</param>
/// <param name="min">Lower bound, returned when <paramref name="value"/> is below it.</param>
/// <param name="max">Upper bound, returned when <paramref name="value"/> is above it.</param>
/// <returns>The limited value; the lower bound is checked first.</returns>
public float Clamp(float value, float min, float max)
{
    if (value < min)
    {
        return min;
    }

    if (value > max)
    {
        return max;
    }

    return value;
}
|
||
|
||
/// <summary>
/// Scales an image to the model input size while keeping its aspect ratio (letterboxing).
/// The scaled image is centred on a cleared canvas of <c>_model.Width</c> x <c>_model.Height</c>.
/// </summary>
/// <param name="image">Source image; it is not modified.</param>
/// <returns>A new bitmap of the model input size. The caller owns it and must dispose it.</returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public Bitmap ResizeImage(Image image)
{
    if (image == null)
    {
        throw new ArgumentNullException(nameof(image));
    }

    // A Graphics object cannot be created for an indexed-format bitmap, so indexed
    // sources (e.g. palettised GIF/PNG) are drawn onto a 24bpp canvas instead.
    PixelFormat format = (image.PixelFormat & PixelFormat.Indexed) != 0
        ? PixelFormat.Format24bppRgb
        : image.PixelFormat;

    var (w, h) = (image.Width, image.Height); // source width and height
    var (xRatio, yRatio) = (_model.Width / (float)w, _model.Height / (float)h); // per-axis scale factors
    var ratio = Math.Min(xRatio, yRatio); // ratio = resized / original; the smaller one keeps the whole image visible
    var (width, height) = ((int)(w * ratio), (int)(h * ratio)); // scaled size
    var (x, y) = ((_model.Width / 2) - (width / 2), (_model.Height / 2) - (height / 2)); // top-left corner that centres the scaled image
    var roi = new Rectangle(x, y, width, height); // destination region on the canvas

    var output = new Bitmap(_model.Width, _model.Height, format);

    try
    {
        using (var graphics = Graphics.FromImage(output))
        {
            graphics.Clear(Color.FromArgb(0, 0, 0, 0)); // padding area
            graphics.SmoothingMode = SmoothingMode.None;
            graphics.InterpolationMode = InterpolationMode.Bilinear;
            graphics.PixelOffsetMode = PixelOffsetMode.Half;

            graphics.DrawImage(image, roi); // draw the scaled image into the centred region
        }
    }
    catch
    {
        // Do not leak the canvas when drawing fails.
        output.Dispose();
        throw;
    }

    return output;
}
|
||
|
||
/// <summary>
/// Copies the pixels of a bitmap into a 1 x 3 x <c>_model.Height</c> x <c>_model.Width</c> float tensor.
/// Channel 0 is read from byte offset 2 of each pixel, channel 1 from offset 1 and channel 2
/// from offset 0 (R, G, B for the BGR-ordered GDI+ formats); every value is divided by 255.
/// </summary>
/// <param name="image">Image to read; it must be a <see cref="Bitmap"/>.</param>
/// <returns>The filled tensor.</returns>
/// <exception cref="NotSupportedException">The pixel format stores fewer than 3 bytes per pixel.</exception>
public Tensor<float> ExtractPixels(Image image)
{
    var bitmap = (Bitmap)image;

    int bytesPerPixel = Image.GetPixelFormatSize(bitmap.PixelFormat) / 8;

    // Three bytes are read per pixel; with a narrower format the reads would run past
    // the pixel (and, at the end of the last row, past the locked buffer).
    if (bytesPerPixel < 3)
    {
        throw new NotSupportedException(
            "Pixel format " + bitmap.PixelFormat + " is not supported; at least 24 bits per pixel are required.");
    }

    var tensor = new DenseTensor<float>(new[] { 1, 3, _model.Height, _model.Width });

    var rectangle = new Rectangle(0, 0, bitmap.Width, bitmap.Height);
    BitmapData bitmapData = bitmap.LockBits(rectangle, ImageLockMode.ReadOnly, bitmap.PixelFormat);

    try
    {
        unsafe // read the locked pixel buffer directly for speed
        {
            // One parallel task per row; the columns of a row are walked sequentially
            // because spawning a parallel task per pixel costs more than the work itself.
            Parallel.For(0, bitmapData.Height, (y) =>
            {
                byte* row = (byte*)bitmapData.Scan0 + (y * bitmapData.Stride);

                for (int x = 0; x < bitmapData.Width; x++)
                {
                    byte* pixel = row + (x * bytesPerPixel);

                    tensor[0, 0, y, x] = pixel[2] / 255.0F; // r
                    tensor[0, 1, y, x] = pixel[1] / 255.0F; // g
                    tensor[0, 2, y, x] = pixel[0] / 255.0F; // b
                }
            });
        }
    }
    finally
    {
        // Always release the lock, even when the copy throws.
        bitmap.UnlockBits(bitmapData);
    }

    return tensor;
}
|
||
|
||
/// <summary>
/// Runs the inference session on an image and returns the raw output tensors.
/// The image is letterboxed with <see cref="ResizeImage"/> first when its size differs
/// from the model input size.
/// </summary>
/// <param name="image">Image to run through the network; it is not modified.</param>
/// <returns>One tensor per name in <c>_model.Outputs</c>, in the same order.</returns>
public DenseTensor<float>[] Inference(Image image)
{
    Tensor<float> pixels;
    Bitmap resized = null;

    try
    {
        if (image.Width != _model.Width || image.Height != _model.Height)
        {
            resized = ResizeImage(image); // fit image size to the model input size
        }

        pixels = ExtractPixels(resized ?? image);
    }
    finally
    {
        // The pixels have been copied into the tensor, so the temporary bitmap is
        // no longer needed; release it instead of leaking it on every call.
        resized?.Dispose();
    }

    var inputs = new List<NamedOnnxValue> // the image is the single onnx input
    {
        NamedOnnxValue.CreateFromTensor("images", pixels)
    };

    // NOTE(review): the returned tensors presumably reference memory owned by this
    // collection, so it is deliberately not disposed here - confirm before adding a using.
    IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result = _inferenceSession.Run(inputs);

    var output = new List<DenseTensor<float>>();

    foreach (var item in _model.Outputs) // pick the configured outputs by name
    {
        output.Add(result.First(x => x.Name == item).Value as DenseTensor<float>);
    }

    return output.ToArray();
}
|
||
|
||
/// <summary>
/// Parses the output of a detect layer into predictions.
/// Each row holds box centre x, centre y, width, height, the object confidence and then
/// one confidence per class. A prediction is produced for every class whose combined score
/// (object confidence * class confidence) exceeds <c>_model.MulConfidence</c>.
/// </summary>
/// <param name="output">Detect-layer tensor, indexed as [0, row, column]. It is only read.</param>
/// <param name="image">Original image; its size is used to map boxes back from the letterboxed input.</param>
/// <returns>All predictions above the thresholds, in no particular order.</returns>
public List<YoloPrediction> ParseDetect(DenseTensor<float> output, Image image)
{
    var result = new ConcurrentBag<YoloPrediction>();

    var (w, h) = (image.Width, image.Height); // original image size
    var (xGain, yGain) = (_model.Width / (float)w, _model.Height / (float)h); // per-axis gains
    var gain = Math.Min(xGain, yGain); // gain = resized / original

    var (xPad, yPad) = ((_model.Width - w * gain) / 2, (_model.Height - h * gain) / 2); // letterbox padding on each side

    Parallel.For(0, (int)output.Length / _model.Dimensions, (i) =>
    {
        float objConfidence = output[0, i, 4];

        if (objConfidence <= _model.Confidence)
        {
            return; // skip low obj_conf rows
        }

        // The box is the same for every class of this row, so compute it once:
        // remove the padding, undo the scaling, then clip to the image.
        float xMin = ((output[0, i, 0] - output[0, i, 2] / 2) - xPad) / gain; // top-left x
        float yMin = ((output[0, i, 1] - output[0, i, 3] / 2) - yPad) / gain; // top-left y
        float xMax = ((output[0, i, 0] + output[0, i, 2] / 2) - xPad) / gain; // bottom-right x
        float yMax = ((output[0, i, 1] + output[0, i, 3] / 2) - yPad) / gain; // bottom-right y

        xMin = Clamp(xMin, 0, w - 0);
        yMin = Clamp(yMin, 0, h - 0);
        xMax = Clamp(xMax, 0, w - 1);
        yMax = Clamp(yMax, 0, h - 1);

        var rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin);

        for (int k = 5; k < _model.Dimensions; k++)
        {
            // mul_conf = obj_conf * cls_conf, kept in a local so the caller's tensor is
            // left untouched and parsing the same tensor twice gives the same scores.
            float score = output[0, i, k] * objConfidence;

            if (score <= _model.MulConfidence)
            {
                continue; // skip low mul_conf classes
            }

            YoloLabel label = _model.Labels[k - 5];

            result.Add(new YoloPrediction(label, score)
            {
                Rectangle = rectangle
            });
        }
    });

    return result.ToList();
}
|
||
|
||
/// <summary>
/// Parses raw (pre-sigmoid) network outputs into predictions.
/// Each output tensor is read as a flat sequence of cells laid out as
/// [anchor][row][column], each cell holding <c>_model.Dimensions</c> values:
/// x, y, w, h, object confidence and one confidence per class. Only the best-scoring
/// class of a cell is reported.
/// </summary>
/// <param name="output">One tensor per output layer; the tensors are only read.</param>
/// <param name="image">Original image; its size is used to map boxes back from the letterboxed input.</param>
/// <returns>All predictions above the thresholds, in no particular order.</returns>
public List<YoloPrediction> ParseSigmoid(DenseTensor<float>[] output, Image image)
{
    var result = new ConcurrentBag<YoloPrediction>();

    var (w, h) = (image.Width, image.Height); // original image size
    var (xGain, yGain) = (_model.Width / (float)w, _model.Height / (float)h); // per-axis gains
    var gain = Math.Min(xGain, yGain); // gain = resized / original

    var (xPad, yPad) = ((_model.Width - w * gain) / 2, (_model.Height - h * gain) / 2); // letterbox padding on each side

    int dimensions = _model.Dimensions;

    for (int layerIndex = 0; layerIndex < output.Length; layerIndex++) // iterate model outputs
    {
        int i = layerIndex; // stable copy for the lambda below
        DenseTensor<float> layer = output[i];
        int shapes = _model.Shapes[i]; // grid size (rows == columns) of this output

        // NOTE(review): the anchor count is taken from the first output while the anchor
        // values are read from output i - confirm all outputs have the same anchor count.
        for (int anchorIndex = 0; anchorIndex < _model.Anchors[0].Length; anchorIndex++) // iterate anchors
        {
            int a = anchorIndex; // stable copy for the lambda below

            // Rows run in parallel; the cells of a row are cheap enough to walk sequentially.
            Parallel.For(0, shapes, (y) =>
            {
                for (int x = 0; x < shapes; x++)
                {
                    // Values are read by linear index instead of enumerating the
                    // tensor from its start for every single cell.
                    int offset = (shapes * shapes * a + shapes * y + x) * dimensions;

                    float objConfidence = Sigmoid(layer.GetValue(offset + 4));

                    if (objConfidence <= _model.Confidence)
                    {
                        continue; // skip low obj_conf cells before touching the other channels
                    }

                    // mul_conf = obj_conf * cls_conf; keep the first class with the highest score.
                    int bestClass = -1;
                    float mulConfidence = 0f;

                    for (int c = 5; c < dimensions; c++)
                    {
                        float score = Sigmoid(layer.GetValue(offset + c)) * objConfidence;

                        if (bestClass < 0 || score > mulConfidence)
                        {
                            mulConfidence = score;
                            bestClass = c - 5;
                        }
                    }

                    if (bestClass < 0 || mulConfidence <= _model.MulConfidence)
                    {
                        continue; // no class channels, or best score too low
                    }

                    float rawX = (Sigmoid(layer.GetValue(offset + 0)) * 2 - 0.5f + x) * _model.Strides[i]; // predicted bbox x (centre)
                    float rawY = (Sigmoid(layer.GetValue(offset + 1)) * 2 - 0.5f + y) * _model.Strides[i]; // predicted bbox y (centre)

                    float rawW = (float)Math.Pow(Sigmoid(layer.GetValue(offset + 2)) * 2, 2) * _model.Anchors[i][a][0]; // predicted bbox w
                    float rawH = (float)Math.Pow(Sigmoid(layer.GetValue(offset + 3)) * 2, 2) * _model.Anchors[i][a][1]; // predicted bbox h

                    float[] xyxy = Xywh2xyxy(new float[] { rawX, rawY, rawW, rawH });

                    // Remove the padding, undo the scaling, then clip to the image.
                    float xMin = Clamp((xyxy[0] - xPad) / gain, 0, w - 0); // top-left x
                    float yMin = Clamp((xyxy[1] - yPad) / gain, 0, h - 0); // top-left y
                    float xMax = Clamp((xyxy[2] - xPad) / gain, 0, w - 1); // bottom-right x
                    float yMax = Clamp((xyxy[3] - yPad) / gain, 0, h - 1); // bottom-right y

                    YoloLabel label = _model.Labels[bestClass];

                    result.Add(new YoloPrediction(label, mulConfidence)
                    {
                        Rectangle = new RectangleF(xMin, yMin, xMax - xMin, yMax - yMin)
                    });
                }
            });
        }
    }

    return result.ToList();
}
|
||
|
||
/// <summary>
/// Parses the network outputs into predictions, choosing the parser from the model settings:
/// the detect-layer parser on the first tensor when <c>_model.UseDetect</c> is set,
/// otherwise the sigmoid parser on all tensors.
/// </summary>
/// <param name="output">Raw output tensors of the network.</param>
/// <param name="image">Original image the tensors were produced from.</param>
/// <returns>The parsed predictions.</returns>
public List<YoloPrediction> ParseOutput(DenseTensor<float>[] output, Image image)
{
    if (_model.UseDetect)
    {
        return ParseDetect(output[0], image);
    }

    return ParseSigmoid(output, image);
}
|
||
|
||
/// <summary>
/// Removes overlapping duplicates (non-maximum suppression).
/// A prediction is dropped when another prediction overlaps it by at least
/// <c>_model.Overlap</c> (intersection over union) and has a higher score. On equal scores
/// the prediction that comes first in <paramref name="items"/> wins, so exactly one of the
/// two survives. Only rectangles and scores are compared.
/// </summary>
/// <param name="items">Candidate predictions; the list is not modified.</param>
/// <returns>A new list with the surviving predictions in their original order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
public List<YoloPrediction> Supress(List<YoloPrediction> items)
{
    if (items == null)
    {
        throw new ArgumentNullException(nameof(items));
    }

    int count = items.Count;
    var suppressed = new bool[count]; // flags replace copying and shrinking a list on every pass

    for (int i = 0; i < count; i++) // iterate every prediction
    {
        YoloPrediction item = items[i];

        for (int j = 0; j < count; j++) // compare against every prediction still alive
        {
            if (j == i || suppressed[j])
            {
                continue;
            }

            YoloPrediction current = items[j];

            var (rect1, rect2) = (item.Rectangle, current.Rectangle);

            RectangleF intersection = RectangleF.Intersect(rect1, rect2);

            float intArea = Area(intersection); // intersection area
            float unionArea = Area(rect1) + Area(rect2) - intArea; // union area
            float overlap = intArea / unionArea; // overlap ratio; NaN (0 / 0) never passes the test below

            if (overlap >= _model.Overlap)
            {
                // Ties are broken by position; without this two equally scored
                // overlapping predictions would remove each other.
                bool itemWins = item.Score > current.Score
                    || (item.Score == current.Score && i < j);

                if (itemWins)
                {
                    suppressed[j] = true;
                }
            }
        }
    }

    var result = new List<YoloPrediction>(count);

    for (int i = 0; i < count; i++)
    {
        if (!suppressed[i])
        {
            result.Add(items[i]);
        }
    }

    return result;
}
|
||
|
||
/// <summary>
/// Runs object detection on an image: inference, output parsing, then duplicate suppression.
/// </summary>
/// <param name="image">Image to analyse.</param>
/// <returns>The predictions that remain after suppression.</returns>
public List<YoloPrediction> Predict(Image image)
{
    DenseTensor<float>[] rawOutputs = Inference(image);
    List<YoloPrediction> candidates = ParseOutput(rawOutputs, image);

    return Supress(candidates);
}
|
||
|
||
/// <summary>
/// Creates a new instance of YoloScorer.
/// Only the model description of type <typeparamref name="T"/> is instantiated here;
/// this constructor does not create an inference session.
/// </summary>
public YoloScorer()
{
    T model = Activator.CreateInstance<T>();

    _model = model;
}
|
||
|
||
/// <summary>
/// Creates a new instance of YoloScorer from a weights file.
/// </summary>
/// <param name="weights">Path of the ONNX model file; the whole file is read into memory.</param>
/// <param name="opts">Session options, or null to use the defaults.</param>
public YoloScorer(string weights, SessionOptions opts = null) : this()
{
    byte[] modelBytes = File.ReadAllBytes(weights);

    // With no caller-supplied options, use the overload without options rather than
    // allocating a SessionOptions here that nothing would ever dispose.
    _inferenceSession = opts == null
        ? new InferenceSession(modelBytes)
        : new InferenceSession(modelBytes, opts);
}
|
||
|
||
/// <summary>
/// Creates a new instance of YoloScorer from a weights stream.
/// The stream is read from its current position to its end and is closed afterwards.
/// </summary>
/// <param name="weights">Readable stream with the ONNX model; it does not need to be seekable.</param>
/// <param name="opts">Session options, or null to use the defaults.</param>
/// <exception cref="ArgumentNullException"><paramref name="weights"/> is null.</exception>
public YoloScorer(Stream weights, SessionOptions opts = null) : this()
{
    if (weights == null)
    {
        throw new ArgumentNullException(nameof(weights));
    }

    byte[] modelBytes;

    // Copying to a memory buffer avoids Stream.Length, which non-seekable streams
    // do not support. The supplied stream is still closed once consumed.
    using (weights)
    using (var buffer = new MemoryStream())
    {
        weights.CopyTo(buffer);
        modelBytes = buffer.ToArray();
    }

    // With no caller-supplied options, use the overload without options rather than
    // allocating a SessionOptions here that nothing would ever dispose.
    _inferenceSession = opts == null
        ? new InferenceSession(modelBytes)
        : new InferenceSession(modelBytes, opts);
}
|
||
|
||
/// <summary>
/// Creates a new instance of YoloScorer from weights held in memory.
/// </summary>
/// <param name="weights">Bytes of the ONNX model.</param>
/// <param name="opts">Session options, or null to use the defaults.</param>
public YoloScorer(byte[] weights, SessionOptions opts = null) : this()
{
    // With no caller-supplied options, use the overload without options rather than
    // allocating a SessionOptions here that nothing would ever dispose.
    _inferenceSession = opts == null
        ? new InferenceSession(weights)
        : new InferenceSession(weights, opts);
}
|
||
/// <summary>
/// Returns the area of a rectangle (width times height).
/// </summary>
/// <param name="source">Rectangle to measure.</param>
/// <returns>The product of the rectangle's width and height.</returns>
public float Area(RectangleF source) => source.Height * source.Width;
|
||
/// <summary>
/// Disposes the YoloScorer instance by releasing its inference session.
/// Safe to call on an instance created with the parameterless constructor,
/// which has no session.
/// </summary>
public void Dispose()
{
    // The session is null when only the parameterless constructor ran;
    // Dispose must not throw in that case.
    _inferenceSession?.Dispose();
}
|
||
|
||
|
||
}
|
||
}
|