using MathNet.Numerics.LinearAlgebra;
using MathNet.Numerics.LinearAlgebra.Double;
using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Validation.Algorithms;
using Validation.Core;

namespace Validation.Rule.Data
{
    /// <summary>
    /// Validation rule named "MCD" that flags rows of a <see cref="DataTable"/> whose
    /// x / y / z values are outliers. Distances are computed by
    /// <see cref="RobustMahalanobis"/>; a row is reported when its distance exceeds a
    /// fixed cutoff.
    /// </summary>
    /// <remarks>
    /// NOTE(review): "MCD" presumably stands for Minimum Covariance Determinant —
    /// confirm against <see cref="RobustMahalanobis"/>.
    /// </remarks>
    public class MCDValidationRule : IValidationRule
    {
        /// <summary>
        /// Smallest number of usable rows for which the distance calculation is attempted.
        /// </summary>
        private const int MinimumRowCount = 3;

        /// <summary>
        /// Cutoff applied to the squared distance. 7.81 approximates the 0.95 quantile of
        /// a chi-square distribution with 3 degrees of freedom (one per column).
        /// NOTE(review): assumes the calculator returns non-squared distances, which is
        /// why the square root of this value is used as the threshold — confirm.
        /// </summary>
        private const double SquaredDistanceCutoff = 7.81;

        string IValidationRule.RuleName => "MCD";

        string IValidationRule.Description => "检测 x y z 是否有异常值";

        private readonly string xColumn;
        private readonly string yColumn;
        private readonly string zColumn;

        /// <summary>
        /// Creates the rule for the three named columns.
        /// </summary>
        /// <param name="xColumn">Name of the column holding the x values.</param>
        /// <param name="yColumn">Name of the column holding the y values.</param>
        /// <param name="zColumn">Name of the column holding the z values.</param>
        public MCDValidationRule(string xColumn, string yColumn, string zColumn)
        {
            this.xColumn = xColumn;
            this.yColumn = yColumn;
            this.zColumn = zColumn;
        }

        /// <summary>
        /// Validates the whole table and reports every row identified as an outlier.
        /// </summary>
        /// <param name="dataTable">Table containing the configured x / y / z columns.</param>
        /// <returns>
        /// A result with one error per outlier row, keyed by the row's index in
        /// <paramref name="dataTable"/>. A single error with index -1 is returned when a
        /// required column is missing or when too few usable rows exist.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="dataTable"/> is null.</exception>
        ValidationResult IValidationRule.Validate(DataTable dataTable)
        {
            if (dataTable == null)
            {
                throw new ArgumentNullException(nameof(dataTable));
            }

            if (!ValidateRequiredColumns(dataTable))
            {
                var missingColumns = new ValidationResult();
                missingColumns.AddError(-1, "x y z 列都不能为空");
                return missingColumns;
            }

            // Remember which table row each matrix row came from, so that outliers can be
            // reported against the original table even when invalid rows were skipped.
            List<int> sourceRowIndices = CollectValidRowIndices(dataTable);

            // Previously guarded only by Debug.Assert, which is compiled out of release
            // builds; report the condition instead of feeding a degenerate matrix to the
            // calculator.
            if (sourceRowIndices.Count < MinimumRowCount)
            {
                var tooFewRows = new ValidationResult();
                tooFewRows.AddError(-1, $"有效数据行少于 {MinimumRowCount} 行，无法检测异常值");
                return tooFewRows;
            }

            var dataMatrix = BuildDataMatrix(dataTable, sourceRowIndices);

            var calculator = new RobustMahalanobis(numRandomStarts: 100);
            var distances = calculator.Calculate(dataMatrix);

            var outliers = IdentifyOutliers(distances);
            return BuildValidationResult(outliers, sourceRowIndices);
        }

        /// <summary>
        /// Checks that all three configured columns exist in the table.
        /// </summary>
        private bool ValidateRequiredColumns(DataTable dataTable)
        {
            return dataTable.Columns.Contains(xColumn)
                && dataTable.Columns.Contains(yColumn)
                && dataTable.Columns.Contains(zColumn);
        }

        /// <summary>
        /// Returns true when none of the three configured cells is <see cref="DBNull"/>
        /// and each cell's string form parses as a double.
        /// </summary>
        bool IsValidRow(DataRow row)
        {
            var columns = new[] { xColumn, yColumn, zColumn };
            foreach (var col in columns)
            {
                var value = row[col];
                if (value == DBNull.Value)
                {
                    return false;
                }

                // Parsed with the current culture, matching the Convert.ToDouble call
                // used later to read the same cell.
                if (!double.TryParse(value.ToString(), out _))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Collects, in table order, the indices of the rows accepted by
        /// <see cref="IsValidRow"/>.
        /// </summary>
        private List<int> CollectValidRowIndices(DataTable dataTable)
        {
            var indices = new List<int>();
            for (int rowIndex = 0; rowIndex < dataTable.Rows.Count; rowIndex++)
            {
                if (IsValidRow(dataTable.Rows[rowIndex]))
                {
                    indices.Add(rowIndex);
                }
            }

            return indices;
        }

        /// <summary>
        /// Builds the data matrix: one matrix row per entry of
        /// <paramref name="rowIndices"/>, with the x, y and z values in columns 0, 1 and 2.
        /// </summary>
        /// <param name="dataTable">Source table.</param>
        /// <param name="rowIndices">Indices of the table rows to copy, in matrix-row order.</param>
        private Matrix BuildDataMatrix(DataTable dataTable, IList<int> rowIndices)
        {
            var columns = new[] { xColumn, yColumn, zColumn };
            var matrix = DenseMatrix.Create(rowIndices.Count, columns.Length, 0.0);

            for (int i = 0; i < rowIndices.Count; i++)
            {
                DataRow row = dataTable.Rows[rowIndices[i]];
                for (int j = 0; j < columns.Length; j++)
                {
                    matrix[i, j] = Convert.ToDouble(row[columns[j]]);
                }
            }

            return matrix;
        }

        /// <summary>
        /// Returns the positions in <paramref name="distances"/> whose value is strictly
        /// greater than the square root of <see cref="SquaredDistanceCutoff"/>.
        /// </summary>
        /// <param name="distances">Distances to test; null or empty yields an empty array.</param>
        private int[] IdentifyOutliers(double[] distances)
        {
            if (distances == null || distances.Length == 0)
            {
                return Array.Empty<int>();
            }

            double threshold = Math.Sqrt(SquaredDistanceCutoff);

            var outliers = new List<int>();
            for (int i = 0; i < distances.Length; i++)
            {
                // A NaN distance compares false here and is therefore not reported.
                if (distances[i] > threshold)
                {
                    outliers.Add(i);
                }
            }

            return outliers.ToArray();
        }

        /// <summary>
        /// Converts outlier positions in the matrix into validation errors keyed by the
        /// corresponding row index of the source table.
        /// </summary>
        /// <param name="outlierIndices">Matrix-row positions flagged as outliers.</param>
        /// <param name="sourceRowIndices">
        /// Table row index for each matrix row.
        /// NOTE(review): assumes the calculator returns exactly one distance per matrix
        /// row, so every outlier position is a valid index here — confirm.
        /// </param>
        private ValidationResult BuildValidationResult(int[] outlierIndices, IList<int> sourceRowIndices)
        {
            var result = new ValidationResult();
            foreach (int matrixRow in outlierIndices)
            {
                int tableRow = sourceRowIndices[matrixRow];
                result.AddError(tableRow, $"行 {tableRow} 为异常值");
            }

            return result;
        }

        /// <summary>
        /// Row-by-row validation is not implemented for this rule.
        /// </summary>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public ValidationResult Validate(string[] headers, int rowIndex, object[] values)
        {
            throw new NotImplementedException();
        }
    }
}