You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
154 lines
4.2 KiB
C#
154 lines
4.2 KiB
C#
using MathNet.Numerics.LinearAlgebra;
|
|
using MathNet.Numerics.LinearAlgebra.Double;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Data;
|
|
using System.Diagnostics;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Threading.Tasks;
|
|
using Validation.Algorithms;
|
|
using Validation.Core;
|
|
|
|
namespace Validation.Rule.Data
|
|
{
|
|
/// <summary>
/// Validation rule named "MCD" that flags rows of a <see cref="DataTable"/> whose
/// x / y / z values are outliers according to the distances returned by
/// <see cref="RobustMahalanobis"/>.
/// </summary>
/// <remarks>
/// Rows in which any of the three cells is null, <see cref="DBNull"/>, non-numeric,
/// NaN or infinite are skipped: they are neither fed to the estimator nor reported.
/// Reported row indices always refer to positions in the original table, even when
/// rows were skipped.
/// </remarks>
public class MCDValidationRule : IValidationRule
{
    /// <summary>Row index used for errors that concern the table as a whole.</summary>
    private const int TableLevelRowIndex = -1;

    /// <summary>
    /// Minimum number of usable rows required before distances are computed.
    /// Mirrors the bound the original code only asserted in debug builds.
    /// </summary>
    private const int MinimumValidRows = 3;

    /// <summary>
    /// Cutoff applied to the distance after taking its square root.
    /// NOTE(review): 7.81 is close to the 0.95 chi-square quantile for 3 degrees of
    /// freedom (about 7.815), which is presumably the intent - confirm. This also
    /// assumes <see cref="RobustMahalanobis"/> returns non-squared distances.
    /// </summary>
    private const double SquaredDistanceCutoff = 7.81;

    /// <summary>Value passed as <c>numRandomStarts</c> to <see cref="RobustMahalanobis"/>.</summary>
    private const int RandomStartCount = 100;

    string IValidationRule.RuleName => "MCD";

    string IValidationRule.Description => "检测 x y z 是否有异常值";

    private readonly string xColumn;

    private readonly string yColumn;

    private readonly string zColumn;

    /// <summary>
    /// Creates the rule for the three named columns.
    /// </summary>
    /// <param name="xColumn">Name of the column holding the x values.</param>
    /// <param name="yColumn">Name of the column holding the y values.</param>
    /// <param name="zColumn">Name of the column holding the z values.</param>
    public MCDValidationRule(string xColumn, string yColumn, string zColumn)
    {
        this.xColumn = xColumn;
        this.yColumn = yColumn;
        this.zColumn = zColumn;
    }

    /// <summary>
    /// Runs the outlier check over the whole table.
    /// </summary>
    /// <param name="dataTable">Table containing the three configured columns.</param>
    /// <returns>
    /// A result with one error per outlier row (keyed by the row's index in
    /// <paramref name="dataTable"/>), or a single error at row -1 when a required
    /// column is missing or fewer than three rows hold usable numeric values.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="dataTable"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The estimator returned a number of distances different from the number of rows supplied.
    /// </exception>
    ValidationResult IValidationRule.Validate(DataTable dataTable)
    {
        if (dataTable == null)
        {
            throw new ArgumentNullException(nameof(dataTable));
        }

        if (!ValidateRequiredColumns(dataTable))
        {
            return CreateTableLevelError("x y z 列都不能为空");
        }

        // sourceRowIndices[k] is the table row that produced points[k]; this is what
        // lets outlier positions be translated back after invalid rows are dropped.
        var sourceRowIndices = new List<int>();
        var points = new List<double[]>();
        CollectNumericRows(dataTable, sourceRowIndices, points);

        // A data-dependent condition must be a runtime check: Debug.Assert is
        // compiled out of release builds and would let an undersized (possibly
        // empty) matrix reach the estimator.
        if (points.Count < MinimumValidRows)
        {
            return CreateTableLevelError($"有效数据行少于 {MinimumValidRows} 行，无法检测异常值");
        }

        var dataMatrix = BuildDataMatrix(points);

        var calculator = new RobustMahalanobis(numRandomStarts: RandomStartCount);
        double[] distances = calculator.Calculate(dataMatrix);

        if (distances.Length != points.Count)
        {
            throw new InvalidOperationException(
                "RobustMahalanobis returned " + distances.Length
                + " distances for " + points.Count + " rows.");
        }

        var outliers = IdentifyOutliers(distances);

        return BuildValidationResult(outliers, sourceRowIndices);
    }

    /// <summary>
    /// Checks that all three configured columns exist in the table.
    /// </summary>
    private bool ValidateRequiredColumns(DataTable dataTable)
    {
        foreach (var name in GetColumnNames())
        {
            // A null name can never match a column; test it here instead of
            // handing it to the column lookup.
            if (name == null || !dataTable.Columns.Contains(name))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Returns the configured column names in x, y, z order.
    /// </summary>
    private string[] GetColumnNames()
    {
        return new[] { xColumn, yColumn, zColumn };
    }

    /// <summary>
    /// Builds a result holding a single error that is not tied to a specific row.
    /// </summary>
    private static ValidationResult CreateTableLevelError(string message)
    {
        var result = new ValidationResult();
        result.AddError(TableLevelRowIndex, message);
        return result;
    }

    /// <summary>
    /// Converts a cell to a finite double.
    /// </summary>
    /// <remarks>
    /// Validation and conversion happen in one step so they cannot disagree.
    /// Numeric cells are converted directly; everything else goes through its
    /// string form using the current culture, as the original code did.
    /// </remarks>
    /// <returns>False for null, DBNull, unparsable, NaN and infinite cells.</returns>
    private static bool TryReadNumber(object cell, out double number)
    {
        number = 0.0;

        if (cell == null || cell == DBNull.Value)
        {
            return false;
        }

        switch (cell)
        {
            case double _:
            case float _:
            case decimal _:
            case sbyte _:
            case byte _:
            case short _:
            case ushort _:
            case int _:
            case uint _:
            case long _:
            case ulong _:
                number = Convert.ToDouble(cell);
                break;

            default:
                if (!double.TryParse(cell.ToString(), out number))
                {
                    return false;
                }

                break;
        }

        // Non-finite coordinates are treated like other unusable cells.
        return !double.IsNaN(number) && !double.IsInfinity(number);
    }

    /// <summary>
    /// Walks the table once and collects every row whose three cells are usable,
    /// recording the row's original index alongside its values.
    /// </summary>
    /// <param name="dataTable">Table to read.</param>
    /// <param name="sourceRowIndices">Receives the table index of each accepted row.</param>
    /// <param name="points">Receives the x, y, z values of each accepted row.</param>
    private void CollectNumericRows(DataTable dataTable, List<int> sourceRowIndices, List<double[]> points)
    {
        var columns = GetColumnNames();

        for (int rowIndex = 0; rowIndex < dataTable.Rows.Count; rowIndex++)
        {
            var row = dataTable.Rows[rowIndex];
            var point = new double[columns.Length];
            bool usable = true;

            for (int j = 0; j < columns.Length; j++)
            {
                if (!TryReadNumber(row[columns[j]], out point[j]))
                {
                    usable = false;
                    break;
                }
            }

            if (usable)
            {
                sourceRowIndices.Add(rowIndex);
                points.Add(point);
            }
        }
    }

    /// <summary>
    /// Builds the data matrix: one row per collected point, one column per coordinate.
    /// </summary>
    /// <param name="points">Non-empty list of equally sized coordinate arrays.</param>
    private static Matrix<double> BuildDataMatrix(List<double[]> points)
    {
        int columnCount = points[0].Length;
        var matrix = DenseMatrix.Create(points.Count, columnCount, 0.0);

        for (int i = 0; i < points.Count; i++)
        {
            for (int j = 0; j < columnCount; j++)
            {
                matrix[i, j] = points[i][j];
            }
        }

        return matrix;
    }

    /// <summary>
    /// Returns the positions of all distances strictly greater than the cutoff.
    /// </summary>
    /// <param name="distances">Distances, one per matrix row.</param>
    /// <returns>Zero-based positions into <paramref name="distances"/>; empty when none exceed the cutoff.</returns>
    private int[] IdentifyOutliers(double[] distances)
    {
        if (distances.Length == 0)
        {
            return Array.Empty<int>();
        }

        double threshold = Math.Sqrt(SquaredDistanceCutoff);
        var flagged = new List<int>();

        for (int i = 0; i < distances.Length; i++)
        {
            if (distances[i] > threshold)
            {
                flagged.Add(i);
            }
        }

        return flagged.ToArray();
    }

    /// <summary>
    /// Creates the result, translating each outlier position back to the index of
    /// the table row it came from.
    /// </summary>
    /// <param name="outlierIndices">Positions into the data matrix.</param>
    /// <param name="sourceRowIndices">Table row index for each matrix row.</param>
    private ValidationResult BuildValidationResult(int[] outlierIndices, List<int> sourceRowIndices)
    {
        var result = new ValidationResult();

        foreach (int matrixRow in outlierIndices)
        {
            int rowIndex = sourceRowIndices[matrixRow];

            // Report the table row both as the error key and in the message
            // (the original message printed the loop counter instead).
            result.AddError(rowIndex, $"行 {rowIndex} 为异常值");
        }

        return result;
    }

    /// <summary>
    /// Per-row validation entry point; not implemented because this rule needs
    /// the whole table to compute distances.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public ValidationResult Validate(string[] headers, int rowIndex, object[] values)
    {
        throw new NotImplementedException();
    }
}
|
|
}
|