using System;
using System.Linq;
using MathNet.Numerics.LinearAlgebra;
using MathNet.Numerics.LinearAlgebra.Double;

namespace Validation.Algorithms
{
    /// <summary>
    /// Robust Mahalanobis distance calculator based on the Minimum Covariance
    /// Determinant (MCD) estimator. Python's sklearn <c>MinCovDet</c> uses the
    /// same approach; there is no equivalent C# library, hence this implementation.
    /// </summary>
    public class RobustMahalanobis
    {
        private readonly int _h;                       // Subset size (0 = derive from data as max(p+1, (n+p+1)/2)).
        private readonly int _maxIterations;           // Maximum C-step iterations per MCD run.
        private readonly double _convergenceThreshold; // Determinant-change tolerance for convergence.
        private readonly double _regularization;       // Diagonal loading to avoid a singular covariance.
        private readonly int _numRandomStarts;         // Number of random initial subsets (more = more robust).

        /// <summary>
        /// Creates a robust Mahalanobis distance calculator.
        /// </summary>
        /// <param name="h">Subset size; when null it is derived per dataset as max(p+1, (n+p+1)/2).</param>
        /// <param name="maxIterations">Maximum number of C-step iterations per MCD run. Must be positive.</param>
        /// <param name="convergenceThreshold">Threshold on the determinant change used to declare convergence.</param>
        /// <param name="regularization">Value added to the covariance diagonal to keep it invertible.</param>
        /// <param name="numRandomStarts">Number of random initial subsets to try. Must be positive.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="maxIterations"/> or <paramref name="numRandomStarts"/> is not positive.
        /// </exception>
        public RobustMahalanobis(
            int? h = null,
            int maxIterations = 100,
            double convergenceThreshold = 1e-6,
            double regularization = 1e-8,
            int numRandomStarts = 50)
        {
            if (maxIterations <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(maxIterations), "Must be positive.");
            }
            if (numRandomStarts <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(numRandomStarts), "Must be positive.");
            }

            _maxIterations = maxIterations;
            _convergenceThreshold = convergenceThreshold;
            _regularization = regularization;
            _numRandomStarts = numRandomStarts;
            _h = h ?? 0; // 0 acts as the "auto" sentinel, resolved in ComputeRobustEstimates.
        }

        /// <summary>
        /// Computes the robust Mahalanobis distance of every row (sample) in <paramref name="data"/>.
        /// </summary>
        /// <param name="data">An n-by-p matrix: one row per sample, one column per feature.</param>
        /// <returns>An array of n distances, one per row of <paramref name="data"/>.</returns>
        public double[] Calculate(Matrix<double> data)
        {
            var (mean, covariance) = ComputeRobustEstimates(data);
            return CalculateDistances(data, mean, covariance);
        }

        /// <summary>
        /// Computes the robust (MCD) location and scatter estimates: runs the
        /// C-step iteration from several random initial subsets and keeps the
        /// solution with the smallest covariance determinant.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when h does not satisfy p &lt; h &lt;= n.</exception>
        private (Vector<double> mean, Matrix<double> covariance) ComputeRobustEstimates(Matrix<double> data)
        {
            int n = data.RowCount;
            int p = data.ColumnCount;
            int h = _h > 0 ? _h : Math.Max(p + 1, (n + p + 1) / 2);
            if (h <= p || h > n)
            {
                throw new ArgumentException($"h must satisfy p < h <= n, where h={h}, p={p}, n={n}");
            }

            // Fixed seed keeps the estimator deterministic across runs.
            var random = new Random(42);

            (Vector<double> mean, Matrix<double> cov)? best = null;
            double bestDet = double.MaxValue;

            for (int i = 0; i < _numRandomStarts; i++)
            {
                var indices = GetRandomIndices(n, h, random);
                var subset = ExtractRows(data, indices);
                var estimate = RunMCD(data, subset, h);

                // Regularize before comparing determinants so near-singular
                // candidates are judged on the matrix actually returned.
                var covReg = AddRegularization(estimate.cov, _regularization);
                double det = covReg.Determinant();
                if (det > 0 && det < bestDet)
                {
                    bestDet = det;
                    best = (estimate.mean, covReg);
                }
            }

            if (best.HasValue)
            {
                return best.Value;
            }

            // Fallback: classical mean/covariance. Regularize it as well —
            // otherwise this path could hand a singular matrix to the
            // distance computation while every accepted candidate is regularized.
            var (fallbackMean, fallbackCov) = ComputeMeanCovariance(data);
            return (fallbackMean, AddRegularization(fallbackCov, _regularization));
        }

        /// <summary>
        /// Runs the MCD C-step iteration starting from one initial subset:
        /// repeatedly re-estimates mean/covariance on the h samples with the
        /// smallest current distances until the determinant stops changing.
        /// </summary>
        private (Vector<double> mean, Matrix<double> cov) RunMCD(Matrix<double> data, Matrix<double> initialSubset, int h)
        {
            var (mean, cov) = ComputeMeanCovariance(initialSubset);

            for (int iter = 0; iter < _maxIterations; iter++)
            {
                var distances = CalculateDistances(data, mean, cov);
                var bestIndices = GetBestIndices(distances, h);
                var newSubset = ExtractRows(data, bestIndices);
                var (newMean, newCov) = ComputeMeanCovariance(newSubset);

                double detOld = cov.Determinant();
                double detNew = newCov.Determinant();
                if (HasConverged(detOld, detNew))
                {
                    return (newMean, newCov);
                }

                mean = newMean;
                cov = newCov;
            }

            return (mean, cov); // Iteration limit reached; return the last estimate.
        }

        /// <summary>
        /// Convergence test for the C-step. A NaN or non-positive determinant
        /// also stops the iteration: the estimate has degenerated and further
        /// C-steps cannot improve it.
        /// </summary>
        private bool HasConverged(double prevDet, double currentDet)
        {
            return Math.Abs(prevDet - currentDet) < _convergenceThreshold
                || double.IsNaN(currentDet)
                || currentDet <= 0;
        }

        /// <summary>Returns the indices of the h samples with the smallest distances.</summary>
        private static int[] GetBestIndices(double[] distances, int h)
        {
            return distances
                .Select((dist, idx) => new { dist, idx })
                .OrderBy(x => x.dist)
                .Take(h)
                .Select(x => x.idx)
                .ToArray();
        }

        /// <summary>Draws h distinct row indices from [0, n) uniformly at random.</summary>
        private static int[] GetRandomIndices(int n, int h, Random random)
        {
            return Enumerable.Range(0, n).OrderBy(_ => random.Next()).Take(h).ToArray();
        }

        /// <summary>
        /// Classical sample mean and (unbiased, n-1 denominator) covariance of the rows.
        /// Degenerate subsets (n &lt;= 1) yield a zero mean and identity covariance.
        /// </summary>
        private static (Vector<double> mean, Matrix<double> cov) ComputeMeanCovariance(Matrix<double> subset)
        {
            int n = subset.RowCount;
            int p = subset.ColumnCount;
            if (n <= 1)
            {
                return (Vector<double>.Build.Dense(p), DenseMatrix.CreateIdentity(p));
            }

            var mean = Vector<double>.Build.Dense(p, i => subset.Column(i).Average());
            var centered = DenseMatrix.Create(n, p, (i, j) => subset[i, j] - mean[j]);
            var cov = centered.Transpose() * centered / (n - 1);
            return (mean, cov);
        }

        /// <summary>Computes the Mahalanobis distance of every row of <paramref name="data"/>.</summary>
        private static double[] CalculateDistances(Matrix<double> data, Vector<double> mean, Matrix<double> cov)
        {
            // Invert once; the same precision matrix is reused for every row.
            var invCov = ComputeInverseCovariance(cov);
            return data.EnumerateRows().Select(row => CalculateDistance(row, mean, invCov)).ToArray();
        }

        /// <summary>
        /// Mahalanobis distance of a single point: sqrt((x - mu)' * Sigma^-1 * (x - mu)).
        /// The Max(0, ...) guard absorbs tiny negative values from floating-point error.
        /// </summary>
        private static double CalculateDistance(Vector<double> point, Vector<double> mean, Matrix<double> invCov)
        {
            var diff = point - mean;
            return Math.Sqrt(Math.Max(0, diff * invCov * diff));
        }

        /// <summary>
        /// Inverts the covariance matrix, falling back to an SVD pseudo-inverse
        /// when the matrix is singular or numerically degenerate.
        /// </summary>
        private static Matrix<double> ComputeInverseCovariance(Matrix<double> cov)
        {
            try
            {
                var inverse = cov.Inverse();

                // MathNet may return Inf/NaN entries instead of throwing for a
                // near-singular matrix; only accept a fully finite inverse.
                if (inverse.Enumerate().All(v => !double.IsNaN(v) && !double.IsInfinity(v)))
                {
                    return inverse;
                }
            }
            catch
            {
                // Singular matrix: fall through to the pseudo-inverse.
            }

            return ComputePseudoInverse(cov);
        }

        /// <summary>
        /// Moore–Penrose pseudo-inverse via SVD: V * S^+ * U', where singular
        /// values below 1e-10 are treated as zero.
        /// </summary>
        private static Matrix<double> ComputePseudoInverse(Matrix<double> cov)
        {
            var svd = cov.Svd(true);
            var u = svd.U;
            var s = svd.S;
            var vt = svd.VT;
            var invS = DenseMatrix.Create(
                s.Count, s.Count,
                (i, j) => i == j ? (s[i] > 1e-10 ? 1.0 / s[i] : 0.0) : 0.0);
            return vt.Transpose() * invS * u.Transpose();
        }

        /// <summary>Returns a copy of <paramref name="cov"/> with <paramref name="reg"/> added to the diagonal.</summary>
        private static Matrix<double> AddRegularization(Matrix<double> cov, double reg)
        {
            var result = cov.Clone();
            for (int i = 0; i < cov.RowCount; i++)
            {
                result[i, i] += reg;
            }
            return result;
        }

        /// <summary>Builds a new matrix from the rows of <paramref name="data"/> selected by <paramref name="indices"/>.</summary>
        private static Matrix<double> ExtractRows(Matrix<double> data, int[] indices)
        {
            return DenseMatrix.Create(indices.Length, data.ColumnCount, (i, j) => data[indices[i], j]);
        }
    }
}