From 2bf6e10f8570baaf34ee930ec75f57f59092c13f Mon Sep 17 00:00:00 2001 From: HadenSmith Date: Wed, 29 Apr 2026 11:15:24 -0600 Subject: [PATCH 1/9] Use InvariantCulture for numeric parsing in serialization and external-data paths. Persistence paths that relied on CurrentCulture silently corrupted numeric data on non-US locales: a USGS daily value of "1234.56" parsed by double.TryParse on de-DE returned 123456.0 (treating "." as a group separator) rather than failing. USGS, GHCN, BOM, and the embedded Sobol resource always emit US-format numbers, so any non-US-locale user importing them today silently received NaN or wrong values. - TimeSeriesDownload.cs: add NumberStyles + InvariantCulture / DateTimeStyles to USGS daily, USGS peak, GHCN, and BOM parsing - TimeSeries.cs: fallback DateTime.TryParse in the XElement constructor now uses InvariantCulture (matches the "o" round-trip primary path) - SobolSequence.cs, LinkController.cs: int.TryParse now uses InvariantCulture for embedded-resource and XML-attribute reads - MCMCDiagnostics.cs: replace the wasteful decimal.Parse(N.ToString()) / 100M round-trip with direct double math, eliminating both the string conversion and a latent CurrentCulture dependency Display code (Summary tables, ParameterToString, DisplayLabel, exception messages) intentionally continues to use CurrentCulture so users see numbers in their locale's format. XML/JSON serialization layers were already correct and unchanged. Added a new method in MCMCResults so that parameter results can be recomputed if the credible interval width changes. --- .../Time Series/Support/TimeSeriesDownload.cs | 27 +++++----- Numerics/Data/Time Series/TimeSeries.cs | 2 +- .../Link Functions/LinkController.cs | 3 +- .../Sampling/MCMC/Support/MCMCDiagnostics.cs | 2 +- Numerics/Sampling/MCMC/Support/MCMCResults.cs | 51 ++++++++++++++++++- Numerics/Sampling/SobolSequence.cs | 9 ++-- 6 files changed, 72 insertions(+), 22 deletions(-) diff --git a/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs b/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs index 3360492a..5d75920b 100644 --- a/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs +++ b/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs @@ -221,8 +221,8 @@ public static async Task FromGHCN(string siteNumber, TimeSeriesType { // Extract year, month, and parse days - int.TryParse(line.Substring(11, 4), out var year); - int.TryParse(line.Substring(15, 2), out var month); + int.TryParse(line.Substring(11, 4), NumberStyles.Integer, CultureInfo.InvariantCulture, out var year); + int.TryParse(line.Substring(15, 2), NumberStyles.Integer, CultureInfo.InvariantCulture, out var month); int daysInMonth = DateTime.DaysInMonth(year, month); for (int i = 0; i < daysInMonth; i++) @@ -238,7 +238,7 @@ public static async Task FromGHCN(string siteNumber, TimeSeriesType } // Parse the precipitation or snow value - if (int.TryParse(strgValue, out int rawValue) && rawValue != -9999) // Valid precipitation value + if (int.TryParse(strgValue, NumberStyles.Integer, CultureInfo.InvariantCulture, out int rawValue) && rawValue != -9999) // Valid precipitation value { double convertedValue = ConvertToDesiredUnit(rawValue, unit); timeSeries.Add(new SeriesOrdinate(currentDate, convertedValue)); @@ -423,7 +423,7 @@ private static string CreateURLForUSGSDownload(string siteNumber, TimeSeriesType continue; // Parse date (assume fields[2] contains the date) - if (!DateTime.TryParse(fields[2], out DateTime index)) + if (!DateTime.TryParse(fields[2], CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime index)) { continue; } @@ -440,7 +440,8 @@ private static string CreateURLForUSGSDownload(string siteNumber, TimeSeriesType // Get and parse value string valueStr = fields[valueIndex]; - double value = string.IsNullOrWhiteSpace(valueStr) ? double.NaN : double.TryParse(valueStr, out double tempVal) ? tempVal : double.NaN; + double value = string.IsNullOrWhiteSpace(valueStr) ? double.NaN + : double.TryParse(valueStr, NumberStyles.Float, CultureInfo.InvariantCulture, out double tempVal) ? tempVal : double.NaN; timeSeries.Add(new SeriesOrdinate(index, value)); } } @@ -465,26 +466,26 @@ private static string CreateURLForUSGSDownload(string siteNumber, TimeSeriesType int month = 1; int day = 1; var dateString = segments[2].Split('-'); - DateTime.TryParse(segments[2], out index); + DateTime.TryParse(segments[2], CultureInfo.InvariantCulture, DateTimeStyles.None, out index); if (index == DateTime.MinValue) { // The date parsing failed, so try to manually parse it if (dateString[1] == "00" && dateString[2] != "00") { - int.TryParse(dateString[0], out year); - int.TryParse(dateString[2], out day); + int.TryParse(dateString[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out year); + int.TryParse(dateString[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out day); index = new DateTime(year, month, day, 0, 0, 0); } else if (dateString[1] != "00" && dateString[2] == "00") { - int.TryParse(dateString[0], out year); - int.TryParse(dateString[1], out month); + int.TryParse(dateString[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out year); + int.TryParse(dateString[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out month); index = new DateTime(year, month, day, 0, 0, 0); } else if (dateString[1] == "00" && dateString[2] == "00") { - int.TryParse(dateString[0], out year); + int.TryParse(dateString[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out year); index = new DateTime(year, month, day, 0, 0, 0); } } @@ -493,7 +494,7 @@ private static string CreateURLForUSGSDownload(string siteNumber, TimeSeriesType int idx = timeSeriesType == TimeSeriesType.PeakDischarge ? 4 : 6; if (segments[idx] != "" && segments[idx] != " " && segments[idx] != " " && !string.IsNullOrEmpty(segments[idx])) { - double.TryParse(segments[idx], out value); + double.TryParse(segments[idx], NumberStyles.Float, CultureInfo.InvariantCulture, out value); timeSeries.Add(new SeriesOrdinate(index, value)); } } @@ -1148,7 +1149,7 @@ public static async Task FromABOM( // Parse timestamp string? timestampStr = point[0].GetString(); - if (!DateTime.TryParse(timestampStr, out DateTime date)) + if (!DateTime.TryParse(timestampStr, CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime date)) continue; // Parse value diff --git a/Numerics/Data/Time Series/TimeSeries.cs b/Numerics/Data/Time Series/TimeSeries.cs index dd1ed23d..1f10cc98 100644 --- a/Numerics/Data/Time Series/TimeSeries.cs +++ b/Numerics/Data/Time Series/TimeSeries.cs @@ -127,7 +127,7 @@ public TimeSeries(XElement xElement) var valueAttr = ordinate.Attribute("Value"); if (indexAttr != null && !DateTime.TryParseExact(indexAttr.Value, "o", CultureInfo.InvariantCulture, DateTimeStyles.RoundtripKind, out index)) { - DateTime.TryParse(indexAttr.Value, out index); + DateTime.TryParse(indexAttr.Value, CultureInfo.InvariantCulture, DateTimeStyles.None, out index); } if (valueAttr != null) double.TryParse(valueAttr.Value, NumberStyles.Any, CultureInfo.InvariantCulture, out value); diff --git a/Numerics/Functions/Link Functions/LinkController.cs b/Numerics/Functions/Link Functions/LinkController.cs index 94bc6712..0c3b6d89 100644 --- a/Numerics/Functions/Link Functions/LinkController.cs +++ b/Numerics/Functions/Link Functions/LinkController.cs @@ -1,4 +1,5 @@ using System; +using System.Globalization; using System.Linq; using System.Xml.Linq; using Numerics.Mathematics.LinearAlgebra; @@ -73,7 +74,7 @@ public LinkController(XElement xElement) { var indexAttr = slot.Attribute("Index"); if (indexAttr == null) continue; - if (!int.TryParse(indexAttr.Value, out int index)) continue; + if (!int.TryParse(indexAttr.Value, NumberStyles.Integer, CultureInfo.InvariantCulture, out int index)) continue; if (index < 0 || index >= Links.Length) continue; var child = slot.Elements().FirstOrDefault(); if (child != null) diff --git a/Numerics/Sampling/MCMC/Support/MCMCDiagnostics.cs b/Numerics/Sampling/MCMC/Support/MCMCDiagnostics.cs index d4b8835a..7371f31c 100644 --- a/Numerics/Sampling/MCMC/Support/MCMCDiagnostics.cs +++ b/Numerics/Sampling/MCMC/Support/MCMCDiagnostics.cs @@ -202,7 +202,7 @@ public static int MinimumSampleSize(double quantile, double tolerance, double pr double r = tolerance; double s = probability; double N = (q * (1d - q) * Math.Pow(Normal.StandardZ(0.5d * (s + 1d)), 2d)) / Math.Pow(r, 2d); - int Nmin = (int)Math.Round(decimal.Parse(N.ToString()) / 100M, 0) * 100; + int Nmin = (int)Math.Round(N / 100d) * 100; return Nmin; } diff --git a/Numerics/Sampling/MCMC/Support/MCMCResults.cs b/Numerics/Sampling/MCMC/Support/MCMCResults.cs index 196309d4..dab56975 100644 --- a/Numerics/Sampling/MCMC/Support/MCMCResults.cs +++ b/Numerics/Sampling/MCMC/Support/MCMCResults.cs @@ -134,7 +134,7 @@ private void ProcessParameterResults(MCMCSampler sampler, double alpha = 0.1) /// /// Process a parameter results using the output list. /// - /// The confidence level; Default = 0.1, which will result in the 90% confidence intervals. + /// The confidence level; Default = 0.1, which will result in the 90% confidence intervals. private void ProcessParameterResults(double alpha = 0.1) { int p = Output.First().Values.Length; @@ -146,10 +146,57 @@ private void ProcessParameterResults(double alpha = 0.1) ParameterResults[i] = new ParameterResults(x, alpha); postMean[i] = ParameterResults[i].SummaryStatistics.Mean; } - // Set the posterior mean parameter set. + // Set the posterior mean parameter set. PosteriorMean = new ParameterSet(postMean, double.NaN); } + /// + /// Recompute parameter summary statistics at a new credible-interval level + /// (alpha) without rerunning the chain. Preserves Rhat, ESS, autocorrelation, + /// MarkovChains, AcceptanceRates, MeanLogLikelihood, MAP, and Output. + /// + /// + /// The new significance level (e.g., 0.05 for 95% credible intervals, + /// 0.10 for 90% credible intervals). + /// + /// + /// Used when the user changes the credible-interval width on a Bayesian + /// analysis whose MCMC chain has already converged. The chain output is + /// independent of alpha; only the posterior summary percentiles + /// (LowerCI/UpperCI) need to be recomputed. Snapshots the per-parameter + /// Rhat, ESS, and autocorrelation diagnostics (which depend on the chain, + /// not on alpha) before delegating to the existing private reprocess, then + /// restores them on the freshly-built ParameterResults instances. + /// + public void RecomputeParameterResults(double alpha) + { + if (ParameterResults == null || Output == null || Output.Count == 0) return; + + // Snapshot diagnostics that are independent of alpha. Length matches + // the parameter count of the existing ParameterResults array. + int n = ParameterResults.Length; + var rhats = new double[n]; + var esss = new double[n]; + var acfs = new double[n][,]; + for (int i = 0; i < n; i++) + { + rhats[i] = ParameterResults[i].SummaryStatistics.Rhat; + esss[i] = ParameterResults[i].SummaryStatistics.ESS; + acfs[i] = ParameterResults[i].Autocorrelation; + } + + // Recompute (replaces ParameterResults[] and PosteriorMean — Output untouched) + ProcessParameterResults(alpha); + + // Restore preserved diagnostics + for (int i = 0; i < ParameterResults.Length; i++) + { + ParameterResults[i].SummaryStatistics.Rhat = rhats[i]; + ParameterResults[i].SummaryStatistics.ESS = esss[i]; + ParameterResults[i].Autocorrelation = acfs[i]; + } + } + #region Serialization /// diff --git a/Numerics/Sampling/SobolSequence.cs b/Numerics/Sampling/SobolSequence.cs index a6399858..4721a186 100644 --- a/Numerics/Sampling/SobolSequence.cs +++ b/Numerics/Sampling/SobolSequence.cs @@ -1,4 +1,5 @@ using System; +using System.Globalization; using System.IO; namespace Numerics.Sampling @@ -112,7 +113,7 @@ private void initialize() var st = line.Split(' '); int dim; - int.TryParse(st[0], out dim); + int.TryParse(st[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out dim); if (dim >= 2 && dim <= Dimension) { @@ -122,7 +123,7 @@ private void initialize() { if (st[i] != "") { - int.TryParse(st[i], out s); + int.TryParse(st[i], NumberStyles.Integer, CultureInfo.InvariantCulture, out s); break; } } @@ -131,7 +132,7 @@ private void initialize() { if (st[i] != "") { - int.TryParse(st[i], out a); + int.TryParse(st[i], NumberStyles.Integer, CultureInfo.InvariantCulture, out a); break; } } @@ -143,7 +144,7 @@ private void initialize() { for (int j = 1; j <= s; j++) { - int.TryParse(st[i + j - 1], out m[j]); + int.TryParse(st[i + j - 1], NumberStyles.Integer, CultureInfo.InvariantCulture, out m[j]); } break; } From 4faca846b50ce1df8a136737acfdbd382f41b08b Mon Sep 17 00:00:00 2001 From: HadenSmith Date: Wed, 29 Apr 2026 15:44:04 -0600 Subject: [PATCH 2/9] Propagate NaN in MovingSum/Average and block aggregations instead of silently treating it as 0. A user reported that running peaks-over-threshold analysis in RMC-BestFit on GHCN precipitation data with missing days produced wrong results when smoothing was set to a multi-day moving sum: missing days were silently treated as zero precipitation, producing spurious sub-threshold exceedances and biased annual sums. The defect spanned MovingSum, MovingAverage, and the Sum/Average paths in CalendarYearSeries and CustomYearSeries, all of which masked NaN with 0 in the running aggregation. The defect was inconsistent within the library too: MonthlySeries and QuarterlySeries already propagated NaN naively, so the same input produced different answers depending on which block method was called. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adopts the convention used by every major data-analysis package (pandas Series.rolling min_periods, bottleneck move_sum min_count, xarray rolling, MATLAB movsum/movmean nanflag, R zoo rollsum/rollmean fallback): the default is strict NaN propagation — any NaN in the window or block produces NaN out. MovingSum and MovingAverage gain an optional minValidCount parameter (default = period) that maps directly to pandas's min_periods, letting power users opt into skip-NaN aggregation; in that mode MovingAverage divides by the count of observed values rather than the window size, fixing the prior low-bias. Block methods (CalendarYearSeries, CustomYearSeries, MonthlySeries, QuarterlySeries) now consistently propagate NaN for Sum and Average, and guard against empty blocks so series with sparse months no longer throw on .Last(). Difference is unchanged — IEEE-754 subtraction already propagates NaN. Adds NaN-coverage tests for moving-window, block, and POT pipelines that were previously absent. --- Numerics/Data/Time Series/TimeSeries.cs | 148 +++++++---- .../Data/Time Series/Test_TimeSeries.cs | 161 ++++++++++++ .../MCMC/Test_MCMCResults_Recompute.cs | 238 ++++++++++++++++++ 3 files changed, 505 insertions(+), 42 deletions(-) create mode 100644 Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs diff --git a/Numerics/Data/Time Series/TimeSeries.cs b/Numerics/Data/Time Series/TimeSeries.cs index 1f10cc98..4fdd52cc 100644 --- a/Numerics/Data/Time Series/TimeSeries.cs +++ b/Numerics/Data/Time Series/TimeSeries.cs @@ -945,28 +945,41 @@ private bool CheckIfMinStepsExceeded(DateTime startTime, DateTime endTime, int m /// Returns a moving average time-series based on the specified time period. The average is computed based on the previous n=period ordinates. /// /// The time period to average over. If time interval is 1-hour, and period is 12, the moving average will be computed over a moving 12 hour block. - public TimeSeries MovingAverage(int period) + /// + /// Optional. The minimum number of non-missing (non-NaN) values that must be present in a window for the average to be computed; otherwise the output is NaN. + /// Defaults to (strict propagation: any NaN in the window produces NaN). Pass a smaller value to skip missing values and average over the observed entries only, + /// matching the semantics of pandas min_periods and bottleneck min_count. + /// + public TimeSeries MovingAverage(int period, int? minValidCount = null) { if (period >= Count) throw new ArgumentException(nameof(period), "The period must be less than the length of the time-series."); + int minCount = minValidCount ?? period; + if (minCount < 1 || minCount > period) + throw new ArgumentOutOfRangeException(nameof(minValidCount), "minValidCount must be between 1 and period."); SortByTime(); var timeSeries = new TimeSeries(TimeInterval); double sum = 0d; - double avg = 0d; + int nanCount = 0; for (int i = 1; i <= Count; i++) { - sum += !double.IsNaN(this[i - 1].Value) ? this[i - 1].Value : 0; + double newVal = this[i - 1].Value; + if (double.IsNaN(newVal)) nanCount++; + else sum += newVal; + if (i > period) { - sum -= !double.IsNaN(this[i - period - 1].Value) ? this[i - period - 1].Value : 0; - avg = sum / period; - } - else - { - avg = sum / i; + double oldVal = this[i - period - 1].Value; + if (double.IsNaN(oldVal)) nanCount--; + else sum -= oldVal; } + if (i >= period) + { + int validCount = period - nanCount; + double avg = validCount >= minCount ? sum / validCount : double.NaN; timeSeries.Add(new SeriesOrdinate(this[i - 1].Index, avg)); + } } return timeSeries; } @@ -975,20 +988,41 @@ public TimeSeries MovingAverage(int period) /// Returns a moving sum time-series based on the specified time period. The sum is computed based on the previous n=period ordinates. /// /// The time period to sum over. If time interval is 1-hour, and period is 12, the moving sum will be computed over a moving 12 hour block. - public TimeSeries MovingSum(int period) + /// + /// Optional. The minimum number of non-missing (non-NaN) values that must be present in a window for the sum to be computed; otherwise the output is NaN. + /// Defaults to (strict propagation: any NaN in the window produces NaN). Pass a smaller value to sum only the observed entries within each window, + /// matching the semantics of pandas min_periods and bottleneck min_count. Note that no scaling is applied — the sum is over observed values only. + /// + public TimeSeries MovingSum(int period, int? minValidCount = null) { if (period >= Count) throw new ArgumentException(nameof(period), "The period must be less than the length of the time-series."); + int minCount = minValidCount ?? period; + if (minCount < 1 || minCount > period) + throw new ArgumentOutOfRangeException(nameof(minValidCount), "minValidCount must be between 1 and period."); SortByTime(); var timeSeries = new TimeSeries(TimeInterval); double sum = 0d; + int nanCount = 0; for (int i = 1; i <= Count; i++) { - sum += !double.IsNaN(this[i - 1].Value) ? this[i - 1].Value : 0; + double newVal = this[i - 1].Value; + if (double.IsNaN(newVal)) nanCount++; + else sum += newVal; + if (i > period) - sum -= !double.IsNaN(this[i - period - 1].Value) ? this[i - period - 1].Value : 0; + { + double oldVal = this[i - period - 1].Value; + if (double.IsNaN(oldVal)) nanCount--; + else sum -= oldVal; + } + if (i >= period) - timeSeries.Add(new SeriesOrdinate(this[i - 1].Index, sum)); + { + int validCount = period - nanCount; + double output = validCount >= minCount ? sum : double.NaN; + timeSeries.Add(new SeriesOrdinate(this[i - 1].Index, output)); + } } return timeSeries; } @@ -1610,20 +1644,26 @@ public TimeSeries CalendarYearSeries(BlockFunctionType blockFunction = BlockFunc double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum; } else if (blockFunction == BlockFunctionType.Average) { double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum / blockData.Count; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum / blockData.Count; } if (!double.IsNaN(ordinate.Value)) @@ -1633,7 +1673,7 @@ public TimeSeries CalendarYearSeries(BlockFunctionType blockFunction = BlockFunc } /// - /// Returns an annual (irregular) block series based on a 12-month year with a customized starting month. + /// Returns an annual (irregular) block series based on a 12-month year with a customized starting month. /// /// Optional. The month when the custom year begins. Default = 10 (or October). /// Optional. The block function type; e.g. min, max, sum, or average. Default = Maximum. @@ -1704,20 +1744,26 @@ public TimeSeries CustomYearSeries(int startMonth = 10, BlockFunctionType blockF double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum; } else if (blockFunction == BlockFunctionType.Average) { double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum / blockData.Count; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum / blockData.Count; } if (!double.IsNaN(ordinate.Value)) @@ -1826,20 +1872,26 @@ public TimeSeries CustomYearSeries(int startMonth, int endMonth, BlockFunctionTy double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum; } else if (blockFunction == BlockFunctionType.Average) { double sum = 0; for (int j = 0; j < blockData.Count; j++) { - sum += !double.IsNaN(blockData[j].Value) ? blockData[j].Value : 0; + sum += blockData[j].Value; + } + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum / blockData.Count; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum / blockData.Count; } if (!double.IsNaN(ordinate.Value)) @@ -1849,7 +1901,7 @@ public TimeSeries CustomYearSeries(int startMonth, int endMonth, BlockFunctionTy } /// - /// Returns an monthly (irregular) block series. + /// Returns an monthly (irregular) block series. /// /// Optional. The block function type; e.g. min, max, sum, or average. Default = Maximum. /// Optional. The smoothing function type. Default = None. @@ -1914,8 +1966,11 @@ public TimeSeries MonthlySeries(BlockFunctionType blockFunction = BlockFunctionT { sum += blockData[j].Value; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum; + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum; + } } else if (blockFunction == BlockFunctionType.Average) { @@ -1924,8 +1979,11 @@ public TimeSeries MonthlySeries(BlockFunctionType blockFunction = BlockFunctionT { sum += blockData[j].Value; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum / blockData.Count; + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum / blockData.Count; + } } if (!double.IsNaN(ordinate.Value)) @@ -1938,7 +1996,7 @@ public TimeSeries MonthlySeries(BlockFunctionType blockFunction = BlockFunctionT } /// - /// Returns an quarterly (irregular) block series. + /// Returns an quarterly (irregular) block series. /// /// Optional. The block function type; e.g. min, max, sum, or average. Default = Maximum. /// Optional. The smoothing function type. Default = None. @@ -2011,8 +2069,11 @@ public TimeSeries QuarterlySeries(BlockFunctionType blockFunction = BlockFunctio { sum += blockData[j].Value; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum; + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum; + } } else if (blockFunction == BlockFunctionType.Average) { @@ -2021,8 +2082,11 @@ public TimeSeries QuarterlySeries(BlockFunctionType blockFunction = BlockFunctio { sum += blockData[j].Value; } - ordinate.Index = blockData.Last().Index; - ordinate.Value = sum / blockData.Count; + if (blockData.Count > 0) + { + ordinate.Index = blockData.Last().Index; + ordinate.Value = sum / blockData.Count; + } } if (!double.IsNaN(ordinate.Value)) diff --git a/Test_Numerics/Data/Time Series/Test_TimeSeries.cs b/Test_Numerics/Data/Time Series/Test_TimeSeries.cs index ce63a623..17f47d72 100644 --- a/Test_Numerics/Data/Time Series/Test_TimeSeries.cs +++ b/Test_Numerics/Data/Time Series/Test_TimeSeries.cs @@ -447,6 +447,167 @@ public void Test_MovingSum() } } + /// + /// Test that MovingSum propagates NaN strictly by default (any NaN in window -> NaN out). + /// Matches pandas Series.rolling(w).sum() default min_periods=w behavior. + /// + [TestMethod] + public void Test_MovingSum_NaN_Strict() + { + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), new double[] { 1.0, double.NaN, 3.0, 4.0, 5.0 }); + + var newTS = ts.MovingSum(2); + // window positions: [1,NaN], [NaN,3], [3,4], [4,5] -> [NaN, NaN, 7, 9] + var valid = new double[] { double.NaN, double.NaN, 7.0, 9.0 }; + Assert.AreEqual(valid.Length, newTS.Count); + for (int i = 0; i < newTS.Count; i++) + { + Assert.AreEqual(valid[i], newTS[i].Value); + } + } + + /// + /// Test that MovingSum with minValidCount=1 skips NaN values (pandas min_periods=1 semantics). + /// Sum is over observed values only; no scaling is applied. + /// + [TestMethod] + public void Test_MovingSum_NaN_SkipMode() + { + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), new double[] { 1.0, double.NaN, 3.0, 4.0, 5.0 }); + + var newTS = ts.MovingSum(2, minValidCount: 1); + // window positions: [1,NaN]->1, [NaN,3]->3, [3,4]->7, [4,5]->9 + var valid = new double[] { 1.0, 3.0, 7.0, 9.0 }; + Assert.AreEqual(valid.Length, newTS.Count); + for (int i = 0; i < newTS.Count; i++) + { + Assert.AreEqual(valid[i], newTS[i].Value); + } + } + + /// + /// Test that MovingAverage propagates NaN strictly by default. + /// + [TestMethod] + public void Test_MovingAverage_NaN_Strict() + { + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), new double[] { 1.0, double.NaN, 3.0, 4.0, 5.0 }); + + var newTS = ts.MovingAverage(2); + var valid = new double[] { double.NaN, double.NaN, 3.5, 4.5 }; + Assert.AreEqual(valid.Length, newTS.Count); + for (int i = 0; i < newTS.Count; i++) + { + Assert.AreEqual(valid[i], newTS[i].Value); + } + } + + /// + /// Test that MovingAverage with minValidCount=1 averages over non-NaN values only. + /// Denominator is the count of observed values, not the window size — this fixes the prior low-bias. + /// + [TestMethod] + public void Test_MovingAverage_NaN_SkipMode() + { + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), new double[] { 1.0, double.NaN, 3.0, 4.0, 5.0 }); + + var newTS = ts.MovingAverage(2, minValidCount: 1); + // [1,NaN]->1/1=1, [NaN,3]->3/1=3, [3,4]->7/2=3.5, [4,5]->9/2=4.5 + var valid = new double[] { 1.0, 3.0, 3.5, 4.5 }; + Assert.AreEqual(valid.Length, newTS.Count); + for (int i = 0; i < newTS.Count; i++) + { + Assert.AreEqual(valid[i], newTS[i].Value); + } + } + + /// + /// Regression guard: Difference relies on IEEE-754 subtraction to propagate NaN automatically. + /// + [TestMethod] + public void Test_Difference_NaN() + { + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), new double[] { 1.0, double.NaN, 3.0, 7.0 }); + + var newTS = ts.Difference(); + // diffs: NaN-1=NaN, 3-NaN=NaN, 7-3=4 + Assert.AreEqual(3, newTS.Count); + Assert.AreEqual(double.NaN, newTS[0].Value); + Assert.AreEqual(double.NaN, newTS[1].Value); + Assert.AreEqual(4.0, newTS[2].Value); + } + + /// + /// Test that CalendarYearSeries with Sum/Average drops years containing any NaN + /// (rather than the previous behavior of silently treating NaN as zero). + /// + [TestMethod] + public void Test_CalendarYearSeries_NaN() + { + // Two years of monthly data; year 2 has one NaN month. + var values = new double[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 1, double.NaN, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 }; + var ts = new TimeSeries(TimeInterval.OneMonth, new DateTime(2023, 01, 01), values); + + // Sum: year 1 = 78, year 2 = NaN (dropped from output) + var sumTS = ts.CalendarYearSeries(BlockFunctionType.Sum); + Assert.AreEqual(1, sumTS.Count); + Assert.AreEqual(78.0, sumTS[0].Value); + Assert.AreEqual(2023, sumTS[0].Index.Year); + + // Average: same — year 2 dropped because NaN propagates + var avgTS = ts.CalendarYearSeries(BlockFunctionType.Average); + Assert.AreEqual(1, avgTS.Count); + Assert.AreEqual(78.0 / 12.0, avgTS[0].Value, 1E-9); + + // Maximum: NaN values are skipped via the < / > comparisons (false on NaN), so both years survive. + var maxTS = ts.CalendarYearSeries(BlockFunctionType.Maximum); + Assert.AreEqual(2, maxTS.Count); + Assert.AreEqual(12.0, maxTS[0].Value); + Assert.AreEqual(12.0, maxTS[1].Value); + } + + /// + /// Regression guard: MonthlySeries already propagated NaN naively before this fix; verify it still does. + /// + [TestMethod] + public void Test_MonthlySeries_NaN() + { + // Two months of weekly data; month 2 has one NaN. + var values = new double[] { 10, 20, 30, 40, 50, 60, double.NaN, 80 }; + var ts = new TimeSeries(TimeInterval.SevenDay, new DateTime(2023, 01, 01), values); + + var sumTS = ts.MonthlySeries(BlockFunctionType.Sum); + // Jan 2023 has 5 weekly values: 10+20+30+40+50 = 150 + // Feb 2023 has 3 weekly values, one NaN -> sum = NaN, dropped + // Mar-Dec 2023 are empty -> skipped (no spurious zeros) + Assert.AreEqual(1, sumTS.Count); + Assert.AreEqual(150.0, sumTS[0].Value, 1E-9); + } + + /// + /// Test that PeaksOverThresholdSeries with a multi-day MovingSum smoothing on data with NaN + /// does not produce spurious exceedances from windows that touched a missing day. + /// This is the user-reported bug: 2-day moving sum on precipitation with gaps was returning + /// the value of the single non-missing day instead of NaN, causing zero-baseline exceedances. + /// + [TestMethod] + public void Test_PeaksOverThreshold_MovingSum_NaN() + { + // One legitimate 2-day exceedance (5 + 6 = 11). The trailing 8.0 sits at the very end + // of the series with a NaN before it, so the only window touching it is [NaN, 8.0]. + // Pre-fix: [NaN, 8.0] silently became 8.0 -> spurious extra event above threshold 7. + // Post-fix: that window is NaN -> excluded. Only [5, 6] = 11 remains. + var values = new double[] { 0.1, 0.2, 5.0, 6.0, 0.1, 0.0, 0.1, 0.2, 0.0, double.NaN, 8.0 }; + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2023, 01, 01), values); + + var pot = ts.PeaksOverThresholdSeries(threshold: 7.0, minStepsBetweenEvents: 1, + smoothingFunction: SmoothingFunctionType.MovingSum, period: 2); + + Assert.AreEqual(1, pot.Count); + Assert.AreEqual(11.0, pot[0].Value, 1E-9); + } + /// /// Test the method that shifts all dates to a new starting date /// diff --git a/Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs b/Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs new file mode 100644 index 00000000..e5f3aca9 --- /dev/null +++ b/Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs @@ -0,0 +1,238 @@ +using System.Collections.Generic; +using System.Linq; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics.Mathematics.Optimization; +using Numerics.Sampling; +using Numerics.Sampling.MCMC; + +namespace Sampling.MCMC +{ + /// + /// Unit tests for . The contract: + /// recomputing parameter results at a new alpha must update the credible-interval + /// percentiles (LowerCI/UpperCI) while preserving alpha-independent diagnostics + /// (Rhat, ESS, Autocorrelation) AND the underlying chain output (Output, MAP). + /// + /// + /// This is the cornerstone of the RMC.BestFit Phase 2 reprocess-don't-clear refactor. + /// A regression that drops one of the three snapshot/restore lines silently corrupts + /// every convergence-diagnostic display the moment a user changes CredibleIntervalWidth + /// on an estimated analysis. + /// + [TestClass] + public class Test_MCMCResults_Recompute + { + /// + /// Builds a minimal populated via the + /// constructor. + /// MarkovChains/AcceptanceRates/MeanLogLikelihood remain null per the constructor's + /// documented behavior. + /// + private static MCMCResults BuildResults(double alpha) + { + // Three parameters (mu, sigma, tau) sampled from synthetic distributions. + // 1000 samples is enough for smooth percentile estimates. + var rng = new MersenneTwister(2026); + var output = new List(capacity: 1000); + for (int i = 0; i < 1000; i++) + { + // Param 0: ~ Normal(10, 1) + double p0 = 10.0 + StandardNormal(rng); + // Param 1: ~ Normal(2, 0.5) + double p1 = 2.0 + 0.5 * StandardNormal(rng); + // Param 2: ~ Uniform(-1, 1) + double p2 = -1.0 + 2.0 * rng.NextDouble(); + output.Add(new ParameterSet(new[] { p0, p1, p2 }, 0)); + } + var map = new ParameterSet(new[] { 10.0, 2.0, 0.0 }, 0); + return new MCMCResults(map, output, alpha); + } + + /// Box-Muller transform, single sample. + private static double StandardNormal(MersenneTwister rng) + { + double u1 = rng.NextDouble(); + double u2 = rng.NextDouble(); + if (u1 < 1e-300) u1 = 1e-300; + return System.Math.Sqrt(-2.0 * System.Math.Log(u1)) * System.Math.Cos(2.0 * System.Math.PI * u2); + } + + /// + /// Seed each parameter's alpha-independent diagnostics with sentinel values. + /// These would normally be populated by the + /// path; for test purposes we set them directly to verify the snapshot/restore. + /// + private static (double[] rhats, double[] esss, double[][,] acfs) SeedDiagnostics(MCMCResults results) + { + int n = results.ParameterResults.Length; + var rhats = new double[n]; + var esss = new double[n]; + var acfs = new double[n][,]; + for (int i = 0; i < n; i++) + { + rhats[i] = 1.0 + 0.01 * (i + 1); // 1.01, 1.02, 1.03 + esss[i] = 800.0 + 10.0 * i; // 800, 810, 820 + acfs[i] = new double[1, 5] { { 1.0, 0.5 - 0.1 * i, 0.25, 0.125, 0.0625 } }; + results.ParameterResults[i].SummaryStatistics.Rhat = rhats[i]; + results.ParameterResults[i].SummaryStatistics.ESS = esss[i]; + results.ParameterResults[i].Autocorrelation = acfs[i]; + } + return (rhats, esss, acfs); + } + + [TestMethod] + public void Recompute_PreservesOutputReference() + { + var results = BuildResults(alpha: 0.10); + var outputRef = results.Output; + + results.RecomputeParameterResults(alpha: 0.05); + + // The Output list must be the same reference — the chain itself is not touched. + Assert.AreSame(outputRef, results.Output, "Output reference must not change."); + Assert.AreEqual(1000, results.Output.Count, "Output count must be preserved."); + } + + [TestMethod] + public void Recompute_PreservesMAP() + { + var results = BuildResults(alpha: 0.10); + var valuesBefore = (double[])results.MAP.Values.Clone(); + var fitnessBefore = results.MAP.Fitness; + + results.RecomputeParameterResults(alpha: 0.05); + + Assert.AreEqual(valuesBefore.Length, results.MAP.Values.Length, "MAP value count must not change."); + for (int i = 0; i < valuesBefore.Length; i++) + Assert.AreEqual(valuesBefore[i], results.MAP.Values[i], 1e-12, $"MAP value [{i}] must not change."); + Assert.AreEqual(fitnessBefore, results.MAP.Fitness, 1e-12, "MAP fitness must not change."); + } + + [TestMethod] + public void Recompute_PreservesRhat() + { + var results = BuildResults(alpha: 0.10); + var (rhats, _, _) = SeedDiagnostics(results); + + results.RecomputeParameterResults(alpha: 0.05); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + Assert.AreEqual(rhats[i], results.ParameterResults[i].SummaryStatistics.Rhat, 1e-12, + $"Parameter {i}: Rhat must be preserved across alpha change."); + } + } + + [TestMethod] + public void Recompute_PreservesESS() + { + var results = BuildResults(alpha: 0.10); + var (_, esss, _) = SeedDiagnostics(results); + + results.RecomputeParameterResults(alpha: 0.05); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + Assert.AreEqual(esss[i], results.ParameterResults[i].SummaryStatistics.ESS, 1e-12, + $"Parameter {i}: ESS must be preserved across alpha change."); + } + } + + [TestMethod] + public void Recompute_PreservesAutocorrelation() + { + var results = BuildResults(alpha: 0.10); + var (_, _, acfs) = SeedDiagnostics(results); + + results.RecomputeParameterResults(alpha: 0.05); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + var acfAfter = results.ParameterResults[i].Autocorrelation; + Assert.AreEqual(acfs[i].GetLength(0), acfAfter.GetLength(0)); + Assert.AreEqual(acfs[i].GetLength(1), acfAfter.GetLength(1)); + for (int r = 0; r < acfs[i].GetLength(0); r++) + { + for (int c = 0; c < acfs[i].GetLength(1); c++) + { + Assert.AreEqual(acfs[i][r, c], acfAfter[r, c], 1e-12, + $"Parameter {i} ACF[{r},{c}]: must be preserved across alpha change."); + } + } + } + } + + [TestMethod] + public void Recompute_NarrowsCIWhenAlphaIncreases() + { + // alpha=0.10 → 90% CI; alpha=0.20 → 80% CI (narrower band). + var results = BuildResults(alpha: 0.10); + var lower90 = results.ParameterResults.Select(p => p.SummaryStatistics.LowerCI).ToArray(); + var upper90 = results.ParameterResults.Select(p => p.SummaryStatistics.UpperCI).ToArray(); + + results.RecomputeParameterResults(alpha: 0.20); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + double lower80 = results.ParameterResults[i].SummaryStatistics.LowerCI; + double upper80 = results.ParameterResults[i].SummaryStatistics.UpperCI; + Assert.IsTrue(lower80 > lower90[i], + $"Parameter {i}: 80% LowerCI ({lower80}) must be > 90% LowerCI ({lower90[i]}) — band narrows when alpha increases."); + Assert.IsTrue(upper80 < upper90[i], + $"Parameter {i}: 80% UpperCI ({upper80}) must be < 90% UpperCI ({upper90[i]}) — band narrows when alpha increases."); + } + } + + [TestMethod] + public void Recompute_WidensCIWhenAlphaDecreases() + { + // alpha=0.10 → 90% CI; alpha=0.05 → 95% CI (wider band). + var results = BuildResults(alpha: 0.10); + var lower90 = results.ParameterResults.Select(p => p.SummaryStatistics.LowerCI).ToArray(); + var upper90 = results.ParameterResults.Select(p => p.SummaryStatistics.UpperCI).ToArray(); + + results.RecomputeParameterResults(alpha: 0.05); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + double lower95 = results.ParameterResults[i].SummaryStatistics.LowerCI; + double upper95 = results.ParameterResults[i].SummaryStatistics.UpperCI; + Assert.IsTrue(lower95 < lower90[i], + $"Parameter {i}: 95% LowerCI ({lower95}) must be < 90% LowerCI ({lower90[i]}) — band widens when alpha decreases."); + Assert.IsTrue(upper95 > upper90[i], + $"Parameter {i}: 95% UpperCI ({upper95}) must be > 90% UpperCI ({upper90[i]}) — band widens when alpha decreases."); + } + } + + [TestMethod] + public void Recompute_PreservesMeanAndMedian() + { + // Mean and Median are alpha-independent — should be preserved exactly. + var results = BuildResults(alpha: 0.10); + var meansBefore = results.ParameterResults.Select(p => p.SummaryStatistics.Mean).ToArray(); + var mediansBefore = results.ParameterResults.Select(p => p.SummaryStatistics.Median).ToArray(); + + results.RecomputeParameterResults(alpha: 0.05); + + for (int i = 0; i < results.ParameterResults.Length; i++) + { + Assert.AreEqual(meansBefore[i], results.ParameterResults[i].SummaryStatistics.Mean, 1e-10, + $"Parameter {i}: Mean must be preserved."); + Assert.AreEqual(mediansBefore[i], results.ParameterResults[i].SummaryStatistics.Median, 1e-10, + $"Parameter {i}: Median must be preserved."); + } + } + + [TestMethod] + public void Recompute_OnEmptyResults_DoesNotThrow() + { + // ParameterResults == null guard. + var results = new MCMCResults(); + + results.RecomputeParameterResults(alpha: 0.05); + + // Should be a no-op — ParameterResults stays null, no exception. + Assert.IsNull(results.ParameterResults); + } + } +} From 7f2bdf90d2f3eea7d49d8d3635ab150334b18608 Mon Sep 17 00:00:00 2001 From: HadenSmith Date: Thu, 30 Apr 2026 13:12:26 -0600 Subject: [PATCH 3/9] =?UTF-8?q?Harden=20the=20time=20series=20download=20l?= =?UTF-8?q?ibrary:=20retry=20transient=20failures,=20fix=20offset-bearing?= =?UTF-8?q?=20timestamp=20parsing,=20and=20stop=20silently=20masking=20BOM?= =?UTF-8?q?=20API=20drift.=20A=20user=20reported=20that=20BOM=20integratio?= =?UTF-8?q?n=20tests=20were=20"all=20passing"=20only=20because=20BomAvaila?= =?UTF-8?q?ble()=20was=20returning=20false=20on=20a=20single=20transient?= =?UTF-8?q?=20KiWIS=20500/DatasourceError,=20which=20the=20test-side=20=5F?= =?UTF-8?q?serviceAvailability=20cache=20then=20memoized=20as=20"down"=20f?= =?UTF-8?q?or=20the=20rest=20of=20the=20run,=20silently=20short-circuiting?= =?UTF-8?q?=20every=20BOM=20test.=20The=20same=20hazards=20lived=20in=20CH?= =?UTF-8?q?MN,=20USGS,=20and=20GHCN=20=E2=80=94=20none=20had=20retry=20on?= =?UTF-8?q?=20transient=205xx,=20all=20four=20ran=20every=20download=20thr?= =?UTF-8?q?ough=20an=20IsConnectedToInternet()=20probe=20that=20hit=20USGS?= =?UTF-8?q?=20specifically=20(so=20a=20USGS=20slowdown=20looked=20like=20"?= =?UTF-8?q?no=20internet"=20to=20BOM/CHMN/GHCN=20callers),=20and=20the=20c?= =?UTF-8?q?onnectivity=20probe=20didn't=20even=20check=20IsSuccessStatusCo?= =?UTF-8?q?de.=20Two=20correctness=20bugs=20were=20also=20lurking:=20BOM?= =?UTF-8?q?=20and=20CHMN=20real-time=20timestamps=20include=20explicit=20o?= =?UTF-8?q?ffsets=20like=20"+10:00"=20or=20"-08:00",=20and=20DateTime.TryP?= =?UTF-8?q?arse=20with=20DateTimeStyles.None=20converts=20those=20to=20loc?= =?UTF-8?q?al=20time=20per=20Microsoft's=20documented=20behavior,=20shifti?= =?UTF-8?q?ng=20daily=20series=20by=20a=20calendar=20day=20on=20any=20non-?= =?UTF-8?q?AEST/non-PST=20machine;=20and=20the=20USGS=20peak-data=20parser?= =?UTF-8?q?=20checked=20segments.Count()=20>=3D=205=20before=20indexing=20?= =?UTF-8?q?segments[6]=20for=20PeakStage,=20which=20would=20IndexOutOfRang?= =?UTF-8?q?e=20on=20short=20historical=20rows.=20This=20change=20adds=20a?= =?UTF-8?q?=20single=20GetWithRetryAsync=20helper=20(three=20attempts,=200?= =?UTF-8?q?.5s/1s/2s=20backoff,=20retries=205xx/408/429/network=20errors?= =?UTF-8?q?=20and=20KiWIS=20DatasourceError=20bodies)=20used=20by=20all=20?= =?UTF-8?q?four=20providers,=20switches=20the=20global=20connectivity=20ch?= =?UTF-8?q?eck=20to=20https://www.google.com/generate=5F204=20with=20a=20s?= =?UTF-8?q?uccess-status=20check,=20replaces=20DateTime.TryParse=20with=20?= =?UTF-8?q?DateTimeOffset.TryParse=20for=20BOM=20and=20CHMN=20real-time=20?= =?UTF-8?q?timestamps,=20deletes=20the=20BOM=20silent=20ts=5Fid=20fallback?= =?UTF-8?q?=20that=20was=20quietly=20returning=20hourly=20data=20when=20th?= =?UTF-8?q?e=20caller=20asked=20for=20daily,=20fixes=20the=20USGS=20peak?= =?UTF-8?q?=20bounds=20check,=20URL-encodes=20the=20spaces=20in=20the=20CH?= =?UTF-8?q?MN=20real-time=20URL,=20and=20gives=20the=20test-side=20Availab?= =?UTF-8?q?leAsync=20cache=20a=2030-second=20TTL=20on=20negative=20results?= =?UTF-8?q?=20so=20a=20single=20transient=20flake=20no=20longer=20disables?= =?UTF-8?q?=20a=20provider's=20whole=20test=20suite.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Time Series/Support/TimeSeriesDownload.cs | 267 ++++++++++++------ .../Time Series/Test_TimeSeriesDownload.cs | 50 +++- 2 files changed, 210 insertions(+), 107 deletions(-) diff --git a/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs b/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs index 5d75920b..6c294aa1 100644 --- a/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs +++ b/Numerics/Data/Time Series/Support/TimeSeriesDownload.cs @@ -35,7 +35,40 @@ public class TimeSeriesDownload private const string UserAgent = "USACE-Numerics/2.0"; /// - /// Checks if there is an Internet connection. + /// GET with up to three attempts and exponential backoff (0.5s, 1s, 2s + /// + jitter) on transient failures. Retries 5xx, 408, 429, network + /// exceptions, and KiWIS DatasourceError bodies. Does NOT retry 4xx — + /// those mean the request itself is wrong. + /// + private static async Task<(HttpStatusCode status, string body)> GetWithRetryAsync( + HttpClient client, string url, int maxAttempts = 3) + { + Exception? lastNet = null; + for (int attempt = 1; attempt <= maxAttempts; attempt++) + { + try + { + using var resp = await client.GetAsync(url); + string body = await resp.Content.ReadAsStringAsync(); + bool transient = + (int)resp.StatusCode >= 500 || + resp.StatusCode == HttpStatusCode.RequestTimeout || + (int)resp.StatusCode == 429 || + body.Contains("DatasourceError") || + body.Contains("Error connecting to WDP"); + if (!transient || attempt == maxAttempts) return (resp.StatusCode, body); + } + catch (HttpRequestException ex) { lastNet = ex; if (attempt == maxAttempts) throw; } + catch (TaskCanceledException ex) { lastNet = ex; if (attempt == maxAttempts) throw; } + + await Task.Delay(500 * (int)Math.Pow(2, attempt - 1)); + } + throw lastNet ?? new Exception("GetWithRetryAsync exhausted retries"); + } + + /// + /// Checks if there is an Internet connection by probing Google's + /// connectivity-check endpoint (HTTP 204 No Content). /// public static async Task IsConnectedToInternet() { @@ -43,8 +76,9 @@ public static async Task IsConnectedToInternet() { using (var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5))) { - await _defaultClient.GetAsync("https://waterservices.usgs.gov/nwis/", cts.Token); - return true; + var resp = await _defaultClient.GetAsync( + "https://www.google.com/generate_204", cts.Token); + return resp.IsSuccessStatusCode; } } catch @@ -192,10 +226,28 @@ public static async Task FromGHCN(string siteNumber, TimeSeriesType string stationFileUrl = $"{ghcnBaseUrl}{Uri.EscapeDataString(siteNumber)}.dly"; { var client = _defaultClient; - // Request the file and ensure that response headers are read first. - using (HttpResponseMessage response = await client.GetAsync(stationFileUrl, HttpCompletionOption.ResponseHeadersRead)) + // GHCN is large enough to want streaming, so we can't use GetWithRetryAsync + // directly (it buffers the whole body). Inline the same retry shape on the + // headers-only GetAsync, then stream from the successful response. + HttpResponseMessage? response = null; + for (int attempt = 1; attempt <= 3; attempt++) + { + try + { + response = await client.GetAsync(stationFileUrl, HttpCompletionOption.ResponseHeadersRead); + if ((int)response.StatusCode < 500 + && response.StatusCode != HttpStatusCode.RequestTimeout + && (int)response.StatusCode != 429) break; + if (attempt == 3) break; + response.Dispose(); + } + catch (HttpRequestException) when (attempt < 3) { /* retry */ } + catch (TaskCanceledException) when (attempt < 3) { /* retry */ } + await Task.Delay(500 * (int)Math.Pow(2, attempt - 1)); + } + using (response) { - response.EnsureSuccessStatusCode(); + response!.EnsureSuccessStatusCode(); using (Stream stream = await response.Content.ReadAsStreamAsync()) using (FileStream fs = new FileStream(tempFilePath, FileMode.Create, FileAccess.Write, FileShare.None, bufferSize: 81920, useAsync: true)) { @@ -389,61 +441,66 @@ private static string CreateURLForUSGSDownload(string siteNumber, TimeSeriesType { var client = _decompressClient; - HttpResponseMessage response = await client.GetAsync(url); - response.EnsureSuccessStatusCode(); - - using (Stream contentStream = await response.Content.ReadAsStreamAsync()) - using (StreamReader reader = new StreamReader(contentStream)) + var (status, body) = await GetWithRetryAsync(client, url); + if ((int)status >= 400) { - string? line; - bool isHeader = true; + string snippet = body.Length > 500 ? body.Substring(0, 500) + "…" : body; + throw new Exception( + $"Failed to retrieve USGS data. Status={(int)status} ({status}). Body={snippet}"); + } + + using var reader = new StringReader(body); + string? line; + bool isHeader = true; - while ((line = await reader.ReadLineAsync()) != null) + while ((line = await reader.ReadLineAsync()) != null) + { + // Skip header row + if (isHeader) { - // Skip header row - if (isHeader) - { - isHeader = false; - continue; - } + isHeader = false; + continue; + } - // Skip comment lines - if (line.StartsWith("#")) + // Skip comment lines + if (line.StartsWith("#")) + { + if (line.Trim() == "# No sites found matching all criteria") { - if (line.Trim() == "# No sites found matching all criteria") - { - throw new Exception("No data found matching all criteria."); - } - continue; + throw new Exception("No data found matching all criteria."); } + continue; + } - string[] fields = line.Split('\t'); - // Validate expected number of fields and record type - if (fields.Length <= valueIndex || fields[0] != "USGS") - continue; + string[] fields = line.Split('\t'); + // Validate expected number of fields and record type + if (fields.Length <= valueIndex || fields[0] != "USGS") + continue; - // Parse date (assume fields[2] contains the date) - if (!DateTime.TryParse(fields[2], CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime index)) - { - continue; - } + // USGS instantaneous timestamps are local-time strings without an offset; + // the tz_cd column at fields[3] reports the offset separately. We currently + // parse these as naive DateTime (Kind=Unspecified). Future work: synthesize + // a DateTimeOffset from fields[2] + fields[3] for full timezone fidelity. + if (!DateTime.TryParse(fields[2], CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime index)) + { + continue; + } - // Fill in missing days only for daily series (not instantaneous) - if (!isInstantaneous && timeSeries.Count > 0 && index != TimeSeries.AddTimeInterval(timeSeries.Last().Index, TimeInterval.OneDay)) + // Fill in missing days only for daily series (not instantaneous) + if (!isInstantaneous && timeSeries.Count > 0 && index != TimeSeries.AddTimeInterval(timeSeries.Last().Index, TimeInterval.OneDay)) + { + while (timeSeries.Last().Index < TimeSeries.SubtractTimeInterval(index, TimeInterval.OneDay)) { - while (timeSeries.Last().Index < TimeSeries.SubtractTimeInterval(index, TimeInterval.OneDay)) - { - timeSeries.Add(new SeriesOrdinate( - TimeSeries.AddTimeInterval(timeSeries.Last().Index, TimeInterval.OneDay), double.NaN)); - } + timeSeries.Add(new SeriesOrdinate( + TimeSeries.AddTimeInterval(timeSeries.Last().Index, TimeInterval.OneDay), double.NaN)); } - - // Get and parse value - string valueStr = fields[valueIndex]; - double value = string.IsNullOrWhiteSpace(valueStr) ? double.NaN - : double.TryParse(valueStr, NumberStyles.Float, CultureInfo.InvariantCulture, out double tempVal) ? tempVal : double.NaN; - timeSeries.Add(new SeriesOrdinate(index, value)); } + + // Get and parse value + string valueStr = fields[valueIndex]; + double value = string.IsNullOrWhiteSpace(valueStr) ? double.NaN + : double.TryParse(valueStr, NumberStyles.Float, CultureInfo.InvariantCulture, out double tempVal) ? tempVal : double.NaN; + timeSeries.Add(new SeriesOrdinate(index, value)); } } } @@ -451,14 +508,25 @@ private static string CreateURLForUSGSDownload(string siteNumber, TimeSeriesType else if (timeSeriesType == TimeSeriesType.PeakDischarge || timeSeriesType == TimeSeriesType.PeakStage) { { - // Download data as string (assumes USGS peak data is not compressed) - textDownload = await _defaultClient.GetStringAsync(url); + var (peakStatus, peakBody) = await GetWithRetryAsync(_defaultClient, url); + if ((int)peakStatus >= 400) + { + string snippet = peakBody.Length > 500 ? peakBody.Substring(0, 500) + "…" : peakBody; + throw new Exception( + $"Failed to retrieve USGS peak data. Status={(int)peakStatus} ({peakStatus}). Body={snippet}"); + } + textDownload = peakBody; + int idx = timeSeriesType == TimeSeriesType.PeakDischarge ? 4 : 6; var lines = textDownload.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries); foreach (string line in lines) { var segments = line.Split('\t'); - if (segments.First() == "USGS" && segments.Count() >= 5) + // Bounds-check against the actual column we'll read (peak_va at 4 for + // discharge, gage_ht at 6 for stage), not a fixed minimum. Old guard + // was Length >= 5 which would IndexOutOfRange on a 5-column historical + // row when reading stage at index 6. + if (segments.Length > idx && segments[0] == "USGS") { // Get date DateTime index = DateTime.Now; @@ -491,8 +559,7 @@ private static string CreateURLForUSGSDownload(string siteNumber, TimeSeriesType } // Get value double value = 0; - int idx = timeSeriesType == TimeSeriesType.PeakDischarge ? 4 : 6; - if (segments[idx] != "" && segments[idx] != " " && segments[idx] != " " && !string.IsNullOrEmpty(segments[idx])) + if (!string.IsNullOrWhiteSpace(segments[idx])) { double.TryParse(segments[idx], NumberStyles.Float, CultureInfo.InvariantCulture, out value); timeSeries.Add(new SeriesOrdinate(index, value)); @@ -531,17 +598,13 @@ private static async Task ParseUSGSOgcApiPages(HttpClient client, string initial while (nextUrl != null) { - // Retry with exponential backoff for rate limiting (429) - HttpResponseMessage response = await client.GetAsync(nextUrl); - for (int attempt = 1; attempt < 6; attempt++) + var (status, json) = await GetWithRetryAsync(client, nextUrl); + if ((int)status >= 400) { - if ((int)response.StatusCode != 429) break; - int delayMs = Math.Min((int)Math.Pow(2, attempt) * 2000, 60000); - await System.Threading.Tasks.Task.Delay(delayMs); - response = await client.GetAsync(nextUrl); + string snippet = json.Length > 500 ? json.Substring(0, 500) + "…" : json; + throw new Exception( + $"USGS OGC API error. Status={(int)status} ({status}). Body={snippet}"); } - response.EnsureSuccessStatusCode(); - var json = await response.Content.ReadAsStringAsync(); if (rawText != null) { @@ -669,11 +732,14 @@ public static async Task FromCHMN( string paramCode = isDischarge ? "47" : "46"; DateTime sd = startDate ?? DateTime.UtcNow.AddMonths(-18); DateTime ed = endDate ?? DateTime.UtcNow; + // Use '+' as the URL-safe space separator instead of a literal space + // in the query string. ECCC accepts the literal space, but some + // intermediate proxies and CDNs reject it. url = "https://wateroffice.ec.gc.ca/services/real_time_data/csv/inline" + $"?stations[]={Uri.EscapeDataString(stationNumber)}" + $"¶meters[]={paramCode}" + - $"&start_date={sd:yyyy-MM-dd HH:mm:ss}" + - $"&end_date={ed:yyyy-MM-dd HH:mm:ss}"; + $"&start_date={sd:yyyy-MM-dd}+{sd:HH:mm:ss}" + + $"&end_date={ed:yyyy-MM-dd}+{ed:HH:mm:ss}"; interval = TimeInterval.FiveMinute; } else // PeakDischarge or PeakStage @@ -695,13 +761,13 @@ public static async Task FromCHMN( { var client = _decompressClient; - // The endpoint returns CSV (automatically decompressed by HttpClientHandler) - using HttpResponseMessage resp = await client.GetAsync(url); - resp.EnsureSuccessStatusCode(); - - string csv; + // The endpoint returns CSV (automatically decompressed by HttpClientHandler). + var (status, csv) = await GetWithRetryAsync(client, url); + if ((int)status >= 400) { - csv = await resp.Content.ReadAsStringAsync(); + string snippet = csv.Length > 500 ? csv.Substring(0, 500) + "…" : csv; + throw new Exception( + $"Failed to retrieve CHMN data. Status={(int)status} ({status}). Body={snippet}"); } // Parse the CSV response based on endpoint type @@ -774,9 +840,15 @@ private static void ParseCHMNDailyCsv(string csv, string stationNumber, TimeSeri if (!isDischarge && !isLevelParam) continue; } - // Parse date - if (!DateTime.TryParse(parts[dateCol].Trim(), CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime date)) + // Real-time CHMN timestamps include explicit Pacific/Eastern offsets, + // e.g. "2024-11-01T08:00:00-08:00". DateTime.TryParse with + // DateTimeStyles.None converts these to local time, shifting them on + // any non-Canadian-Pacific machine. Use DateTimeOffset to preserve + // the offset, then take the wall-clock value the gauge reported. + if (!DateTimeOffset.TryParse(parts[dateCol].Trim(), CultureInfo.InvariantCulture, + DateTimeStyles.None, out DateTimeOffset dto)) continue; + DateTime date = isDailyType ? dto.Date : dto.DateTime; // Parse value double val = double.NaN; @@ -968,10 +1040,11 @@ public static async Task FromABOM( client.DefaultRequestHeaders.Add("Connection", "keep-alive"); client.Timeout = TimeSpan.FromSeconds(30); - HttpResponseMessage response; + HttpStatusCode listStatus; + string listResponse; try { - response = await client.GetAsync(tsListUrl); + (listStatus, listResponse) = await GetWithRetryAsync(client, tsListUrl); } catch (HttpRequestException ex) { @@ -982,15 +1055,13 @@ public static async Task FromABOM( throw new Exception("Request to BOM API timed out.", ex); } - var listResponse = await response.Content.ReadAsStringAsync(); - // Surface the response body in the exception so transient upstream failures // (KiWIS 5xx with a JSON error payload) are self-documenting. - if (!response.IsSuccessStatusCode) + if ((int)listStatus >= 400) { string snippet = listResponse.Length > 500 ? listResponse.Substring(0, 500) + "…" : listResponse; throw new Exception( - $"Failed to retrieve time series list from BOM API. Status={(int)response.StatusCode} ({response.StatusCode}). Body={snippet}"); + $"Failed to retrieve time series list from BOM API. Status={(int)listStatus} ({listStatus}). Body={snippet}"); } // Check for error response from KiWIS @@ -1061,14 +1132,18 @@ public static async Task FromABOM( } } - // If no match found, take the first available series - if (tsId == null && root.GetArrayLength() > 1) + // No silent fallback: if the requested cadence isn't present we'd rather + // throw than quietly hand back hourly data when the caller asked for daily. + if (tsId == null) { - tsId = root[1][tsIdIndex].GetString(); + var available = new List(); + for (int i = 1; i < root.GetArrayLength() && available.Count < 10; i++) + if (tsNameIndex >= 0) + available.Add(root[i][tsNameIndex].GetString() ?? ""); + throw new Exception( + $"No suitable time series found for station {stationNumber} matching '{tsNamePattern}'. " + + $"Available ts_name values: [{string.Join(", ", available)}]"); } - - if (tsId == null) - throw new Exception($"No suitable time series found for station {stationNumber}"); } // Step 2: Get time series values using the ts_id @@ -1099,10 +1174,11 @@ public static async Task FromABOM( client.DefaultRequestHeaders.Add("Connection", "keep-alive"); client.Timeout = TimeSpan.FromSeconds(60); - HttpResponseMessage response; + HttpStatusCode valuesStatus; + string valuesResponse; try { - response = await client.GetAsync(valuesUrl); + (valuesStatus, valuesResponse) = await GetWithRetryAsync(client, valuesUrl); } catch (HttpRequestException ex) { @@ -1113,16 +1189,14 @@ public static async Task FromABOM( throw new Exception("Request to BOM API timed out while retrieving values.", ex); } - var valuesResponse = await response.Content.ReadAsStringAsync(); - // Surface the response body in the exception so transient upstream failures // (e.g. KiWIS returning 500 with `{"code":"DatasourceError","message":"Error connecting to WDP."}`) - // are self-documenting. Without this, EnsureSuccessStatusCode would discard the body. - if (!response.IsSuccessStatusCode) + // are self-documenting after the retry helper has exhausted its attempts. + if ((int)valuesStatus >= 400) { string snippet = valuesResponse.Length > 500 ? valuesResponse.Substring(0, 500) + "…" : valuesResponse; throw new Exception( - $"Failed to retrieve time series values from BOM API. Status={(int)response.StatusCode} ({response.StatusCode}). Body={snippet}"); + $"Failed to retrieve time series values from BOM API. Status={(int)valuesStatus} ({valuesStatus}). Body={snippet}"); } // Check for error response @@ -1147,10 +1221,15 @@ public static async Task FromABOM( { if (point.GetArrayLength() < 2) continue; - // Parse timestamp + // BOM returns ISO-8601 with explicit offset, e.g. "2021-11-01T00:00:00.000+10:00". + // DateTime.TryParse converts these to LOCAL time, which on a non-AEST machine + // shifts daily series labels by a calendar day. DateTimeOffset preserves the + // offset; .Date gives the wall-clock date BOM intended. string? timestampStr = point[0].GetString(); - if (!DateTime.TryParse(timestampStr, CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime date)) + if (!DateTimeOffset.TryParse(timestampStr, CultureInfo.InvariantCulture, + DateTimeStyles.None, out DateTimeOffset dto)) continue; + DateTime date = isInstantaneous ? dto.DateTime : dto.Date; // Parse value double val = double.NaN; diff --git a/Test_Numerics/Data/Time Series/Test_TimeSeriesDownload.cs b/Test_Numerics/Data/Time Series/Test_TimeSeriesDownload.cs index 3ec1712c..f5f6693d 100644 --- a/Test_Numerics/Data/Time Series/Test_TimeSeriesDownload.cs +++ b/Test_Numerics/Data/Time Series/Test_TimeSeriesDownload.cs @@ -122,24 +122,48 @@ public class Test_TimeSeriesDownload private static async Task Online() => await TimeSeriesDownload.IsConnectedToInternet(); /// - /// Per-service availability cache. Each entry runs its probe at most once per test - /// run; subsequent calls reuse the cached task. Used so that - /// integration tests skip cleanly when the upstream provider is unreachable - /// (e.g. BOM's WDP backend returning 500 / DatasourceError) instead of failing CI. + /// Per-service availability cache. Successful probes are cached for the rest of + /// the run. Failed probes are cached only for , + /// so a single transient flake (e.g. BOM's WDP backend returning 500 / + /// DatasourceError once) does not silently disable every test for that + /// provider for the entire run. /// - private static readonly ConcurrentDictionary>> _serviceAvailability = new(); + private static readonly ConcurrentDictionary _serviceState = new(); /// - /// Memoized "can we hit this service?" probe. Returns false when the host is offline - /// or when the supplied probe throws (any exception is treated as "service down"). + /// How long a "service unreachable" verdict sticks before we re-probe. /// - private static Task AvailableAsync(string serviceName, Func probe) => - _serviceAvailability.GetOrAdd(serviceName, _ => new Lazy>(async () => + private static readonly TimeSpan ProbeFailureTtl = TimeSpan.FromSeconds(30); + + /// + /// "Can we hit this service?" probe with TTL on negative results. Returns false + /// when the host is offline or when the supplied probe throws (any exception is + /// treated as "service down" for the next 30 seconds). + /// + private static async Task AvailableAsync(string serviceName, Func probe) + { + if (_serviceState.TryGetValue(serviceName, out var prior)) + { + if (prior.ok) return true; + if (DateTime.UtcNow - prior.at < ProbeFailureTtl) return false; + } + if (!await Online()) { - if (!await Online()) return false; - try { await probe(); return true; } - catch { return false; } - })).Value; + _serviceState[serviceName] = (false, DateTime.UtcNow); + return false; + } + try + { + await probe(); + _serviceState[serviceName] = (true, DateTime.UtcNow); + return true; + } + catch + { + _serviceState[serviceName] = (false, DateTime.UtcNow); + return false; + } + } /// /// Probes a small windowed CHMN download to confirm Water Survey of Canada services are responsive. From d86897a5bc2529282e39b97c6fdfa906bffc4fe7 Mon Sep 17 00:00:00 2001 From: HadenSmith Date: Tue, 5 May 2026 14:06:39 -0600 Subject: [PATCH 4/9] Fixed a bug with KNN resampling of TimeSeries data. --- Numerics/Data/Time Series/TimeSeries.cs | 52 +++- .../Data/Time Series/Test_TimeSeries.cs | 278 ++++++++++++++++++ docs/data/time-series.md | 13 +- 3 files changed, 330 insertions(+), 13 deletions(-) diff --git a/Numerics/Data/Time Series/TimeSeries.cs b/Numerics/Data/Time Series/TimeSeries.cs index 4fdd52cc..b739b413 100644 --- a/Numerics/Data/Time Series/TimeSeries.cs +++ b/Numerics/Data/Time Series/TimeSeries.cs @@ -2195,15 +2195,28 @@ public TimeSeries PeaksOverThresholdSeries(double threshold, int minStepsBetween #endregion /// - /// Resample the time series using k-Nearest neighbors. + /// Resample the time series using the conditional k-Nearest Neighbors bootstrap of + /// Lall and Sharma (1996). At each step, finds the K historical observations whose + /// (standardized) value is closest to the current state, randomly picks one of them + /// at index j, and advances by taking x[j+1] — the observation that historically + /// came AFTER the chosen neighbor. This preserves the conditional p(x_{t+1} | x_t). /// /// The number of time steps to resample. - /// The number of nearest neighbors. - /// The prng seed. Default = 12345. - /// + /// The number of nearest neighbors. Lall-Sharma suggests floor(sqrt(n)). + /// The PRNG seed. Default = 12345. + /// A new of length . + /// + /// Thrown when the series has fewer than 11 observations (the underlying KNN search + /// requires at least 10 candidate points after the last observation is excluded so + /// that x[j+1] is always in range). + /// + /// + /// Lall, U., Sharma, A. (1996). A nearest neighbor bootstrap for resampling + /// hydrologic time series. Water Resources Research, 32(3), 679-693. + /// public TimeSeries ResampleWithKNN(int timeSteps, int k, int seed = 12345) { - if (Count < 2) throw new ArgumentException("Need at least 2 points for resampling."); + if (Count < 11) throw new ArgumentException("Need at least 11 points for KNN resampling."); if (timeSteps < 1) throw new ArgumentException("The number of time steps must be at least 1."); // Initialize @@ -2221,15 +2234,25 @@ public TimeSeries ResampleWithKNN(int timeSteps, int k, int seed = 12345) stdData.Standardize(); var stdValues = stdData.ValuesToArray(); + // Exclude the last observation from the KNN candidate set so that any returned + // neighbor index j satisfies j + 1 < Count and x[j+1] is always in range. + var candidateValues = new double[Count - 1]; + Array.Copy(stdValues, 0, candidateValues, 0, Count - 1); + // Perform k-NN - var kNN = new KNearestNeighbors(stdValues, stdValues, k); + var kNN = new KNearestNeighbors(candidateValues, candidateValues, k); for (int i = 1; i < timeSteps; i++) { double val = (currentValue - mean) / stdDev; var kNearest = kNN.GetNeighbors([val]); if (kNearest == null) continue; int selectedIdx = kNearest[prng.Next(kNearest.Length)]; - currentValue = this[selectedIdx].Value; + // Advance to the observation that came AFTER the chosen neighbor — this is + // what makes it a CONDITIONAL bootstrap of p(x_{t+1} | x_t). Returning + // this[selectedIdx] would collapse to a near-stationary process around the + // initial value because every returned neighbor is, by construction, close + // in value to currentValue. + currentValue = this[selectedIdx + 1].Value; currentDate = TimeSeries.AddTimeInterval(currentDate, TimeInterval); timeSeries.Add(new SeriesOrdinate(currentDate, currentValue)); } @@ -2238,12 +2261,21 @@ public TimeSeries ResampleWithKNN(int timeSteps, int k, int seed = 12345) } /// - /// Resample the time series using the block bootstrap. + /// Resample the time series using a non-overlapping fixed-block bootstrap. + /// Collects every contiguous block of length from the + /// observed series, draws blocks uniformly at random with replacement, and + /// concatenates them until values have been produced. + /// Preserves marginal mean / variance and within-block autocorrelation; introduces + /// discontinuities at block boundaries. /// /// The number of time steps to resample. /// The resampling block size. - /// The prng seed. Default = 12345. - /// + /// The PRNG seed. Default = 12345. + /// A new of length . + /// + /// Künsch, H.R. (1989). The jackknife and the bootstrap for general stationary + /// observations. Annals of Statistics, 17(3), 1217-1241. + /// public TimeSeries ResampleWithBlockBootstrap(int timeSteps, int blockSize, int seed = 12345) { if (Count < 2) throw new ArgumentException("Need at least 2 points for resampling."); diff --git a/Test_Numerics/Data/Time Series/Test_TimeSeries.cs b/Test_Numerics/Data/Time Series/Test_TimeSeries.cs index 17f47d72..9ed921c0 100644 --- a/Test_Numerics/Data/Time Series/Test_TimeSeries.cs +++ b/Test_Numerics/Data/Time Series/Test_TimeSeries.cs @@ -1196,5 +1196,283 @@ public void Test_SeasonalDecompose_InvalidInputs() Assert.Throws(() => ts.SeasonalDecompose(12)); } + #region ResampleWithKNN tests + + /// + /// Build a stationary AR(1) series x[t] = phi*x[t-1] + N(0, sigma^2) using a fixed seed. + /// Used by the ResampleWithKNN/ResampleWithBlockBootstrap statistical tests. + /// + private static TimeSeries MakeAR1(int n, double phi, double sigma, int seed) + { + var rng = new Random(seed); + var values = new double[n]; + values[0] = 0.0; + for (int i = 1; i < n; i++) + { + // Box-Muller for a standard normal draw. + double u1 = 1.0 - rng.NextDouble(); + double u2 = 1.0 - rng.NextDouble(); + double z = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2); + values[i] = phi * values[i - 1] + sigma * z; + } + return new TimeSeries(TimeInterval.OneDay, new DateTime(2000, 1, 1), values); + } + + /// + /// Same seed must produce identical KNN resamples. + /// + [TestMethod] + public void Test_ResampleWithKNN_DeterministicWithSeed() + { + var ts = MakeAR1(n: 100, phi: 0.5, sigma: 1.0, seed: 1); + var a = ts.ResampleWithKNN(timeSteps: 50, k: 10, seed: 42); + var b = ts.ResampleWithKNN(timeSteps: 50, k: 10, seed: 42); + Assert.AreEqual(a.Count, b.Count); + for (int i = 0; i < a.Count; i++) + Assert.AreEqual(a[i].Value, b[i].Value, 1e-12, + $"KNN must be deterministic with a fixed seed; index {i} differs."); + } + + /// + /// Output length must equal the requested timeSteps exactly. + /// + [TestMethod] + public void Test_ResampleWithKNN_OutputLength() + { + var ts = MakeAR1(n: 100, phi: 0.5, sigma: 1.0, seed: 1); + foreach (int len in new[] { 1, 7, 50, 250 }) + { + var r = ts.ResampleWithKNN(timeSteps: len, k: 10, seed: 42); + Assert.AreEqual(len, r.Count, + $"KNN output length must match the requested timeSteps; expected {len}, got {r.Count}."); + } + } + + /// + /// Regression test for the off-by-one bug. + /// On a strongly trended series x[t] = t, the conditional KNN bootstrap (Lall-Sharma) + /// should advance through the trend, accumulating mean drift of ≈ +1 per step. The + /// pre-fix implementation took the neighbor's own value rather than x[j+1], which + /// kept the trajectory hovering near the starting value (zero net drift). + /// We average drift over multiple seeds to crush the random-walk noise. + /// + [TestMethod] + public void Test_ResampleWithKNN_AdvancesThroughTime() + { + // x[t] = t, t in [0, 199] + var values = Enumerable.Range(0, 200).Select(i => (double)i).ToArray(); + var ts = new TimeSeries(TimeInterval.OneDay, new DateTime(2000, 1, 1), values); + + const int steps = 50; + const int trials = 20; + double avgDrift = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithKNN(timeSteps: steps, k: 10, seed: seed); + avgDrift += (r[steps - 1].Value - r[0].Value); + } + avgDrift /= trials; + + // Pre-fix: avgDrift ~ N(0, ~5) — fails this assertion clearly. + // Post-fix: avgDrift ~ +50 (drift = +1 per step over 50 steps). + Assert.IsTrue(avgDrift > 25.0, + $"Expected KNN trajectory to advance through the trend (avgDrift > 25 over {steps} steps); " + + $"observed avgDrift = {avgDrift:F2}. The pre-fix off-by-one keeps the trajectory near the starting value."); + } + + /// + /// The marginal mean of resampled tails should match the input mean within sampling error. + /// + [TestMethod] + public void Test_ResampleWithKNN_PreservesMarginalMean() + { + var ts = MakeAR1(n: 200, phi: 0.5, sigma: 1.0, seed: 7); + double inputMean = ts.MeanValue(); + double inputStd = ts.StandardDeviation(); + + const int trials = 200; + const int steps = 100; + double sumOfTailMeans = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithKNN(timeSteps: steps, k: 10, seed: seed); + sumOfTailMeans += r.MeanValue(); + } + double grandMean = sumOfTailMeans / trials; + double standardError = inputStd / Math.Sqrt(steps * trials); + + // Allow 4x SE — generous to absorb short-series boundary effects in KNN. + Assert.IsTrue(Math.Abs(grandMean - inputMean) < 4.0 * standardError + 0.05, + $"KNN tails should preserve the marginal mean; input={inputMean:F4}, grand-mean={grandMean:F4}, " + + $"4*SE={4.0 * standardError:F4}."); + } + + /// + /// The marginal variance of resampled tails should be in the same regime as the input variance. + /// KNN can shrink variance because the conditional sampling pulls toward the local mean — we + /// only require the resample variance to remain within 50% of the input variance. + /// + [TestMethod] + public void Test_ResampleWithKNN_PreservesMarginalVariance() + { + var ts = MakeAR1(n: 200, phi: 0.5, sigma: 1.0, seed: 7); + double inputVar = ts.StandardDeviation() * ts.StandardDeviation(); + + const int trials = 200; + const int steps = 100; + double sumOfTailVars = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithKNN(timeSteps: steps, k: 10, seed: seed); + double rStd = r.StandardDeviation(); + sumOfTailVars += rStd * rStd; + } + double avgTailVar = sumOfTailVars / trials; + double ratio = avgTailVar / inputVar; + + Assert.IsTrue(ratio > 0.5 && ratio < 2.0, + $"KNN tail variance should be within 0.5x..2x of input variance; input={inputVar:F4}, " + + $"avg-tail={avgTailVar:F4}, ratio={ratio:F2}."); + } + + /// + /// On a synthetic AR(1) with phi=0.7, the resampled tails should recover lag-1 autocorrelation + /// in the same regime. The conditional bootstrap is what gives KNN this property. + /// + [TestMethod] + public void Test_ResampleWithKNN_RecoversAR1Autocorrelation() + { + const double phiTrue = 0.7; + var ts = MakeAR1(n: 300, phi: phiTrue, sigma: 1.0, seed: 7); + + const int trials = 200; + const int steps = 100; + double sumLag1 = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithKNN(timeSteps: steps, k: 10, seed: seed); + sumLag1 += Lag1Autocorrelation(r); + } + double avgLag1 = sumLag1 / trials; + + // Post-fix: avgLag1 should be > 0.4 (close to phiTrue=0.7, allow shrinkage). + // Pre-fix: avgLag1 is dominated by KNN-neighbor random walk near a fixed point, NOT phi. + Assert.IsTrue(avgLag1 > 0.4 && avgLag1 < 0.95, + $"KNN should recover lag-1 autocorrelation in the AR(1) regime; expected ~{phiTrue}, got {avgLag1:F3}."); + } + + private static double Lag1Autocorrelation(TimeSeries ts) + { + int n = ts.Count; + if (n < 3) return double.NaN; + double mean = ts.MeanValue(); + double num = 0, den = 0; + for (int i = 1; i < n; i++) num += (ts[i].Value - mean) * (ts[i - 1].Value - mean); + for (int i = 0; i < n; i++) den += (ts[i].Value - mean) * (ts[i].Value - mean); + return den > 0 ? num / den : double.NaN; + } + + /// + /// KNN must reject series that are too short for a sensible conditional bootstrap. + /// + [TestMethod] + public void Test_ResampleWithKNN_ThrowsForShortSeries() + { + // Count = 10 — one less than required since we exclude the last index from candidates + // and the underlying KNearestNeighbors needs ≥10 training points. + var ts = MakeAR1(n: 10, phi: 0.5, sigma: 1.0, seed: 1); + Assert.Throws(() => ts.ResampleWithKNN(timeSteps: 5, k: 5, seed: 42)); + } + + #endregion + + #region ResampleWithBlockBootstrap tests + + /// + /// Same seed must produce identical block-bootstrap resamples. + /// + [TestMethod] + public void Test_ResampleWithBlockBootstrap_DeterministicWithSeed() + { + var ts = MakeAR1(n: 100, phi: 0.5, sigma: 1.0, seed: 1); + var a = ts.ResampleWithBlockBootstrap(timeSteps: 50, blockSize: 5, seed: 42); + var b = ts.ResampleWithBlockBootstrap(timeSteps: 50, blockSize: 5, seed: 42); + Assert.AreEqual(a.Count, b.Count); + for (int i = 0; i < a.Count; i++) + Assert.AreEqual(a[i].Value, b[i].Value, 1e-12, + $"BlockBootstrap must be deterministic with a fixed seed; index {i} differs."); + } + + /// + /// Output length must equal the requested timeSteps exactly even when not divisible by blockSize. + /// + [TestMethod] + public void Test_ResampleWithBlockBootstrap_OutputLength() + { + var ts = MakeAR1(n: 100, phi: 0.5, sigma: 1.0, seed: 1); + // Try lengths that are NOT multiples of blockSize=7 to verify the truncation logic. + foreach (int len in new[] { 1, 7, 13, 50, 251 }) + { + var r = ts.ResampleWithBlockBootstrap(timeSteps: len, blockSize: 7, seed: 42); + Assert.AreEqual(len, r.Count, + $"BlockBootstrap output length must match requested timeSteps; expected {len}, got {r.Count}."); + } + } + + /// + /// The marginal mean of block-bootstrap tails should match the input mean within sampling error. + /// + [TestMethod] + public void Test_ResampleWithBlockBootstrap_PreservesMarginalMean() + { + var ts = MakeAR1(n: 200, phi: 0.5, sigma: 1.0, seed: 7); + double inputMean = ts.MeanValue(); + double inputStd = ts.StandardDeviation(); + + const int trials = 200; + const int steps = 100; + double sumOfTailMeans = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithBlockBootstrap(timeSteps: steps, blockSize: 5, seed: seed); + sumOfTailMeans += r.MeanValue(); + } + double grandMean = sumOfTailMeans / trials; + double standardError = inputStd / Math.Sqrt(steps * trials); + + Assert.IsTrue(Math.Abs(grandMean - inputMean) < 4.0 * standardError + 0.05, + $"BlockBootstrap tails should preserve the marginal mean; input={inputMean:F4}, " + + $"grand-mean={grandMean:F4}, 4*SE={4.0 * standardError:F4}."); + } + + /// + /// The marginal variance of block-bootstrap tails should be close to the input variance. + /// Block bootstrap preserves marginal variance well; we require within ±20%. + /// + [TestMethod] + public void Test_ResampleWithBlockBootstrap_PreservesMarginalVariance() + { + var ts = MakeAR1(n: 200, phi: 0.5, sigma: 1.0, seed: 7); + double inputVar = ts.StandardDeviation() * ts.StandardDeviation(); + + const int trials = 200; + const int steps = 100; + double sumOfTailVars = 0; + for (int seed = 1; seed <= trials; seed++) + { + var r = ts.ResampleWithBlockBootstrap(timeSteps: steps, blockSize: 5, seed: seed); + double rStd = r.StandardDeviation(); + sumOfTailVars += rStd * rStd; + } + double avgTailVar = sumOfTailVars / trials; + double ratio = avgTailVar / inputVar; + + Assert.IsTrue(ratio > 0.8 && ratio < 1.2, + $"BlockBootstrap should preserve marginal variance within ±20%; input={inputVar:F4}, " + + $"avg-tail={avgTailVar:F4}, ratio={ratio:F2}."); + } + + #endregion + } } diff --git a/docs/data/time-series.md b/docs/data/time-series.md index 62296722..1948eeb2 100644 --- a/docs/data/time-series.md +++ b/docs/data/time-series.md @@ -393,7 +393,7 @@ This prevents unreliable interpolation across long gaps while filling short data ### Block Bootstrap -The **block bootstrap** preserves temporal dependence by resampling contiguous blocks rather than individual observations. Given a block size $b$, the method randomly selects blocks of $b$ consecutive values (with replacement) and concatenates them: +The **block bootstrap** (Künsch 1989) preserves temporal dependence by resampling contiguous blocks rather than individual observations. Given a block size $b$, every contiguous run of $b$ consecutive values in the input is enumerated, blocks are drawn uniformly at random with replacement, and concatenated until the requested number of time steps is reached: ```cs // Generate a 1000-step synthetic series preserving 30-day temporal structure @@ -401,11 +401,11 @@ var resampled = ts.ResampleWithBlockBootstrap( timeSteps: 1000, blockSize: 30, seed: 42); ``` -Unlike the standard bootstrap (which destroys autocorrelation), the block bootstrap retains the short-range dependence structure within each block. +The block bootstrap preserves the marginal mean and variance and the within-block autocorrelation structure of the input series. Discontinuities can appear at block boundaries. ### k-Nearest Neighbors -The **k-nearest neighbors** (k-NN) resampling method generates synthetic time series that preserve the multivariate dependence structure. At each step, it finds the $k$ nearest neighbors of the current state in the historical record (using Euclidean distance on standardized values) and randomly selects one as the next value: +The **k-nearest neighbors** (k-NN) bootstrap of Lall & Sharma (1996) is a *conditional* resampler: at each step it draws the next synthetic value from the empirical conditional distribution $p(x_{t+1} \mid x_t)$. The current state $x_t$ is standardized, the $k$ historical observations whose standardized values are closest in Euclidean distance are identified, one of those neighbor indices $j$ is selected uniformly at random, and the trajectory advances by taking the value that *historically came after* the chosen neighbor — i.e., $x_{j+1}$: ```cs // Generate synthetic series using 5-nearest neighbors @@ -413,6 +413,13 @@ var synthetic = ts.ResampleWithKNN( timeSteps: 500, k: 5, seed: 42); ``` +The implementation is **univariate** (embedding dimension $d = 1$); higher-dimensional embedding and joint multivariate resampling are not currently exposed. Lall & Sharma suggest $k = \lfloor \sqrt{n} \rfloor$ as a reasonable default. The series must contain at least 11 observations. + +**References** + +- Künsch, H.R. (1989). The jackknife and the bootstrap for general stationary observations. *Annals of Statistics* **17**(3), 1217–1241. +- Lall, U., Sharma, A. (1996). A nearest neighbor bootstrap for resampling hydrologic time series. *Water Resources Research* **32**(3), 679–693. + ## Sorting and Filtering ### Sorting From 2e720ac3eab6743ced02ae4c98c1f3e12e0223c5 Mon Sep 17 00:00:00 2001 From: HadenSmith Date: Thu, 7 May 2026 11:07:48 -0600 Subject: [PATCH 5/9] Updated doi's in JOSS .bib file --- paper/paper.bib | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/paper/paper.bib b/paper/paper.bib index b3dcfaca..7226cb62 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -89,7 +89,8 @@ @incollection{neal2011 publisher = {CRC Press}, pages = {113--162}, year = {2011}, - chapter = {5} + chapter = {5}, + doi = {10.1201/b10905-6} } @article{vehtari2021, @@ -108,7 +109,8 @@ @book{efron1993 title = {An Introduction to the Bootstrap}, publisher = {Chapman \& Hall}, year = {1993}, - isbn = {978-0412042317} + isbn = {978-0412042317}, + doi = {10.1201/9780429246593} } @article{storn1997, @@ -167,7 +169,8 @@ @manual{hosking2019lmom title = {lmom: {L}-moments}, year = {2019}, note = {R package version 2.8}, - url = {https://CRAN.R-project.org/package=lmom} + url = {https://CRAN.R-project.org/package=lmom}, + doi = {10.32614/cran.package.lmom} } @article{stephenson2002evd, @@ -178,7 +181,8 @@ @article{stephenson2002evd number = {2}, pages = {31--32}, year = {2002}, - url = {https://CRAN.R-project.org/package=evd} + url = {https://CRAN.R-project.org/package=evd}, + doi = {10.32614/cran.package.evd} } @book{press2007, @@ -195,7 +199,8 @@ @book{rao2000 title = {Flood Frequency Analysis}, publisher = {CRC Press}, year = {2000}, - isbn = {978-0849300837} + isbn = {978-0849300837}, + doi = {10.1201/9780429128813} } @manual{viglione2024nsrfa, @@ -203,7 +208,8 @@ @manual{viglione2024nsrfa title = {nsRFA: Non-supervised Regional Frequency Analysis}, year = {2024}, note = {R package version 0.7-17}, - url = {https://CRAN.R-project.org/package=nsRFA} + url = {https://CRAN.R-project.org/package=nsRFA}, + doi = {10.32614/cran.package.nsrfa} } @manual{ribatet2022spatialextremes, @@ -211,7 +217,8 @@ @manual{ribatet2022spatialextremes title = {SpatialExtremes: Modelling Spatial Extremes}, year = {2022}, note = {R package version 2.1-0}, - url = {https://CRAN.R-project.org/package=SpatialExtremes} + url = {https://CRAN.R-project.org/package=SpatialExtremes}, + doi = {10.32614/cran.package.SpatialExtremes} } @article{rinnooy1987, From 732991469ee8f89aeaa124d0f8fbcc50d4dbff50 Mon Sep 17 00:00:00 2001 From: HadenSmith Date: Sun, 24 May 2026 09:54:49 -0600 Subject: [PATCH 6/9] Switching double.MinValue to double.NegativeInfinity in distribution function calls and likelihod calls in the base classes. Improving tests to catch the sentinal returns. Improving NUTS sampler to handle double.NegativeInfinity in the gradient. --- .../Bivariate Copulas/Base/BivariateCopula.cs | 8 +- .../Base/MultivariateDistribution.cs | 16 ++-- .../Distributions/Multivariate/Dirichlet.cs | 8 +- .../Distributions/Multivariate/Multinomial.cs | 10 +-- .../Multivariate/MultivariateNormal.cs | 4 +- .../Multivariate/MultivariateStudentT.cs | 4 +- .../Base/UnivariateDistributionBase.cs | 8 +- .../Univariate/CompetingRisks.cs | 10 +-- Numerics/Distributions/Univariate/Mixture.cs | 11 ++- Numerics/Sampling/MCMC/Base/MCMCSampler.cs | 2 +- Numerics/Sampling/MCMC/NUTS.cs | 90 +++++++++++++++---- Numerics/Sampling/MCMC/SNIS.cs | 28 ++++-- Numerics/Utilities/Tools.cs | 12 +-- .../Bivariate Copulas/Test_NormalCopula.cs | 24 +++++ .../Multivariate/Test_Dirichlet.cs | 13 +++ .../Multivariate/Test_Multinomial.cs | 13 +++ .../Multivariate/Test_MultivariateNormal.cs | 11 +++ .../Multivariate/Test_MultivariateStudentT.cs | 13 ++- .../Univariate/Test_CompetingRisks.cs | 19 ++++ .../Distributions/Univariate/Test_Mixture.cs | 11 +++ Test_Numerics/Sampling/MCMC/Test_SNIS.cs | 54 +++++++++++ Test_Numerics/Utilities/Test_Tools.cs | 30 +++++++ 22 files changed, 338 insertions(+), 61 deletions(-) diff --git a/Numerics/Distributions/Bivariate Copulas/Base/BivariateCopula.cs b/Numerics/Distributions/Bivariate Copulas/Base/BivariateCopula.cs index 293ab03d..17a624d5 100644 --- a/Numerics/Distributions/Bivariate Copulas/Base/BivariateCopula.cs +++ b/Numerics/Distributions/Bivariate Copulas/Base/BivariateCopula.cs @@ -110,6 +110,8 @@ public bool ParametersValid public double LogPDF(double u, double v) { double f = PDF(u, v); + if (double.IsNaN(f) || double.IsInfinity(f) || f <= 0d) + return double.NegativeInfinity; return Math.Log(f); } @@ -194,7 +196,7 @@ public double PseudoLogLikelihood(IList sampleDataX, IList sampl double LogLH = 0; for (int i = 0; i < sampleDataX.Count; i++) LogLH += LogPDF(sampleDataX[i], sampleDataY[i]); - if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.MinValue; + if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.NegativeInfinity; return LogLH; } @@ -212,7 +214,7 @@ public double IFMLogLikelihood(IList sampleDataX, IList sampleDa double LogLH = 0; for (int i = 0; i < sampleDataX.Count; i++) LogLH += LogPDF(MarginalDistributionX.CDF(sampleDataX[i]), MarginalDistributionY.CDF(sampleDataY[i])); - if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.MinValue; + if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.NegativeInfinity; return LogLH; } @@ -230,7 +232,7 @@ public double LogLikelihood(IList sampleDataX, IList sampleDataY double LogLH = 0; for (int i = 0; i < sampleDataX.Count; i++) LogLH += LogPDF(MarginalDistributionX.CDF(sampleDataX[i]), MarginalDistributionY.CDF(sampleDataY[i])) + MarginalDistributionX.LogPDF(sampleDataX[i]) + MarginalDistributionY.LogPDF(sampleDataY[i]); - if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.MinValue; + if (double.IsNaN(LogLH) || double.IsInfinity(LogLH)) return double.NegativeInfinity; return LogLH; } diff --git a/Numerics/Distributions/Multivariate/Base/MultivariateDistribution.cs b/Numerics/Distributions/Multivariate/Base/MultivariateDistribution.cs index cb456131..3a235880 100644 --- a/Numerics/Distributions/Multivariate/Base/MultivariateDistribution.cs +++ b/Numerics/Distributions/Multivariate/Base/MultivariateDistribution.cs @@ -55,8 +55,8 @@ public abstract class MultivariateDistribution : IMultivariateDistribution public virtual double LogPDF(double[] x) { double f = PDF(x); - // If the PDF returns an invalid probability, then return the worst log-probability. - if (double.IsNaN(f) || double.IsInfinity(f) || f <= 0d) return double.MinValue; + // If the PDF returns an invalid probability, then return log(0). + if (double.IsNaN(f) || double.IsInfinity(f) || f <= 0d) return double.NegativeInfinity; return Math.Log(f); } @@ -73,8 +73,8 @@ public virtual double LogPDF(double[] x) public virtual double LogCDF(double[] x) { double F = CDF(x); - // If the CDF returns an invalid probability, then return the worst log-probability. - if (double.IsNaN(F) || double.IsInfinity(F) || F <= 0d) return double.MinValue; + // If the CDF returns an invalid probability, then return log(0). + if (double.IsNaN(F) || double.IsInfinity(F) || F <= 0d) return double.NegativeInfinity; return Math.Log(F); } @@ -94,9 +94,9 @@ public virtual double CCDF(double[] x) public virtual double LogCCDF(double[] x) { double cF = CCDF(x); - // If the CCDF returns an invalid probability, then return the worst log-probability. - if (double.IsNaN(cF) || cF <= 0d) - return int.MinValue; + // If the CCDF returns an invalid probability, then return log(0). + if (double.IsNaN(cF) || double.IsInfinity(cF) || cF <= 0d) + return double.NegativeInfinity; return Math.Log(cF); } @@ -106,4 +106,4 @@ public virtual double LogCCDF(double[] x) public abstract MultivariateDistribution Clone(); } -} \ No newline at end of file +} diff --git a/Numerics/Distributions/Multivariate/Dirichlet.cs b/Numerics/Distributions/Multivariate/Dirichlet.cs index c2300aca..5737e92b 100644 --- a/Numerics/Distributions/Multivariate/Dirichlet.cs +++ b/Numerics/Distributions/Multivariate/Dirichlet.cs @@ -282,7 +282,7 @@ public override double PDF(double[] x) /// /// A point on the simplex. Components must be positive and sum to 1. /// - /// The log-density at x. Returns if x is not on the simplex. + /// The log-density at x. Returns if x is not on the simplex. /// /// /// @@ -295,16 +295,16 @@ public override double PDF(double[] x) /// public override double LogPDF(double[] x) { - if (x == null || x.Length != _dimension) return double.MinValue; + if (x == null || x.Length != _dimension) return double.NegativeInfinity; // Check that x is on the simplex double sum = 0; for (int i = 0; i < _dimension; i++) { - if (x[i] <= 0 || x[i] > 1) return double.MinValue; + if (x[i] <= 0 || x[i] > 1) return double.NegativeInfinity; sum += x[i]; } - if (Math.Abs(sum - 1.0) > 1e-10) return double.MinValue; + if (Math.Abs(sum - 1.0) > 1e-10) return double.NegativeInfinity; double logPdf = -_logNormalization; for (int i = 0; i < _dimension; i++) diff --git a/Numerics/Distributions/Multivariate/Multinomial.cs b/Numerics/Distributions/Multivariate/Multinomial.cs index 31d546ee..ff8019e6 100644 --- a/Numerics/Distributions/Multivariate/Multinomial.cs +++ b/Numerics/Distributions/Multivariate/Multinomial.cs @@ -204,20 +204,20 @@ public override double PDF(double[] x) /// Computes the log of the probability mass function. /// /// The count vector. Each xᵢ must be a non-negative integer and Σxᵢ = N. - /// The log probability. Returns if x is not valid. + /// The log probability. Returns if x is not valid. public double LogPMF(double[] x) { - if (x == null || x.Length != _dimension) return double.MinValue; + if (x == null || x.Length != _dimension) return double.NegativeInfinity; // Validate: all non-negative integers that sum to N int sum = 0; for (int i = 0; i < _dimension; i++) { int xi = (int)Math.Round(x[i]); - if (xi < 0 || Math.Abs(x[i] - xi) > 1e-10) return double.MinValue; + if (xi < 0 || Math.Abs(x[i] - xi) > 1e-10) return double.NegativeInfinity; sum += xi; } - if (sum != _n) return double.MinValue; + if (sum != _n) return double.NegativeInfinity; // Compute in log-space: log(N!) - sum(log(xi!)) + sum(xi * log(pi)) double logPmf = Factorial.LogFactorial(_n); @@ -227,7 +227,7 @@ public double LogPMF(double[] x) logPmf -= Factorial.LogFactorial(xi); if (xi > 0) { - if (_p[i] <= 0) return double.MinValue; + if (_p[i] <= 0) return double.NegativeInfinity; logPmf += xi * Math.Log(_p[i]); } } diff --git a/Numerics/Distributions/Multivariate/MultivariateNormal.cs b/Numerics/Distributions/Multivariate/MultivariateNormal.cs index 920108ad..90cedc82 100644 --- a/Numerics/Distributions/Multivariate/MultivariateNormal.cs +++ b/Numerics/Distributions/Multivariate/MultivariateNormal.cs @@ -338,7 +338,7 @@ public override double PDF(double[] x) public override double LogPDF(double[] x) { double f = -0.5d * Mahalanobis(x) + _lnconstant; - if (double.IsNaN(f) || double.IsInfinity(f)) return double.MinValue; + if (double.IsNaN(f) || double.IsInfinity(f)) return double.NegativeInfinity; return f; } @@ -2149,4 +2149,4 @@ public override MultivariateDistribution Clone() } } -} \ No newline at end of file +} diff --git a/Numerics/Distributions/Multivariate/MultivariateStudentT.cs b/Numerics/Distributions/Multivariate/MultivariateStudentT.cs index 66ce1031..5282f53a 100644 --- a/Numerics/Distributions/Multivariate/MultivariateStudentT.cs +++ b/Numerics/Distributions/Multivariate/MultivariateStudentT.cs @@ -400,7 +400,7 @@ public override double LogPDF(double[] x) { double Q = Mahalanobis(x); double f = _lnconstant - ((_degreesOfFreedom + _dimension) / 2.0) * Math.Log(1.0 + Q / _degreesOfFreedom); - if (double.IsNaN(f) || double.IsInfinity(f)) return double.MinValue; + if (double.IsNaN(f) || double.IsInfinity(f)) return double.NegativeInfinity; return f; } @@ -705,4 +705,4 @@ public override MultivariateDistribution Clone() } } -} \ No newline at end of file +} diff --git a/Numerics/Distributions/Univariate/Base/UnivariateDistributionBase.cs b/Numerics/Distributions/Univariate/Base/UnivariateDistributionBase.cs index 7c744f60..871e6992 100644 --- a/Numerics/Distributions/Univariate/Base/UnivariateDistributionBase.cs +++ b/Numerics/Distributions/Univariate/Base/UnivariateDistributionBase.cs @@ -150,7 +150,7 @@ public double LogLikelihood(IList sample) { logLH += LogPDF(sample[i]); } - if (double.IsNaN(logLH) || double.IsInfinity(logLH)) return double.MinValue; + if (double.IsNaN(logLH) || double.IsInfinity(logLH)) return double.NegativeInfinity; return logLH; } @@ -204,6 +204,8 @@ public double LogLikelihood_Intervals(double lowerLimit, double upperLimit) public virtual double LogPDF(double x) { double f = PDF(x); + if (double.IsNaN(f) || double.IsInfinity(f) || f <= 0.0) + return double.NegativeInfinity; return Math.Log(f); } @@ -220,6 +222,8 @@ public virtual double HF(double x) public virtual double LogCDF(double x) { double F = CDF(x); + if (double.IsNaN(F) || double.IsInfinity(F) || F <= 0.0) + return double.NegativeInfinity; return Math.Log(F); } @@ -233,6 +237,8 @@ public virtual double CCDF(double x) public virtual double LogCCDF(double x) { double cF = CCDF(x); + if (double.IsNaN(cF) || double.IsInfinity(cF) || cF <= 0.0) + return double.NegativeInfinity; return Math.Log(cF); } diff --git a/Numerics/Distributions/Univariate/CompetingRisks.cs b/Numerics/Distributions/Univariate/CompetingRisks.cs index 6627eaf8..a963b188 100644 --- a/Numerics/Distributions/Univariate/CompetingRisks.cs +++ b/Numerics/Distributions/Univariate/CompetingRisks.cs @@ -58,8 +58,8 @@ public CompetingRisks(IUnivariateDistribution[] distributions) private bool _mvnCreated = false; private MultivariateNormal _mvn = null!; - // Minimum log value to prevent -Infinity in log-likelihood - private const double _logZero = -745.0; // ≈ log(double.MinValue that's positive) + // Soft finite floor used in tail arithmetic before returning the final log-density. + private const double _logZero = -745.0; private const double _minDensity = 1E-300; /// @@ -579,7 +579,7 @@ public override double LogPDF(double x) // For dependent cases, fall back to numerical differentiation // but use a more stable approach double pdf = NumericalDerivative.Derivative(CDF, x); - return pdf > _minDensity ? Math.Log(pdf) : double.MinValue; + return pdf > _minDensity ? Math.Log(pdf) : double.NegativeInfinity; } } @@ -648,7 +648,7 @@ private double LogPDFMinimumIndependent(double x) // Final result: log(f) = log(sum h_i) + sum log(S_i) double logPdf = logSumHazard + sumLogSurvival; - return double.IsNaN(logPdf) || double.IsInfinity(logPdf) ? double.MinValue : logPdf; + return double.IsNaN(logPdf) || double.IsInfinity(logPdf) ? double.NegativeInfinity : logPdf; } /// @@ -714,7 +714,7 @@ private double LogPDFMaximumIndependent(double x) // Final result: log(f) = log(sum f_i/F_i) + sum log(F_i) double logPdf = logSumRatio + sumLogCdf; - return double.IsNaN(logPdf) || double.IsInfinity(logPdf) ? double.MinValue : logPdf; + return double.IsNaN(logPdf) || double.IsInfinity(logPdf) ? double.NegativeInfinity : logPdf; } diff --git a/Numerics/Distributions/Univariate/Mixture.cs b/Numerics/Distributions/Univariate/Mixture.cs index f6f311a8..564eef67 100644 --- a/Numerics/Distributions/Univariate/Mixture.cs +++ b/Numerics/Distributions/Univariate/Mixture.cs @@ -672,7 +672,7 @@ double EStep(double[] x) for (int i = 0; i < N; i++) { // Get max likelihood - double max = double.MinValue; + double max = double.NegativeInfinity; for (int k = 0; k < K; k++) { if (likelihood[i, k] > max) @@ -680,6 +680,13 @@ double EStep(double[] x) max = likelihood[i, k]; } } + if (double.IsNegativeInfinity(max)) + { + for (int k = 0; k < K; k++) + likelihood[i, k] = 0.0; + return double.NegativeInfinity; + } + // log-sum-exp trick begins here double sum = 0; for (int k = 0; k < K; k++) @@ -721,7 +728,7 @@ double logLH(double[] x) var dist = (Mixture)Clone(); dist.SetParameters(mleWeights, x); double lh = dist.LogLikelihood(sample); - if (double.IsNaN(lh) || double.IsInfinity(lh)) return double.MinValue; + if (double.IsNaN(lh) || double.IsInfinity(lh)) return double.NegativeInfinity; return lh; } diff --git a/Numerics/Sampling/MCMC/Base/MCMCSampler.cs b/Numerics/Sampling/MCMC/Base/MCMCSampler.cs index 716aa529..bbbb8465 100644 --- a/Numerics/Sampling/MCMC/Base/MCMCSampler.cs +++ b/Numerics/Sampling/MCMC/Base/MCMCSampler.cs @@ -618,7 +618,7 @@ public void Reset() AcceptCount = new int[NumberOfChains]; SampleCount = new int[NumberOfChains]; MeanLogLikelihood = new List(); - MAP = new ParameterSet([], double.MinValue); + MAP = new ParameterSet([], double.NegativeInfinity); } #endregion diff --git a/Numerics/Sampling/MCMC/NUTS.cs b/Numerics/Sampling/MCMC/NUTS.cs index ec42512f..76be0966 100644 --- a/Numerics/Sampling/MCMC/NUTS.cs +++ b/Numerics/Sampling/MCMC/NUTS.cs @@ -321,16 +321,9 @@ private double FindReasonableEpsilon(double[] theta0, double logLH0, int chainIn // Compute initial Hamiltonian double H0 = -logLH0 + 0.5 * DiagonalQuadraticForm(r0, _inverseMassMatrix[chainIndex]); - // Take one leapfrog step var thetaPrime = (double[])theta0.Clone(); var rPrime = (double[])r0.Clone(); - LeapfrogInPlace(thetaPrime, rPrime, epsilon, chainIndex); - - double logLH1 = SafeLogLikelihood(thetaPrime); - double H1 = -logLH1 + 0.5 * DiagonalQuadraticForm(rPrime, _inverseMassMatrix[chainIndex]); - double logAlpha = H0 - H1; - if (double.IsNaN(logAlpha) || double.IsNegativeInfinity(logAlpha)) - logAlpha = -1000.0; + double logAlpha = TrySingleStepLogAcceptance(theta0, r0, thetaPrime, rPrime, epsilon, H0, chainIndex); // Determine direction: double epsilon if acceptance too high, halve if too low double a = logAlpha > Math.Log(0.5) ? 1.0 : -1.0; @@ -347,21 +340,47 @@ private double FindReasonableEpsilon(double[] theta0, double logLH0, int chainIn if (epsilon < 1e-8 || epsilon > 1e6) break; - // Recompute with new epsilon Array.Copy(theta0, thetaPrime, D); Array.Copy(r0, rPrime, D); - LeapfrogInPlace(thetaPrime, rPrime, epsilon, chainIndex); - - logLH1 = SafeLogLikelihood(thetaPrime); - H1 = -logLH1 + 0.5 * DiagonalQuadraticForm(rPrime, _inverseMassMatrix[chainIndex]); - logAlpha = H0 - H1; - if (double.IsNaN(logAlpha) || double.IsNegativeInfinity(logAlpha)) - logAlpha = -1000.0; + logAlpha = TrySingleStepLogAcceptance(theta0, r0, thetaPrime, rPrime, epsilon, H0, chainIndex); } return Math.Max(1e-8, Math.Min(epsilon, 1e6)); } + /// + /// Attempts one leapfrog step and converts invalid Hamiltonian states to a low log-acceptance value. + /// + /// The starting position. + /// The starting momentum. + /// Working buffer for the proposed position. + /// Working buffer for the proposed momentum. + /// The leapfrog step size. + /// The initial Hamiltonian. + /// The chain index for mass matrix access. + /// The log acceptance statistic for the trial step, or a low value when the trial is invalid. + private double TrySingleStepLogAcceptance(double[] theta0, double[] r0, double[] thetaPrime, double[] rPrime, double epsilon, double initialHamiltonian, int chainIndex) + { + try + { + LeapfrogInPlace(thetaPrime, rPrime, epsilon, chainIndex); + + double logLH = SafeLogLikelihood(thetaPrime); + double hamiltonian = -logLH + 0.5 * DiagonalQuadraticForm(rPrime, _inverseMassMatrix[chainIndex]); + double logAlpha = initialHamiltonian - hamiltonian; + if (!Tools.IsFinite(logAlpha)) + return -1000.0; + + return logAlpha; + } + catch (ArithmeticException) + { + Array.Copy(theta0, thetaPrime, theta0.Length); + Array.Copy(r0, rPrime, r0.Length); + return -1000.0; + } + } + /// /// Performs a single leapfrog step in-place on raw arrays, using the per-chain mass matrix. /// Used by FindReasonableEpsilon to avoid Vector allocations. @@ -626,10 +645,23 @@ private TreeState BuildTree(Vector theta, Vector momentum, double epsilon, int d if (depth == 0) { // Base case: take one leapfrog step - var (thetaPrime, momentumPrime) = Leapfrog(theta, momentum, epsilon, chainIndex); + Vector thetaPrime; + Vector momentumPrime; + try + { + (thetaPrime, momentumPrime) = Leapfrog(theta, momentum, epsilon, chainIndex); + } + catch (ArithmeticException) + { + return InvalidTreeState(theta, momentum); + } + double logLH = SafeLogLikelihood(thetaPrime.Array); double H = -logLH + 0.5 * DiagonalQuadraticFormVec(momentumPrime, _inverseMassMatrix[chainIndex]); double logWeight = -H; + if (!Tools.IsFinite(logLH) || !Tools.IsFinite(H) || !Tools.IsFinite(logWeight)) + return InvalidTreeState(thetaPrime, momentumPrime); + bool divergent = (H - H0) > MAX_DELTA_H; double alpha = Math.Min(1.0, Math.Exp(H0 - H)); if (double.IsNaN(alpha)) alpha = 0; @@ -694,6 +726,30 @@ private TreeState BuildTree(Vector theta, Vector momentum, double epsilon, int d return tree; } + /// + /// Creates an invalid tree state for a trajectory that entered a non-finite log-density region. + /// + /// The position to preserve as the tree endpoint. + /// The momentum to preserve as the tree endpoint. + /// An invalid tree state with zero acceptance contribution. + private static TreeState InvalidTreeState(Vector theta, Vector momentum) + { + return new TreeState + { + ThetaMinus = theta.Clone(), + MomentumMinus = momentum.Clone(), + ThetaPlus = theta.Clone(), + MomentumPlus = momentum.Clone(), + ThetaPrime = theta.Clone(), + LogSumWeight = double.NegativeInfinity, + LogLikelihoodPrime = double.NegativeInfinity, + LeafCount = 1, + Valid = false, + SumAlpha = 0d, + NumAlpha = 1 + }; + } + /// /// Performs a single leapfrog integration step with boundary enforcement, /// using the per-chain diagonal mass matrix. diff --git a/Numerics/Sampling/MCMC/SNIS.cs b/Numerics/Sampling/MCMC/SNIS.cs index a5552f17..714b9a73 100644 --- a/Numerics/Sampling/MCMC/SNIS.cs +++ b/Numerics/Sampling/MCMC/SNIS.cs @@ -107,7 +107,7 @@ public override void Sample() MeanLogLikelihood = new List(); // Keeps track of best parameter set - MAP = new ParameterSet([], double.MinValue, double.MinValue); + MAP = new ParameterSet([], double.NegativeInfinity, double.NegativeInfinity); // Create parameter random values var rnds = _masterPRNG.NextDoubles(Iterations, NumberOfParameters); @@ -138,25 +138,33 @@ public override void Sample() // Get the maximum a posteriori for (int i = 0; i < Iterations; i++) - if (MarkovChains[0][i].Weight > MAP.Weight) + if (IsFinite(MarkovChains[0][i].Weight) && MarkovChains[0][i].Weight > MAP.Weight) MAP = MarkovChains[0][i].Clone(); // Get the normalization factor double max = MAP.Weight; + if (!IsFinite(max)) + throw new InvalidOperationException("SNIS failed because all importance weights are non-finite."); + double sum = 0; for (int i = 0; i < Iterations; i++) { - if (!double.IsNaN(MarkovChains[0][i].Weight) && MarkovChains[0][i].Weight != double.MinValue) + if (IsFinite(MarkovChains[0][i].Weight)) { sum += Math.Exp(MarkovChains[0][i].Weight - max); } - } + } + if (!IsFinite(sum) || sum <= 0d) + throw new InvalidOperationException("SNIS failed because the finite importance weights could not be normalized."); + double normalization = max + Math.Log(sum); + if (!IsFinite(normalization)) + throw new InvalidOperationException("SNIS failed because the importance-weight normalization is non-finite."); // Compute the posterior weights Parallel.For(0, Iterations, (idx) => { - double w = !double.IsNaN(MarkovChains[0][idx].Weight) && MarkovChains[0][idx].Weight != double.MinValue ? Math.Exp(MarkovChains[0][idx].Weight - normalization) : 0d; + double w = IsFinite(MarkovChains[0][idx].Weight) ? Math.Exp(MarkovChains[0][idx].Weight - normalization) : 0d; MarkovChains[0][idx] = new ParameterSet(MarkovChains[0][idx].Values, MarkovChains[0][idx].Fitness, w); }); @@ -180,5 +188,15 @@ public override void Sample() } } + /// + /// Determines whether a value is finite. + /// + /// The value to evaluate. + /// true when the value is neither NaN nor infinite; otherwise false. + private static bool IsFinite(double value) + { + return !double.IsNaN(value) && !double.IsInfinity(value); + } + } } diff --git a/Numerics/Utilities/Tools.cs b/Numerics/Utilities/Tools.cs index a69d6951..3455d4f0 100644 --- a/Numerics/Utilities/Tools.cs +++ b/Numerics/Utilities/Tools.cs @@ -519,7 +519,7 @@ public static double Product(IList values, IList indicators, bool u public static void MinMax(IList values, out double min, out double max) { min = double.MaxValue; - max = double.MinValue; + max = double.NegativeInfinity; if (values.Count == 0) { min = double.NaN; max = double.NaN; return; }; for (int i = 0; i < values.Count; i++) { @@ -556,11 +556,11 @@ public static int ArgMin(IList values) /// The list of values. public static int ArgMax(IList values) { - double max = double.MinValue; + double max = double.NegativeInfinity; int index = -1; for (int i = 0; i < values.Count; i++) { - if (values[i] > max) + if (!double.IsNaN(values[i]) && (index == -1 || values[i] > max)) { max = values[i]; index = i; @@ -622,7 +622,7 @@ public static double Min(IList values, IList indicators, bool useCo public static double Max(IList values) { if (values.Count == 0) return double.NaN; - double max = double.MinValue; + double max = double.NegativeInfinity; for (int i = 0; i < values.Count; i++) { double v = values[i]; @@ -643,7 +643,7 @@ public static double Max(IList values, IList indicators, bool useCo { if (values.Count == 0) return double.NaN; if (indicators.Count != values.Count) return double.NaN; - double max = double.MinValue; + double max = double.NegativeInfinity; bool any = false; int flag = useComplement ? 0 : 1; for (int i = 0; i < values.Count; i++) @@ -685,6 +685,7 @@ public static double ParallelAdd(ref double valueToAddTo, double valueToAdd) public static double LogSumExp(double u, double v) { double max = Math.Max(u, v); + if (double.IsNegativeInfinity(max)) return double.NegativeInfinity; return max + Math.Log(Math.Exp(u - max) + Math.Exp(v - max)); } @@ -696,6 +697,7 @@ public static double LogSumExp(IList values) { if (values.Count == 0) return double.NaN; double max = Max(values); + if (double.IsNegativeInfinity(max)) return double.NegativeInfinity; double sum = 0; for (int i = 0; i < values.Count; i++) { diff --git a/Test_Numerics/Distributions/Bivariate Copulas/Test_NormalCopula.cs b/Test_Numerics/Distributions/Bivariate Copulas/Test_NormalCopula.cs index da5ef2c2..6e43b5c0 100644 --- a/Test_Numerics/Distributions/Bivariate Copulas/Test_NormalCopula.cs +++ b/Test_Numerics/Distributions/Bivariate Copulas/Test_NormalCopula.cs @@ -52,6 +52,30 @@ public void Test_PDF() Assert.AreEqual(3.208773, copula.PDF(0.8, 0.2), 1E-6); } + /// + /// Test that invalid boundary log-density evaluations return log(0). + /// + [TestMethod] + public void Test_LogPDF_InvalidBoundary_ReturnsNegativeInfinity() + { + var copula = new NormalCopula(0.8); + + Assert.AreEqual(double.NegativeInfinity, copula.LogPDF(0.0, 0.5)); + } + + /// + /// Test that aggregate pseudo log-likelihood returns log(0) when any term is invalid. + /// + [TestMethod] + public void Test_PseudoLogLikelihood_InvalidBoundary_ReturnsNegativeInfinity() + { + var copula = new NormalCopula(0.8); + + Assert.AreEqual( + double.NegativeInfinity, + copula.PseudoLogLikelihood(new[] { 0.0, 0.2 }, new[] { 0.5, 0.8 })); + } + /// /// Test CDF /// diff --git a/Test_Numerics/Distributions/Multivariate/Test_Dirichlet.cs b/Test_Numerics/Distributions/Multivariate/Test_Dirichlet.cs index 6fc04e62..b8f5321d 100644 --- a/Test_Numerics/Distributions/Multivariate/Test_Dirichlet.cs +++ b/Test_Numerics/Distributions/Multivariate/Test_Dirichlet.cs @@ -215,6 +215,19 @@ public void Test_LogPDF() Assert.AreEqual(Math.Log(D.PDF(x)), D.LogPDF(x), 1e-10); } + /// + /// Test that invalid simplex inputs return the canonical log(0) sentinel. + /// + [TestMethod] + public void Test_LogPDF_InvalidInput_ReturnsNegativeInfinity() + { + var D = new Dirichlet(new double[] { 1.0, 2.0, 3.0 }); + + Assert.AreEqual(double.NegativeInfinity, D.LogPDF(new double[] { 0.5, 0.5 })); + Assert.AreEqual(double.NegativeInfinity, D.LogPDF(new double[] { -0.1, 0.6, 0.5 })); + Assert.AreEqual(double.NegativeInfinity, D.LogPDF(new double[] { 0.3, 0.3, 0.3 })); + } + /// /// Test random sampling: all samples on the simplex and moments converge. /// diff --git a/Test_Numerics/Distributions/Multivariate/Test_Multinomial.cs b/Test_Numerics/Distributions/Multivariate/Test_Multinomial.cs index 19ba5060..25bddfa6 100644 --- a/Test_Numerics/Distributions/Multivariate/Test_Multinomial.cs +++ b/Test_Numerics/Distributions/Multivariate/Test_Multinomial.cs @@ -153,6 +153,19 @@ public void Test_LogPMF() Assert.AreEqual(Math.Log(M.PDF(x)), M.LogPMF(x), 1e-10); } + /// + /// Test that invalid count vectors return the canonical log(0) sentinel. + /// + [TestMethod] + public void Test_LogPMF_InvalidInput_ReturnsNegativeInfinity() + { + var M = new Multinomial(10, new double[] { 0.5, 0.5 }); + + Assert.AreEqual(double.NegativeInfinity, M.LogPMF(new double[] { 3, 3 })); + Assert.AreEqual(double.NegativeInfinity, M.LogPMF(new double[] { -1, 11 })); + Assert.AreEqual(double.NegativeInfinity, M.LogPMF(new double[] { 5, 3, 2 })); + } + /// /// Test random sampling: each sample sums to N and moments converge. /// diff --git a/Test_Numerics/Distributions/Multivariate/Test_MultivariateNormal.cs b/Test_Numerics/Distributions/Multivariate/Test_MultivariateNormal.cs index 686610d0..93d227a5 100644 --- a/Test_Numerics/Distributions/Multivariate/Test_MultivariateNormal.cs +++ b/Test_Numerics/Distributions/Multivariate/Test_MultivariateNormal.cs @@ -61,6 +61,17 @@ public void Test_MultivariateNormalDist() } + /// + /// Test that non-finite log-density evaluations return the canonical log(0) sentinel. + /// + [TestMethod] + public void Test_LogPDF_NonFiniteInput_ReturnsNegativeInfinity() + { + var mvn = new MultivariateNormal(new[] { 0d, 0d }, new[,] { { 1d, 0d }, { 0d, 1d } }); + + Assert.AreEqual(double.NegativeInfinity, mvn.LogPDF(new[] { double.PositiveInfinity, 0d })); + } + /// /// Verification against Fortran routine /// http://www.math.wsu.edu/faculty/genz/software/fort77/mvndstpack.f diff --git a/Test_Numerics/Distributions/Multivariate/Test_MultivariateStudentT.cs b/Test_Numerics/Distributions/Multivariate/Test_MultivariateStudentT.cs index f970b5f7..7210e8dc 100644 --- a/Test_Numerics/Distributions/Multivariate/Test_MultivariateStudentT.cs +++ b/Test_Numerics/Distributions/Multivariate/Test_MultivariateStudentT.cs @@ -171,6 +171,17 @@ public void Test_LogPDF() Assert.AreEqual(-3.4353564596992499, mvt.LogPDF(new[] { 0.0, 0.0 }), 1E-10); } + /// + /// Verify that non-finite log-density evaluations return the canonical log(0) sentinel. + /// + [TestMethod] + public void Test_LogPDF_NonFiniteInput_ReturnsNegativeInfinity() + { + var mvt = CreateStandard2D(); + + Assert.AreEqual(double.NegativeInfinity, mvt.LogPDF(new[] { double.PositiveInfinity, 0.0 })); + } + /// /// Verify that 1D MVT PDF matches univariate Student's t PDF. /// MVT(ν=5, μ=[3], Σ=[[4]]) should equal StudentT(μ=3, σ=2, ν=5). @@ -937,4 +948,4 @@ public void Test_InverseCDF_ConsistentWithSampling() #endregion } -} \ No newline at end of file +} diff --git a/Test_Numerics/Distributions/Univariate/Test_CompetingRisks.cs b/Test_Numerics/Distributions/Univariate/Test_CompetingRisks.cs index f4b16c6b..cbaef530 100644 --- a/Test_Numerics/Distributions/Univariate/Test_CompetingRisks.cs +++ b/Test_Numerics/Distributions/Univariate/Test_CompetingRisks.cs @@ -429,6 +429,25 @@ public void Test_LogPDF_ConsistentWithPDF() "LogPDF should equal log(PDF) at median for max rule"); } + /// + /// Verifies that dependent numerical-differentiation log-density returns log(0) + /// when the density is outside the component support. + /// + [TestMethod] + public void Test_LogPDF_DependentInvalidSupport_ReturnsNegativeInfinity() + { + var cr = new CompetingRisks(new UnivariateDistributionBase[] + { + new Exponential(1.0), + new Exponential(2.0) + }) + { + Dependency = Numerics.Data.Statistics.Probability.DependencyType.PerfectlyPositive + }; + + Assert.AreEqual(double.NegativeInfinity, cr.LogPDF(-1.0)); + } + #region Minimum Rule - 2 Distributions diff --git a/Test_Numerics/Distributions/Univariate/Test_Mixture.cs b/Test_Numerics/Distributions/Univariate/Test_Mixture.cs index b0cc281b..ed94ae02 100644 --- a/Test_Numerics/Distributions/Univariate/Test_Mixture.cs +++ b/Test_Numerics/Distributions/Univariate/Test_Mixture.cs @@ -249,5 +249,16 @@ public void Test_Mixture_InverseCDF_ZeroInflated() } } + /// + /// Test that a mixture with no component support at x returns log(0). + /// + [TestMethod] + public void Test_Mixture_LogPDF_InvalidSupport_ReturnsNegativeInfinity() + { + var mix = new Mixture(new[] { 1.0 }, new[] { new Exponential(1.0) }); + + Assert.AreEqual(double.NegativeInfinity, mix.LogPDF(-1.0)); + } + } } diff --git a/Test_Numerics/Sampling/MCMC/Test_SNIS.cs b/Test_Numerics/Sampling/MCMC/Test_SNIS.cs index 1f61f018..a0b37331 100644 --- a/Test_Numerics/Sampling/MCMC/Test_SNIS.cs +++ b/Test_Numerics/Sampling/MCMC/Test_SNIS.cs @@ -75,5 +75,59 @@ double logLH(double[] x) Assert.AreEqual(5771.81, results.ParameterResults[1].SummaryStatistics.UpperCI, 0.05 * 5771.81); } + /// + /// This test verifies that SNIS fails fast when every importance weight is invalid. + /// + [TestMethod] + public void Test_SNIS_AllInvalidWeights_ThrowsInvalidOperationException() + { + var priors = new List { new Uniform(0d, 1d) }; + double logLH(double[] x) => double.NegativeInfinity; + + var sampler = new SNIS(priors, logLH) + { + Iterations = 100, + OutputLength = 100, + PRNGSeed = 12345 + }; + + Assert.Throws(() => sampler.Sample()); + } + + /// + /// This test verifies that finite SNIS weights normalize cleanly when invalid samples are present. + /// + [TestMethod] + public void Test_SNIS_MixedFiniteAndInvalidWeights_NormalizesFiniteWeights() + { + var priors = new List { new Uniform(0d, 1d) }; + double logLH(double[] x) => x[0] < 0.5d ? 0d : double.NegativeInfinity; + + var sampler = new SNIS(priors, logLH) + { + Iterations = 100, + OutputLength = 100, + PRNGSeed = 12345 + }; + + sampler.Sample(); + + double sum = 0d; + bool foundZeroWeight = false; + bool foundPositiveWeight = false; + foreach (var parameterSet in sampler.MarkovChains[0]) + { + Assert.IsFalse(double.IsNaN(parameterSet.Weight)); + Assert.IsFalse(double.IsInfinity(parameterSet.Weight)); + sum += parameterSet.Weight; + foundZeroWeight |= parameterSet.Weight == 0d; + foundPositiveWeight |= parameterSet.Weight > 0d; + } + + Assert.IsTrue(foundZeroWeight); + Assert.IsTrue(foundPositiveWeight); + Assert.AreEqual(1d, sum, 1e-12); + } + } } diff --git a/Test_Numerics/Utilities/Test_Tools.cs b/Test_Numerics/Utilities/Test_Tools.cs index 110f9315..1287b891 100644 --- a/Test_Numerics/Utilities/Test_Tools.cs +++ b/Test_Numerics/Utilities/Test_Tools.cs @@ -354,6 +354,36 @@ public void Test_LogSumExp() Assert.AreEqual(1000.70815,result2, 1E-04); } + /// + /// Testing max helpers when every candidate is negative infinity. + /// + [TestMethod] + public void Test_MaxHelpers_AllNegativeInfinity() + { + List values = new List { double.NegativeInfinity, double.NegativeInfinity }; + List indicators = new List { 1, 1 }; + + Tools.MinMax(values, out double min, out double max); + + Assert.AreEqual(double.NegativeInfinity, min); + Assert.AreEqual(double.NegativeInfinity, max); + Assert.AreEqual(0, Tools.ArgMax(values)); + Assert.AreEqual(double.NegativeInfinity, Tools.Max(values)); + Assert.AreEqual(double.NegativeInfinity, Tools.Max(values, indicators)); + } + + /// + /// Testing log-sum-exponential when every log input is negative infinity. + /// + [TestMethod] + public void Test_LogSumExp_AllNegativeInfinity() + { + List values = new List { double.NegativeInfinity, double.NegativeInfinity }; + + Assert.AreEqual(double.NegativeInfinity, Tools.LogSumExp(double.NegativeInfinity, double.NegativeInfinity)); + Assert.AreEqual(double.NegativeInfinity, Tools.LogSumExp(values)); + } + /// /// Testing integer sequence. /// From 5c5dfd1c64afeec9edf6c299adf7fbe4da63b17e Mon Sep 17 00:00:00 2001 From: HadenSmith Date: Mon, 8 Jun 2026 14:49:04 -0600 Subject: [PATCH 7/9] Added new link functions (Yoe-Johnson and Fisher-Z) and a new novel bootstrap method, the pivotal bootstrap. Added tests for each. --- Numerics/Data/Statistics/YeoJohnson.cs | 27 + .../Functions/Link Functions/FisherZLink.cs | 57 ++ .../Link Functions/LinkFunctionFactory.cs | 8 + .../Link Functions/LinkFunctionType.cs | 12 +- .../Link Functions/YeoJohnsonLink.cs | 101 ++ .../Sampling/Bootstrap/PivotalBootstrap.cs | 895 ++++++++++++++++++ Test_Numerics/Functions/Test_FisherZLink.cs | 96 ++ .../Functions/Test_YeoJohnsonLink.cs | 150 +++ .../Sampling/Test_PivotalBootstrap.cs | 166 ++++ 9 files changed, 1511 insertions(+), 1 deletion(-) create mode 100644 Numerics/Functions/Link Functions/FisherZLink.cs create mode 100644 Numerics/Functions/Link Functions/YeoJohnsonLink.cs create mode 100644 Numerics/Sampling/Bootstrap/PivotalBootstrap.cs create mode 100644 Test_Numerics/Functions/Test_FisherZLink.cs create mode 100644 Test_Numerics/Functions/Test_YeoJohnsonLink.cs create mode 100644 Test_Numerics/Sampling/Test_PivotalBootstrap.cs diff --git a/Numerics/Data/Statistics/YeoJohnson.cs b/Numerics/Data/Statistics/YeoJohnson.cs index ce4e32d6..b85adf11 100644 --- a/Numerics/Data/Statistics/YeoJohnson.cs +++ b/Numerics/Data/Statistics/YeoJohnson.cs @@ -37,6 +37,20 @@ public static void FitLambda(IList values, out double lambda) } } + /// + /// Fit the transform parameter using maximum likelihood estimation. + /// + /// The list of values to transform. + /// The fitted transformation exponent. + public static double FitLambda(IList values) + { + if (values == null) throw new ArgumentNullException(nameof(values)); + if (values.Count < 2) throw new ArgumentException("At least 2 values are required to fit lambda.", nameof(values)); + + FitLambda(values, out double lambda); + return lambda; + } + /// /// The log-likelihood function. The transformed observations are assumed to come from a /// normal distribution. The change of variable formula is used to write the log-likelihood function. @@ -160,6 +174,19 @@ public static double Transform(double value, double lambda) } } + /// + /// Returns the derivative of the Yeo-Johnson transformation with respect to the original value. + /// + /// The value at which to evaluate the derivative. + /// The transformation exponent. Range -5 to +5. + public static double Derivative(double value, double lambda) + { + if (Math.Abs(lambda) > 5d) return double.NaN; + return value >= 0d + ? Math.Pow(value + 1d, lambda - 1d) + : Math.Pow(-value + 1d, 1d - lambda); + } + /// /// Returns the Yeo-Johnson transformation of each value in the list. /// diff --git a/Numerics/Functions/Link Functions/FisherZLink.cs b/Numerics/Functions/Link Functions/FisherZLink.cs new file mode 100644 index 00000000..3d548bd7 --- /dev/null +++ b/Numerics/Functions/Link Functions/FisherZLink.cs @@ -0,0 +1,57 @@ +using System; +using System.Xml.Linq; + +namespace Numerics.Functions +{ + /// + /// Fisher z link function mapping correlations from (-1, 1) to the unconstrained real line. + /// + /// + /// + /// Domain: -1 < x < 1. h(x) = atanh(x), h⁻¹(η) = tanh(η), + /// h′(x) = 1 / (1 - x^2). + /// + /// + /// This link is commonly used for correlation parameters and other signed bounded parameters. + /// + /// + [Serializable] + public sealed class FisherZLink : ILinkFunction + { + /// + /// Thrown when is not in (-1, 1). + public double Link(double x) + { + if (x <= -1d || x >= 1d) + throw new ArgumentOutOfRangeException(nameof(x), "FisherZLink requires -1 < x < 1."); + + return 0.5d * Math.Log((1d + x) / (1d - x)); + } + + /// + public double InverseLink(double eta) + { + if (eta >= 0d) + { + double e = Math.Exp(-2d * eta); + return (1d - e) / (1d + e); + } + + double expEta = Math.Exp(2d * eta); + return (expEta - 1d) / (expEta + 1d); + } + + /// + /// Thrown when is not in (-1, 1). + public double DLink(double x) + { + if (x <= -1d || x >= 1d) + throw new ArgumentOutOfRangeException(nameof(x), "FisherZLink derivative requires -1 < x < 1."); + + return 1d / (1d - x * x); + } + + /// + public XElement ToXElement() => new XElement(nameof(FisherZLink)); + } +} diff --git a/Numerics/Functions/Link Functions/LinkFunctionFactory.cs b/Numerics/Functions/Link Functions/LinkFunctionFactory.cs index 4413a037..b932dc2e 100644 --- a/Numerics/Functions/Link Functions/LinkFunctionFactory.cs +++ b/Numerics/Functions/Link Functions/LinkFunctionFactory.cs @@ -35,6 +35,10 @@ public static ILinkFunction Create(LinkFunctionType type) return new ProbitLink(); case LinkFunctionType.ComplementaryLogLog: return new ComplementaryLogLogLink(); + case LinkFunctionType.YeoJohnson: + return new YeoJohnsonLink(); + case LinkFunctionType.FisherZ: + return new FisherZLink(); default: throw new ArgumentOutOfRangeException(nameof(type), $"Unknown link function type: {type}."); } @@ -63,6 +67,10 @@ public static ILinkFunction CreateFromXElement(XElement xElement) return new ProbitLink(); case nameof(ComplementaryLogLogLink): return new ComplementaryLogLogLink(); + case nameof(YeoJohnsonLink): + return new YeoJohnsonLink(xElement); + case nameof(FisherZLink): + return new FisherZLink(); default: throw new NotSupportedException($"Unknown link function type: '{xElement.Name.LocalName}'."); } diff --git a/Numerics/Functions/Link Functions/LinkFunctionType.cs b/Numerics/Functions/Link Functions/LinkFunctionType.cs index 5b58bdcf..56eb341f 100644 --- a/Numerics/Functions/Link Functions/LinkFunctionType.cs +++ b/Numerics/Functions/Link Functions/LinkFunctionType.cs @@ -39,6 +39,16 @@ public enum LinkFunctionType /// /// Complementary log-log link: η = log(−log(1 − x)). Used for asymmetric binary response models. /// - ComplementaryLogLog + ComplementaryLogLog, + + /// + /// Yeo-Johnson power-transformation link function for real-valued skew or shape parameters. + /// + YeoJohnson, + + /// + /// Fisher z link: η = atanh(x), commonly used for correlation parameters. + /// + FisherZ } } diff --git a/Numerics/Functions/Link Functions/YeoJohnsonLink.cs b/Numerics/Functions/Link Functions/YeoJohnsonLink.cs new file mode 100644 index 00000000..f1bf66ea --- /dev/null +++ b/Numerics/Functions/Link Functions/YeoJohnsonLink.cs @@ -0,0 +1,101 @@ +using Numerics.Data.Statistics; +using System; +using System.Globalization; +using System.Xml.Linq; + +namespace Numerics.Functions +{ + /// + /// Yeo-Johnson power-transformation link function. + /// + /// + /// + /// The Yeo-Johnson transformation is defined on the full real line. It is useful + /// for skew or shape parameters that do not have a closed-form + /// variance-stabilizing link. + /// + /// + [Serializable] + public sealed class YeoJohnsonLink : ILinkFunction + { + /// + /// Initializes a new instance of the class with lambda equal to 1. + /// + public YeoJohnsonLink() + { + Lambda = 1d; + } + + /// + /// Initializes a new instance of the class. + /// + /// The Yeo-Johnson transformation parameter. + /// Thrown when is not finite. + public YeoJohnsonLink(double lambda) + { + if (double.IsNaN(lambda) || double.IsInfinity(lambda)) + throw new ArgumentOutOfRangeException(nameof(lambda), "Lambda must be finite."); + + Lambda = lambda; + } + + /// + /// Initializes a new instance of the class by estimating lambda from representative values. + /// + /// Representative values used to estimate lambda. + /// Thrown when is null. + public YeoJohnsonLink(double[] values) + { + if (values == null) + throw new ArgumentNullException(nameof(values)); + if (values.Length < 2) + throw new ArgumentException("At least 2 values are required to fit lambda.", nameof(values)); + + Lambda = YeoJohnson.FitLambda(values); + } + + /// + /// Initializes a new instance of the class from its XML representation. + /// + /// The XML element to read. + /// Thrown when is null. + /// Thrown when the element does not contain a Lambda attribute. + public YeoJohnsonLink(XElement xElement) + { + if (xElement == null) + throw new ArgumentNullException(nameof(xElement)); + + XAttribute? lambdaAttribute = xElement.Attribute("Lambda"); + if (lambdaAttribute == null) + throw new ArgumentException("The YeoJohnsonLink element must contain a Lambda attribute.", nameof(xElement)); + + Lambda = double.Parse(lambdaAttribute.Value, NumberStyles.Any, CultureInfo.InvariantCulture); + } + + /// + /// Gets the Yeo-Johnson transformation parameter. + /// + public double Lambda { get; } + + /// + public double Link(double x) + { + return YeoJohnson.Transform(x, Lambda); + } + + /// + public double InverseLink(double eta) + { + return YeoJohnson.InverseTransform(eta, Lambda); + } + + /// + public double DLink(double x) + { + return YeoJohnson.Derivative(x, Lambda); + } + + /// + public XElement ToXElement() => new XElement(nameof(YeoJohnsonLink), new XAttribute("Lambda", Lambda.ToString("G17", CultureInfo.InvariantCulture))); + } +} diff --git a/Numerics/Sampling/Bootstrap/PivotalBootstrap.cs b/Numerics/Sampling/Bootstrap/PivotalBootstrap.cs new file mode 100644 index 00000000..a33f1929 --- /dev/null +++ b/Numerics/Sampling/Bootstrap/PivotalBootstrap.cs @@ -0,0 +1,895 @@ +using Numerics.Data.Statistics; +using Numerics.Functions; +using Numerics.Mathematics.LinearAlgebra; +using Numerics.Mathematics.Optimization; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; + +namespace Numerics.Sampling +{ + /// + /// Specifies how invalid pivotal bootstrap draws should be handled. + /// + public enum PivotalBootstrapInvalidDrawPolicy + { + /// + /// Drop invalid pivotal draws from the returned ensemble. + /// + Drop, + + /// + /// Replace invalid pivotal draws with the corresponding raw bootstrap fit. + /// + UseRaw, + + /// + /// Replace invalid pivotal draws with the parent fit. + /// + UseParent + } + + /// + /// Stores a fitted parameter vector and its covariance matrix. + /// + [Serializable] + public sealed class PivotalBootstrapFit + { + /// + /// Initializes a new instance of the class. + /// + /// The fitted parameter set. + /// The covariance matrix for the fitted parameters. + public PivotalBootstrapFit(ParameterSet parameters, Matrix covariance) + { + if (parameters.Values == null) + throw new ArgumentException("The parameter set must contain values.", nameof(parameters)); + if (covariance == null) + throw new ArgumentNullException(nameof(covariance)); + if (!covariance.IsSquare) + throw new ArgumentException("The covariance matrix must be square.", nameof(covariance)); + if (covariance.NumberOfRows != parameters.Values.Length) + throw new ArgumentException("The covariance dimension must match the parameter count.", nameof(covariance)); + + Parameters = parameters.Clone(); + Covariance = covariance.Clone(); + } + + /// + /// Initializes a new instance of the class. + /// + /// The fitted parameter values. + /// The covariance matrix for the fitted parameters. + public PivotalBootstrapFit(double[] parameters, Matrix covariance) + : this(new ParameterSet(parameters != null ? (double[])parameters.Clone() : throw new ArgumentNullException(nameof(parameters)), double.NaN), covariance) + { + } + + /// + /// Gets the fitted parameter set. + /// + public ParameterSet Parameters { get; } + + /// + /// Gets the covariance matrix for . + /// + public Matrix Covariance { get; } + + /// + /// Gets the number of fitted parameters. + /// + public int ParameterCount => Parameters.Values.Length; + } + + /// + /// Delegate-backed scalar link used by . + /// + /// + /// This type gives callers complete control over the link, inverse link, and + /// derivative used for each parameter. Use to adapt + /// Numerics link-function classes such as , , + /// or . + /// + [Serializable] + public sealed class PivotalBootstrapLink + { + /// + /// Initializes a new instance of the class. + /// + /// The link transformation. + /// The inverse link transformation. + /// The first derivative of the link with respect to the raw parameter. + /// An optional descriptive name. + public PivotalBootstrapLink( + Func link, + Func inverseLink, + Func derivative, + string? name = null) + { + Link = link ?? throw new ArgumentNullException(nameof(link)); + InverseLink = inverseLink ?? throw new ArgumentNullException(nameof(inverseLink)); + Derivative = derivative ?? throw new ArgumentNullException(nameof(derivative)); + Name = name ?? "Custom"; + } + + /// + /// Gets an identity link. + /// + public static PivotalBootstrapLink Identity { get; } = FromLinkFunction(new IdentityLink()); + + /// + /// Gets the link transformation. + /// + public Func Link { get; } + + /// + /// Gets the inverse link transformation. + /// + public Func InverseLink { get; } + + /// + /// Gets the first derivative of the link with respect to the raw parameter. + /// + public Func Derivative { get; } + + /// + /// Gets the descriptive link name. + /// + public string Name { get; } + + /// + /// Adapts an to a pivotal bootstrap link. + /// + /// The Numerics link function. + /// An optional descriptive name. + public static PivotalBootstrapLink FromLinkFunction(ILinkFunction linkFunction, string? name = null) + { + if (linkFunction == null) + throw new ArgumentNullException(nameof(linkFunction)); + + return new PivotalBootstrapLink( + linkFunction.Link, + linkFunction.InverseLink, + linkFunction.DLink, + name ?? linkFunction.GetType().Name); + } + } + + /// + /// Context supplied to a pivotal bootstrap link factory. + /// + public sealed class PivotalBootstrapLinkContext + { + private readonly double[,] _rawParameterValues; + + /// + /// Initializes a new instance of the class. + /// + /// The parent fit. + /// The accepted raw bootstrap fits. + public PivotalBootstrapLinkContext(PivotalBootstrapFit parentFit, PivotalBootstrapFit[] rawBootstrapFits) + { + ParentFit = parentFit ?? throw new ArgumentNullException(nameof(parentFit)); + RawBootstrapFits = rawBootstrapFits ?? throw new ArgumentNullException(nameof(rawBootstrapFits)); + + int p = parentFit.ParameterCount; + _rawParameterValues = new double[rawBootstrapFits.Length, p]; + for (int i = 0; i < rawBootstrapFits.Length; i++) + { + for (int j = 0; j < p; j++) + _rawParameterValues[i, j] = rawBootstrapFits[i].Parameters.Values[j]; + } + } + + /// + /// Gets the parent fit. + /// + public PivotalBootstrapFit ParentFit { get; } + + /// + /// Gets the accepted raw bootstrap fits. + /// + public PivotalBootstrapFit[] RawBootstrapFits { get; } + + /// + /// Gets the number of parameters. + /// + public int ParameterCount => ParentFit.ParameterCount; + + /// + /// Gets a copy of the raw bootstrap values for one parameter. + /// + /// The zero-based parameter index. + public double[] GetRawParameterValues(int parameterIndex) + { + if (parameterIndex < 0 || parameterIndex >= ParameterCount) + throw new ArgumentOutOfRangeException(nameof(parameterIndex)); + + var values = new double[RawBootstrapFits.Length]; + for (int i = 0; i < values.Length; i++) + values[i] = _rawParameterValues[i, parameterIndex]; + return values; + } + } + + /// + /// Options for transforming raw bootstrap fits into pivotal bootstrap draws. + /// + public class PivotalBootstrapTransformOptions + { + /// + /// Gets or sets a factory that returns one scalar link per parameter. + /// + /// + /// The factory is called after raw bootstrap fits have been accepted, so it can + /// estimate data-adaptive links from the parent and raw bootstrap ensembles. + /// If omitted, identity links are used for every parameter. + /// + public Func? LinkFactory { get; set; } + + /// + /// Gets or sets an optional statistic function applied to each returned parameter set. + /// + public Func? StatisticFunction { get; set; } + + /// + /// Gets or sets an optional filter applied to raw bootstrap fits before the pivotal transform. + /// + public Func? ReplicateFilter { get; set; } + + /// + /// Gets or sets an optional validator applied to transformed parameter values. + /// + public Func? ParameterValidator { get; set; } + + /// + /// Gets or sets the policy used when a pivotal draw is invalid. Default is . + /// + public PivotalBootstrapInvalidDrawPolicy InvalidDrawPolicy { get; set; } = PivotalBootstrapInvalidDrawPolicy.Drop; + + /// + /// Gets or sets a value indicating whether covariance matrices should be repaired to be symmetric positive definite. + /// + public bool RegularizeCovariances { get; set; } = true; + + /// + /// Gets or sets an optional absolute component limit for the standardized pivot vector. + /// + public double? ZLimit { get; set; } + + /// + /// Gets or sets a value indicating whether small Gaussian jitter should be added in pivot space. + /// + public bool AddJitter { get; set; } + + /// + /// Gets or sets the standard deviation of optional Gaussian jitter in pivot space. + /// + public double JitterScale { get; set; } = 0.01d; + + /// + /// Gets or sets the pseudo-random-number generator seed. Default = 12345. + /// + public int PRNGSeed { get; set; } = 12345; + } + + /// + /// Options for running an end-to-end pivotal bootstrap. + /// + /// The data type being bootstrapped. + public sealed class PivotalBootstrapOptions : PivotalBootstrapTransformOptions + { + /// + /// Gets or sets the function used to resample data from the parent fit. + /// + public Func? ResampleFunction { get; set; } + + /// + /// Gets or sets the function used to fit one bootstrap data set and compute its covariance. + /// + public Func? FitFunction { get; set; } + + /// + /// Gets or sets the number of raw bootstrap replicates to request. Default = 10,000. + /// + public int Replicates { get; set; } = 10000; + + /// + /// Gets or sets the maximum number of retries for a failed raw bootstrap fit. Default = 20. + /// + public int MaxRetries { get; set; } = 20; + + /// + /// Gets or sets the maximum number of parallel workers. Values less than 1 use the default scheduler setting. + /// + public int MaxDegreeOfParallelism { get; set; } = 0; + } + + /// + /// Diagnostics returned by a pivotal bootstrap run. + /// + [Serializable] + public sealed class PivotalBootstrapDiagnostics + { + /// + /// Gets or sets the number of raw bootstrap replicates requested. + /// + public int RequestedReplicates { get; set; } + + /// + /// Gets or sets the number of raw bootstrap fits rejected before transformation. + /// + public int RejectedRawReplicates { get; set; } + + /// + /// Gets or sets the number of raw bootstrap fits that failed during end-to-end resampling. + /// + public int FailedRawReplicates { get; set; } + + /// + /// Gets or sets the number of raw bootstrap fits accepted for transformation. + /// + public int AcceptedRawReplicates { get; set; } + + /// + /// Gets or sets the number of transformed pivotal draws retained. + /// + public int RetainedPivotalReplicates { get; set; } + + /// + /// Gets or sets the number of transformed pivotal draws dropped or replaced according to policy. + /// + public int InvalidPivotalReplicates { get; set; } + + /// + /// Gets or sets the raw bootstrap resampling time. + /// + public TimeSpan ResamplingTime { get; set; } + + /// + /// Gets or sets the pivotal transformation time. + /// + public TimeSpan TransformationTime { get; set; } + } + + /// + /// Results returned by a pivotal bootstrap run. + /// + [Serializable] + public sealed class PivotalBootstrapResults + { + internal PivotalBootstrapResults( + PivotalBootstrapFit parentFit, + PivotalBootstrapFit[] rawFits, + ParameterSet[] pivotalParameterSets, + PivotalBootstrapLink[] links, + double[]? parentStatistics, + double[,]? rawStatistics, + double[,]? pivotalStatistics, + PivotalBootstrapDiagnostics diagnostics) + { + ParentFit = parentFit; + RawFits = rawFits; + RawParameterSets = rawFits.Select(f => f.Parameters.Clone()).ToArray(); + PivotalParameterSets = pivotalParameterSets; + Links = links; + ParentStatistics = parentStatistics; + RawStatistics = rawStatistics; + PivotalStatistics = pivotalStatistics; + Diagnostics = diagnostics; + } + + /// + /// Gets the parent fit. + /// + public PivotalBootstrapFit ParentFit { get; } + + /// + /// Gets the accepted raw bootstrap fits and their covariances. + /// + public PivotalBootstrapFit[] RawFits { get; } + + /// + /// Gets the accepted raw bootstrap parameter sets. + /// + public ParameterSet[] RawParameterSets { get; } + + /// + /// Gets the retained pivotal bootstrap parameter sets. + /// + public ParameterSet[] PivotalParameterSets { get; } + + /// + /// Gets the links used for the pivotal transformation. + /// + public PivotalBootstrapLink[] Links { get; } + + /// + /// Gets the statistic values for the parent fit, when a statistic function was supplied. + /// + public double[]? ParentStatistics { get; } + + /// + /// Gets raw bootstrap statistics as [replicate, statistic], when a statistic function was supplied. + /// + public double[,]? RawStatistics { get; } + + /// + /// Gets pivotal bootstrap statistics as [replicate, statistic], when a statistic function was supplied. + /// + public double[,]? PivotalStatistics { get; } + + /// + /// Gets run diagnostics. + /// + public PivotalBootstrapDiagnostics Diagnostics { get; } + + /// + /// Gets percentile confidence intervals for pivotal bootstrap parameters. + /// + /// The two-sided alpha level, e.g. 0.1 for a 90 percent interval. + public BootstrapStatisticResult[] GetPivotalParameterConfidenceIntervals(double alpha) + { + return CreateIntervalResults(PivotalParameterSets, ParentFit.Parameters.Values, alpha); + } + + /// + /// Gets percentile confidence intervals for raw bootstrap parameters. + /// + /// The two-sided alpha level, e.g. 0.1 for a 90 percent interval. + public BootstrapStatisticResult[] GetRawParameterConfidenceIntervals(double alpha) + { + return CreateIntervalResults(RawParameterSets, ParentFit.Parameters.Values, alpha); + } + + /// + /// Gets percentile confidence intervals for pivotal bootstrap statistics. + /// + /// The two-sided alpha level, e.g. 0.1 for a 90 percent interval. + public BootstrapStatisticResult[] GetPivotalStatisticConfidenceIntervals(double alpha) + { + if (PivotalStatistics == null || ParentStatistics == null) + throw new InvalidOperationException("No statistic function was supplied."); + + return CreateIntervalResults(PivotalStatistics, ParentStatistics, alpha); + } + + /// + /// Gets percentile confidence intervals for raw bootstrap statistics. + /// + /// The two-sided alpha level, e.g. 0.1 for a 90 percent interval. + public BootstrapStatisticResult[] GetRawStatisticConfidenceIntervals(double alpha) + { + if (RawStatistics == null || ParentStatistics == null) + throw new InvalidOperationException("No statistic function was supplied."); + + return CreateIntervalResults(RawStatistics, ParentStatistics, alpha); + } + + private static BootstrapStatisticResult[] CreateIntervalResults(ParameterSet[] parameterSets, double[] estimates, double alpha) + { + if (parameterSets.Length == 0) + return Array.Empty(); + + int columns = parameterSets[0].Values.Length; + var values = new double[parameterSets.Length, columns]; + for (int i = 0; i < parameterSets.Length; i++) + { + for (int j = 0; j < columns; j++) + values[i, j] = parameterSets[i].Values[j]; + } + + return CreateIntervalResults(values, estimates, alpha); + } + + private static BootstrapStatisticResult[] CreateIntervalResults(double[,] values, double[] estimates, double alpha) + { + if (alpha <= 0d || alpha >= 1d) + throw new ArgumentOutOfRangeException(nameof(alpha), "Alpha must be between 0 and 1."); + + int rows = values.GetLength(0); + int columns = values.GetLength(1); + var results = new BootstrapStatisticResult[columns]; + for (int j = 0; j < columns; j++) + { + var column = new List(rows); + for (int i = 0; i < rows; i++) + { + double value = values[i, j]; + if (IsFinite(value)) + column.Add(value); + } + + column.Sort(); + results[j] = new BootstrapStatisticResult + { + PopulationEstimate = estimates[j], + LowerCI = column.Count > 0 ? Statistics.Percentile(column, alpha / 2d, true) : double.NaN, + UpperCI = column.Count > 0 ? Statistics.Percentile(column, 1d - alpha / 2d, true) : double.NaN, + ValidCount = column.Count, + TotalCount = rows, + StandardError = column.Count > 1 ? Statistics.StandardDeviation(column) : double.NaN, + Mean = column.Count > 0 ? column.Average() : double.NaN + }; + } + + return results; + } + + private static bool IsFinite(double value) + { + return !double.IsNaN(value) && !double.IsInfinity(value); + } + } + + /// + /// Runs the bias-corrected pivotal bootstrap. + /// + public static class PivotalBootstrap + { + /// + /// Runs an end-to-end pivotal bootstrap. + /// + /// The data type being bootstrapped. + /// The original data. + /// The parent fit and covariance. + /// Bootstrap options. + public static PivotalBootstrapResults Run( + TData originalData, + PivotalBootstrapFit parentFit, + PivotalBootstrapOptions options) + { + if (options == null) + throw new ArgumentNullException(nameof(options)); + if (options.ResampleFunction == null) + throw new InvalidOperationException("A resample function is required."); + if (options.FitFunction == null) + throw new InvalidOperationException("A fit function is required."); + if (options.Replicates < 1) + throw new ArgumentOutOfRangeException(nameof(options.Replicates), "The number of replicates must be positive."); + if (options.MaxRetries < 1) + throw new ArgumentOutOfRangeException(nameof(options.MaxRetries), "The maximum retry count must be positive."); + + ValidateFit(parentFit, null); + var stopwatch = Stopwatch.StartNew(); + var prng = new MersenneTwister(options.PRNGSeed); + int[] seeds = prng.NextIntegers(options.Replicates); + var rawFits = new PivotalBootstrapFit?[options.Replicates]; + int failed = 0; + + var parallelOptions = new ParallelOptions(); + if (options.MaxDegreeOfParallelism > 0) + parallelOptions.MaxDegreeOfParallelism = options.MaxDegreeOfParallelism; + + Parallel.For(0, options.Replicates, parallelOptions, index => + { + bool accepted = false; + for (int retry = 0; retry < options.MaxRetries; retry++) + { + try + { + var rng = new MersenneTwister(seeds[index] + 10 * retry); + TData sample = options.ResampleFunction(originalData, parentFit, rng); + PivotalBootstrapFit fit = options.FitFunction(sample); + if (!IsAcceptedRawFit(fit, parentFit.ParameterCount, options)) + continue; + + rawFits[index] = fit; + accepted = true; + } + catch + { + // Retry failed resamples or fits. + } + + if (accepted) + break; + } + + if (!accepted) + Interlocked.Increment(ref failed); + }); + + stopwatch.Stop(); + PivotalBootstrapFit[] acceptedFits = rawFits.Where(f => f != null).Select(f => f!).ToArray(); + PivotalBootstrapResults results = Transform(parentFit, acceptedFits, options); + results.Diagnostics.RequestedReplicates = options.Replicates; + results.Diagnostics.FailedRawReplicates = failed; + results.Diagnostics.ResamplingTime = stopwatch.Elapsed; + return results; + } + + /// + /// Transforms raw bootstrap fits into pivotal bootstrap draws. + /// + /// The parent fit and covariance. + /// Raw bootstrap fits and covariances. + /// Transformation options. If omitted, identity links are used. + public static PivotalBootstrapResults Transform( + PivotalBootstrapFit parentFit, + IEnumerable rawBootstrapFits, + PivotalBootstrapTransformOptions? options = null) + { + if (rawBootstrapFits == null) + throw new ArgumentNullException(nameof(rawBootstrapFits)); + + options ??= new PivotalBootstrapTransformOptions(); + ValidateFit(parentFit, null); + int p = parentFit.ParameterCount; + var stopwatch = Stopwatch.StartNew(); + PivotalBootstrapFit[] suppliedRaw = rawBootstrapFits.ToArray(); + var acceptedRaw = suppliedRaw + .Where(f => IsAcceptedRawFit(f, p, options)) + .ToArray(); + + if (acceptedRaw.Length == 0) + throw new InvalidOperationException("No valid raw bootstrap fits were supplied."); + + var diagnostics = new PivotalBootstrapDiagnostics + { + RequestedReplicates = acceptedRaw.Length, + RejectedRawReplicates = suppliedRaw.Length - acceptedRaw.Length, + AcceptedRawReplicates = acceptedRaw.Length + }; + + var context = new PivotalBootstrapLinkContext(parentFit, acceptedRaw); + PivotalBootstrapLink[] links = options.LinkFactory != null + ? options.LinkFactory(context) + : Enumerable.Repeat(PivotalBootstrapLink.Identity, p).ToArray(); + ValidateLinks(links, p); + + double[] parentEta = LinkParameters(parentFit.Parameters.Values, links); + Matrix parentLinkCovariance = LinkCovariance(parentFit, links, options.RegularizeCovariances); + var parentCholesky = new CholeskyDecomposition(parentLinkCovariance); + + var pivotalParameterSets = new List(acceptedRaw.Length); + var jitterRng = new MersenneTwister(options.PRNGSeed); + int invalid = 0; + + for (int i = 0; i < acceptedRaw.Length; i++) + { + PivotalBootstrapFit rawFit = acceptedRaw[i]; + if (TryCreatePivotalDraw(rawFit, parentFit, links, parentEta, parentCholesky, options, jitterRng, out ParameterSet pivotalParameters)) + { + pivotalParameterSets.Add(pivotalParameters); + } + else + { + invalid++; + if (TryApplyInvalidPolicy(rawFit, parentFit, options.InvalidDrawPolicy, out ParameterSet fallbackParameters)) + pivotalParameterSets.Add(fallbackParameters); + } + } + + stopwatch.Stop(); + diagnostics.InvalidPivotalReplicates = invalid; + diagnostics.RetainedPivotalReplicates = pivotalParameterSets.Count; + diagnostics.TransformationTime = stopwatch.Elapsed; + + Func? statistic = options.StatisticFunction; + double[]? parentStatistics = statistic != null ? ValidateStatistics(statistic(parentFit.Parameters)) : null; + double[,]? rawStatistics = statistic != null ? ComputeStatistics(acceptedRaw.Select(f => f.Parameters).ToArray(), statistic) : null; + double[,]? pivotalStatistics = statistic != null ? ComputeStatistics(pivotalParameterSets.ToArray(), statistic) : null; + + return new PivotalBootstrapResults( + parentFit, + acceptedRaw, + pivotalParameterSets.ToArray(), + links, + parentStatistics, + rawStatistics, + pivotalStatistics, + diagnostics); + } + + private static bool TryCreatePivotalDraw( + PivotalBootstrapFit rawFit, + PivotalBootstrapFit parentFit, + PivotalBootstrapLink[] links, + double[] parentEta, + CholeskyDecomposition parentCholesky, + PivotalBootstrapTransformOptions options, + Random jitterRng, + out ParameterSet pivotalParameters) + { + pivotalParameters = default; + try + { + double[] rawEta = LinkParameters(rawFit.Parameters.Values, links); + Matrix rawLinkCovariance = LinkCovariance(rawFit, links, options.RegularizeCovariances); + var rawCholesky = new CholeskyDecomposition(rawLinkCovariance); + var difference = new double[parentEta.Length]; + for (int j = 0; j < difference.Length; j++) + difference[j] = parentEta[j] - rawEta[j]; + + double[] z = rawCholesky.Forward(new Vector(difference)).ToArray(); + if (options.ZLimit.HasValue && z.Any(value => Math.Abs(value) > options.ZLimit.Value)) + return false; + + if (options.AddJitter && options.JitterScale > 0d) + { + for (int j = 0; j < z.Length; j++) + z[j] += options.JitterScale * Numerics.Distributions.Normal.StandardZ(Math.Min(1d - 1e-16, Math.Max(1e-16, jitterRng.NextDouble()))); + } + + double[] reinflated = parentCholesky.L * z; + double[] pivotalEta = new double[parentEta.Length]; + double[] pivotalValues = new double[parentEta.Length]; + for (int j = 0; j < pivotalValues.Length; j++) + { + pivotalEta[j] = parentEta[j] + reinflated[j]; + pivotalValues[j] = links[j].InverseLink(pivotalEta[j]); + } + + if (!AllFinite(pivotalValues)) + return false; + if (options.ParameterValidator != null && !options.ParameterValidator(pivotalValues)) + return false; + + pivotalParameters = new ParameterSet(pivotalValues, rawFit.Parameters.Fitness, rawFit.Parameters.Weight); + return true; + } + catch + { + return false; + } + } + + private static bool TryApplyInvalidPolicy( + PivotalBootstrapFit rawFit, + PivotalBootstrapFit parentFit, + PivotalBootstrapInvalidDrawPolicy policy, + out ParameterSet parameterSet) + { + parameterSet = default; + switch (policy) + { + case PivotalBootstrapInvalidDrawPolicy.Drop: + return false; + case PivotalBootstrapInvalidDrawPolicy.UseRaw: + parameterSet = rawFit.Parameters.Clone(); + return true; + case PivotalBootstrapInvalidDrawPolicy.UseParent: + parameterSet = parentFit.Parameters.Clone(); + return true; + default: + throw new ArgumentOutOfRangeException(nameof(policy), $"Unknown invalid-draw policy: {policy}."); + } + } + + private static bool IsAcceptedRawFit(PivotalBootstrapFit? fit, int parameterCount, PivotalBootstrapTransformOptions options) + { + if (fit == null || !IsValidFit(fit, parameterCount)) + return false; + if (options.ReplicateFilter != null && !options.ReplicateFilter(fit)) + return false; + return true; + } + + private static void ValidateFit(PivotalBootstrapFit fit, int? parameterCount) + { + if (!IsValidFit(fit, parameterCount)) + throw new ArgumentException("The fit must contain finite parameters and a finite square covariance matrix.", nameof(fit)); + } + + private static bool IsValidFit(PivotalBootstrapFit? fit, int? parameterCount) + { + if (fit == null) + return false; + if (fit.Parameters.Values == null || fit.Parameters.Values.Length == 0) + return false; + if (parameterCount.HasValue && fit.Parameters.Values.Length != parameterCount.Value) + return false; + if (!AllFinite(fit.Parameters.Values)) + return false; + if (fit.Covariance == null || !fit.Covariance.IsSquare || fit.Covariance.NumberOfRows != fit.Parameters.Values.Length) + return false; + + for (int i = 0; i < fit.Covariance.NumberOfRows; i++) + { + for (int j = 0; j < fit.Covariance.NumberOfColumns; j++) + { + if (!IsFinite(fit.Covariance[i, j])) + return false; + } + } + + return true; + } + + private static void ValidateLinks(PivotalBootstrapLink[] links, int parameterCount) + { + if (links == null) + throw new ArgumentNullException(nameof(links)); + if (links.Length != parameterCount) + throw new ArgumentException("The link factory must return one link per parameter.", nameof(links)); + if (links.Any(link => link == null)) + throw new ArgumentException("The link factory returned a null link.", nameof(links)); + } + + private static double[] LinkParameters(double[] parameters, PivotalBootstrapLink[] links) + { + var eta = new double[parameters.Length]; + for (int i = 0; i < parameters.Length; i++) + eta[i] = links[i].Link(parameters[i]); + + if (!AllFinite(eta)) + throw new ArgumentException("The link transformation produced a non-finite value."); + + return eta; + } + + private static Matrix LinkCovariance(PivotalBootstrapFit fit, PivotalBootstrapLink[] links, bool regularize) + { + int p = fit.ParameterCount; + var jacobian = new Matrix(p); + for (int i = 0; i < p; i++) + { + double derivative = links[i].Derivative(fit.Parameters.Values[i]); + if (!IsFinite(derivative)) + throw new ArgumentException("The link derivative produced a non-finite value."); + jacobian[i, i] = derivative; + } + + Matrix linkCovariance = jacobian * fit.Covariance * jacobian.Transpose(); + if (!regularize) + return linkCovariance; + + return MatrixRegularization.MakeSymmetricPositiveDefinite(linkCovariance); + } + + private static double[,] ComputeStatistics(ParameterSet[] parameterSets, Func statistic) + { + if (parameterSets.Length == 0) + return new double[0, 0]; + + double[] first = ValidateStatistics(statistic(parameterSets[0])); + var values = new double[parameterSets.Length, first.Length]; + for (int j = 0; j < first.Length; j++) + values[0, j] = first[j]; + + for (int i = 1; i < parameterSets.Length; i++) + { + double[] row = ValidateStatistics(statistic(parameterSets[i]), first.Length); + for (int j = 0; j < first.Length; j++) + values[i, j] = row[j]; + } + + return values; + } + + private static double[] ValidateStatistics(double[] statistics, int? expectedLength = null) + { + if (statistics == null) + throw new InvalidOperationException("The statistic function returned null."); + if (statistics.Length == 0) + throw new InvalidOperationException("The statistic function must return at least one statistic."); + if (expectedLength.HasValue && statistics.Length != expectedLength.Value) + throw new InvalidOperationException("The statistic function must return the same number of statistics for every draw."); + if (!AllFinite(statistics)) + throw new InvalidOperationException("The statistic function returned a non-finite value."); + + return statistics; + } + + private static bool AllFinite(double[] values) + { + for (int i = 0; i < values.Length; i++) + { + if (!IsFinite(values[i])) + return false; + } + + return true; + } + + private static bool IsFinite(double value) + { + return !double.IsNaN(value) && !double.IsInfinity(value); + } + + } +} diff --git a/Test_Numerics/Functions/Test_FisherZLink.cs b/Test_Numerics/Functions/Test_FisherZLink.cs new file mode 100644 index 00000000..e59996d8 --- /dev/null +++ b/Test_Numerics/Functions/Test_FisherZLink.cs @@ -0,0 +1,96 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics.Functions; +using System; + +namespace Functions +{ + /// + /// Unit tests for the class. + /// + [TestClass] + public class Test_FisherZLink + { + private const double RoundTripTol = 1e-12; + private const double DeltaH = 1e-7; + private const double DerivativeTol = 1e-6; + + /// + /// Verifies known link values. + /// + [TestMethod] + public void Link_KnownValues() + { + var link = new FisherZLink(); + + Assert.AreEqual(0d, link.Link(0d), 1e-12); + Assert.AreEqual(0.5d * Math.Log(3d), link.Link(0.5d), 1e-12); + Assert.AreEqual(-0.5d * Math.Log(3d), link.Link(-0.5d), 1e-12); + } + + /// + /// Verifies inverse-link values. + /// + [TestMethod] + public void InverseLink_KnownValues() + { + var link = new FisherZLink(); + + Assert.AreEqual(0d, link.InverseLink(0d), 1e-12); + Assert.AreEqual(0.5d, link.InverseLink(0.5d * Math.Log(3d)), 1e-12); + Assert.AreEqual(-0.5d, link.InverseLink(-0.5d * Math.Log(3d)), 1e-12); + } + + /// + /// Verifies round-trip behavior over the correlation domain. + /// + [TestMethod] + public void RoundTrip_RecoversInput() + { + var link = new FisherZLink(); + foreach (double x in new[] { -0.99d, -0.75d, -0.25d, 0d, 0.25d, 0.75d, 0.99d }) + { + Assert.AreEqual(x, link.InverseLink(link.Link(x)), RoundTripTol); + } + } + + /// + /// Verifies derivative values and finite-difference consistency. + /// + [TestMethod] + public void DLink_FiniteDifferenceConsistency() + { + var link = new FisherZLink(); + foreach (double x in new[] { -0.8d, -0.25d, 0d, 0.25d, 0.8d }) + { + double expected = 1d / (1d - x * x); + double finiteDifference = (link.Link(x + DeltaH) - link.Link(x - DeltaH)) / (2d * DeltaH); + Assert.AreEqual(expected, link.DLink(x), 1e-12); + Assert.AreEqual(finiteDifference, link.DLink(x), DerivativeTol); + } + } + + /// + /// Verifies domain checks. + /// + [TestMethod] + public void Link_OutsideOpenInterval_Throws() + { + var link = new FisherZLink(); + + Assert.Throws(() => link.Link(-1d)); + Assert.Throws(() => link.Link(1d)); + Assert.Throws(() => link.DLink(-1d)); + Assert.Throws(() => link.DLink(1d)); + } + + /// + /// Verifies factory and XML construction. + /// + [TestMethod] + public void Factory_CreatesFisherZLink() + { + Assert.IsInstanceOfType(LinkFunctionFactory.Create(LinkFunctionType.FisherZ), typeof(FisherZLink)); + Assert.IsInstanceOfType(LinkFunctionFactory.CreateFromXElement(new FisherZLink().ToXElement()), typeof(FisherZLink)); + } + } +} diff --git a/Test_Numerics/Functions/Test_YeoJohnsonLink.cs b/Test_Numerics/Functions/Test_YeoJohnsonLink.cs new file mode 100644 index 00000000..16ccbfb0 --- /dev/null +++ b/Test_Numerics/Functions/Test_YeoJohnsonLink.cs @@ -0,0 +1,150 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics.Functions; +using System; +using System.Xml.Linq; + +namespace Functions +{ + /// + /// Unit tests for the class. + /// + [TestClass] + public class Test_YeoJohnsonLink + { + private const double RoundTripTol = 1e-9; + private const double DeltaH = 1e-7; + private const double DerivativeTol = 1e-4; + + /// + /// Verifies the default constructor uses the identity-transform lambda. + /// + [TestMethod] + public void Constructor_Default_LambdaIsOne() + { + var link = new YeoJohnsonLink(); + + Assert.AreEqual(1d, link.Lambda, 1e-12); + } + + /// + /// Verifies the lambda constructor stores the requested value. + /// + [TestMethod] + public void Constructor_Lambda_StoresValue() + { + var link = new YeoJohnsonLink(0.5d); + + Assert.AreEqual(0.5d, link.Lambda, 1e-12); + } + + /// + /// Verifies the values constructor rejects null. + /// + [TestMethod] + public void Constructor_Values_Null_Throws() + { + Assert.Throws(() => new YeoJohnsonLink((double[])null!)); + } + + /// + /// Verifies the values constructor rejects degenerate samples. + /// + [TestMethod] + public void Constructor_Values_SingleElement_Throws() + { + Assert.Throws(() => new YeoJohnsonLink(new[] { 1d })); + } + + /// + /// Verifies lambda fitting produces a finite value for a non-degenerate sample. + /// + [TestMethod] + public void Constructor_Values_ProducesFiniteLambda() + { + var link = new YeoJohnsonLink(new[] { -2d, -1d, -0.25d, 0d, 0.5d, 1d, 3d }); + + Assert.IsTrue(IsFinite(link.Lambda)); + } + + /// + /// Verifies the XML constructor rejects null. + /// + [TestMethod] + public void Constructor_XElement_Null_Throws() + { + Assert.Throws(() => new YeoJohnsonLink((XElement)null!)); + } + + /// + /// Verifies lambda equal to 1 is the identity transform. + /// + [TestMethod] + public void Link_LambdaOne_IsIdentity() + { + var link = new YeoJohnsonLink(1d); + foreach (double x in new[] { -5d, -1d, 0d, 0.5d, 1d, 5d }) + { + Assert.AreEqual(x, link.Link(x), 1e-10); + Assert.AreEqual(1d, link.DLink(x), 1e-10); + } + } + + /// + /// Verifies round-trip behavior for positive and negative values. + /// + [TestMethod] + public void RoundTrip_PositiveAndNegativeValues() + { + var link = new YeoJohnsonLink(0.5d); + foreach (double x in new[] { -10d, -5d, -1d, -0.1d, 0d, 0.1d, 1d, 5d, 10d }) + { + double recovered = link.InverseLink(link.Link(x)); + Assert.AreEqual(x, recovered, RoundTripTol); + } + } + + /// + /// Verifies the derivative against finite differences. + /// + [TestMethod] + public void DLink_FiniteDifferenceConsistency() + { + var link = new YeoJohnsonLink(0.5d); + foreach (double x in new[] { -5d, -2d, -0.5d, 0.5d, 1d, 5d }) + { + double finiteDifference = (link.Link(x + DeltaH) - link.Link(x - DeltaH)) / (2d * DeltaH); + Assert.AreEqual(finiteDifference, link.DLink(x), DerivativeTol); + } + } + + /// + /// Verifies XML serialization preserves lambda. + /// + [TestMethod] + public void XmlRoundTrip_PreservesLambda() + { + var original = new YeoJohnsonLink(0.75d); + var restored = new YeoJohnsonLink(original.ToXElement()); + + Assert.AreEqual(original.Lambda, restored.Lambda, 1e-12); + } + + /// + /// Verifies factory construction for Yeo-Johnson links. + /// + [TestMethod] + public void Factory_CreatesYeoJohnsonLink() + { + Assert.IsInstanceOfType(LinkFunctionFactory.Create(LinkFunctionType.YeoJohnson), typeof(YeoJohnsonLink)); + + var restored = LinkFunctionFactory.CreateFromXElement(new YeoJohnsonLink(0.25d).ToXElement()); + Assert.IsInstanceOfType(restored, typeof(YeoJohnsonLink)); + Assert.AreEqual(0.25d, ((YeoJohnsonLink)restored).Lambda, 1e-12); + } + + private static bool IsFinite(double value) + { + return !double.IsNaN(value) && !double.IsInfinity(value); + } + } +} diff --git a/Test_Numerics/Sampling/Test_PivotalBootstrap.cs b/Test_Numerics/Sampling/Test_PivotalBootstrap.cs new file mode 100644 index 00000000..6227ef4b --- /dev/null +++ b/Test_Numerics/Sampling/Test_PivotalBootstrap.cs @@ -0,0 +1,166 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics.Data.Statistics; +using Numerics.Distributions; +using Numerics.Functions; +using Numerics.Mathematics.LinearAlgebra; +using Numerics.Mathematics.Optimization; +using Numerics.Sampling; +using System; +using System.Linq; + +namespace Sampling +{ + /// + /// Unit tests for the bias-corrected pivotal bootstrap. + /// + [TestClass] + public class Test_PivotalBootstrap + { + /// + /// Verifies the pivotal transform uses the replicate covariance to standardize + /// and the parent covariance to re-inflate. + /// + [TestMethod] + public void Transform_IdentityLink_UsesBothCovariances() + { + var parent = Fit(new[] { 10d, 20d }, new double[,] { { 4d, 0d }, { 0d, 9d } }); + var raw = Fit(new[] { 8d, 17d }, new double[,] { { 1d, 0d }, { 0d, 1d } }); + + var options = new PivotalBootstrapTransformOptions + { + RegularizeCovariances = false + }; + + PivotalBootstrapResults result = PivotalBootstrap.Transform(parent, new[] { raw }, options); + + Assert.HasCount(1, result.PivotalParameterSets); + Assert.AreEqual(14d, result.PivotalParameterSets[0].Values[0], 1e-12); + Assert.AreEqual(29d, result.PivotalParameterSets[0].Values[1], 1e-12); + } + + /// + /// Verifies callers have full control over the links through the link factory. + /// + [TestMethod] + public void Transform_CustomLinkFactory_ControlsLinkDefinition() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + bool factoryWasCalled = false; + + var options = new PivotalBootstrapTransformOptions + { + RegularizeCovariances = false, + LinkFactory = context => + { + factoryWasCalled = true; + Assert.AreEqual(1, context.ParameterCount); + CollectionAssert.AreEqual(new[] { 8d }, context.GetRawParameterValues(0)); + return new[] + { + new PivotalBootstrapLink( + x => 2d * x, + eta => eta / 2d, + x => 2d, + "Double") + }; + } + }; + + PivotalBootstrapResults result = PivotalBootstrap.Transform(parent, new[] { raw }, options); + + Assert.IsTrue(factoryWasCalled); + Assert.AreEqual("Double", result.Links[0].Name); + Assert.AreEqual(14d, result.PivotalParameterSets[0].Values[0], 1e-12); + } + + /// + /// Verifies the normal location-scale pivotal bootstrap recovers objective-Bayes marginals + /// and quantile intervals to Monte-Carlo tolerance. + /// + [TestMethod] + public void Run_NormalLocationScale_MatchesObjectiveBayesMarginalsAndQuantileIntervals() + { + const int sampleSize = 100; + const int replicates = 8000; + var parentDistribution = new Normal(3d, 0.7d); + var parent = new PivotalBootstrapFit( + new ParameterSet(parentDistribution.GetParameters, double.NaN), + new Matrix(parentDistribution.ParameterCovariance(sampleSize, ParameterEstimationMethod.MaximumLikelihood))); + double[] originalData = new double[sampleSize]; + double[] probabilities = { 0.5d, 0.95d }; + + var options = new PivotalBootstrapOptions + { + Replicates = replicates, + PRNGSeed = 12345, + ResampleFunction = (data, fit, rng) => + { + var distribution = new Normal(fit.Parameters.Values[0], fit.Parameters.Values[1]); + return distribution.GenerateRandomValues(sampleSize, rng.Next()); + }, + FitFunction = sample => FitNormal(sample), + LinkFactory = context => new[] + { + PivotalBootstrapLink.FromLinkFunction(new IdentityLink()), + PivotalBootstrapLink.FromLinkFunction(new LogLink()) + }, + ParameterValidator = values => values[1] > 0d, + StatisticFunction = ps => + { + var distribution = new Normal(ps.Values[0], ps.Values[1]); + return probabilities.Select(distribution.InverseCDF).ToArray(); + } + }; + + PivotalBootstrapResults result = PivotalBootstrap.Run(originalData, parent, options); + + Assert.IsGreaterThan(0.99d * replicates, result.PivotalParameterSets.Length); + double[] mus = result.PivotalParameterSets.Select(ps => ps.Values[0]).ToArray(); + double[] sigmas = result.PivotalParameterSets.Select(ps => ps.Values[1]).ToArray(); + var muPosterior = new Normal(parentDistribution.Mu, parentDistribution.Sigma / Math.Sqrt(sampleSize)); + var sigmaSquaredPosterior = new InverseChiSquared(sampleSize - 1, parentDistribution.Sigma * parentDistribution.Sigma); + + Array.Sort(mus); + double[] sigmaSquares = sigmas.Select(sigma => sigma * sigma).ToArray(); + Array.Sort(sigmaSquares); + + double muKs = GoodnessOfFit.KolmogorovSmirnov(mus, muPosterior); + double sigmaKs = GoodnessOfFit.KolmogorovSmirnov(sigmaSquares, sigmaSquaredPosterior); + + Assert.IsLessThan(0.04d, muKs, $"Mu marginal KS distance was {muKs}."); + Assert.IsLessThan(0.08d, sigmaKs, $"Sigma marginal KS distance was {sigmaKs}."); + + BootstrapStatisticResult[] quantileIntervals = result.GetPivotalStatisticConfidenceIntervals(0.1d); + double[,] objectiveBayesIntervals = parentDistribution.MonteCarloConfidenceIntervals( + sampleSize, + 20000, + probabilities, + new[] { 0.05d, 0.95d }); + + for (int i = 0; i < probabilities.Length; i++) + { + Assert.AreEqual(objectiveBayesIntervals[i, 0], quantileIntervals[i].LowerCI, 0.10d * parentDistribution.Sigma); + Assert.AreEqual(objectiveBayesIntervals[i, 1], quantileIntervals[i].UpperCI, 0.10d * parentDistribution.Sigma); + } + } + + private static PivotalBootstrapFit Fit(double[] values, double[,] covariance) + { + return new PivotalBootstrapFit(new ParameterSet(values, double.NaN), new Matrix(covariance)); + } + + private static PivotalBootstrapFit FitNormal(double[] sample) + { + var distribution = new Normal(); + ((IEstimation)distribution).Estimate(sample, ParameterEstimationMethod.MaximumLikelihood); + if (!distribution.ParametersValid) + throw new InvalidOperationException("The normal fit produced invalid parameters."); + + return new PivotalBootstrapFit( + new ParameterSet(distribution.GetParameters, double.NaN), + new Matrix(distribution.ParameterCovariance(sample.Length, ParameterEstimationMethod.MaximumLikelihood))); + } + + } +} From 3c37337adfbc5a90f4aab2ad948ebe960c29da68 Mon Sep 17 00:00:00 2001 From: HadenSmith Date: Tue, 16 Jun 2026 15:17:31 -0600 Subject: [PATCH 8/9] Improved XML documentation and unit testing for new links and bootstrap methods. Refactored pivotal bootstrap classes to align with existing architecture. --- .../Functions/Link Functions/FisherZLink.cs | 8 +- .../Link Functions/YeoJohnsonLink.cs | 41 +- Numerics/Sampling/Bootstrap/Bootstrap.cs | 980 ++++++++++++++++-- .../Sampling/Bootstrap/PivotalBootstrap.cs | 895 ---------------- .../Bootstrap/Support/BootstrapFit.cs | 75 ++ .../Support/PivotalBootstrapContext.cs | 79 ++ .../Support/PivotalBootstrapDiagnostics.cs | 57 + .../PivotalBootstrapInvalidDrawPolicy.cs | 23 + Test_Numerics/Functions/Test_FisherZLink.cs | 22 + .../Functions/Test_YeoJohnsonLink.cs | 67 +- .../Sampling/Test_PivotalBootstrap.cs | 563 ++++++++-- 11 files changed, 1728 insertions(+), 1082 deletions(-) delete mode 100644 Numerics/Sampling/Bootstrap/PivotalBootstrap.cs create mode 100644 Numerics/Sampling/Bootstrap/Support/BootstrapFit.cs create mode 100644 Numerics/Sampling/Bootstrap/Support/PivotalBootstrapContext.cs create mode 100644 Numerics/Sampling/Bootstrap/Support/PivotalBootstrapDiagnostics.cs create mode 100644 Numerics/Sampling/Bootstrap/Support/PivotalBootstrapInvalidDrawPolicy.cs diff --git a/Numerics/Functions/Link Functions/FisherZLink.cs b/Numerics/Functions/Link Functions/FisherZLink.cs index 3d548bd7..bbbe4dac 100644 --- a/Numerics/Functions/Link Functions/FisherZLink.cs +++ b/Numerics/Functions/Link Functions/FisherZLink.cs @@ -14,6 +14,10 @@ namespace Numerics.Functions /// /// This link is commonly used for correlation parameters and other signed bounded parameters. /// + /// + /// Authors: + /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil + /// /// [Serializable] public sealed class FisherZLink : ILinkFunction @@ -22,7 +26,7 @@ public sealed class FisherZLink : ILinkFunction /// Thrown when is not in (-1, 1). public double Link(double x) { - if (x <= -1d || x >= 1d) + if (!Tools.IsFinite(x) || x <= -1d || x >= 1d) throw new ArgumentOutOfRangeException(nameof(x), "FisherZLink requires -1 < x < 1."); return 0.5d * Math.Log((1d + x) / (1d - x)); @@ -45,7 +49,7 @@ public double InverseLink(double eta) /// Thrown when is not in (-1, 1). public double DLink(double x) { - if (x <= -1d || x >= 1d) + if (!Tools.IsFinite(x) || x <= -1d || x >= 1d) throw new ArgumentOutOfRangeException(nameof(x), "FisherZLink derivative requires -1 < x < 1."); return 1d / (1d - x * x); diff --git a/Numerics/Functions/Link Functions/YeoJohnsonLink.cs b/Numerics/Functions/Link Functions/YeoJohnsonLink.cs index f1bf66ea..61b9bdec 100644 --- a/Numerics/Functions/Link Functions/YeoJohnsonLink.cs +++ b/Numerics/Functions/Link Functions/YeoJohnsonLink.cs @@ -14,10 +14,21 @@ namespace Numerics.Functions /// for skew or shape parameters that do not have a closed-form /// variance-stabilizing link. /// + /// + /// The transformation parameter lambda is restricted to the range used by + /// , -5 <= lambda <= 5. + /// + /// + /// Authors: + /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil + /// /// [Serializable] public sealed class YeoJohnsonLink : ILinkFunction { + private const double MinimumLambda = -5d; + private const double MaximumLambda = 5d; + /// /// Initializes a new instance of the class with lambda equal to 1. /// @@ -30,13 +41,10 @@ public YeoJohnsonLink() /// Initializes a new instance of the class. /// /// The Yeo-Johnson transformation parameter. - /// Thrown when is not finite. + /// Thrown when is not finite or is outside [-5, 5]. public YeoJohnsonLink(double lambda) { - if (double.IsNaN(lambda) || double.IsInfinity(lambda)) - throw new ArgumentOutOfRangeException(nameof(lambda), "Lambda must be finite."); - - Lambda = lambda; + Lambda = ValidateLambda(lambda, nameof(lambda)); } /// @@ -44,14 +52,18 @@ public YeoJohnsonLink(double lambda) /// /// Representative values used to estimate lambda. /// Thrown when is null. + /// Thrown when fewer than two finite values are supplied or lambda fitting fails. public YeoJohnsonLink(double[] values) { if (values == null) throw new ArgumentNullException(nameof(values)); if (values.Length < 2) throw new ArgumentException("At least 2 values are required to fit lambda.", nameof(values)); + for (int i = 0; i < values.Length; i++) + if (!Tools.IsFinite(values[i])) + throw new ArgumentException("Every representative value must be finite.", nameof(values)); - Lambda = YeoJohnson.FitLambda(values); + Lambda = ValidateLambda(YeoJohnson.FitLambda(values), nameof(values)); } /// @@ -69,7 +81,7 @@ public YeoJohnsonLink(XElement xElement) if (lambdaAttribute == null) throw new ArgumentException("The YeoJohnsonLink element must contain a Lambda attribute.", nameof(xElement)); - Lambda = double.Parse(lambdaAttribute.Value, NumberStyles.Any, CultureInfo.InvariantCulture); + Lambda = ValidateLambda(double.Parse(lambdaAttribute.Value, NumberStyles.Any, CultureInfo.InvariantCulture), "Lambda"); } /// @@ -97,5 +109,20 @@ public double DLink(double x) /// public XElement ToXElement() => new XElement(nameof(YeoJohnsonLink), new XAttribute("Lambda", Lambda.ToString("G17", CultureInfo.InvariantCulture))); + + /// + /// Validates a Yeo-Johnson lambda value. + /// + /// The lambda value to validate. + /// The parameter name to use in the exception. + /// The validated lambda value. + /// Thrown when is not finite or is outside [-5, 5]. + private static double ValidateLambda(double lambda, string paramName) + { + if (!Tools.IsFinite(lambda) || lambda < MinimumLambda || lambda > MaximumLambda) + throw new ArgumentOutOfRangeException(paramName, "Lambda must be finite and in the range [-5, 5]."); + + return lambda; + } } } diff --git a/Numerics/Sampling/Bootstrap/Bootstrap.cs b/Numerics/Sampling/Bootstrap/Bootstrap.cs index 8c1c1ba9..51c6402c 100644 --- a/Numerics/Sampling/Bootstrap/Bootstrap.cs +++ b/Numerics/Sampling/Bootstrap/Bootstrap.cs @@ -1,7 +1,11 @@ using Numerics.Data.Statistics; using Numerics.Distributions; +using Numerics.Functions; +using Numerics.Mathematics.LinearAlgebra; using Numerics.Mathematics.Optimization; using System; +using System.Collections.Generic; +using System.Diagnostics; using System.Linq; using System.Threading; using System.Threading.Tasks; @@ -11,7 +15,7 @@ namespace Numerics.Sampling /// /// A general-purpose bootstrap class for parametric or non-parametric bootstrap analysis. - /// Supports all major confidence interval methods: Percentile, Bias-Corrected, BCa, Normal, and Bootstrap-t. + /// Supports Percentile, Bias-Corrected, BCa, Normal, Bootstrap-t, and covariance-aware pivotal bootstrap workflows. /// /// The type of data being bootstrapped. /// @@ -20,16 +24,21 @@ namespace Numerics.Sampling /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil /// /// + /// The regular bootstrap methods use and require . + /// The pivotal bootstrap is a separate covariance-aware run mode that uses + /// and . The two modes share + /// result storage, but their required delegates and supported confidence interval methods are validated separately. + /// + /// /// /// /// public class Bootstrap { - #region Construction /// - /// Constructs a new bootstrap analysis. + /// Constructs a new regular bootstrap analysis. /// /// The original data. /// The original fitted parameter set. @@ -39,20 +48,75 @@ public Bootstrap(TData originalData, ParameterSet originalParameters) _originalParameters = originalParameters; } + /// + /// Constructs a new covariance-aware bootstrap analysis. + /// + /// The original data. + /// The original fitted parameter set and covariance matrix. + /// Thrown when is null. + public Bootstrap(TData originalData, BootstrapFit originalFit) + { + if (originalFit == null) + throw new ArgumentNullException(nameof(originalFit)); + + _originalData = originalData; + _originalParameters = originalFit.Parameters.Clone(); + _originalCovariance = originalFit.Covariance.Clone(); + } + #endregion #region Members - private TData _originalData; + /// + /// Identifies the workflow that produced the active bootstrap results. + /// + private enum BootstrapRunType + { + /// + /// No bootstrap workflow has completed. + /// + None, + + /// + /// Results came from . + /// + Regular, + + /// + /// Results came from . + /// + DoubleBootstrap, + + /// + /// Results came from . + /// + Studentized, + + /// + /// Results came from or . + /// + Pivotal + } + + private readonly TData _originalData; private ParameterSet _originalParameters; + private Matrix? _originalCovariance; private ParameterSet[] _bootstrapParameterSets = null!; private double[,] _bootstrapStatistics = null!; + private ParameterSet[] _rawBootstrapParameterSets = Array.Empty(); + private double[,]? _rawBootstrapStatistics; + private BootstrapFit[] _rawBootstrapFits = Array.Empty(); + private ILinkFunction?[] _pivotalLinks = Array.Empty(); + private PivotalBootstrapDiagnostics? _pivotalDiagnostics; + private double[]? _parentStatistics; private int _numStats; private int _numParams; private int _failedCount; private bool[] _validFlags = null!; private double[,]? _studentizedValues; private double[,]? _transformedStatistics; + private BootstrapRunType _runType; #endregion @@ -65,18 +129,24 @@ public Bootstrap(TData originalData, ParameterSet originalParameters) /// /// Delegate function for fitting a model to data and returning a parameter set. + /// Required by regular bootstrap methods. /// public Func? FitFunction { get; set; } + /// + /// Delegate function for fitting a model to data and returning a parameter set plus covariance matrix. + /// Required by . + /// + public Func? FitWithCovarianceFunction { get; set; } + /// /// Delegate function for extracting statistics from a fitted parameter set. - /// Returns an array of statistic values (e.g., quantiles at multiple probabilities). + /// Required by regular bootstrap methods and optional for pivotal bootstrap. /// public Func? StatisticFunction { get; set; } /// /// Optional delegate for computing a leave-one-out jackknife sample. - /// Takes the original data and the index of the observation to remove. /// Required for the BCa confidence interval method. /// public Func? JackknifeFunction { get; set; } @@ -89,13 +159,13 @@ public Bootstrap(TData originalData, ParameterSet originalParameters) /// /// Optional transform applied to statistic values before Normal and Bootstrap-t CI computation. - /// Default is cube-root: x → x^(1/3). Set to null for no transform. + /// Default is cube-root. /// public Func Transform { get; set; } = x => Math.Pow(x, 1d / 3d); /// - /// Optional inverse transform corresponding to Transform. - /// Default is cube: x → x^3. Set to null for no transform. + /// Optional inverse transform corresponding to . + /// Default is cube. /// public Func InverseTransform { get; set; } = x => Math.Pow(x, 3d); @@ -120,17 +190,95 @@ public Bootstrap(TData originalData, ParameterSet originalParameters) public int InnerReplicates { get; set; } = 300; /// - /// Gets the bootstrapped model parameter sets. + /// Gets or sets the original covariance matrix used by the pivotal bootstrap. + /// + public Matrix? OriginalCovariance + { + get => _originalCovariance?.Clone(); + set => _originalCovariance = value?.Clone(); + } + + /// + /// Gets or sets a factory that returns one optional per fitted parameter for pivotal bootstrap. + /// Null link entries are treated as identity links by . + /// + public Func? PivotalLinkFactory { get; set; } + + /// + /// Gets or sets an optional filter applied to valid raw covariance-aware bootstrap fits before pivotal transformation. + /// + public Func? PivotalReplicateFilter { get; set; } + + /// + /// Gets or sets an optional validator applied to transformed pivotal parameter values. + /// + public Func? PivotalParameterValidator { get; set; } + + /// + /// Gets or sets the policy used when a pivotal draw is invalid. Default is . + /// + public PivotalBootstrapInvalidDrawPolicy PivotalInvalidDrawPolicy { get; set; } = PivotalBootstrapInvalidDrawPolicy.Drop; + + /// + /// Gets or sets a value indicating whether pivotal bootstrap covariance matrices should be regularized before Cholesky decomposition. + /// + public bool RegularizePivotalCovariances { get; set; } = true; + + /// + /// Gets or sets an optional absolute component limit for the standardized pivotal vector. + /// + public double? PivotalZLimit { get; set; } + + /// + /// Gets or sets a value indicating whether Gaussian jitter should be added to pivotal vectors before the optional limit check. + /// + public bool AddPivotalJitter { get; set; } + + /// + /// Gets or sets the base standard deviation of optional Gaussian jitter. The applied scale is this value divided by sqrt(parameter count). + /// + public double PivotalJitterScale { get; set; } = 0.01d; + + /// + /// Gets the active bootstrapped model parameter sets. + /// For pivotal bootstrap, these are the retained pivotal parameter draws. /// public ParameterSet[] BootstrapParameterSets => _bootstrapParameterSets; /// - /// Gets the bootstrapped statistics as a 2D array [replicates, statistics]. + /// Gets the active bootstrapped statistics as a 2D array [replicate, statistic]. + /// For pivotal bootstrap, these are pivotal statistics when was supplied. /// public double[,] BootstrapStatistics => _bootstrapStatistics; + /// + /// Gets the accepted raw covariance-aware bootstrap fits from the most recent pivotal bootstrap run. + /// + public BootstrapFit[] RawBootstrapFits => _rawBootstrapFits; + + /// + /// Gets the accepted raw covariance-aware bootstrap parameter sets from the most recent pivotal bootstrap run. + /// + public ParameterSet[] RawBootstrapParameterSets => _rawBootstrapParameterSets; + + /// + /// Gets the accepted raw bootstrap statistics from the most recent pivotal bootstrap run, when was supplied. + /// + public double[,]? RawBootstrapStatistics => _rawBootstrapStatistics; + + /// + /// Gets the pivotal link functions used by the most recent pivotal bootstrap run. + /// + public ILinkFunction?[] PivotalLinks => (ILinkFunction?[])_pivotalLinks.Clone(); + + /// + /// Gets diagnostics from the most recent pivotal bootstrap run. + /// + public PivotalBootstrapDiagnostics? PivotalDiagnostics => _pivotalDiagnostics; + /// /// Gets the number of replicates that failed after all retries. + /// For pivotal bootstrap, this is the raw covariance-aware fit failure count. /// public int FailedReplicates => _failedCount; @@ -139,15 +287,19 @@ public Bootstrap(TData originalData, ParameterSet originalParameters) #region Run Methods /// - /// Runs the basic bootstrap procedure with error handling and retry logic. + /// Runs the regular bootstrap procedure with error handling and retry logic. /// + /// Thrown when a required regular-bootstrap delegate has not been set. + /// Thrown when or is not positive. public void Run() { ValidateCoreDelegates(); + ValidateReplicationSettings(); var resample = ResampleFunction!; var fit = FitFunction!; var statistic = StatisticFunction!; InitializeState(); + ResetPivotalState(); var prng = new MersenneTwister(PRNGSeed); var seeds = prng.NextIntegers(Replicates); @@ -162,19 +314,9 @@ public void Run() var rng = new MersenneTwister(seeds[idx] + 10 * retry); var sample = resample(_originalData, _originalParameters, rng); var fitResult = fit(sample); - var stat = statistic(fitResult); - - // Validate: check for NaN in statistics - bool hasNaN = false; - for (int k = 0; k < _numStats; k++) - { - if (double.IsNaN(stat[k]) || double.IsInfinity(stat[k])) - { - hasNaN = true; - break; - } - } - if (hasNaN) continue; + if (!HasExpectedFiniteParameterValues(fitResult, _numParams)) + continue; + var stat = ValidateStatistics(statistic(fitResult), _numStats); _bootstrapParameterSets[idx] = fitResult; for (int k = 0; k < _numStats; k++) @@ -184,29 +326,36 @@ public void Run() } catch (Exception) { - // retry + // Retry failed regular bootstrap replicates. } if (succeeded) break; } if (!succeeded) - { MarkFailed(idx); - } }); + + _runType = BootstrapRunType.Regular; } /// /// Runs the double bootstrap procedure with bias correction. /// /// Number of inner bootstrap replicates. Default = 300. + /// Thrown when a required regular-bootstrap delegate has not been set. + /// Thrown when replicate counts are not positive. public void RunDoubleBootstrap(int innerReplicates = 300) { ValidateCoreDelegates(); + ValidateReplicationSettings(); + if (innerReplicates < 1) + throw new ArgumentOutOfRangeException(nameof(innerReplicates), "The number of inner replicates must be positive."); + var resample = ResampleFunction!; var fit = FitFunction!; var statistic = StatisticFunction!; InitializeState(); + ResetPivotalState(); var prng = new MersenneTwister(PRNGSeed); var seeds = prng.NextIntegers(Replicates); @@ -220,13 +369,13 @@ public void RunDoubleBootstrap(int innerReplicates = 300) { var rng = new MersenneTwister(seeds[idx] + 10 * retry); - // Outer bootstrap var outerSample = resample(_originalData, _originalParameters, rng); var outerFit = fit(outerSample); - var outerStat = statistic(outerFit); + if (!HasExpectedFiniteParameterValues(outerFit, _numParams)) + continue; + var outerStat = ValidateStatistics(statistic(outerFit), _numStats); - // Inner bootstrap for bias estimation - int p = outerFit.Values.Length; + int p = _numParams; var parmsInnerSum = new double[p]; var statsInnerSum = new double[_numStats]; int validInner = 0; @@ -237,7 +386,9 @@ public void RunDoubleBootstrap(int innerReplicates = 300) { var innerSample = resample(outerSample, outerFit, rng); var innerFit = fit(innerSample); - var innerStat = statistic(innerFit); + if (!HasExpectedFiniteParameterValues(innerFit, p)) + continue; + var innerStat = ValidateStatistics(statistic(innerFit), _numStats); for (int i = 0; i < p; i++) parmsInnerSum[i] += innerFit.Values[i]; @@ -247,13 +398,12 @@ public void RunDoubleBootstrap(int innerReplicates = 300) } catch (Exception) { - // skip failed inner replicate + // Skip failed inner replicate. } } if (validInner == 0) continue; - // Bias-correct parameters var biasCorrectedParms = new double[p]; for (int i = 0; i < p; i++) { @@ -261,7 +411,6 @@ public void RunDoubleBootstrap(int innerReplicates = 300) biasCorrectedParms[i] = outerFit.Values[i] - (innerMean - outerFit.Values[i]); } - // Bias-correct statistics var biasCorrectedStats = new double[_numStats]; for (int i = 0; i < _numStats; i++) { @@ -269,7 +418,7 @@ public void RunDoubleBootstrap(int innerReplicates = 300) biasCorrectedStats[i] = outerStat[i] - (innerMean - outerStat[i]); } - _bootstrapParameterSets[idx] = new ParameterSet(biasCorrectedParms, outerFit.Fitness); + _bootstrapParameterSets[idx] = new ParameterSet(biasCorrectedParms, outerFit.Fitness, outerFit.Weight); for (int k = 0; k < _numStats; k++) _bootstrapStatistics[idx, k] = biasCorrectedStats[k]; _validFlags[idx] = true; @@ -277,34 +426,41 @@ public void RunDoubleBootstrap(int innerReplicates = 300) } catch (Exception) { - // retry + // Retry failed outer replicate. } if (succeeded) break; } if (!succeeded) - { MarkFailed(idx); - } }); + + _runType = BootstrapRunType.DoubleBootstrap; } /// - /// Runs the bootstrap procedure with nested inner bootstrap for studentized (Bootstrap-t) confidence intervals. - /// Must be called before requesting Bootstrap-t CIs. + /// Runs the regular bootstrap procedure with nested inner bootstrap for studentized Bootstrap-t confidence intervals. /// + /// Thrown when a required regular-bootstrap delegate has not been set. + /// Thrown when replicate counts are not positive. public void RunWithStudentizedBootstrap() { ValidateCoreDelegates(); + ValidateReplicationSettings(); + if (InnerReplicates < 1) + throw new ArgumentOutOfRangeException(nameof(InnerReplicates), "The number of inner replicates must be positive."); + var resample = ResampleFunction!; var fitFunc = FitFunction!; var statistic = StatisticFunction!; + ResetPivotalState(); var originalStats = statistic(_originalParameters); + ValidateStatistics(originalStats); + _parentStatistics = (double[])originalStats.Clone(); _numStats = originalStats.Length; _numParams = _originalParameters.Values.Length; - // Apply transform to population statistics var popTransformed = new double[_numStats]; for (int i = 0; i < _numStats; i++) popTransformed[i] = ApplyTransform(originalStats[i]); @@ -331,18 +487,18 @@ public void RunWithStudentizedBootstrap() var rng = new MersenneTwister(seeds[idx] + 10 * retry); var sample = resample(_originalData, _originalParameters, rng); var outerFit = fitFunc(sample); - var outerStats = statistic(outerFit); + if (!HasExpectedFiniteParameterValues(outerFit, _numParams)) + continue; + var outerStats = ValidateStatistics(statistic(outerFit), _numStats); _bootstrapParameterSets[idx] = outerFit; for (int k = 0; k < _numStats; k++) _bootstrapStatistics[idx, k] = outerStats[k]; - // Transform outer statistics var outerTransformed = new double[_numStats]; for (int j = 0; j < _numStats; j++) outerTransformed[j] = ApplyTransform(outerStats[j]); - // Inner bootstrap for SE estimation var innerPrng = new MersenneTwister(seeds[idx]); var innerSeeds = innerPrng.NextIntegers(InnerReplicates); var innerTransformed = new double[InnerReplicates, _numStats]; @@ -354,7 +510,14 @@ public void RunWithStudentizedBootstrap() { var innerSample = resample(sample, outerFit, new MersenneTwister(innerSeeds[k])); var innerFit = fitFunc(innerSample); - var innerStats = statistic(innerFit); + if (!HasExpectedFiniteParameterValues(innerFit, _numParams)) + { + for (int j = 0; j < _numStats; j++) + innerTransformed[k, j] = double.NaN; + continue; + } + var innerStats = ValidateStatistics(statistic(innerFit), _numStats); + for (int j = 0; j < _numStats; j++) innerTransformed[k, j] = ApplyTransform(innerStats[j]); validInner++; @@ -368,11 +531,10 @@ public void RunWithStudentizedBootstrap() if (validInner < 2) continue; - // Compute inner SE per statistic and studentized values for (int j = 0; j < _numStats; j++) { var col = innerTransformed.GetColumn(j); - var validCol = col.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); + var validCol = col.Where(Tools.IsFinite).ToArray(); double se = validCol.Length > 1 ? Statistics.StandardDeviation(validCol) : double.NaN; transformedStatistics[idx, j] = outerTransformed[j]; studentizedValues[idx, j] = se > 0 ? (popTransformed[j] - outerTransformed[j]) / se : double.NaN; @@ -383,7 +545,7 @@ public void RunWithStudentizedBootstrap() } catch (Exception) { - // retry + // Retry failed outer replicate. } if (succeeded) break; } @@ -398,6 +560,310 @@ public void RunWithStudentizedBootstrap() } } }); + + _runType = BootstrapRunType.Studentized; + } + + #endregion + + #region Pivotal Bootstrap + + /// + /// Runs the covariance-aware pivotal bootstrap as a distinct bootstrap mode. + /// + /// + /// Thrown when , , or + /// has not been supplied. + /// + /// Thrown when or is not positive. + public void RunPivotalBootstrap() + { + ValidatePivotalDelegates(); + ValidateReplicationSettings(); + + var parentFit = CreateOriginalFit(); + var resample = ResampleFunction!; + var fit = FitWithCovarianceFunction!; + + var stopwatch = Stopwatch.StartNew(); + var prng = new MersenneTwister(PRNGSeed); + var seeds = prng.NextIntegers(Replicates); + var rawFits = new BootstrapFit?[Replicates]; + int failed = 0; + + Parallel.For(0, Replicates, index => + { + bool succeeded = false; + for (int retry = 0; retry < MaxRetries; retry++) + { + try + { + var rng = new MersenneTwister(seeds[index] + 10 * retry); + TData sample = resample(_originalData, parentFit.Parameters, rng); + BootstrapFit rawFit = fit(sample); + if (!IsValidFit(rawFit, parentFit.ParameterCount)) + continue; + + rawFits[index] = rawFit; + succeeded = true; + } + catch (Exception) + { + // Retry failed pivotal bootstrap resamples or covariance-aware fits. + } + + if (succeeded) + break; + } + + if (!succeeded) + Interlocked.Increment(ref failed); + }); + + stopwatch.Stop(); + + var acceptedRawFits = rawFits.Where(f => f != null).Select(f => f!).ToArray(); + TransformPivotalBootstrap(acceptedRawFits, Replicates, failed, stopwatch.Elapsed); + } + + /// + /// Transforms precomputed raw covariance-aware bootstrap fits into pivotal bootstrap draws. + /// + /// Raw bootstrap fits and covariances to transform. + /// Thrown when is null. + /// Thrown when no valid raw fits are accepted. + public void TransformPivotalBootstrap(IEnumerable rawFits) + { + if (rawFits == null) + throw new ArgumentNullException(nameof(rawFits)); + + BootstrapFit[] suppliedFits = rawFits.ToArray(); + TransformPivotalBootstrap(suppliedFits, suppliedFits.Length, 0, TimeSpan.Zero); + } + + /// + /// Applies the pivotal transformation to raw fits and stores the resulting active bootstrap ensemble. + /// + /// Raw bootstrap fits supplied to the transformation. + /// The number of raw replicates requested or supplied. + /// The number of raw replicates that failed before transformation. + /// The elapsed raw resampling and fitting time. + /// Thrown when no valid raw fits are accepted. + private void TransformPivotalBootstrap(BootstrapFit[] rawFits, int requestedReplicates, int failedRawReplicates, TimeSpan resamplingTime) + { + BootstrapFit parentFit = CreateOriginalFit(); + int p = parentFit.ParameterCount; + var stopwatch = Stopwatch.StartNew(); + + BootstrapFit[] acceptedRawFits = rawFits + .Where(fit => IsAcceptedRawFit(fit, p)) + .ToArray(); + + if (acceptedRawFits.Length == 0) + throw new InvalidOperationException("No valid raw bootstrap fits were supplied for pivotal transformation."); + + ILinkFunction?[] links = CreatePivotalLinks(parentFit, acceptedRawFits); + var linkController = new LinkController(links); + double[] parentEta = linkController.Link(parentFit.Parameters.Values); + ValidateTransformedValues(parentEta, "The parent link transformation produced a non-finite value."); + + Matrix parentLinkCovariance = LinkCovariance(parentFit, linkController); + var parentCholesky = new CholeskyDecomposition(parentLinkCovariance); + var pivotalParameterSets = new List(acceptedRawFits.Length); + var jitterRng = new MersenneTwister(PRNGSeed); + int invalid = 0; + + for (int i = 0; i < acceptedRawFits.Length; i++) + { + BootstrapFit rawFit = acceptedRawFits[i]; + if (TryCreatePivotalDraw(rawFit, parentFit, linkController, parentEta, parentCholesky, jitterRng, out ParameterSet pivotalParameters)) + { + pivotalParameterSets.Add(pivotalParameters); + } + else + { + invalid++; + if (TryApplyInvalidPolicy(rawFit, parentFit, out ParameterSet fallbackParameters)) + pivotalParameterSets.Add(fallbackParameters); + } + } + + stopwatch.Stop(); + + _rawBootstrapFits = acceptedRawFits; + _rawBootstrapParameterSets = acceptedRawFits.Select(f => f.Parameters.Clone()).ToArray(); + _bootstrapParameterSets = pivotalParameterSets.ToArray(); + _numParams = p; + _failedCount = failedRawReplicates; + _validFlags = Enumerable.Repeat(true, _bootstrapParameterSets.Length).ToArray(); + _studentizedValues = null; + _transformedStatistics = null; + _pivotalLinks = links; + _pivotalDiagnostics = new PivotalBootstrapDiagnostics + { + RequestedReplicates = requestedReplicates, + FailedRawReplicates = failedRawReplicates, + RejectedRawReplicates = rawFits.Length - acceptedRawFits.Length, + AcceptedRawReplicates = acceptedRawFits.Length, + InvalidPivotalReplicates = invalid, + RetainedPivotalReplicates = _bootstrapParameterSets.Length, + ResamplingTime = resamplingTime, + TransformationTime = stopwatch.Elapsed + }; + + if (StatisticFunction != null) + { + _parentStatistics = ValidateStatistics(StatisticFunction(parentFit.Parameters)); + _numStats = _parentStatistics.Length; + _rawBootstrapStatistics = ComputeStatistics(_rawBootstrapParameterSets, StatisticFunction); + _bootstrapStatistics = ComputeStatistics(_bootstrapParameterSets, StatisticFunction); + } + else + { + _parentStatistics = Array.Empty(); + _numStats = 0; + _rawBootstrapStatistics = new double[_rawBootstrapParameterSets.Length, 0]; + _bootstrapStatistics = new double[_bootstrapParameterSets.Length, 0]; + } + + _runType = BootstrapRunType.Pivotal; + } + + /// + /// Attempts to create one pivotal draw from an accepted raw fit. + /// + /// The accepted raw bootstrap fit. + /// The parent bootstrap fit. + /// The parameter link controller. + /// The parent parameters in link space. + /// The Cholesky decomposition of the parent link-space covariance. + /// The random-number generator used for optional jitter. + /// The created pivotal parameters when the method returns true. + /// True when a finite and valid pivotal draw is created; otherwise false. + private bool TryCreatePivotalDraw( + BootstrapFit rawFit, + BootstrapFit parentFit, + LinkController linkController, + double[] parentEta, + CholeskyDecomposition parentCholesky, + Random jitterRng, + out ParameterSet pivotalParameters) + { + pivotalParameters = default; + try + { + double[] rawEta = linkController.Link(rawFit.Parameters.Values); + ValidateTransformedValues(rawEta, "The raw link transformation produced a non-finite value."); + + Matrix rawLinkCovariance = LinkCovariance(rawFit, linkController); + var rawCholesky = new CholeskyDecomposition(rawLinkCovariance); + var difference = new double[parentEta.Length]; + for (int j = 0; j < difference.Length; j++) + difference[j] = parentEta[j] - rawEta[j]; + + double[] z = rawCholesky.Forward(new Vector(difference)).ToArray(); + if (!ContainsOnlyFiniteValues(z)) + return false; + + if (AddPivotalJitter && PivotalJitterScale > 0d) + { + double jitterScale = PivotalJitterScale / Math.Sqrt(Math.Max(1, z.Length)); + for (int j = 0; j < z.Length; j++) + { + double u = Tools.Clamp(jitterRng.NextDouble(), 1e-16, 1d - 1e-16); + z[j] += jitterScale * Normal.StandardZ(u); + } + } + + if (PivotalZLimit.HasValue && z.Any(value => Math.Abs(value) > PivotalZLimit.Value)) + return false; + + double[] reinflated = parentCholesky.L * z; + var pivotalEta = new double[parentEta.Length]; + for (int j = 0; j < pivotalEta.Length; j++) + pivotalEta[j] = parentEta[j] + reinflated[j]; + + double[] pivotalValues = linkController.InverseLink(pivotalEta); + if (!ContainsOnlyFiniteValues(pivotalValues)) + return false; + if (PivotalParameterValidator != null && !PivotalParameterValidator(pivotalValues)) + return false; + + pivotalParameters = new ParameterSet(pivotalValues, rawFit.Parameters.Fitness, rawFit.Parameters.Weight); + return true; + } + catch + { + return false; + } + } + + /// + /// Applies the configured invalid pivotal draw policy. + /// + /// The raw bootstrap fit associated with the invalid pivotal draw. + /// The parent bootstrap fit. + /// The fallback parameter set when the method returns true. + /// True when a fallback parameter set should be retained; otherwise false. + /// Thrown when is unknown. + private bool TryApplyInvalidPolicy(BootstrapFit rawFit, BootstrapFit parentFit, out ParameterSet parameterSet) + { + parameterSet = default; + switch (PivotalInvalidDrawPolicy) + { + case PivotalBootstrapInvalidDrawPolicy.Drop: + return false; + case PivotalBootstrapInvalidDrawPolicy.UseRaw: + parameterSet = rawFit.Parameters.Clone(); + return true; + case PivotalBootstrapInvalidDrawPolicy.UseParent: + parameterSet = parentFit.Parameters.Clone(); + return true; + default: + throw new ArgumentOutOfRangeException(nameof(PivotalInvalidDrawPolicy), $"Unknown invalid-draw policy: {PivotalInvalidDrawPolicy}."); + } + } + + /// + /// Computes the link-space covariance for a bootstrap fit using the supplied link controller. + /// + /// The fit containing native parameter covariance. + /// The link controller used to compute the diagonal Jacobian. + /// The covariance matrix in link space. + private Matrix LinkCovariance(BootstrapFit fit, LinkController linkController) + { + Matrix jacobian = linkController.LinkJacobian(fit.Parameters.Values); + for (int i = 0; i < jacobian.NumberOfRows; i++) + { + if (!Tools.IsFinite(jacobian[i, i])) + throw new ArgumentException("The link derivative produced a non-finite value."); + } + + Matrix linkCovariance = jacobian * fit.Covariance * jacobian.Transpose(); + return RegularizePivotalCovariances + ? MatrixRegularization.MakeSymmetricPositiveDefinite(linkCovariance) + : linkCovariance; + } + + /// + /// Builds the pivotal link array from or the identity default. + /// + /// The original parent fit. + /// The accepted raw bootstrap fits. + /// One optional link function per fitted parameter. + /// Thrown when the link factory returns the wrong number of links. + private ILinkFunction?[] CreatePivotalLinks(BootstrapFit parentFit, BootstrapFit[] acceptedRawFits) + { + ILinkFunction?[] links = PivotalLinkFactory != null + ? PivotalLinkFactory(new PivotalBootstrapContext(parentFit, acceptedRawFits)) + : new ILinkFunction?[parentFit.ParameterCount]; + + if (links == null) + throw new ArgumentException("The pivotal link factory must not return null.", nameof(PivotalLinkFactory)); + if (links.Length != parentFit.ParameterCount) + throw new ArgumentException("The pivotal link factory must return one link per parameter.", nameof(PivotalLinkFactory)); + + return (ILinkFunction?[])links.Clone(); } #endregion @@ -408,24 +874,30 @@ public void RunWithStudentizedBootstrap() /// Computes bootstrap confidence intervals using the specified method. /// /// The confidence interval method. - /// The confidence level. Default = 0.1, resulting in 90% CIs. - /// A BootstrapResults object containing CIs for both parameters and statistics. + /// The two-sided alpha level. Default = 0.1, resulting in 90% confidence intervals. + /// A object containing confidence intervals for parameters and statistics. + /// Thrown when the requested interval method is incompatible with the last run mode. + /// Thrown when is not between zero and one. public BootstrapResults GetConfidenceIntervals(BootstrapCIMethod method, double alpha = 0.1) { - if (_bootstrapStatistics == null) - throw new InvalidOperationException("Run() or RunWithStudentizedBootstrap() must be called first."); - if (alpha <= 0 || alpha >= 1) - throw new ArgumentOutOfRangeException(nameof(alpha), "Alpha must be between 0 and 1."); - if (method == BootstrapCIMethod.BCa && (JackknifeFunction == null || SampleSizeFunction == null)) - throw new InvalidOperationException("JackknifeFunction and SampleSizeFunction must be set for BCa method."); - if (method == BootstrapCIMethod.BootstrapT && _studentizedValues == null) - throw new InvalidOperationException("RunWithStudentizedBootstrap() must be called before requesting Bootstrap-t CIs."); + ValidateConfidenceIntervalRequest(method, alpha); + + if (_runType == BootstrapRunType.Pivotal) + { + return CreatePercentileResults( + _bootstrapParameterSets, + _bootstrapStatistics, + _originalParameters.Values, + _parentStatistics, + alpha, + _failedCount, + method); + } if (StatisticFunction == null) throw new InvalidOperationException("StatisticFunction must be set."); - var originalStats = StatisticFunction(_originalParameters); + var originalStats = ValidateStatistics(StatisticFunction(_originalParameters)); - // Compute acceleration constants once for BCa double[]? accelConstants = null; if (method == BootstrapCIMethod.BCa) accelConstants = ComputeAccelerationConstants(originalStats); @@ -439,7 +911,6 @@ public BootstrapResults GetConfidenceIntervals(BootstrapCIMethod method, double FailedReplicates = _failedCount }; - // Compute CIs for each statistic for (int i = 0; i < _numStats; i++) { var values = _bootstrapStatistics.GetColumn(i); @@ -463,7 +934,6 @@ public BootstrapResults GetConfidenceIntervals(BootstrapCIMethod method, double } } - // Compute percentile CIs for each parameter for (int i = 0; i < _numParams; i++) { var values = _bootstrapParameterSets.Select(ps => ps.Values[i]).ToArray(); @@ -473,16 +943,44 @@ public BootstrapResults GetConfidenceIntervals(BootstrapCIMethod method, double return results; } + /// + /// Computes raw-bootstrap percentile confidence intervals after a pivotal bootstrap run. + /// + /// The two-sided alpha level. Default = 0.1, resulting in 90% confidence intervals. + /// Percentile confidence intervals for the accepted raw bootstrap ensemble. + /// Thrown when the last run was not pivotal bootstrap. + /// Thrown when is not between zero and one. + public BootstrapResults GetRawPivotalConfidenceIntervals(double alpha = 0.1) + { + if (_runType != BootstrapRunType.Pivotal) + throw new InvalidOperationException("RunPivotalBootstrap() or TransformPivotalBootstrap() must be called before requesting raw pivotal-bootstrap confidence intervals."); + if (alpha <= 0d || alpha >= 1d) + throw new ArgumentOutOfRangeException(nameof(alpha), "Alpha must be between 0 and 1."); + + return CreatePercentileResults( + _rawBootstrapParameterSets, + _rawBootstrapStatistics, + _originalParameters.Values, + _parentStatistics, + alpha, + _failedCount, + BootstrapCIMethod.Percentile); + } + #endregion #region CI Methods /// - /// Computes percentile confidence intervals for a single statistic. + /// Computes percentile confidence intervals for a single statistic or parameter. /// + /// Bootstrap values for one statistic or parameter. + /// The original population estimate. + /// The two-sided alpha level. + /// The percentile confidence interval summary. private BootstrapStatisticResult ComputePercentileCI(double[] values, double populationEstimate, double alpha) { - var validValues = values.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); + var validValues = values.Where(Tools.IsFinite).ToArray(); Array.Sort(validValues); double lowerP = alpha / 2d; @@ -501,15 +999,18 @@ private BootstrapStatisticResult ComputePercentileCI(double[] values, double pop } /// - /// Computes bias-corrected (BC) confidence intervals for a single statistic. + /// Computes bias-corrected confidence intervals for a single statistic. /// + /// Bootstrap statistic values. + /// The original statistic estimate. + /// The two-sided alpha level. + /// The bias-corrected confidence interval summary. private BootstrapStatisticResult ComputeBiasCorrectedCI(double[] values, double populationEstimate, double alpha) { - var validValues = values.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); + var validValues = values.Where(Tools.IsFinite).ToArray(); int validN = validValues.Length; if (validN == 0) return EmptyResult(populationEstimate, values.Length); - // Count proportion <= population estimate int countLeq = 0; for (int i = 0; i < validN; i++) if (validValues[i] <= populationEstimate) countLeq++; @@ -536,15 +1037,19 @@ private BootstrapStatisticResult ComputeBiasCorrectedCI(double[] values, double } /// - /// Computes bias-corrected and accelerated (BCa) confidence intervals for a single statistic. + /// Computes bias-corrected and accelerated confidence intervals for a single statistic. /// + /// Bootstrap statistic values. + /// The original statistic estimate. + /// The two-sided alpha level. + /// The jackknife acceleration constant. + /// The BCa confidence interval summary. private BootstrapStatisticResult ComputeBCaCI(double[] values, double populationEstimate, double alpha, double acceleration) { - var validValues = values.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); + var validValues = values.Where(Tools.IsFinite).ToArray(); int validN = validValues.Length; if (validN == 0) return EmptyResult(populationEstimate, values.Length); - // Count proportion <= population estimate (matching BCaQuantileCI line 593) int countLeq = 0; for (int i = 0; i < validN; i++) if (validValues[i] <= populationEstimate) countLeq++; @@ -577,18 +1082,20 @@ private BootstrapStatisticResult ComputeBCaCI(double[] values, double population } /// - /// Computes Normal (standard) confidence intervals for a single statistic. - /// Uses a configurable transform for transformation invariance. + /// Computes Normal confidence intervals for a single statistic using the configured transform pair. /// + /// Bootstrap statistic values. + /// The original statistic estimate. + /// The two-sided alpha level. + /// The Normal confidence interval summary. private BootstrapStatisticResult ComputeNormalCI(double[] values, double populationEstimate, double alpha) { double popTransformed = ApplyTransform(populationEstimate); - var transformedValid = new double[values.Length]; int validCount = 0; for (int i = 0; i < values.Length; i++) { - if (!double.IsNaN(values[i]) && !double.IsInfinity(values[i])) + if (Tools.IsFinite(values[i])) { transformedValid[validCount] = ApplyTransform(values[i]); validCount++; @@ -620,19 +1127,20 @@ private BootstrapStatisticResult ComputeNormalCI(double[] values, double populat } /// - /// Computes Bootstrap-t (studentized) confidence intervals for a single statistic. - /// Requires RunWithStudentizedBootstrap() to have been called first. + /// Computes Bootstrap-t confidence intervals for a single statistic. /// + /// The zero-based statistic index. + /// The original statistic estimate. + /// The two-sided alpha level. + /// The Bootstrap-t confidence interval summary. private BootstrapStatisticResult ComputeBootstrapTCI(int statisticIndex, double populationEstimate, double alpha) { double popTransformed = ApplyTransform(populationEstimate); - - // GetConfidenceIntervals() validates _studentizedValues is non-null before calling this method var xCol = _transformedStatistics!.GetColumn(statisticIndex); var tCol = _studentizedValues!.GetColumn(statisticIndex); - var validX = xCol.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); - var validT = tCol.Where(x => !double.IsNaN(x) && !double.IsInfinity(x)).ToArray(); + var validX = xCol.Where(Tools.IsFinite).ToArray(); + var validT = tCol.Where(Tools.IsFinite).ToArray(); if (validT.Length < 2) return EmptyResult(populationEstimate, Replicates); @@ -659,11 +1167,12 @@ private BootstrapStatisticResult ComputeBootstrapTCI(int statisticIndex, double #region BCa Support /// - /// Computes the acceleration constants for each statistic using jackknife leave-one-out. + /// Computes acceleration constants for each statistic using leave-one-out jackknife samples. /// + /// The original statistic estimates. + /// The acceleration constant for each statistic. private double[] ComputeAccelerationConstants(double[] populationEstimates) { - // Caller (GetConfidenceIntervals) validates these delegates before calling this method var sampleSize = SampleSizeFunction!; var jackknife = JackknifeFunction!; var fitFunc = FitFunction!; @@ -691,7 +1200,7 @@ private double[] ComputeAccelerationConstants(double[] populationEstimates) } catch (Exception) { - // Skip failed jackknife samples + // Skip failed jackknife samples. } }); @@ -706,8 +1215,9 @@ private double[] ComputeAccelerationConstants(double[] populationEstimates) #region Private Helpers /// - /// Validates that the core delegate functions are set. + /// Validates that the regular bootstrap delegates are set. /// + /// Thrown when a required regular-bootstrap delegate is missing. private void ValidateCoreDelegates() { if (ResampleFunction == null) @@ -719,12 +1229,38 @@ private void ValidateCoreDelegates() } /// - /// Initializes internal state arrays before a bootstrap run. + /// Validates that the pivotal bootstrap delegates and parent covariance are set. + /// + /// Thrown when a required pivotal-bootstrap input is missing. + private void ValidatePivotalDelegates() + { + if (ResampleFunction == null) + throw new InvalidOperationException("ResampleFunction must be set before running the pivotal bootstrap."); + if (FitWithCovarianceFunction == null) + throw new InvalidOperationException("FitWithCovarianceFunction must be set before running the pivotal bootstrap."); + if (_originalCovariance == null) + throw new InvalidOperationException("OriginalCovariance must be set before running the pivotal bootstrap."); + } + + /// + /// Validates the configured replicate and retry counts. + /// + /// Thrown when or is not positive. + private void ValidateReplicationSettings() + { + if (Replicates < 1) + throw new ArgumentOutOfRangeException(nameof(Replicates), "The number of replicates must be positive."); + if (MaxRetries < 1) + throw new ArgumentOutOfRangeException(nameof(MaxRetries), "The maximum retry count must be positive."); + } + + /// + /// Initializes regular-bootstrap state arrays before a regular run. /// private void InitializeState() { - // ValidateCoreDelegates() is always called before this method - var originalStats = StatisticFunction!(_originalParameters); + var originalStats = ValidateStatistics(StatisticFunction!(_originalParameters)); + _parentStatistics = (double[])originalStats.Clone(); _numStats = originalStats.Length; _numParams = _originalParameters.Values.Length; @@ -734,11 +1270,25 @@ private void InitializeState() _failedCount = 0; _studentizedValues = null; _transformedStatistics = null; + _runType = BootstrapRunType.None; + } + + /// + /// Clears stored pivotal-bootstrap state before a regular bootstrap run. + /// + private void ResetPivotalState() + { + _rawBootstrapFits = Array.Empty(); + _rawBootstrapParameterSets = Array.Empty(); + _rawBootstrapStatistics = null; + _pivotalLinks = Array.Empty(); + _pivotalDiagnostics = null; } /// - /// Marks a replicate as failed with NaN values. + /// Marks a replicate as failed with NaN parameter and statistic values. /// + /// The zero-based replicate index. private void MarkFailed(int idx) { var nanParams = new double[_numParams]; @@ -751,24 +1301,31 @@ private void MarkFailed(int idx) } /// - /// Applies the Transform function, or returns the value unchanged if Transform is null. + /// Applies the configured regular-bootstrap statistic transform. /// + /// The value to transform. + /// The transformed value, or the original value when is null. private double ApplyTransform(double value) { return Transform != null ? Transform(value) : value; } /// - /// Applies the InverseTransform function, or returns the value unchanged if InverseTransform is null. + /// Applies the configured inverse regular-bootstrap statistic transform. /// + /// The value to inverse transform. + /// The inverse-transformed value, or the original value when is null. private double ApplyInverseTransform(double value) { return InverseTransform != null ? InverseTransform(value) : value; } /// - /// Creates an empty result for when there are insufficient valid values. + /// Creates an empty confidence interval result for insufficient valid values. /// + /// The original estimate. + /// The total number of attempted values. + /// A confidence interval result containing NaN interval bounds. private BootstrapStatisticResult EmptyResult(double populationEstimate, int totalCount) { return new BootstrapStatisticResult @@ -783,6 +1340,243 @@ private BootstrapStatisticResult EmptyResult(double populationEstimate, int tota }; } + /// + /// Creates percentile results for parameter and statistic ensembles. + /// + /// The parameter ensemble. + /// The statistic ensemble, or null when no statistic function was supplied. + /// The original parameter estimates. + /// The original statistic estimates. + /// The two-sided alpha level. + /// The number of failed replicates. + /// The confidence interval method label to store in the result. + /// A bootstrap results object containing percentile confidence intervals. + private BootstrapResults CreatePercentileResults( + ParameterSet[] parameterSets, + double[,]? statistics, + double[] parameterEstimates, + double[]? statisticEstimates, + double alpha, + int failedReplicates, + BootstrapCIMethod method) + { + int statisticCount = statistics?.GetLength(1) ?? 0; + var results = new BootstrapResults + { + Method = method, + Alpha = alpha, + StatisticResults = new BootstrapStatisticResult[statisticCount], + ParameterResults = new BootstrapStatisticResult[parameterEstimates.Length], + FailedReplicates = failedReplicates + }; + + for (int i = 0; i < statisticCount; i++) + { + double estimate = statisticEstimates != null && i < statisticEstimates.Length ? statisticEstimates[i] : double.NaN; + results.StatisticResults[i] = ComputePercentileCI(statistics!.GetColumn(i), estimate, alpha); + } + + for (int i = 0; i < parameterEstimates.Length; i++) + results.ParameterResults[i] = ComputePercentileCI(GetParameterColumn(parameterSets, i), parameterEstimates[i], alpha); + + return results; + } + + /// + /// Extracts one parameter column from an ensemble of parameter sets. + /// + /// The parameter ensemble. + /// The zero-based parameter index. + /// The values for the requested parameter. + private static double[] GetParameterColumn(ParameterSet[] parameterSets, int parameterIndex) + { + var values = new double[parameterSets.Length]; + for (int i = 0; i < parameterSets.Length; i++) + values[i] = parameterSets[i].Values[parameterIndex]; + return values; + } + + /// + /// Computes statistic values for each parameter set. + /// + /// The parameter sets to evaluate. + /// The statistic function. + /// A two-dimensional statistic array indexed by replicate and statistic. + private static double[,] ComputeStatistics(ParameterSet[] parameterSets, Func statistic) + { + if (parameterSets.Length == 0) + return new double[0, 0]; + + double[] first = ValidateStatistics(statistic(parameterSets[0])); + var values = new double[parameterSets.Length, first.Length]; + for (int j = 0; j < first.Length; j++) + values[0, j] = first[j]; + + for (int i = 1; i < parameterSets.Length; i++) + { + double[] row = ValidateStatistics(statistic(parameterSets[i]), first.Length); + for (int j = 0; j < first.Length; j++) + values[i, j] = row[j]; + } + + return values; + } + + /// + /// Validates statistic values returned by . + /// + /// The statistic values to validate. + /// The expected statistic count, when known. + /// The validated statistic values. + /// Thrown when the statistic values are null, empty, non-finite, or have the wrong length. + private static double[] ValidateStatistics(double[] statistics, int? expectedLength = null) + { + if (statistics == null) + throw new InvalidOperationException("The statistic function returned null."); + if (statistics.Length == 0) + throw new InvalidOperationException("The statistic function must return at least one statistic."); + if (expectedLength.HasValue && statistics.Length != expectedLength.Value) + throw new InvalidOperationException("The statistic function must return the same number of statistics for every draw."); + if (!ContainsOnlyFiniteValues(statistics)) + throw new InvalidOperationException("The statistic function returned a non-finite value."); + + return statistics; + } + + /// + /// Determines whether a fitted parameter set has the expected finite parameter vector. + /// + /// The fitted parameter set to evaluate. + /// The expected number of parameters. + /// True when the parameter values are non-null, have the expected length, and are finite; otherwise false. + private static bool HasExpectedFiniteParameterValues(ParameterSet parameters, int expectedLength) + { + return parameters.Values != null + && parameters.Values.Length == expectedLength + && ContainsOnlyFiniteValues(parameters.Values); + } + + /// + /// Creates the original covariance-aware fit for pivotal bootstrap. + /// + /// The original fit and covariance. + /// Thrown when is not set. + private BootstrapFit CreateOriginalFit() + { + if (_originalCovariance == null) + throw new InvalidOperationException("OriginalCovariance must be set before running or transforming a pivotal bootstrap."); + + var fit = new BootstrapFit(_originalParameters, _originalCovariance); + if (!IsValidFit(fit, null)) + throw new ArgumentException("The original fit must contain finite parameters and a finite square covariance matrix."); + + return fit; + } + + /// + /// Determines whether a raw covariance-aware fit should be accepted before pivotal transformation. + /// + /// The fit to evaluate. + /// The required parameter count. + /// True when the fit is valid and passes the optional replicate filter; otherwise false. + private bool IsAcceptedRawFit(BootstrapFit? fit, int parameterCount) + { + if (!IsValidFit(fit, parameterCount)) + return false; + if (PivotalReplicateFilter != null && !PivotalReplicateFilter(fit!)) + return false; + return true; + } + + /// + /// Determines whether a covariance-aware fit has finite parameters and a finite square covariance matrix. + /// + /// The fit to evaluate. + /// The required parameter count, when known. + /// True when the fit is valid; otherwise false. + private static bool IsValidFit(BootstrapFit? fit, int? parameterCount) + { + if (fit == null) + return false; + if (fit.Parameters.Values == null || fit.Parameters.Values.Length == 0) + return false; + if (parameterCount.HasValue && fit.Parameters.Values.Length != parameterCount.Value) + return false; + if (!ContainsOnlyFiniteValues(fit.Parameters.Values)) + return false; + if (fit.Covariance == null || !fit.Covariance.IsSquare || fit.Covariance.NumberOfRows != fit.Parameters.Values.Length) + return false; + + for (int i = 0; i < fit.Covariance.NumberOfRows; i++) + { + for (int j = 0; j < fit.Covariance.NumberOfColumns; j++) + { + if (!Tools.IsFinite(fit.Covariance[i, j])) + return false; + } + } + + return true; + } + + /// + /// Validates that transformed values are finite. + /// + /// The transformed values. + /// The exception message to use when validation fails. + /// Thrown when any value is non-finite. + private static void ValidateTransformedValues(double[] values, string message) + { + if (!ContainsOnlyFiniteValues(values)) + throw new ArgumentException(message); + } + + /// + /// Determines whether every value in an array is finite. + /// + /// The values to evaluate. + /// True when every value is finite; otherwise false. + private static bool ContainsOnlyFiniteValues(double[] values) + { + if (values == null) + return false; + + for (int i = 0; i < values.Length; i++) + { + if (!Tools.IsFinite(values[i])) + return false; + } + + return true; + } + + /// + /// Validates a confidence interval request against the last bootstrap run mode. + /// + /// The requested confidence interval method. + /// The requested alpha level. + /// Thrown when no run has completed or when the method is incompatible with the run mode. + /// Thrown when is not between zero and one. + private void ValidateConfidenceIntervalRequest(BootstrapCIMethod method, double alpha) + { + if (_runType == BootstrapRunType.None || _bootstrapStatistics == null) + throw new InvalidOperationException("A bootstrap run must be completed before requesting confidence intervals."); + if (alpha <= 0d || alpha >= 1d) + throw new ArgumentOutOfRangeException(nameof(alpha), "Alpha must be between 0 and 1."); + + if (_runType == BootstrapRunType.Pivotal) + { + if (method != BootstrapCIMethod.Percentile) + throw new InvalidOperationException("Only percentile confidence intervals are supported after a pivotal bootstrap run."); + return; + } + + if (method == BootstrapCIMethod.BCa && (JackknifeFunction == null || SampleSizeFunction == null)) + throw new InvalidOperationException("JackknifeFunction and SampleSizeFunction must be set for BCa method."); + if (method == BootstrapCIMethod.BootstrapT && _studentizedValues == null) + throw new InvalidOperationException("RunWithStudentizedBootstrap() must be called before requesting Bootstrap-t confidence intervals."); + } + #endregion } } diff --git a/Numerics/Sampling/Bootstrap/PivotalBootstrap.cs b/Numerics/Sampling/Bootstrap/PivotalBootstrap.cs deleted file mode 100644 index a33f1929..00000000 --- a/Numerics/Sampling/Bootstrap/PivotalBootstrap.cs +++ /dev/null @@ -1,895 +0,0 @@ -using Numerics.Data.Statistics; -using Numerics.Functions; -using Numerics.Mathematics.LinearAlgebra; -using Numerics.Mathematics.Optimization; -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Threading; -using System.Threading.Tasks; - -namespace Numerics.Sampling -{ - /// - /// Specifies how invalid pivotal bootstrap draws should be handled. - /// - public enum PivotalBootstrapInvalidDrawPolicy - { - /// - /// Drop invalid pivotal draws from the returned ensemble. - /// - Drop, - - /// - /// Replace invalid pivotal draws with the corresponding raw bootstrap fit. - /// - UseRaw, - - /// - /// Replace invalid pivotal draws with the parent fit. - /// - UseParent - } - - /// - /// Stores a fitted parameter vector and its covariance matrix. - /// - [Serializable] - public sealed class PivotalBootstrapFit - { - /// - /// Initializes a new instance of the class. - /// - /// The fitted parameter set. - /// The covariance matrix for the fitted parameters. - public PivotalBootstrapFit(ParameterSet parameters, Matrix covariance) - { - if (parameters.Values == null) - throw new ArgumentException("The parameter set must contain values.", nameof(parameters)); - if (covariance == null) - throw new ArgumentNullException(nameof(covariance)); - if (!covariance.IsSquare) - throw new ArgumentException("The covariance matrix must be square.", nameof(covariance)); - if (covariance.NumberOfRows != parameters.Values.Length) - throw new ArgumentException("The covariance dimension must match the parameter count.", nameof(covariance)); - - Parameters = parameters.Clone(); - Covariance = covariance.Clone(); - } - - /// - /// Initializes a new instance of the class. - /// - /// The fitted parameter values. - /// The covariance matrix for the fitted parameters. - public PivotalBootstrapFit(double[] parameters, Matrix covariance) - : this(new ParameterSet(parameters != null ? (double[])parameters.Clone() : throw new ArgumentNullException(nameof(parameters)), double.NaN), covariance) - { - } - - /// - /// Gets the fitted parameter set. - /// - public ParameterSet Parameters { get; } - - /// - /// Gets the covariance matrix for . - /// - public Matrix Covariance { get; } - - /// - /// Gets the number of fitted parameters. - /// - public int ParameterCount => Parameters.Values.Length; - } - - /// - /// Delegate-backed scalar link used by . - /// - /// - /// This type gives callers complete control over the link, inverse link, and - /// derivative used for each parameter. Use to adapt - /// Numerics link-function classes such as , , - /// or . - /// - [Serializable] - public sealed class PivotalBootstrapLink - { - /// - /// Initializes a new instance of the class. - /// - /// The link transformation. - /// The inverse link transformation. - /// The first derivative of the link with respect to the raw parameter. - /// An optional descriptive name. - public PivotalBootstrapLink( - Func link, - Func inverseLink, - Func derivative, - string? name = null) - { - Link = link ?? throw new ArgumentNullException(nameof(link)); - InverseLink = inverseLink ?? throw new ArgumentNullException(nameof(inverseLink)); - Derivative = derivative ?? throw new ArgumentNullException(nameof(derivative)); - Name = name ?? "Custom"; - } - - /// - /// Gets an identity link. - /// - public static PivotalBootstrapLink Identity { get; } = FromLinkFunction(new IdentityLink()); - - /// - /// Gets the link transformation. - /// - public Func Link { get; } - - /// - /// Gets the inverse link transformation. - /// - public Func InverseLink { get; } - - /// - /// Gets the first derivative of the link with respect to the raw parameter. - /// - public Func Derivative { get; } - - /// - /// Gets the descriptive link name. - /// - public string Name { get; } - - /// - /// Adapts an to a pivotal bootstrap link. - /// - /// The Numerics link function. - /// An optional descriptive name. - public static PivotalBootstrapLink FromLinkFunction(ILinkFunction linkFunction, string? name = null) - { - if (linkFunction == null) - throw new ArgumentNullException(nameof(linkFunction)); - - return new PivotalBootstrapLink( - linkFunction.Link, - linkFunction.InverseLink, - linkFunction.DLink, - name ?? linkFunction.GetType().Name); - } - } - - /// - /// Context supplied to a pivotal bootstrap link factory. - /// - public sealed class PivotalBootstrapLinkContext - { - private readonly double[,] _rawParameterValues; - - /// - /// Initializes a new instance of the class. - /// - /// The parent fit. - /// The accepted raw bootstrap fits. - public PivotalBootstrapLinkContext(PivotalBootstrapFit parentFit, PivotalBootstrapFit[] rawBootstrapFits) - { - ParentFit = parentFit ?? throw new ArgumentNullException(nameof(parentFit)); - RawBootstrapFits = rawBootstrapFits ?? throw new ArgumentNullException(nameof(rawBootstrapFits)); - - int p = parentFit.ParameterCount; - _rawParameterValues = new double[rawBootstrapFits.Length, p]; - for (int i = 0; i < rawBootstrapFits.Length; i++) - { - for (int j = 0; j < p; j++) - _rawParameterValues[i, j] = rawBootstrapFits[i].Parameters.Values[j]; - } - } - - /// - /// Gets the parent fit. - /// - public PivotalBootstrapFit ParentFit { get; } - - /// - /// Gets the accepted raw bootstrap fits. - /// - public PivotalBootstrapFit[] RawBootstrapFits { get; } - - /// - /// Gets the number of parameters. - /// - public int ParameterCount => ParentFit.ParameterCount; - - /// - /// Gets a copy of the raw bootstrap values for one parameter. - /// - /// The zero-based parameter index. - public double[] GetRawParameterValues(int parameterIndex) - { - if (parameterIndex < 0 || parameterIndex >= ParameterCount) - throw new ArgumentOutOfRangeException(nameof(parameterIndex)); - - var values = new double[RawBootstrapFits.Length]; - for (int i = 0; i < values.Length; i++) - values[i] = _rawParameterValues[i, parameterIndex]; - return values; - } - } - - /// - /// Options for transforming raw bootstrap fits into pivotal bootstrap draws. - /// - public class PivotalBootstrapTransformOptions - { - /// - /// Gets or sets a factory that returns one scalar link per parameter. - /// - /// - /// The factory is called after raw bootstrap fits have been accepted, so it can - /// estimate data-adaptive links from the parent and raw bootstrap ensembles. - /// If omitted, identity links are used for every parameter. - /// - public Func? LinkFactory { get; set; } - - /// - /// Gets or sets an optional statistic function applied to each returned parameter set. - /// - public Func? StatisticFunction { get; set; } - - /// - /// Gets or sets an optional filter applied to raw bootstrap fits before the pivotal transform. - /// - public Func? ReplicateFilter { get; set; } - - /// - /// Gets or sets an optional validator applied to transformed parameter values. - /// - public Func? ParameterValidator { get; set; } - - /// - /// Gets or sets the policy used when a pivotal draw is invalid. Default is . - /// - public PivotalBootstrapInvalidDrawPolicy InvalidDrawPolicy { get; set; } = PivotalBootstrapInvalidDrawPolicy.Drop; - - /// - /// Gets or sets a value indicating whether covariance matrices should be repaired to be symmetric positive definite. - /// - public bool RegularizeCovariances { get; set; } = true; - - /// - /// Gets or sets an optional absolute component limit for the standardized pivot vector. - /// - public double? ZLimit { get; set; } - - /// - /// Gets or sets a value indicating whether small Gaussian jitter should be added in pivot space. - /// - public bool AddJitter { get; set; } - - /// - /// Gets or sets the standard deviation of optional Gaussian jitter in pivot space. - /// - public double JitterScale { get; set; } = 0.01d; - - /// - /// Gets or sets the pseudo-random-number generator seed. Default = 12345. - /// - public int PRNGSeed { get; set; } = 12345; - } - - /// - /// Options for running an end-to-end pivotal bootstrap. - /// - /// The data type being bootstrapped. - public sealed class PivotalBootstrapOptions : PivotalBootstrapTransformOptions - { - /// - /// Gets or sets the function used to resample data from the parent fit. - /// - public Func? ResampleFunction { get; set; } - - /// - /// Gets or sets the function used to fit one bootstrap data set and compute its covariance. - /// - public Func? FitFunction { get; set; } - - /// - /// Gets or sets the number of raw bootstrap replicates to request. Default = 10,000. - /// - public int Replicates { get; set; } = 10000; - - /// - /// Gets or sets the maximum number of retries for a failed raw bootstrap fit. Default = 20. - /// - public int MaxRetries { get; set; } = 20; - - /// - /// Gets or sets the maximum number of parallel workers. Values less than 1 use the default scheduler setting. - /// - public int MaxDegreeOfParallelism { get; set; } = 0; - } - - /// - /// Diagnostics returned by a pivotal bootstrap run. - /// - [Serializable] - public sealed class PivotalBootstrapDiagnostics - { - /// - /// Gets or sets the number of raw bootstrap replicates requested. - /// - public int RequestedReplicates { get; set; } - - /// - /// Gets or sets the number of raw bootstrap fits rejected before transformation. - /// - public int RejectedRawReplicates { get; set; } - - /// - /// Gets or sets the number of raw bootstrap fits that failed during end-to-end resampling. - /// - public int FailedRawReplicates { get; set; } - - /// - /// Gets or sets the number of raw bootstrap fits accepted for transformation. - /// - public int AcceptedRawReplicates { get; set; } - - /// - /// Gets or sets the number of transformed pivotal draws retained. - /// - public int RetainedPivotalReplicates { get; set; } - - /// - /// Gets or sets the number of transformed pivotal draws dropped or replaced according to policy. - /// - public int InvalidPivotalReplicates { get; set; } - - /// - /// Gets or sets the raw bootstrap resampling time. - /// - public TimeSpan ResamplingTime { get; set; } - - /// - /// Gets or sets the pivotal transformation time. - /// - public TimeSpan TransformationTime { get; set; } - } - - /// - /// Results returned by a pivotal bootstrap run. - /// - [Serializable] - public sealed class PivotalBootstrapResults - { - internal PivotalBootstrapResults( - PivotalBootstrapFit parentFit, - PivotalBootstrapFit[] rawFits, - ParameterSet[] pivotalParameterSets, - PivotalBootstrapLink[] links, - double[]? parentStatistics, - double[,]? rawStatistics, - double[,]? pivotalStatistics, - PivotalBootstrapDiagnostics diagnostics) - { - ParentFit = parentFit; - RawFits = rawFits; - RawParameterSets = rawFits.Select(f => f.Parameters.Clone()).ToArray(); - PivotalParameterSets = pivotalParameterSets; - Links = links; - ParentStatistics = parentStatistics; - RawStatistics = rawStatistics; - PivotalStatistics = pivotalStatistics; - Diagnostics = diagnostics; - } - - /// - /// Gets the parent fit. - /// - public PivotalBootstrapFit ParentFit { get; } - - /// - /// Gets the accepted raw bootstrap fits and their covariances. - /// - public PivotalBootstrapFit[] RawFits { get; } - - /// - /// Gets the accepted raw bootstrap parameter sets. - /// - public ParameterSet[] RawParameterSets { get; } - - /// - /// Gets the retained pivotal bootstrap parameter sets. - /// - public ParameterSet[] PivotalParameterSets { get; } - - /// - /// Gets the links used for the pivotal transformation. - /// - public PivotalBootstrapLink[] Links { get; } - - /// - /// Gets the statistic values for the parent fit, when a statistic function was supplied. - /// - public double[]? ParentStatistics { get; } - - /// - /// Gets raw bootstrap statistics as [replicate, statistic], when a statistic function was supplied. - /// - public double[,]? RawStatistics { get; } - - /// - /// Gets pivotal bootstrap statistics as [replicate, statistic], when a statistic function was supplied. - /// - public double[,]? PivotalStatistics { get; } - - /// - /// Gets run diagnostics. - /// - public PivotalBootstrapDiagnostics Diagnostics { get; } - - /// - /// Gets percentile confidence intervals for pivotal bootstrap parameters. - /// - /// The two-sided alpha level, e.g. 0.1 for a 90 percent interval. - public BootstrapStatisticResult[] GetPivotalParameterConfidenceIntervals(double alpha) - { - return CreateIntervalResults(PivotalParameterSets, ParentFit.Parameters.Values, alpha); - } - - /// - /// Gets percentile confidence intervals for raw bootstrap parameters. - /// - /// The two-sided alpha level, e.g. 0.1 for a 90 percent interval. - public BootstrapStatisticResult[] GetRawParameterConfidenceIntervals(double alpha) - { - return CreateIntervalResults(RawParameterSets, ParentFit.Parameters.Values, alpha); - } - - /// - /// Gets percentile confidence intervals for pivotal bootstrap statistics. - /// - /// The two-sided alpha level, e.g. 0.1 for a 90 percent interval. - public BootstrapStatisticResult[] GetPivotalStatisticConfidenceIntervals(double alpha) - { - if (PivotalStatistics == null || ParentStatistics == null) - throw new InvalidOperationException("No statistic function was supplied."); - - return CreateIntervalResults(PivotalStatistics, ParentStatistics, alpha); - } - - /// - /// Gets percentile confidence intervals for raw bootstrap statistics. - /// - /// The two-sided alpha level, e.g. 0.1 for a 90 percent interval. - public BootstrapStatisticResult[] GetRawStatisticConfidenceIntervals(double alpha) - { - if (RawStatistics == null || ParentStatistics == null) - throw new InvalidOperationException("No statistic function was supplied."); - - return CreateIntervalResults(RawStatistics, ParentStatistics, alpha); - } - - private static BootstrapStatisticResult[] CreateIntervalResults(ParameterSet[] parameterSets, double[] estimates, double alpha) - { - if (parameterSets.Length == 0) - return Array.Empty(); - - int columns = parameterSets[0].Values.Length; - var values = new double[parameterSets.Length, columns]; - for (int i = 0; i < parameterSets.Length; i++) - { - for (int j = 0; j < columns; j++) - values[i, j] = parameterSets[i].Values[j]; - } - - return CreateIntervalResults(values, estimates, alpha); - } - - private static BootstrapStatisticResult[] CreateIntervalResults(double[,] values, double[] estimates, double alpha) - { - if (alpha <= 0d || alpha >= 1d) - throw new ArgumentOutOfRangeException(nameof(alpha), "Alpha must be between 0 and 1."); - - int rows = values.GetLength(0); - int columns = values.GetLength(1); - var results = new BootstrapStatisticResult[columns]; - for (int j = 0; j < columns; j++) - { - var column = new List(rows); - for (int i = 0; i < rows; i++) - { - double value = values[i, j]; - if (IsFinite(value)) - column.Add(value); - } - - column.Sort(); - results[j] = new BootstrapStatisticResult - { - PopulationEstimate = estimates[j], - LowerCI = column.Count > 0 ? Statistics.Percentile(column, alpha / 2d, true) : double.NaN, - UpperCI = column.Count > 0 ? Statistics.Percentile(column, 1d - alpha / 2d, true) : double.NaN, - ValidCount = column.Count, - TotalCount = rows, - StandardError = column.Count > 1 ? Statistics.StandardDeviation(column) : double.NaN, - Mean = column.Count > 0 ? column.Average() : double.NaN - }; - } - - return results; - } - - private static bool IsFinite(double value) - { - return !double.IsNaN(value) && !double.IsInfinity(value); - } - } - - /// - /// Runs the bias-corrected pivotal bootstrap. - /// - public static class PivotalBootstrap - { - /// - /// Runs an end-to-end pivotal bootstrap. - /// - /// The data type being bootstrapped. - /// The original data. - /// The parent fit and covariance. - /// Bootstrap options. - public static PivotalBootstrapResults Run( - TData originalData, - PivotalBootstrapFit parentFit, - PivotalBootstrapOptions options) - { - if (options == null) - throw new ArgumentNullException(nameof(options)); - if (options.ResampleFunction == null) - throw new InvalidOperationException("A resample function is required."); - if (options.FitFunction == null) - throw new InvalidOperationException("A fit function is required."); - if (options.Replicates < 1) - throw new ArgumentOutOfRangeException(nameof(options.Replicates), "The number of replicates must be positive."); - if (options.MaxRetries < 1) - throw new ArgumentOutOfRangeException(nameof(options.MaxRetries), "The maximum retry count must be positive."); - - ValidateFit(parentFit, null); - var stopwatch = Stopwatch.StartNew(); - var prng = new MersenneTwister(options.PRNGSeed); - int[] seeds = prng.NextIntegers(options.Replicates); - var rawFits = new PivotalBootstrapFit?[options.Replicates]; - int failed = 0; - - var parallelOptions = new ParallelOptions(); - if (options.MaxDegreeOfParallelism > 0) - parallelOptions.MaxDegreeOfParallelism = options.MaxDegreeOfParallelism; - - Parallel.For(0, options.Replicates, parallelOptions, index => - { - bool accepted = false; - for (int retry = 0; retry < options.MaxRetries; retry++) - { - try - { - var rng = new MersenneTwister(seeds[index] + 10 * retry); - TData sample = options.ResampleFunction(originalData, parentFit, rng); - PivotalBootstrapFit fit = options.FitFunction(sample); - if (!IsAcceptedRawFit(fit, parentFit.ParameterCount, options)) - continue; - - rawFits[index] = fit; - accepted = true; - } - catch - { - // Retry failed resamples or fits. - } - - if (accepted) - break; - } - - if (!accepted) - Interlocked.Increment(ref failed); - }); - - stopwatch.Stop(); - PivotalBootstrapFit[] acceptedFits = rawFits.Where(f => f != null).Select(f => f!).ToArray(); - PivotalBootstrapResults results = Transform(parentFit, acceptedFits, options); - results.Diagnostics.RequestedReplicates = options.Replicates; - results.Diagnostics.FailedRawReplicates = failed; - results.Diagnostics.ResamplingTime = stopwatch.Elapsed; - return results; - } - - /// - /// Transforms raw bootstrap fits into pivotal bootstrap draws. - /// - /// The parent fit and covariance. - /// Raw bootstrap fits and covariances. - /// Transformation options. If omitted, identity links are used. - public static PivotalBootstrapResults Transform( - PivotalBootstrapFit parentFit, - IEnumerable rawBootstrapFits, - PivotalBootstrapTransformOptions? options = null) - { - if (rawBootstrapFits == null) - throw new ArgumentNullException(nameof(rawBootstrapFits)); - - options ??= new PivotalBootstrapTransformOptions(); - ValidateFit(parentFit, null); - int p = parentFit.ParameterCount; - var stopwatch = Stopwatch.StartNew(); - PivotalBootstrapFit[] suppliedRaw = rawBootstrapFits.ToArray(); - var acceptedRaw = suppliedRaw - .Where(f => IsAcceptedRawFit(f, p, options)) - .ToArray(); - - if (acceptedRaw.Length == 0) - throw new InvalidOperationException("No valid raw bootstrap fits were supplied."); - - var diagnostics = new PivotalBootstrapDiagnostics - { - RequestedReplicates = acceptedRaw.Length, - RejectedRawReplicates = suppliedRaw.Length - acceptedRaw.Length, - AcceptedRawReplicates = acceptedRaw.Length - }; - - var context = new PivotalBootstrapLinkContext(parentFit, acceptedRaw); - PivotalBootstrapLink[] links = options.LinkFactory != null - ? options.LinkFactory(context) - : Enumerable.Repeat(PivotalBootstrapLink.Identity, p).ToArray(); - ValidateLinks(links, p); - - double[] parentEta = LinkParameters(parentFit.Parameters.Values, links); - Matrix parentLinkCovariance = LinkCovariance(parentFit, links, options.RegularizeCovariances); - var parentCholesky = new CholeskyDecomposition(parentLinkCovariance); - - var pivotalParameterSets = new List(acceptedRaw.Length); - var jitterRng = new MersenneTwister(options.PRNGSeed); - int invalid = 0; - - for (int i = 0; i < acceptedRaw.Length; i++) - { - PivotalBootstrapFit rawFit = acceptedRaw[i]; - if (TryCreatePivotalDraw(rawFit, parentFit, links, parentEta, parentCholesky, options, jitterRng, out ParameterSet pivotalParameters)) - { - pivotalParameterSets.Add(pivotalParameters); - } - else - { - invalid++; - if (TryApplyInvalidPolicy(rawFit, parentFit, options.InvalidDrawPolicy, out ParameterSet fallbackParameters)) - pivotalParameterSets.Add(fallbackParameters); - } - } - - stopwatch.Stop(); - diagnostics.InvalidPivotalReplicates = invalid; - diagnostics.RetainedPivotalReplicates = pivotalParameterSets.Count; - diagnostics.TransformationTime = stopwatch.Elapsed; - - Func? statistic = options.StatisticFunction; - double[]? parentStatistics = statistic != null ? ValidateStatistics(statistic(parentFit.Parameters)) : null; - double[,]? rawStatistics = statistic != null ? ComputeStatistics(acceptedRaw.Select(f => f.Parameters).ToArray(), statistic) : null; - double[,]? pivotalStatistics = statistic != null ? ComputeStatistics(pivotalParameterSets.ToArray(), statistic) : null; - - return new PivotalBootstrapResults( - parentFit, - acceptedRaw, - pivotalParameterSets.ToArray(), - links, - parentStatistics, - rawStatistics, - pivotalStatistics, - diagnostics); - } - - private static bool TryCreatePivotalDraw( - PivotalBootstrapFit rawFit, - PivotalBootstrapFit parentFit, - PivotalBootstrapLink[] links, - double[] parentEta, - CholeskyDecomposition parentCholesky, - PivotalBootstrapTransformOptions options, - Random jitterRng, - out ParameterSet pivotalParameters) - { - pivotalParameters = default; - try - { - double[] rawEta = LinkParameters(rawFit.Parameters.Values, links); - Matrix rawLinkCovariance = LinkCovariance(rawFit, links, options.RegularizeCovariances); - var rawCholesky = new CholeskyDecomposition(rawLinkCovariance); - var difference = new double[parentEta.Length]; - for (int j = 0; j < difference.Length; j++) - difference[j] = parentEta[j] - rawEta[j]; - - double[] z = rawCholesky.Forward(new Vector(difference)).ToArray(); - if (options.ZLimit.HasValue && z.Any(value => Math.Abs(value) > options.ZLimit.Value)) - return false; - - if (options.AddJitter && options.JitterScale > 0d) - { - for (int j = 0; j < z.Length; j++) - z[j] += options.JitterScale * Numerics.Distributions.Normal.StandardZ(Math.Min(1d - 1e-16, Math.Max(1e-16, jitterRng.NextDouble()))); - } - - double[] reinflated = parentCholesky.L * z; - double[] pivotalEta = new double[parentEta.Length]; - double[] pivotalValues = new double[parentEta.Length]; - for (int j = 0; j < pivotalValues.Length; j++) - { - pivotalEta[j] = parentEta[j] + reinflated[j]; - pivotalValues[j] = links[j].InverseLink(pivotalEta[j]); - } - - if (!AllFinite(pivotalValues)) - return false; - if (options.ParameterValidator != null && !options.ParameterValidator(pivotalValues)) - return false; - - pivotalParameters = new ParameterSet(pivotalValues, rawFit.Parameters.Fitness, rawFit.Parameters.Weight); - return true; - } - catch - { - return false; - } - } - - private static bool TryApplyInvalidPolicy( - PivotalBootstrapFit rawFit, - PivotalBootstrapFit parentFit, - PivotalBootstrapInvalidDrawPolicy policy, - out ParameterSet parameterSet) - { - parameterSet = default; - switch (policy) - { - case PivotalBootstrapInvalidDrawPolicy.Drop: - return false; - case PivotalBootstrapInvalidDrawPolicy.UseRaw: - parameterSet = rawFit.Parameters.Clone(); - return true; - case PivotalBootstrapInvalidDrawPolicy.UseParent: - parameterSet = parentFit.Parameters.Clone(); - return true; - default: - throw new ArgumentOutOfRangeException(nameof(policy), $"Unknown invalid-draw policy: {policy}."); - } - } - - private static bool IsAcceptedRawFit(PivotalBootstrapFit? fit, int parameterCount, PivotalBootstrapTransformOptions options) - { - if (fit == null || !IsValidFit(fit, parameterCount)) - return false; - if (options.ReplicateFilter != null && !options.ReplicateFilter(fit)) - return false; - return true; - } - - private static void ValidateFit(PivotalBootstrapFit fit, int? parameterCount) - { - if (!IsValidFit(fit, parameterCount)) - throw new ArgumentException("The fit must contain finite parameters and a finite square covariance matrix.", nameof(fit)); - } - - private static bool IsValidFit(PivotalBootstrapFit? fit, int? parameterCount) - { - if (fit == null) - return false; - if (fit.Parameters.Values == null || fit.Parameters.Values.Length == 0) - return false; - if (parameterCount.HasValue && fit.Parameters.Values.Length != parameterCount.Value) - return false; - if (!AllFinite(fit.Parameters.Values)) - return false; - if (fit.Covariance == null || !fit.Covariance.IsSquare || fit.Covariance.NumberOfRows != fit.Parameters.Values.Length) - return false; - - for (int i = 0; i < fit.Covariance.NumberOfRows; i++) - { - for (int j = 0; j < fit.Covariance.NumberOfColumns; j++) - { - if (!IsFinite(fit.Covariance[i, j])) - return false; - } - } - - return true; - } - - private static void ValidateLinks(PivotalBootstrapLink[] links, int parameterCount) - { - if (links == null) - throw new ArgumentNullException(nameof(links)); - if (links.Length != parameterCount) - throw new ArgumentException("The link factory must return one link per parameter.", nameof(links)); - if (links.Any(link => link == null)) - throw new ArgumentException("The link factory returned a null link.", nameof(links)); - } - - private static double[] LinkParameters(double[] parameters, PivotalBootstrapLink[] links) - { - var eta = new double[parameters.Length]; - for (int i = 0; i < parameters.Length; i++) - eta[i] = links[i].Link(parameters[i]); - - if (!AllFinite(eta)) - throw new ArgumentException("The link transformation produced a non-finite value."); - - return eta; - } - - private static Matrix LinkCovariance(PivotalBootstrapFit fit, PivotalBootstrapLink[] links, bool regularize) - { - int p = fit.ParameterCount; - var jacobian = new Matrix(p); - for (int i = 0; i < p; i++) - { - double derivative = links[i].Derivative(fit.Parameters.Values[i]); - if (!IsFinite(derivative)) - throw new ArgumentException("The link derivative produced a non-finite value."); - jacobian[i, i] = derivative; - } - - Matrix linkCovariance = jacobian * fit.Covariance * jacobian.Transpose(); - if (!regularize) - return linkCovariance; - - return MatrixRegularization.MakeSymmetricPositiveDefinite(linkCovariance); - } - - private static double[,] ComputeStatistics(ParameterSet[] parameterSets, Func statistic) - { - if (parameterSets.Length == 0) - return new double[0, 0]; - - double[] first = ValidateStatistics(statistic(parameterSets[0])); - var values = new double[parameterSets.Length, first.Length]; - for (int j = 0; j < first.Length; j++) - values[0, j] = first[j]; - - for (int i = 1; i < parameterSets.Length; i++) - { - double[] row = ValidateStatistics(statistic(parameterSets[i]), first.Length); - for (int j = 0; j < first.Length; j++) - values[i, j] = row[j]; - } - - return values; - } - - private static double[] ValidateStatistics(double[] statistics, int? expectedLength = null) - { - if (statistics == null) - throw new InvalidOperationException("The statistic function returned null."); - if (statistics.Length == 0) - throw new InvalidOperationException("The statistic function must return at least one statistic."); - if (expectedLength.HasValue && statistics.Length != expectedLength.Value) - throw new InvalidOperationException("The statistic function must return the same number of statistics for every draw."); - if (!AllFinite(statistics)) - throw new InvalidOperationException("The statistic function returned a non-finite value."); - - return statistics; - } - - private static bool AllFinite(double[] values) - { - for (int i = 0; i < values.Length; i++) - { - if (!IsFinite(values[i])) - return false; - } - - return true; - } - - private static bool IsFinite(double value) - { - return !double.IsNaN(value) && !double.IsInfinity(value); - } - - } -} diff --git a/Numerics/Sampling/Bootstrap/Support/BootstrapFit.cs b/Numerics/Sampling/Bootstrap/Support/BootstrapFit.cs new file mode 100644 index 00000000..698f9954 --- /dev/null +++ b/Numerics/Sampling/Bootstrap/Support/BootstrapFit.cs @@ -0,0 +1,75 @@ +using Numerics.Mathematics.LinearAlgebra; +using Numerics.Mathematics.Optimization; +using System; + +namespace Numerics.Sampling +{ + /// + /// Stores a fitted parameter vector and the covariance matrix associated with that fit. + /// + /// + /// + /// Authors: + /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil + /// + /// + /// This type is used by covariance-aware bootstrap procedures, including the + /// pivotal bootstrap. The parameter values and covariance are cloned on input + /// so callers can safely reuse or mutate their source objects after construction. + /// + /// + [Serializable] + public sealed class BootstrapFit + { + /// + /// Initializes a new instance of the class. + /// + /// The fitted parameter set. + /// The covariance matrix for . + /// + /// Thrown when has no values, when + /// is not square, or when the covariance dimension does not match the parameter count. + /// + /// Thrown when is null. + public BootstrapFit(ParameterSet parameters, Matrix covariance) + { + if (parameters.Values == null || parameters.Values.Length == 0) + throw new ArgumentException("The parameter set must contain at least one value.", nameof(parameters)); + if (covariance == null) + throw new ArgumentNullException(nameof(covariance)); + if (!covariance.IsSquare) + throw new ArgumentException("The covariance matrix must be square.", nameof(covariance)); + if (covariance.NumberOfRows != parameters.Values.Length) + throw new ArgumentException("The covariance dimension must match the parameter count.", nameof(covariance)); + + Parameters = parameters.Clone(); + Covariance = covariance.Clone(); + } + + /// + /// Initializes a new instance of the class. + /// + /// The fitted parameter values. + /// The covariance matrix for . + /// Thrown when or is null. + public BootstrapFit(double[] parameters, Matrix covariance) + : this(new ParameterSet(parameters != null ? (double[])parameters.Clone() : throw new ArgumentNullException(nameof(parameters)), double.NaN), covariance) + { + } + + /// + /// Gets the fitted parameter set. + /// + public ParameterSet Parameters { get; } + + /// + /// Gets the covariance matrix for . + /// + public Matrix Covariance { get; } + + /// + /// Gets the number of fitted parameters. + /// + public int ParameterCount => Parameters.Values.Length; + } +} diff --git a/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapContext.cs b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapContext.cs new file mode 100644 index 00000000..d44ce502 --- /dev/null +++ b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapContext.cs @@ -0,0 +1,79 @@ +using System; + +namespace Numerics.Sampling +{ + /// + /// Provides accepted raw bootstrap fits to a pivotal bootstrap link factory. + /// + /// + /// + /// Authors: + /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil + /// + /// + /// Link factories can use this context to select fixed links, such as + /// LogLink for positive scale parameters, or to fit data-adaptive links, + /// such as YeoJohnsonLink, from the accepted raw bootstrap ensemble. + /// + /// + public sealed class PivotalBootstrapContext + { + private readonly double[,] _rawParameterValues; + + /// + /// Initializes a new instance of the class. + /// + /// The original parent fit. + /// The accepted raw bootstrap fits. + /// Thrown when or is null. + /// Thrown when a raw fit parameter count differs from the parent fit. + public PivotalBootstrapContext(BootstrapFit parentFit, BootstrapFit[] rawBootstrapFits) + { + ParentFit = parentFit ?? throw new ArgumentNullException(nameof(parentFit)); + RawBootstrapFits = rawBootstrapFits ?? throw new ArgumentNullException(nameof(rawBootstrapFits)); + + int p = parentFit.ParameterCount; + _rawParameterValues = new double[rawBootstrapFits.Length, p]; + for (int i = 0; i < rawBootstrapFits.Length; i++) + { + if (rawBootstrapFits[i].ParameterCount != p) + throw new ArgumentException("Every raw bootstrap fit must have the same parameter count as the parent fit.", nameof(rawBootstrapFits)); + + for (int j = 0; j < p; j++) + _rawParameterValues[i, j] = rawBootstrapFits[i].Parameters.Values[j]; + } + } + + /// + /// Gets the original parent fit. + /// + public BootstrapFit ParentFit { get; } + + /// + /// Gets the accepted raw bootstrap fits. + /// + public BootstrapFit[] RawBootstrapFits { get; } + + /// + /// Gets the number of fitted parameters. + /// + public int ParameterCount => ParentFit.ParameterCount; + + /// + /// Gets a copy of the accepted raw bootstrap values for one parameter. + /// + /// The zero-based parameter index. + /// The accepted raw bootstrap values for . + /// Thrown when is outside the parameter range. + public double[] GetRawParameterValues(int parameterIndex) + { + if (parameterIndex < 0 || parameterIndex >= ParameterCount) + throw new ArgumentOutOfRangeException(nameof(parameterIndex)); + + var values = new double[RawBootstrapFits.Length]; + for (int i = 0; i < values.Length; i++) + values[i] = _rawParameterValues[i, parameterIndex]; + return values; + } + } +} diff --git a/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapDiagnostics.cs b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapDiagnostics.cs new file mode 100644 index 00000000..b50586ac --- /dev/null +++ b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapDiagnostics.cs @@ -0,0 +1,57 @@ +using System; + +namespace Numerics.Sampling +{ + /// + /// Stores diagnostic counts and timings from the most recent pivotal bootstrap run. + /// + /// + /// + /// Authors: + /// Haden Smith, USACE Risk Management Center, cole.h.smith@usace.army.mil + /// + /// + [Serializable] + public sealed class PivotalBootstrapDiagnostics + { + /// + /// Gets or sets the number of raw bootstrap replicates requested or supplied. + /// + public int RequestedReplicates { get; set; } + + /// + /// Gets or sets the number of raw bootstrap fits rejected before transformation. + /// + public int RejectedRawReplicates { get; set; } + + /// + /// Gets or sets the number of raw bootstrap fits that failed after all retries. + /// + public int FailedRawReplicates { get; set; } + + /// + /// Gets or sets the number of raw bootstrap fits accepted for transformation. + /// + public int AcceptedRawReplicates { get; set; } + + /// + /// Gets or sets the number of invalid pivotal draws encountered after raw-fit acceptance. + /// + public int InvalidPivotalReplicates { get; set; } + + /// + /// Gets or sets the number of pivotal draws retained after invalid-draw handling. + /// + public int RetainedPivotalReplicates { get; set; } + + /// + /// Gets or sets the time spent generating and fitting raw bootstrap replicates. + /// + public TimeSpan ResamplingTime { get; set; } + + /// + /// Gets or sets the time spent applying the pivotal transformation. + /// + public TimeSpan TransformationTime { get; set; } + } +} diff --git a/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapInvalidDrawPolicy.cs b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapInvalidDrawPolicy.cs new file mode 100644 index 00000000..13851d5f --- /dev/null +++ b/Numerics/Sampling/Bootstrap/Support/PivotalBootstrapInvalidDrawPolicy.cs @@ -0,0 +1,23 @@ +namespace Numerics.Sampling +{ + /// + /// Specifies how invalid pivotal bootstrap draws are handled after the two-covariance transform. + /// + public enum PivotalBootstrapInvalidDrawPolicy + { + /// + /// Drop invalid pivotal draws from the retained pivotal ensemble. + /// + Drop, + + /// + /// Replace invalid pivotal draws with the corresponding accepted raw bootstrap fit. + /// + UseRaw, + + /// + /// Replace invalid pivotal draws with the original parent fit. + /// + UseParent + } +} diff --git a/Test_Numerics/Functions/Test_FisherZLink.cs b/Test_Numerics/Functions/Test_FisherZLink.cs index e59996d8..e79d363e 100644 --- a/Test_Numerics/Functions/Test_FisherZLink.cs +++ b/Test_Numerics/Functions/Test_FisherZLink.cs @@ -1,4 +1,5 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics; using Numerics.Functions; using System; @@ -40,6 +41,25 @@ public void InverseLink_KnownValues() Assert.AreEqual(-0.5d, link.InverseLink(-0.5d * Math.Log(3d)), 1e-12); } + /// + /// Verifies inverse-link values remain finite and approach the correlation bounds for large eta. + /// + [TestMethod] + public void InverseLink_LargeEta_ApproachesBounds() + { + var link = new FisherZLink(); + + double upper = link.InverseLink(10d); + double lower = link.InverseLink(-10d); + + Assert.IsTrue(Tools.IsFinite(upper)); + Assert.IsTrue(Tools.IsFinite(lower)); + Assert.IsLessThan(1d, upper); + Assert.IsGreaterThan(0.99999999d, upper); + Assert.IsGreaterThan(-1d, lower); + Assert.IsLessThan(-0.99999999d, lower); + } + /// /// Verifies round-trip behavior over the correlation domain. /// @@ -79,8 +99,10 @@ public void Link_OutsideOpenInterval_Throws() Assert.Throws(() => link.Link(-1d)); Assert.Throws(() => link.Link(1d)); + Assert.Throws(() => link.Link(double.NaN)); Assert.Throws(() => link.DLink(-1d)); Assert.Throws(() => link.DLink(1d)); + Assert.Throws(() => link.DLink(double.PositiveInfinity)); } /// diff --git a/Test_Numerics/Functions/Test_YeoJohnsonLink.cs b/Test_Numerics/Functions/Test_YeoJohnsonLink.cs index 16ccbfb0..dc84ad77 100644 --- a/Test_Numerics/Functions/Test_YeoJohnsonLink.cs +++ b/Test_Numerics/Functions/Test_YeoJohnsonLink.cs @@ -1,4 +1,5 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics; using Numerics.Functions; using System; using System.Xml.Linq; @@ -37,6 +38,18 @@ public void Constructor_Lambda_StoresValue() Assert.AreEqual(0.5d, link.Lambda, 1e-12); } + /// + /// Verifies the lambda constructor rejects non-finite and unsupported lambda values. + /// + [TestMethod] + public void Constructor_Lambda_InvalidValues_Throws() + { + Assert.Throws(() => new YeoJohnsonLink(double.NaN)); + Assert.Throws(() => new YeoJohnsonLink(double.PositiveInfinity)); + Assert.Throws(() => new YeoJohnsonLink(-5.1d)); + Assert.Throws(() => new YeoJohnsonLink(5.1d)); + } + /// /// Verifies the values constructor rejects null. /// @@ -55,6 +68,16 @@ public void Constructor_Values_SingleElement_Throws() Assert.Throws(() => new YeoJohnsonLink(new[] { 1d })); } + /// + /// Verifies the values constructor rejects non-finite samples. + /// + [TestMethod] + public void Constructor_Values_NonFiniteValue_Throws() + { + Assert.Throws(() => new YeoJohnsonLink(new[] { 1d, double.NaN })); + Assert.Throws(() => new YeoJohnsonLink(new[] { 1d, double.NegativeInfinity })); + } + /// /// Verifies lambda fitting produces a finite value for a non-degenerate sample. /// @@ -63,7 +86,7 @@ public void Constructor_Values_ProducesFiniteLambda() { var link = new YeoJohnsonLink(new[] { -2d, -1d, -0.25d, 0d, 0.5d, 1d, 3d }); - Assert.IsTrue(IsFinite(link.Lambda)); + Assert.IsTrue(Tools.IsFinite(link.Lambda)); } /// @@ -75,6 +98,17 @@ public void Constructor_XElement_Null_Throws() Assert.Throws(() => new YeoJohnsonLink((XElement)null!)); } + /// + /// Verifies XML construction rejects missing or invalid lambda values. + /// + [TestMethod] + public void Constructor_XElement_InvalidLambda_Throws() + { + Assert.Throws(() => new YeoJohnsonLink(new XElement(nameof(YeoJohnsonLink)))); + Assert.Throws(() => new YeoJohnsonLink(new XElement(nameof(YeoJohnsonLink), new XAttribute("Lambda", "NaN")))); + Assert.Throws(() => new YeoJohnsonLink(new XElement(nameof(YeoJohnsonLink), new XAttribute("Lambda", "6")))); + } + /// /// Verifies lambda equal to 1 is the identity transform. /// @@ -89,6 +123,32 @@ public void Link_LambdaOne_IsIdentity() } } + /// + /// Verifies lambda zero uses the logarithmic positive-value branch. + /// + [TestMethod] + public void Link_LambdaZero_UsesPositiveLogBranch() + { + var link = new YeoJohnsonLink(0d); + + Assert.AreEqual(Math.Log(3d), link.Link(2d), 1e-12); + Assert.AreEqual(2d, link.InverseLink(Math.Log(3d)), 1e-12); + Assert.AreEqual(1d / 3d, link.DLink(2d), 1e-12); + } + + /// + /// Verifies lambda two uses the logarithmic negative-value branch. + /// + [TestMethod] + public void Link_LambdaTwo_UsesNegativeLogBranch() + { + var link = new YeoJohnsonLink(2d); + + Assert.AreEqual(-Math.Log(3d), link.Link(-2d), 1e-12); + Assert.AreEqual(-2d, link.InverseLink(-Math.Log(3d)), 1e-12); + Assert.AreEqual(1d / 3d, link.DLink(-2d), 1e-12); + } + /// /// Verifies round-trip behavior for positive and negative values. /// @@ -141,10 +201,5 @@ public void Factory_CreatesYeoJohnsonLink() Assert.IsInstanceOfType(restored, typeof(YeoJohnsonLink)); Assert.AreEqual(0.25d, ((YeoJohnsonLink)restored).Lambda, 1e-12); } - - private static bool IsFinite(double value) - { - return !double.IsNaN(value) && !double.IsInfinity(value); - } } } diff --git a/Test_Numerics/Sampling/Test_PivotalBootstrap.cs b/Test_Numerics/Sampling/Test_PivotalBootstrap.cs index 6227ef4b..2bdac82a 100644 --- a/Test_Numerics/Sampling/Test_PivotalBootstrap.cs +++ b/Test_Numerics/Sampling/Test_PivotalBootstrap.cs @@ -1,4 +1,5 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; +using Numerics; using Numerics.Data.Statistics; using Numerics.Distributions; using Numerics.Functions; @@ -11,7 +12,7 @@ namespace Sampling { /// - /// Unit tests for the bias-corrected pivotal bootstrap. + /// Unit tests for the covariance-aware pivotal bootstrap workflow on . /// [TestClass] public class Test_PivotalBootstrap @@ -25,142 +26,546 @@ public void Transform_IdentityLink_UsesBothCovariances() { var parent = Fit(new[] { 10d, 20d }, new double[,] { { 4d, 0d }, { 0d, 9d } }); var raw = Fit(new[] { 8d, 17d }, new double[,] { { 1d, 0d }, { 0d, 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; - var options = new PivotalBootstrapTransformOptions - { - RegularizeCovariances = false - }; + boot.TransformPivotalBootstrap(new[] { raw }); - PivotalBootstrapResults result = PivotalBootstrap.Transform(parent, new[] { raw }, options); + Assert.HasCount(1, boot.BootstrapParameterSets); + Assert.AreEqual(14d, boot.BootstrapParameterSets[0].Values[0], 1e-12); + Assert.AreEqual(29d, boot.BootstrapParameterSets[0].Values[1], 1e-12); + } - Assert.HasCount(1, result.PivotalParameterSets); - Assert.AreEqual(14d, result.PivotalParameterSets[0].Values[0], 1e-12); - Assert.AreEqual(29d, result.PivotalParameterSets[0].Values[1], 1e-12); + /// + /// Verifies a full covariance matrix is used in the two-covariance transform. + /// + [TestMethod] + public void Transform_FullCovariance_UsesCholeskyAlgebra() + { + var parent = Fit(new[] { 10d, 20d }, new double[,] { { 4d, 1.2d }, { 1.2d, 9d } }); + var raw = Fit(new[] { 8d, 17d }, new double[,] { { 1d, 0.25d }, { 0.25d, 2d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + + boot.TransformPivotalBootstrap(new[] { raw }); + + double[] expected = ExpectedIdentityPivotalValues(parent, raw); + Assert.AreEqual(expected[0], boot.BootstrapParameterSets[0].Values[0], 1e-12); + Assert.AreEqual(expected[1], boot.BootstrapParameterSets[0].Values[1], 1e-12); } /// - /// Verifies callers have full control over the links through the link factory. + /// Verifies callers have full control over pivotal links through the link factory. /// [TestMethod] public void Transform_CustomLinkFactory_ControlsLinkDefinition() { var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); bool factoryWasCalled = false; - var options = new PivotalBootstrapTransformOptions + boot.RegularizePivotalCovariances = false; + boot.PivotalLinkFactory = context => { - RegularizeCovariances = false, - LinkFactory = context => + factoryWasCalled = true; + Assert.AreEqual(1, context.ParameterCount); + CollectionAssert.AreEqual(new[] { 8d }, context.GetRawParameterValues(0)); + return new ILinkFunction[] { new LinearScaleLink(2d) }; + }; + + boot.TransformPivotalBootstrap(new[] { raw }); + + Assert.IsTrue(factoryWasCalled); + Assert.IsInstanceOfType(boot.PivotalLinks[0], typeof(LinearScaleLink)); + Assert.AreEqual(14d, boot.BootstrapParameterSets[0].Values[0], 1e-12); + } + + /// + /// Verifies standard Numerics links can be mixed in a pivotal transformation. + /// + [TestMethod] + public void Transform_LogFisherZAndYeoJohnsonLinks_RetainsFiniteDraws() + { + var parent = Fit( + new[] { 1d, 2d, 0.25d, -0.5d }, + new double[,] { - factoryWasCalled = true; - Assert.AreEqual(1, context.ParameterCount); - CollectionAssert.AreEqual(new[] { 8d }, context.GetRawParameterValues(0)); - return new[] + { 0.04d, 0d, 0d, 0d }, + { 0d, 0.09d, 0d, 0d }, + { 0d, 0d, 0.01d, 0d }, + { 0d, 0d, 0d, 0.16d } + }); + var rawFits = new[] + { + Fit( + new[] { 0.9d, 1.8d, 0.10d, -0.7d }, + new double[,] + { + { 0.01d, 0d, 0d, 0d }, + { 0d, 0.04d, 0d, 0d }, + { 0d, 0d, 0.0025d, 0d }, + { 0d, 0d, 0d, 0.09d } + }), + Fit( + new[] { 1.1d, 2.1d, 0.35d, -0.3d }, + new double[,] { - new PivotalBootstrapLink( - x => 2d * x, - eta => eta / 2d, - x => 2d, - "Double") - }; - } + { 0.01d, 0d, 0d, 0d }, + { 0d, 0.04d, 0d, 0d }, + { 0d, 0d, 0.0025d, 0d }, + { 0d, 0d, 0d, 0.09d } + }) + }; + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.PivotalLinkFactory = context => new ILinkFunction[] + { + null, + new LogLink(), + new FisherZLink(), + new YeoJohnsonLink(context.GetRawParameterValues(3)) }; + boot.PivotalParameterValidator = values => values[1] > 0d && values[2] > -1d && values[2] < 1d; - PivotalBootstrapResults result = PivotalBootstrap.Transform(parent, new[] { raw }, options); + boot.TransformPivotalBootstrap(rawFits); - Assert.IsTrue(factoryWasCalled); - Assert.AreEqual("Double", result.Links[0].Name); - Assert.AreEqual(14d, result.PivotalParameterSets[0].Values[0], 1e-12); + Assert.HasCount(2, boot.BootstrapParameterSets); + Assert.IsNull(boot.PivotalLinks[0]); + Assert.IsInstanceOfType(boot.PivotalLinks[1], typeof(LogLink)); + Assert.IsInstanceOfType(boot.PivotalLinks[2], typeof(FisherZLink)); + Assert.IsInstanceOfType(boot.PivotalLinks[3], typeof(YeoJohnsonLink)); + foreach (double value in boot.BootstrapParameterSets[0].Values) + Assert.IsTrue(Tools.IsFinite(value)); + } + + /// + /// Verifies raw and pivotal confidence intervals are exposed separately after pivotal transformation. + /// + [TestMethod] + public void Transform_WithStatisticFunction_StoresRawAndPivotalStatistics() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var rawFits = new[] + { + Fit(new[] { 8d }, new double[,] { { 1d } }), + Fit(new[] { 9d }, new double[,] { { 1d } }), + Fit(new[] { 10d }, new double[,] { { 1d } }), + Fit(new[] { 11d }, new double[,] { { 1d } }), + Fit(new[] { 12d }, new double[,] { { 1d } }) + }; + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.StatisticFunction = ps => new[] { ps.Values[0] * 2d }; + + boot.TransformPivotalBootstrap(rawFits); + BootstrapResults pivotal = boot.GetConfidenceIntervals(BootstrapCIMethod.Percentile, 0.2d); + BootstrapResults raw = boot.GetRawPivotalConfidenceIntervals(0.2d); + + Assert.HasCount(1, pivotal.StatisticResults); + Assert.HasCount(1, raw.StatisticResults); + Assert.AreEqual(20d, pivotal.StatisticResults[0].PopulationEstimate, 1e-12); + Assert.AreEqual(20d, raw.StatisticResults[0].PopulationEstimate, 1e-12); + Assert.IsGreaterThan(pivotal.ParameterResults[0].LowerCI, raw.ParameterResults[0].LowerCI); } /// - /// Verifies the normal location-scale pivotal bootstrap recovers objective-Bayes marginals - /// and quantile intervals to Monte-Carlo tolerance. + /// Verifies pivotal parameter ensembles can be generated without a statistic function. /// [TestMethod] - public void Run_NormalLocationScale_MatchesObjectiveBayesMarginalsAndQuantileIntervals() + public void Transform_WithoutStatisticFunction_ProducesParameterIntervalsOnly() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + + boot.TransformPivotalBootstrap(new[] { raw }); + BootstrapResults results = boot.GetConfidenceIntervals(BootstrapCIMethod.Percentile); + + Assert.HasCount(1, results.ParameterResults); + Assert.HasCount(0, results.StatisticResults); + Assert.AreEqual(0, boot.BootstrapStatistics.GetLength(1)); + } + + /// + /// Verifies invalid pivotal draws can be dropped. + /// + [TestMethod] + public void Transform_InvalidDrawPolicyDrop_DropsInvalidPivotalDraws() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.PivotalParameterValidator = _ => false; + boot.PivotalInvalidDrawPolicy = PivotalBootstrapInvalidDrawPolicy.Drop; + + boot.TransformPivotalBootstrap(new[] { raw }); + + Assert.HasCount(0, boot.BootstrapParameterSets); + Assert.AreEqual(1, boot.PivotalDiagnostics!.InvalidPivotalReplicates); + Assert.AreEqual(0, boot.PivotalDiagnostics.RetainedPivotalReplicates); + } + + /// + /// Verifies invalid pivotal draws can fall back to raw bootstrap fits. + /// + [TestMethod] + public void Transform_InvalidDrawPolicyUseRaw_UsesRawParameters() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.PivotalParameterValidator = _ => false; + boot.PivotalInvalidDrawPolicy = PivotalBootstrapInvalidDrawPolicy.UseRaw; + + boot.TransformPivotalBootstrap(new[] { raw }); + + Assert.HasCount(1, boot.BootstrapParameterSets); + Assert.AreEqual(8d, boot.BootstrapParameterSets[0].Values[0], 1e-12); + } + + /// + /// Verifies invalid pivotal draws can fall back to the parent fit. + /// + [TestMethod] + public void Transform_InvalidDrawPolicyUseParent_UsesParentParameters() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.PivotalParameterValidator = _ => false; + boot.PivotalInvalidDrawPolicy = PivotalBootstrapInvalidDrawPolicy.UseParent; + + boot.TransformPivotalBootstrap(new[] { raw }); + + Assert.HasCount(1, boot.BootstrapParameterSets); + Assert.AreEqual(10d, boot.BootstrapParameterSets[0].Values[0], 1e-12); + } + + /// + /// Verifies raw replicate filtering occurs before pivotal transformation and is reported in diagnostics. + /// + [TestMethod] + public void Transform_ReplicateFilter_RejectsRawFitsAndUpdatesDiagnostics() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var rawFits = new[] + { + Fit(new[] { 8d }, new double[,] { { 1d } }), + Fit(new[] { 9d }, new double[,] { { 1d } }), + Fit(new[] { 12d }, new double[,] { { 1d } }) + }; + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.PivotalReplicateFilter = fit => fit.Parameters.Values[0] >= 9d; + + boot.TransformPivotalBootstrap(rawFits); + + Assert.HasCount(2, boot.RawBootstrapParameterSets); + Assert.HasCount(2, boot.BootstrapParameterSets); + Assert.AreEqual(1, boot.PivotalDiagnostics!.RejectedRawReplicates); + } + + /// + /// Verifies the pivotal run method uses the covariance-aware fit delegate, not the regular fit delegate. + /// + [TestMethod] + public void RunPivotalBootstrap_IgnoresRegularFitFunction() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var samples = new[] { 8d, 9d, 10d, 11d }; + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.Replicates = samples.Length; + boot.MaxRetries = 1; + boot.ResampleFunction = (data, ps, rng) => data; + boot.FitFunction = _ => throw new InvalidOperationException("Regular fit should not be used by pivotal bootstrap."); + int index = -1; + boot.FitWithCovarianceFunction = _ => + { + int next = System.Threading.Interlocked.Increment(ref index); + return Fit(new[] { samples[next] }, new double[,] { { 1d } }); + }; + + boot.RunPivotalBootstrap(); + + Assert.HasCount(samples.Length, boot.RawBootstrapFits); + Assert.HasCount(samples.Length, boot.BootstrapParameterSets); + } + + /// + /// Verifies regular bootstrap ignores pivotal-only properties and delegates. + /// + [TestMethod] + public void Run_RegularBootstrap_IgnoresPivotalOnlyProperties() + { + var boot = new Bootstrap(new[] { 1d }, new ParameterSet(new[] { 10d }, double.NaN)); + boot.Replicates = 3; + boot.MaxRetries = 1; + boot.ResampleFunction = (data, ps, rng) => data; + boot.FitFunction = _ => new ParameterSet(new[] { 11d }, double.NaN); + boot.StatisticFunction = ps => new[] { ps.Values[0] }; + boot.FitWithCovarianceFunction = _ => throw new InvalidOperationException("Pivotal fit should not be used by regular bootstrap."); + boot.PivotalLinkFactory = _ => throw new InvalidOperationException("Pivotal links should not be used by regular bootstrap."); + boot.PivotalReplicateFilter = _ => throw new InvalidOperationException("Pivotal filters should not be used by regular bootstrap."); + boot.OriginalCovariance = new Matrix(new[,] { { 1d } }); + + boot.Run(); + + Assert.HasCount(3, boot.BootstrapParameterSets); + Assert.HasCount(0, boot.RawBootstrapParameterSets); + } + + /// + /// Verifies missing covariance-aware fit delegate produces a clear failure. + /// + [TestMethod] + public void RunPivotalBootstrap_WithoutFitWithCovarianceFunction_Throws() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var boot = CreatePivotalBootstrap(parent); + boot.ResampleFunction = (data, ps, rng) => data; + boot.FitWithCovarianceFunction = null; + + var exception = Assert.Throws(() => boot.RunPivotalBootstrap()); + StringAssert.Contains(exception.Message, nameof(Bootstrap.FitWithCovarianceFunction)); + } + + /// + /// Verifies missing original covariance produces a clear failure. + /// + [TestMethod] + public void RunPivotalBootstrap_WithoutOriginalCovariance_Throws() + { + var boot = new Bootstrap(new[] { 1d }, new ParameterSet(new[] { 10d }, double.NaN)); + boot.ResampleFunction = (data, ps, rng) => data; + boot.FitWithCovarianceFunction = _ => Fit(new[] { 10d }, new double[,] { { 1d } }); + + var exception = Assert.Throws(() => boot.RunPivotalBootstrap()); + StringAssert.Contains(exception.Message, nameof(Bootstrap.OriginalCovariance)); + } + + /// + /// Verifies incompatible confidence interval methods are rejected after a pivotal run. + /// + [TestMethod] + public void GetConfidenceIntervals_BCaAfterPivotalRun_Throws() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var raw = Fit(new[] { 8d }, new double[,] { { 1d } }); + var boot = CreatePivotalBootstrap(parent); + boot.RegularizePivotalCovariances = false; + boot.TransformPivotalBootstrap(new[] { raw }); + + var exception = Assert.Throws(() => boot.GetConfidenceIntervals(BootstrapCIMethod.BCa)); + StringAssert.Contains(exception.Message, "Only percentile"); + } + + /// + /// Verifies raw pivotal confidence intervals require a pivotal run first. + /// + [TestMethod] + public void GetRawPivotalConfidenceIntervals_BeforePivotalRun_Throws() + { + var parent = Fit(new[] { 10d }, new double[,] { { 4d } }); + var boot = CreatePivotalBootstrap(parent); + + Assert.Throws(() => boot.GetRawPivotalConfidenceIntervals()); + } + + /// + /// Verifies repeated pivotal runs with the same seed retain reproducibility. + /// + [TestMethod] + public void RunPivotalBootstrap_WithSameSeed_IsReproducible() + { + Bootstrap first = CreateSeededNormalPivotalBootstrap(); + Bootstrap second = CreateSeededNormalPivotalBootstrap(); + + first.RunPivotalBootstrap(); + second.RunPivotalBootstrap(); + + Assert.HasCount(first.BootstrapParameterSets.Length, second.BootstrapParameterSets); + for (int i = 0; i < first.BootstrapParameterSets.Length; i++) + { + Assert.AreEqual(first.BootstrapParameterSets[i].Values[0], second.BootstrapParameterSets[i].Values[0], 1e-12); + Assert.AreEqual(first.BootstrapParameterSets[i].Values[1], second.BootstrapParameterSets[i].Values[1], 1e-12); + } + } + + /// + /// Verifies the normal location-scale pivotal bootstrap produces plausible objective-Bayes-style quantile intervals. + /// + [TestMethod] + public void Run_NormalLocationScale_MatchesObjectiveBayesQuantileIntervals() { const int sampleSize = 100; - const int replicates = 8000; + const int replicates = 2500; var parentDistribution = new Normal(3d, 0.7d); - var parent = new PivotalBootstrapFit( + var parent = new BootstrapFit( new ParameterSet(parentDistribution.GetParameters, double.NaN), new Matrix(parentDistribution.ParameterCovariance(sampleSize, ParameterEstimationMethod.MaximumLikelihood))); double[] originalData = new double[sampleSize]; double[] probabilities = { 0.5d, 0.95d }; + var boot = new Bootstrap(originalData, parent); - var options = new PivotalBootstrapOptions + boot.Replicates = replicates; + boot.PRNGSeed = 12345; + boot.ResampleFunction = (data, ps, rng) => { - Replicates = replicates, - PRNGSeed = 12345, - ResampleFunction = (data, fit, rng) => - { - var distribution = new Normal(fit.Parameters.Values[0], fit.Parameters.Values[1]); - return distribution.GenerateRandomValues(sampleSize, rng.Next()); - }, - FitFunction = sample => FitNormal(sample), - LinkFactory = context => new[] - { - PivotalBootstrapLink.FromLinkFunction(new IdentityLink()), - PivotalBootstrapLink.FromLinkFunction(new LogLink()) - }, - ParameterValidator = values => values[1] > 0d, - StatisticFunction = ps => - { - var distribution = new Normal(ps.Values[0], ps.Values[1]); - return probabilities.Select(distribution.InverseCDF).ToArray(); - } + var distribution = new Normal(ps.Values[0], ps.Values[1]); + return distribution.GenerateRandomValues(sampleSize, rng.Next()); + }; + boot.FitWithCovarianceFunction = FitNormal; + boot.PivotalLinkFactory = _ => new ILinkFunction[] { new IdentityLink(), new LogLink() }; + boot.PivotalParameterValidator = values => values[1] > 0d; + boot.StatisticFunction = ps => + { + var distribution = new Normal(ps.Values[0], ps.Values[1]); + return probabilities.Select(distribution.InverseCDF).ToArray(); }; - PivotalBootstrapResults result = PivotalBootstrap.Run(originalData, parent, options); - - Assert.IsGreaterThan(0.99d * replicates, result.PivotalParameterSets.Length); - double[] mus = result.PivotalParameterSets.Select(ps => ps.Values[0]).ToArray(); - double[] sigmas = result.PivotalParameterSets.Select(ps => ps.Values[1]).ToArray(); - var muPosterior = new Normal(parentDistribution.Mu, parentDistribution.Sigma / Math.Sqrt(sampleSize)); - var sigmaSquaredPosterior = new InverseChiSquared(sampleSize - 1, parentDistribution.Sigma * parentDistribution.Sigma); - - Array.Sort(mus); - double[] sigmaSquares = sigmas.Select(sigma => sigma * sigma).ToArray(); - Array.Sort(sigmaSquares); - - double muKs = GoodnessOfFit.KolmogorovSmirnov(mus, muPosterior); - double sigmaKs = GoodnessOfFit.KolmogorovSmirnov(sigmaSquares, sigmaSquaredPosterior); - - Assert.IsLessThan(0.04d, muKs, $"Mu marginal KS distance was {muKs}."); - Assert.IsLessThan(0.08d, sigmaKs, $"Sigma marginal KS distance was {sigmaKs}."); - - BootstrapStatisticResult[] quantileIntervals = result.GetPivotalStatisticConfidenceIntervals(0.1d); + boot.RunPivotalBootstrap(); + BootstrapResults quantileIntervals = boot.GetConfidenceIntervals(BootstrapCIMethod.Percentile, 0.1d); double[,] objectiveBayesIntervals = parentDistribution.MonteCarloConfidenceIntervals( sampleSize, - 20000, + 12000, probabilities, new[] { 0.05d, 0.95d }); + Assert.IsGreaterThan(0.98d * replicates, boot.BootstrapParameterSets.Length); for (int i = 0; i < probabilities.Length; i++) { - Assert.AreEqual(objectiveBayesIntervals[i, 0], quantileIntervals[i].LowerCI, 0.10d * parentDistribution.Sigma); - Assert.AreEqual(objectiveBayesIntervals[i, 1], quantileIntervals[i].UpperCI, 0.10d * parentDistribution.Sigma); + Assert.AreEqual(objectiveBayesIntervals[i, 0], quantileIntervals.StatisticResults[i].LowerCI, 0.12d * parentDistribution.Sigma); + Assert.AreEqual(objectiveBayesIntervals[i, 1], quantileIntervals.StatisticResults[i].UpperCI, 0.12d * parentDistribution.Sigma); } } - private static PivotalBootstrapFit Fit(double[] values, double[,] covariance) + /// + /// Creates a covariance-aware bootstrap configured with a fixed parent fit. + /// + /// The parent fit. + /// A bootstrap configured for pivotal transformations. + private static Bootstrap CreatePivotalBootstrap(BootstrapFit parent) { - return new PivotalBootstrapFit(new ParameterSet(values, double.NaN), new Matrix(covariance)); + return new Bootstrap(Array.Empty(), parent); } - private static PivotalBootstrapFit FitNormal(double[] sample) + /// + /// Creates a covariance-aware fit from parameter values and covariance entries. + /// + /// The fitted parameter values. + /// The covariance matrix. + /// The covariance-aware fit. + private static BootstrapFit Fit(double[] values, double[,] covariance) + { + return new BootstrapFit(new ParameterSet(values, double.NaN), new Matrix(covariance)); + } + + /// + /// Fits a normal distribution by maximum likelihood and returns parameter covariance. + /// + /// The sample to fit. + /// The covariance-aware normal fit. + private static BootstrapFit FitNormal(double[] sample) { var distribution = new Normal(); ((IEstimation)distribution).Estimate(sample, ParameterEstimationMethod.MaximumLikelihood); if (!distribution.ParametersValid) throw new InvalidOperationException("The normal fit produced invalid parameters."); - return new PivotalBootstrapFit( + return new BootstrapFit( new ParameterSet(distribution.GetParameters, double.NaN), new Matrix(distribution.ParameterCovariance(sample.Length, ParameterEstimationMethod.MaximumLikelihood))); } + /// + /// Creates a reproducible normal location-scale pivotal bootstrap for reproducibility checks. + /// + /// A configured pivotal bootstrap instance. + private static Bootstrap CreateSeededNormalPivotalBootstrap() + { + const int sampleSize = 25; + var distribution = new Normal(3d, 0.7d); + var parent = new BootstrapFit( + new ParameterSet(distribution.GetParameters, double.NaN), + new Matrix(distribution.ParameterCovariance(sampleSize, ParameterEstimationMethod.MaximumLikelihood))); + var boot = new Bootstrap(new double[sampleSize], parent); + boot.Replicates = 50; + boot.PRNGSeed = 8675309; + boot.ResampleFunction = (data, ps, rng) => new Normal(ps.Values[0], ps.Values[1]).GenerateRandomValues(sampleSize, rng.Next()); + boot.FitWithCovarianceFunction = FitNormal; + boot.PivotalLinkFactory = _ => new ILinkFunction[] { null, new LogLink() }; + boot.PivotalParameterValidator = values => values[1] > 0d; + return boot; + } + + /// + /// Computes the expected identity-link pivotal value for a single raw fit. + /// + /// The parent fit. + /// The raw bootstrap fit. + /// The expected pivotal parameter values. + private static double[] ExpectedIdentityPivotalValues(BootstrapFit parent, BootstrapFit raw) + { + var parentCholesky = new CholeskyDecomposition(parent.Covariance); + var rawCholesky = new CholeskyDecomposition(raw.Covariance); + var difference = new double[parent.ParameterCount]; + for (int i = 0; i < difference.Length; i++) + difference[i] = parent.Parameters.Values[i] - raw.Parameters.Values[i]; + + double[] z = rawCholesky.Forward(new Vector(difference)).ToArray(); + double[] reinflated = parentCholesky.L * z; + var expected = new double[difference.Length]; + for (int i = 0; i < expected.Length; i++) + expected[i] = parent.Parameters.Values[i] + reinflated[i]; + return expected; + } + + /// + /// A linear scaling link used to verify the pivotal link factory accepts custom instances. + /// + [Serializable] + private sealed class LinearScaleLink : ILinkFunction + { + private readonly double _scale; + + /// + /// Initializes a new instance of the class. + /// + /// The nonzero linear scale. + public LinearScaleLink(double scale) + { + _scale = scale; + } + + /// + public double Link(double x) + { + return _scale * x; + } + + /// + public double InverseLink(double eta) + { + return eta / _scale; + } + + /// + public double DLink(double x) + { + return _scale; + } + + /// + public System.Xml.Linq.XElement ToXElement() + { + return new System.Xml.Linq.XElement(nameof(LinearScaleLink)); + } + } } } From c9d4b6ac90707f9c43a896cf062d5f95caecdd1c Mon Sep 17 00:00:00 2001 From: HadenSmith Date: Wed, 17 Jun 2026 14:04:01 -0600 Subject: [PATCH 9/9] Changing snapshot so it doesn't run tests. Fixing warnings in the test library. --- .github/workflows/Snapshot.yml | 3 ++- Test_Numerics/Data/Time Series/Test_TimeSeries.cs | 6 +++--- .../Sampling/MCMC/Test_MCMCResults_Recompute.cs | 12 ++++++------ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/Snapshot.yml b/.github/workflows/Snapshot.yml index 764b9077..e2f56de1 100644 --- a/.github/workflows/Snapshot.yml +++ b/.github/workflows/Snapshot.yml @@ -11,7 +11,8 @@ jobs: dotnet-version: '10.0.x' project-names: 'Numerics' version: '2.0.0' - run-tests: true + # PR integration already runs the full multi-target test suite. + run-tests: false nuget-source: 'https://www.hec.usace.army.mil/nexus/repository/consequences-nuget-public/' secrets: NUGET_API_KEY: ${{ secrets.NEXUS_NUGET_APIKEY }} diff --git a/Test_Numerics/Data/Time Series/Test_TimeSeries.cs b/Test_Numerics/Data/Time Series/Test_TimeSeries.cs index 9ed921c0..7aced5d8 100644 --- a/Test_Numerics/Data/Time Series/Test_TimeSeries.cs +++ b/Test_Numerics/Data/Time Series/Test_TimeSeries.cs @@ -1275,7 +1275,7 @@ public void Test_ResampleWithKNN_AdvancesThroughTime() // Pre-fix: avgDrift ~ N(0, ~5) — fails this assertion clearly. // Post-fix: avgDrift ~ +50 (drift = +1 per step over 50 steps). - Assert.IsTrue(avgDrift > 25.0, + Assert.IsGreaterThan(25.0, avgDrift, $"Expected KNN trajectory to advance through the trend (avgDrift > 25 over {steps} steps); " + $"observed avgDrift = {avgDrift:F2}. The pre-fix off-by-one keeps the trajectory near the starting value."); } @@ -1302,7 +1302,7 @@ public void Test_ResampleWithKNN_PreservesMarginalMean() double standardError = inputStd / Math.Sqrt(steps * trials); // Allow 4x SE — generous to absorb short-series boundary effects in KNN. - Assert.IsTrue(Math.Abs(grandMean - inputMean) < 4.0 * standardError + 0.05, + Assert.IsLessThan(4.0 * standardError + 0.05, Math.Abs(grandMean - inputMean), $"KNN tails should preserve the marginal mean; input={inputMean:F4}, grand-mean={grandMean:F4}, " + $"4*SE={4.0 * standardError:F4}."); } @@ -1440,7 +1440,7 @@ public void Test_ResampleWithBlockBootstrap_PreservesMarginalMean() double grandMean = sumOfTailMeans / trials; double standardError = inputStd / Math.Sqrt(steps * trials); - Assert.IsTrue(Math.Abs(grandMean - inputMean) < 4.0 * standardError + 0.05, + Assert.IsLessThan(4.0 * standardError + 0.05, Math.Abs(grandMean - inputMean), $"BlockBootstrap tails should preserve the marginal mean; input={inputMean:F4}, " + $"grand-mean={grandMean:F4}, 4*SE={4.0 * standardError:F4}."); } diff --git a/Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs b/Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs index e5f3aca9..5a8ec980 100644 --- a/Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs +++ b/Test_Numerics/Sampling/MCMC/Test_MCMCResults_Recompute.cs @@ -90,7 +90,7 @@ public void Recompute_PreservesOutputReference() // The Output list must be the same reference — the chain itself is not touched. Assert.AreSame(outputRef, results.Output, "Output reference must not change."); - Assert.AreEqual(1000, results.Output.Count, "Output count must be preserved."); + Assert.HasCount(1000, results.Output, "Output count must be preserved."); } [TestMethod] @@ -102,7 +102,7 @@ public void Recompute_PreservesMAP() results.RecomputeParameterResults(alpha: 0.05); - Assert.AreEqual(valuesBefore.Length, results.MAP.Values.Length, "MAP value count must not change."); + Assert.HasCount(valuesBefore.Length, results.MAP.Values, "MAP value count must not change."); for (int i = 0; i < valuesBefore.Length; i++) Assert.AreEqual(valuesBefore[i], results.MAP.Values[i], 1e-12, $"MAP value [{i}] must not change."); Assert.AreEqual(fitnessBefore, results.MAP.Fitness, 1e-12, "MAP fitness must not change."); @@ -176,9 +176,9 @@ public void Recompute_NarrowsCIWhenAlphaIncreases() { double lower80 = results.ParameterResults[i].SummaryStatistics.LowerCI; double upper80 = results.ParameterResults[i].SummaryStatistics.UpperCI; - Assert.IsTrue(lower80 > lower90[i], + Assert.IsGreaterThan(lower90[i], lower80, $"Parameter {i}: 80% LowerCI ({lower80}) must be > 90% LowerCI ({lower90[i]}) — band narrows when alpha increases."); - Assert.IsTrue(upper80 < upper90[i], + Assert.IsLessThan(upper90[i], upper80, $"Parameter {i}: 80% UpperCI ({upper80}) must be < 90% UpperCI ({upper90[i]}) — band narrows when alpha increases."); } } @@ -197,9 +197,9 @@ public void Recompute_WidensCIWhenAlphaDecreases() { double lower95 = results.ParameterResults[i].SummaryStatistics.LowerCI; double upper95 = results.ParameterResults[i].SummaryStatistics.UpperCI; - Assert.IsTrue(lower95 < lower90[i], + Assert.IsLessThan(lower90[i], lower95, $"Parameter {i}: 95% LowerCI ({lower95}) must be < 90% LowerCI ({lower90[i]}) — band widens when alpha decreases."); - Assert.IsTrue(upper95 > upper90[i], + Assert.IsGreaterThan(upper90[i], upper95, $"Parameter {i}: 95% UpperCI ({upper95}) must be > 90% UpperCI ({upper90[i]}) — band widens when alpha decreases."); } }